blogger backup
Want to back up your Blogger posts? Here is a quick Python script that does just that. It creates an HTML file for each post, using the post's title as the file name and the post's publication date as the file's timestamp.
# Back up Blogger posts: download a blog's RSS feed and write one HTML file
# per post, named after the post title and timestamped with its pubDate.
# Runs on both Python 2 and Python 3.
import calendar
import os
import urllib
import re
from contextlib import closing
from email.utils import parsedate_tz
from optparse import OptionParser
from xml import sax
from xml.sax import ContentHandler
import time
from time import mktime

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen


class FileWriterContentHandler(ContentHandler):
    """SAX handler that writes one HTML file per RSS <item>.

    The text of every element is stored on the handler as an attribute named
    after the tag (e.g. ``self.title``, ``self.pubDate``).  When the
    <description> of an item closes, the post is written to ``dir`` using the
    most recently seen title and pubDate.
    """

    seenItem = False

    def __init__(self, dir):
        """Remember the output directory ``dir`` and start with empty text."""
        # ContentHandler is an old-style class on Python 2, so call the base
        # initializer explicitly instead of via super().
        ContentHandler.__init__(self)
        self.dir = dir
        self.content = ""

    def startElement(self, tag, attrs):
        """Note when the first <item> is reached and reset the text buffer."""
        if tag == "item":
            self.seenItem = True
        # Every element starts with a fresh buffer so endElement sees only
        # the text of the element that is closing.
        self.content = ""

    def characters(self, content):
        """Accumulate text; the parser may deliver one node in several chunks."""
        self.content += content

    def endElement(self, tag):
        """Store the element text and write the post once <description> ends."""
        setattr(self, tag, self.content)
        # The channel header has its own <description>; only descriptions
        # that come after the first <item> belong to a post.
        if self.seenItem and tag == "description":
            # <description> is the last tag needed to write the file.
            fileName = createFileName(self.dir, getattr(self, "title"))
            writeFile(fileName, self.content)
            setFileDateTime(fileName, makeUnixTime(getattr(self, "pubDate")))


def createFileName(dir, title):
    """Return the path of the HTML file for ``title`` inside ``dir``.

    Characters other than letters, digits, space, ``.``, ``(`` and ``)`` are
    removed, because a name such as "blog 1/2.html" cannot be saved.  A
    leading ``~`` in the resulting path is expanded.
    """
    safeTitle = re.sub(r"[^A-Za-z0-9 .()]", "", title)
    return os.path.expanduser(os.path.join(dir, safeTitle + ".html"))


def writeFile(fileName, content):
    """Print ``fileName`` and write ``content`` to it encoded as UTF-8."""
    print(fileName)
    # Binary mode: the payload is already-encoded bytes (a text-mode file
    # rejects bytes on Python 3), and the handle is closed deterministically.
    with open(fileName, "wb") as output:
        output.write(content.encode("utf-8"))


def setFileDateTime(path, time):
    """Set both the access and modification time of ``path`` to ``time``."""
    os.utime(path, (time, time))


def getOptions(args=None):
    """Parse the command line and return the options object.

    ``args`` is the argument list to parse; ``None`` (the default) parses
    ``sys.argv[1:]``.  The result has ``blogID`` and ``dir`` attributes.
    Exits with a usage error when ``--blogID`` is missing.
    """
    arguments = OptionParser()
    arguments.add_option("--blogID", dest="blogID",
                         help="numeric Blogger blog ID")
    arguments.add_option("--dir", dest="dir", default=".",
                         help="output directory (default: current directory)")
    options = arguments.parse_args(args)[0]
    if not options.blogID:
        # Without this check the literal text "None" ends up in the feed URL.
        arguments.error("--blogID is required")
    return options


# Example: "Mon, 09 Feb 2009 20:14:00 +0000"
def makeUnixTime(pubDate):
    """Convert an RFC 822 date string to a Unix timestamp (float seconds).

    The UTC offset in ``pubDate`` is honoured; a missing offset is treated
    as UTC.  Raises ValueError when ``pubDate`` cannot be parsed.
    """
    parsed = parsedate_tz(pubDate)
    if parsed is None:
        raise ValueError("unrecognised date: %r" % (pubDate,))
    utcOffset = parsed[9] or 0
    # calendar.timegm reads the fields as UTC.  time.mktime would read them
    # as local time and shift every timestamp by the machine's UTC offset.
    return float(calendar.timegm(parsed) - utcOffset)


def downloadRSS(blogID):
    """Download the RSS feed of blog ``blogID`` and return the raw XML."""
    # NOTE(review): the URL sets no max-results, so presumably only the
    # feed's first page of posts is returned - confirm for large blogs.
    url = "http://www.blogger.com/feeds/%s/posts/default?alt=rss&prettyprint=true&start-index=1" % blogID
    print("Backing up %s" % url)
    with closing(urlopen(url)) as response:
        return response.read()


if __name__ == '__main__':
    options = getOptions()
    # Create the output directory up front so the first write cannot fail
    # just because the directory does not exist yet.
    outputDir = os.path.expanduser(options.dir)
    if not os.path.isdir(outputDir):
        os.makedirs(outputDir)
    rssXml = downloadRSS(options.blogID)
    sax.parseString(rssXml, FileWriterContentHandler(options.dir))
To run the program, first find your Blogger blog ID. If you view the source of your blog, the ID is the long number in the feed link near the top of the page:
<link rel="service.post" type="application/atom+xml" title="bitkickers - Atom" href="http://www.blogger.com/feeds/7663029716914672257/posts/default" />
Then, execute the script like so:
python backup.py --blogID 7663029716914672257 --dir ./output
Update: tested on Windows/Linux