blogger backup

Want to back up your Blogger posts? Here is a quick Python script that does just that. It creates an HTML file for each post, using the post's title as the file name and the post's date as the file's timestamp.

import os
import urllib
import re  
from optparse import OptionParser
from xml import sax
from xml.sax import ContentHandler
import time
from time import mktime

class FileWriterContentHandler(ContentHandler):
    """SAX handler that writes each post of an RSS feed to its own HTML file.

    The text of every element is stored on the handler as an attribute named
    after the tag (for example ``self.title`` and ``self.pubDate``).  Once an
    ``item`` element has been seen, each closing ``description`` element
    causes a file to be written: it is named after the most recent ``title``,
    holds the description text, and is stamped with the most recent
    ``pubDate``.

    NOTE(review): because tag names become attribute names, an element called
    ``dir``, ``content`` or ``seenItem`` would overwrite the handler's own
    state.
    """

    seenItem = False  # class-level default; set on the instance by the first <item>

    def __init__(self, dir):
        """Create a handler that writes its files into the directory *dir*."""
        ContentHandler.__init__(self)  # let the base class set up its own state
        self.dir = dir
        # Start with an empty buffer so characters() is safe even if it is
        # called before any startElement().
        self.content = ""

    def startElement(self, tag, attrs):
        """Note the first <item> and start collecting text for the new element."""
        if tag == "item":
            self.seenItem = True
        self.content = ""

    def characters(self, content):
        """Append a chunk of text; one XML node may arrive in several chunks."""
        self.content += content

    def endElement(self, tag):
        """Store the element's text and write the post file on </description>."""
        setattr(self, tag, self.content)
        # The feed header has a description tag as well, so nothing is written
        # until an item has been seen.
        if not (self.seenItem and tag == "description"):
            return
        # description is treated as the last tag needed for the file: title
        # and pubDate must already have been closed by this point.
        fileName = createFileName(self.dir, self.title)
        writeFile(fileName, self.content)
        setFileDateTime(fileName, makeUnixTime(self.pubDate))
                
def createFileName(dir, title):
    """Return the path of the HTML file for a post called *title* inside *dir*.

    Every character of the title other than ASCII letters, digits, space,
    dot and parentheses is dropped, because a name such as "blog 1/2.html"
    cannot be saved.  A leading "~" in the resulting path is expanded.
    """
    safeTitle = re.sub("[^A-Za-z0-9 .()]", "", title)
    joined = os.path.join(dir, safeTitle + ".html")
    return os.path.expanduser(joined)
    
def writeFile(fileName, content):
    """Print *fileName*, then write *content* to it encoded as UTF-8.

    An existing file of the same name is replaced.  The file is closed before
    this function returns, so a timestamp applied to it afterwards is not
    disturbed by a late flush.
    """
    print(fileName)
    # Binary mode: the data is already encoded to bytes, so it must reach the
    # disk unchanged (no newline translation, no text-mode type errors).
    with open(fileName, "wb") as out:
        out.write(content.encode("utf-8"))
    
def setFileDateTime(path, time):
    """Set both the access time and the modification time of *path* to *time*.

    *time* is passed straight to os.utime for both values.
    """
    stamps = (time, time)  # (access time, modification time)
    os.utime(path, stamps)

def getOptions(args=None):
    """Parse the command line and return the parsed options.

    args: optional list of argument strings; when None, optparse falls back
        to sys.argv[1:].

    Returns the optparse values object, with the attributes ``blogID`` and
    ``dir``.  Both options are required: if either is missing the parser
    prints a usage error and exits, instead of returning None for it.
    """
    parser = OptionParser()
    parser.add_option("--blogID", help="numeric ID of the blog to back up")
    parser.add_option("--dir", help="directory the HTML files are written to")
    options = parser.parse_args(args)[0]  # element 1 holds positional args, unused here
    for name in ("blogID", "dir"):
        if getattr(options, name) is None:
            parser.error("--%s is required" % name)
    return options

def makeUnixTime(pubDate):
    """Convert a feed pubDate string to seconds since the Unix epoch.

    Example input: "Mon, 09 Feb 2009 20:14:00 +0000".  Only the literal
    "+0000" offset is accepted; any other string makes strptime raise
    ValueError.

    Returns the timestamp as a float.
    """
    import calendar  # local import: only this function needs it

    parsed = time.strptime(pubDate, "%a, %d %b %Y %H:%M:%S +0000")
    # The string is explicitly UTC ("+0000"), so the fields must be read as
    # UTC.  mktime would read them as local time and shift the result by the
    # machine's UTC offset.
    return float(calendar.timegm(parsed))

def downloadRSS(blogID):
    """Download the RSS feed of the blog *blogID* and return the response body.

    The feed URL is printed before the request is made.

    NOTE(review): a single request is made starting at index 1; whether the
    server returns every post in that one response is not checked here.
    """
    url = "http://www.blogger.com/feeds/%s/posts/default?alt=rss&prettyprint=true&start-index=1" % blogID
    print("Backing up %s" % url)
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # Release the connection explicitly rather than leaving it to
        # garbage collection.
        response.close()

if __name__ == '__main__':
    # Script entry point: read the options, fetch the feed, then let the SAX
    # handler write one file per post while the feed is parsed.
    opts = getOptions()
    feedXml = downloadRSS(opts.blogID)
    handler = FileWriterContentHandler(opts.dir)
    sax.parseString(feedXml, handler)

To run the program, first find your Blogger blog ID. If you view the source of your blog, it is the long number in the feed link near the top:

<link rel="service.post" type="application/atom+xml" title="bitkickers - Atom" href="http://www.blogger.com/feeds/7663029716914672257/posts/default" />

Then create the output directory if it does not already exist (the script does not create it), and execute the script like so:

python backup.py --blogID 7663029716914672257 --dir ./output

Update: tested on Windows/Linux

Chase Seibert