blogger backup
Want to backup your Blogger posts? Here is a quick Python script that does just that. It will create an HTML file for each post, with the subject of the post as the file name and the post date as the file timestamp.
import os
import urllib
import re
from optparse import OptionParser
from xml import sax
from xml.sax import ContentHandler
import time
from time import mktime
class FileWriterContentHandler(ContentHandler):
seenItem = False
def __init__(self, dir):
self.dir = dir
def startElement(self, tag, attrs):
if (tag == "item"): self.seenItem = True
self.content = ""
def characters(self, content):
self.content += content # the content for a single XML node will get passed-in in chuncks
def endElement(self, tag):
setattr(self, tag, self.content)
if self.seenItem: # there is a description tag in the header as well, only write a file if you're past that
if (tag == "description"): # the last tag we need to write the file
fileName = createFileName(self.dir, getattr(self, "title"))
writeFile(fileName, self.content)
setFileDateTime(fileName, makeUnixTime(getattr(self, "pubDate")))
def createFileName(dir, title):
# remove any special characters from the file name, ie can't save a file with the name "blog 1/2.html"
return os.path.expanduser(os.path.join(dir, re.sub("[^A-Za-z0-9 .()]", "", title) + ".html"))
def writeFile(fileName, content):
print fileName
open(fileName, "w").write(content.encode("utf-8")) # the file will be closed as soon as the program is done
def setFileDateTime(path, time):
os.utime(path, (time, time))
def getOptions():
arguments = OptionParser()
arguments.add_options(["--blogID", "--dir"])
return arguments.parse_args()[0] # options
# Example: "Mon, 09 Feb 2009 20:14:00 +0000"
def makeUnixTime(pubDate):
return mktime(time.strptime(pubDate, "%a, %d %b %Y %H:%M:%S +0000"))
def downloadRSS(blogID):
url = "http://www.blogger.com/feeds/%s/posts/default?alt=rss&prettyprint=true&start-index=1" % blogID
print "Backing up %s" % url
return urllib.urlopen(url).read()
if __name__ == '__main__':
options = getOptions()
rssXml = downloadRSS(options.blogID)
sax.parseString(rssXml, FileWriterContentHandler(options.dir))
To run the program, first find your blogger ID. If you view the source of your blog, it will be at the top in the RSS feed link:
<link rel="service.post" type="application/atom+xml" title="bitkickers - Atom" href="http://www.blogger.com/feeds/7663029716914672257/posts/default" />
Then, execute the script like so:
python backup.py --blogID 7663029716914672257 --dir ./output
Update: tested on Windows/Linux