Back up your Amazon order history with Python
Ever wanted to download your Amazon order history? Maybe you want to get it into a spreadsheet, or just keep it around in case Amazon decides to delete this information. Here is some Python code to screen-scrape your account pages. It is written for Python 2 and needs the mechanize and BeautifulSoup libraries.
"""Back up an Amazon order history by screen-scraping the account pages.

Usage:
    python amazon.py --username=foo --password=bar --firstyear=2004

If --password is omitted the script prompts for it, which keeps the
password out of the shell history and the process list.
"""
import datetime
import getpass
import pprint
import sys
from optparse import OptionParser

import mechanize
from BeautifulSoup import BeautifulSoup

USAGE = "Usage: python amazon.py --username=foo --password=bar --firstyear=2004"

USER_AGENT = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13"

# The response to this URL is expected to contain the form named "sign-in".
# Fetched over https so the page carrying the login form is not sent in
# clear text.
SIGN_IN_URL = "https://www.amazon.com/gp/flex/sign-out.html"

# NOTE(review): startAtIndex=1000 is kept verbatim from the original script;
# its effect on paging is not evident from this code -- confirm.
ORDERS_URL = "https://www.amazon.com/gp/css/history/orders/view.html?orderFilter=year-%s&startAtIndex=1000"

LOGIN_ERROR = "The e-mail address and password you entered do not match any accounts on record."


def getOptions():
    """Parse the command line and return the optparse options object.

    The returned object has the attributes ``username``, ``password`` and
    ``firstyear``; each is a string, or None when the option was not given.
    """
    parser = OptionParser()
    parser.add_option("--username", help="Amazon account e-mail address")
    parser.add_option("--password", help="Amazon password (prompted for if omitted)")
    parser.add_option("--firstyear", help="year of the oldest order to fetch")
    return parser.parse_args()[0]


def _text(node):
    """Return the concatenated, stripped contents of a parsed tag.

    Returns an empty string when ``node`` is None, so that a tag missing
    from the page does not abort the whole backup.
    """
    if node is None:
        return ""
    return "".join([unicode(s) for s in node.contents]).strip()


def _attr(node, name):
    """Return attribute ``name`` of ``node``, or None if either is missing."""
    if node is None:
        return None
    return node.get(name)


def _parse_orders(html):
    """Extract the ordered items from one order-history page.

    Returns a list with one dict per item, holding the keys ``date``,
    ``title``, ``link`` and ``image``. The date comes from the ``h2`` of
    the enclosing order and is repeated for every item of that order.
    """
    soup = BeautifulSoup(html)
    items = []
    for order in soup.findAll("div", {"class": "order"}):
        date = _text(order.find("h2"))
        # NOTE(review): the class value "item " has a trailing space, kept
        # verbatim -- presumably it matches the page markup; confirm.
        for item in order.findAll("li", {"class": "item "}):
            items.append({
                "date": date,
                "title": _text(item.find("span", {"class": "item-title"})),
                "link": _attr(item.find("a"), "href"),
                "image": _attr(item.find("img"), "src"),
            })
    return items


def main():
    """Log in, fetch every year's order page and pretty-print the items."""
    options = getOptions()
    if not options.username or not options.firstyear:
        print(USAGE)
        sys.exit(1)
    try:
        first_year = int(options.firstyear)
    except ValueError:
        print("--firstyear must be a year such as 2004, got %r" % options.firstyear)
        sys.exit(1)
    password = options.password or getpass.getpass("Amazon password: ")

    br = mechanize.Browser()
    br.set_handle_robots(False)
    br.addheaders = [("User-agent", USER_AGENT)]
    br.open(SIGN_IN_URL)
    br.select_form(name="sign-in")
    br["email"] = options.username
    br["password"] = password
    logged_in = br.submit()
    if LOGIN_ERROR in logged_in.read():
        print(LOGIN_ERROR)
        sys.exit(1)

    orders = []
    # range() excludes its upper bound, so add 1 to include the current year.
    last_year = datetime.datetime.now().year
    for year in range(first_year, last_year + 1):
        orders_html = br.open(ORDERS_URL % year)
        # extend (not append) keeps the result a flat list of item dicts.
        orders.extend(_parse_orders(orders_html.read()))

    if not orders:
        print("No orders found.")
        sys.exit(1)

    pprint.PrettyPrinter(indent=4).pprint(orders)


if __name__ == '__main__':
    main()
# Usage example: your username and password are your Amazon login; firstyear is the year of your oldest order. python amazon.py --username=foo --password=bar --firstyear=2004
# Example output (Python pprint format, not strict JSON) [ { 'date': u'November 26, 2007', 'image': u'https://images-na.ssl-images-amazon.com/images/I/517SDCCC3KL._SX100_.jpg', 'link': u'http://www.amazon.com/gp/product/0847827852/ref=oss_product', 'title': u'Chip Kidd: Book One: Work: 1986-2006 (Chip Kidd)'}, { 'date': u'November 23, 2007', 'image': u'https://images-na.ssl-images-amazon.com/images/I/118MVgvXzoL._SX100_.jpg', 'link': u'http://www.amazon.com/gp/product/B000OUP9NE/ref=oss_product', 'title': u'Canon Black Ink Cartridge - PGI5'}, { 'date': u'November 23, 2007', 'image': u'https://images-na.ssl-images-amazon.com/images/I/41sW%2B3x4VJL._SY100_.jpg', 'link': u'http://www.amazon.com/gp/product/B000BUWNH2/ref=oss_product', 'title': u'Canon CLI-8 4-Color Multipack Ink Tanks'}, { 'date': u'November 22, 2007', 'image': u'https://images-na.ssl-images-amazon.com/images/I/51YT5W1SEeL._SY100_.jpg', 'link': u'http://www.amazon.com/gp/product/B0002F5E0E/ref=oss_product', 'title': u'Meinl Kenny Aronoff Steel Bell Series Cowbell, 8 Inches'}, ...
Please drop me a comment if you have any bug fixes or interesting use cases for this code.