Ever wanted to download your Amazon order history? Maybe you want to get it into a spreadsheet, or just keep it around in case Amazon decides to delete this information. Here is some Python 2 code, using the mechanize and BeautifulSoup libraries, that screen-scrapes your account pages.

import sys
from optparse import OptionParser
import datetime
import mechanize
from BeautifulSoup import BeautifulSoup
import pprint

"""
Usage: python amazon.py --username=foo --password=bar --firstyear=2004
"""

def getOptions():
    """Parse the command line and return the populated options object.

    Registers the ``--username``, ``--password`` and ``--firstyear``
    options with optparse defaults, so each value is a string, or None
    when the option is omitted. Positional arguments are discarded.
    """
    parser = OptionParser()
    for flag in ("--username", "--password", "--firstyear"):
        parser.add_option(flag)
    parsed_options, _positional = parser.parse_args()
    return parsed_options

def _text(node):
      return "".join([unicode(s) for s in node.contents]).strip()

def _parse_orders(html):
    """Extract one record per purchased item from an order-history page.

    Walks every ``<div class="order">`` in ``html`` and, inside each one,
    every ``<li class="item ">``. Each item yields a dict with the keys
    ``date`` (text of the order's ``<h2>``), ``title`` (text of the
    ``item-title`` span), ``link`` (``href`` of the first ``<a>``) and
    ``image`` (``src`` of the first ``<img>``).

    A missing element no longer aborts the parse: ``date`` and ``title``
    fall back to an empty string, ``link`` and ``image`` to None.

    Returns a flat list of these dicts, empty when nothing matched.
    """
    soup = BeautifulSoup(html)
    orders = []
    for order in soup.findAll("div", {"class": "order"}):
        date = order.find("h2")
        # The date is shared by every item of the order, so compute it once.
        date_text = _text(date) if date is not None else ""
        # NOTE(review): the trailing space in "item " is kept from the
        # original; presumably it mirrors the page's class attribute - confirm.
        for item in order.findAll("li", {"class": "item "}):
            title = item.find("span", {"class": "item-title"})
            link = item.find("a")
            image = item.find("img")
            orders.append({
                "date": date_text,
                "title": _text(title) if title is not None else "",
                # find() gives None when nothing matched; .get() gives None
                # when the tag lacks the attribute.
                "link": link.get("href") if link is not None else None,
                "image": image.get("src") if image is not None else None,
            })
    return orders

if __name__ == '__main__':
    # Script entry point: sign in, fetch one order-history page per year
    # from --firstyear up to and including the current year, and
    # pretty-print every item found.

    options = getOptions()

    # optparse leaves omitted options as None; stop with a usage hint
    # instead of failing later on int(None) or on the form fields.
    if not (options.username and options.password and options.firstyear):
        print("Usage: python amazon.py --username=foo --password=bar --firstyear=2004")
        sys.exit(1)
    try:
        first_year = int(options.firstyear)
    except ValueError:
        print("--firstyear must be a year such as 2004")
        sys.exit(1)

    br = mechanize.Browser()
    br.set_handle_robots(False)
    br.addheaders = [("User-agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13")]

    # The page returned here is expected to contain the form named
    # "sign-in" that is selected next.
    br.open("http://www.amazon.com/gp/flex/sign-out.html")

    br.select_form(name="sign-in")
    br["email"] = options.username
    br["password"] = options.password
    logged_in = br.submit()

    error_str = "The e-mail address and password you entered do not match any accounts on record."
    if error_str in logged_in.read():
        print(error_str)
        sys.exit(1)

    orders = []
    # range() excludes its upper bound, so add 1 to include the current year.
    this_year = datetime.datetime.now().year
    for year in range(first_year, this_year + 1):
        orders_html = br.open("https://www.amazon.com/gp/css/history/orders/view.html?orderFilter=year-%s&startAtIndex=1000" % year)
        # extend (not append) keeps `orders` a flat list of item dicts
        # rather than a list of per-year lists.
        orders.extend(_parse_orders(orders_html.read()))

    if not orders:
        print("No orders found.")
        sys.exit(1)

    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(orders)

# Usage example: your username and password are your Amazon login, and firstyear is the year of your oldest order.
python amazon.py --username=foo --password=bar --firstyear=2004
# Example output (pretty-printed Python data, not JSON)
    [   {   'date': u'November 26, 2007',
            'image': u'https://images-na.ssl-images-amazon.com/images/I/517SDCCC3KL._SX100_.jpg',
            'link': u'http://www.amazon.com/gp/product/0847827852/ref=oss_product',
            'title': u'Chip Kidd: Book One: Work: 1986-2006 (Chip Kidd)'},
        {   'date': u'November 23, 2007',
            'image': u'https://images-na.ssl-images-amazon.com/images/I/118MVgvXzoL._SX100_.jpg',
            'link': u'http://www.amazon.com/gp/product/B000OUP9NE/ref=oss_product',
            'title': u'Canon Black Ink Cartridge - PGI5'},
        {   'date': u'November 23, 2007',
            'image': u'https://images-na.ssl-images-amazon.com/images/I/41sW%2B3x4VJL._SY100_.jpg',
            'link': u'http://www.amazon.com/gp/product/B000BUWNH2/ref=oss_product',
            'title': u'Canon CLI-8 4-Color Multipack Ink Tanks'},
        {   'date': u'November 22, 2007',
            'image': u'https://images-na.ssl-images-amazon.com/images/I/51YT5W1SEeL._SY100_.jpg',
            'link': u'http://www.amazon.com/gp/product/B0002F5E0E/ref=oss_product',
            'title': u'Meinl Kenny Aronoff Steel Bell Series Cowbell,  8 Inches'},

        ...

Please drop me a comment if you have any bug fixes or interesting use cases for this code.