import urllib.request from bs4 import BeautifulSoup inputpage = urllib.request.urlopen("http://gnats.netbsd.org/summary/year/2012-perf.html") page = inputpage.read() soup = BeautifulSoup(page) names = [] cdict = {0:[], 1:[]} # dictionary of "td positions to contents" tables = soup.findAll('table') for tt in tables[1].find_all('tr')[1:]: # skip first