"""Scrape one temperature reading per day of 2009 for airport KBUF.

For every calendar day of the year the script downloads that day's
"DailyHistory" page from wunderground.com, pulls one "wx-value" element
out of the HTML and appends a ``YYYYMMDD,<value>`` row to wunderdata.txt.
"""
import calendar
import contextlib
import urllib2

from bs4 import BeautifulSoup

YEAR = 2009
OUTPUT_PATH = 'wunderdata.txt'
URL_TEMPLATE = ("http://www.wunderground.com/history/airport/KBUF/"
                "%d/%d/%d/DailyHistory.html")

# Position of the wanted reading among the page's "wx-value" elements:
#   [0] Mean Actual Temp   [1] Mean Average Temp
#   [2] Max Actual Temp    [3] Max Average Temp    [4] Max Record Temp
#   [5] Min Actual Temp    [6] Min Average Temp    [7] Min Record Temp
WX_VALUE_INDEX = 0

# Create / open the output file, which will hold the CSV rows. The "with"
# block guarantees it is flushed and closed even when a download or the
# HTML lookup fails part-way through the year.
with open(OUTPUT_PATH, 'w') as f:
    for m in range(1, 13):
        # monthrange() returns (weekday of the 1st, number of days in month).
        days_in_month = calendar.monthrange(YEAR, m)[1]
        for d in range(1, days_in_month + 1):
            # Zero-padded YYYYMMDD; unpadded, Jan 11 and Nov 1 would both
            # read "2009111". The same stamp is used for the progress
            # message and for the CSV row.
            timestamp = '%04d%02d%02d' % (YEAR, m, d)
            print("Getting data for " + timestamp)

            # The URL itself uses unpadded month/day numbers.
            url = URL_TEMPLATE % (YEAR, m, d)

            # urllib2 responses are not context managers on Python 2, so
            # closing() is used to release the connection after parsing.
            with contextlib.closing(urllib2.urlopen(url)) as page:
                soup = BeautifulSoup(page, "html.parser")

            # Raises IndexError if the page has no such element; the
            # rows written so far are kept because the file is closed
            # by the enclosing "with".
            readings = soup.findAll(attrs={"class": "wx-value"})
            day_temp = readings[WX_VALUE_INDEX].get_text()

            # Write timestamp and temperature to file.
            f.write(timestamp + ',' + day_temp + '\n')