import calendar
import urllib2

from bs4 import BeautifulSoup

# Scrape one daily temperature reading per day of YEAR for airport station
# STATION from wunderground.com and store the results as CSV rows of the
# form "YYYYMMDD,temperature".
YEAR = 2009
STATION = "KBUF"

# Position of the wanted reading among the page's "wx-value" elements:
# [0] Mean Actual Temp    [1] Mean Average Temp
# [2] Max Actual Temp     [3] Max Average Temp     [4] Max Record Temp
# [5] Min Actual Temp     [6] Min Average Temp     [7] Min Record Temp
TEMP_INDEX = 0

# Create / open a file called wunderdata.txt which will be a CSV file.
# The with-block closes the file even if a download or parse step raises.
with open('wunderdata.txt', 'w') as f:
    # Iterate through months and days
    for m in range(1, 13):
        # monthrange() returns (weekday of the 1st, number of days in month),
        # so the day loop stops at the real end of each month.
        days_in_month = calendar.monthrange(YEAR, m)[1]
        for d in range(1, days_in_month + 1):
            # Zero-padded YYYYMMDD stamp, used for both the progress message
            # and the CSV row so the two always agree and stay unambiguous.
            timestamp = '%d%02d%02d' % (YEAR, m, d)
            print("Getting data for " + timestamp)

            # Open wunderground.com url (month and day are NOT zero-padded
            # in the URL path).
            url = ("http://www.wunderground.com/history/airport/%s/%d/%d/%d"
                   "/DailyHistory.html" % (STATION, YEAR, m, d))
            page = urllib2.urlopen(url)
            try:
                # BeautifulSoup reads the whole response while parsing, so
                # the connection can be released right afterwards.
                soup = BeautifulSoup(page, "html.parser")
            finally:
                page.close()

            # Get temperature from page
            readings = soup.findAll(attrs={"class": "wx-value"})
            dayTemp = readings[TEMP_INDEX].get_text()

            # Write timestamp and temperature to file
            f.write(timestamp + ',' + dayTemp + '\n')

# Done getting data! The file has been closed by the with-block.