Browse Source

starting to flesh out instance from scraper method

partial
Raphael Roberts 7 years ago
parent
commit
cda0a97959
  1. 13
      scraper.py

13
scraper.py

@ -19,8 +19,9 @@ def datetime2date_time(dtime,mode):
def seconds_from_midnight(t): def seconds_from_midnight(t):
return t.hour*60**2+ t.minute*60+t.second return t.hour*60**2+ t.minute*60+t.second
class Class: class Class:
def __init__(self,title,session,days,location,time_range,date_range):
def __init__(self,title,abrv,session,days,location,time_range,date_range,lab):
self.title = title self.title = title
self.abrv = abrv
self.session = session self.session = session
self.days = days self.days = days
self.location = location self.location = location
@ -31,7 +32,7 @@ class Class:
def scrape(self,data): def scrape(self,data):
info,times = data info,times = data
# info # info
self.title,self.abrv,self.session = info.find('caption').text.split(' - ')
title,abrv,sesession = info.find('caption').text.split(' - ')
self.lab = None self.lab = None
self.session = int(self.session) self.session = int(self.session)
rows = info.find_all('tr') rows = info.find_all('tr')
@ -60,15 +61,17 @@ class Class:
# time # time
headers,*data = times.find_all('tr') headers,*data = times.find_all('tr')
headers = (header.text.lower() for header in headers.find_all('th'))
if len(data) > 1: if len(data) > 1:
data,lab = data[:2]
data,lab = map(lambda row: parse_horz_row(headers,row),data[:2])
else else
lab = None lab = None
data = data[0] data = data[0]
data = (col.text for col in data.find_all('td'))
headers = (header.text.lower() for header in headers.find_all('th'))
def parse_horz_row(headers,row): def parse_horz_row(headers,row):
data = (col.text for col in row.find_all('td'))
ret = {} ret = {}
time_data = dict(zip(headers,data)) time_data = dict(zip(headers,data))
if time_data['time'] == 'TBA': if time_data['time'] == 'TBA':

Loading…
Cancel
Save