Browse Source

renamed test file to schedule.html

partial
Raphael Roberts 7 years ago
parent
commit
510a8ee489
  1. 0
      schedule.html
  2. 13
      scraper.py

Student Detail Schedule.html → schedule.html

13
scraper.py

@ -19,22 +19,21 @@ def datetime2date_time(dtime,mode):
def seconds_from_midnight(t):
    """Convert the hour/minute/second fields of *t* into seconds since 00:00:00.

    *t* may be any object exposing ``hour``, ``minute`` and ``second``
    attributes; no other attribute is read.
    """
    hour_part = t.hour * 60 ** 2
    minute_part = t.minute * 60
    return hour_part + minute_part + t.second
class Class:
def __init__(self,title,abrv,session,days,location,time_range,date_range,lab):
def __init__(self,title,abrv,session,days,location,time_range,date_range,lab=None):
self.title = title
self.abrv = abrv
self.session = session
self.days = days
self.location = location
self.time_range = time_range
self.lab = None
self.lab = lab
self.date_range = date_range
# data is a list of two html tables
def scrape(self,data):
info,times = data
# info
title,abrv,sesession = info.find('caption').text.split(' - ')
self.lab = None
self.session = int(self.session)
session = int(self.session)
rows = info.find_all('tr')
for row in rows:
name = row.find('th').text.rstrip(':')
@ -64,11 +63,13 @@ class Class:
headers = (header.text.lower() for header in headers.find_all('th'))
if len(data) > 1:
data,lab = map(lambda row: parse_horz_row(headers,row),data[:2])
lab = Class(title + " - Lab",abrv,session,**lab)
else
else:
lab = None
data = data[0]
return Class(title,abrv,session,lab=lab,**data)
def parse_horz_row(headers,row):
data = (col.text for col in row.find_all('td'))
@ -109,5 +110,5 @@ def get_classes(page):
if __name__ == "__main__":
with open('schedule.html') as file:
page = BS(file.read(),'lxml')
page = BS(file.read())
class1,*classes = get_classes(page)
Loading…
Cancel
Save