6 changed files with 261 additions and 261 deletions
-
6Student Detail Schedule.html
-
172body_create.py
-
8gcalendar.py
-
92get_classes.py
-
24main.py
-
220scraper.py
@ -1,87 +1,87 @@ |
|||||
from dateutil import rrule |
|
||||
from gcalendar import dateTime |
|
||||
import datetime |
|
||||
import pickle |
|
||||
import json |
|
||||
import re |
|
||||
# Street address stamped onto every generated event's 'location' field.
LOCATION = "5500 St Louis Ave, Chicago, IL 60625"

# NOTE(review): unused sample event body copied from the Google Calendar API
# documentation; kept only as a reference for the field layout that
# create_body() targets. Nothing in this module reads it.
event = {
  'summary': 'Google I/O 2015',
  'location': '800 Howard St., San Francisco, CA 94103',
  'description': 'A chance to hear more about Google\'s developer products.',
  'start': {
    'dateTime': '2015-05-28T09:00:00-07:00',
    'timeZone': 'America/Los_Angeles',
  },
  'end': {
    'dateTime': '2015-05-28T17:00:00-07:00',
    'timeZone': 'America/Los_Angeles',
  },
  'recurrence': [
    'RRULE:FREQ=DAILY;COUNT=2'
  ],
  'attendees': [
    {'email': 'lpage@example.com'},
    {'email': 'sbrin@example.com'},
  ],
  'reminders': {
    'useDefault': False,
    'overrides': [
      {'method': 'email', 'minutes': 24 * 60},
      {'method': 'popup', 'minutes': 10},
    ],
  },
}
|
||||
|
|
||||
def rrule_former(class_obj):
    """Return an 'RRULE:...' recurrence string for a class's weekly meetings.

    class_obj must expose .days (weekday numbers, Monday=1 per scraper.days),
    .date_range (first_date, last_date) and .time_range (start_time, end_time).
    """
    days = class_obj.days
    # Anchor the recurrence to the first/last meeting's local datetimes.
    start =datetime.datetime.combine(class_obj.date_range[0],class_obj.time_range[0]).astimezone()
    end =datetime.datetime.combine(class_obj.date_range[1],class_obj.time_range[1]).astimezone()

    # Shift 1-based day numbers (Mon=1) to dateutil's 0-based (rrule.MO == 0).
    days = [ (day -1) % 7 for day in days]
    ret = rrule.rrule(freq=rrule.WEEKLY,dtstart=start,wkst=rrule.SU,until=end,byweekday=days)
    # str(rrule) is "DTSTART:...\nRRULE:..."; keep only the RRULE parameters.
    ret_str = str(ret).split('\n')[-1]
    # Append 'Z' so UNTIL looks UTC-terminated, as Google Calendar requires.
    # NOTE(review): the timestamp itself is local wall time, so labelling it
    # 'Z' shifts the effective end by the UTC offset -- confirm acceptable.
    ret_str=re.sub(r'(UNTIL=[^;]+)',r'\1Z',ret_str)
    return 'RRULE:{}'.format(ret_str)
|
||||
def create_body(_class):
    """Translate a scraped class into a Google Calendar event body dict.

    Classes whose meeting time is TBA (falsy time_range) yield None, which
    callers filter out before uploading.
    """
    if not _class.time_range:
        return None
    first_day = _class.date_range[0]
    return {
        # "kind": "calendar#event",
        'recurrence': [rrule_former(_class)],
        'start': dateTime(datetime.datetime.combine(first_day, _class.time_range[0])),
        'end': dateTime(datetime.datetime.combine(first_day, _class.time_range[1])),
        'summary': _class.title,
        'description': 'location: {}'.format(_class.location),
        'location': LOCATION,
        'reminders': {'useDefault': True},
    }
|
||||
def json_dump(obj):
    """Serialize *obj* as JSON into 'classes.json' in the working directory."""
    with open('classes.json', 'w') as outfile:
        outfile.write(json.dumps(obj))
|
||||
def test_rrule():
    """Smoke-test rrule_former against a synthetic Mon/Wed/Fri class.

    Returns locals() so the caller can inspect the fixture and result.
    """
    now = datetime.datetime.now()
    from munch import Munch
    # Fake class: meets days 1/3/5, 50-minute slot, 20-day term.
    test_obj = Munch(
        days=[1, 3, 5],
        time_range=[
            now.time(),
            (now + datetime.timedelta(seconds=50 * 60)).time(),
        ],
        date_range=[
            now.date(),
            (now + datetime.timedelta(days=20)).date(),
        ],
    )
    test_result = rrule_former(test_obj)
    return locals()
|
||||
|
|
||||
def test_class2body():
    """Load the pickled class list and convert each schedulable class to an
    event body, dropping the TBA ones (create_body returns None for those)."""
    with open('classes.pkl', 'rb') as pkl_file:
        classes = pickle.load(pkl_file)
    return [body for body in map(create_body, classes) if body]
|
||||
|
|
||||
if __name__ == "__main__": |
|
||||
|
from dateutil import rrule |
||||
|
from gcalendar import dateTime |
||||
|
import datetime |
||||
|
import pickle |
||||
|
import json |
||||
|
import re |
||||
|
LOCATION = "5500 St Louis Ave, Chicago, IL 60625" |
||||
|
event = { |
||||
|
'summary': 'Google I/O 2015', |
||||
|
'location': '800 Howard St., San Francisco, CA 94103', |
||||
|
'description': 'A chance to hear more about Google\'s developer products.', |
||||
|
'start': { |
||||
|
'dateTime': '2015-05-28T09:00:00-07:00', |
||||
|
'timeZone': 'America/Los_Angeles', |
||||
|
}, |
||||
|
'end': { |
||||
|
'dateTime': '2015-05-28T17:00:00-07:00', |
||||
|
'timeZone': 'America/Los_Angeles', |
||||
|
}, |
||||
|
'recurrence': [ |
||||
|
'RRULE:FREQ=DAILY;COUNT=2' |
||||
|
], |
||||
|
'attendees': [ |
||||
|
{'email': 'lpage@example.com'}, |
||||
|
{'email': 'sbrin@example.com'}, |
||||
|
], |
||||
|
'reminders': { |
||||
|
'useDefault': False, |
||||
|
'overrides': [ |
||||
|
{'method': 'email', 'minutes': 24 * 60}, |
||||
|
{'method': 'popup', 'minutes': 10}, |
||||
|
], |
||||
|
}, |
||||
|
} |
||||
|
|
||||
|
def rrule_former(class_obj): |
||||
|
days = class_obj.days |
||||
|
start =datetime.datetime.combine(class_obj.date_range[0],class_obj.time_range[0]).astimezone() |
||||
|
end =datetime.datetime.combine(class_obj.date_range[1],class_obj.time_range[1]).astimezone() |
||||
|
|
||||
|
days = [ (day -1) % 7 for day in days] |
||||
|
ret = rrule.rrule(freq=rrule.WEEKLY,dtstart=start,wkst=rrule.SU,until=end,byweekday=days) |
||||
|
ret_str = str(ret).split('\n')[-1] |
||||
|
ret_str=re.sub(r'(UNTIL=[^;]+)',r'\1Z',ret_str) |
||||
|
return 'RRULE:{}'.format(ret_str) |
||||
|
def create_body(_class): |
||||
|
if _class.time_range: |
||||
|
body = { |
||||
|
# "kind": "calendar#event", |
||||
|
} |
||||
|
body['recurrence'] = [rrule_former(_class)] |
||||
|
body['start'] = dateTime(datetime.datetime.combine(_class.date_range[0],_class.time_range[0])) |
||||
|
body['end'] = dateTime(datetime.datetime.combine(_class.date_range[0],_class.time_range[1])) |
||||
|
body['summary'] = _class.title |
||||
|
body['description'] = 'location: {}'.format(_class.location) |
||||
|
body['location'] = LOCATION |
||||
|
body['reminders'] = {'useDefault':True} |
||||
|
return body |
||||
|
def json_dump(obj): |
||||
|
with open('classes.json','w') as file: |
||||
|
json.dump(obj,file) |
||||
|
def test_rrule(): |
||||
|
#test |
||||
|
now = datetime.datetime.now() |
||||
|
from munch import Munch |
||||
|
test_obj = Munch( |
||||
|
days=[1,3,5], |
||||
|
time_range=[ |
||||
|
now.time(), |
||||
|
(now+datetime.timedelta(seconds=50*60)).time() |
||||
|
], |
||||
|
date_range=[ |
||||
|
now.date(), |
||||
|
(now+datetime.timedelta(days=20)).date() |
||||
|
], |
||||
|
) |
||||
|
test_result = rrule_former(test_obj) |
||||
|
return locals() |
||||
|
|
||||
|
def test_class2body(): |
||||
|
with open('classes.pkl','rb') as file: |
||||
|
classes = pickle.load(file) |
||||
|
test_result = list(filter(bool,map(create_body,classes))) |
||||
|
return test_result |
||||
|
|
||||
|
if __name__ == "__main__":
    # Convert the pickled schedule into event bodies and write classes.json.
    json_dump(test_class2body())
||||
@ -1,5 +1,5 @@ |
|||||
# Make the bundled google_api_wrapper package importable, then re-export the
# calendar API names from it so callers can `from gcalendar import api, ...`.
import os
import sys

parent = os.path.dirname(__file__)
sys.path.insert(0,os.path.join(parent,'google_api_wrapper'))

from gapi.calendar_api import *  # noqa: F401,F403 -- deliberate re-export
||||
@ -1,46 +1,46 @@ |
|||||
from pyppeteer import launch |
|
||||
import asyncio |
|
||||
import time |
|
||||
import scraper |
|
||||
# JS one-liner: pick the first entry (current term) in the term drop-down.
set_semester = "document.getElementsByName('term_in')[0].selectedIndex = 0"

# XPath selectors for each navigation step through the NEIU student portal.
xpaths = {
    'tab':".//a[text()='Current Student']",
    'schedule':".//a[text()='Student Detail Schedule']",
    'submit':"//input[@value='Submit']",
    'frame':"//frame[@src='/cp/ip/login?sys=sctssb&url=https://ssb.neiu.edu/mercury_neiuprod/bwskfshd.P_CrseSchdDetl']"
}
|
||||
async def xpath_single_element(xpath, page):
    """Wait until *xpath* matches on *page* (or frame), then return the first
    matching element. Raises IndexError if the match list is empty."""
    await page.waitForXPath(xpath)
    return (await page.xpath(xpath))[0]
|
||||
async def main_loop(login):
    """Drive a browser through the NEIU portal and scrape the schedule.

    login is a JS snippet that fills the credential fields and submits.
    Returns whatever scraper.get_classes() extracts from the schedule
    frame's HTML.
    """
    browser = await launch(headless = False)
    page_list = await browser.pages()
    page = page_list[0]
    r = await page.goto('https://neiuport.neiu.edu/cp/home/displaylogin')
    await page.evaluate(login)
    await page.waitFor('#tab')
    student_tab = await xpath_single_element(xpaths['tab'],page)
    await student_tab.click()
    await page.waitForXPath(xpaths['schedule'])
    schedule = await xpath_single_element(xpaths['schedule'],page)
    await schedule.click()
    # BUG FIX: this coroutine was never awaited, so the wait silently did
    # nothing and only the sleep below papered over the race.
    await page.waitForXPath(xpaths['frame'])
    await asyncio.sleep(3)
    # The schedule content loads into the last frame on the page.
    frame = page.frames[-1]
    submit = await xpath_single_element(xpaths['submit'],frame)
    await submit.click()
    await asyncio.sleep(1)
    content = await page.frames[-1].content()
    await browser.close()
    return scraper.get_classes(content)
|
||||
|
|
||||
def get_classes(user,password):
    """Log into the NEIU portal with user/password and return the scraped
    class list produced by main_loop."""
    login = """document.getElementById('user').value='{}'
document.getElementById('pass').value='{}'
login()""".format(user,password)
    return asyncio.get_event_loop().run_until_complete(main_loop(login))
|
||||
if __name__ == "__main__":
    # SECURITY FIX: the username and password were hard-coded in source
    # (and therefore in version control). Read them from the environment,
    # falling back to an interactive prompt.
    import os
    import getpass
    _user = os.environ.get('NEIU_USER') or input('NEIU username: ')
    _password = os.environ.get('NEIU_PASS') or getpass.getpass('NEIU password: ')
    cl = get_classes(_user, _password)
|
||||
|
from pyppeteer import launch |
||||
|
import asyncio |
||||
|
import time |
||||
|
import scraper |
||||
|
set_semester = "document.getElementsByName('term_in')[0].selectedIndex = 0" |
||||
|
xpaths = { |
||||
|
'tab':".//a[text()='Current Student']", |
||||
|
'schedule':".//a[text()='Student Detail Schedule']", |
||||
|
'submit':"//input[@value='Submit']", |
||||
|
'frame':"//frame[@src='/cp/ip/login?sys=sctssb&url=https://ssb.neiu.edu/mercury_neiuprod/bwskfshd.P_CrseSchdDetl']" |
||||
|
} |
||||
|
async def xpath_single_element(xpath,page): |
||||
|
await page.waitForXPath(xpath) |
||||
|
elements = await page.xpath(xpath) |
||||
|
return elements[0] |
||||
|
async def main_loop(login): |
||||
|
browser = await launch(headless = False) |
||||
|
page_list = await browser.pages() |
||||
|
page = page_list[0] |
||||
|
r = await page.goto('https://neiuport.neiu.edu/cp/home/displaylogin') |
||||
|
await page.evaluate(login) |
||||
|
await page.waitFor('#tab') |
||||
|
student_tab = await xpath_single_element(xpaths['tab'],page) |
||||
|
await student_tab.click() |
||||
|
await page.waitForXPath(xpaths['schedule']) |
||||
|
schedule = await xpath_single_element(xpaths['schedule'],page) |
||||
|
await schedule.click() |
||||
|
page.waitForXPath(xpaths['frame']) |
||||
|
await asyncio.sleep(3) |
||||
|
frame = page.frames[-1] |
||||
|
submit= await xpath_single_element(xpaths['submit'],frame) |
||||
|
await submit.click() |
||||
|
await asyncio.sleep(1) |
||||
|
content = await page.frames[-1].content() |
||||
|
await browser.close() |
||||
|
return scraper.get_classes(content) |
||||
|
|
||||
|
def get_classes(user,password): |
||||
|
login = """document.getElementById('user').value='{}' |
||||
|
document.getElementById('pass').value='{}' |
||||
|
login()""".format(user,password) |
||||
|
loop = asyncio.get_event_loop() |
||||
|
r = loop.run_until_complete |
||||
|
return r(main_loop(login)) |
||||
|
if __name__ == "__main__": |
||||
|
cl = get_classes('rlroberts5','YxmZZ905p0w6') |
||||
@ -1,13 +1,13 @@ |
|||||
# Push every event body from classes.json into the 'school schedule' calendar.
from gcalendar import api
import json
import pprint

# Authenticate against the Google Calendar API using the local client secret.
api = api(r'api_info\client_secret.json','api_info')
cals = api.get_calendars()
# Calendar whose id matches the configured 'school schedule' id.
# NOTE(review): `cal` is only used by the commented-out colorId line below.
cal = next(filter(lambda cal: cal['id'] == api.ids['school schedule'],cals))
with open('classes.json') as file:
    bodies = json.load(file)
for body in bodies:
    # body['colorId'] = cal['colorId']
    # pprint.pprint(body)
    # input()
    api.create_event('school schedule',body)
||||
@ -1,110 +1,110 @@ |
|||||
from bs4 import BeautifulSoup as BS |
|
||||
import datetime |
|
||||
import re |
|
||||
from operator import sub |
|
||||
def dateparse(datetime_str):
    """Parse a schedule cell that holds either a date like 'Sep 05, 2019'
    or a 12-hour clock time like '09:30 am' into a datetime."""
    date_format = '%b %d, %Y'
    time_format = '%I:%M %p'
    try:
        # Most cells are dates; fall back to the clock format on failure.
        return datetime.datetime.strptime(datetime_str, date_format)
    except ValueError:
        return datetime.datetime.strptime(datetime_str, time_format)
|
||||
# Schedule day letters indexed by weekday number (Mon=1 .. Fri=5);
# the None entries pad the Sunday/Saturday slots.
days = [None,'M','T','W','R','F',None]
# Row labels kept whole (snake_cased) instead of reduced to their last word.
simp_exceptions = ['Grade Mode']
|
||||
def datetime2date_time(dtime, mode):
    """Project a datetime onto its date or its (second-resolution) time.

    Args:
        dtime: a datetime.datetime.
        mode: 'date' for the calendar date, 'time' for the wall-clock time.

    Raises:
        ValueError: on an unknown mode (previously returned None silently,
        which deferred the failure to an unrelated call site).
    """
    if mode == 'date':
        return datetime.date(dtime.year, dtime.month, dtime.day)
    if mode == 'time':
        return datetime.time(dtime.hour, dtime.minute, dtime.second)
    raise ValueError("mode must be 'date' or 'time', got {!r}".format(mode))
|
||||
def seconds_from_midnight(t):
    """Return the number of seconds elapsed since midnight for time *t*."""
    return (t.hour * 60 + t.minute) * 60 + t.second
|
||||
class Class:
    """A single course section scraped from the student detail schedule."""

    def __init__(self, title, session, days, location, time_range, date_range):
        self.title = title
        self.session = session
        self.days = days                # weekday numbers, Mon=1 .. Fri=5
        self.location = location
        self.time_range = time_range    # (start_time, end_time), or None if TBA
        self.lab = None                 # second meeting row (lab), when present
        self.date_range = date_range    # (first_date, last_date)

    # data is a pair of html tables: (course-info table, meeting-times table)
    def scrape(self, data):
        """Populate this instance from the two schedule tables; returns self."""
        info, times = data
        # info table: caption reads "Title - Abbreviation - Session"
        self.title, self.abrv, self.session = info.find('caption').text.split(' - ')
        self.lab = None
        self.session = int(self.session)
        for row in info.find_all('tr'):
            name = row.find('th').text.rstrip(':')
            value = re.sub(r'^ +|[\n\r\t]', '', row.find('td').text)
            if name == 'Status':
                # e.g. "**Web Registered** on Jan 01, 2019"
                status, date = value.split(' on ')
                self.type = status.replace('*', '')
                self.registration_date = dateparse(date)
            else:
                if name in simp_exceptions:
                    name = name.lower().replace(' ', '_')
                else:
                    name = name.lower().split(' ')[-1]
                if name != 'instructor':
                    value = value.lower()
                try:
                    # numeric cells like "3.000 Credits" become ints
                    # BUG FIX: was a bare `except:` (hid every error)
                    value = int(re.sub(r'\.\d+', '', value))
                except ValueError:
                    pass
                self.__dict__[name] = value

        # times table: a header row followed by one or two meeting rows
        headers, *rows = times.find_all('tr')
        if len(rows) > 1:
            row, self.lab = rows[:2]
        else:  # BUG FIX: original `else` had no colon (SyntaxError)
            self.lab = None
            row = rows[0]
        cols = [col.text for col in row.find_all('td')]
        header_names = [header.text.lower() for header in headers.find_all('th')]
        # BUG FIX: _parse_horz_row was defined but never called, so the
        # meeting attributes (time_range/date_range/days/location) were
        # never set by scrape().
        self.__dict__.update(self._parse_horz_row(header_names, cols))
        return self

    @staticmethod
    def _parse_horz_row(headers, row):
        """Parse one meeting row into time_range/date_range/days/location."""
        ret = {}
        time_data = dict(zip(headers, row))
        if time_data['time'] == 'TBA':
            ret['time_range'] = None
        else:
            s, e = map(dateparse, time_data['time'].split(' - '))
            ret['time_range'] = (
                datetime2date_time(s, 'time'),
                datetime2date_time(e, 'time'),
            )
        s, e = map(dateparse, time_data['date range'].split(' - '))
        ret['date_range'] = (
            datetime2date_time(s, 'date'),
            datetime2date_time(e, 'date'),
        )
        # Keep only known day letters, then map each letter to its weekday
        # number via its index in the module-level `days` list.
        time_data['days'] = re.sub(
            '[^{}]'.format(''.join(filter(bool, days))), '', time_data['days'])
        ret['days'] = [days.index(letter) for letter in time_data['days']]
        ret['location'] = time_data['where']
        return ret

    @property
    def length(self):
        """Duration of one meeting as a timedelta."""
        return datetime.timedelta(seconds=sub(
            seconds_from_midnight(self.time_range[1]),
            seconds_from_midnight(self.time_range[0]),
        ))
|
||||
|
|
||||
|
|
||||
def get_classes(page):
    """Parse a schedule page into a list of scraped Class instances.

    Accepts raw HTML (str/bytes) or an already-parsed BeautifulSoup document.
    """
    if not isinstance(page, BS):
        page = BS(page, 'lxml')
    tables = page.find_all('table', attrs={'class': 'datadisplaytable'})
    # The tables alternate (course info, meeting times), one pair per class.
    groups = ((tables[i], tables[i + 1]) for i in range(0, len(tables), 2))
    # BUG FIX: `map(Class.scrape, groups)` called the unbound method with the
    # table pair bound to `self` and no `data` argument (TypeError). Build a
    # bare instance per pair and let scrape() populate it.
    classes = []
    for group in groups:
        obj = Class.__new__(Class)
        obj.scrape(group)
        classes.append(obj)
    return classes
|
||||
|
|
||||
if __name__ == "__main__":
    # Ad-hoc manual test against a locally saved copy of the schedule page.
    with open('schedule.html') as file:
        page = BS(file.read(),'lxml')
    class1,*classes = get_classes(page)
|
||||
|
from bs4 import BeautifulSoup as BS |
||||
|
import datetime |
||||
|
import re |
||||
|
from operator import sub |
||||
|
def dateparse(datetime_str): |
||||
|
date = '%b %d, %Y' |
||||
|
time = '%I:%M %p' |
||||
|
try: |
||||
|
return datetime.datetime.strptime(datetime_str,date) |
||||
|
except ValueError: |
||||
|
return datetime.datetime.strptime(datetime_str,time) |
||||
|
days = [None,'M','T','W','R','F',None] |
||||
|
simp_exceptions = ['Grade Mode'] |
||||
|
def datetime2date_time(dtime,mode): |
||||
|
if mode == 'date': |
||||
|
return datetime.date(dtime.year,dtime.month,dtime.day) |
||||
|
elif mode == 'time': |
||||
|
return datetime.time(dtime.hour,dtime.minute,dtime.second) |
||||
|
def seconds_from_midnight(t): |
||||
|
return t.hour*60**2+ t.minute*60+t.second |
||||
|
class Class: |
||||
|
def __init__(self,title,session,days,location,time_range,date_range): |
||||
|
self.title = title |
||||
|
self.session = session |
||||
|
self.days = days |
||||
|
self.location = location |
||||
|
self.time_range = time_range |
||||
|
self.lab = None |
||||
|
self.date_range = date_range |
||||
|
# data is a list of two html tables |
||||
|
def scrape(self,data): |
||||
|
info,times = data |
||||
|
# info |
||||
|
self.title,self.abrv,self.session = info.find('caption').text.split(' - ') |
||||
|
self.lab = None |
||||
|
self.session = int(self.session) |
||||
|
rows = info.find_all('tr') |
||||
|
for row in rows: |
||||
|
name = row.find('th').text.rstrip(':') |
||||
|
data = re.sub(r'^ +|[\n\r\t]','',row.find('td').text) |
||||
|
|
||||
|
if name == 'Status': |
||||
|
type,date = data.split(' on ') |
||||
|
type = type.replace('*','') |
||||
|
self.type = type |
||||
|
self.registration_date = dateparse(date) |
||||
|
else: |
||||
|
if name in simp_exceptions: |
||||
|
name = name.lower().replace(' ','_') |
||||
|
else: |
||||
|
name = name.lower().split(' ')[-1] |
||||
|
if name != 'instructor': |
||||
|
data = data.lower() |
||||
|
try: |
||||
|
data = int(re.sub(r'\.\d+','',data)) |
||||
|
except: |
||||
|
|
||||
|
pass |
||||
|
self.__dict__[name] = data |
||||
|
|
||||
|
# time |
||||
|
headers,*data = times.find_all('tr') |
||||
|
if len(data) > 1: |
||||
|
data,lab = data[:2] |
||||
|
else |
||||
|
lab = None |
||||
|
data = data[0] |
||||
|
data = (col.text for col in data.find_all('td')) |
||||
|
headers = (header.text.lower() for header in headers.find_all('th')) |
||||
|
|
||||
|
def parse_horz_row(headers,row): |
||||
|
ret = {} |
||||
|
time_data = dict(zip(headers,data)) |
||||
|
if time_data['time'] == 'TBA': |
||||
|
ret['time_range'] = None |
||||
|
else: |
||||
|
s,e = map(dateparse,time_data['time'].split(' - ')) |
||||
|
ret['time_range'] = ( |
||||
|
datetime2date_time(s,'time'), |
||||
|
datetime2date_time(e,'time'), |
||||
|
) |
||||
|
s,e = map(dateparse,time_data['date range'].split(' - ')) |
||||
|
ret['date_range'] = ( |
||||
|
datetime2date_time(s,'date'), |
||||
|
datetime2date_time(e,'date'), |
||||
|
) |
||||
|
time_data['days'] = re.sub('[^{}]'.format(''.join(filter(bool,days))),'',time_data['days']) |
||||
|
ret['days'] = list(days.index(time_data['days'][i]) for i in range(len(time_data['days']))) |
||||
|
ret['location'] = time_data['where'] |
||||
|
return ret |
||||
|
|
||||
|
@property |
||||
|
def length(self): |
||||
|
return datetime.timedelta(seconds = sub( |
||||
|
seconds_from_midnight(self.time_range[1]), |
||||
|
seconds_from_midnight(self.time_range[0]), |
||||
|
)) |
||||
|
|
||||
|
|
||||
|
def get_classes(page): |
||||
|
if not isinstance(page,BS): |
||||
|
page = BS(page,'lxml') |
||||
|
tables = page.find_all('table',attrs= {'class':'datadisplaytable'}) |
||||
|
groups = ((tables[i],tables[i+1]) for i in range(0,len(tables),2)) |
||||
|
return list(map(Class.scrape,groups)) |
||||
|
|
||||
|
if __name__ == "__main__": |
||||
|
with open('schedule.html') as file: |
||||
|
page = BS(file.read(),'lxml') |
||||
|
class1,*classes = get_classes(page) |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue