From a1799f5a61b5ed420a3a8a2e3bc1d316f73dab1c Mon Sep 17 00:00:00 2001
From: Raphael Roberts
Date: Sat, 1 Sep 2018 12:34:58 -0500
Subject: [PATCH] Initial commit

---
Review note: the Python files below were revised after commit a1799f5
(bug fixes, documentation, hard-coded credentials removed), so the blob
ids on their "index" lines are omitted.

 .gitignore     |   3 +
 body_create.py |  81 +++++++++++++++++++++++++
 gcalendar.py   | 156 +++++++++++++++++++++++++++++++++++++++++++++++++
 get_classes.py |  70 ++++++++++++++++++++++
 scraper.py     | 123 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 433 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 body_create.py
 create mode 100644 gcalendar.py
 create mode 100644 get_classes.py
 create mode 100644 scraper.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..15c7049
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.pkl
+__pycache__
+/api_info
\ No newline at end of file
diff --git a/body_create.py b/body_create.py
new file mode 100644
--- /dev/null
+++ b/body_create.py
@@ -0,0 +1,81 @@
+"""Build dateutil recurrence rules from scraped class meeting data."""
+from dateutil import rrule
+import datetime
+import pickle
+
+# Reference shape of a Google Calendar event body (sample values):
+# event = {
+#     'summary': 'Google I/O 2015',
+#     'location': '800 Howard St., San Francisco, CA 94103',
+#     'description': 'A chance to hear more about Google\'s developer products.',
+#     'start': {
+#         'dateTime': '2015-05-28T09:00:00-07:00',
+#         'timeZone': 'America/Los_Angeles',
+#     },
+#     'end': {
+#         'dateTime': '2015-05-28T17:00:00-07:00',
+#         'timeZone': 'America/Los_Angeles',
+#     },
+#     'recurrence': [
+#         'RRULE:FREQ=DAILY;COUNT=2'
+#     ],
+#     'attendees': [
+#         {'email': 'lpage@example.com'},
+#         {'email': 'sbrin@example.com'},
+#     ],
+#     'reminders': {
+#         'useDefault': False,
+#         'overrides': [
+#             {'method': 'email', 'minutes': 24 * 60},
+#             {'method': 'popup', 'minutes': 10},
+#         ],
+#     },
+# }
+
+
+def rrule_former(class_obj):
+    """Return a weekly ``rrule`` covering every meeting of ``class_obj``.
+
+    ``class_obj`` must expose ``days`` (ints where 1 is Monday and 0 is
+    Sunday), ``date_range`` (first and last ``datetime.date``) and
+    ``time_range`` (start and end ``datetime.time``).
+
+    Raises ``ValueError`` when ``time_range`` is ``None``, since a class
+    without a meeting time cannot be turned into a recurrence.
+    """
+    if class_obj.time_range is None:
+        raise ValueError('class has no meeting time')
+    # astimezone() on a naive datetime attaches the system local timezone.
+    start = datetime.datetime.combine(
+        class_obj.date_range[0], class_obj.time_range[0]).astimezone()
+    end = datetime.datetime.combine(
+        class_obj.date_range[1], class_obj.time_range[1]).astimezone()
+
+    # dateutil numbers weekdays Monday=0..Sunday=6, hence the shift by one.
+    weekdays = [(day - 1) % 7 for day in class_obj.days]
+    return rrule.rrule(
+        freq=rrule.WEEKLY,
+        dtstart=start,
+        wkst=rrule.SU,
+        until=end,
+        byweekday=weekdays,
+    )
+
+
+def test():
+    """Build a rule for a synthetic Mon/Wed/Fri class and return the locals."""
+    now = datetime.datetime.now()
+    from munch import Munch
+    test_obj = Munch(
+        days=[1, 3, 5],
+        time_range=[
+            now.time(),
+            (now + datetime.timedelta(seconds=50 * 60)).time(),
+        ],
+        date_range=[
+            now.date(),
+            (now + datetime.timedelta(days=20)).date(),
+        ],
+    )
+    test_result = rrule_former(test_obj)
+    return locals()
diff --git a/gcalendar.py b/gcalendar.py
new file mode 100644
--- /dev/null
+++ b/gcalendar.py
@@ -0,0 +1,156 @@
+"""Small wrapper around the Google Calendar v3 API using oauth2client."""
+# from __future__ import print_function
+from apiclient import discovery
+import datetime
+from googleapiclient.discovery import build
+from googleapiclient.errors import HttpError
+
+from oauth2client import client
+from oauth2client import tools
+from oauth2client.file import Storage
+from oauth2client.service_account import ServiceAccountCredentials
+
+import httplib2
+import os
+import pytz
+import sys
+import argparse
+import tzlocal
+
+# NOTE(review): sys.argv is parsed at import time, so importing this module
+# from a program with its own command-line options makes argparse exit.
+flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
+dt_fmt = '%Y-%m-%dT%H:%M:%S'
+APPLICATION_NAME = 'Google Calendar API Python'
+
+
+def dateTime(datetime):
+    """Return ``datetime`` as a ``{"timeZone", "dateTime"}`` mapping.
+
+    A value without ``tzinfo`` is first converted with ``astimezone()``.
+    The zone name is always the local zone reported by ``tzlocal``.
+    """
+    if not datetime.tzinfo:
+        datetime = datetime.astimezone()
+    zone = tzlocal.get_localzone().zone
+    return {
+        "timeZone": zone,
+        "dateTime": datetime.isoformat(timespec='seconds'),
+    }
+
+
+class api:
+    """Authorized Calendar client that caches its service and calendar ids."""
+
+    def __init__(self, client_secret_file, credentials_dir,
+                 scopes='https://www.googleapis.com/auth/calendar'):
+        """Store the OAuth settings and fetch the calendar list.
+
+        ``self.ids`` maps each lower-cased calendar summary to its id.
+        """
+        self.client_secret_file = client_secret_file
+        self.credentials_dir = credentials_dir
+        self.scopes = scopes
+        self._service = None
+        self._service_settime = None
+        self.calendars = self.get_calendars()
+        self.ids = {
+            calendar['summary'].lower(): calendar['id']
+            for calendar in self.calendars
+        }
+
+    def get_credentials(self):
+        """Load credentials from ``token.json``, running the OAuth flow if needed."""
+        credential_path = os.path.join(self.credentials_dir, 'token.json')
+        store = Storage(credential_path)
+        credentials = store.get()
+        if not credentials or credentials.invalid:
+            flow = client.flow_from_clientsecrets(
+                self.client_secret_file, self.scopes)
+            flow.user_agent = APPLICATION_NAME
+            if flags:
+                credentials = tools.run_flow(flow, store, flags)
+            else:  # Needed only for compatibility with Python 2.6
+                credentials = tools.run(flow, store)
+            print('Storing credentials to ' + credential_path)
+        return credentials
+
+    def build_service(self):
+        """Return a new authorized Calendar v3 service object."""
+        credentials = self.get_credentials()
+        http = credentials.authorize(httplib2.Http())
+        return build('calendar', 'v3', http=http, cache_discovery=False)
+
+    def _needs_renewal(self):
+        """Return True when no service was built or it is over an hour old."""
+        if not self._service_settime:
+            return True
+        age = datetime.datetime.today() - self._service_settime
+        return age > datetime.timedelta(seconds=60 ** 2)
+
+    @property
+    def service(self):
+        """Calendar service, rebuilt whenever ``_needs_renewal()`` is true."""
+        if self._needs_renewal():
+            self._service = self.build_service()
+            self._service_settime = datetime.datetime.today()
+        return self._service
+
+    def create_event(self, calendar_id, body):
+        """Insert ``body`` into ``calendar_id`` and return the new event id."""
+        event = self.service.events().insert(
+            calendarId=calendar_id, body=body).execute()
+        return event['id']
+
+    def update_event(self, calendar_id, event_id, body):
+        """Overwrite event ``event_id`` with ``body``, creating it if missing.
+
+        Returns the id of the updated or newly created event. An
+        ``HttpError`` other than 404 from the lookup is re-raised.
+        """
+        service = self.service
+        try:
+            event = service.events().get(
+                calendarId=calendar_id, eventId=event_id).execute()
+        except HttpError as e:
+            if e.resp.status == 404:
+                return self.create_event(calendar_id, body)
+            # Swallowing other errors would leave ``event`` unbound below.
+            raise
+        updated_event = service.events().update(
+            calendarId=calendar_id, eventId=event['id'], body=body).execute()
+        return updated_event["id"]
+
+    def get_calendars(self):
+        """Return every entry of the calendar list, following page tokens."""
+        page_token = None
+        calendars = []
+        while True:
+            calendar_list = self.service.calendarList().list(
+                pageToken=page_token).execute()
+            calendars.extend(calendar_list['items'])
+            page_token = calendar_list.get('nextPageToken')
+            if not page_token:
+                break
+        return calendars
+
+    def get_events(self, id):
+        """Return all events of calendar ``id``, following page tokens.
+
+        ``id`` may be a key of ``self.ids`` (a lower-cased calendar summary);
+        any other value is passed to the API unchanged as the calendar id.
+        """
+        service = self.service
+        calendar_id = self.ids.get(id, id)
+        page_token = None
+        ret = []
+        while True:
+            events = service.events().list(
+                calendarId=calendar_id, pageToken=page_token).execute()
+            ret += events['items']
+            page_token = events.get('nextPageToken')
+            if not page_token:
+                break
+        return ret
+# if __name__ == "__main__":
+#     test = api(r"X:\Users\Ralphie\Downloads\client_secret.json",r"X:\Users\Ralphie\Downloads")
diff --git a/get_classes.py b/get_classes.py
new file mode 100644
--- /dev/null
+++ b/get_classes.py
@@ -0,0 +1,70 @@
+"""Log in to the NEIU portal with pyppeteer and scrape the class schedule."""
+from pyppeteer import launch
+import asyncio
+import getpass
+import json
+import time
+import scraper
+
+set_semester = "document.getElementsByName('term_in')[0].selectedIndex = 0"
+xpaths = {
+    'tab': ".//a[text()='Current Student']",
+    'schedule': ".//a[text()='Student Detail Schedule']",
+    'submit': "//input[@value='Submit']",
+    'frame': "//frame[@src='/cp/ip/login?sys=sctssb&url=https://ssb.neiu.edu/mercury_neiuprod/bwskfshd.P_CrseSchdDetl']",
+}
+
+
+async def xpath_single_element(xpath, page):
+    """Wait for ``xpath`` in ``page`` (a page or frame); return the first match."""
+    await page.waitForXPath(xpath)
+    elements = await page.xpath(xpath)
+    return elements[0]
+
+
+async def main_loop(login):
+    """Run the ``login`` script, open the schedule and return the parsed classes.
+
+    ``login`` is JavaScript evaluated on the portal login page. The browser
+    is closed even if one of the navigation steps raises.
+    """
+    browser = await launch(headless=False)
+    try:
+        page_list = await browser.pages()
+        page = page_list[0]
+        await page.goto('https://neiuport.neiu.edu/cp/home/displaylogin')
+        await page.evaluate(login)
+        await page.waitFor('#tab')
+        student_tab = await xpath_single_element(xpaths['tab'], page)
+        await student_tab.click()
+        schedule = await xpath_single_element(xpaths['schedule'], page)
+        await schedule.click()
+        # Await the wait so the script really blocks until the frame exists.
+        await page.waitForXPath(xpaths['frame'])
+        await asyncio.sleep(3)
+        frame = page.frames[-1]
+        submit = await xpath_single_element(xpaths['submit'], frame)
+        await submit.click()
+        await asyncio.sleep(1)
+        content = await page.frames[-1].content()
+    finally:
+        await browser.close()
+    return scraper.get_classes(content)
+
+
+def get_classes(user, password):
+    """Log in as ``user`` and return the result of ``scraper.get_classes``.
+
+    The credentials are inserted as JSON string literals so that quotes or
+    backslashes in them cannot break or alter the injected script.
+    """
+    login = """document.getElementById('user').value={}
+    document.getElementById('pass').value={}
+    login()""".format(json.dumps(user), json.dumps(password))
+    loop = asyncio.get_event_loop()
+    return loop.run_until_complete(main_loop(login))
+
+
+if __name__ == "__main__":
+    # Prompt for credentials; they must never be committed to the repository.
+    cl = get_classes(input('Username: '), getpass.getpass('Password: '))
diff --git a/scraper.py b/scraper.py
new file mode 100644
--- /dev/null
+++ b/scraper.py
@@ -0,0 +1,123 @@
+"""Parse schedule HTML (``datadisplaytable`` tables) into ``Class`` objects."""
+from bs4 import BeautifulSoup as BS
+import datetime
+import re
+from operator import sub
+
+
+def dateparse(datetime_str):
+    """Parse a ``'%b %d, %Y'`` date or, failing that, a ``'%I:%M %p'`` time.
+
+    Returns a ``datetime.datetime``; raises ``ValueError`` if neither
+    format matches.
+    """
+    date_format = '%b %d, %Y'
+    time_format = '%I:%M %p'
+    try:
+        return datetime.datetime.strptime(datetime_str, date_format)
+    except ValueError:
+        return datetime.datetime.strptime(datetime_str, time_format)
+
+
+# The index of a letter in this list is the number stored in ``Class.days``.
+days = [None, 'M', 'T', 'W', 'R', 'F', None]
+# Row labels kept whole (snake_cased) instead of cut down to their last word.
+simp_exceptions = ['Grade Mode']
+
+
+def datetime2date_time(dtime, mode):
+    """Return the ``'date'`` or ``'time'`` part of ``dtime``; else ``None``."""
+    if mode == 'date':
+        return datetime.date(dtime.year, dtime.month, dtime.day)
+    elif mode == 'time':
+        return datetime.time(dtime.hour, dtime.minute, dtime.second)
+    return None
+
+
+def seconds_from_midnight(t):
+    """Return the whole seconds elapsed between midnight and ``t``."""
+    return t.hour * 60 ** 2 + t.minute * 60 + t.second
+
+
+class Class:
+    """One scheduled class built from an info table and a meeting-time table."""
+
+    def __init__(self, data):
+        """Populate attributes from ``data``, an ``(info, times)`` table pair."""
+        info, times = data
+        # info
+        self.title, self.abrv, self.session = (
+            info.find('caption').text.split(' - '))
+        self.session = int(self.session)
+        for row in info.find_all('tr'):
+            name = row.find('th').text.rstrip(':')
+            value = re.sub(r'^ +|[\n\r\t]', '', row.find('td').text)
+
+            if name == 'Status':
+                status, date = value.split(' on ')
+                self.type = status.replace('*', '')
+                self.registration_date = dateparse(date)
+            else:
+                if name in simp_exceptions:
+                    name = name.lower().replace(' ', '_')
+                else:
+                    name = name.lower().split(' ')[-1]
+                if name != 'instructor':
+                    value = value.lower()
+                try:
+                    # Drop a decimal part (e.g. '3.000' -> 3) before converting.
+                    value = int(re.sub(r'\.\d+', '', value))
+                except ValueError:
+                    # Not a number: keep the text as it is.
+                    pass
+                self.__dict__[name] = value
+
+        # time
+        header_row, value_row = times.find_all('tr')
+        headers = (cell.text.lower() for cell in header_row.find_all('th'))
+        values = (cell.text for cell in value_row.find_all('td'))
+        time_data = dict(zip(headers, values))
+        if time_data['time'] == 'TBA':
+            self.time_range = None
+        else:
+            s, e = map(dateparse, time_data['time'].split(' - '))
+            self.time_range = (
+                datetime2date_time(s, 'time'),
+                datetime2date_time(e, 'time'),
+            )
+        s, e = map(dateparse, time_data['date range'].split(' - '))
+        self.date_range = (
+            datetime2date_time(s, 'date'),
+            datetime2date_time(e, 'date'),
+        )
+        day_letters = ''.join(filter(bool, days))
+        day_codes = re.sub('[^{}]'.format(day_letters), '', time_data['days'])
+        self.days = [days.index(code) for code in day_codes]
+        self.location = time_data['where']
+
+    @property
+    def length(self):
+        """Meeting duration as a ``timedelta``, or ``None`` if the time is TBA."""
+        if self.time_range is None:
+            return None
+        start, end = self.time_range
+        return datetime.timedelta(
+            seconds=seconds_from_midnight(end) - seconds_from_midnight(start))
+
+
+def get_classes(page):
+    """Return a ``Class`` for each consecutive pair of schedule tables.
+
+    ``page`` is a ``BeautifulSoup`` object or markup to be parsed with
+    lxml. A trailing table without a partner is ignored.
+    """
+    if not isinstance(page, BS):
+        page = BS(page, 'lxml')
+    tables = page.find_all('table', attrs={'class': 'datadisplaytable'})
+    return [Class(pair) for pair in zip(tables[0::2], tables[1::2])]
+
+
+if __name__ == "__main__":
+    with open('schedule.html') as file:
+        page = BS(file.read(), 'lxml')
+    class1, *classes = get_classes(page)