6 changed files with 261 additions and 261 deletions
-
6Student Detail Schedule.html
-
172body_create.py
-
8gcalendar.py
-
92get_classes.py
-
24main.py
-
220scraper.py
@ -1,87 +1,87 @@ |
|||||
from dateutil import rrule |
|
||||
from gcalendar import dateTime |
|
||||
import datetime |
|
||||
import pickle |
|
||||
import json |
|
||||
import re |
|
||||
# Street address stamped onto every generated event's 'location' field.
LOCATION = "5500 St Louis Ave, Chicago, IL 60625"

# NOTE(review): unused sample event body copied from the Google Calendar API
# documentation; kept only as a reference for the field layout that
# create_body() targets. Nothing in this module reads it.
event = {
  'summary': 'Google I/O 2015',
  'location': '800 Howard St., San Francisco, CA 94103',
  'description': 'A chance to hear more about Google\'s developer products.',
  'start': {
    'dateTime': '2015-05-28T09:00:00-07:00',
    'timeZone': 'America/Los_Angeles',
  },
  'end': {
    'dateTime': '2015-05-28T17:00:00-07:00',
    'timeZone': 'America/Los_Angeles',
  },
  'recurrence': [
    'RRULE:FREQ=DAILY;COUNT=2'
  ],
  'attendees': [
    {'email': 'lpage@example.com'},
    {'email': 'sbrin@example.com'},
  ],
  'reminders': {
    'useDefault': False,
    'overrides': [
      {'method': 'email', 'minutes': 24 * 60},
      {'method': 'popup', 'minutes': 10},
    ],
  },
}
|
||||
|
|
||||
def rrule_former(class_obj):
    """Return an 'RRULE:...' recurrence string for a class's weekly meetings.

    class_obj must expose .days (weekday numbers, Monday=1 per scraper.days),
    .date_range (first_date, last_date) and .time_range (start_time, end_time).
    """
    days = class_obj.days
    # Anchor the recurrence to the first/last meeting's local datetimes.
    start =datetime.datetime.combine(class_obj.date_range[0],class_obj.time_range[0]).astimezone()
    end =datetime.datetime.combine(class_obj.date_range[1],class_obj.time_range[1]).astimezone()

    # Shift 1-based day numbers (Mon=1) to dateutil's 0-based (rrule.MO == 0).
    days = [ (day -1) % 7 for day in days]
    ret = rrule.rrule(freq=rrule.WEEKLY,dtstart=start,wkst=rrule.SU,until=end,byweekday=days)
    # str(rrule) is "DTSTART:...\nRRULE:..."; keep only the RRULE parameters.
    ret_str = str(ret).split('\n')[-1]
    # Append 'Z' so UNTIL looks UTC-terminated, as Google Calendar requires.
    # NOTE(review): the timestamp itself is local wall time, so labelling it
    # 'Z' shifts the effective end by the UTC offset -- confirm acceptable.
    ret_str=re.sub(r'(UNTIL=[^;]+)',r'\1Z',ret_str)
    return 'RRULE:{}'.format(ret_str)
|
||||
def create_body(_class):
    """Translate a scraped class into a Google Calendar event body dict.

    Classes whose meeting time is TBA (falsy time_range) yield None, which
    callers filter out before uploading.
    """
    if not _class.time_range:
        return None
    first_day = _class.date_range[0]
    return {
        # "kind": "calendar#event",
        'recurrence': [rrule_former(_class)],
        'start': dateTime(datetime.datetime.combine(first_day, _class.time_range[0])),
        'end': dateTime(datetime.datetime.combine(first_day, _class.time_range[1])),
        'summary': _class.title,
        'description': 'location: {}'.format(_class.location),
        'location': LOCATION,
        'reminders': {'useDefault': True},
    }
|
||||
def json_dump(obj):
    """Serialize *obj* as JSON into 'classes.json' in the working directory."""
    with open('classes.json', 'w') as outfile:
        outfile.write(json.dumps(obj))
|
||||
def test_rrule():
    """Smoke-test rrule_former against a synthetic Mon/Wed/Fri class.

    Returns locals() so the caller can inspect the fixture and result.
    """
    now = datetime.datetime.now()
    from munch import Munch
    # Fake class: meets days 1/3/5, 50-minute slot, 20-day term.
    test_obj = Munch(
        days=[1, 3, 5],
        time_range=[
            now.time(),
            (now + datetime.timedelta(seconds=50 * 60)).time(),
        ],
        date_range=[
            now.date(),
            (now + datetime.timedelta(days=20)).date(),
        ],
    )
    test_result = rrule_former(test_obj)
    return locals()
|
||||
|
|
||||
def test_class2body():
    """Load the pickled class list and convert each schedulable class to an
    event body, dropping the TBA ones (create_body returns None for those)."""
    with open('classes.pkl', 'rb') as pkl_file:
        classes = pickle.load(pkl_file)
    return [body for body in map(create_body, classes) if body]
|
||||
|
|
||||
if __name__ == "__main__": |
|
||||
|
from dateutil import rrule |
||||
|
from gcalendar import dateTime |
||||
|
import datetime |
||||
|
import pickle |
||||
|
import json |
||||
|
import re |
||||
|
LOCATION = "5500 St Louis Ave, Chicago, IL 60625" |
||||
|
event = { |
||||
|
'summary': 'Google I/O 2015', |
||||
|
'location': '800 Howard St., San Francisco, CA 94103', |
||||
|
'description': 'A chance to hear more about Google\'s developer products.', |
||||
|
'start': { |
||||
|
'dateTime': '2015-05-28T09:00:00-07:00', |
||||
|
'timeZone': 'America/Los_Angeles', |
||||
|
}, |
||||
|
'end': { |
||||
|
'dateTime': '2015-05-28T17:00:00-07:00', |
||||
|
'timeZone': 'America/Los_Angeles', |
||||
|
}, |
||||
|
'recurrence': [ |
||||
|
'RRULE:FREQ=DAILY;COUNT=2' |
||||
|
], |
||||
|
'attendees': [ |
||||
|
{'email': 'lpage@example.com'}, |
||||
|
{'email': 'sbrin@example.com'}, |
||||
|
], |
||||
|
'reminders': { |
||||
|
'useDefault': False, |
||||
|
'overrides': [ |
||||
|
{'method': 'email', 'minutes': 24 * 60}, |
||||
|
{'method': 'popup', 'minutes': 10}, |
||||
|
], |
||||
|
}, |
||||
|
} |
||||
|
|
||||
|
def rrule_former(class_obj): |
||||
|
days = class_obj.days |
||||
|
start =datetime.datetime.combine(class_obj.date_range[0],class_obj.time_range[0]).astimezone() |
||||
|
end =datetime.datetime.combine(class_obj.date_range[1],class_obj.time_range[1]).astimezone() |
||||
|
|
||||
|
days = [ (day -1) % 7 for day in days] |
||||
|
ret = rrule.rrule(freq=rrule.WEEKLY,dtstart=start,wkst=rrule.SU,until=end,byweekday=days) |
||||
|
ret_str = str(ret).split('\n')[-1] |
||||
|
ret_str=re.sub(r'(UNTIL=[^;]+)',r'\1Z',ret_str) |
||||
|
return 'RRULE:{}'.format(ret_str) |
||||
|
def create_body(_class): |
||||
|
if _class.time_range: |
||||
|
body = { |
||||
|
# "kind": "calendar#event", |
||||
|
} |
||||
|
body['recurrence'] = [rrule_former(_class)] |
||||
|
body['start'] = dateTime(datetime.datetime.combine(_class.date_range[0],_class.time_range[0])) |
||||
|
body['end'] = dateTime(datetime.datetime.combine(_class.date_range[0],_class.time_range[1])) |
||||
|
body['summary'] = _class.title |
||||
|
body['description'] = 'location: {}'.format(_class.location) |
||||
|
body['location'] = LOCATION |
||||
|
body['reminders'] = {'useDefault':True} |
||||
|
return body |
||||
|
def json_dump(obj): |
||||
|
with open('classes.json','w') as file: |
||||
|
json.dump(obj,file) |
||||
|
def test_rrule(): |
||||
|
#test |
||||
|
now = datetime.datetime.now() |
||||
|
from munch import Munch |
||||
|
test_obj = Munch( |
||||
|
days=[1,3,5], |
||||
|
time_range=[ |
||||
|
now.time(), |
||||
|
(now+datetime.timedelta(seconds=50*60)).time() |
||||
|
], |
||||
|
date_range=[ |
||||
|
now.date(), |
||||
|
(now+datetime.timedelta(days=20)).date() |
||||
|
], |
||||
|
) |
||||
|
test_result = rrule_former(test_obj) |
||||
|
return locals() |
||||
|
|
||||
|
def test_class2body(): |
||||
|
with open('classes.pkl','rb') as file: |
||||
|
classes = pickle.load(file) |
||||
|
test_result = list(filter(bool,map(create_body,classes))) |
||||
|
return test_result |
||||
|
|
||||
|
if __name__ == "__main__":
    # Convert the pickled schedule into event bodies and write classes.json.
    json_dump(test_class2body())
||||
@ -1,5 +1,5 @@ |
|||||
# Make the bundled google_api_wrapper package importable, then re-export the
# calendar API names from it so callers can `from gcalendar import api, ...`.
import os
import sys

parent = os.path.dirname(__file__)
sys.path.insert(0,os.path.join(parent,'google_api_wrapper'))

from gapi.calendar_api import *  # noqa: F401,F403 -- deliberate re-export
||||
@ -1,46 +1,46 @@ |
|||||
from pyppeteer import launch |
|
||||
import asyncio |
|
||||
import time |
|
||||
import scraper |
|
||||
# JS one-liner: pick the first entry (current term) in the term drop-down.
set_semester = "document.getElementsByName('term_in')[0].selectedIndex = 0"

# XPath selectors for each navigation step through the NEIU student portal.
xpaths = {
    'tab':".//a[text()='Current Student']",
    'schedule':".//a[text()='Student Detail Schedule']",
    'submit':"//input[@value='Submit']",
    'frame':"//frame[@src='/cp/ip/login?sys=sctssb&url=https://ssb.neiu.edu/mercury_neiuprod/bwskfshd.P_CrseSchdDetl']"
}
|
||||
async def xpath_single_element(xpath, page):
    """Wait until *xpath* matches on *page* (or frame), then return the first
    matching element. Raises IndexError if the match list is empty."""
    await page.waitForXPath(xpath)
    return (await page.xpath(xpath))[0]
|
||||
async def main_loop(login):
    """Drive a browser through the NEIU portal and scrape the schedule.

    login is a JS snippet that fills the credential fields and submits.
    Returns whatever scraper.get_classes() extracts from the schedule
    frame's HTML.
    """
    browser = await launch(headless = False)
    page_list = await browser.pages()
    page = page_list[0]
    r = await page.goto('https://neiuport.neiu.edu/cp/home/displaylogin')
    await page.evaluate(login)
    await page.waitFor('#tab')
    student_tab = await xpath_single_element(xpaths['tab'],page)
    await student_tab.click()
    await page.waitForXPath(xpaths['schedule'])
    schedule = await xpath_single_element(xpaths['schedule'],page)
    await schedule.click()
    # BUG FIX: this coroutine was never awaited, so the wait silently did
    # nothing and only the sleep below papered over the race.
    await page.waitForXPath(xpaths['frame'])
    await asyncio.sleep(3)
    # The schedule content loads into the last frame on the page.
    frame = page.frames[-1]
    submit = await xpath_single_element(xpaths['submit'],frame)
    await submit.click()
    await asyncio.sleep(1)
    content = await page.frames[-1].content()
    await browser.close()
    return scraper.get_classes(content)
|
||||
|
|
||||
def get_classes(user,password):
    """Log into the NEIU portal with user/password and return the scraped
    class list produced by main_loop."""
    login = """document.getElementById('user').value='{}'
document.getElementById('pass').value='{}'
login()""".format(user,password)
    return asyncio.get_event_loop().run_until_complete(main_loop(login))
|
||||
if __name__ == "__main__":
    # SECURITY FIX: the username and password were hard-coded in source
    # (and therefore in version control). Read them from the environment,
    # falling back to an interactive prompt.
    import os
    import getpass
    _user = os.environ.get('NEIU_USER') or input('NEIU username: ')
    _password = os.environ.get('NEIU_PASS') or getpass.getpass('NEIU password: ')
    cl = get_classes(_user, _password)
|
||||
|
from pyppeteer import launch |
||||
|
import asyncio |
||||
|
import time |
||||
|
import scraper |
||||
|
set_semester = "document.getElementsByName('term_in')[0].selectedIndex = 0" |
||||
|
xpaths = { |
||||
|
'tab':".//a[text()='Current Student']", |
||||
|
'schedule':".//a[text()='Student Detail Schedule']", |
||||
|
'submit':"//input[@value='Submit']", |
||||
|
'frame':"//frame[@src='/cp/ip/login?sys=sctssb&url=https://ssb.neiu.edu/mercury_neiuprod/bwskfshd.P_CrseSchdDetl']" |
||||
|
} |
||||
|
async def xpath_single_element(xpath,page): |
||||
|
await page.waitForXPath(xpath) |
||||
|
elements = await page.xpath(xpath) |
||||
|
return elements[0] |
||||
|
async def main_loop(login): |
||||
|
browser = await launch(headless = False) |
||||
|
page_list = await browser.pages() |
||||
|
page = page_list[0] |
||||
|
r = await page.goto('https://neiuport.neiu.edu/cp/home/displaylogin') |
||||
|
await page.evaluate(login) |
||||
|
await page.waitFor('#tab') |
||||
|
student_tab = await xpath_single_element(xpaths['tab'],page) |
||||
|
await student_tab.click() |
||||
|
await page.waitForXPath(xpaths['schedule']) |
||||
|
schedule = await xpath_single_element(xpaths['schedule'],page) |
||||
|
await schedule.click() |
||||
|
page.waitForXPath(xpaths['frame']) |
||||
|
await asyncio.sleep(3) |
||||
|
frame = page.frames[-1] |
||||
|
submit= await xpath_single_element(xpaths['submit'],frame) |
||||
|
await submit.click() |
||||
|
await asyncio.sleep(1) |
||||
|
content = await page.frames[-1].content() |
||||
|
await browser.close() |
||||
|
return scraper.get_classes(content) |
||||
|
|
||||
|
def get_classes(user,password): |
||||
|
login = """document.getElementById('user').value='{}' |
||||
|
document.getElementById('pass').value='{}' |
||||
|
login()""".format(user,password) |
||||
|
loop = asyncio.get_event_loop() |
||||
|
r = loop.run_until_complete |
||||
|
return r(main_loop(login)) |
||||
|
if __name__ == "__main__": |
||||
|
cl = get_classes('rlroberts5','YxmZZ905p0w6') |
||||
@ -1,13 +1,13 @@ |
|||||
# Push every event body from classes.json into the 'school schedule' calendar.
from gcalendar import api
import json
import pprint

# Authenticate against the Google Calendar API using the local client secret.
api = api(r'api_info\client_secret.json','api_info')
cals = api.get_calendars()
# Calendar whose id matches the configured 'school schedule' id.
# NOTE(review): `cal` is only used by the commented-out colorId line below.
cal = next(filter(lambda cal: cal['id'] == api.ids['school schedule'],cals))
with open('classes.json') as file:
    bodies = json.load(file)
for body in bodies:
    # body['colorId'] = cal['colorId']
    # pprint.pprint(body)
    # input()
    api.create_event('school schedule',body)
||||
@ -1,110 +1,110 @@ |
|||||
from bs4 import BeautifulSoup as BS |
|
||||
import datetime |
|
||||
import re |
|
||||
from operator import sub |
|
||||
def dateparse(datetime_str):
    """Parse a schedule cell that holds either a date like 'Sep 05, 2019'
    or a 12-hour clock time like '09:30 am' into a datetime."""
    date_format = '%b %d, %Y'
    time_format = '%I:%M %p'
    try:
        # Most cells are dates; fall back to the clock format on failure.
        return datetime.datetime.strptime(datetime_str, date_format)
    except ValueError:
        return datetime.datetime.strptime(datetime_str, time_format)
|
||||
# Schedule day letters indexed by weekday number (Mon=1 .. Fri=5);
# the None entries pad the Sunday/Saturday slots.
days = [None,'M','T','W','R','F',None]
# Row labels kept whole (snake_cased) instead of reduced to their last word.
simp_exceptions = ['Grade Mode']
|
||||
def datetime2date_time(dtime, mode):
    """Project a datetime onto its date or its (second-resolution) time.

    Args:
        dtime: a datetime.datetime.
        mode: 'date' for the calendar date, 'time' for the wall-clock time.

    Raises:
        ValueError: on an unknown mode (previously returned None silently,
        which deferred the failure to an unrelated call site).
    """
    if mode == 'date':
        return datetime.date(dtime.year, dtime.month, dtime.day)
    if mode == 'time':
        return datetime.time(dtime.hour, dtime.minute, dtime.second)
    raise ValueError("mode must be 'date' or 'time', got {!r}".format(mode))
|
||||
def seconds_from_midnight(t):
    """Return the number of seconds elapsed since midnight for time *t*."""
    return (t.hour * 60 + t.minute) * 60 + t.second
|
||||
class Class:
    """A single course section scraped from the student detail schedule."""

    def __init__(self, title, session, days, location, time_range, date_range):
        self.title = title
        self.session = session
        self.days = days                # weekday numbers, Mon=1 .. Fri=5
        self.location = location
        self.time_range = time_range    # (start_time, end_time), or None if TBA
        self.lab = None                 # second meeting row (lab), when present
        self.date_range = date_range    # (first_date, last_date)

    # data is a pair of html tables: (course-info table, meeting-times table)
    def scrape(self, data):
        """Populate this instance from the two schedule tables; returns self."""
        info, times = data
        # info table: caption reads "Title - Abbreviation - Session"
        self.title, self.abrv, self.session = info.find('caption').text.split(' - ')
        self.lab = None
        self.session = int(self.session)
        for row in info.find_all('tr'):
            name = row.find('th').text.rstrip(':')
            value = re.sub(r'^ +|[\n\r\t]', '', row.find('td').text)
            if name == 'Status':
                # e.g. "**Web Registered** on Jan 01, 2019"
                status, date = value.split(' on ')
                self.type = status.replace('*', '')
                self.registration_date = dateparse(date)
            else:
                if name in simp_exceptions:
                    name = name.lower().replace(' ', '_')
                else:
                    name = name.lower().split(' ')[-1]
                if name != 'instructor':
                    value = value.lower()
                try:
                    # numeric cells like "3.000 Credits" become ints
                    # BUG FIX: was a bare `except:` (hid every error)
                    value = int(re.sub(r'\.\d+', '', value))
                except ValueError:
                    pass
                self.__dict__[name] = value

        # times table: a header row followed by one or two meeting rows
        headers, *rows = times.find_all('tr')
        if len(rows) > 1:
            row, self.lab = rows[:2]
        else:  # BUG FIX: original `else` had no colon (SyntaxError)
            self.lab = None
            row = rows[0]
        cols = [col.text for col in row.find_all('td')]
        header_names = [header.text.lower() for header in headers.find_all('th')]
        # BUG FIX: _parse_horz_row was defined but never called, so the
        # meeting attributes (time_range/date_range/days/location) were
        # never set by scrape().
        self.__dict__.update(self._parse_horz_row(header_names, cols))
        return self

    @staticmethod
    def _parse_horz_row(headers, row):
        """Parse one meeting row into time_range/date_range/days/location."""
        ret = {}
        time_data = dict(zip(headers, row))
        if time_data['time'] == 'TBA':
            ret['time_range'] = None
        else:
            s, e = map(dateparse, time_data['time'].split(' - '))
            ret['time_range'] = (
                datetime2date_time(s, 'time'),
                datetime2date_time(e, 'time'),
            )
        s, e = map(dateparse, time_data['date range'].split(' - '))
        ret['date_range'] = (
            datetime2date_time(s, 'date'),
            datetime2date_time(e, 'date'),
        )
        # Keep only known day letters, then map each letter to its weekday
        # number via its index in the module-level `days` list.
        time_data['days'] = re.sub(
            '[^{}]'.format(''.join(filter(bool, days))), '', time_data['days'])
        ret['days'] = [days.index(letter) for letter in time_data['days']]
        ret['location'] = time_data['where']
        return ret

    @property
    def length(self):
        """Duration of one meeting as a timedelta."""
        return datetime.timedelta(seconds=sub(
            seconds_from_midnight(self.time_range[1]),
            seconds_from_midnight(self.time_range[0]),
        ))
|
||||
|
|
||||
|
|
||||
def get_classes(page):
    """Parse a schedule page into a list of scraped Class instances.

    Accepts raw HTML (str/bytes) or an already-parsed BeautifulSoup document.
    """
    if not isinstance(page, BS):
        page = BS(page, 'lxml')
    tables = page.find_all('table', attrs={'class': 'datadisplaytable'})
    # The tables alternate (course info, meeting times), one pair per class.
    groups = ((tables[i], tables[i + 1]) for i in range(0, len(tables), 2))
    # BUG FIX: `map(Class.scrape, groups)` called the unbound method with the
    # table pair bound to `self` and no `data` argument (TypeError). Build a
    # bare instance per pair and let scrape() populate it.
    classes = []
    for group in groups:
        obj = Class.__new__(Class)
        obj.scrape(group)
        classes.append(obj)
    return classes
|
||||
|
|
||||
if __name__ == "__main__":
    # Ad-hoc manual test against a locally saved copy of the schedule page.
    with open('schedule.html') as file:
        page = BS(file.read(),'lxml')
    class1,*classes = get_classes(page)
|
||||
|
from bs4 import BeautifulSoup as BS |
||||
|
import datetime |
||||
|
import re |
||||
|
from operator import sub |
||||
|
def dateparse(datetime_str): |
||||
|
date = '%b %d, %Y' |
||||
|
time = '%I:%M %p' |
||||
|
try: |
||||
|
return datetime.datetime.strptime(datetime_str,date) |
||||
|
except ValueError: |
||||
|
return datetime.datetime.strptime(datetime_str,time) |
||||
|
days = [None,'M','T','W','R','F',None] |
||||
|
simp_exceptions = ['Grade Mode'] |
||||
|
def datetime2date_time(dtime,mode): |
||||
|
if mode == 'date': |
||||
|
return datetime.date(dtime.year,dtime.month,dtime.day) |
||||
|
elif mode == 'time': |
||||
|
return datetime.time(dtime.hour,dtime.minute,dtime.second) |
||||
|
def seconds_from_midnight(t): |
||||
|
return t.hour*60**2+ t.minute*60+t.second |
||||
|
class Class: |
||||
|
def __init__(self,title,session,days,location,time_range,date_range): |
||||
|
self.title = title |
||||
|
self.session = session |
||||
|
self.days = days |
||||
|
self.location = location |
||||
|
self.time_range = time_range |
||||
|
self.lab = None |
||||
|
self.date_range = date_range |
||||
|
# data is a list of two html tables |
||||
|
def scrape(self,data): |
||||
|
info,times = data |
||||
|
# info |
||||
|
self.title,self.abrv,self.session = info.find('caption').text.split(' - ') |
||||
|
self.lab = None |
||||
|
self.session = int(self.session) |
||||
|
rows = info.find_all('tr') |
||||
|
for row in rows: |
||||
|
name = row.find('th').text.rstrip(':') |
||||
|
data = re.sub(r'^ +|[\n\r\t]','',row.find('td').text) |
||||
|
|
||||
|
if name == 'Status': |
||||
|
type,date = data.split(' on ') |
||||
|
type = type.replace('*','') |
||||
|
self.type = type |
||||
|
self.registration_date = dateparse(date) |
||||
|
else: |
||||
|
if name in simp_exceptions: |
||||
|
name = name.lower().replace(' ','_') |
||||
|
else: |
||||
|
name = name.lower().split(' ')[-1] |
||||
|
if name != 'instructor': |
||||
|
data = data.lower() |
||||
|
try: |
||||
|
data = int(re.sub(r'\.\d+','',data)) |
||||
|
except: |
||||
|
|
||||
|
pass |
||||
|
self.__dict__[name] = data |
||||
|
|
||||
|
# time |
||||
|
headers,*data = times.find_all('tr') |
||||
|
if len(data) > 1: |
||||
|
data,lab = data[:2] |
||||
|
else |
||||
|
lab = None |
||||
|
data = data[0] |
||||
|
data = (col.text for col in data.find_all('td')) |
||||
|
headers = (header.text.lower() for header in headers.find_all('th')) |
||||
|
|
||||
|
def parse_horz_row(headers,row): |
||||
|
ret = {} |
||||
|
time_data = dict(zip(headers,data)) |
||||
|
if time_data['time'] == 'TBA': |
||||
|
ret['time_range'] = None |
||||
|
else: |
||||
|
s,e = map(dateparse,time_data['time'].split(' - ')) |
||||
|
ret['time_range'] = ( |
||||
|
datetime2date_time(s,'time'), |
||||
|
datetime2date_time(e,'time'), |
||||
|
) |
||||
|
s,e = map(dateparse,time_data['date range'].split(' - ')) |
||||
|
ret['date_range'] = ( |
||||
|
datetime2date_time(s,'date'), |
||||
|
datetime2date_time(e,'date'), |
||||
|
) |
||||
|
time_data['days'] = re.sub('[^{}]'.format(''.join(filter(bool,days))),'',time_data['days']) |
||||
|
ret['days'] = list(days.index(time_data['days'][i]) for i in range(len(time_data['days']))) |
||||
|
ret['location'] = time_data['where'] |
||||
|
return ret |
||||
|
|
||||
|
@property |
||||
|
def length(self): |
||||
|
return datetime.timedelta(seconds = sub( |
||||
|
seconds_from_midnight(self.time_range[1]), |
||||
|
seconds_from_midnight(self.time_range[0]), |
||||
|
)) |
||||
|
|
||||
|
|
||||
|
def get_classes(page): |
||||
|
if not isinstance(page,BS): |
||||
|
page = BS(page,'lxml') |
||||
|
tables = page.find_all('table',attrs= {'class':'datadisplaytable'}) |
||||
|
groups = ((tables[i],tables[i+1]) for i in range(0,len(tables),2)) |
||||
|
return list(map(Class.scrape,groups)) |
||||
|
|
||||
|
if __name__ == "__main__": |
||||
|
with open('schedule.html') as file: |
||||
|
page = BS(file.read(),'lxml') |
||||
|
class1,*classes = get_classes(page) |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue