"""Blocking convenience wrapper around a pyppeteer-controlled browser."""
import asyncio
import os
import pyppeteer
import time


def run(coroutine):
    """Run *coroutine* to completion on the thread's event loop and return its result.

    The same loop is reused across calls so that objects created by one
    call (browser, pages) remain usable in the next one.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No loop is registered for this thread (non-main thread, or a
        # Python version that no longer creates one implicitly): create
        # one and register it so subsequent calls pick up the same loop.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop.run_until_complete(coroutine)


class browser:
    """Wrapper around a pyppeteer browser exposing blocking methods."""

    def __init__(self, **launch_opts):
        """Store *launch_opts* for later use; no browser is started here.

        The options are forwarded as keyword arguments to
        ``pyppeteer.launch`` each time ``restart_browser`` is called.
        """
        self.browser = None
        self.launch_opts = launch_opts

    def restart_browser(self, proxy=None, use_adblock=True):
        """Close any running browser and launch a new one.

        Args:
            proxy: Value for the ``--proxy-server=`` command-line flag, or
                ``None`` to launch without that flag.
            use_adblock: When true, add the flags that load the ``uBlock``
                extension directory.

        The stored launch options are not modified by this method: the
        argument list is rebuilt from scratch on every call, so flags from
        a previous restart do not accumulate.
        """
        if self.browser is not None:
            self.close()

        opts = dict(self.launch_opts)
        # Copy the caller-supplied args; extending them in place would
        # mutate self.launch_opts['args'] and duplicate flags per restart.
        args = list(opts.get('args') or [])

        if use_adblock:
            # When this file is executed as a script the extension is looked
            # up in the current working directory, otherwise next to this
            # module.
            if __name__ == "__main__":
                ext = os.path.join(os.getcwd(), 'uBlock')
            else:
                ext = os.path.join(os.path.dirname(__file__), 'uBlock')
            args.extend(['--disable-extensions-except=' + ext,
                         '--load-extension=' + ext])

        if proxy is not None:
            args.append('--proxy-server=' + proxy)

        if args:
            opts['args'] = args

        self.browser = run(pyppeteer.launch(**opts))

    def open(self, url, wait_for=0):
        """Navigate the first page of the browser to *url* and return its content.

        Args:
            url: Address passed to the page's ``goto``.
            wait_for: Seconds to sleep after navigation before the content
                is read.

        Returns:
            The result of the page's ``content()`` call.

        ``restart_browser`` must have been called first; otherwise
        ``self.browser`` is ``None`` and this raises ``AttributeError``.
        """
        page = run(self.browser.pages())[0]
        run(page.goto(url))
        time.sleep(wait_for)
        return run(page.content())

    def close(self):
        """Close the running browser, if any, and forget the handle.

        Calling this when no browser is running is a no-op.
        """
        if self.browser is None:
            return
        try:
            run(self.browser.close())
        finally:
            # Drop the handle even if closing failed so that a later
            # restart_browser() does not try to close it a second time.
            self.browser = None


if __name__ == "__main__":
    b = browser(headless=False)
    b.restart_browser()
    page = b.open('https://www.google.com')