From 4eb44250a65b240196565150f56ba46bb957480b Mon Sep 17 00:00:00 2001
From: Raphael Roberts
Date: Fri, 31 May 2019 05:05:15 -0500
Subject: [PATCH] Greatly simplified browser.py

---
Review notes (text in this section is ignored when the patch is applied):

 * The Browser class is split into BrowserConnection (owns the pyppeteer
   browser handle and its websocket address), Tab (one page per instance,
   usable as a context manager that closes the page on exit) and a
   module-level start_browser() factory that launches a browser and wraps
   it in a BrowserConnection.
 * BrowserConnection.__init__: the connect-by-address branch runs only when
   browser_handle is None, so it cannot read an attribute from
   browser_handle; it now stores the `address` argument it connected with.
 * BrowserConnection.__init__: the second branch is a plain `else`, so that
   passing both `address` and `browser_handle` no longer leaves the
   instance without `browser_handle` / `address` attributes; the supplied
   handle is used and its wsEndpoint recorded.

 restscrape/scraping/browser.py | 85 +++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 38 deletions(-)

diff --git a/restscrape/scraping/browser.py b/restscrape/scraping/browser.py
index 070be62..5fbc7e9 100644
--- a/restscrape/scraping/browser.py
+++ b/restscrape/scraping/browser.py
@@ -1,8 +1,8 @@
 import asyncio
-import os
 import time
 from pathlib import Path
+
 import pyppeteer
 
 EVENT_LOOP = None
 
@@ -15,51 +15,60 @@ def run(coroutine):
     return EVENT_LOOP.run_until_complete(coroutine)
 
 
-class Browser:
-    def __init__(self, **launch_opts):
-        self.connected = False
-        self.browser_instance: pyppeteer.browser.Browser = None
-        self.address = None
-        self.launch_opts = launch_opts
-
-    def connect(self, socket_address):
-        self.browser_instance = run(
-            pyppeteer.launcher.connect(browserWSEndpoint=socket_address)
-        )
-        self.address = socket_address
-        self.page: pyppeteer.page.Page = run(self.browser_instance.pages())[0]
-        # self.page: pyppeteer.page.Page = run(b.browser_instance.newPage())
-
-    def start_browser(self, proxy=None, use_adblock=True):
-        opts = {}
-        opts.update(self.launch_opts)
-        if use_adblock:
-            adblock_path = Path(__file__).parent / "uBlock"
-            opts.setdefault("args", []).extend(
-                [
-                    "--disable-extensions-except={}".format(adblock_path),
-                    "--load-extension={}".format(adblock_path),
-                ]
+class BrowserConnection:
+    def __init__(self, address=None, browser_handle: pyppeteer.browser.Browser = None):
+        if browser_handle is None:
+            self.browser_handle: pyppeteer.browser.Browser = run(
+                pyppeteer.launcher.connect(browserWSEndpoint=address)
             )
-        if proxy is not None:
-            opts.setdefault("args", []).extend(["--proxy-server=" + proxy])
-        opts.setdefault("args", []).append("about:blank")
-        self.browser_instance = run(pyppeteer.launch(**opts))
-        self.address = self.browser_instance.wsEndpoint
-        self.page: pyppeteer.page.Page = run(self.browser_instance.pages())[0]
-        # self.page: pyppeteer.page.Page = run(b.browser_instance.newPage())
+            self.address = address
+        else:
+            self.browser_handle = browser_handle
+            self.address = browser_handle.wsEndpoint
+
+    def create_tab(self):
+        return Tab(self.browser_handle)
 
     def close(self):
-        run(self.browser_instance.close())
+        run(self.browser_handle.close())
+
+
+class Tab:
+    def __init__(self, browser_handle):
+        self.browser_handle = browser_handle
+        self.page_handle = run(browser_handle.newPage())
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        run(self.page_handle.close())
+
+    def get_source(self):
+        return run(self.page_handle.content())
 
     def open(self, url, wait_for=0):
-        run(self.page.goto(url, waitUntil="domcontentloaded"))
+        run(self.page_handle.goto(url, waitUntil="domcontentloaded"))
         time.sleep(wait_for)
         return self.get_source()
 
-    def get_source(self):
-        return run(self.page.content())
+
+def start_browser(proxy=None, use_adblock=True, **launch_opts):
+    opts = launch_opts
+    if use_adblock:
+        adblock_path = Path(__file__).parent / "uBlock"
+        opts.setdefault("args", []).extend(
+            [
+                "--disable-extensions-except={}".format(adblock_path),
+                "--load-extension={}".format(adblock_path),
+            ]
+        )
+    if proxy is not None:
+        opts.setdefault("args", []).extend(["--proxy-server=" + proxy])
+    opts.setdefault("args", []).append("about:blank")
+    browser_handle = run(pyppeteer.launch(**opts))
+    return BrowserConnection(browser_handle=browser_handle)
 
 
 if __name__ == "__main__":
-    b = Browser(headless=False)
+    b = start_browser(headless=False)