Browse Source

Greatly simplified browser.py

master
Raphael Roberts 7 years ago
parent
commit
4eb44250a6
  1. 85
      restscrape/scraping/browser.py

85
restscrape/scraping/browser.py

@@ -1,8 +1,8 @@
import asyncio
import os
import time
from pathlib import Path
import pyppeteer
EVENT_LOOP = None
@@ -15,51 +15,60 @@ def run(coroutine):
return EVENT_LOOP.run_until_complete(coroutine)
class Browser:
    """Wrapper that owns one pyppeteer browser and works on its first page.

    The browser is either launched by :meth:`start_browser` or attached to
    with :meth:`connect`; both set ``browser_instance``, ``address`` and
    ``page``.
    """

    def __init__(self, **launch_opts):
        """Remember *launch_opts* for a later :meth:`start_browser` call.

        No browser is launched or contacted here.
        """
        self.connected = False
        self.browser_instance: pyppeteer.browser.Browser = None
        self.address = None
        self.launch_opts = launch_opts

    def connect(self, socket_address):
        """Attach to the browser listening on *socket_address*.

        Args:
            socket_address: Value passed to pyppeteer as ``browserWSEndpoint``.
        """
        self.browser_instance = run(
            pyppeteer.launcher.connect(browserWSEndpoint=socket_address)
        )
        self.address = socket_address
        # Work on the first page the browser reports.
        self.page: pyppeteer.page.Page = run(self.browser_instance.pages())[0]

    def start_browser(self, proxy=None, use_adblock=True):
        """Launch a browser using the stored launch options.

        Args:
            proxy: If not ``None``, appended as ``--proxy-server=<proxy>``.
            use_adblock: If true, load the ``uBlock`` directory that sits next
                to this module as the only enabled extension.
        """
        opts = dict(self.launch_opts)
        # dict() is a shallow copy, so the "args" list would still be the one
        # stored in self.launch_opts; copy it so repeated calls do not keep
        # appending the same flags to the stored options.
        args = list(opts.get("args") or ())
        if use_adblock:
            adblock_path = Path(__file__).parent / "uBlock"
            args.extend(
                [
                    "--disable-extensions-except={}".format(adblock_path),
                    "--load-extension={}".format(adblock_path),
                ]
            )
        if proxy is not None:
            args.append("--proxy-server=" + proxy)
        args.append("about:blank")
        opts["args"] = args

        self.browser_instance = run(pyppeteer.launch(**opts))
        self.address = self.browser_instance.wsEndpoint
        self.page: pyppeteer.page.Page = run(self.browser_instance.pages())[0]

    def close(self):
        """Close the browser held in ``browser_instance``."""
        run(self.browser_instance.close())


class BrowserConnection:
    """Handle on a running browser from which :class:`Tab` objects are made.

    Attributes set by the constructor:
        browser_handle: The pyppeteer browser object.
        address: The websocket endpoint of that browser.
    """

    def __init__(self, address=None, browser_handle: pyppeteer.browser.Browser = None):
        """Wrap *browser_handle*, or connect to *address* when no handle is given.

        Args:
            address: Websocket endpoint to connect to. Only used when
                *browser_handle* is ``None``.
            browser_handle: An existing pyppeteer browser. Takes precedence
                over *address* when both are supplied.
        """
        if browser_handle is None:
            self.browser_handle: pyppeteer.browser.Browser = run(
                pyppeteer.launcher.connect(browserWSEndpoint=address)
            )
            # The original read ``browser_handle.address`` here, but
            # browser_handle is None on this branch; the endpoint we just
            # connected to is *address*.
            self.address = address
        else:
            # Previously this branch required ``address is None``; passing
            # both arguments left the instance without any attributes.
            self.browser_handle = browser_handle
            self.address = browser_handle.wsEndpoint

    def create_tab(self):
        """Return a new :class:`Tab` on this connection's browser."""
        return Tab(self.browser_handle)

    def close(self):
        """Close the browser held in ``browser_handle``."""
        run(self.browser_handle.close())
class Tab:
    """One browser page, usable as a context manager that closes the page.

    Attributes:
        browser_handle: The browser object the page was created from.
        page_handle: The page returned by ``browser_handle.newPage()``.
    """

    def __init__(self, browser_handle):
        """Create a new page on *browser_handle*."""
        self.browser_handle = browser_handle
        self.page_handle = run(browser_handle.newPage())

    def __enter__(self):
        """Return this tab so it can be bound with ``with ... as tab``."""
        return self

    def __exit__(self, *args):
        """Close the page; exceptions from the ``with`` body are not suppressed."""
        run(self.page_handle.close())

    def get_source(self):
        """Return the result of ``page_handle.content()`` for the current page."""
        return run(self.page_handle.content())

    def open(self, url, wait_for=0):
        """Navigate to *url* and return the page source.

        Args:
            url: Address to load.
            wait_for: Seconds to sleep after navigation returns, before the
                source is read.

        Returns:
            Whatever :meth:`get_source` returns after the wait.
        """
        # Only page_handle exists on a Tab; the stale ``self.page`` calls that
        # were mixed into this class raised AttributeError.
        run(self.page_handle.goto(url, waitUntil="domcontentloaded"))
        time.sleep(wait_for)
        return self.get_source()
def start_browser(proxy=None, use_adblock=True, **launch_opts):
    """Launch a browser and return a ``BrowserConnection`` wrapping it.

    Args:
        proxy: If not ``None``, appended as ``--proxy-server=<proxy>``.
        use_adblock: If true, load the ``uBlock`` directory that sits next to
            this module as the only enabled extension.
        **launch_opts: Passed through to ``pyppeteer.launch``. An ``args``
            entry is used as the starting argument list and is not modified.

    Returns:
        A ``BrowserConnection`` built from the launched browser handle.
    """
    opts = launch_opts
    # Build a fresh list: extending launch_opts["args"] in place would mutate
    # the list object the caller passed in (and would fail for a tuple).
    args = list(opts.get("args") or ())
    if use_adblock:
        adblock_path = Path(__file__).parent / "uBlock"
        args += [
            "--disable-extensions-except={}".format(adblock_path),
            "--load-extension={}".format(adblock_path),
        ]
    if proxy is not None:
        args.append("--proxy-server=" + proxy)
    args.append("about:blank")
    opts["args"] = args

    browser_handle = run(pyppeteer.launch(**opts))
    return BrowserConnection(browser_handle=browser_handle)
if __name__ == "__main__":
    # Manual run: launch a browser with headless=False passed through to the
    # launch options. The earlier ``b = Browser(headless=False)`` assignment
    # was rebound immediately and has been dropped.
    b = start_browser(headless=False)
Loading…
Cancel
Save