"""Fetch rendered page HTML through a pyppeteer-driven Chromium instance."""

import asyncio
import os

import pyppeteer
import pyppeteer.errors

# How many times to retry reading the page content, and the pause (in
# seconds) between attempts / after waiting for the XPath selectors.
_CONTENT_ATTEMPTS = 20
_SETTLE_DELAY = 1


async def _get_link(browser, link, xpath):
    """Load ``link`` in the browser's first page and return its HTML.

    Args:
        browser: A launched pyppeteer browser.
        link: URL to navigate to.
        xpath: Mapping with ``'name'`` and ``'price'`` keys whose values are
            XPath expressions to wait for before reading the page.

    Returns:
        The page content as returned by ``page.content()``, or ``None`` if
        every read attempt failed.
    """
    pages = await browser.pages()
    page = pages[0]
    # 'documentloaded' is not a documented waitUntil value in pyppeteer;
    # 'load' is the documented equivalent (wait for the load event).
    await page.goto(link, waitUntil='load')

    for expression in (xpath['name'], xpath['price']):
        print(repr(expression))
        try:
            await page.waitForXPath(expression)
        except pyppeteer.errors.TimeoutError:
            # Best effort: a missing element must not abort the fetch.
            pass
    await asyncio.sleep(_SETTLE_DELAY)

    for _ in range(_CONTENT_ATTEMPTS):
        try:
            return await page.content()
        except Exception:
            # Reading the content can fail transiently; retry after a pause.
            # ``Exception`` (not a bare except) so that task cancellation
            # and KeyboardInterrupt still propagate.
            await asyncio.sleep(_SETTLE_DELAY)
    return None


async def _single_link(browser, link, xpath):
    """Fetch one link, close the browser, and return the page HTML."""
    webpage = await _get_link(browser, link, xpath)
    await browser.close()
    return webpage


async def _multi_link(browser, links, xpaths):
    """Fetch each link in turn, close the browser, and return the results.

    Args:
        browser: A launched pyppeteer browser.
        links: Iterable of URLs to fetch sequentially.
        xpaths: Mapping from each URL to its ``{'name', 'price'}`` XPath
            mapping.

    Returns:
        Dict mapping each URL to its page HTML (or ``None``).
    """
    results = {}
    for link in links:
        results[link] = await _get_link(browser, link, xpaths[link])
    await browser.close()
    return results


def get_link(links, xpaths, headless=False, proxy=None):
    """Launch Chromium and fetch the HTML of one or several links.

    Args:
        links: Either a single URL or a ``list`` of URLs. Only ``list``
            instances are treated as multiple links.
        xpaths: Mapping from URL to a mapping with ``'name'`` and
            ``'price'`` XPath expressions.
        headless: Passed to ``pyppeteer.launch`` as the ``headless`` option.
        proxy: Optional proxy address, passed to Chromium as
            ``--proxy-server``.

    Returns:
        For a list of links, a dict mapping each URL to its HTML; for a
        single link, that page's HTML. Entries are ``None`` when the content
        could not be read.

    Raises:
        Exception: Any error raised while fetching is re-raised after the
            browser has been closed.
    """
    loop = asyncio.get_event_loop()
    run = loop.run_until_complete

    # The uBlock extension directory is expected next to this module.
    ext = os.path.join(os.path.dirname(__file__), 'uBlock0.chromium')
    args = [f'--proxy-server={proxy}'] if proxy else []
    args += [
        f'--disable-extensions-except={ext}',
        f'--load-extension={ext}',
    ]
    opts = {
        'headless': headless,
        'args': args,
    }

    browser = run(pyppeteer.launch(**opts))
    try:
        if isinstance(links, list):
            return run(_multi_link(browser, links, xpaths))
        return run(_single_link(browser, links, xpaths[links]))
    except Exception:
        # The helpers only close the browser on success; close it here so a
        # failed fetch does not leave the browser running.
        run(browser.close())
        raise