You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

66 lines
1.8 KiB

import pyppeteer
import pyppeteer.errors
import asyncio
import os
async def _get_link(browser, link, xpath):
    """Open ``link`` in the browser's first tab and return the page's HTML.

    Args:
        browser: Browser object exposing an awaitable ``pages()``; the first
            page it returns is reused for the navigation.
        link: URL to navigate to.
        xpath: Mapping with ``'name'`` and ``'price'`` keys whose values are
            the XPath expressions to wait for before reading the page.

    Returns:
        The result of ``page.content()``, or ``None`` if reading the content
        failed on all 20 attempts.

    Raises:
        KeyError: If ``xpath`` lacks the ``'name'`` or ``'price'`` key.
    """
    pages = await browser.pages()
    page = pages[0]
    await page.goto(link, waitUntil='documentloaded')

    # Both keys are looked up before any waiting starts, so a malformed
    # mapping fails immediately rather than after the first wait.
    selectors = (xpath['name'], xpath['price'])
    for selector in selectors:
        print(repr(selector))
        try:
            await page.waitForXPath(selector)
        except pyppeteer.errors.TimeoutError:
            # Best effort: a selector that never shows up must not abort the
            # fetch; the page content is returned regardless.
            pass

    # Short fixed pause before reading the DOM -- presumably lets late
    # scripts finish rendering (TODO confirm it is still needed).
    await asyncio.sleep(1)

    for _ in range(20):
        try:
            return await page.content()
        except Exception:
            # Retry only on ordinary errors. Catching ``Exception`` rather
            # than using a bare ``except`` lets task cancellation and
            # KeyboardInterrupt propagate instead of being retried.
            await asyncio.sleep(1)
    return None
async def _single_link(browser, link, xpath):
    """Fetch one page with ``_get_link``, close the browser, return the page.

    The browser is closed only after a successful fetch; if ``_get_link``
    raises, the exception propagates with the browser still open.
    """
    html = await _get_link(browser, link, xpath)
    await browser.close()
    return html
async def _multi_link(browser, links, xpaths):
    """Fetch every URL in ``links`` one after another, then close the browser.

    Args:
        browser: Browser object shared by all fetches.
        links: Iterable of URLs, visited in order.
        xpaths: Mapping from each URL to the xpath mapping handed to
            ``_get_link`` for that URL.

    Returns:
        A dict mapping each URL to whatever ``_get_link`` returned for it.
    """
    # Awaiting inside the comprehension keeps the fetches strictly sequential.
    pages_by_link = {
        url: await _get_link(browser, url, xpaths[url])
        for url in links
    }
    await browser.close()
    return pages_by_link
def get_link(links, xpaths, headless=False, proxy=None):
    """Synchronously fetch one or several pages through a pyppeteer browser.

    Args:
        links: Either a list of URLs or a single URL.
        xpaths: Mapping keyed by URL; each value is the xpath mapping used
            for that URL.
        headless: Passed to ``pyppeteer.launch`` as the ``headless`` option.
        proxy: When truthy, added to the browser arguments as
            ``--proxy-server=<proxy>``.

    Returns:
        For a list of links, the dict produced by ``_multi_link``; otherwise
        the single page returned by ``_single_link``.

    Raises:
        Exception: Any error raised while fetching is re-raised after the
            browser has been closed.
    """
    event_loop = asyncio.get_event_loop()

    launch_args = [f'--proxy-server={proxy}'] if proxy else []
    # The uBlock extension directory is expected next to this module.
    extension_dir = os.path.join(os.path.dirname(__file__), 'uBlock0.chromium')
    launch_args.append(f'--disable-extensions-except={extension_dir}')
    launch_args.append(f'--load-extension={extension_dir}')
    launch_options = {'headless': headless, 'args': launch_args}

    browser = event_loop.run_until_complete(pyppeteer.launch(**launch_options))
    try:
        if isinstance(links, list):
            job = _multi_link(browser, links, xpaths)
        else:
            job = _single_link(browser, links, xpaths[links])
        return event_loop.run_until_complete(job)
    except Exception as exc:
        # The helpers close the browser only on success, so close it here
        # before handing the error back to the caller.
        event_loop.run_until_complete(browser.close())
        raise exc