You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

65 lines
1.8 KiB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
  1. import pyppeteer
  2. import pyppeteer.errors
  3. import asyncio
  4. import os
  5. async def _get_link(browser,link,xpath):
  6. pages = await browser.pages()
  7. page = pages[0]
  8. await page.goto(link,waitUntil='documentloaded')
  9. xpath = [xpath['name'],xpath['price']]
  10. for _xpath in xpath:
  11. print(repr(_xpath))
  12. try:
  13. await page.waitForXPath(_xpath)
  14. except pyppeteer.errors.TimeoutError:
  15. pass
  16. await asyncio.sleep(1)
  17. webpage = None
  18. for i in range(20):
  19. try:
  20. webpage = await page.content()
  21. break
  22. except:
  23. await asyncio.sleep(1)
  24. return webpage
  25. async def _single_link(browser,link,xpath):
  26. webpage = await _get_link(browser,link,xpath)
  27. await browser.close()
  28. return webpage
  29. async def _multi_link(browser,links,xpaths):
  30. results = {}
  31. for link in links:
  32. xpath = xpaths[link]
  33. webpage = await _get_link(browser,link,xpath)
  34. results[link] = webpage
  35. await browser.close()
  36. return results
  37. def get_link(links,xpaths,headless = False,proxy = None):
  38. loop = asyncio.get_event_loop()
  39. run = loop.run_until_complete
  40. opts = {
  41. 'headless':headless,
  42. }
  43. if proxy:
  44. opts['args'] = [f'--proxy-server={proxy}']
  45. else:
  46. opts['args'] = []
  47. ext = os.path.join(os.path.dirname(__file__),'uBlock0.chromium')
  48. opts['args'] += [f'--disable-extensions-except={ext}', f'--load-extension={ext}']
  49. # print(opts)
  50. browser = run(pyppeteer.launch(**opts))
  51. try:
  52. if isinstance(links,list):
  53. result = run(_multi_link(browser,links,xpaths))
  54. else:
  55. result = run(_single_link(browser,links,xpaths[links]))
  56. return result
  57. except Exception as e:
  58. run(browser.close())
  59. raise e