Compare commits
merge into: rlbr:master
rlbr:master
rlbr:no_requests_html
rlbr:xpath
pull from: rlbr:no_requests_html
rlbr:master
rlbr:no_requests_html
rlbr:xpath
1 Commits
master
...
no_request
| Author | SHA1 | Message | Date |
|---|---|---|---|
|
|
f76f5dfefc |
get_link uses chrome to open links we'll see how this one works
|
7 years ago |
3 changed files with 131 additions and 47 deletions
-
130batch_process.py
-
42get_link.py
-
6price_finder.py
@ -0,0 +1,42 @@ |
|||||
|
import pyppeteer |
||||
|
import asyncio |
||||
|
async def _get_link(browser,link): |
||||
|
pages = await browser.pages() |
||||
|
page = pages[0] |
||||
|
await page.goto(link) |
||||
|
webpage = None |
||||
|
for i in range(20): |
||||
|
try: |
||||
|
webpage = await page.content() |
||||
|
break |
||||
|
except: |
||||
|
time.sleep(1) |
||||
|
return webpage |
||||
|
|
||||
|
async def _single_link(brower, link):
    """Fetch one link with ``_get_link`` and then close the browser.

    Args:
        brower: Browser object handed to ``_get_link``; its awaitable
            ``close()`` is called before returning.
        link: The link to fetch.

    Returns:
        The value returned by ``_get_link`` for *link*.
    """
    try:
        return await _get_link(brower, link)
    finally:
        # Close the browser even when the fetch raises, so a failed
        # navigation does not leave it open.
        await brower.close()
||||
|
|
||||
|
async def _multi_link(brower, links):
    """Fetch several links one after another, then close the browser.

    Args:
        brower: Browser object handed to ``_get_link``; its awaitable
            ``close()`` is called before returning.
        links: Iterable of links; each is used as a key in the result, so
            duplicates collapse to a single entry.

    Returns:
        A dict mapping each link to the value ``_get_link`` returned for it.
    """
    results = {}
    try:
        for link in links:
            results[link] = await _get_link(brower, link)
    finally:
        # Close the browser even when one of the fetches raises.
        await brower.close()
    return results
||||
|
|
||||
|
def get_link(links, headless=False, proxy=None):
    """Launch a pyppeteer browser and fetch one link or a list of links.

    Runs the coroutines to completion on the event loop returned by
    ``asyncio.get_event_loop()``.

    Args:
        links: Either a ``list`` of links (fetched via ``_multi_link``) or a
            single link (anything that is not a ``list``, fetched via
            ``_single_link``).
        headless: Passed to ``pyppeteer.launch`` as the ``headless`` option.
        proxy: When truthy, passed to the browser as
            ``--proxy-server=<proxy>``.

    Returns:
        For a list, the dict returned by ``_multi_link``; otherwise the
        value returned by ``_single_link``.
    """
    loop = asyncio.get_event_loop()
    run = loop.run_until_complete
    opts = {
        'headless': headless,
    }
    if proxy:
        opts['args'] = ['--proxy-server={}'.format(proxy)]
    browser = run(pyppeteer.launch(**opts))
    if isinstance(links, list):
        return run(_multi_link(browser, links))
    return run(_single_link(browser, links))
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue