2 changed files with 62 additions and 28 deletions
@ -1,28 +1,57 @@ |
|||||
from selenium import webdriver |
|
||||
from price_finder import price_finder,BS |
from price_finder import price_finder,BS |
||||
|
from itertools import cycle |
||||
|
from requests_html import HTMLSession |
||||
|
from ipaddress import ip_address |
||||
|
def get_proxies(country = 'United States'):
    """Scrape free-proxy-list.net and return an endless iterator of proxies.

    A table row is kept only when its "ip address" cell parses as an IP
    address, its "port" cell is an integer in the range 0..65535, its
    "https" cell is exactly "yes" and its "country" cell equals *country*.

    Args:
        country: Value the row's "country" column must match exactly.

    Returns:
        An ``itertools.cycle`` over ``'ip:port'`` strings.  When no row
        qualifies the cycle is empty, so ``next()`` on it raises
        ``StopIteration``.

    Raises:
        ValueError: If one of the expected column headers is missing.
        Exception: Any unexpected error while reading a row is re-raised
            after the offending row has been printed.
    """
    ses = HTMLSession()
    try:
        r = ses.get('https://free-proxy-list.net/')
        page = BS(r.html.raw_html, 'lxml')
    finally:
        # The session is only needed for this one request.
        ses.close()

    # NOTE(review): if the page has no element with this id, `table` is
    # presumably None and the next line fails with AttributeError.
    table = page.find(id='proxylisttable')
    header_row, *rows = table.find_all('tr')
    headers = [tag.text.lower() for tag in header_row.find_all('th')]

    # Column positions are looked up by header text, not hard-coded.
    ip, port = headers.index('ip address'), headers.index('port')
    https_support = headers.index('https')
    country_id = headers.index('country')

    proxies = []
    for row in rows:
        if not row.find('td'):
            continue  # rows without data cells
        tr = [tag.text for tag in row.find_all('td')]
        try:
            try:
                ip_address(tr[ip])
                # Validate the port *value* of this row; `port` itself is
                # only the column index.
                port_number = int(tr[port])
            except ValueError:
                continue  # malformed address or port: skip the row
            if not 0 <= port_number < 2**16:
                continue
            if tr[https_support] == "yes" and tr[country_id] == country:
                proxies.append('{}:{}'.format(tr[ip], tr[port]))
        except Exception:
            # Show which row broke the parser before propagating.
            print(row)
            raise
    return cycle(proxies)
||||
|
proxies = get_proxies() |
||||
def _next_usable_proxy(bad_proxies):
    """Return the next proxy from the module-level cycle not in *bad_proxies*.

    Raises:
        RuntimeError: If the proxy cycle is empty, or if more known-bad
            proxies were skipped in a row than there are known-bad proxies.
            Assuming the cycle holds distinct entries, that means a full
            lap was made and every proxy has already failed.
    """
    skipped = 0
    while True:
        try:
            proxy = next(proxies)
        except StopIteration:
            # An empty cycle would otherwise leak a bare StopIteration.
            raise RuntimeError('no proxies available') from None
        if proxy not in bad_proxies:
            return proxy
        skipped += 1
        if skipped > len(bad_proxies):
            raise RuntimeError('all proxies have failed')


def get_prices(links):
    """Fetch every link through a rotating proxy and parse it with price_finder.

    For each link, proxies are tried one after another until one of them
    fetches, renders and parses the page without raising.  A proxy that
    fails is printed with a leading '!' and is not used again during this
    call.

    Args:
        links: Iterable of URLs to fetch.

    Returns:
        A list with one ``price_finder`` result per link, in input order.

    Raises:
        RuntimeError: If there are no proxies, or all of them have failed.
    """
    results = []
    bad_proxies = set()
    session = HTMLSession()
    try:
        for link in links:
            print(link)
            while True:
                proxy = _next_usable_proxy(bad_proxies)
                print(proxy)
                try:
                    r = session.get(link, proxies={'http': proxy, 'https': proxy})
                    print('got')
                    r.html.render()
                    print('rendered')
                    results.append(
                        price_finder(link, bs=BS(r.html.raw_html, 'lxml'))
                    )
                    break
                except Exception as e:
                    # Deliberate best-effort: any failure (network, render
                    # or parse) blacklists this proxy and tries the next.
                    print(e)
                    print('!' + proxy)
                    bad_proxies.add(proxy)
    finally:
        # Close the session even when an exception escapes the loop.
        session.close()
    return results


if __name__ == "__main__":
    import saveto

    links = saveto.load('quad_links')
    products = get_prices(links)
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue