2 changed files with 62 additions and 28 deletions
@ -1,28 +1,57 @@ |
|||
from selenium import webdriver |
|||
from price_finder import price_finder,BS |
|||
from itertools import cycle |
|||
from requests_html import HTMLSession |
|||
from ipaddress import ip_address |
|||
def get_proxies(country='United States'):
    """Scrape free-proxy-list.net and return an endless iterator of proxies.

    The proxy table (element id ``proxylisttable``) is parsed and each row
    is kept only when its IP address is syntactically valid, its port is in
    the range 0-65535, its "https" column is ``"yes"`` and its "country"
    column equals *country*.

    Args:
        country: exact text of the "country" column to accept.

    Returns:
        An ``itertools.cycle`` over ``"ip:port"`` strings. If no row
        matches, the cycle is empty and ``next()`` on it raises
        ``StopIteration``.

    Raises:
        Any unexpected exception hit while reading a row is re-raised after
        the offending row has been printed.
    """
    ses = HTMLSession()
    try:
        r = ses.get('https://free-proxy-list.net/')
        page = BS(r.html.raw_html, 'lxml')
    finally:
        # The response body is already in memory; release the connection.
        ses.close()

    table = page.find(id='proxylisttable')
    headers, *rows = table.find_all('tr')
    headers = [tag.text.lower() for tag in headers.find_all('th')]

    # Column positions are looked up by header text, not hard-coded.
    ip, port = headers.index('ip address'), headers.index('port')
    https_support = headers.index('https')
    country_id = headers.index('country')

    proxies = []
    for row in rows:
        # Rows without <td> cells carry no proxy data.
        if not row.find('td'):
            continue
        tr = [tag.text for tag in row.find_all('td')]
        try:
            try:
                ip_address(tr[ip])
                # Validate the cell value, not the column index, and do it
                # with an explicit check so it survives ``python -O``.
                if not 0 <= int(tr[port]) < 2**16:
                    raise ValueError('port out of range: ' + tr[port])
            except ValueError:
                # Malformed address or port: skip this row.
                continue
            if tr[https_support] == "yes" and tr[country_id] == country:
                proxies.append('{}:{}'.format(tr[ip], tr[port]))
        except Exception:
            # Show which row broke the parser before propagating.
            print(row)
            raise
    return cycle(proxies)
|||
# Shared, endlessly cycling proxy pool that get_prices() draws from.
# It is built once at import time, so importing this module triggers the
# network request made by get_proxies().
proxies = get_proxies()
|||
def get_prices(links):
    """Fetch each link through a rotating proxy and return the parsed prices.

    For every link, proxies are taken from the module-level ``proxies``
    iterator until one attempt succeeds: the page is requested through the
    proxy, rendered, and its HTML is passed to ``price_finder``. A proxy
    whose attempt raises is recorded and skipped for the rest of the call.

    Args:
        links: iterable of URLs to fetch.

    Returns:
        A list with one ``price_finder`` result per link, in input order.
    """
    ret = []
    bad_proxies = set()
    s = HTMLSession()
    try:
        for link in links:
            print(link)
            while True:
                proxy = next(proxies)
                # NOTE(review): if every proxy in the pool has been marked
                # bad, this skip loop never terminates - consider a retry
                # cap or refreshing the pool.
                while proxy in bad_proxies:
                    proxy = next(proxies)
                print(proxy)
                try:
                    r = s.get(link, proxies={'http': proxy, 'https': proxy})
                    print('got')
                    r.html.render()
                    print('rendered')
                    ret.append(
                        price_finder(link, bs=BS(r.html.raw_html, 'lxml'))
                    )
                    break
                except Exception as e:
                    # Best effort: any failure during the attempt is blamed
                    # on the proxy and the next one is tried.
                    print(e)
                    print('!' + proxy)
                    bad_proxies.add(proxy)
    finally:
        # Close the session even when the loop is interrupted.
        s.close()
    return ret
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue