You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

113 lines
3.7 KiB

from price_finder import price_finder,BS
from itertools import cycle
import requests
# import requests_html
from ipaddress import ip_address
from get_link import get_link
def get_proxies(link='https://free-proxy-list.net/', country='United States'):
    """Scrape ``ip:port`` proxy strings from a free-proxy-list style table.

    Fetches *link*, parses the ``#proxylisttable`` HTML table, and keeps only
    rows whose IP parses, whose port is a valid TCP port, that advertise
    HTTPS support, and whose country matches *country*.

    :param link: URL of a free-proxy-list style page (same table layout).
    :param country: country name to filter rows by.
    :return: list of ``"ip:port"`` strings.
    """
    # BUG FIXES vs. original:
    #  - dropped `ses = requests_html.HTMLSession()` — requests_html is never
    #    imported (NameError) and `ses` was unused anyway;
    #  - `r.html.raw_html` -> `r.content`: requests.Response has no `.html`;
    #  - `int(port)` -> `int(row_cells[port])`: the original validated the
    #    header *column index*, not the port value, so the range check was
    #    always true;
    #  - replaced `assert` (stripped under -O) with an explicit range check;
    #  - removed the dead `or False` in the HTTPS condition.
    r = requests.get(link)
    page = BS(r.content, 'lxml')
    table = page.find(id='proxylisttable')
    headers, *rows = table.find_all('tr')
    headers = [tag.text.lower() for tag in headers.find_all('th')]
    ip = headers.index('ip address')
    port = headers.index('port')
    https_support = headers.index('https')
    country_id = headers.index('country')
    proxies = []
    for row in rows:
        if not row.find('td'):
            # Skip header/ad rows that have no data cells.
            continue
        row_cells = [tag.text for tag in row.find_all('td')]
        try:
            ip_address(row_cells[ip])  # raises ValueError on a bad IP string
            port_num = int(row_cells[port])
        except ValueError:
            continue
        if not (0 <= port_num < 2 ** 16):
            continue
        if row_cells[https_support] == 'yes' and row_cells[country_id] == country:
            proxies.append('{}:{}'.format(row_cells[ip], row_cells[port]))
    return proxies
# def render_page(link,proxies,ses):
# print(link)
# bad_proxies = set()
# page = None
# render_attempts = 0
# for proxy in proxies:
# print(proxy)
# try:
# r = ses.get(link,proxies={'http':proxy,'https':proxy})
# print('got')
# except (requests.exceptions.ProxyError,requests.exceptions.SSLError):
# print('!g!'+proxy)
# bad_proxies.add(proxy)
# continue
# if render_attempts < 3:
# render_attempts += 1
# try:
# r.html.render(timeout=10, sleep=10)
# print('rendered')
# except requests_html.MaxRetries:
# print('!r!'+proxy)
# bad_proxies.add(proxy)
# continue
# page = r.html.raw_html
# break
# if page:
# return page,{proxy},bad_proxies
# else:
# raise Exception("All proxies used up")
def get_prices(links, no_reuse=True, use_proxies=True):
    """Fetch prices for each URL in *links*.

    :param links: iterable of product-page URLs.
    :param no_reuse: when using proxies, do not reuse the previous link's proxy.
    :param use_proxies: route requests through scraped free proxies.
    :return: list of ``price_finder`` results, one per link.
    """
    # BUG FIX: the original body was `if use_proxies:` / `else:` with EMPTY
    # suites — a SyntaxError that made the whole module unimportable.
    # TODO(review): the new implementation was never written; delegate to the
    # legacy version until it is.
    return get_prices_old(links, no_reuse=no_reuse, use_proxies=use_proxies)
def get_prices_old(links, no_reuse=True, use_proxies=True):
    """Legacy price fetcher: render each link and run ``price_finder`` on it.

    :param links: iterable of product-page URLs.
    :param no_reuse: when True, exclude the proxy used for the previous link
        from the working set for the next one.
    :param use_proxies: when True, scrape free proxy lists and route page
        rendering through them via ``render_page``.
    :return: list of ``price_finder`` results, one per link.

    NOTE(review): this depends on ``requests_html`` (its import is commented
    out at the top of the file) and on ``render_page`` (currently commented
    out above) — both must be restored for this to run. Indentation below is
    reconstructed from the flattened source; verify against history.
    """
    if use_proxies:
        # Merge two proxy sources; set() dedupes overlapping entries.
        proxies = set(get_proxies() + get_proxies('https://www.us-proxy.org/'))
        prev = set()            # proxies used for the previous link
        bad_proxies_set = set() # all proxies that ever failed (for reporting)
    ses = requests_html.HTMLSession()
    ret = []
    try:  # robustness: ensure the session is closed even if a fetch raises
        for link in links:
            if use_proxies:
                if no_reuse:
                    working_set = proxies - prev
                else:
                    working_set = proxies
                page, prev, bad_proxies = render_page(link, working_set, ses)
            else:
                r = ses.get(link)
                r.html.render()
                page = r.html.raw_html
            ret.append(price_finder(link, bs=BS(page, 'lxml')))
            if use_proxies:
                bad_proxies_set |= bad_proxies
                proxies -= bad_proxies  # never retry a known-bad proxy
        if use_proxies:
            print(bad_proxies_set)
    finally:
        ses.close()
    return ret
# if __name__ == "__main__":
# ses = requests_html.HTMLSession()
# proxies = get_proxies('https://www.us-proxy.org/')
# page = render_page('https://www.banggood.com/Aomway-Commander-Goggles-V1-2D-3D-40CH-5_8G-FPV-Video-Headset-Support-HDMI-DVR-Headtracker-p-1107684.html?cur_warehouse=CN',
# proxies,
# ses)
# import saveto
# import random
# ql = saveto.load('quad_links')
# random.shuffle(ql)
# products = get_prices(ql)
# pass