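# Scrapes free HTTPS proxies from free-proxy-list.net and rotates through
# them to fetch product pages, extracting prices with price_finder.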
from price_finder import price_finder, BS
from itertools import cycle
import requests
import requests_html
from ipaddress import ip_address

def get_proxies(country='United States'):
    """Scrape free-proxy-list.net and return an endless cycle of 'ip:port'
    strings for HTTPS-capable proxies located in the given country."""
    ses = requests_html.HTMLSession()
    r = ses.get('https://free-proxy-list.net/')
    page = BS(r.html.raw_html, 'lxml')
    table = page.find(id='proxylisttable')
    headers, *rows = table.find_all('tr')
    headers = [tag.text.lower() for tag in headers.find_all('th')]
    # Column indices of the fields we filter on.
    ip, port = headers.index('ip address'), headers.index('port')
    https_support = headers.index('https')
    country_id = headers.index('country')
    proxies = []
    for row in rows:
        if row.find('td'):
            tr = [tag.text for tag in row.find_all('td')]
            try:
                try:
                    # Keep only rows with a valid IP address, a sane port,
                    # HTTPS support and the requested country.
                    ip_address(tr[ip])
                    assert 0 <= int(tr[port]) < 2**16
                    if tr[https_support] == "yes" and tr[country_id] == country:
                        proxies.append('{}:{}'.format(tr[ip], tr[port]))
                except (ValueError, AssertionError):
                    pass
            except Exception as e:
                # Unexpected row layout: dump the row for debugging and re-raise.
                print(row)
                raise e
    ses.close()
    return cycle(proxies)
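
# Example of consuming the proxy pool returned by get_proxies() (the address
# shown is illustrative only, not a real proxy):
#   pool = get_proxies()
#   next(pool)  # -> '203.0.113.5:8080'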

def get_prices(links):
    """Fetch each link through a rotating proxy, render the page, and run
    price_finder over the resulting HTML. Failing proxies are skipped."""
    proxies = get_proxies()
    s = requests_html.HTMLSession()
    ret = []
    bad_proxies = set()
    for link in links:
        page = None
        render_tries = 0
        print(link)
        while not page:
            # Pick the next proxy that has not already failed.
            proxy = next(proxies)
            while proxy in bad_proxies:
                proxy = next(proxies)
            print(proxy)
            try:
                r = s.get(link, proxies={'http': proxy, 'https': proxy})
                print('got')
                try:
                    render_tries += 1
                    r.html.render()
                    print('rendered')
                except requests_html.MaxRetries:
                    if render_tries > 2:
                        # Rendering keeps failing: fall back to the raw HTML.
                        pass
                    else:
                        # Blame the proxy, mark it bad and try another one.
                        print('!' + proxy)
                        bad_proxies.add(proxy)
                        continue
                page = r.html.raw_html
                ret.append(price_finder(link, bs=BS(page, 'lxml')))
            except (requests.exceptions.ProxyError, requests.exceptions.SSLError):
                print('!' + proxy)
                bad_proxies.add(proxy)
                print(bad_proxies)
    s.close()
    return ret

if __name__ == "__main__":
    import saveto
    import random

    # saveto is a project-local helper; 'quad_links' is assumed to be a
    # previously saved list of product URLs.
    ql = saveto.load('quad_links')
    random.shuffle(ql)
    products = get_prices(ql)