You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

112 lines
3.7 KiB

8 years ago
  1. from price_finder import price_finder,BS
  2. from itertools import cycle
  3. import requests
  4. # import requests_html
  5. from ipaddress import ip_address
  6. from get_link import get_link
  def get_proxies(link='https://free-proxy-list.net/', country='United States'):
      """Scrape a proxy-list page and return HTTPS-capable proxies for a country.

      The page at ``link`` is fetched with ``requests`` and parsed with ``BS``
      (imported from ``price_finder``; presumably BeautifulSoup -- confirm).
      The element with id ``proxylisttable`` is treated as the proxy table:
      its first ``<tr>`` is the header row, and the columns are located by
      their lower-cased header text ('ip address', 'port', 'https', 'country').

      Args:
          link: URL of the page that holds the proxy table.
          country: Exact text the row's country cell must equal to be kept.

      Returns:
          A list of ``'ip:port'`` strings, one per row whose IP address and
          port are valid, whose https cell is ``"yes"`` and whose country
          cell equals ``country``.

      Raises:
          ValueError: If the page has no ``proxylisttable`` element, the table
              has no rows, or an expected header column is missing.
      """
      # A plain ``requests`` response has no ``.html`` attribute; the raw
      # bytes of the body are available as ``.content``.
      response = requests.get(link)
      page = BS(response.content, 'lxml')
      table = page.find(id='proxylisttable')
      if table is None:
          raise ValueError(
              "no element with id 'proxylisttable' found at {}".format(link))
      header_row, *rows = table.find_all('tr')
      headers = [tag.text.lower() for tag in header_row.find_all('th')]
      ip = headers.index('ip address')
      port = headers.index('port')
      https_support = headers.index('https')
      country_id = headers.index('country')
      proxies = []
      for row in rows:
          # Rows without any <td> cell carry no proxy data.
          if not row.find('td'):
              continue
          try:
              tr = [tag.text for tag in row.find_all('td')]
              try:
                  ip_address(tr[ip])
                  # Validate the row's port value, not the column index.
                  port_number = int(tr[port])
              except ValueError:
                  # Malformed IP address or non-numeric port: skip the row.
                  continue
              if not 0 <= port_number < 2**16:
                  continue
              if tr[https_support] == "yes" and tr[country_id] == country:
                  proxies.append('{}:{}'.format(tr[ip], tr[port]))
          except Exception:
              # Show the offending row for debugging, then propagate.
              print(row)
              raise
      return proxies
  33. # def render_page(link,proxies,ses):
  34. # print(link)
  35. # bad_proxies = set()
  36. # page = None
  37. # render_attempts = 0
  38. # for proxy in proxies:
  39. # print(proxy)
  40. # try:
  41. # r = ses.get(link,proxies={'http':proxy,'https':proxy})
  42. # print('got')
  43. # except (requests.exceptions.ProxyError,requests.exceptions.SSLError):
  44. # print('!g!'+proxy)
  45. # bad_proxies.add(proxy)
  46. # continue
  47. # if render_attempts < 3:
  48. # render_attempts += 1
  49. # try:
  50. # r.html.render(timeout=10, sleep=10)
  51. # print('rendered')
  52. # except requests_html.MaxRetries:
  53. # print('!r!'+proxy)
  54. # bad_proxies.add(proxy)
  55. # continue
  56. # page = r.html.raw_html
  57. # break
  58. # if page:
  59. # return page,{proxy},bad_proxies
  60. # else:
  61. # raise Exception("All proxies used up")
  def get_prices(links, no_reuse=True, use_proxies=True):
      """Placeholder for the reworked price fetcher (not implemented yet).

      The original definition contained an ``if use_proxies:`` / ``else:``
      pair with empty bodies, which is a syntax error and prevented the whole
      module from being imported. Until the real implementation is written,
      the function fails loudly instead.

      Args:
          links: Product links whose prices should be fetched.
          no_reuse: Accepted for interface compatibility; currently unused.
          use_proxies: Accepted for interface compatibility; currently unused.

      Raises:
          NotImplementedError: Always.
      """
      raise NotImplementedError("get_prices is not implemented yet")
  def get_prices_old(links, no_reuse=True, use_proxies=True):
      """Render each link in an HTML session and run ``price_finder`` on it.

      NOTE(review): this function relies on ``requests_html`` and
      ``render_page``; in the visible source the ``requests_html`` import and
      the ``render_page`` definition are both commented out, so they must be
      restored before this can run.

      Args:
          links: Iterable of page URLs to process.
          no_reuse: When proxies are used, exclude the proxy set returned by
              the previous ``render_page`` call from the next attempt.
          use_proxies: Fetch pages through proxies gathered by
              ``get_proxies`` instead of connecting directly.

      Returns:
          A list with the ``price_finder`` result for each link, in order.
      """
      if use_proxies:
          proxies = set(get_proxies() + get_proxies('https://www.us-proxy.org/'))
          prev = set()
          bad_proxies_set = set()
      ses = requests_html.HTMLSession()
      ret = []
      try:
          for link in links:
              if use_proxies:
                  working_set = proxies - prev if no_reuse else proxies
                  page, prev, bad_proxies = render_page(link, working_set, ses)
              else:
                  r = ses.get(link)
                  r.html.render()
                  page = r.html.raw_html
              ret.append(price_finder(link, bs=BS(page, 'lxml')))
              if use_proxies:
                  # Remember failed proxies and stop offering them.
                  bad_proxies_set |= bad_proxies
                  proxies -= bad_proxies
          if use_proxies:
              print(bad_proxies_set)
      finally:
          # Close the session even when a link fails part-way through.
          ses.close()
      return ret
  94. # if __name__ == "__main__":
  95. # ses = requests_html.HTMLSession()
  96. # proxies = get_proxies('https://www.us-proxy.org/')
  97. # page = render_page('https://www.banggood.com/Aomway-Commander-Goggles-V1-2D-3D-40CH-5_8G-FPV-Video-Headset-Support-HDMI-DVR-Headtracker-p-1107684.html?cur_warehouse=CN',
  98. # proxies,
  99. # ses)
  100. # import saveto
  101. # import random
  102. # ql = saveto.load('quad_links')
  103. # random.shuffle(ql)
  104. # products = get_prices(ql)
  105. # pass