|
|
@ -0,0 +1,32 @@ |
|
|
|
|
|
from restscrape.scraper import proxy_scraper |
|
|
|
|
|
import pyppeteer |
|
|
|
|
|
import requests |
|
|
|
|
|
# Source page for the free US proxy list; fetched by create_proxy_iter()
US_PROXY_URL = 'https://www.us-proxy.org/'
|
|
|
|
|
class proxy_iter:
    '''Like itertools.cycle but uses a set underneath the hood and adds a method to remove an item from iteration (if proxy doesn't work etc)'''

    def __init__(self, proxy_list):
        '''
        Args:
            proxy_list: iterable of proxy addresses; duplicates are collapsed
                into a set, so yield order within a pass is arbitrary.
        '''
        self.proxy_set = set(proxy_list)  # proxies still eligible for rotation
        self.bad = set()                  # proxies flagged via blacklist()
        self.iterator = iter(self)        # single generator backing __next__

    def __iter__(self):
        '''Yield usable proxies round-robin, skipping blacklisted ones.

        The generator runs until every proxy has been blacklisted, at which
        point it ends (so __next__ raises StopIteration).

        NOTE: the previous implementation restarted each pass with
        ``yield from iter(self)``, stacking one generator frame per full
        cycle and eventually raising RecursionError on long-running
        rotations; a plain loop has identical behavior without the
        unbounded frame growth.
        '''
        while self.proxy_set:
            for proxy in self.proxy_set:
                # self.bad may grow mid-pass if the consumer calls
                # blacklist(); check at yield time so a newly-blacklisted
                # proxy is never handed out again.
                if proxy not in self.bad:
                    yield proxy
            # Drop blacklisted proxies before starting the next pass.
            self.proxy_set -= self.bad

    def __next__(self):
        '''Return the next usable proxy.

        Raises:
            StopIteration: once every proxy has been blacklisted (or the
                iterator was constructed from an empty iterable).
        '''
        return next(self.iterator)

    def blacklist(self, proxy):
        '''Mark *proxy* as unusable; it will not be yielded again.'''
        self.bad.add(proxy)

    def __len__(self):
        '''Number of proxies still considered usable.'''
        return len(self.proxy_set - self.bad)
|
|
|
|
|
def create_proxy_iter(url):
    '''Create a proxy_iter from proxy_webpage

    Args:
        url: page listing proxies in the table layout that
            ``proxy_scraper`` parses (e.g. US_PROXY_URL).

    Returns:
        proxy_iter over "ip:port" strings scraped from the page.

    Raises:
        requests.HTTPError: if the fetch returns an error status.
        requests.Timeout: if the server does not respond in time.
    '''
    # An explicit timeout is required: requests.get() without one can
    # block indefinitely if the remote host stops responding.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    # proxy_scraper yields dict rows whose key 'ip address' contains a
    # space, hence str.format(**row) rather than an f-string/attribute.
    return proxy_iter('{ip address}:{port}'.format(**row) for row in proxy_scraper(resp.text))