|
|
|
@ -6,8 +6,13 @@ import re |
|
|
|
import datetime |
|
|
|
|
|
|
|
user_agent = UserAgent().chrome |
|
|
|
def re_words(n):
    """Compile a pattern that matches at most ``n`` space-separated words.

    The pattern is an optional group repeated between 0 and ``n - 1`` times,
    followed by one mandatory run of non-space characters.  Each repetition
    of the group consumes a run of non-space characters with an optional
    single space on either side.  Only the space character acts as a
    separator; newlines and tabs count as part of a word.

    Args:
        n: Maximum number of words the compiled pattern should cover.

    Returns:
        The compiled regular-expression object.
    """
    # The pattern text is assembled exactly as before so that the resulting
    # ``.pattern`` string is unchanged for callers that inspect it.
    return re.compile(r"( ?[^ ]+ ?)" + "{0," + str(n - 1) + "}" + r"[^ ]+")
|
|
|
debug = None |
|
|
|
def get_words(string, n):
    """Return the first ``n`` words of ``string`` joined by single spaces.

    A word is a run of characters containing neither a space nor a newline,
    trimmed so that it begins and ends on a regex word boundary.  Leading or
    trailing punctuation is therefore dropped ("hello," yields "hello"), and
    runs that contain no word characters at all are skipped.

    Args:
        string: Text to extract words from.
        n: Maximum number of words to keep.

    Returns:
        The selected words separated by single spaces, or an empty string
        when ``string`` contains no words.
    """
    # The pattern has no capturing group, so findall returns the full text
    # of every match.
    words = re.findall(r"\b[^ \n]+\b", string)
    # Slicing already clamps to the list length, so no explicit length
    # check is needed before truncating.
    return " ".join(words[:n])
|
|
|
def get_page(url): |
|
|
|
page = None |
|
|
|
while not page: |
|
|
|
@ -34,28 +39,29 @@ class price_finder: |
|
|
|
"price":lambda page: page.find("meta",attrs = {"itemprop":"price"}).get("content") |
|
|
|
}, |
|
|
|
"www.gearbest.com":{ |
|
|
|
"name":lambda page:re.sub(' {2,}|\n','',page.find("div",attrs = {"class":"goodsIntro_titleWrap"}).find("h1").text), |
|
|
|
"name":lambda page:re.sub(" {2,}|\n","",page.find("div",attrs = {"class":"goodsIntro_titleWrap"}).find("h1").text), |
|
|
|
"price":lambda page: page.find("span",attrs={"class":"goodsIntro_price"}).text |
|
|
|
}, |
|
|
|
"hobbyking.com":{ |
|
|
|
"name":lambda page: page.find("h1",attrs={"class":"product-name"}).text, |
|
|
|
"price":lambda page: page.find("span",id = re.compile(r"product-price.*")).find("span",attrs={"class":"price"}).text |
|
|
|
}, |
|
|
|
'www.getfpv.com':{ |
|
|
|
'name': lambda page: re.sub(r'\\n|\n','', page.find("div",attrs={"class":"product-name"}).text), |
|
|
|
'price': lambda page: re.sub(r'\\n|\n','', page.find('span',attrs={'id':re.compile('product-price.*')}).text) |
|
|
|
"www.getfpv.com":{ |
|
|
|
"name": lambda page: re.sub(r"\\n|\n","", page.find("div",attrs={"class":"product-name"}).text), |
|
|
|
"price": lambda page: re.sub(r"\\n|\n","", page.find("span",attrs={"id":re.compile("product-price.*")}).text) |
|
|
|
} |
|
|
|
} |
|
|
|
def __init__(self,url,space_seperated_categories = 7,bs=None): |
|
|
|
self.url=url |
|
|
|
self.info_url = urllib.parse.urlparse(url) |
|
|
|
self.word_len = space_seperated_categories |
|
|
|
if self.info_url.netloc not in price_finder.page_funcs.keys(): |
|
|
|
raise NotImplementedError("Not implemented for {}".format(self.info_url.netloc)) |
|
|
|
if bs: |
|
|
|
self.bs= bs |
|
|
|
else: |
|
|
|
self.bs = get_BS(url) |
|
|
|
self.words = re_words(space_seperated_categories) |
|
|
|
# self.words = re_words(space_seperated_categories) |
|
|
|
self.time = datetime.datetime.today() |
|
|
|
self.info_product = self._get_product_info_() |
|
|
|
|
|
|
|
@ -65,9 +71,7 @@ class price_finder: |
|
|
|
# print(self.url) |
|
|
|
|
|
|
|
return { |
|
|
|
"product_name":self.words.match( |
|
|
|
funcs["name"](self.bs) |
|
|
|
).group(0), |
|
|
|
"product_name":get_words(funcs["name"](self.bs),self.word_len), |
|
|
|
"price":funcs["price"](self.bs).replace("$",""), |
|
|
|
} |
|
|
|
|