Browse Source

Use Selenium to get more accurate prices

master
Raphael Roberts 8 years ago
parent
commit
af16f66769
  1. 11
      batch_process.py
  2. 4
      price_finder.py

11
batch_process.py

@ -8,16 +8,21 @@ def get_prices(links):
results = [] results = []
for link in links: for link in links:
driver.get(link) driver.get(link)
try:
results.append( results.append(
price_finder( price_finder(
url = link,bs=BS(driver.page_source,'lxml') url = link,bs=BS(driver.page_source,'lxml')
) )
) )
except AttributeError:
results.append(price_finder(link))
driver.quit() driver.quit()
return results return results
except Exception as excpt: except Exception as excpt:
driver.quit() driver.quit()
raise excpt raise excpt
import saveto
links = saveto.load('quad_links')
products = get_prices(links)
if __name__ == "__main__":
import saveto
links = saveto.load('quad_links')
products = get_prices(links)

4
price_finder.py

@ -6,6 +6,7 @@ import datetime
user_agent = UserAgent().chrome user_agent = UserAgent().chrome
re_words = lambda n: re.compile(r"( ?[^ ]+ ?)"+"{0,"+str(n-1)+"}"+r"[^ ]+") re_words = lambda n: re.compile(r"( ?[^ ]+ ?)"+"{0,"+str(n-1)+"}"+r"[^ ]+")
debug = None
def get_page(url): def get_page(url):
page = None page = None
while not page: while not page:
@ -56,9 +57,12 @@ class price_finder:
def _get_product_info_(self): def _get_product_info_(self):
funcs = price_finder.page_funcs[self.info_url.netloc] funcs = price_finder.page_funcs[self.info_url.netloc]
print(self.url)
return { return {
"product_name":self.words.match( "product_name":self.words.match(
funcs["name"](self.bs) funcs["name"](self.bs)
).group(0), ).group(0),
"price":funcs["price"](self.bs).replace("$",""), "price":funcs["price"](self.bs).replace("$",""),
} }
Loading…
Cancel
Save