|
|
|
@ -3,6 +3,8 @@ import re |
|
|
|
|
|
|
|
import edlib |
|
|
|
|
|
|
|
# One match per maximal run of ASCII letters/digits; everything else
# (spaces, punctuation, "&", brackets) acts as a word separator.
words_re = re.compile(r"[a-zA-Z0-9]+")
|
|
|
|
|
|
|
|
|
|
|
def editdistance(a, b):
    """Return the edit distance between ``a`` and ``b``.

    Thin wrapper around ``edlib.align`` called with its default settings;
    only the ``"editDistance"`` entry of the alignment result is returned.

    Args:
        a: First sequence, passed to ``edlib.align`` as its first argument.
        b: Second sequence, passed to ``edlib.align`` as its second argument.

    Returns:
        The value stored under ``"editDistance"`` in the alignment result.
    """
    return edlib.align(a, b)["editDistance"]
|
|
|
@ -26,21 +28,27 @@ class Search: |
|
|
|
class StopSearch(Search):
    """Callable that scores a stop name against a free-text query.

    Calling an instance with a stop name returns an integer distance; a
    smaller value means the stop's words are closer to the query's words.
    """

    def __init__(self, query):
        """Prepare the normalised forms of ``query`` used by this search.

        Args:
            query: Raw search text. It is forwarded to ``Search.__init__``,
                which is expected to provide ``self.raw_lower`` (the base
                class is not visible here -- confirm against ``Search``).
        """
        super().__init__(query)

        # Canonical "a & b" spelling of the query and its mirror image.
        # The split accepts either a standalone word "and" or "&", each with
        # an optional single space on both sides. Both forms are reported
        # by __str__.
        sides = re.split(r" ?(?:(?<!\w)and(?!\w)|&) ?", query.lower())
        self.query = " & ".join(sides)
        self.query_reversed = " & ".join(reversed(sides))

        # Word list consumed by __call__: "&" is spelled out as "and" first
        # so that "a & b" and "a and b" produce the same words.
        self.parts = words_re.findall(self.raw_lower.replace("&", "and"))

    def __call__(self, stop):
        """Return the word-by-word distance between the query and ``stop``.

        The stop name is lower-cased, "&" is replaced by "and", and the
        result is split into words. Stop words are then taken from last to
        first; each one is paired with the closest still-unused query word
        (by ``editdistance``), that distance is added to the total, and the
        query word is removed from the pool. Matching stops as soon as
        either side runs out of words, so surplus words on either side add
        nothing to the score.

        Args:
            stop: The stop name to score.

        Returns:
            The accumulated distance as an integer (0 when either side has
            no words).
        """
        stop_words = words_re.findall(stop.lower().replace("&", "and"))
        remaining = self.parts.copy()  # never mutate the query's own list
        total_distance = 0

        while stop_words and remaining:
            word = stop_words.pop()

            # The cost of one pairing is capped at the length of the longest
            # unused query word. If no candidate beats that cap, the first
            # unused query word is consumed at the capped cost.
            best_index = 0
            best_distance = max(map(len, remaining))
            for i, candidate in enumerate(remaining):
                distance = editdistance(candidate, word)
                # Strict comparison: on a tie the earliest candidate wins.
                if distance < best_distance:
                    best_index = i
                    best_distance = distance

            total_distance += best_distance
            del remaining[best_index]

        return total_distance

    def __str__(self):
        """Return the canonical query and its reversed form, joined by ``|``."""
        return "{}|{}".format(self.query, self.query_reversed)
|
|
|
|