|
|
|
@ -1,6 +1,5 @@ |
|
|
|
import codecs |
|
|
|
import base64 |
|
|
|
import hashlib |
|
|
|
import json |
|
|
|
import os |
|
|
|
import pickle |
|
|
|
import re |
|
|
|
@ -12,11 +11,13 @@ if sys.maxsize > 2**32: |
|
|
|
else: |
|
|
|
HASH_FUNC = hashlib.blake2s |
|
|
|
|
|
|
|
|
|
|
|
def load_words(filename):
    """Read a newline-separated word file into a lowercase set.

    Empty lines are dropped; every remaining entry is lower-cased so
    later comparisons are case-insensitive.
    """
    with open(filename) as handle:
        raw = handle.read()
    return {entry.lower() for entry in raw.split('\n') if entry}
|
|
|
|
|
|
|
|
|
|
|
def _get_wordlist_hash(word_list_s):
    """Return the raw digest of the word list.

    Each word is fed into HASH_FUNC in sorted order, making the digest
    deterministic and independent of input ordering.

    NOTE(review): the line producing `word_bytes` is hidden behind the
    embedded diff hunk header below — presumably the word encoded to
    bytes; confirm against the full file.
    """
    _hash = HASH_FUNC()
    for word in sorted(word_list_s):
@ -24,14 +25,14 @@ def _get_wordlist_hash(word_list_s):
        _hash.update(word_bytes)
    return _hash.digest()
|
|
|
|
|
|
|
|
|
|
|
def hash_wordlist(word_list, raw=False):
    """Hash a word list into a stable identifier.

    Args:
        word_list: iterable of words; ordering does not matter (sorted
            before hashing).
        raw: when True, return the raw digest bytes instead of a string.

    Returns:
        Raw digest bytes when ``raw`` is true, otherwise a URL/filename-
        safe base64 string of the digest (used as a cache file name).
    """
    # Sorting here is redundant (_get_wordlist_hash sorts again) but kept
    # so this function does not depend on the helper's contract.
    fhash = _get_wordlist_hash(sorted(word_list))
    if raw:
        return fhash
    # Replaces the old codecs.encode(fhash, 'base64') + regex dance
    # ('+' -> '-', '/' -> '_', newlines stripped) with its stdlib
    # equivalent, and drops the unreachable dead line that followed the
    # old return (which also wrongly called urlsafe_b64*decode* on a raw
    # digest).
    return base64.urlsafe_b64encode(fhash).decode()
|
|
|
|
|
|
|
|
|
|
|
def load_freq_cache(word_list):
    """Load pickled letter-frequency data cached for this word list.

    The cache file name is the URL-safe hash of the word list plus
    '.pkl'.

    NOTE(review): lines behind the embedded diff hunk header below are
    hidden — presumably the '__hangcache__' path join and an existence
    check returning None on a cache miss (the caller tests
    `is not None`); confirm against the full file.
    """
    fname = hash_wordlist(word_list) + '.pkl'
@ -40,6 +41,7 @@ def load_freq_cache(word_list):
    with open(fname, 'rb') as file:
        return pickle.load(file)
|
|
|
|
|
|
|
|
|
|
|
def save_freq_cache(word_list, freq):
    """Pickle `freq` into the '__hangcache__' directory, keyed by the
    word list's hash.

    NOTE(review): the `fname` assignment is hidden behind the embedded
    diff hunk header below — confirm against the full file.
    """
    # Create the cache directory lazily on first save.
    if not os.path.exists('__hangcache__'):
        os.mkdir('__hangcache__')
@ -48,6 +50,7 @@ def save_freq_cache(word_list,freq):
    with open(fname, 'wb') as file:
        pickle.dump(freq, file)
|
|
|
|
|
|
|
|
|
|
|
def generate_letter_frequency(word_list):
    """Compute (or load from cache) letter-frequency data for word_list.

    Downstream code indexes the result as let_freq[letter][0] (overall
    frequency) and let_freq[letter][1] (per-word frequency).

    NOTE(review): the computation of `ret` — and presumably a
    `return cached` on a hit — is hidden behind the embedded diff hunk
    header below; only the cache lookup and save are visible here.
    """
    cached = load_freq_cache(word_list)
    # A cache hit short-circuits the (hidden) recomputation.
    if cached is not None:
@ -71,17 +74,22 @@ def generate_letter_frequency(word_list):
    save_freq_cache(word_list, ret)
    return ret
|
|
|
|
|
|
|
|
|
|
|
def filter_wordlist(input, remaining_letters, word_list):
    """Return the words from word_list that fit the hangman pattern.

    Args:
        input: pattern string where '.' marks an unknown letter.
            (Name shadows the builtin, but is kept for caller
            compatibility.)
        remaining_letters: letters a '.' may still stand for.
        word_list: candidate words.

    Returns:
        List of words matching the pattern exactly (same length, known
        letters in place, unknowns drawn from remaining_letters).
    """
    # '.' becomes a character class of the still-possible letters; the
    # trailing '$' anchors the end so lengths must match (re.match
    # already anchors the start).  The original computed both `regex`
    # and the match list twice — old/new diff residue, now resolved.
    pattern = input.replace(
        '.', '[{}]'.format(''.join(remaining_letters))) + '$'
    regex = re.compile(pattern)
    return [word for word in word_list if regex.match(word)]
|
|
|
|
|
|
|
|
|
|
|
# Prompt shown when asking for the current puzzle state.  The old
# single-line and new triple-quoted assignments produced the identical
# string (the first was immediately overwritten) — keep just one.
PROMPT = """Enter word with '.' to represent missing letters
('/' to separate multiple words): """

# Prompt for letters the player has already ruled out.
NEG_PROMPT = 'Enter letters which are confirmed not to occur: '
|
|
|
# All guessable letters; `alphabet` is imported near the top of the file
# (outside this chunk).  set() consumes the iterable directly — the old
# `set(letter for letter in alphabet)` re-implemented the constructor.
ALPHABET = set(alphabet)
|
|
|
|
|
|
|
|
|
|
|
def shorten(chars, max_length):
    """Distribute `chars` across at most `max_length` display rows and
    join them with newlines, stripping trailing whitespace per row.

    NOTE(review): the computation of `row_num` and `addition` sits
    behind the embedded diff hunk header below — confirm the
    distribution rule against the full file.
    """
    rows = [''] * max_length
    for i, char in enumerate(chars):
@ -90,6 +98,7 @@ def shorten(chars,max_length):
        rows[row_num] += addition
    return '\n'.join(map(str.rstrip, rows))
|
|
|
|
|
|
|
|
|
|
|
def multi_word(l_words, n=10):
    """Render several candidate word lists side by side.

    NOTE(review): the body is truncated at the embedded diff hunk header
    below; the caller joins the result with newlines, so it presumably
    yields display rows — confirm against the full file.
    """
    # breakpoint()
    rows = [''] * (n+1)
@ -108,12 +117,18 @@ def multi_word(l_words,n = 10):
|
|
|
|
|
|
|
|
|
|
|
def print_likely_chars(remaining_letters, let_freq):
    """Print the best next guesses ranked two ways.

    Args:
        remaining_letters: letters still available to guess.
        let_freq: mapping letter -> (overall_freq, per_word_freq).
    """
    # The original assigned each of these twice (old one-liner
    # immediately overwritten by the reformatted version — diff
    # residue); keep the single reformatted computation.  shorten()
    # folds each ranking into at most 5 display rows.
    overall = shorten(sorted(remaining_letters,
                             key=lambda letter: let_freq[letter][0],
                             reverse=True), 5)
    per_word = shorten(sorted(remaining_letters,
                              key=lambda letter: let_freq[letter][1],
                              reverse=True), 5)
    print('Good candidates by overall frequency:', overall, sep='\n')
    print('Good candidates by per-word frequency:', per_word, sep='\n')
|
|
|
|
|
|
|
# ensures that new expression could come from previous entry |
|
|
|
|
|
|
|
|
|
|
|
def check(prev, new, remaining_letters):
    """Ensure the newly entered expression could have come from the
    previous entry (per the file comment above this function).

    NOTE(review): the core comparison is hidden behind the embedded diff
    hunk header below — only the joining of the word lists and the
    fallback `return False` are visible in this chunk.
    """
    # Compare the multi-word entries as single '/'-joined strings.
    prev = '/'.join(prev)
    new = '/'.join(new)
@ -134,8 +149,10 @@ def check(prev,new,remaining_letters):
    else:
        return False
|
|
|
|
|
|
|
|
|
|
|
# Module-level accumulator of letters confirmed absent; iterate() adds
# the user's ruled-out letters each round so exclusions persist across
# guesses.
negatives = set()
|
|
|
|
|
|
|
|
|
|
|
def iterate(word_list, let_freq, prev_word=None):
    """Run one guessing round: read the puzzle state, filter the
    candidate lists, and print ranked letter suggestions.

    Args:
        word_list: candidates — either one flat word list (first round)
            or a list of per-word candidate lists from a prior round.
        let_freq: mapping letter -> (overall_freq, per_word_freq).
        prev_word: patterns entered last round, or None on the first.

    Returns:
        Tuple (entered_words, word_list) to feed into the next round.

    NOTE(review): this span contains unresolved diff residue — an
    embedded hunk header and duplicated old/new lines, flagged inline
    below; the file will not run until they are resolved.
    """
    if prev_word is None:
        # Keep only lowercase letters, '.' wildcards and '/' separators.
        entered_words = re.sub(r'[^a-z\./]', '', input(PROMPT)).split('/')
@ -146,22 +163,25 @@ def iterate(word_list,let_freq,prev_word = None):
    # NOTE(review): the hunk header above hides the branch taken when
    # prev_word is not None — confirm against the full file.
    valid = check(prev_word, entered_words, ALPHABET-negatives)
    try:
        # Probe whether word_list is already nested per-word; the flat
        # case presumably raises here only when empty — TODO confirm.
        word_list[0][0]
    except:
    # NOTE(review): the bare `except:` above and the narrowed handler
    # below are old/new diff residue — keep only the narrowed form.
    except Exception as e:
        print("Exception:", e)
        # Fall back: replicate the flat list once per entered word.
        word_list = [word_list] * len(entered_words)
    negative_letters = re.findall('[a-z]', input(NEG_PROMPT))
    negatives.update(negative_letters)
    output = []  # NOTE(review): never used in the visible code.
    entered_letters = set()
    for word in entered_words:
        entered_letters.update(re.findall('[a-z]', word))
    # Letters still worth guessing: letters with known frequencies,
    # minus those already placed or ruled out.
    remaining_letters = (ALPHABET & set(let_freq.keys())) - entered_letters - negatives
    # NOTE(review): duplicate of the assignment above (old/new diff
    # residue) — keep one.
    remaining_letters = (ALPHABET & set(let_freq.keys())
                         ) - entered_letters - negatives
    for i, word in enumerate(entered_words):
        # NOTE(review): the next two statements are old/new diff residue
        # of the same call — keep one.
        remaining_possibilities = filter_wordlist(word,remaining_letters,word_list[i])
        remaining_possibilities = filter_wordlist(
            word, remaining_letters, word_list[i])
        word_list[i] = remaining_possibilities
    print('Matches found:', '\n'.join(multi_word(word_list, 10)), sep='\n')
    print_likely_chars(remaining_letters, let_freq)
    return entered_words, word_list
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
|
# src: https://github.com/dwyl/english-words |
|
|
|
words = load_words('words.txt') |
|
|
|
|