@@ -1,11 +1,59 @@
import random
import codecs
import functools
import hashlib
import json
import os
import pickle
import re
import sys

# 32 or 64 bit platform? blake2b is optimized for 64-bit platforms,
# blake2s for 8- to 32-bit ones.
if sys.maxsize > 2**32:
    HASH_FUNC = hashlib.blake2b
else:
    HASH_FUNC = hashlib.blake2s
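# (blake2b's default digest is 64 bytes -> 88 base64 chars; blake2s's is
# 32 bytes -> 44 chars, so cache filenames differ in length by platform.)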


def load_words(filename):
    # TODO: cache wordfreq with hash of words
    with open(filename) as file:
        text = file.read()
    return set(map(str.lower, filter(bool, text.split('\n'))))


@functools.lru_cache(maxsize=None)
def _get_wordlist_hash(word_list_s):
    # The argument must be hashable (e.g. a tuple) for lru_cache to accept it.
    hash = HASH_FUNC()
    for word in sorted(word_list_s):
        word_bytes = word.encode()
        hash.update(word_bytes)
    return hash.digest()


def hash_wordlist(word_list, raw=False):
    # Sort and tuple-ify so the cached hash sees one canonical, hashable key.
    fhash = _get_wordlist_hash(tuple(sorted(word_list)))
    if raw:
        return fhash
    # The base64 codec inserts newlines; strip them before use as a filename.
    illegal_hash = codecs.encode(fhash, 'base64').decode().replace('\n', '')
    t_table = str.maketrans({'+': '-', '/': '_'})
    return illegal_hash.translate(t_table)
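# Translating '+' and '/' to '-' and '_' reproduces the URL- and
# filename-safe base64 alphabet. An equivalent sketch with the stdlib
# helper (same digest assumed) would be:
#
#     import base64
#     fname_safe = base64.urlsafe_b64encode(fhash).decode()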


def load_freq_cache(word_list):
    fname = hash_wordlist(word_list) + '.pkl'
    fname = os.path.join('__hangcache__', fname)
    if os.path.exists(fname):
        with open(fname, 'rb') as file:
            return pickle.load(file)
    # Implicitly returns None on a cache miss.


def save_freq_cache(word_list, freq):
    if not os.path.exists('__hangcache__'):
        os.mkdir('__hangcache__')
    fname = hash_wordlist(word_list) + '.pkl'
    fname = os.path.join('__hangcache__', fname)
    with open(fname, 'wb') as file:
        # pickle.dump takes (obj, file), not (file, obj).
        pickle.dump(freq, file)
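# Cache layout: __hangcache__/<url-safe digest>.pkl, one pickle per distinct
# word list, so an edited words.txt never picks up stale frequencies.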


def generate_letter_frequency(word_list):
    cached = load_freq_cache(word_list)
    if cached is not None:
        return cached
    ret = {}
    for word_num, word in enumerate(word_list):
        letter_counts = {}
@@ -21,8 +69,27 @@ def generate_letter_frequency(word_list):
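            # The elided middle of this function tallies letter_counts and
            # word_portion per word; the lines below fold each word into a
            # running mean:
            #   avg_new = (avg_old * word_num + word_portion) / (word_num + 1)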
            avg = (ret[letter][1] * word_num) + word_portion
            avg /= word_num + 1
            ret[letter][1] = avg

    if cached is None:
        save_freq_cache(word_list, ret)
    return ret


PROMPT = "Enter word with '.' to represent missing letters: "


def iterate(word_list, let_freq):
    entered_word = input(PROMPT)
    # str.replace needs both arguments: strip any spaces the user typed.
    entered_word = entered_word.replace(' ', '')
    entered_letters = set(letter for letter in entered_word.replace('.', ''))
    remaining_letters = set(let_freq.keys()) - entered_letters
    regex = entered_word.replace('.', '[A-Za-z]')
    # fullmatch, not match: the pattern must cover the whole candidate word,
    # not just a prefix of it.
    remaining_possibilities = list(filter(lambda word: re.fullmatch(regex, word), word_list))
    print('Matches found:\n' + '\n'.join(remaining_possibilities[:30]))
    print('Good candidates by overall frequency:\n' +
          '\n'.join(sorted(remaining_letters, key=lambda letter: let_freq[letter][0], reverse=True)))
    print('Good candidates by per-word frequency:\n' +
          '\n'.join(sorted(remaining_letters, key=lambda letter: let_freq[letter][1], reverse=True)))
    return entered_word, remaining_possibilities
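# Example (hypothetical input): entering 'h.ll.' builds the pattern
# 'h[A-Za-z]ll[A-Za-z]', keeping candidates such as 'hello' and 'hills'
# while fullmatch rejects longer words like 'hollows'.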


if __name__ == "__main__":
    words = load_words('words.txt')
    FREQ = generate_letter_frequency(words)
    while True:
        try:
            # Rebind words so each round searches only surviving candidates.
            last, words = iterate(words, FREQ)
        except KeyboardInterrupt:
            break
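# Typical session (assuming a newline-delimited words.txt beside the script;
# the script name below is hypothetical):
#   $ python hangman_helper.py
#   Enter word with '.' to represent missing letters: h.ll.
# Guess from the printed letter rankings; Ctrl-C ends the session.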