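"""Interactive hangman helper.

Reads a word list from words.txt (one word per line), builds per-letter
frequency statistics (cached as a pickle under __hangchache__/), then
repeatedly prompts for the known letters of a word, using '.' for each
unknown position, and prints matching words plus suggested guesses.
"""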
import codecs
import hashlib
import os
import pickle
import re
import sys

# 32- or 64-bit platform? Pick the BLAKE2 variant that matches the word size.
if sys.maxsize > 2**32:
    HASH_FUNC = hashlib.blake2b
else:
    HASH_FUNC = hashlib.blake2s

def load_words(filename):
    """Load the word list: one word per line, lowercased, blank lines skipped."""
    with open(filename) as file:
        text = file.read()
    return {word.lower() for word in text.split('\n') if word}

def _get_wordlist_hash(word_list_s):
    _hash = HASH_FUNC()
    for word in sorted(word_list_s):
        _hash.update(word.encode())
    return _hash.digest()

def hash_wordlist(word_list, raw=False):
    """Hash the word list; return raw digest bytes or a filename-safe string."""
    fhash = _get_wordlist_hash(word_list)
    if raw:
        return fhash
    illegal_hash = codecs.encode(fhash, 'base64').decode()
    # The base64 codec inserts newlines and uses '+' and '/', none of which
    # belong in a filename, so strip and translate them.
    illegal_hash = illegal_hash.replace('\n', '')
    t_table = str.maketrans({'+': '-', '/': '_'})
    return illegal_hash.translate(t_table)

def load_freq_cache(word_list):
    fname = hash_wordlist(word_list) + '.pkl'
    fname = os.path.join('__hangchache__', fname)
    if os.path.exists(fname):
        with open(fname, 'rb') as file:
            return pickle.load(file)
    return None

def save_freq_cache(word_list, freq):
    if not os.path.exists('__hangchache__'):
        os.mkdir('__hangchache__')
    fname = hash_wordlist(word_list) + '.pkl'
    fname = os.path.join('__hangchache__', fname)
    with open(fname, 'wb') as file:
        # pickle.dump takes (obj, file), in that order.
        pickle.dump(freq, file)

def generate_letter_frequency(word_list):
    """Map each letter to [total occurrences, average fraction of a word it makes up]."""
    cached = load_freq_cache(word_list)
    if cached is not None:
        return cached
    ret = {}
    for word_num, word in enumerate(word_list):
        letter_counts = {}
        for letter in word:
            # Overall occurrence count across the whole word list.
            try:
                ret[letter][0] += 1
            except KeyError:
                ret[letter] = [1, 0]
            letter_counts[letter] = letter_counts.get(letter, 0) + 1
        # Fold this word's per-letter share into a running average.
        for letter, count in letter_counts.items():
            word_portion = count / len(word)
            avg = (ret[letter][1] * word_num) + word_portion
            avg /= word_num + 1
            ret[letter][1] = avg
    save_freq_cache(word_list, ret)
    return ret

PROMPT = "Enter word with '.' to represent missing letters: "


def iterate(word_list, let_freq):
    entered_word = input(PROMPT)
    # Drop spaces and lowercase so the input matches the lowercased word list.
    entered_word = entered_word.replace(' ', '').lower()
    entered_letters = set(entered_word.replace('.', ''))
    remaining_letters = set(let_freq.keys()) - entered_letters
    # Each '.' stands for exactly one unknown letter; the whole word must match.
    regex = entered_word.replace('.', '[A-Za-z]')
    remaining_possibilities = [word for word in word_list if re.fullmatch(regex, word)]
    print('Matches found:\n' + '\n'.join(remaining_possibilities[:30]))
    print('Good candidates by overall frequency:\n'
          + '\n'.join(sorted(remaining_letters, key=lambda letter: let_freq[letter][0], reverse=True)))
    print('Good candidates by per-word frequency:\n'
          + '\n'.join(sorted(remaining_letters, key=lambda letter: let_freq[letter][1], reverse=True)))
    return entered_word, remaining_possibilities

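# Usage: put one word per line in words.txt in the working directory, run the
# script, and press Ctrl-C to leave the prompt loop.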
if __name__ == "__main__":
    words = load_words('words.txt')
    FREQ = generate_letter_frequency(words)
    while True:
        try:
            last, WORDS = iterate(words, FREQ)
        except KeyboardInterrupt:
            break