import codecs
import hashlib
import os
import pickle
import re
import sys

# 32- or 64-bit platform? Use the wider BLAKE2 variant where it is available.
if sys.maxsize > 2**32:
    HASH_FUNC = hashlib.blake2b
else:
    HASH_FUNC = hashlib.blake2s

# Directory for pickled letter-frequency tables.
CACHE_DIR = '__hangchache__'


def load_words(filename):
    """Return the word list as a set of lowercased, non-empty lines."""
    with open(filename) as file:
        text = file.read()
    return set(map(str.lower, filter(bool, text.split('\n'))))


def _get_wordlist_hash(word_list):
    """Hash the words in a deterministic (sorted) order."""
    _hash = HASH_FUNC()
    for word in sorted(word_list):
        _hash.update(word.encode())
    return _hash.digest()


def hash_wordlist(word_list, raw=False):
    """Return a filesystem-safe digest identifying the word list."""
    digest = _get_wordlist_hash(word_list)
    if raw:
        return digest
    # Base64 output may contain '+', '/' and newlines, none of which belong
    # in a file name; drop the newlines and swap in the URL-safe alphabet.
    encoded = codecs.encode(digest, 'base64').decode().replace('\n', '')
    return encoded.translate(str.maketrans({'+': '-', '/': '_'}))


def load_freq_cache(word_list):
    """Return the cached frequency table for this word list, if any."""
    fname = os.path.join(CACHE_DIR, hash_wordlist(word_list) + '.pkl')
    if os.path.exists(fname):
        with open(fname, 'rb') as file:
            return pickle.load(file)
    return None


def save_freq_cache(word_list, freq):
    """Pickle the frequency table under a name derived from the word list."""
    if not os.path.exists(CACHE_DIR):
        os.mkdir(CACHE_DIR)
    fname = os.path.join(CACHE_DIR, hash_wordlist(word_list) + '.pkl')
    with open(fname, 'wb') as file:
        pickle.dump(freq, file)


def generate_letter_frequency(word_list):
    """Map each letter to [total occurrences, average share of a word].

    The second value is the letter's count within a word divided by the
    word's length, averaged over every word in the list.
    """
    cached = load_freq_cache(word_list)
    if cached is not None:
        return cached

    ret = {}
    for word in word_list:
        letter_counts = {}
        for letter in word:
            try:
                ret[letter][0] += 1
            except KeyError:
                ret[letter] = [1, 0]
            letter_counts[letter] = letter_counts.get(letter, 0) + 1
        for letter, count in letter_counts.items():
            # Accumulate the letter's share of this word; divide by the
            # number of words once every word has been seen.
            ret[letter][1] += count / len(word)
    for stats in ret.values():
        stats[1] /= len(word_list)

    save_freq_cache(word_list, ret)
    return ret


PROMPT = "Enter word with '.' to represent missing letters: "


def iterate(word_list, let_freq):
    """Prompt for a pattern, then print matches and suggested guesses."""
    entered_word = input(PROMPT).replace(' ', '')
    entered_letters = set(entered_word.replace('.', ''))
    remaining_letters = set(let_freq) - entered_letters

    # Each '.' stands for one unknown letter; fullmatch keeps candidates the
    # same length as the pattern.
    regex = entered_word.replace('.', '[A-Za-z]')
    remaining_possibilities = [word for word in word_list
                               if re.fullmatch(regex, word)]

    print('Matches found:\n' + '\n'.join(remaining_possibilities[:30]))
    print('Good candidates by overall frequency:\n'
          + '\n'.join(sorted(remaining_letters,
                             key=lambda letter: let_freq[letter][0],
                             reverse=True)))
    print('Good candidates by per-word frequency:\n'
          + '\n'.join(sorted(remaining_letters,
                             key=lambda letter: let_freq[letter][1],
                             reverse=True)))
    return entered_word, remaining_possibilities


if __name__ == "__main__":
    words = load_words('words.txt')
    FREQ = generate_letter_frequency(words)
    while True:
        try:
            last, WORDS = iterate(words, FREQ)
        except KeyboardInterrupt:
            break
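
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original script): it shows, on a
# tiny made-up word list, what generate_letter_frequency() returns and how the
# same pattern filtering used in iterate() narrows the candidates. The sample
# words and the _demo name are assumptions for illustration only; it is never
# called automatically, so import the module or paste it into a REPL to run
# it. Note that calling it also writes a cache file under CACHE_DIR.
def _demo():
    sample = {'apple', 'ample', 'angle'}
    freq = generate_letter_frequency(sample)
    # 'a' appears once in each of the three sample words.
    assert freq['a'][0] == 3
    # Filter the sample the same way iterate() does for the pattern 'a...e'.
    pattern = 'a...e'.replace('.', '[A-Za-z]')
    matches = sorted(w for w in sample if re.fullmatch(pattern, w))
    print('candidates for a...e:', matches)  # all three sample words match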