|
|
|
@ -1,10 +1,11 @@ |
|
|
|
import codecs |
|
|
|
import hashlib |
|
|
|
import json |
|
|
|
import os |
|
|
|
import pickle |
|
|
|
import re |
|
|
|
from string import ascii_lowercase as alphabet |
|
|
|
import sys |
|
|
|
|
|
|
|
#32 or 64 bit platform? |
|
|
|
if sys.maxsize > 2**32: |
|
|
|
HASH_FUNC = hashlib.blake2b |
|
|
|
@ -29,23 +30,23 @@ def hash_wordlist(word_list,raw = False): |
|
|
|
if raw: |
|
|
|
return fhash |
|
|
|
illegal_hash = codecs.encode(fhash,'base64').decode() |
|
|
|
t_table = str.maketrans({'+':'-','/':'_'}) |
|
|
|
return illegal_hash.translate(t_table) |
|
|
|
replacements = {'+':'-','/':'_',None: ''} |
|
|
|
return re.sub(r'(\+|\/)|\n',lambda match: replacements[match.group(1)],illegal_hash) |
|
|
|
|
|
|
|
def load_freq_cache(word_list):
    """Return the cached frequency data for *word_list*, or None if absent.

    The cache file lives in the ``__hangcache__`` directory and is named
    after the hash of the word list (``hash_wordlist``) with a ``.pkl``
    suffix, so a different word list maps to a different file.

    Returns:
        The object previously pickled for this word list, or ``None`` when
        no cache file exists.
    """
    fname = os.path.join('__hangcache__', hash_wordlist(word_list) + '.pkl')
    try:
        with open(fname, 'rb') as file:
            # NOTE: unpickling can execute arbitrary code. This is only
            # acceptable if the cache directory holds nothing but files
            # written by this program -- never point it at untrusted data.
            return pickle.load(file)
    except FileNotFoundError:
        # No cache yet for this word list; the caller sees None.
        return None
|
|
|
|
|
|
|
def save_freq_cache(word_list, freq):
    """Pickle *freq* into the cache file that belongs to *word_list*.

    The file is written to ``__hangcache__/<hash>.pkl``, where ``<hash>`` is
    ``hash_wordlist(word_list)``. The directory is created on first use.

    Args:
        word_list: The word list the frequency data was computed from; only
            used to derive the cache file name.
        freq: The picklable object to store.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('__hangcache__', exist_ok=True)
    fname = os.path.join('__hangcache__', hash_wordlist(word_list) + '.pkl')
    with open(fname, 'wb') as file:
        # pickle.dump takes the object first and the file second.
        pickle.dump(freq, file)
|
|
|
|
|
|
|
def generate_letter_frequency(word_list): |
|
|
|
cached = load_freq_cache(word_list) |
|
|
|
@ -70,23 +71,43 @@ def generate_letter_frequency(word_list): |
|
|
|
save_freq_cache(word_list,ret) |
|
|
|
return ret |
|
|
|
# Prompt for the partially known word; '.' stands for an unknown letter.
PROMPT = "Enter word with '.' to represent missing letters: "
# Prompt for letters already known not to be in the word.
NEG_PROMPT = 'Enter letters which are confirmed not to occur: '
# The lowercase ASCII letters, as a set for fast membership tests.
ALPHABET = set(alphabet)
|
|
|
|
|
|
|
def shorten(chars, max_length):
    """Lay *chars* out in columns, using at most *max_length* rows.

    Items are distributed top-to-bottom, then left-to-right: item ``i`` goes
    to row ``i % max_length``. Items on the same row are separated by four
    spaces and trailing whitespace is stripped from every row.

    Args:
        chars: Iterable of strings (typically single letters) to arrange.
        max_length: Maximum number of rows; must be at least 1.

    Returns:
        The rows joined with newlines. Only rows that actually contain an
        item are emitted, so a short input does not produce trailing blank
        lines, and an empty input yields ``''``.

    Raises:
        ValueError: If *max_length* is smaller than 1.
    """
    if max_length < 1:
        raise ValueError('max_length must be at least 1')
    items = list(chars)
    # With fewer items than rows, the surplus rows would be empty; skip them.
    row_count = min(max_length, len(items))
    separator = ' ' * 4
    return '\n'.join(
        separator.join(items[row::max_length]).rstrip()
        for row in range(row_count)
    )
|
|
|
|
|
|
|
def print_likely_chars(remaining_letters, let_freq):
    """Print the remaining letters ranked by two frequency measures.

    Args:
        remaining_letters: Iterable of letters still worth guessing.
        let_freq: Mapping from letter to an indexable value; index 0 is used
            for the "overall frequency" ranking and index 1 for the
            "per-word frequency" ranking.

    Each ranking is printed most-frequent first, laid out by ``shorten`` in
    at most five rows.
    """
    def ranked(index):
        # Sort descending by the chosen frequency column, then format.
        ordered = sorted(
            remaining_letters,
            key=lambda letter: let_freq[letter][index],
            reverse=True,
        )
        return shorten(ordered, 5)

    # Build both tables before printing so a lookup failure prints nothing.
    overall = ranked(0)
    per_word = ranked(1)
    print('Good candidates by overall frequency:\n' + overall)
    print('Good candidates by per-word frequency:\n' + per_word)
|
|
|
|
|
|
|
# Letters the user has ruled out so far; accumulated across iterate() calls.
negatives = set()


def iterate(word_list, let_freq):
    """Run one interactive round: read the pattern, filter, print hints.

    The user is asked for the partially known word ('.' marks an unknown
    letter; spaces are ignored) and for letters known not to occur. Those
    letters are added to the module-level ``negatives`` set. Each '.' may
    only stand for a lowercase letter that is a key of *let_freq*, is not
    already shown in the word and has not been ruled out.

    Args:
        word_list: Iterable of candidate words to filter.
        let_freq: Mapping keyed by letter; passed on to
            ``print_likely_chars`` for ranking.

    Returns:
        A tuple ``(entered_word, remaining_possibilities)``: the pattern as
        typed (spaces removed) and the list of words that match it.
    """
    entered_word = input(PROMPT)
    negatives.update(re.findall('[a-z]', input(NEG_PROMPT)))
    entered_word = entered_word.replace(' ', '')

    entered_letters = set(entered_word.replace('.', ''))
    remaining_letters = (
        {letter for letter in let_freq if letter in ALPHABET}
        - entered_letters
        - negatives
    )

    if remaining_letters:
        # Sorted so the generated pattern is deterministic.
        wildcard = '[{}]'.format(''.join(sorted(remaining_letters)))
    else:
        # '[]' is not a valid character class; use a pattern that never
        # matches, since no letter is left to fill an unknown position.
        wildcard = '(?!)'
    # Escape typed characters so stray regex metacharacters stay literal.
    pattern = re.compile(
        ''.join(wildcard if char == '.' else re.escape(char)
                for char in entered_word) + '$'
    )

    remaining_possibilities = [word for word in word_list if pattern.match(word)]
    print('Matches found:\n' + '\n'.join(remaining_possibilities[:10]))
    print_likely_chars(remaining_letters, let_freq)
    return entered_word, remaining_possibilities
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Interactive driver: load the dictionary, show the initial letter
    # ranking, then keep narrowing the candidates until the user quits.
    words = load_words('words.txt')
    FREQ = generate_letter_frequency(words)
    print_likely_chars(ALPHABET, FREQ)
    while True:
        try:
            # Feed each round's matches back in so the list keeps shrinking.
            last, words = iterate(words, FREQ)
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C or end of input ends the session without a traceback.
            break