|
|
|
@ -70,7 +70,15 @@ def generate_letter_frequency(word_list): |
|
|
|
if cached is None: |
|
|
|
save_freq_cache(word_list,ret) |
|
|
|
return ret |
|
|
|
# Prompt text for requesting a partially known word ('.' marks an unknown letter).
# NOTE(review): PROMPT is assigned again further down this file, so the later
# value is the one in effect once the module has finished loading.
PROMPT = (
    "Enter word with '.' to represent missing letters: "
)
|
|
|
|
|
|
|
def filter_wordlist(input, remaining_letters, word_list):
    """Return the words from ``word_list`` that match the pattern ``input``.

    Every '.' in ``input`` is replaced by a regex character class built from
    ``remaining_letters``; all other characters of ``input`` are used as regex
    text unchanged. The pattern is anchored at the start by ``re.match`` and
    at the end by a trailing '$'.

    Args:
        input: Pattern string with '.' for unknown letters. (The name shadows
            the builtin; it is kept so keyword callers keep working.)
        remaining_letters: Iterable of single characters still allowed in the
            unknown positions. They are inserted into the character class
            without escaping.
        word_list: Iterable of candidate words.

    Returns:
        A list of the matching words, in the iteration order of ``word_list``.
    """
    letters = ''.join(remaining_letters)
    if not letters and '.' in input:
        # An empty character class ("[]") is not a valid regex and would raise
        # re.error; with no letters left, no unknown position can be filled.
        return []
    pattern = re.compile(input.replace('.', '[{}]'.format(letters)) + '$')
    return [word for word in word_list if pattern.match(word)]
|
|
|
|
|
|
|
|
|
|
|
# Prompt for the word pattern; '/' in the reply separates several words.
PROMPT = "Enter word with '.' to represent missing letters ('/' to separate multiple words): "

# Prompt for the letters known not to occur.
NEG_PROMPT = 'Enter letters which are confirmed not to occur: '

# Set of the individual items of `alphabet`.
ALPHABET = set(alphabet)
|
|
|
|
|
|
|
@ -82,32 +90,86 @@ def shorten(chars,max_length): |
|
|
|
rows[row_num] += addition |
|
|
|
return '\n'.join(map(str.rstrip,rows)) |
|
|
|
|
|
|
|
def multi_word(l_words, n=10):
    """Lay out several word lists side by side as text columns.

    Each list in ``l_words`` becomes one column: a header holding its
    1-based position followed by at most ``n`` of its words. A new column
    starts four spaces to the right of the widest row built so far.

    Args:
        l_words: Iterable of lists of words, one list per column.
        n: Maximum number of words shown per column.

    Returns:
        An iterator over the right-stripped, non-empty rows.
    """
    lines = [''] * (n + 1)
    for position, words in enumerate(l_words):
        # Width of the table so far; this column is aligned just past it.
        width = max(map(len, lines))
        gap = '' if position == 0 else ' ' * 4
        column = words[:n]
        column.insert(0, str(position + 1))
        for row, cell in enumerate(column):
            lines[row] = lines[row].ljust(width) + gap + cell
    stripped = (line.rstrip() for line in lines)
    return filter(None, stripped)
|
|
|
|
|
|
|
|
|
|
|
def print_likely_chars(remaining_letters, let_freq):
    """Print the remaining letters ranked by two frequency measures.

    The letters are sorted in descending order, once by ``let_freq[letter][0]``
    and once by ``let_freq[letter][1]``, formatted with ``shorten(..., 5)``
    and printed under the headings "overall frequency" and "per-word
    frequency" respectively. Each section is printed exactly once.

    Args:
        remaining_letters: Iterable of letters to rank; every letter must be
            a key of ``let_freq``.
        let_freq: Mapping from letter to an indexable pair of sort keys.
            NOTE(review): presumably (overall, per-word) frequencies, going by
            the printed headings - confirm against generate_letter_frequency.
    """
    def ranked(index):
        # Highest value first for the chosen measure.
        ordered = sorted(
            remaining_letters,
            key=lambda letter: let_freq[letter][index],
            reverse=True,
        )
        return shorten(ordered, 5)

    overall = ranked(0)
    per_word = ranked(1)
    print('Good candidates by overall frequency:', overall, sep='\n')
    print('Good candidates by per-word frequency:', per_word, sep='\n')
|
|
|
|
|
|
|
def check(prev, new, remaining_letters):
    """Return True if ``new`` could have been derived from ``prev``.

    Both arguments are sequences of word patterns; they are joined with '/'
    and compared position by position. ``new`` is accepted only when:

    * the joined strings have the same length,
    * every lowercase letter already present in ``prev`` is a member of
      ``remaining_letters``,
    * every letter and every '/' of ``prev`` appears unchanged at the same
      position in ``new``, and
    * a '.' in ``prev`` is not replaced by '/' (which would change the
      number of words).

    Args:
        prev: Sequence of pattern strings from the previous entry.
        new: Sequence of pattern strings from the new entry.
        remaining_letters: Set of letters that are still allowed.

    Returns:
        bool: whether the new entry is consistent with the previous one.
    """
    prev = '/'.join(prev)
    new = '/'.join(new)
    if len(prev) != len(new):
        return False
    # Decide this up front so a later per-position result cannot mask it.
    if not set(re.findall('[a-z]', prev)) <= remaining_letters:
        return False
    for p_cur, n_cur in zip(prev, new):
        if p_cur == '.':
            # An unknown position may be filled in or stay unknown, but it
            # cannot become a word separator.
            if n_cur == '/':
                return False
            continue
        # Known letters and separators must be carried over unchanged.
        if p_cur != n_cur:
            return False
    return True
|
|
|
|
|
|
|
# Letters the user has confirmed do not occur; shared across iterate() calls.
negatives = set()


def iterate(word_list, let_freq, prev_word=None):
    """Run one interactive round: read patterns, narrow candidates, report.

    Steps:
      1. Prompt (PROMPT) for the word patterns, keeping only lowercase
         letters, '.' and '/', and split the reply on '/'. When ``prev_word``
         is given, keep prompting until ``check`` accepts the reply as a
         continuation of ``prev_word``.
      2. Prompt (NEG_PROMPT) for letters confirmed absent and add them to the
         module-level ``negatives`` set.
      3. Filter the candidates of every word with ``filter_wordlist`` and
         print the matches and the most likely remaining letters.

    Args:
        word_list: Either the plain word collection (first round) or a list
            holding one candidate list per entered word (as returned by a
            previous call).
        let_freq: Mapping keyed by letter, passed on to print_likely_chars.
        prev_word: The patterns returned by the previous round, or None on
            the first round.

    Returns:
        tuple: ``(entered_words, word_list)`` where ``word_list`` is a new
        list with one filtered candidate list per entered word.
    """
    def read_words():
        return re.sub(r'[^a-z\./]', '', input(PROMPT)).split('/')

    entered_words = read_words()
    if prev_word is not None:
        # Re-prompt until the entry is consistent with the previous round.
        while not check(prev_word, entered_words, ALPHABET - negatives):
            entered_words = read_words()

    # A plain word collection is shared by every entered word. This is an
    # explicit shape test rather than a bare try/except on word_list[0][0],
    # which also fired when the first word had no candidates left and then
    # wrapped the per-word lists a second time.
    per_word = (
        isinstance(word_list, list)
        and bool(word_list)
        and all(isinstance(candidates, list) for candidates in word_list)
    )
    if not per_word:
        word_list = [word_list] * len(entered_words)

    negatives.update(re.findall('[a-z]', input(NEG_PROMPT)))

    entered_letters = set()
    for word in entered_words:
        entered_letters.update(re.findall('[a-z]', word))

    remaining_letters = (ALPHABET & set(let_freq.keys())) - entered_letters - negatives

    # Build a fresh list so the caller's list is not modified in place.
    word_list = [
        filter_wordlist(word, remaining_letters, candidates)
        for word, candidates in zip(entered_words, word_list)
    ]

    print('Matches found:', '\n'.join(multi_word(word_list, 10)), sep='\n')
    print_likely_chars(remaining_letters, let_freq)

    return entered_words, word_list
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Word list source: https://github.com/dwyl/english-words
    words = load_words('words.txt')
    FREQ = generate_letter_frequency(words)
    print_likely_chars(ALPHABET, FREQ)

    # Patterns from the previous round; None until the first round completes.
    last = None
    while True:
        try:
            # One round per loop: pass the previous entry so that each new
            # entry is validated against it.
            last, words = iterate(words, FREQ, last)
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C or end of input ends the session without a traceback.
            break