From 3d72bad2539a04e2af5ba1226a5cf5c3f46975e5 Mon Sep 17 00:00:00 2001 From: Raphael Roberts Date: Sat, 16 Mar 2019 12:23:15 -0500 Subject: [PATCH] Pep8 compliant using url-safe base64 encoding --- hangman.py | 146 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 83 insertions(+), 63 deletions(-) diff --git a/hangman.py b/hangman.py index 38c4f4f..681e1a3 100644 --- a/hangman.py +++ b/hangman.py @@ -1,21 +1,22 @@ -import codecs +import base64 import hashlib -import json import os import pickle import re from string import ascii_lowercase as alphabet import sys -#32 or 64 bit platform? +# 32 or 64 bit platform? if sys.maxsize > 2**32: HASH_FUNC = hashlib.blake2b else: HASH_FUNC = hashlib.blake2s + def load_words(filename): with open(filename) as file: text = file.read() - return set(map(str.lower,filter(bool,text.split('\n')))) + return set(map(str.lower, filter(bool, text.split('\n')))) + def _get_wordlist_hash(word_list_s): _hash = HASH_FUNC() @@ -24,101 +25,115 @@ def _get_wordlist_hash(word_list_s): _hash.update(word_bytes) return _hash.digest() -def hash_wordlist(word_list,raw = False): + +def hash_wordlist(word_list, raw=False): word_list = sorted(word_list) fhash = _get_wordlist_hash(word_list) if raw: return fhash - illegal_hash = codecs.encode(fhash,'base64').decode() - replacements = {'+':'-','/':'_',None: ''} - return re.sub(r'(\+|\/)|\n',lambda match: replacements[match.group(1)],illegal_hash) + return base64.urlsafe_b64encode(fhash).decode() + def load_freq_cache(word_list): fname = hash_wordlist(word_list) + '.pkl' - fname = os.path.join('__hangcache__',fname) + fname = os.path.join('__hangcache__', fname) if os.path.exists(fname): - with open(fname,'rb') as file: + with open(fname, 'rb') as file: return pickle.load(file) -def save_freq_cache(word_list,freq): + +def save_freq_cache(word_list, freq): if not os.path.exists('__hangcache__'): os.mkdir('__hangcache__') fname = hash_wordlist(word_list) + '.pkl' - fname = 
os.path.join('__hangcache__',fname) - with open(fname,'wb') as file: - pickle.dump(freq,file) + fname = os.path.join('__hangcache__', fname) + with open(fname, 'wb') as file: + pickle.dump(freq, file) + def generate_letter_frequency(word_list): cached = load_freq_cache(word_list) if cached is not None: return cached ret = {} - for word_num,word in enumerate(word_list): + for word_num, word in enumerate(word_list): letter_counts = {} - for i,letter in enumerate(word): + for i, letter in enumerate(word): try: ret[letter][0] += 1 except KeyError: - ret[letter] = [1,0] - in_word = letter_counts.get(letter,0) + 1 + ret[letter] = [1, 0] + in_word = letter_counts.get(letter, 0) + 1 letter_counts[letter] = in_word - for letter,count in letter_counts.items(): + for letter, count in letter_counts.items(): word_portion = count/len(word) - avg = (ret[letter][1] * word_num) + word_portion + avg = (ret[letter][1] * word_num) + word_portion avg /= word_num + 1 ret[letter][1] = avg if cached is None: - save_freq_cache(word_list,ret) + save_freq_cache(word_list, ret) return ret -def filter_wordlist(input,remaining_letters,word_list): - regex = re.compile(input.replace('.','[{}]'.format(''.join(remaining_letters))) + '$') - matches = map(regex.match,word_list) - remaining_words = (group[1] for group in filter(lambda group: group[0],zip(matches,word_list))) + +def filter_wordlist(input, remaining_letters, word_list): + regex = re.compile(input.replace( + '.', '[{}]'.format(''.join(remaining_letters))) + '$') + matches = map(regex.match, word_list) + remaining_words = (group[1] for group in filter( + lambda group: group[0], zip(matches, word_list))) return list(remaining_words) -PROMPT = "Enter word with '.' to represent missing letters\n('/' to separate multiple words): " +PROMPT = """Enter word with '.' 
to represent missing letters +('/' to separate multiple words): """ NEG_PROMPT = 'Enter letters which are confirmed not to occur: ' ALPHABET = set(letter for letter in alphabet) -def shorten(chars,max_length): + +def shorten(chars, max_length): rows = [''] * max_length - for i,char in enumerate(chars): - row_num = i%max_length + for i, char in enumerate(chars): + row_num = i % max_length addition = char + ' ' * 4 rows[row_num] += addition - return '\n'.join(map(str.rstrip,rows)) + return '\n'.join(map(str.rstrip, rows)) + -def multi_word(l_words,n = 10): +def multi_word(l_words, n=10): # breakpoint() rows = [''] * (n+1) first = True - for count,words in enumerate(l_words): - offset = max(map(len,rows)) - working_set = words[:min(len(words),n)] - working_set.insert(0,str(count+1)) - for i,word in enumerate(working_set): + for count, words in enumerate(l_words): + offset = max(map(len, rows)) + working_set = words[:min(len(words), n)] + working_set.insert(0, str(count+1)) + for i, word in enumerate(working_set): prev_line = rows[i] if len(prev_line) < offset: prev_line += ' '*(offset-len(prev_line)) - rows[i] = prev_line+(' '*4 if not first else '' )+word + rows[i] = prev_line+(' '*4 if not first else '')+word first = False - return filter(bool,map(str.rstrip,rows)) + return filter(bool, map(str.rstrip, rows)) -def print_likely_chars(remaining_letters,let_freq): - overall = shorten(sorted(remaining_letters,key = lambda letter: let_freq[letter][0],reverse = True),5) - per_word = shorten(sorted(remaining_letters,key = lambda letter: let_freq[letter][1],reverse = True),5) - print( 'Good candidates by overall frequency:', overall, sep = '\n') - print('Good candidates by per-word frequency:', per_word, sep = '\n') +def print_likely_chars(remaining_letters, let_freq): + overall = shorten(sorted(remaining_letters, + key=lambda letter: let_freq[letter][0], + reverse=True), 5) + per_word = shorten(sorted(remaining_letters, + key=lambda letter: let_freq[letter][1], + 
reverse=True), 5) + print('Good candidates by overall frequency:', overall, sep='\n') + print('Good candidates by per-word frequency:', per_word, sep='\n') # ensures that new expression could come from previous entry -def check(prev,new,remaining_letters): + + +def check(prev, new, remaining_letters): prev = '/'.join(prev) new = '/'.join(new) if len(prev) == len(new): - good = set(re.findall('[a-z]',prev)) <= remaining_letters + good = set(re.findall('[a-z]', prev)) <= remaining_letters for i in range(len(prev)): p_cur = prev[i] n_cur = new[i] @@ -134,42 +149,47 @@ def check(prev,new,remaining_letters): else: return False + negatives = set() -def iterate(word_list,let_freq,prev_word = None): + +def iterate(word_list, let_freq, prev_word=None): if prev_word is None: - entered_words = re.sub(r'[^a-z\./]','',input(PROMPT)).split('/') + entered_words = re.sub(r'[^a-z\./]', '', input(PROMPT)).split('/') else: valid = False while not valid: - entered_words = re.sub(r'[^a-z\./]','',input(PROMPT)).split('/') - valid = check(prev_word,entered_words,ALPHABET-negatives) + entered_words = re.sub(r'[^a-z\./]', '', input(PROMPT)).split('/') + valid = check(prev_word, entered_words, ALPHABET-negatives) try: word_list[0][0] - except: + except Exception as e: + print("Exception:", e) word_list = [word_list] * len(entered_words) - negative_letters = re.findall('[a-z]',input(NEG_PROMPT)) + negative_letters = re.findall('[a-z]', input(NEG_PROMPT)) negatives.update(negative_letters) - output = [] entered_letters = set() for word in entered_words: - entered_letters.update(re.findall('[a-z]',word)) - remaining_letters = (ALPHABET & set(let_freq.keys())) - entered_letters - negatives - for i,word in enumerate(entered_words): - remaining_possibilities = filter_wordlist(word,remaining_letters,word_list[i]) + entered_letters.update(re.findall('[a-z]', word)) + remaining_letters = (ALPHABET & set(let_freq.keys()) + ) - entered_letters - negatives + for i, word in enumerate(entered_words): + 
remaining_possibilities = filter_wordlist( + word, remaining_letters, word_list[i]) word_list[i] = remaining_possibilities - print('Matches found:', '\n'.join(multi_word(word_list,10)),sep='\n') - print_likely_chars(remaining_letters,let_freq) - return entered_words,word_list + print('Matches found:', '\n'.join(multi_word(word_list, 10)), sep='\n') + print_likely_chars(remaining_letters, let_freq) + return entered_words, word_list + if __name__ == "__main__": - #src: https://github.com/dwyl/english-words + # src: https://github.com/dwyl/english-words words = load_words('words.txt') FREQ = generate_letter_frequency(words) - print_likely_chars(ALPHABET,FREQ) + print_likely_chars(ALPHABET, FREQ) last = None while True: try: - last,words = iterate(words,FREQ,last) + last, words = iterate(words, FREQ, last) except KeyboardInterrupt: - break \ No newline at end of file + break