|
|
|
@ -1,21 +1,22 @@ |
|
|
|
import codecs |
|
|
|
import base64 |
|
|
|
import hashlib |
|
|
|
import json |
|
|
|
import os |
|
|
|
import pickle |
|
|
|
import re |
|
|
|
from string import ascii_lowercase as alphabet |
|
|
|
import sys |
|
|
|
# Choose the BLAKE2 variant matching the platform word size:
# blake2b is tuned for 64-bit builds, blake2s for 32-bit builds.
HASH_FUNC = hashlib.blake2b if sys.maxsize > 2**32 else hashlib.blake2s
|
|
|
|
|
|
|
|
|
|
|
def load_words(filename):
    """Read *filename* and return the set of its non-empty lines, lowercased."""
    with open(filename) as handle:
        lines = handle.read().split('\n')
    # Empty lines are dropped; words are normalized to lowercase.
    return {line.lower() for line in lines if line}
|
|
|
|
|
|
|
|
|
|
|
def _get_wordlist_hash(word_list_s): |
|
|
|
_hash = HASH_FUNC() |
|
|
|
@ -24,101 +25,115 @@ def _get_wordlist_hash(word_list_s): |
|
|
|
_hash.update(word_bytes) |
|
|
|
return _hash.digest() |
|
|
|
|
|
|
|
def hash_wordlist(word_list, raw=False):
    """Hash *word_list* order-insensitively (it is sorted first).

    Returns the raw digest bytes when raw=True; otherwise a URL/filename-safe
    base64 string suitable for use as a cache file name.
    """
    word_list = sorted(word_list)
    fhash = _get_wordlist_hash(word_list)
    if raw:
        return fhash
    # Bug fix: the digest must be base64-ENcoded (the previous code called
    # urlsafe_b64decode), and decoded to str so callers can append '.pkl'.
    # urlsafe_b64encode already maps '+'/'/' to '-'/'_' and emits no newlines,
    # replacing the old codecs + re.sub cleanup.
    return base64.urlsafe_b64encode(fhash).decode()
|
|
|
|
|
|
|
|
|
|
|
def load_freq_cache(word_list):
    """Return the cached letter-frequency table for *word_list*, or None."""
    cache_path = os.path.join('__hangcache__', hash_wordlist(word_list) + '.pkl')
    # Missing cache file means no cached result.
    if not os.path.exists(cache_path):
        return None
    with open(cache_path, 'rb') as handle:
        return pickle.load(handle)
|
|
|
|
|
|
|
def save_freq_cache(word_list, freq):
    """Persist *freq* (the letter-frequency table) keyed by word_list's hash."""
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs('__hangcache__', exist_ok=True)
    fname = hash_wordlist(word_list) + '.pkl'
    fname = os.path.join('__hangcache__', fname)
    with open(fname, 'wb') as file:
        pickle.dump(freq, file)
|
|
|
|
|
|
|
|
|
|
|
def generate_letter_frequency(word_list):
    """Build ``{letter: [total_count, avg_fraction_per_word]}`` for *word_list*.

    ``total_count`` is how many times the letter occurs across all words;
    the second slot is a running mean of the letter's share of each word's
    length.  Results are cached on disk, keyed by the word-list hash.
    """
    cached = load_freq_cache(word_list)
    if cached is not None:
        return cached
    ret = {}
    for word_num, word in enumerate(word_list):
        letter_counts = {}
        for letter in word:  # the index was unused; plain iteration suffices
            try:
                ret[letter][0] += 1
            except KeyError:
                ret[letter] = [1, 0]
            in_word = letter_counts.get(letter, 0) + 1
            letter_counts[letter] = in_word
        for letter, count in letter_counts.items():
            word_portion = count / len(word)
            # Running mean over word_num + 1 words.
            # NOTE(review): letters absent from this word keep their previous
            # mean untouched, so the effective denominator differs between
            # letters — presumably intentional; confirm before changing.
            avg = (ret[letter][1] * word_num) + word_portion
            avg /= word_num + 1
            ret[letter][1] = avg
    # `cached` is known to be None here (we returned early otherwise), so the
    # redundant `if cached is None` guard was removed: always persist.
    save_freq_cache(word_list, ret)
    return ret
|
|
|
|
|
|
|
def filter_wordlist(input, remaining_letters, word_list):
    """Return the words of *word_list* matching the pattern *input*.

    Each '.' in *input* may stand for any letter in *remaining_letters*;
    the match is anchored at both ends (re.match plus a trailing '$').

    NOTE(review): the parameter name `input` shadows the builtin; it is kept
    unchanged for interface compatibility with existing callers.
    """
    char_class = '[{}]'.format(''.join(remaining_letters))
    regex = re.compile(input.replace('.', char_class) + '$')
    # A comprehension replaces the old map/zip/filter/generator chain; the
    # result was already materialized into a list before being returned.
    return [word for word in word_list if regex.match(word)]
|
|
|
|
|
|
|
|
|
|
|
# User-facing prompts for the interactive loop.
PROMPT = """Enter word with '.' to represent missing letters
('/' to separate multiple words): """
NEG_PROMPT = 'Enter letters which are confirmed not to occur: '
# All candidate letters; set(str) iterates characters directly (the explicit
# generator expression was redundant).
ALPHABET = set(alphabet)
|
|
|
|
|
|
|
def shorten(chars, max_length):
    """Lay *chars* out column-wise across *max_length* rows, four spaces apart."""
    rows = [[] for _ in range(max_length)]
    # Distribute characters round-robin over the rows.
    for index, char in enumerate(chars):
        rows[index % max_length].append(char)
    # Joining with the 4-space gutter leaves no trailing whitespace, matching
    # the append-then-rstrip behavior of the original.
    return '\n'.join('    '.join(row) for row in rows)
|
|
|
|
|
|
|
|
|
|
|
def multi_word(l_words, n=10):
    """Format several word lists side by side: one numbered column per list,
    at most *n* words each, separated by a four-space gutter.

    Returns a lazy iterable of the non-empty, right-stripped rows.
    """
    rows = [''] * (n + 1)  # one header row (the column number) plus n words
    for column, words in enumerate(l_words):
        width = max(len(row) for row in rows)
        column_entries = [str(column + 1)] + words[:n]
        gutter = '' if column == 0 else ' ' * 4
        for row_index, entry in enumerate(column_entries):
            # Pad this row to the current column boundary before appending.
            rows[row_index] = rows[row_index].ljust(width) + gutter + entry
    return filter(bool, map(str.rstrip, rows))
|
|
|
|
|
|
|
|
|
|
|
def print_likely_chars(remaining_letters, let_freq):
    """Print *remaining_letters* ranked by overall and by per-word frequency."""
    def ranked(stat_index):
        # Sort best-first by the chosen statistic, laid out in 5 rows.
        ordered = sorted(remaining_letters,
                         key=lambda letter: let_freq[letter][stat_index],
                         reverse=True)
        return shorten(ordered, 5)

    print('Good candidates by overall frequency:', ranked(0), sep='\n')
    print('Good candidates by per-word frequency:', ranked(1), sep='\n')
|
|
|
|
|
|
|
# ensures that new expression could come from previous entry |
|
|
|
def check(prev,new,remaining_letters): |
|
|
|
|
|
|
|
|
|
|
|
def check(prev, new, remaining_letters): |
|
|
|
prev = '/'.join(prev) |
|
|
|
new = '/'.join(new) |
|
|
|
if len(prev) == len(new): |
|
|
|
good = set(re.findall('[a-z]',prev)) <= remaining_letters |
|
|
|
good = set(re.findall('[a-z]', prev)) <= remaining_letters |
|
|
|
for i in range(len(prev)): |
|
|
|
p_cur = prev[i] |
|
|
|
n_cur = new[i] |
|
|
|
@ -134,42 +149,47 @@ def check(prev,new,remaining_letters): |
|
|
|
else: |
|
|
|
return False |
|
|
|
|
|
|
|
|
|
|
|
# Letters the user has confirmed are absent from the puzzle; module-level
# mutable state shared across successive iterate() calls.
negatives = set()
|
|
|
|
|
|
|
def iterate(word_list, let_freq, prev_word=None):
    """Run one interactive round: read the puzzle state, filter each word's
    candidate list, and print matches plus suggested letters.

    Returns (entered_words, word_list) so the caller can feed them back in
    on the next round.
    """
    if prev_word is None:
        # First round: accept whatever the user types (letters, '.', '/').
        entered_words = re.sub(r'[^a-z\./]', '', input(PROMPT)).split('/')
    else:
        # Later rounds: re-prompt until the new entry is consistent with the
        # previous one and with the letters not yet ruled out.
        valid = False
        while not valid:
            entered_words = re.sub(r'[^a-z\./]', '', input(PROMPT)).split('/')
            valid = check(prev_word, entered_words, ALPHABET - negatives)
    try:
        # A list-of-lists (one candidate list per word) is subscriptable
        # twice; the initial flat word set is not, so normalize it.  This is
        # expected control flow on the first round, so no debug print here
        # (the previous broad `except Exception` also printed the exception).
        word_list[0][0]
    except (TypeError, IndexError):
        word_list = [word_list] * len(entered_words)
    negative_letters = re.findall('[a-z]', input(NEG_PROMPT))
    negatives.update(negative_letters)
    # (removed: an unused `output = []` local)
    entered_letters = set()
    for word in entered_words:
        entered_letters.update(re.findall('[a-z]', word))
    remaining_letters = (ALPHABET & set(let_freq.keys())) - entered_letters - negatives
    for i, word in enumerate(entered_words):
        word_list[i] = filter_wordlist(word, remaining_letters, word_list[i])
    print('Matches found:', '\n'.join(multi_word(word_list, 10)), sep='\n')
    print_likely_chars(remaining_letters, let_freq)
    return entered_words, word_list
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Word list source: https://github.com/dwyl/english-words
    words = load_words('words.txt')
    FREQ = generate_letter_frequency(words)
    print_likely_chars(ALPHABET, FREQ)
    last = None
    # Keep refining guesses until the user aborts with Ctrl-C.
    while True:
        try:
            last, words = iterate(words, FREQ, last)
        except KeyboardInterrupt:
            break