Browse Source

Pep8 compliant using url-safe base64 encoding

master
Raphael Roberts 7 years ago
parent
commit
3d72bad253
  1. 48
      hangman.py

48
hangman.py

@ -1,6 +1,5 @@
import codecs
import base64
import hashlib import hashlib
import json
import os import os
import pickle import pickle
import re import re
@ -12,11 +11,13 @@ if sys.maxsize > 2**32:
else: else:
HASH_FUNC = hashlib.blake2s HASH_FUNC = hashlib.blake2s
def load_words(filename):
    """Load a word list from a text file holding one word per line.

    Args:
        filename: Path of the text file to read.

    Returns:
        A set with the lower-cased content of every non-blank line.
        Surrounding whitespace is stripped, so whitespace-only lines are
        skipped and trailing blanks do not end up inside a word.
    """
    with open(filename) as file:
        # Iterate the file lazily instead of reading it into one big string.
        stripped = (line.strip() for line in file)
        return {word.lower() for word in stripped if word}
def _get_wordlist_hash(word_list_s): def _get_wordlist_hash(word_list_s):
_hash = HASH_FUNC() _hash = HASH_FUNC()
for word in sorted(word_list_s): for word in sorted(word_list_s):
@ -24,14 +25,14 @@ def _get_wordlist_hash(word_list_s):
_hash.update(word_bytes) _hash.update(word_bytes)
return _hash.digest() return _hash.digest()
def hash_wordlist(word_list, raw=False):
    """Return a hash identifying *word_list*, usable as a file-name stem.

    Args:
        word_list: Iterable of words. It is sorted before hashing, so the
            order of the input does not influence the result.
        raw: When true, return the digest produced by
            ``_get_wordlist_hash`` unchanged.

    Returns:
        The raw digest when *raw* is true; otherwise the digest as URL-safe
        base64 text ('-' and '_' in place of '+' and '/', no newlines).
    """
    word_list = sorted(word_list)
    fhash = _get_wordlist_hash(word_list)
    if raw:
        return fhash
    # The digest must be *encoded*, not decoded: it is raw bytes, and callers
    # concatenate the result with '.pkl', so it also has to be a str.
    return base64.urlsafe_b64encode(fhash).decode()
def load_freq_cache(word_list): def load_freq_cache(word_list):
fname = hash_wordlist(word_list) + '.pkl' fname = hash_wordlist(word_list) + '.pkl'
@ -40,6 +41,7 @@ def load_freq_cache(word_list):
with open(fname, 'rb') as file: with open(fname, 'rb') as file:
return pickle.load(file) return pickle.load(file)
def save_freq_cache(word_list, freq): def save_freq_cache(word_list, freq):
if not os.path.exists('__hangcache__'): if not os.path.exists('__hangcache__'):
os.mkdir('__hangcache__') os.mkdir('__hangcache__')
@ -48,6 +50,7 @@ def save_freq_cache(word_list,freq):
with open(fname, 'wb') as file: with open(fname, 'wb') as file:
pickle.dump(freq, file) pickle.dump(freq, file)
def generate_letter_frequency(word_list): def generate_letter_frequency(word_list):
cached = load_freq_cache(word_list) cached = load_freq_cache(word_list)
if cached is not None: if cached is not None:
@ -71,17 +74,22 @@ def generate_letter_frequency(word_list):
save_freq_cache(word_list, ret) save_freq_cache(word_list, ret)
return ret return ret
def filter_wordlist(input, remaining_letters, word_list):
    """Return the words in *word_list* that fit the pattern *input*.

    Args:
        input: Pattern for a single word. Each '.' stands for an unknown
            letter; all other characters are used as regex text as-is.
        remaining_letters: Iterable of the letters an unknown position may
            still hold.
        word_list: Iterable of candidate words.

    Returns:
        A list of the matching words, in the order they occur in
        *word_list*.
    """
    letters = ''.join(map(re.escape, remaining_letters))
    if not letters and '.' in input:
        # '[]' is not a valid (empty) character class in a regex, and no
        # word can fill a blank when there are no letters left to fill it.
        return []
    regex = re.compile(input.replace('.', '[{}]'.format(letters)) + '$')
    return [word for word in word_list if regex.match(word)]
# Prompt passed to input() when asking for the current word pattern(s).
PROMPT = (
    "Enter word with '.' to represent missing letters\n"
    "('/' to separate multiple words): "
)
# Prompt passed to input() when asking for letters that were ruled out.
NEG_PROMPT = 'Enter letters which are confirmed not to occur: '
# Every character of `alphabet` (a name defined outside this view), as a set.
ALPHABET = set(alphabet)
def shorten(chars, max_length): def shorten(chars, max_length):
rows = [''] * max_length rows = [''] * max_length
for i, char in enumerate(chars): for i, char in enumerate(chars):
@ -90,6 +98,7 @@ def shorten(chars,max_length):
rows[row_num] += addition rows[row_num] += addition
return '\n'.join(map(str.rstrip, rows)) return '\n'.join(map(str.rstrip, rows))
def multi_word(l_words, n=10): def multi_word(l_words, n=10):
# breakpoint() # breakpoint()
rows = [''] * (n+1) rows = [''] * (n+1)
@ -108,12 +117,18 @@ def multi_word(l_words,n = 10):
def print_likely_chars(remaining_letters, let_freq):
    """Print the remaining letters ranked by two frequency measures.

    Args:
        remaining_letters: Iterable of letters still available to guess.
        let_freq: Mapping from letter to an indexable pair; element 0 is
            printed under the "overall frequency" heading and element 1
            under the "per-word frequency" heading.
    """
    def ranked(column):
        # Highest value first, then laid out by shorten() with an argument
        # of 5 (its max_length parameter).
        ordered = sorted(remaining_letters,
                         key=lambda letter: let_freq[letter][column],
                         reverse=True)
        return shorten(ordered, 5)

    overall = ranked(0)
    per_word = ranked(1)
    print('Good candidates by overall frequency:', overall, sep='\n')
    print('Good candidates by per-word frequency:', per_word, sep='\n')
# ensures that new expression could come from previous entry # ensures that new expression could come from previous entry
def check(prev, new, remaining_letters): def check(prev, new, remaining_letters):
prev = '/'.join(prev) prev = '/'.join(prev)
new = '/'.join(new) new = '/'.join(new)
@ -134,8 +149,10 @@ def check(prev,new,remaining_letters):
else: else:
return False return False
negatives = set() negatives = set()
def iterate(word_list, let_freq, prev_word=None): def iterate(word_list, let_freq, prev_word=None):
if prev_word is None: if prev_word is None:
entered_words = re.sub(r'[^a-z\./]', '', input(PROMPT)).split('/') entered_words = re.sub(r'[^a-z\./]', '', input(PROMPT)).split('/')
@ -146,22 +163,25 @@ def iterate(word_list,let_freq,prev_word = None):
valid = check(prev_word, entered_words, ALPHABET-negatives) valid = check(prev_word, entered_words, ALPHABET-negatives)
try: try:
word_list[0][0] word_list[0][0]
except:
except Exception as e:
print("Exception:", e)
word_list = [word_list] * len(entered_words) word_list = [word_list] * len(entered_words)
negative_letters = re.findall('[a-z]', input(NEG_PROMPT)) negative_letters = re.findall('[a-z]', input(NEG_PROMPT))
negatives.update(negative_letters) negatives.update(negative_letters)
output = []
entered_letters = set() entered_letters = set()
for word in entered_words: for word in entered_words:
entered_letters.update(re.findall('[a-z]', word)) entered_letters.update(re.findall('[a-z]', word))
remaining_letters = (ALPHABET & set(let_freq.keys())) - entered_letters - negatives
remaining_letters = (ALPHABET & set(let_freq.keys())
) - entered_letters - negatives
for i, word in enumerate(entered_words): for i, word in enumerate(entered_words):
remaining_possibilities = filter_wordlist(word,remaining_letters,word_list[i])
remaining_possibilities = filter_wordlist(
word, remaining_letters, word_list[i])
word_list[i] = remaining_possibilities word_list[i] = remaining_possibilities
print('Matches found:', '\n'.join(multi_word(word_list, 10)), sep='\n') print('Matches found:', '\n'.join(multi_word(word_list, 10)), sep='\n')
print_likely_chars(remaining_letters, let_freq) print_likely_chars(remaining_letters, let_freq)
return entered_words, word_list return entered_words, word_list
if __name__ == "__main__": if __name__ == "__main__":
# src: https://github.com/dwyl/english-words # src: https://github.com/dwyl/english-words
words = load_words('words.txt') words = load_words('words.txt')

Loading…
Cancel
Save