You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

112 lines
4.1 KiB

import base64
import codecs
import hashlib
import json
import os
import pickle
import re
import sys
from string import ascii_lowercase as alphabet
# Pick the widest BLAKE2 variant the platform supports:
# blake2b is tuned for 64-bit builds, blake2s for 32-bit ones.
HASH_FUNC = hashlib.blake2b if sys.maxsize > 2**32 else hashlib.blake2s
def load_words(filename):
    """Read *filename* and return the set of its non-empty lines, lowercased."""
    with open(filename) as handle:
        contents = handle.read()
    # Splitting on '\n' and dropping falsy entries skips blank lines.
    return {line.lower() for line in contents.split('\n') if line}
def _get_wordlist_hash(word_list_s):
    """Hash the words of *word_list_s* in sorted order; return the raw digest.

    Sorting makes the digest independent of the input's iteration order.
    """
    digest = HASH_FUNC()
    for entry in sorted(word_list_s):
        digest.update(entry.encode())
    return digest.digest()
def hash_wordlist(word_list, raw=False):
    """Return a stable hash of *word_list* (any iterable of strings).

    With raw=True, return the digest as raw bytes.  Otherwise return the
    digest encoded as padded URL-safe base64 ('+' -> '-', '/' -> '_', no
    newlines), suitable for use as a filename.

    Fixes over the original: the redundant pre-sort is gone
    (_get_wordlist_hash sorts already), and the hand-rolled codecs+regex
    base64url encoding is replaced by base64.urlsafe_b64encode, which
    produces byte-identical output.
    """
    fhash = _get_wordlist_hash(word_list)
    if raw:
        return fhash
    return base64.urlsafe_b64encode(fhash).decode()
def load_freq_cache(word_list):
    """Return the cached letter-frequency dict for *word_list*, or None.

    The cache file lives under __hangcache__/ and is named after the
    word-list hash.  NOTE(review): this unpickles whatever is in the cache
    directory — only safe while that directory is trusted.
    """
    fname = os.path.join('__hangcache__', hash_wordlist(word_list) + '.pkl')
    # EAFP: attempt the open instead of racing an exists() check.
    try:
        with open(fname, 'rb') as file:
            return pickle.load(file)
    except FileNotFoundError:
        return None
def save_freq_cache(word_list, freq):
    """Pickle *freq* into __hangcache__/, keyed by the hash of *word_list*."""
    # makedirs(exist_ok=True) avoids the exists()/mkdir() race of the original.
    os.makedirs('__hangcache__', exist_ok=True)
    fname = os.path.join('__hangcache__', hash_wordlist(word_list) + '.pkl')
    with open(fname, 'wb') as file:
        pickle.dump(freq, file)
def generate_letter_frequency(word_list):
    """Return {letter: [total_occurrences, avg_portion]} for *word_list*.

    total_occurrences counts every occurrence of the letter across all
    words.  avg_portion is the mean, over ALL words, of the fraction of a
    word's characters that are this letter (words lacking the letter
    contribute 0 to that mean).

    Results are cached on disk, keyed by a hash of the word list.

    Bug fixes over the original: the incremental running average multiplied
    the previous average by the current word *index* rather than by the
    number of words actually averaged so far, inflating the average for any
    letter absent from intermediate words; and the `if cached is None:`
    guard before saving was dead code (always true after the early return).
    Cache files written by the old version may therefore hold stale values.
    """
    cached = load_freq_cache(word_list)
    if cached is not None:
        return cached
    ret = {}
    num_words = 0
    for word in word_list:
        num_words += 1
        letter_counts = {}
        for letter in word:
            letter_counts[letter] = letter_counts.get(letter, 0) + 1
        for letter, count in letter_counts.items():
            stats = ret.setdefault(letter, [0, 0.0])
            stats[0] += count
            # Accumulate this word's portion; divided by num_words below.
            stats[1] += count / len(word)
    if num_words:
        for stats in ret.values():
            stats[1] /= num_words
    save_freq_cache(word_list, ret)
    return ret
PROMPT = "Enter word with '.' to represent missing letters: "
NEG_PROMPT = 'Enter letters which are confirmed not to occur: '
# All lowercase ASCII letters, as a set for fast membership tests.
ALPHABET = set(alphabet)
def shorten(chars, max_length):
    """Lay *chars* out over *max_length* text rows, dealt round-robin.

    Each character is padded with four trailing spaces; trailing whitespace
    is stripped from every row and rows are joined with newlines.
    """
    rows = [''] * max_length
    for position, char in enumerate(chars):
        rows[position % max_length] += char + '    '
    return '\n'.join(row.rstrip() for row in rows)
def print_likely_chars(remaining_letters, let_freq):
    """Print *remaining_letters* ranked by each of the two frequency stats.

    Stat index 0 is the overall occurrence count, index 1 the per-word
    frequency; both rankings are laid out in 5 rows via shorten().
    """
    def ranking(stat_index):
        ordered = sorted(remaining_letters,
                         key=lambda letter: let_freq[letter][stat_index],
                         reverse=True)
        return shorten(ordered, 5)
    print('Good candidates by overall frequency:\n' + ranking(0))
    print('Good candidates by per-word frequency:\n' + ranking(1))
# Letters the user has ruled out, accumulated across rounds of iterate().
negatives = set()

def iterate(word_list, let_freq):
    """Run one solver round: prompt the user, then filter the candidates.

    Reads the partially-known word (with '.' for unknowns) and any newly
    excluded letters from stdin, narrows *word_list* accordingly, prints up
    to 10 matches plus letter-ranking hints, and returns the pair
    (entered_word, remaining_possibilities).  Mutates the module-level
    `negatives` set as a side effect.
    """
    # Sanitise input: keep only lowercase letters and '.' placeholders.
    entered_word = re.sub(r'[^a-z\.]','',input(PROMPT))
    negative_letters = re.findall('[a-z]',input(NEG_PROMPT))
    negatives.update(negative_letters)
    # Letters already placed in the word are no longer candidates.
    entered_letters = set(letter for letter in entered_word.replace('.',''))
    # Candidates = letters seen in the frequency table, minus placed ones,
    # minus everything the user has excluded so far.
    remaining_letters = set(filter(lambda letter: letter in ALPHABET,let_freq.keys())) - entered_letters - negatives
    # Each '.' may match any candidate letter; '$' anchors the end so
    # re.match behaves like a full-word match.
    # NOTE(review): if remaining_letters is ever empty this builds the
    # invalid character class '[]' and re.match raises re.error — confirm
    # whether that state is reachable in practice.
    regex = entered_word.replace('.','[{}]'.format(''.join(remaining_letters))) + '$'
    remaining_possibilities = list(filter(lambda word: re.match(regex,word),word_list))
    # Show at most the first 10 surviving words.
    print('Matches found:\n' + '\n'.join(remaining_possibilities[i] for i in range(min(10,len(remaining_possibilities)))))
    print_likely_chars(remaining_letters,let_freq)
    return entered_word,remaining_possibilities
if __name__ == "__main__":
    # Bootstrap: load the dictionary, build (or load) the frequency table,
    # and show the initial letter rankings for the full alphabet.
    words = load_words('words.txt')
    FREQ = generate_letter_frequency(words)
    print_likely_chars(ALPHABET, FREQ)
    # Keep narrowing the candidate list until the user presses Ctrl-C.
    try:
        while True:
            last, words = iterate(words, FREQ)
    except KeyboardInterrupt:
        pass