import base64
import hashlib
import os
import pickle
import re
from string import ascii_lowercase as alphabet
import sys
# 32 or 64 bit platform?
if sys.maxsize > 2**32:
    HASH_FUNC = hashlib.blake2b
else:
    HASH_FUNC = hashlib.blake2s
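
# Read a newline-separated word list into a set of lower-cased words,
# skipping blank lines.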
def load_words(filename):
    with open(filename) as file:
        text = file.read()
    return set(map(str.lower, filter(bool, text.split('\n'))))
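
# Hash the sorted words so the digest does not depend on iteration order.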
def _get_wordlist_hash(word_list_s):
    _hash = HASH_FUNC()
    for word in sorted(word_list_s):
        word_bytes = word.encode()
        _hash.update(word_bytes)
    return _hash.digest()
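
# Stable, filename-safe identifier for a word list: the raw digest, or its
# URL-safe base64 encoding.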
def hash_wordlist(word_list, raw=False):
    fhash = _get_wordlist_hash(word_list)
    if raw:
        return fhash
    # Encode (not decode) the digest so it can be used as a str filename.
    return base64.urlsafe_b64encode(fhash).decode()
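
# Return the cached letter-frequency table for this word list, or None if
# no cache file exists yet.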
def load_freq_cache(word_list):
    fname = hash_wordlist(word_list) + '.pkl'
    fname = os.path.join('__hangcache__', fname)
    if os.path.exists(fname):
        with open(fname, 'rb') as file:
            return pickle.load(file)
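
# Pickle the frequency table into __hangcache__/, keyed by the word-list hash.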
def save_freq_cache(word_list, freq):
    if not os.path.exists('__hangcache__'):
        os.mkdir('__hangcache__')
    fname = hash_wordlist(word_list) + '.pkl'
    fname = os.path.join('__hangcache__', fname)
    with open(fname, 'wb') as file:
        pickle.dump(freq, file)
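
# Build {letter: [total occurrences, rough per-word frequency score]} for the
# word list, using the on-disk cache when available.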
def generate_letter_frequency(word_list):
    cached = load_freq_cache(word_list)
    if cached is not None:
        return cached
    ret = {}
    for word_num, word in enumerate(word_list):
        letter_counts = {}
        for letter in word:
            try:
                ret[letter][0] += 1
            except KeyError:
                ret[letter] = [1, 0]
            in_word = letter_counts.get(letter, 0) + 1
            letter_counts[letter] = in_word
        for letter, count in letter_counts.items():
            word_portion = count / len(word)
            avg = (ret[letter][1] * word_num) + word_portion
            avg /= word_num + 1
            ret[letter][1] = avg
    save_freq_cache(word_list, ret)
    return ret
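
# Keep only the words that match the entered pattern, where each '.' may be
# any letter that has not been guessed or ruled out.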
def filter_wordlist(pattern, remaining_letters, word_list):
    regex = re.compile(pattern.replace(
        '.', '[{}]'.format(''.join(remaining_letters))) + '$')
    matches = map(regex.match, word_list)
    remaining_words = (group[1] for group in filter(
        lambda group: group[0], zip(matches, word_list)))
    return list(remaining_words)
PROMPT = """Enter word with '.' to represent missing letters
('/' to separate multiple words): """
NEG_PROMPT = 'Enter letters which are confirmed not to occur: '
ALPHABET = set(alphabet)
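
# Arrange characters into at most max_length rows, four spaces apart, for a
# compact multi-column display.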
def shorten(chars, max_length):
    rows = [''] * max_length
    for i, char in enumerate(chars):
        row_num = i % max_length
        addition = char + ' ' * 4
        rows[row_num] += addition
    return '\n'.join(map(str.rstrip, rows))
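
# Format several candidate lists side by side: one numbered column per entered
# word, showing at most n candidates each.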
def multi_word(l_words, n=10):
    rows = [''] * (n + 1)
    first = True
    for count, words in enumerate(l_words):
        offset = max(map(len, rows))
        working_set = words[:min(len(words), n)]
        working_set.insert(0, str(count + 1))
        for i, word in enumerate(working_set):
            prev_line = rows[i]
            if len(prev_line) < offset:
                prev_line += ' ' * (offset - len(prev_line))
            rows[i] = prev_line + (' ' * 4 if not first else '') + word
        first = False
    return filter(bool, map(str.rstrip, rows))
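
# Show the remaining letters ranked two ways: by total occurrences and by the
# per-word frequency score.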
def print_likely_chars(remaining_letters, let_freq):
    overall = shorten(sorted(remaining_letters,
                             key=lambda letter: let_freq[letter][0],
                             reverse=True), 5)
    per_word = shorten(sorted(remaining_letters,
                              key=lambda letter: let_freq[letter][1],
                              reverse=True), 5)
    print('Good candidates by overall frequency:', overall, sep='\n')
    print('Good candidates by per-word frequency:', per_word, sep='\n')
# Ensure that the newly entered expression is consistent with the previous
# entry: same length, '/' separators unchanged, and previously revealed
# letters left intact.
def check(prev, new, remaining_letters):
    prev = '/'.join(prev)
    new = '/'.join(new)
    if len(prev) != len(new):
        return False
    good = set(re.findall('[a-z]', prev)) <= remaining_letters
    for i in range(len(prev)):
        p_cur = prev[i]
        n_cur = new[i]
        if p_cur == '/':
            good = p_cur == n_cur
        elif p_cur == '.':
            continue
        else:
            good = p_cur == n_cur
        if not good:
            return False
    return good
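
# Letters the user has reported as absent; shared across rounds.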
negatives = set()
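
# One solver round: read the current pattern(s) and the excluded letters, then
# narrow each word's candidate list and suggest the next letters to guess. On
# the first round the flat word set is expanded into one candidate list per
# entered word.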
def iterate(word_list, let_freq, prev_word=None):
    if prev_word is None:
        entered_words = re.sub(r'[^a-z\./]', '', input(PROMPT)).split('/')
    else:
        valid = False
        while not valid:
            entered_words = re.sub(r'[^a-z\./]', '', input(PROMPT)).split('/')
            valid = check(prev_word, entered_words, ALPHABET - negatives)
    try:
        word_list[0][0]
    except Exception as e:
        print("Exception:", e)
        word_list = [word_list] * len(entered_words)
    negative_letters = re.findall('[a-z]', input(NEG_PROMPT))
    negatives.update(negative_letters)
    entered_letters = set()
    for word in entered_words:
        entered_letters.update(re.findall('[a-z]', word))
    remaining_letters = (ALPHABET & set(let_freq.keys())
                         ) - entered_letters - negatives
    for i, word in enumerate(entered_words):
        remaining_possibilities = filter_wordlist(
            word, remaining_letters, word_list[i])
        word_list[i] = remaining_possibilities
    print('Matches found:', '\n'.join(multi_word(word_list, 10)), sep='\n')
    print_likely_chars(remaining_letters, let_freq)
    return entered_words, word_list
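
# Interactive loop: keep refining until the user presses Ctrl-C.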
if __name__ == "__main__":
    # src: https://github.com/dwyl/english-words
    words = load_words('words.txt')
    FREQ = generate_letter_frequency(words)
    print_likely_chars(ALPHABET, FREQ)
    last = None
    while True:
        try:
            last, words = iterate(words, FREQ, last)
        except KeyboardInterrupt:
            break