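"""Interactive hangman helper.

Builds letter-frequency statistics from a word list and, given a partially
revealed word (with '.' standing in for unknown letters), prints matching
candidates and suggests which letters to try next.  The frequency table is
cached on disk under __hangchache__/ keyed by a hash of the word list.
"""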
import codecs
import hashlib
import os
import pickle
import re
import sys

# Pick a digest sized for the platform: blake2b on 64-bit, blake2s on 32-bit.
if sys.maxsize > 2**32:
    HASH_FUNC = hashlib.blake2b
else:
    HASH_FUNC = hashlib.blake2s

def load_words(filename):
    """Load a newline-separated word list, lowercased, skipping blank lines."""
    with open(filename) as file:
        text = file.read()
    return set(map(str.lower, filter(bool, text.split('\n'))))

def _get_wordlist_hash(word_list_s):
    # Hash words in sorted order so the digest does not depend on input order.
    _hash = HASH_FUNC()
    for word in sorted(word_list_s):
        _hash.update(word.encode())
    return _hash.digest()

def hash_wordlist(word_list, raw=False):
    """Return a stable fingerprint of word_list, filename-safe unless raw=True."""
    fhash = _get_wordlist_hash(word_list)
    if raw:
        return fhash
    # The base64 codec emits '+', '/' and newlines, none of which belong in a
    # file name: remap the first two and strip the newlines.
    b64 = codecs.encode(fhash, 'base64').decode()
    t_table = str.maketrans({'+': '-', '/': '_', '\n': None})
    return b64.translate(t_table)
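
# Illustrative use of hash_wordlist (the digest shown is a placeholder, not a
# real value; the actual string depends on the platform-selected hash above):
#
#   >>> hash_wordlist({'cat', 'dog'})
#   'kLxG..._Zz8='
#   >>> hash_wordlist({'dog', 'cat'}) == hash_wordlist({'cat', 'dog'})
#   True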

def load_freq_cache(word_list):
    """Return the cached frequency table for word_list, or None if absent."""
    fname = hash_wordlist(word_list) + '.pkl'
    fname = os.path.join('__hangchache__', fname)
    if os.path.exists(fname):
        with open(fname, 'rb') as file:
            return pickle.load(file)

def save_freq_cache(word_list, freq):
    """Pickle the frequency table under a name derived from the word list."""
    if not os.path.exists('__hangchache__'):
        os.mkdir('__hangchache__')
    fname = hash_wordlist(word_list) + '.pkl'
    fname = os.path.join('__hangchache__', fname)
    with open(fname, 'wb') as file:
        pickle.dump(freq, file)

def generate_letter_frequency(word_list):
    """Build {letter: [total_occurrences, average_share_of_containing_word]}."""
    cached = load_freq_cache(word_list)
    if cached is not None:
        return cached
    ret = {}
    # Number of words each letter has appeared in so far, used to keep the
    # running average in ret[letter][1] correct.
    words_with_letter = {}
    for word in word_list:
        letter_counts = {}
        for letter in word:
            try:
                ret[letter][0] += 1
            except KeyError:
                ret[letter] = [1, 0.0]
            letter_counts[letter] = letter_counts.get(letter, 0) + 1
        for letter, count in letter_counts.items():
            word_portion = count / len(word)
            seen = words_with_letter.get(letter, 0)
            # Running average of the share of each containing word taken up by this letter.
            ret[letter][1] = (ret[letter][1] * seen + word_portion) / (seen + 1)
            words_with_letter[letter] = seen + 1
    save_freq_cache(word_list, ret)
    return ret
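
# Shape of the mapping returned above (numbers are illustrative, not real output):
#
#   {'e': [5423, 0.41],   # 5423 occurrences; on average 41% of the letters of
#    't': [3921, 0.35],   #   a word that contains it
#    ...}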

PROMPT = "Enter word with '.' to represent missing letters: "


def iterate(word_list, let_freq):
    entered_word = input(PROMPT).replace(' ', '')
    entered_letters = set(entered_word.replace('.', ''))
    remaining_letters = set(let_freq.keys()) - entered_letters
    # Each '.' becomes a single-letter wildcard; fullmatch keeps the length exact.
    regex = entered_word.replace('.', '[A-Za-z]')
    remaining_possibilities = [word for word in word_list if re.fullmatch(regex, word)]
    print('Matches found:\n' + '\n'.join(remaining_possibilities[:30]))
    print('Good candidates by overall frequency:\n'
          + '\n'.join(sorted(remaining_letters, key=lambda letter: let_freq[letter][0], reverse=True)))
    print('Good candidates by per-word frequency:\n'
          + '\n'.join(sorted(remaining_letters, key=lambda letter: let_freq[letter][1], reverse=True)))
    return entered_word, remaining_possibilities

if __name__ == "__main__":
    words = load_words('words.txt')
    FREQ = generate_letter_frequency(words)
    while True:
        try:
            last, WORDS = iterate(words, FREQ)
        except KeyboardInterrupt:
            break
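
# Example session (output abbreviated and illustrative; assumes a
# newline-separated words.txt next to this script, whose file name below is a
# placeholder):
#
#   $ python hangman_helper.py
#   Enter word with '.' to represent missing letters: .a.
#   Matches found:
#   bat
#   cab
#   ...
#   Good candidates by overall frequency:
#   e
#   ...
#   (press Ctrl-C to quit)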