You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

94 lines
3.4 KiB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
import codecs
import functools
import hashlib
import json
import os
import pickle
import re
import sys
  8. #32 or 64 bit platform?
  9. if sys.maxsize > 2**32:
  10. HASH_FUNC = hashlib.blake2b
  11. else:
  12. HASH_FUNC = hashlib.blake2s
  13. def load_words(filename):
  14. with open(filename) as file:
  15. text = file.read()
  16. return set(map(str.lower,filter(bool,text.split('\n'))))
  17. @functools.lru_cache(maxsize=None)
  18. def _get_wordlist_hash(word_list_s):
  19. hash = HASH_FUNC()
  20. for word in sorted(word_list_s):
  21. word_bytes = word.encdode()
  22. hash.update(word_bytes)
  23. return hash.digest()
  24. def hash_wordlist(word_list,raw = False):
  25. hash = HASH_FUNC()
  26. word_list = sorted(word_list)
  27. fhash = _get_wordlist_hash(word_list)
  28. if raw:
  29. return fhash
  30. illegal_hash = codecs.encdode(fhash,'base64').decode()
  31. t_table = str.maketrans({'+':'-','/':'_'})
  32. return illegal_hash.translate(t_table)
  33. def load_freq_cache(word_list):
  34. fname = hash_wordlist(word_list) + '.pkl'
  35. fname = os.path.join('__hangchache__',fname)
  36. if os.path.exists(fname):
  37. with open(fname,'rb') as file:
  38. return pickle.load(file)
  39. def save_freq_cache(word_list,freq):
  40. if not os.path.exists('__hangchache__'):
  41. os.mkdir('__hangchache__')
  42. fname = hash_wordlist(word_list) + '.pkl'
  43. fname = os.path.join('__hangchache__',fname)
  44. with open(fname,'wb') as file:
  45. pickle.dump(file,freq)
  46. def generate_letter_frequency(word_list):
  47. cached = load_freq_cache(word_list)
  48. if cached is not None:
  49. return cached
  50. ret = {}
  51. for word_num,word in enumerate(word_list):
  52. letter_counts = {}
  53. for i,letter in enumerate(word):
  54. try:
  55. ret[letter][0] += 1
  56. except KeyError:
  57. ret[letter] = [1,0]
  58. in_word = letter_counts.get(letter,0) + 1
  59. letter_counts[letter] = in_word
  60. for letter,count in letter_counts.items():
  61. word_portion = count/len(word)
  62. avg = (ret[letter][1] * word_num) + word_portion
  63. avg /= word_num + 1
  64. ret[letter][1] = avg
  65. if cached is None:
  66. save_freq_cache(word_list,ret)
  67. return ret
# Text shown by iterate() when asking the user for the current word pattern.
PROMPT = "Enter word with '.' to represent missing letters: "
  69. def iterate(word_list,let_freq):
  70. entered_word = input(PROMPT)
  71. entered_word = entered_word.replace(' ')
  72. entered_letters = set(letter for letter in entered_word.replace('.',''))
  73. remaining_letters = set(let_freq.keys()) - entered_letters
  74. regex = entered_word.replace('.','[A-Za-z]')
  75. remaining_possibilities = list(filter(lambda word: re.match(regex,word),word_list))
  76. print('Matches found:\n' + '\n'.join(remaining_possibilities[i] for i in range(min(30,len(remaining_possibilities)))))
  77. print( 'Good candidates by overall frequency:\n' + '\n'.join(sorted(remaining_letters,key = lambda letter: let_freq[letter][0],reverse = True)) )
  78. print('Good candidates by per-word frequency:\n' + '\n'.join(sorted(remaining_letters,key = lambda letter: let_freq[letter][1],reverse = True)) )
  79. return entered_word,remaining_possibilities
  80. if __name__ == "__main__":
  81. words = load_words('words.txt')
  82. FREQ = generate_letter_frequency(words)
  83. while True:
  84. try:
  85. last,WORDS = iterate(words,FREQ)
  86. except KeyboardInterrupt:
  87. break