You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

28 lines
912 B

7 years ago
  1. import random
  2. def load_words(filename):
  3. with open(filename) as file:
  4. text = file.read()
  5. WORDS = set(map(str.lower,filter(bool,text.split('\n'))))
  6. #TODO: cache wordfreq with hash of words
  7. def generate_letter_frequency(word_list):
  8. ret = {}
  9. for word_num,word in enumerate(word_list):
  10. letter_counts = {}
  11. for i,letter in enumerate(word):
  12. try:
  13. ret[letter][0] += 1
  14. except KeyError:
  15. ret[letter] = [1,0]
  16. in_word = letter_counts.get(letter,0) + 1
  17. letter_counts[letter] = in_word
  18. for letter,count in letter_counts.items():
  19. word_portion = count/len(word)
  20. avg = (ret[letter][1] * word_num) + word_portion
  21. avg /= word_num + 1
  22. ret[letter][1] = avg
  23. return ret
# Script entry point: the body runs only when this file is executed directly.
if __name__ == "__main__":
    # NOTE(review): no module-level assignment to WORDS is visible in this
    # file, so this lookup looks like it would raise NameError. Confirm where
    # WORDS is meant to come from (presumably the result of load_words).
    # The resulting table is bound to `l` and not used further in the visible code.
    l = generate_letter_frequency(WORDS)