You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

28 lines
912 B

import random
def load_words(filename):
    """Read a newline-separated word list and return it as a set.

    Each non-empty line of *filename* is treated as one word and is
    lower-cased; blank lines are skipped and duplicates collapse because
    the result is a set.

    Args:
        filename: Path of the text file to read (one word per line).

    Returns:
        A set of lower-cased, non-empty lines from the file.
    """
    with open(filename) as file:
        text = file.read()
    # Previously the set was bound to a local name and discarded, so the
    # function always returned None; hand it back to the caller instead.
    return {line.lower() for line in text.split('\n') if line}
#TODO: cache wordfreq with hash of words
def generate_letter_frequency(word_list):
    """Compute per-letter statistics over an iterable of words.

    Args:
        word_list: Iterable of strings. It is traversed once, so a
            generator or a set works as well as a list.

    Returns:
        A dict mapping each letter that occurs in any word to a two-item
        list ``[total, mean_portion]``:

        * ``total`` is the number of times the letter occurs across all
          words.
        * ``mean_portion`` is the mean, over *every* word in
          ``word_list``, of ``occurrences_in_word / len(word)``; a word
          that does not contain the letter contributes 0 to that mean.

        An empty ``word_list`` yields an empty dict.
    """
    stats = {}
    num_words = 0
    for word in word_list:
        num_words += 1
        # Count each letter within this word.
        letter_counts = {}
        for letter in word:
            letter_counts[letter] = letter_counts.get(letter, 0) + 1
        # An empty word has no letters, so len(word) is never 0 here.
        for letter, count in letter_counts.items():
            entry = stats.setdefault(letter, [0, 0.0])
            entry[0] += count
            # Accumulate the portion; it is normalised after the loop.
            entry[1] += count / len(word)
    # Divide once by the total number of words. Updating a running mean
    # only for letters present in the current word left the averages of
    # absent letters stale, which made the result wrong and dependent on
    # iteration order.
    for entry in stats.values():
        entry[1] /= num_words
    return stats
if __name__ == "__main__":
    # Script entry point: compute the letter statistics for WORDS.
    # NOTE(review): no module-level assignment to WORDS is visible in this
    # file -- confirm where it is defined before running as a script.
    l = generate_letter_frequency(word_list=WORDS)