You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
28 lines
912 B
28 lines
912 B
import random
from collections import Counter
|
|
def load_words(filename):
    """Read a newline-separated word list and return its distinct words.

    Args:
        filename: Path of a text file containing one word per line.

    Returns:
        A set holding every non-empty line of the file, lowercased.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filename) as file:
        text = file.read()
    # Empty strings (blank lines, and the piece after a trailing newline)
    # are dropped; the rest are case-folded to lowercase and de-duplicated.
    # The set is returned so callers can actually use the loaded words.
    return {line.lower() for line in text.split('\n') if line}
|
|
#TODO: cache wordfreq with hash of words
|
|
|
|
def generate_letter_frequency(word_list):
    """Compute per-letter statistics over an iterable of words.

    Args:
        word_list: Iterable of strings (any iterable works, including a set
            or a generator; it is traversed exactly once).

    Returns:
        A dict mapping each letter that occurs in at least one word to a
        two-element list ``[total, mean_share]`` where

        * ``total`` is the number of occurrences of the letter across all
          words, and
        * ``mean_share`` is the mean, over *every* word in ``word_list``, of
          ``occurrences_in_word / len(word)``. Words that do not contain the
          letter contribute 0 to that mean.

        An empty ``word_list`` yields an empty dict.
    """
    ret = {}
    total_words = 0
    for word in word_list:
        total_words += 1
        # Counter(word) is empty for an empty word, so len(word) is never
        # zero when the division below runs.
        for letter, count in Counter(word).items():
            stats = ret.setdefault(letter, [0, 0.0])
            stats[0] += count
            # Accumulate the per-word share; it is turned into a mean below.
            stats[1] += count / len(word)

    # Divide once by the total number of words. Updating a running mean only
    # for words that contain the letter would skip the zero contributions of
    # all other words and make the result depend on iteration order.
    for stats in ret.values():
        stats[1] /= total_words

    return ret
|
|
|
|
# Script entry point: build the letter-frequency table for the word list.
# NOTE(review): no module-level binding of WORDS is evident in this file;
# confirm where it is meant to come from (presumably the words read by
# load_words) before relying on this branch.
if __name__ == "__main__":
    l = generate_letter_frequency(WORDS)
|