You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
import random


def load_words(filename):
    """Load a newline-separated word list from *filename*.

    The file is read in one go and split on ``'\n'``; empty lines are
    dropped and every remaining entry is lower-cased. Duplicates collapse
    because the result is a set.

    The set is stored in the module-level ``WORDS`` (the name the
    ``__main__`` section of this file reads) and is also returned, so
    callers can use whichever is more convenient.

    Args:
        filename: Path of the text file to read.

    Returns:
        The set of lower-cased, non-empty lines found in the file.
    """
    # Without this declaration the assignment below would only create a
    # local variable that is thrown away when the function returns.
    global WORDS
    with open(filename) as file:
        text = file.read()
    WORDS = set(map(str.lower, filter(bool, text.split('\n'))))
    return WORDS


#TODO: cache wordfreq with hash of words
def generate_letter_frequency(word_list):
    """Compute per-letter statistics over an iterable of words.

    Args:
        word_list: Iterable of strings. It is traversed exactly once, so a
            generator works as well as a list or set.

    Returns:
        A dict mapping each letter that occurs to a two-item list
        ``[count, avg]`` where

        * ``count`` is the total number of occurrences of the letter across
          all words, and
        * ``avg`` is the mean, taken over *every* word in ``word_list``, of
          the fraction of that word's characters equal to the letter (words
          that do not contain the letter contribute ``0``).

        An empty ``word_list`` yields an empty dict.
    """
    ret = {}
    total_words = 0
    for word in word_list:
        total_words += 1

        # How many times each letter occurs in this one word.
        letter_counts = {}
        for letter in word:
            letter_counts[letter] = letter_counts.get(letter, 0) + 1

        # An empty word has no letters, so len(word) is never zero here.
        for letter, count in letter_counts.items():
            entry = ret.setdefault(letter, [0, 0.0])
            entry[0] += count
            # Accumulate the raw sum of portions; it is turned into a mean
            # once the total number of words is known. Updating a running
            # mean only when the letter appears would over-weight letters
            # that are absent from intervening words.
            entry[1] += count / len(word)

    for entry in ret.values():
        entry[1] /= total_words
    return ret
if __name__ == "__main__":
    # Script entry point: build the letter-frequency table for the word set.
    # NOTE(review): nothing visible in this file calls load_words() or
    # otherwise defines a module-level WORDS before this line - confirm
    # where WORDS is expected to come from, otherwise this raises NameError.
    l = generate_letter_frequency(WORDS)
|