"""Load a word list from a file and compute per-letter frequency statistics."""

import random
import sys
from collections import Counter


def load_words(filename):
    """Read a newline-separated word file and return its words as a set.

    Each line is stripped of surrounding whitespace (so files with Windows
    line endings do not leave a trailing carriage return on every word) and
    lower-cased. Blank lines are ignored and duplicates collapse.

    Args:
        filename: Path of the text file to read, one word per line.

    Returns:
        A set of the distinct lower-cased words found in the file.
    """
    with open(filename) as file:
        text = file.read()
    stripped_lines = (line.strip() for line in text.splitlines())
    return {line.lower() for line in stripped_lines if line}


# TODO: cache the letter frequencies, keyed by a hash of the word list.
def generate_letter_frequency(word_list):
    """Compute occurrence counts and mean per-word share for every letter.

    Args:
        word_list: Iterable of words (strings). It is consumed exactly once,
            so a generator is acceptable.

    Returns:
        A dict mapping each letter to a two-item list ``[total, average]``:
        ``total`` is the number of times the letter occurs across all words,
        and ``average`` is the mean, taken over every word in ``word_list``,
        of the fraction of that word's letters equal to the letter. Words
        that do not contain the letter contribute a fraction of zero.
        An empty ``word_list`` yields an empty dict.
    """
    stats = {}
    word_total = 0
    for word in word_list:
        word_total += 1
        # An empty word yields an empty Counter, so len(word) is never zero
        # when the division below runs.
        for letter, count in Counter(word).items():
            entry = stats.setdefault(letter, [0, 0.0])
            entry[0] += count
            entry[1] += count / len(word)

    # Divide the summed shares by the number of words only once, at the end.
    # Updating a running average only for words that contain the letter would
    # skip the zero contribution of every other word and skew the result.
    for entry in stats.values():
        entry[1] /= word_total
    return stats


if __name__ == "__main__":
    # The word file is taken from the command line; no module-level word
    # list exists to fall back on.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} WORD_FILE")
    letter_frequency = generate_letter_frequency(load_words(sys.argv[1]))