wordle-analyzer/scripts/discard_unimportant_words.py

19 lines
642 B
Python

import json
# Shrink the full German SUBTLEX-DE word-frequency list to a smaller one by
# dropping words that are too rare or too long.
#
# NOTE(review): both file paths below are relative, so they resolve against
# the current working directory -- presumably the script is meant to be run
# from inside the scripts/ directory; confirm.

# Minimum frequency value a word must have to be kept (inclusive bound).
# NOTE(review): the unit/scale of the frequency values is defined by the
# input JSON and is not visible here -- confirm before tuning this number.
freq_threshold = 0.000001
# Maximum number of characters a word may have to be kept (inclusive bound).
max_word_length = 10


def filter_word_freqs(word_freqs, min_freq=freq_threshold,
                      max_length=max_word_length):
    """Return the entries of *word_freqs* that are frequent and short enough.

    Args:
        word_freqs: Mapping of word -> frequency value.
        min_freq: Entries with a frequency below this value are dropped.
        max_length: Entries whose word has more characters than this are
            dropped.

    Returns:
        A new dict holding only the entries with ``freq >= min_freq`` and
        ``len(word) <= max_length``, in the iteration order of the input.
        The input mapping is not modified.
    """
    return {
        word: freq
        for word, freq in word_freqs.items()
        if freq >= min_freq and len(word) <= max_length
    }


def main():
    """Load the full word list, filter it, and write the reduced list."""
    # Decode explicitly as UTF-8: JSON text is UTF-8 by specification, and
    # relying on the platform default codec can fail on or garble umlauts/ß.
    with open('../data/wordlists/german_SUBTLEX-DE_full.json', 'r',
              encoding='utf-8') as f:
        word_freqs = json.load(f)

    filtered_word_freqs = filter_word_freqs(word_freqs)

    # json.dump keeps its default ensure_ascii=True, so non-ASCII characters
    # are written as \uXXXX escapes and the output file itself is pure ASCII.
    with open('../data/wordlists/german_SUBTLEX-DE_small.json', 'w',
              encoding='utf-8') as f:
        json.dump(filtered_word_freqs, f, indent=4)


if __name__ == '__main__':
    main()