wordle-analyzer/scripts/discard_unimportant_words.py

import json

# Input: the full word -> frequency mapping; output: the reduced mapping.
# Both paths are relative, so they resolve against the current working
# directory the script is started from.
FULL_WORDLIST_PATH = '../data/wordlists/german_SUBTLEX-DE_full.json'
SMALL_WORDLIST_PATH = '../data/wordlists/german_SUBTLEX-DE_small.json'

# Minimum frequency a word must have to be kept.
freq_threshold = 0.000001

# Maximum number of characters a word may have to be kept.
max_word_length = 10


def filter_word_freqs(word_freqs, min_freq=freq_threshold, max_length=max_word_length):
    """Return the entries of *word_freqs* that are frequent and short enough.

    Args:
        word_freqs: Mapping of word (str) to its frequency (number).
        min_freq: Words with a frequency below this value are dropped.
        max_length: Words with more characters than this are dropped.

    Returns:
        A new dict with only the entries whose frequency is ``>= min_freq``
        and whose word length is ``<= max_length``. The input mapping is not
        modified and the original entry order is preserved.
    """
    return {
        word: freq
        for word, freq in word_freqs.items()
        if freq >= min_freq and len(word) <= max_length
    }


def main():
    """Load the full word list, filter it, and write the reduced list."""
    # Pass the encoding explicitly: without it, open() uses the platform's
    # default encoding, which can fail or mis-decode non-ASCII characters
    # (e.g. German umlauts) in the UTF-8 JSON file.
    with open(FULL_WORDLIST_PATH, 'r', encoding='utf-8') as f:
        word_freqs = json.load(f)

    filtered_word_freqs = filter_word_freqs(word_freqs)

    # json.dump keeps its default ensure_ascii=True, so the written file
    # contains only ASCII (non-ASCII characters are \u-escaped).
    with open(SMALL_WORDLIST_PATH, 'w', encoding='utf-8') as f:
        json.dump(filtered_word_freqs, f, indent=4)


if __name__ == '__main__':
    main()
feat(german): add german wordlist with data from SUBTLEX-DE 2024-07-25 16:31:42 +02:00			`import json`

			`# Load the word frequency dictionary`
			`with open('../data/wordlists/german_SUBTLEX-DE_full.json', 'r') as f:`
			`word_freqs = json.load(f)`

			`# Set a frequency threshold (e.g., 0.001)`
			`freq_threshold = 0.000001`

			`# Set a maximum word length (e.g., 10)`
			`max_word_length = 10`

			`# Filter out words with low frequency and long length`
			`filtered_word_freqs = {word: freq for word, freq in word_freqs.items() if freq >= freq_threshold and len(word) <= max_word_length}`

			`# Save the filtered word frequencies to a new JSON file`
			`with open('../data/wordlists/german_SUBTLEX-DE_small.json', 'w') as f:`
			`json.dump(filtered_word_freqs, f, indent=4)`