generated from PlexSheep/rs-base
feat(german): add german wordlist with data from SUBTLEX-DE
cargo devel CI / cargo CI (push) Has been cancelled
Details
cargo devel CI / cargo CI (push) Has been cancelled
Details
This commit is contained in:
parent
82e0b49434
commit
d651756265
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,18 @@
|
||||||
|
import json
|
||||||
|
|
||||||
|
# Load the word frequency dictionary
|
||||||
|
# Read the full SUBTLEX-DE mapping of word -> frequency. The encoding is
# passed explicitly because German words contain non-ASCII letters
# (ä, ö, ü, ß) and the platform default encoding is not UTF-8 everywhere.
# NOTE(review): assumes the input file is UTF-8, as JSON normally is.
with open('../data/wordlists/german_SUBTLEX-DE_full.json', 'r', encoding='utf-8') as f:
    word_freqs = json.load(f)
|
||||||
|
|
||||||
|
# Set the minimum frequency a word must reach to be kept (e.g., 0.000001)
|
||||||
|
freq_threshold: float = 1e-6  # entries with a frequency below this are dropped
|
||||||
|
|
||||||
|
# Set a maximum word length (e.g., 10)
|
||||||
|
max_word_length: int = 10  # words with more characters than this are dropped
|
||||||
|
|
||||||
|
# Keep only words that are frequent enough and short enough; drop the rest
|
||||||
|
# Build the reduced mapping: an entry survives only if its frequency reaches
# freq_threshold and its word is at most max_word_length characters long.
filtered_word_freqs = {
    token: frequency
    for token, frequency in word_freqs.items()
    if frequency >= freq_threshold and len(token) <= max_word_length
}
|
||||||
|
|
||||||
|
# Save the filtered word frequencies to a new JSON file
|
||||||
|
# Write the filtered mapping to the "small" wordlist file.
# ensure_ascii=False keeps umlauts and ß as real characters instead of
# \uXXXX escapes (identical JSON data, but smaller and human-readable), so
# the file is opened as UTF-8 explicitly rather than with the locale default.
with open('../data/wordlists/german_SUBTLEX-DE_small.json', 'w', encoding='utf-8') as f:
    json.dump(filtered_word_freqs, f, indent=4, ensure_ascii=False)
|
Loading…
Reference in New Issue