generated from PlexSheep/rs-base
refactor(wordlist): filter out all words not of specified length
This commit is contained in:
parent
8f96f6662a
commit
47c2c9e892
|
@ -5,7 +5,7 @@ use serde_json;
|
|||
|
||||
use crate::error::WordlistError;
|
||||
|
||||
use super::{Word, WordList};
|
||||
use super::{Word, WordList, WordMapInner};
|
||||
|
||||
pub const RAW_WORDLIST_BUNDLED_ENGLISH: &str =
|
||||
include_str!("../../data/wordlists/en_US_3b1b_freq_map.json");
|
||||
|
@ -18,6 +18,7 @@ pub const RAW_WORDLIST_PATH_GERMAN_SMALL: &str = "../../data/wordlists/german_SU
|
|||
/// A word list backed by a [`super::WordMap`], carrying a human-readable name.
#[derive(Clone)]
|
||||
pub struct BuiltinWList {
|
||||
/// Word-to-frequency map holding the entries of this list.
words: super::WordMap,
|
||||
/// Label shown in the `Debug`/`Display` output: the file name when loaded from a path,
/// or a fixed label such as "(builtin english)" for the bundled lists.
name: String,
|
||||
}
|
||||
|
||||
impl BuiltinWList {
|
||||
|
@ -33,31 +34,48 @@ impl BuiltinWList {
|
|||
///
|
||||
/// Where the number is the frequency. Upper/lower case is ignored.
|
||||
///
|
||||
/// Only words with the specified length will be included.
|
||||
///
|
||||
/// ## Errors
|
||||
///
|
||||
/// Will fail if the file path cannot be read or the format is wrong.
|
||||
pub fn load<P: AsRef<std::path::Path>>(wl_path: P) -> Result<Self, WordlistError> {
|
||||
pub fn load<P: AsRef<std::path::Path>>(wl_path: P, len: usize) -> Result<Self, WordlistError> {
|
||||
let path: &Path = wl_path.as_ref();
|
||||
let file = std::fs::File::open(path)?;
|
||||
|
||||
// don't load the whole string into memory
|
||||
let reader = std::io::BufReader::new(file);
|
||||
let words: super::WordMap = serde_json::from_reader(reader)?;
|
||||
let mut words: super::WordMap = serde_json::from_reader(reader)?;
|
||||
words.only_words_with_len(len);
|
||||
|
||||
Ok(Self { words })
|
||||
let name: String = if let Some(osstr) = path.file_name() {
|
||||
osstr.to_str().unwrap_or("(no name)").to_string()
|
||||
} else {
|
||||
"(no name)".to_string()
|
||||
};
|
||||
|
||||
Ok(Self { words, name })
|
||||
}
|
||||
|
||||
pub fn english() -> Self {
|
||||
let words: super::WordMap = serde_json::from_str(RAW_WORDLIST_BUNDLED_ENGLISH).unwrap();
|
||||
pub fn english(len: usize) -> Self {
|
||||
let mut words: super::WordMap = serde_json::from_str(RAW_WORDLIST_BUNDLED_ENGLISH).unwrap();
|
||||
words.only_words_with_len(len);
|
||||
|
||||
Self { words }
|
||||
Self {
|
||||
words,
|
||||
name: "(builtin english)".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn german() -> Self {
|
||||
let words: super::WordMap =
|
||||
pub fn german(len: usize) -> Self {
|
||||
let mut words: super::WordMap =
|
||||
serde_json::from_str(RAW_WORDLIST_BUNDLED_GERMAN_SMALL).unwrap();
|
||||
words.only_words_with_len(len);
|
||||
|
||||
Self { words }
|
||||
Self {
|
||||
words,
|
||||
name: "(builtin german)".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -75,36 +93,27 @@ impl super::WordList for BuiltinWList {
|
|||
|
||||
impl Default for BuiltinWList {
|
||||
fn default() -> Self {
|
||||
Self::english()
|
||||
Self::english(5)
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for BuiltinWList {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write(
|
||||
f,
|
||||
format_args!(
|
||||
"BuiltinWList {{ \n\
|
||||
\tamount: {}, \n\
|
||||
\ttotal_freq: {}, \n\
|
||||
\tcommon: {}, \n\
|
||||
\tthreshold: {}, \n\
|
||||
\tfreq_range: {:?}, \n\
|
||||
\tover_threshold: {:#?}, \n\
|
||||
}}",
|
||||
self.amount(),
|
||||
self.total_freq(),
|
||||
self.wordmap().n_common(),
|
||||
self.wordmap().threshold(),
|
||||
self.wordmap().freq_range(),
|
||||
self.over_threashold()
|
||||
),
|
||||
)
|
||||
f.debug_struct("BuiltinWList")
|
||||
.field("name", &self.name)
|
||||
.field("words", &self.words)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for BuiltinWList {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{self:#?}")
|
||||
writeln!(
|
||||
f,
|
||||
"{}:\nwords:\t{}\ntop 5:\t{:?}",
|
||||
self.name,
|
||||
self.len(),
|
||||
self.n_most_likely(5)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ pub mod builtin;
|
|||
pub mod word;
|
||||
use word::*;
|
||||
|
||||
use crate::error::{Error, WResult, WordlistError};
|
||||
use crate::error::{WResult, WordlistError};
|
||||
|
||||
pub type AnyWordlist = Box<dyn WordList>;
|
||||
|
||||
|
@ -35,9 +35,14 @@ pub trait WordList: Clone + std::fmt::Debug + Default + Sync + Display {
|
|||
(w.0.clone(), *w.1)
|
||||
}
|
||||
fn length_range(&self) -> impl RangeBounds<usize>;
|
||||
fn amount(&self) -> usize {
|
||||
#[must_use]
|
||||
fn len(&self) -> usize {
|
||||
self.solutions().len()
|
||||
}
|
||||
#[must_use]
|
||||
fn is_empty(&self) -> bool {
|
||||
self.solutions().len() == 0
|
||||
}
|
||||
fn wordmap(&self) -> &WordMap;
|
||||
fn total_freq(&self) -> Frequency {
|
||||
self.wordmap().values().map(|a| a.to_owned()).sum()
|
||||
|
|
|
@ -11,12 +11,13 @@ pub type Frequency = f64;
|
|||
pub type Word = String;
|
||||
pub type WordData = (Word, Frequency);
|
||||
pub type WordDataRef<'wl> = (&'wl Word, &'wl Frequency);
|
||||
pub(crate) type WordMapInner = HashMap<Word, Frequency>;
|
||||
|
||||
#[derive(Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct WordMap {
|
||||
#[serde(flatten)]
|
||||
inner: HashMap<Word, Frequency>,
|
||||
inner: WordMapInner,
|
||||
}
|
||||
|
||||
impl Default for WordMap {
|
||||
|
@ -71,6 +72,9 @@ impl WordMap {
|
|||
/// Returns a shared reference to the underlying word-to-frequency hash map.
pub fn inner(&self) -> &HashMap<Word, Frequency> {
|
||||
&self.inner
|
||||
}
|
||||
/// Returns a mutable reference to the underlying word-to-frequency hash map,
/// allowing callers to insert, remove or modify entries directly.
pub fn inner_mut(&mut self) -> &mut HashMap<Word, Frequency> {
|
||||
&mut self.inner
|
||||
}
|
||||
pub fn get<I: std::fmt::Display>(&self, word: I) -> Option<WordData> {
|
||||
self.inner
|
||||
.get(&word.to_string())
|
||||
|
@ -82,6 +86,14 @@ impl WordMap {
|
|||
abs.into_iter().map(|p| (p.0, p.1 as f64 / n)).collect();
|
||||
relative.into()
|
||||
}
|
||||
pub fn only_words_with_len(&mut self, len: usize) {
|
||||
self.inner = self
|
||||
.inner
|
||||
.iter()
|
||||
.filter(|a| a.0.len() == len)
|
||||
.map(|a| (a.0.to_owned(), *a.1))
|
||||
.collect::<WordMapInner>();
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for WordMap {
|
||||
|
|
Loading…
Reference in New Issue