generated from PlexSheep/rs-base
refactor(wordlist): filter out all words not of specified length
This commit is contained in:
parent
8f96f6662a
commit
47c2c9e892
|
@ -5,7 +5,7 @@ use serde_json;
|
||||||
|
|
||||||
use crate::error::WordlistError;
|
use crate::error::WordlistError;
|
||||||
|
|
||||||
use super::{Word, WordList};
|
use super::{Word, WordList, WordMapInner};
|
||||||
|
|
||||||
pub const RAW_WORDLIST_BUNDLED_ENGLISH: &str =
|
pub const RAW_WORDLIST_BUNDLED_ENGLISH: &str =
|
||||||
include_str!("../../data/wordlists/en_US_3b1b_freq_map.json");
|
include_str!("../../data/wordlists/en_US_3b1b_freq_map.json");
|
||||||
|
@ -18,6 +18,7 @@ pub const RAW_WORDLIST_PATH_GERMAN_SMALL: &str = "../../data/wordlists/german_SU
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct BuiltinWList {
|
pub struct BuiltinWList {
|
||||||
words: super::WordMap,
|
words: super::WordMap,
|
||||||
|
name: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BuiltinWList {
|
impl BuiltinWList {
|
||||||
|
@ -33,31 +34,48 @@ impl BuiltinWList {
|
||||||
///
|
///
|
||||||
/// Where the number is the frequency. Higher/Lower case is ignored.
|
/// Where the number is the frequency. Higher/Lower case is ignored.
|
||||||
///
|
///
|
||||||
|
/// Only words with the specified length will be included.
|
||||||
|
///
|
||||||
/// ## Errors
|
/// ## Errors
|
||||||
///
|
///
|
||||||
/// Will fail if the file path cannot be read or the format is wrong.
|
/// Will fail if the file path cannot be read or the format is wrong.
|
||||||
pub fn load<P: AsRef<std::path::Path>>(wl_path: P) -> Result<Self, WordlistError> {
|
pub fn load<P: AsRef<std::path::Path>>(wl_path: P, len: usize) -> Result<Self, WordlistError> {
|
||||||
let path: &Path = wl_path.as_ref();
|
let path: &Path = wl_path.as_ref();
|
||||||
let file = std::fs::File::open(path)?;
|
let file = std::fs::File::open(path)?;
|
||||||
|
|
||||||
// don't load the whole string into memory
|
// don't load the whole string into memory
|
||||||
let reader = std::io::BufReader::new(file);
|
let reader = std::io::BufReader::new(file);
|
||||||
let words: super::WordMap = serde_json::from_reader(reader)?;
|
let mut words: super::WordMap = serde_json::from_reader(reader)?;
|
||||||
|
words.only_words_with_len(len);
|
||||||
|
|
||||||
Ok(Self { words })
|
let name: String = if let Some(osstr) = path.file_name() {
|
||||||
|
osstr.to_str().unwrap_or("(no name)").to_string()
|
||||||
|
} else {
|
||||||
|
"(no name)".to_string()
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Self { words, name })
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn english() -> Self {
|
pub fn english(len: usize) -> Self {
|
||||||
let words: super::WordMap = serde_json::from_str(RAW_WORDLIST_BUNDLED_ENGLISH).unwrap();
|
let mut words: super::WordMap = serde_json::from_str(RAW_WORDLIST_BUNDLED_ENGLISH).unwrap();
|
||||||
|
words.only_words_with_len(len);
|
||||||
|
|
||||||
Self { words }
|
Self {
|
||||||
|
words,
|
||||||
|
name: "(builtin english)".to_string(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn german() -> Self {
|
pub fn german(len: usize) -> Self {
|
||||||
let words: super::WordMap =
|
let mut words: super::WordMap =
|
||||||
serde_json::from_str(RAW_WORDLIST_BUNDLED_GERMAN_SMALL).unwrap();
|
serde_json::from_str(RAW_WORDLIST_BUNDLED_GERMAN_SMALL).unwrap();
|
||||||
|
words.only_words_with_len(len);
|
||||||
|
|
||||||
Self { words }
|
Self {
|
||||||
|
words,
|
||||||
|
name: "(builtin german)".to_string(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,36 +93,27 @@ impl super::WordList for BuiltinWList {
|
||||||
|
|
||||||
impl Default for BuiltinWList {
|
impl Default for BuiltinWList {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self::english()
|
Self::english(5)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Debug for BuiltinWList {
|
impl Debug for BuiltinWList {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
write(
|
f.debug_struct("BuiltinWList")
|
||||||
f,
|
.field("name", &self.name)
|
||||||
format_args!(
|
.field("words", &self.words)
|
||||||
"BuiltinWList {{ \n\
|
.finish()
|
||||||
\tamount: {}, \n\
|
|
||||||
\ttotal_freq: {}, \n\
|
|
||||||
\tcommon: {}, \n\
|
|
||||||
\tthreshold: {}, \n\
|
|
||||||
\tfreq_range: {:?}, \n\
|
|
||||||
\tover_threshold: {:#?}, \n\
|
|
||||||
}}",
|
|
||||||
self.amount(),
|
|
||||||
self.total_freq(),
|
|
||||||
self.wordmap().n_common(),
|
|
||||||
self.wordmap().threshold(),
|
|
||||||
self.wordmap().freq_range(),
|
|
||||||
self.over_threashold()
|
|
||||||
),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for BuiltinWList {
|
impl Display for BuiltinWList {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
write!(f, "{self:#?}")
|
writeln!(
|
||||||
|
f,
|
||||||
|
"{}:\nwords:\t{}\ntop 5:\t{:?}",
|
||||||
|
self.name,
|
||||||
|
self.len(),
|
||||||
|
self.n_most_likely(5)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@ pub mod builtin;
|
||||||
pub mod word;
|
pub mod word;
|
||||||
use word::*;
|
use word::*;
|
||||||
|
|
||||||
use crate::error::{Error, WResult, WordlistError};
|
use crate::error::{WResult, WordlistError};
|
||||||
|
|
||||||
pub type AnyWordlist = Box<dyn WordList>;
|
pub type AnyWordlist = Box<dyn WordList>;
|
||||||
|
|
||||||
|
@ -35,9 +35,14 @@ pub trait WordList: Clone + std::fmt::Debug + Default + Sync + Display {
|
||||||
(w.0.clone(), *w.1)
|
(w.0.clone(), *w.1)
|
||||||
}
|
}
|
||||||
fn length_range(&self) -> impl RangeBounds<usize>;
|
fn length_range(&self) -> impl RangeBounds<usize>;
|
||||||
fn amount(&self) -> usize {
|
#[must_use]
|
||||||
|
fn len(&self) -> usize {
|
||||||
self.solutions().len()
|
self.solutions().len()
|
||||||
}
|
}
|
||||||
|
#[must_use]
|
||||||
|
fn is_empty(&self) -> bool {
|
||||||
|
self.solutions().len() == 0
|
||||||
|
}
|
||||||
fn wordmap(&self) -> &WordMap;
|
fn wordmap(&self) -> &WordMap;
|
||||||
fn total_freq(&self) -> Frequency {
|
fn total_freq(&self) -> Frequency {
|
||||||
self.wordmap().values().map(|a| a.to_owned()).sum()
|
self.wordmap().values().map(|a| a.to_owned()).sum()
|
||||||
|
|
|
@ -11,12 +11,13 @@ pub type Frequency = f64;
|
||||||
pub type Word = String;
|
pub type Word = String;
|
||||||
pub type WordData = (Word, Frequency);
|
pub type WordData = (Word, Frequency);
|
||||||
pub type WordDataRef<'wl> = (&'wl Word, &'wl Frequency);
|
pub type WordDataRef<'wl> = (&'wl Word, &'wl Frequency);
|
||||||
|
pub(crate) type WordMapInner = HashMap<Word, Frequency>;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
pub struct WordMap {
|
pub struct WordMap {
|
||||||
#[serde(flatten)]
|
#[serde(flatten)]
|
||||||
inner: HashMap<Word, Frequency>,
|
inner: WordMapInner,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for WordMap {
|
impl Default for WordMap {
|
||||||
|
@ -71,6 +72,9 @@ impl WordMap {
|
||||||
pub fn inner(&self) -> &HashMap<Word, Frequency> {
|
pub fn inner(&self) -> &HashMap<Word, Frequency> {
|
||||||
&self.inner
|
&self.inner
|
||||||
}
|
}
|
||||||
|
pub fn inner_mut(&mut self) -> &mut HashMap<Word, Frequency> {
|
||||||
|
&mut self.inner
|
||||||
|
}
|
||||||
pub fn get<I: std::fmt::Display>(&self, word: I) -> Option<WordData> {
|
pub fn get<I: std::fmt::Display>(&self, word: I) -> Option<WordData> {
|
||||||
self.inner
|
self.inner
|
||||||
.get(&word.to_string())
|
.get(&word.to_string())
|
||||||
|
@ -82,6 +86,14 @@ impl WordMap {
|
||||||
abs.into_iter().map(|p| (p.0, p.1 as f64 / n)).collect();
|
abs.into_iter().map(|p| (p.0, p.1 as f64 / n)).collect();
|
||||||
relative.into()
|
relative.into()
|
||||||
}
|
}
|
||||||
|
pub fn only_words_with_len(&mut self, len: usize) {
|
||||||
|
self.inner = self
|
||||||
|
.inner
|
||||||
|
.iter()
|
||||||
|
.filter(|a| a.0.len() == len)
|
||||||
|
.map(|a| (a.0.to_owned(), *a.1))
|
||||||
|
.collect::<WordMapInner>();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Debug for WordMap {
|
impl std::fmt::Debug for WordMap {
|
||||||
|
|
Loading…
Reference in New Issue