freq as struct

This commit is contained in:
Christoph J. Scherr 2024-03-21 15:31:32 +01:00
parent 880826dd85
commit ca473824fe
Signed by: cscherrNT
GPG Key ID: 8E2B45BC51A27EA7
6 changed files with 119 additions and 14 deletions

3
data/wordlists/test.json Normal file
View File

@ -0,0 +1,3 @@
{
"word": 0.001
}

View File

@ -31,6 +31,8 @@ fn main() -> anyhow::Result<()> {
.precompute(cli.precompute) .precompute(cli.precompute)
.build()?; .build()?;
debug!("game: {:#?}", game);
Ok(()) Ok(())
} }

View File

@ -1,3 +1,4 @@
use crate::wlist::word::Word;
use crate::wlist::WordList; use crate::wlist::WordList;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
@ -9,7 +10,7 @@ where
precompute: bool, precompute: bool,
max_steps: usize, max_steps: usize,
step: usize, step: usize,
solution: String, solution: Word,
wordlist: WL, wordlist: WL,
} }
@ -31,16 +32,17 @@ impl<WL: WordList> Game<WL> {
/// ///
/// This function will return an error if . /// This function will return an error if .
pub(crate) fn build(length: usize, precompute: bool, max_steps: usize, wlist: WL) -> anyhow::Result<Self> { pub(crate) fn build(length: usize, precompute: bool, max_steps: usize, wlist: WL) -> anyhow::Result<Self> {
let _game = Game { let mut game = Game {
length, length,
precompute, precompute,
max_steps, max_steps,
step: 0, step: 0,
solution: String::default(), // we actually set this later solution: Word::default(), // we actually set this later
wordlist: wlist wordlist: wlist
}; };
todo!(); game.solution = game.wordlist.rand_solution().into();
Ok(game)
} }
} }

View File

@ -1,30 +1,48 @@
use std::fmt::{write, Debug};
use serde_json; use serde_json;
use super::Word; use super::{Word, WordList};
const RAW_WORDLIST_FILE: &str = include_str!("../../data/wordlists/en_US_3b1b_freq_map.json"); const RAW_WORDLIST_FILE: &str = include_str!("../../data/wordlists/en_US_3b1b_freq_map.json");
#[derive(Clone, Debug)] #[derive(Clone)]
pub struct BuiltinWList { pub struct BuiltinWList {
words: super::WordMap words: super::WordMap,
} }
impl super::WordList for BuiltinWList { impl super::WordList for BuiltinWList {
fn solutions(&self) -> Vec<&Word> { fn solutions(&self) -> Vec<&Word> {
// PERF: this can be made faster if we were to use parallel iterators or chunking // PERF: this can be made faster if we were to use parallel iterators or chunking
self.words.keys().collect() // TODO: Filter should be a bit more elegant
let threshold = self.total_freq() / 2;
self.wordmap().iter().filter(|i| i.1 > )
} }
fn length_range(&self) -> impl std::ops::RangeBounds<usize> { fn length_range(&self) -> impl std::ops::RangeBounds<usize> {
5..5 5..5
} }
fn wordmap(&self) -> &super::WordMap {
&self.words
}
} }
impl Default for BuiltinWList { impl Default for BuiltinWList {
fn default() -> Self { fn default() -> Self {
let words: super::WordMap = serde_json::from_str(RAW_WORDLIST_FILE).unwrap(); let words: super::WordMap = serde_json::from_str(RAW_WORDLIST_FILE).unwrap();
Self { Self { words }
words }
} }
impl Debug for BuiltinWList {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write(
f,
format_args!(
"BuiltinWList {{ amount: {}, total_freq: {} }}",
self.amount(),
self.total_freq()
),
)
} }
} }

View File

@ -22,4 +22,11 @@ pub trait WordList: Clone + std::fmt::Debug + Default {
self.solutions().iter().choose(&mut rng).unwrap() self.solutions().iter().choose(&mut rng).unwrap()
} }
fn length_range(&self) -> impl RangeBounds<usize>; fn length_range(&self) -> impl RangeBounds<usize>;
/// Returns how many words `solutions()` yields for this word list.
///
/// This calls `solutions()` and takes the length of the returned collection,
/// so it costs as much as building that collection.
fn amount(&self) -> usize {
self.solutions().len()
}
/// Returns a reference to the `WordMap` backing this word list.
fn wordmap(&self) -> &WordMap;
/// Adds up the frequency of every entry in `wordmap()`.
fn total_freq(&self) -> Frequency {
    let frequencies = self.wordmap().values();
    frequencies.cloned().sum()
}
} }

View File

@ -1,25 +1,98 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::{write, Display};
use std::iter::Sum;
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
// NOTE: We might need a different implementation for more precision // NOTE: We might need a different implementation for more precision
// NOTE: This struct requires a custom Serialize and Deserialize implementation
#[derive(Clone, Debug, PartialEq, PartialOrd)] #[derive(Clone, Debug, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Frequency { pub struct Frequency {
inner: f64 inner: f64,
} }
// PERF: Hash for String is probably a bottleneck // PERF: Hash for String is probably a bottleneck
pub type Word = String; pub type Word = String;
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct WordMap { pub struct WordMap {
inner: HashMap<Word,Frequency> #[serde(flatten)]
inner: HashMap<Word, Frequency>,
} }
impl WordMap { impl WordMap {
pub fn keys(&self) -> std::collections::hash_map::Keys<'_, String, Frequency> { pub fn keys(&self) -> std::collections::hash_map::Keys<'_, String, Frequency> {
self.inner.keys() self.inner.keys()
} }
/// Returns an iterator over the frequencies stored in the map.
///
/// Delegates to `HashMap::values` on the inner map.
pub fn values(&self) -> std::collections::hash_map::Values<'_, String, Frequency> {
self.inner.values()
}
/// Returns an iterator over `(word, frequency)` pairs of the map.
///
/// Delegates to `HashMap::iter` on the inner map.
pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Frequency> {
self.inner.iter()
}
}
// `Frequency` is written and read as a bare number (not as an `{ "inner": .. }` object),
// so `Serialize` and `Deserialize` are implemented by hand instead of being derived.
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for Frequency {
    /// Reads a [`Frequency`] from a single numeric value.
    ///
    /// Floating-point and integer inputs are both accepted; integers are converted to `f64`.
    ///
    /// # Errors
    ///
    /// Returns the deserializer's error when the input is not a number.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        struct FrequencyVisitor;

        impl<'v> serde::de::Visitor<'v> for FrequencyVisitor {
            type Value = Frequency;

            fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
                write!(formatter, "a floating-point number")
            }

            fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                Ok(Frequency { inner: v })
            }

            // Self-describing formats report literals without a fractional part
            // (e.g. `0` or `1`) as integers, so those must be accepted as well.
            fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                // Rounds for magnitudes above 2^53; frequencies are not expected to get there.
                Ok(Frequency { inner: v as f64 })
            }

            fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                // Rounds for magnitudes above 2^53; frequencies are not expected to get there.
                Ok(Frequency { inner: v as f64 })
            }
        }

        // Hint the expected type instead of `deserialize_any`, which formats that are not
        // self-describing cannot service.
        deserializer.deserialize_f64(FrequencyVisitor)
    }
}
#[cfg(feature = "serde")]
impl Serialize for Frequency {
    /// Emits the wrapped value as a bare `f64`.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let Self { inner } = self;
        serializer.serialize_f64(*inner)
    }
}
impl From<Frequency> for f64 {
fn from(value: Frequency) -> Self {
value.inner
}
}
impl From<f64> for Frequency {
fn from(value: f64) -> Self {
Frequency { inner: value }
}
}
impl Sum for Frequency {
    /// Adds up all frequencies of the iterator, front to back, starting from `0.0`.
    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
        let mut total = 0.0_f64;
        for freq in iter {
            total = total + freq.inner;
        }
        Self { inner: total }
    }
}
impl Display for Frequency {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write(f, format_args!("{}", self.inner))
}
} }