From 4e3cf296088d77656649964fd522c9467631a78e Mon Sep 17 00:00:00 2001 From: "Christoph J. Scherr" Date: Thu, 21 Mar 2024 16:28:26 +0100 Subject: [PATCH] i have an okay word chooser --- src/game/mod.rs | 10 ++-- src/wlist/builtin.rs | 21 +++++--- src/wlist/mod.rs | 27 ++++++---- src/wlist/word.rs | 115 +++++++++++++++++++------------------------ 4 files changed, 87 insertions(+), 86 deletions(-) diff --git a/src/game/mod.rs b/src/game/mod.rs index 58c4b07..665ec77 100644 --- a/src/game/mod.rs +++ b/src/game/mod.rs @@ -1,7 +1,7 @@ -use crate::wlist::word::Word; +use crate::wlist::word::{Frequency, Solution, Word}; use crate::wlist::WordList; -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq)] pub struct Game where WL: WordList, @@ -10,7 +10,7 @@ where precompute: bool, max_steps: usize, step: usize, - solution: Word, + solution: Solution, wordlist: WL, } @@ -32,16 +32,16 @@ impl Game { /// /// This function will return an error if . pub(crate) fn build(length: usize, precompute: bool, max_steps: usize, wlist: WL) -> anyhow::Result { + let solution = wlist.rand_solution(); let mut game = Game { length, precompute, max_steps, step: 0, - solution: Word::default(), // we actually set this later + solution, wordlist: wlist }; - game.solution = game.wordlist.rand_solution().into(); Ok(game) } } diff --git a/src/wlist/builtin.rs b/src/wlist/builtin.rs index 8a24326..030f7fb 100644 --- a/src/wlist/builtin.rs +++ b/src/wlist/builtin.rs @@ -12,12 +12,6 @@ pub struct BuiltinWList { } impl super::WordList for BuiltinWList { - fn solutions(&self) -> Vec<&Word> { - // PERF: this can be made faster if we were to use parallel iterators or chunking - // TODO: Filter should be a bit more elegant - let threshold = self.total_freq() / 2; - self.wordmap().iter().filter(|i| i.1 > ) - } fn length_range(&self) -> impl std::ops::RangeBounds { 5..5 } @@ -39,9 +33,20 @@ impl Debug for BuiltinWList { write( f, format_args!( - "BuiltinWList {{ amount: {}, total_freq: {} }}", + "BuiltinWList {{ \n\ + \tamount: {}, \n\ + \ttotal_freq: {}, \n\ + \tcommon: {}, \n\ + \tthreshold: {}, \n\ + \tfreq_range: {:?}, \n\ + \tover_threshold: {:#?}, \n\ + }}", self.amount(), - self.total_freq() + self.total_freq(), + self.wordmap().n_common(), + self.wordmap().threshold(), + self.wordmap().freq_range(), + self.over_threashold() ), ) } diff --git a/src/wlist/mod.rs b/src/wlist/mod.rs index 86efc46..7ba493c 100644 --- a/src/wlist/mod.rs +++ b/src/wlist/mod.rs @@ -10,16 +10,15 @@ use word::*; pub type AnyWordlist = Box; pub trait WordList: Clone + std::fmt::Debug + Default { - // NOTE: The possible answers should be determined with a wordlist that has the - // frequencies/probabilities of the words. We then use a sigmoid function to determine if a - // word can be a solution based on that value. Only words above some threshold of - // commonness will be available as solutions then. Next, we choose one of the allowed words - // randomly. - // NOTE: must never return nothing - fn solutions(&self) -> Vec<&Word>; - fn rand_solution(&self) -> &Word { + fn solutions(&self) -> ManySolutions { + let wmap = self.wordmap(); + let threshold = wmap.threshold(); + wmap.iter().filter(|i| *i.1 > threshold).collect() + } + fn rand_solution(&self) -> Solution { let mut rng = rand::thread_rng(); - self.solutions().iter().choose(&mut rng).unwrap() + let sol = *self.solutions().iter().choose(&mut rng).unwrap(); + (sol.0.to_owned(), sol.1.to_owned()) } fn length_range(&self) -> impl RangeBounds; fn amount(&self) -> usize { @@ -29,4 +28,14 @@ pub trait WordList: Clone + std::fmt::Debug + Default { fn total_freq(&self) -> Frequency { self.wordmap().values().map(|a| a.to_owned()).sum() } + fn over_threashold(&self) -> WordMap { + let wmap = self.wordmap(); + let threshold = wmap.threshold(); + let wpairs: Vec<(_, _)> = wmap.iter().filter(|i| *i.1 > threshold).collect(); + let mut hm = HashMap::new(); + for (k, v) in wpairs { + hm.insert(k.into(), *v); + } + WordMap::new(hm) + } } diff --git a/src/wlist/word.rs b/src/wlist/word.rs index 6b65c9d..e366015 100644 --- a/src/wlist/word.rs +++ b/src/wlist/word.rs @@ -1,21 +1,20 @@ use std::collections::HashMap; use std::fmt::{write, Display}; use std::iter::Sum; +use std::ops::RangeFull; +use libpt::log::debug; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -// NOTE: We might need a different implementation for more precision -// NOTE: This struct requires a custom Serialize and Deserialize implementation -#[derive(Clone, Debug, PartialEq, PartialOrd)] -pub struct Frequency { - inner: f64, -} +pub type Frequency = f64; // PERF: Hash for String is probably a bottleneck pub type Word = String; +pub type ManySolutions<'a> = Vec<(&'a Word, &'a Frequency)>; +pub type Solution = (Word, Frequency); -#[derive(Clone, Debug)] +#[derive(Clone)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct WordMap { #[serde(flatten)] @@ -23,6 +22,9 @@ pub struct WordMap { } impl WordMap { + pub fn new(inner: HashMap) -> Self { + Self { inner } + } pub fn keys(&self) -> std::collections::hash_map::Keys<'_, String, Frequency> { self.inner.keys() } @@ -32,67 +34,52 @@ impl WordMap { pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Frequency> { self.inner.iter() } -} - -// We need custom Serialize and Deserialize of Frequency, because they are only primitive types. -// Serde does not support serializing directly to and from primitives (such as floats) -#[cfg(feature = "serde")] -impl<'de> Deserialize<'de> for Frequency { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - struct FrequencyVisitor; - impl<'v> serde::de::Visitor<'v> for FrequencyVisitor { - type Value = Frequency; - - fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result { - write!(formatter, "a floating-point number") - } - - fn visit_f64(self, v: f64) -> Result - where - E: serde::de::Error, - { - Ok(Frequency { inner: v }) - } - } - - deserializer.deserialize_any(FrequencyVisitor) + pub fn freq_range(&self) -> std::ops::Range { + return 0.1e-10..1e-6; + let lowest: Frequency = todo!(); + let highest: Frequency = todo!(); + lowest..highest } -} -#[cfg(feature = "serde")] -impl Serialize for Frequency { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - serializer.serialize_f64(self.inner) + pub fn len(&self) -> usize { + self.inner.len() + } + #[must_use] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + pub fn n_common(&self) -> usize { + // TODO: calculate the amount of relatively common words + 3000 + } + pub fn threshold(&self) -> Frequency { + // HACK: I completely butchered the math here + // see https://github.com/3b1b/videos/blob/master/_2022/wordle/simulations.py + let l_under_sigmoid = 10_f64; + let len = self.len(); + let mut c: f64 = l_under_sigmoid * (0.5 + self.n_common() as f64 / len as f64); + c *= 1e-7; + debug!(threshold=c); + c + } + pub fn inner(&self) -> &HashMap { + &self.inner } } -impl From for f64 { - fn from(value: Frequency) -> Self { - value.inner - } -} - -impl From for Frequency { - fn from(value: f64) -> Self { - Frequency { inner: value } - } -} - -impl Sum for Frequency { - fn sum>(iter: I) -> Self { - iter.fold(Self { inner: 0.0 }, |a, b| Self { - inner: a.inner + b.inner, - }) - } -} - -impl Display for Frequency { +impl std::fmt::Debug for WordMap { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write(f, format_args!("{}", self.inner)) + write( + f, + format_args!( + "WordMap {{\n\ + \t\tlen: {}\n\ + \t\tfreq_range: {:?}\n\ + \t\tcommon: {:?}\n\ + \t}}", + self.len(), + self.freq_range(), + self.n_common() + ), + ) } }