i have an okay word chooser

This commit is contained in:
Christoph J. Scherr 2024-03-21 16:28:26 +01:00
parent ca473824fe
commit 4e3cf29608
Signed by: cscherrNT
GPG Key ID: 8E2B45BC51A27EA7
4 changed files with 87 additions and 86 deletions

View File

@ -1,7 +1,7 @@
use crate::wlist::word::Word; use crate::wlist::word::{Frequency, Solution, Word};
use crate::wlist::WordList; use crate::wlist::WordList;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq)]
pub struct Game<WL> pub struct Game<WL>
where where
WL: WordList, WL: WordList,
@ -10,7 +10,7 @@ where
precompute: bool, precompute: bool,
max_steps: usize, max_steps: usize,
step: usize, step: usize,
solution: Word, solution: Solution,
wordlist: WL, wordlist: WL,
} }
@ -32,16 +32,16 @@ impl<WL: WordList> Game<WL> {
/// ///
/// This function will return an error if . /// This function will return an error if .
pub(crate) fn build(length: usize, precompute: bool, max_steps: usize, wlist: WL) -> anyhow::Result<Self> { pub(crate) fn build(length: usize, precompute: bool, max_steps: usize, wlist: WL) -> anyhow::Result<Self> {
let solution = wlist.rand_solution();
let mut game = Game { let mut game = Game {
length, length,
precompute, precompute,
max_steps, max_steps,
step: 0, step: 0,
solution: Word::default(), // we actually set this later solution,
wordlist: wlist wordlist: wlist
}; };
game.solution = game.wordlist.rand_solution().into();
Ok(game) Ok(game)
} }
} }

View File

@ -12,12 +12,6 @@ pub struct BuiltinWList {
} }
impl super::WordList for BuiltinWList { impl super::WordList for BuiltinWList {
fn solutions(&self) -> Vec<&Word> {
// PERF: this can be made faster if we were to use parallel iterators or chunking
// TODO: Filter should be a bit more elegant
let threshold = self.total_freq() / 2;
self.wordmap().iter().filter(|i| i.1 > )
}
fn length_range(&self) -> impl std::ops::RangeBounds<usize> { fn length_range(&self) -> impl std::ops::RangeBounds<usize> {
5..5 5..5
} }
@ -39,9 +33,20 @@ impl Debug for BuiltinWList {
write( write(
f, f,
format_args!( format_args!(
"BuiltinWList {{ amount: {}, total_freq: {} }}", "BuiltinWList {{ \n\
\tamount: {}, \n\
\ttotal_freq: {}, \n\
\tcommon: {}, \n\
\tthreshold: {}, \n\
\tfreq_range: {:?}, \n\
\tover_threshold: {:#?}, \n\
}}",
self.amount(), self.amount(),
self.total_freq() self.total_freq(),
self.wordmap().n_common(),
self.wordmap().threshold(),
self.wordmap().freq_range(),
self.over_threashold()
), ),
) )
} }

View File

@ -10,16 +10,15 @@ use word::*;
pub type AnyWordlist = Box<dyn WordList>; pub type AnyWordlist = Box<dyn WordList>;
pub trait WordList: Clone + std::fmt::Debug + Default { pub trait WordList: Clone + std::fmt::Debug + Default {
// NOTE: The possible answers should be determined with a wordlist that has the fn solutions(&self) -> ManySolutions {
// frequencies/probabilities of the words. We then use a sigmoid function to determine if a let wmap = self.wordmap();
// word can be a solution based on that value. Only words above some threshold of let threshold = wmap.threshold();
// commonness will be available as solutions then. Next, we choose one of the allowed words wmap.iter().filter(|i| *i.1 > threshold).collect()
// randomly. }
// NOTE: must never return nothing fn rand_solution(&self) -> Solution {
fn solutions(&self) -> Vec<&Word>;
fn rand_solution(&self) -> &Word {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
self.solutions().iter().choose(&mut rng).unwrap() let sol = *self.solutions().iter().choose(&mut rng).unwrap();
(sol.0.to_owned(), sol.1.to_owned())
} }
fn length_range(&self) -> impl RangeBounds<usize>; fn length_range(&self) -> impl RangeBounds<usize>;
fn amount(&self) -> usize { fn amount(&self) -> usize {
@ -29,4 +28,14 @@ pub trait WordList: Clone + std::fmt::Debug + Default {
fn total_freq(&self) -> Frequency { fn total_freq(&self) -> Frequency {
self.wordmap().values().map(|a| a.to_owned()).sum() self.wordmap().values().map(|a| a.to_owned()).sum()
} }
fn over_threashold(&self) -> WordMap {
let wmap = self.wordmap();
let threshold = wmap.threshold();
let wpairs: Vec<(_, _)> = wmap.iter().filter(|i| *i.1 > threshold).collect();
let mut hm = HashMap::new();
for (k, v) in wpairs {
hm.insert(k.into(), *v);
}
WordMap::new(hm)
}
} }

View File

@ -1,21 +1,20 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::{write, Display}; use std::fmt::{write, Display};
use std::iter::Sum; use std::iter::Sum;
use std::ops::RangeFull;
use libpt::log::debug;
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
// NOTE: We might need a different implementation for more precision pub type Frequency = f64;
// NOTE: This struct requires a custom Serialize and Deserialize implementation
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct Frequency {
inner: f64,
}
// PERF: Hash for String is probably a bottleneck // PERF: Hash for String is probably a bottleneck
pub type Word = String; pub type Word = String;
pub type ManySolutions<'a> = Vec<(&'a Word, &'a Frequency)>;
pub type Solution = (Word, Frequency);
#[derive(Clone, Debug)] #[derive(Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct WordMap { pub struct WordMap {
#[serde(flatten)] #[serde(flatten)]
@ -23,6 +22,9 @@ pub struct WordMap {
} }
impl WordMap { impl WordMap {
pub fn new(inner: HashMap<Word, Frequency>) -> Self {
Self { inner }
}
pub fn keys(&self) -> std::collections::hash_map::Keys<'_, String, Frequency> { pub fn keys(&self) -> std::collections::hash_map::Keys<'_, String, Frequency> {
self.inner.keys() self.inner.keys()
} }
@ -32,67 +34,52 @@ impl WordMap {
pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Frequency> { pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Frequency> {
self.inner.iter() self.inner.iter()
} }
} pub fn freq_range(&self) -> std::ops::Range<Frequency> {
return 0.1e-10..1e-6;
// We need custom Serialize and Deserialize of Frequency, because they are only primitive types. let lowest: Frequency = todo!();
// Serde does not support serializing directly to and from primitives (such as floats) let highest: Frequency = todo!();
#[cfg(feature = "serde")] lowest..highest
impl<'de> Deserialize<'de> for Frequency {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
struct FrequencyVisitor;
impl<'v> serde::de::Visitor<'v> for FrequencyVisitor {
type Value = Frequency;
fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
write!(formatter, "a floating-point number")
}
fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(Frequency { inner: v })
}
}
deserializer.deserialize_any(FrequencyVisitor)
} }
} pub fn len(&self) -> usize {
#[cfg(feature = "serde")] self.inner.len()
impl Serialize for Frequency { }
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> #[must_use]
where pub fn is_empty(&self) -> bool {
S: serde::Serializer, self.len() == 0
{ }
serializer.serialize_f64(self.inner) pub fn n_common(&self) -> usize {
// TODO: calculate the amount of relatively common words
3000
}
pub fn threshold(&self) -> Frequency {
// HACK: I completely butchered the math here
// see https://github.com/3b1b/videos/blob/master/_2022/wordle/simulations.py
let l_under_sigmoid = 10_f64;
let len = self.len();
let mut c: f64 = l_under_sigmoid * (0.5 + self.n_common() as f64 / len as f64);
c *= 1e-7;
debug!(threshold=c);
c
}
pub fn inner(&self) -> &HashMap<Word, Frequency> {
&self.inner
} }
} }
impl From<Frequency> for f64 { impl std::fmt::Debug for WordMap {
fn from(value: Frequency) -> Self {
value.inner
}
}
impl From<f64> for Frequency {
fn from(value: f64) -> Self {
Frequency { inner: value }
}
}
impl Sum for Frequency {
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
iter.fold(Self { inner: 0.0 }, |a, b| Self {
inner: a.inner + b.inner,
})
}
}
impl Display for Frequency {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write(f, format_args!("{}", self.inner)) write(
f,
format_args!(
"WordMap {{\n\
\t\tlen: {}\n\
\t\tfreq_range: {:?}\n\
\t\tcommon: {:?}\n\
\t}}",
self.len(),
self.freq_range(),
self.n_common()
),
)
} }
} }