generated from PlexSheep/rs-base
i have an okay word chooser
This commit is contained in:
parent
ca473824fe
commit
4e3cf29608
|
@ -1,7 +1,7 @@
|
||||||
use crate::wlist::word::Word;
|
use crate::wlist::word::{Frequency, Solution, Word};
|
||||||
use crate::wlist::WordList;
|
use crate::wlist::WordList;
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub struct Game<WL>
|
pub struct Game<WL>
|
||||||
where
|
where
|
||||||
WL: WordList,
|
WL: WordList,
|
||||||
|
@ -10,7 +10,7 @@ where
|
||||||
precompute: bool,
|
precompute: bool,
|
||||||
max_steps: usize,
|
max_steps: usize,
|
||||||
step: usize,
|
step: usize,
|
||||||
solution: Word,
|
solution: Solution,
|
||||||
wordlist: WL,
|
wordlist: WL,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -32,16 +32,16 @@ impl<WL: WordList> Game<WL> {
|
||||||
///
|
///
|
||||||
/// This function will return an error if .
|
/// This function will return an error if .
|
||||||
pub(crate) fn build(length: usize, precompute: bool, max_steps: usize, wlist: WL) -> anyhow::Result<Self> {
|
pub(crate) fn build(length: usize, precompute: bool, max_steps: usize, wlist: WL) -> anyhow::Result<Self> {
|
||||||
|
let solution = wlist.rand_solution();
|
||||||
let mut game = Game {
|
let mut game = Game {
|
||||||
length,
|
length,
|
||||||
precompute,
|
precompute,
|
||||||
max_steps,
|
max_steps,
|
||||||
step: 0,
|
step: 0,
|
||||||
solution: Word::default(), // we actually set this later
|
solution,
|
||||||
wordlist: wlist
|
wordlist: wlist
|
||||||
};
|
};
|
||||||
|
|
||||||
game.solution = game.wordlist.rand_solution().into();
|
|
||||||
Ok(game)
|
Ok(game)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,12 +12,6 @@ pub struct BuiltinWList {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl super::WordList for BuiltinWList {
|
impl super::WordList for BuiltinWList {
|
||||||
fn solutions(&self) -> Vec<&Word> {
|
|
||||||
// PERF: this can be made faster if we were to use parallel iterators or chunking
|
|
||||||
// TODO: Filter should be a bit more elegant
|
|
||||||
let threshold = self.total_freq() / 2;
|
|
||||||
self.wordmap().iter().filter(|i| i.1 > )
|
|
||||||
}
|
|
||||||
fn length_range(&self) -> impl std::ops::RangeBounds<usize> {
|
fn length_range(&self) -> impl std::ops::RangeBounds<usize> {
|
||||||
5..5
|
5..5
|
||||||
}
|
}
|
||||||
|
@ -39,9 +33,20 @@ impl Debug for BuiltinWList {
|
||||||
write(
|
write(
|
||||||
f,
|
f,
|
||||||
format_args!(
|
format_args!(
|
||||||
"BuiltinWList {{ amount: {}, total_freq: {} }}",
|
"BuiltinWList {{ \n\
|
||||||
|
\tamount: {}, \n\
|
||||||
|
\ttotal_freq: {}, \n\
|
||||||
|
\tcommon: {}, \n\
|
||||||
|
\tthreshold: {}, \n\
|
||||||
|
\tfreq_range: {:?}, \n\
|
||||||
|
\tover_threshold: {:#?}, \n\
|
||||||
|
}}",
|
||||||
self.amount(),
|
self.amount(),
|
||||||
self.total_freq()
|
self.total_freq(),
|
||||||
|
self.wordmap().n_common(),
|
||||||
|
self.wordmap().threshold(),
|
||||||
|
self.wordmap().freq_range(),
|
||||||
|
self.over_threashold()
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,16 +10,15 @@ use word::*;
|
||||||
pub type AnyWordlist = Box<dyn WordList>;
|
pub type AnyWordlist = Box<dyn WordList>;
|
||||||
|
|
||||||
pub trait WordList: Clone + std::fmt::Debug + Default {
|
pub trait WordList: Clone + std::fmt::Debug + Default {
|
||||||
// NOTE: The possible answers should be determined with a wordlist that has the
|
fn solutions(&self) -> ManySolutions {
|
||||||
// frequencies/probabilities of the words. We then use a sigmoid function to determine if a
|
let wmap = self.wordmap();
|
||||||
// word can be a solution based on that value. Only words above some threshold of
|
let threshold = wmap.threshold();
|
||||||
// commonness will be available as solutions then. Next, we choose one of the allowed words
|
wmap.iter().filter(|i| *i.1 > threshold).collect()
|
||||||
// randomly.
|
}
|
||||||
// NOTE: must never return nothing
|
fn rand_solution(&self) -> Solution {
|
||||||
fn solutions(&self) -> Vec<&Word>;
|
|
||||||
fn rand_solution(&self) -> &Word {
|
|
||||||
let mut rng = rand::thread_rng();
|
let mut rng = rand::thread_rng();
|
||||||
self.solutions().iter().choose(&mut rng).unwrap()
|
let sol = *self.solutions().iter().choose(&mut rng).unwrap();
|
||||||
|
(sol.0.to_owned(), sol.1.to_owned())
|
||||||
}
|
}
|
||||||
fn length_range(&self) -> impl RangeBounds<usize>;
|
fn length_range(&self) -> impl RangeBounds<usize>;
|
||||||
fn amount(&self) -> usize {
|
fn amount(&self) -> usize {
|
||||||
|
@ -29,4 +28,14 @@ pub trait WordList: Clone + std::fmt::Debug + Default {
|
||||||
fn total_freq(&self) -> Frequency {
|
fn total_freq(&self) -> Frequency {
|
||||||
self.wordmap().values().map(|a| a.to_owned()).sum()
|
self.wordmap().values().map(|a| a.to_owned()).sum()
|
||||||
}
|
}
|
||||||
|
fn over_threashold(&self) -> WordMap {
|
||||||
|
let wmap = self.wordmap();
|
||||||
|
let threshold = wmap.threshold();
|
||||||
|
let wpairs: Vec<(_, _)> = wmap.iter().filter(|i| *i.1 > threshold).collect();
|
||||||
|
let mut hm = HashMap::new();
|
||||||
|
for (k, v) in wpairs {
|
||||||
|
hm.insert(k.into(), *v);
|
||||||
|
}
|
||||||
|
WordMap::new(hm)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,21 +1,20 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fmt::{write, Display};
|
use std::fmt::{write, Display};
|
||||||
use std::iter::Sum;
|
use std::iter::Sum;
|
||||||
|
use std::ops::RangeFull;
|
||||||
|
|
||||||
|
use libpt::log::debug;
|
||||||
#[cfg(feature = "serde")]
|
#[cfg(feature = "serde")]
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
// NOTE: We might need a different implementation for more precision
|
pub type Frequency = f64;
|
||||||
// NOTE: This struct requires a custom Serialize and Deserialize implementation
|
|
||||||
#[derive(Clone, Debug, PartialEq, PartialOrd)]
|
|
||||||
pub struct Frequency {
|
|
||||||
inner: f64,
|
|
||||||
}
|
|
||||||
|
|
||||||
// PERF: Hash for String is probably a bottleneck
|
// PERF: Hash for String is probably a bottleneck
|
||||||
pub type Word = String;
|
pub type Word = String;
|
||||||
|
pub type ManySolutions<'a> = Vec<(&'a Word, &'a Frequency)>;
|
||||||
|
pub type Solution = (Word, Frequency);
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone)]
|
||||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||||
pub struct WordMap {
|
pub struct WordMap {
|
||||||
#[serde(flatten)]
|
#[serde(flatten)]
|
||||||
|
@ -23,6 +22,9 @@ pub struct WordMap {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WordMap {
|
impl WordMap {
|
||||||
|
pub fn new(inner: HashMap<Word, Frequency>) -> Self {
|
||||||
|
Self { inner }
|
||||||
|
}
|
||||||
pub fn keys(&self) -> std::collections::hash_map::Keys<'_, String, Frequency> {
|
pub fn keys(&self) -> std::collections::hash_map::Keys<'_, String, Frequency> {
|
||||||
self.inner.keys()
|
self.inner.keys()
|
||||||
}
|
}
|
||||||
|
@ -32,67 +34,52 @@ impl WordMap {
|
||||||
pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Frequency> {
|
pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Frequency> {
|
||||||
self.inner.iter()
|
self.inner.iter()
|
||||||
}
|
}
|
||||||
|
pub fn freq_range(&self) -> std::ops::Range<Frequency> {
|
||||||
|
return 0.1e-10..1e-6;
|
||||||
|
let lowest: Frequency = todo!();
|
||||||
|
let highest: Frequency = todo!();
|
||||||
|
lowest..highest
|
||||||
}
|
}
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
// We need custom Serialize and Deserialize of Frequency, because they are only primitive types.
|
self.inner.len()
|
||||||
// Serde does not support serializing directly to and from primitives (such as floats)
|
|
||||||
#[cfg(feature = "serde")]
|
|
||||||
impl<'de> Deserialize<'de> for Frequency {
|
|
||||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
|
||||||
where
|
|
||||||
D: serde::Deserializer<'de>,
|
|
||||||
{
|
|
||||||
struct FrequencyVisitor;
|
|
||||||
impl<'v> serde::de::Visitor<'v> for FrequencyVisitor {
|
|
||||||
type Value = Frequency;
|
|
||||||
|
|
||||||
fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
|
|
||||||
write!(formatter, "a floating-point number")
|
|
||||||
}
|
}
|
||||||
|
#[must_use]
|
||||||
fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
|
pub fn is_empty(&self) -> bool {
|
||||||
where
|
self.len() == 0
|
||||||
E: serde::de::Error,
|
}
|
||||||
{
|
pub fn n_common(&self) -> usize {
|
||||||
Ok(Frequency { inner: v })
|
// TODO: calculate the amount of relatively common words
|
||||||
|
3000
|
||||||
|
}
|
||||||
|
pub fn threshold(&self) -> Frequency {
|
||||||
|
// HACK: I completely butchered the math here
|
||||||
|
// see https://github.com/3b1b/videos/blob/master/_2022/wordle/simulations.py
|
||||||
|
let l_under_sigmoid = 10_f64;
|
||||||
|
let len = self.len();
|
||||||
|
let mut c: f64 = l_under_sigmoid * (0.5 + self.n_common() as f64 / len as f64);
|
||||||
|
c *= 1e-7;
|
||||||
|
debug!(threshold=c);
|
||||||
|
c
|
||||||
|
}
|
||||||
|
pub fn inner(&self) -> &HashMap<Word, Frequency> {
|
||||||
|
&self.inner
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
deserializer.deserialize_any(FrequencyVisitor)
|
impl std::fmt::Debug for WordMap {
|
||||||
}
|
|
||||||
}
|
|
||||||
#[cfg(feature = "serde")]
|
|
||||||
impl Serialize for Frequency {
|
|
||||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
|
||||||
where
|
|
||||||
S: serde::Serializer,
|
|
||||||
{
|
|
||||||
serializer.serialize_f64(self.inner)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<Frequency> for f64 {
|
|
||||||
fn from(value: Frequency) -> Self {
|
|
||||||
value.inner
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<f64> for Frequency {
|
|
||||||
fn from(value: f64) -> Self {
|
|
||||||
Frequency { inner: value }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Sum for Frequency {
|
|
||||||
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
|
|
||||||
iter.fold(Self { inner: 0.0 }, |a, b| Self {
|
|
||||||
inner: a.inner + b.inner,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Display for Frequency {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
write(f, format_args!("{}", self.inner))
|
write(
|
||||||
|
f,
|
||||||
|
format_args!(
|
||||||
|
"WordMap {{\n\
|
||||||
|
\t\tlen: {}\n\
|
||||||
|
\t\tfreq_range: {:?}\n\
|
||||||
|
\t\tcommon: {:?}\n\
|
||||||
|
\t}}",
|
||||||
|
self.len(),
|
||||||
|
self.freq_range(),
|
||||||
|
self.n_common()
|
||||||
|
),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue