From 8d03d416796685cfb77cfcb0194e6fbaca158cba Mon Sep 17 00:00:00 2001 From: "Christoph J. Scherr" Date: Wed, 4 Jun 2025 22:22:13 +0200 Subject: [PATCH] move datastructures to its own crate --- Cargo.toml | 3 +- crates/datastructures/Cargo.toml | 6 - crates/datastructures/src/btree.rs | 189 ------------------- crates/datastructures/src/lib.rs | 3 - crates/datastructures/src/raw_vec.rs | 70 -------- crates/datastructures/src/vec.rs | 260 --------------------------- 6 files changed, 1 insertion(+), 530 deletions(-) delete mode 100644 crates/datastructures/Cargo.toml delete mode 100644 crates/datastructures/src/btree.rs delete mode 100644 crates/datastructures/src/lib.rs delete mode 100644 crates/datastructures/src/raw_vec.rs delete mode 100644 crates/datastructures/src/vec.rs diff --git a/Cargo.toml b/Cargo.toml index 0060a57..d6e7013 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,9 +31,8 @@ members = [ "crates/rfd-demo", "crates/iter-prod", "crates/tpdemo", - "crates/datastructures", "crates/graph", - "crates/sdl-idiot" + "crates/sdl-idiot", ] default-members = ["."] diff --git a/crates/datastructures/Cargo.toml b/crates/datastructures/Cargo.toml deleted file mode 100644 index 1ae4ad0..0000000 --- a/crates/datastructures/Cargo.toml +++ /dev/null @@ -1,6 +0,0 @@ -[package] -name = "minitree" -version = "0.1.0" -edition = "2024" - -[dependencies] diff --git a/crates/datastructures/src/btree.rs b/crates/datastructures/src/btree.rs deleted file mode 100644 index 82cfb96..0000000 --- a/crates/datastructures/src/btree.rs +++ /dev/null @@ -1,189 +0,0 @@ -use std::mem; - -use crate::vec::Vec; - -pub const DEFAULT_DEGREE: usize = 1; - -#[derive(Clone, Debug)] -pub struct BTree { - root: Node, - properties: BTreeProperties, -} - -#[derive(Clone, Debug, Copy)] -pub struct BTreeProperties { - degree: usize, - max_keys: usize, - mid_key_index: usize, -} - -#[derive(Clone, Debug)] -struct Node { - keys: Vec, - children: Vec>, -} - -impl BTreeProperties { - fn new(degree: usize) -> Self { - Self { - degree, - max_keys: degree - 1, - mid_key_index: (degree - 1) / 2, - } - } - - fn split_child(&self, parent: &mut Node, child_index: usize) { - let child = &mut parent.children[child_index]; - let middle_key: T = child.keys[self.mid_key_index].clone(); - let right_keys = match child.keys.split_off(self.mid_key_index).split_first() { - Some((_first, _others)) => { - // We don't need _first, as it will move to parent node. - _others.into() - } - None => Vec::::with_capacity(self.max_keys), - }; - let right_children = if !child.is_leaf() { - Some(child.children.split_off(self.mid_key_index + 1)) - } else { - None - }; - let new_child_node: Node = Node::new(self.degree, Some(right_keys), right_children); - - parent.keys.insert(child_index, middle_key); - parent.children.insert(child_index + 1, new_child_node); - } - - fn is_maxed_out(&self, node: &Node) -> bool { - node.keys.len() == self.max_keys - } - - fn insert_non_full(&mut self, node: &mut Node, key: T) { - let mut index: isize = isize::try_from(node.keys.len()).ok().unwrap() - 1; - while index >= 0 && node.keys[index as usize] >= key { - index -= 1; - } - - let mut u_index: usize = usize::try_from(index + 1).ok().unwrap(); - if node.is_leaf() { - // Just insert it, as we know this method will be called only when node is not full - node.keys.insert(u_index, key); - } else { - if self.is_maxed_out(&node.children[u_index]) { - self.split_child(node, u_index); - if node.keys[u_index] < key { - u_index += 1; - } - } - - self.insert_non_full(&mut node.children[u_index], key); - } - } -} - -impl Node -where - T: Ord, -{ - fn new(degree: usize, keys: Option>, children: Option>>) -> Self { - Node { - keys: match keys { - Some(keys) => keys, - None => Vec::with_capacity(degree - 1), - }, - children: match children { - Some(children) => children, - None => Vec::with_capacity(degree), - }, - } - } - - fn is_leaf(&self) -> bool { - self.children.is_empty() - } -} - -impl BTree { - pub fn new(branch_factor: usize) -> Self { - let degree = 2 * branch_factor; - Self { - root: Node::new(degree, None, None), - properties: BTreeProperties::new(degree), - } - } - - pub fn clear(&mut self) { - self.root = Node::new(self.properties.degree, None, None); - } - - pub fn insert(&mut self, key: T) { - if self.properties.is_maxed_out(&self.root) { - // Create an empty root and split the old root... - let mut new_root = Node::new(self.properties.degree, None, None); - mem::swap(&mut new_root, &mut self.root); - self.root.children.insert(0, new_root); - self.properties.split_child(&mut self.root, 0); - } - self.properties.insert_non_full(&mut self.root, key) - } - - #[must_use] - pub fn has(&self, key: T) -> bool { - let mut current_node = &self.root; - loop { - match current_node.keys.binary_search(&key) { - Ok(_) => return true, - Err(idx) => { - if current_node.is_leaf() { - return false; - } - current_node = ¤t_node.children[idx]; - } - } - } - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_create() { - let _tree = BTree::::new(DEFAULT_DEGREE); - } - - #[test] - fn test_insert_easy() { - let mut tree = BTree::::new(DEFAULT_DEGREE); - let data = &[19, 125, 25, 16, 2, 73, 384, 435, 12924, 42, 125251, 2548]; - - for d in data { - tree.insert(*d) - } - - for d in data { - assert!(tree.has(*d)) - } - } - - #[test] - fn test_insert_many() { - let mut tree = BTree::::new(DEFAULT_DEGREE); - let mut data = vec![19, 125, 25, 16, 2, 73, 384, 435, 12924, 42, 125251, 2548]; - - for _ in 0..10 { - data.extend(data.clone()); - } - - // data has 12288 elements here! This is a lot, but should be reasonably possible for a btree. - println!("len of data: {}", data.len()); - - for d in &data { - tree.insert(*d) - } - - for d in &data { - assert!(tree.has(*d)) - } - } -} diff --git a/crates/datastructures/src/lib.rs b/crates/datastructures/src/lib.rs deleted file mode 100644 index b895c26..0000000 --- a/crates/datastructures/src/lib.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod btree; -pub mod raw_vec; -pub mod vec; diff --git a/crates/datastructures/src/raw_vec.rs b/crates/datastructures/src/raw_vec.rs deleted file mode 100644 index 71bfa8a..0000000 --- a/crates/datastructures/src/raw_vec.rs +++ /dev/null @@ -1,70 +0,0 @@ -use std::{ - alloc::{self, Layout}, - ptr::NonNull, -}; - -#[derive(Clone, Debug)] -pub(crate) struct RawVec { - pub(crate) ptr: NonNull, - pub(crate) capacity: usize, -} - -impl RawVec { - pub(crate) fn new() -> Self { - Self { - ptr: NonNull::dangling(), - capacity: 0, - } - } - - // See rustonomicon, chapter 9.2 - pub(crate) fn grow_by(&mut self, added_capacity: usize) { - let new_cap = self.capacity + added_capacity; - // `Layout::array` checks that the number of bytes is <= usize::MAX, - // but this is redundant since old_layout.size() <= isize::MAX, - // so the `unwrap` should never fail. - let new_layout = Layout::array::(new_cap).unwrap(); - - // Ensure that the new allocation doesn't exceed `isize::MAX` bytes. - if new_layout.size() > isize::MAX as usize { - alloc::handle_alloc_error(new_layout); - } - - let new_ptr = if self.capacity == 0 { - unsafe { alloc::alloc(new_layout) } - } else { - let old_layout = Layout::array::(self.capacity).unwrap(); - let old_ptr = self.ptr.as_ptr() as *mut u8; - unsafe { alloc::realloc(old_ptr, old_layout, new_layout.size()) } - }; - - // If allocation fails, `new_ptr` will be null, in which case we abort. - self.ptr = match NonNull::new(new_ptr as *mut T) { - Some(p) => p, - None => alloc::handle_alloc_error(new_layout), - }; - self.capacity = new_cap; - } - - pub(crate) fn grow(&mut self) { - if self.capacity == 0 { - self.grow_by(1); - } else { - self.grow_by(self.capacity); - } - } -} - -impl Drop for RawVec { - fn drop(&mut self) { - if self.capacity != 0 { - let layout = Layout::array::(self.capacity).unwrap(); - unsafe { - alloc::dealloc(self.ptr.as_ptr() as *mut u8, layout); - } - } - } -} - -unsafe impl Send for RawVec {} -unsafe impl Sync for RawVec {} diff --git a/crates/datastructures/src/vec.rs b/crates/datastructures/src/vec.rs deleted file mode 100644 index 5596975..0000000 --- a/crates/datastructures/src/vec.rs +++ /dev/null @@ -1,260 +0,0 @@ -//! Custom implementation of the Vector datastructure -//! -//! Many thanks to the rustonomicon, chapter 9: -//! https://doc.rust-lang.org/nomicon/vec/vec.html - -use std::{ - mem, - ops::{Deref, DerefMut, Index, IndexMut}, - ptr, -}; - -use crate::raw_vec::RawVec; - -#[derive(Clone, Debug)] -pub struct Vec { - used: usize, - buf: RawVec, -} - -impl Default for Vec { - fn default() -> Self { - Self::new() - } -} - -impl Vec { - pub fn new() -> Self { - if mem::size_of::() == 0 { - panic!("We're not ready to handle ZSTs"); - } - Vec { - used: 0, - buf: RawVec::new(), - } - } - - pub fn with_capacity(capacity: usize) -> Self { - if mem::size_of::() == 0 { - panic!("We're not ready to handle ZSTs"); - } - - let mut v = Self::new(); - v.reserve(capacity); - v - } - - pub fn from_slice(data: &[T]) -> Self { - let mut v = Vec::::with_capacity(data.len()); - unsafe { - ptr::copy_nonoverlapping(data.as_ptr(), v.as_mut_ptr(), data.len()); - } - v - } - - pub fn pop(&mut self) -> Option { - if self.used == 0 { - None - } else { - self.used -= 1; - unsafe { Some(ptr::read(self.buf.ptr.as_ptr().add(self.used))) } - } - } - - pub fn push(&mut self, value: T) { - if self.used == self.buf.capacity { - self.buf.grow(); - } - - unsafe { - ptr::write(self.buf.ptr.as_ptr().add(self.used), value); - } - - self.used += 1; - } - - #[must_use] - pub fn len(&self) -> usize { - self.used - } - - #[must_use] - pub fn capacity(&self) -> usize { - self.buf.capacity - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn insert(&mut self, index: usize, elem: T) { - // Note: `<=` because it's valid to insert after everything - // which would be equivalent to push. - assert!(index <= self.used, "index out of bounds"); - if self.used == self.buf.capacity { - self.buf.grow(); - } - - unsafe { - // ptr::copy(src, dest, len): "copy from src to dest len elems" - ptr::copy( - self.buf.ptr.as_ptr().add(index), - self.buf.ptr.as_ptr().add(index + 1), - self.used - index, - ); - ptr::write(self.buf.ptr.as_ptr().add(index), elem); - } - - self.used += 1; - } - - pub fn remove(&mut self, index: usize) -> T { - // Note: `<` because it's *not* valid to remove after everything - assert!(index < self.used, "index out of bounds"); - unsafe { - self.used -= 1; - let result = ptr::read(self.buf.ptr.as_ptr().add(index)); - ptr::copy( - self.buf.ptr.as_ptr().add(index + 1), - self.buf.ptr.as_ptr().add(index), - self.used - index, - ); - result - } - } - - pub fn reserve(&mut self, added_capacity: usize) { - self.buf.grow_by(added_capacity); - } - - #[must_use] - pub fn split_off(&mut self, at: usize) -> Self { - let other_len = self.used - at; - let mut other = Self::with_capacity(other_len); - unsafe { - self.set_len(at); - other.set_len(other_len); - ptr::copy_nonoverlapping(self.as_ptr().add(at), other.as_mut_ptr(), other.len()); - } - other - } - - unsafe fn set_len(&mut self, new_length: usize) { - self.used = new_length - } - - #[must_use] - pub const fn as_ptr(&self) -> *const T { - self.buf.ptr.as_ptr() - } - - #[must_use] - pub const fn as_mut_ptr(&mut self) -> *mut T { - self.buf.ptr.as_ptr() - } -} - -impl Index for Vec { - type Output = T; - - #[inline] - fn index(&self, index: usize) -> &Self::Output { - Index::index(&**self, index) - } -} - -impl IndexMut for Vec { - #[inline] - fn index_mut(&mut self, index: usize) -> &mut Self::Output { - IndexMut::index_mut(&mut **self, index) - } -} - -impl Deref for Vec { - type Target = [T]; - fn deref(&self) -> &[T] { - unsafe { std::slice::from_raw_parts(self.buf.ptr.as_ptr(), self.used) } - } -} - -impl DerefMut for Vec { - fn deref_mut(&mut self) -> &mut [T] { - unsafe { std::slice::from_raw_parts_mut(self.buf.ptr.as_ptr(), self.used) } - } -} - -impl Drop for Vec { - fn drop(&mut self) { - while self.pop().is_some() {} - } -} - -impl From<&[T]> for Vec { - fn from(value: &[T]) -> Self { - Self::from_slice(value) - } -} - -unsafe impl Send for Vec {} -unsafe impl Sync for Vec {} - -#[cfg(test)] -mod test { - use super::*; - #[test] - fn test_create() { - let _v = Vec::::new(); - } - - #[test] - fn test_pushpop_num() { - let mut v = Vec::new(); - let vals = &[19, 9, 14, 255, 19191919, 13890, 21521, 1251, 6216, 1830]; - - for val in vals { - v.push(*val); - } - for val in vals.iter().rev() { - assert_eq!(v.pop().unwrap(), *val); - } - } - - #[test] - fn test_pushpop_str() { - let mut v = Vec::new(); - let vals = &["AAAA", "ABBAB", "BBABBABBAJJJ"]; - - for val in vals { - v.push(*val); - } - for val in vals.iter().rev() { - assert_eq!(v.pop().unwrap(), *val); - } - } - - #[test] - fn test_pushindex_num() { - let mut v = Vec::new(); - let vals = &[19, 9, 14, 255, 19191919, 13890, 21521, 1251, 6216, 1830]; - - for val in vals { - v.push(*val); - } - for (idx, val) in vals.iter().enumerate() { - assert_eq!(v[idx], *val); - } - } - - #[test] - fn test_pushindex_str() { - let mut v = Vec::new(); - let vals = &["AAAA", "ABBAB", "BBABBABBAJJJ"]; - - for val in vals { - v.push(*val); - } - for (idx, val) in vals.iter().enumerate() { - assert_eq!(v[idx], *val); - } - } -}