From 6e8b6b327bf508a26dfaa5c2c8499b8abd7288f0 Mon Sep 17 00:00:00 2001 From: "Christoph J. Scherr" Date: Thu, 11 Jan 2024 11:46:34 +0100 Subject: [PATCH] better explanation for rsqrt --- members/revsqrt/benches/rsqrt-bench.rs | 6 +++--- members/revsqrt/src/lib.rs | 21 +++++++++++++++++---- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/members/revsqrt/benches/rsqrt-bench.rs b/members/revsqrt/benches/rsqrt-bench.rs index 1036b98..d4274f8 100644 --- a/members/revsqrt/benches/rsqrt-bench.rs +++ b/members/revsqrt/benches/rsqrt-bench.rs @@ -1,12 +1,12 @@ #![allow(unused)] use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; -use revsqrt::{fast_inverse_sqrt, inverse_sqrt}; +use revsqrt::{fast_inverse_sqrt, regular_inverse_sqrt}; const SIZE: f32 = 1337.1337; const FCONST: f32 = 1024.12481224; const FCONST1: f32 = 4025.724812234; pub fn single_input(c: &mut Criterion) { c.bench_with_input(BenchmarkId::new("regular rsqrt", SIZE), &SIZE, |b, &s| { - b.iter(|| inverse_sqrt(s)) + b.iter(|| regular_inverse_sqrt(s)) }); c.bench_with_input(BenchmarkId::new("fast rsqrt", SIZE), &SIZE, |b, &s| { b.iter(|| fast_inverse_sqrt(s)) @@ -29,7 +29,7 @@ pub fn multi_input(c: &mut Criterion) { BenchmarkId::new("regular rsqrt mixed input", FCONST), size, |b, &size| { - b.iter(|| inverse_sqrt(size)); + b.iter(|| regular_inverse_sqrt(size)); }, ); } diff --git a/members/revsqrt/src/lib.rs b/members/revsqrt/src/lib.rs index 43d77f2..cfb60d2 100644 --- a/members/revsqrt/src/lib.rs +++ b/members/revsqrt/src/lib.rs @@ -1,20 +1,33 @@ +/// Calculate the inverse square root of a number `n` using the Rust std library. +/// This is faster than the fast inverse square root algorithm, because sqrt +/// is a CPU instruction on modern CPUs (`fsqrt`). Inverting as 1/x seems to +/// be pretty fast too, just using a regular division. 
#[inline] -pub fn inverse_sqrt(n: f32) -> f32 { - 1f32 / n.sqrt() +pub fn regular_inverse_sqrt(n: f32) -> f32 { + n.sqrt().recip() } +/// Helper union that lets us reinterpret the same 32 bits as either a [u32] or an [f32] for the fast inverse square root +/// algorithm. union MixedIntFloat { f: f32, i: u32, } +/// Interestingly, the benchmark shows that this function is not faster than the regular inverse sqrt. +/// This is probably due to the CPU being able to calculate the inverse square root with a regular +/// calculation in two instructions: sqrt and division. +/// /// see https://en.wikipedia.org/wiki/Fast_inverse_square_root +/// +/// This uses unsafe code, but I've decided not to move it from rs-basic to rs-unsafe, +/// as I only use this example for benchmarking with criterion. #[inline] pub fn fast_inverse_sqrt(n: f32) -> f32 { let mut conv: MixedIntFloat = MixedIntFloat { f: n }; unsafe { + // Reading from a union is unsafe in Rust. conv.i = 0x5f3759df - (conv.i >> 1); - conv.f *= 1.5f32 - (n * 0.5f32 * conv.f * conv.f); - conv.f + conv.f * (1.5 - n * 0.5 * conv.f * conv.f) } }