better explanation for rsqrt
Cargo Format, Check and Test / cargo fmt (push) Successful in 47s Details
Cargo Format, Check and Test / cargo check (push) Successful in 42s Details
Cargo Format, Check and Test / cargo test (push) Successful in 1m17s Details

This commit is contained in:
Christoph J. Scherr 2024-01-11 11:46:34 +01:00
parent 0d694f69d6
commit 6e8b6b327b
Signed by: cscherrNT
GPG Key ID: 8E2B45BC51A27EA7
2 changed files with 20 additions and 7 deletions

View File

@ -1,12 +1,12 @@
#![allow(unused)] #![allow(unused)]
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use revsqrt::{fast_inverse_sqrt, inverse_sqrt}; use revsqrt::{fast_inverse_sqrt, regular_inverse_sqrt};
const SIZE: f32 = 1337.1337; const SIZE: f32 = 1337.1337;
const FCONST: f32 = 1024.12481224; const FCONST: f32 = 1024.12481224;
const FCONST1: f32 = 4025.724812234; const FCONST1: f32 = 4025.724812234;
pub fn single_input(c: &mut Criterion) { pub fn single_input(c: &mut Criterion) {
c.bench_with_input(BenchmarkId::new("regular rsqrt", SIZE), &SIZE, |b, &s| { c.bench_with_input(BenchmarkId::new("regular rsqrt", SIZE), &SIZE, |b, &s| {
b.iter(|| inverse_sqrt(s)) b.iter(|| regular_inverse_sqrt(s))
}); });
c.bench_with_input(BenchmarkId::new("fast rsqrt", SIZE), &SIZE, |b, &s| { c.bench_with_input(BenchmarkId::new("fast rsqrt", SIZE), &SIZE, |b, &s| {
b.iter(|| fast_inverse_sqrt(s)) b.iter(|| fast_inverse_sqrt(s))
@ -29,7 +29,7 @@ pub fn multi_input(c: &mut Criterion) {
BenchmarkId::new("regular rsqrt mixed input", FCONST), BenchmarkId::new("regular rsqrt mixed input", FCONST),
size, size,
|b, &size| { |b, &size| {
b.iter(|| inverse_sqrt(size)); b.iter(|| regular_inverse_sqrt(size));
}, },
); );
} }

View File

@ -1,20 +1,33 @@
/// Calculate the inverse square root of a number `n` using the Rust standard library.
/// This is faster than the fast inverse square root algorithm, because the square
/// root is a hardware instruction on modern CPUs (e.g. `fsqrt`, or `sqrtss` on
/// x86-64). Inverting as 1/x seems to be pretty fast too, using just a regular division.
#[inline] #[inline]
pub fn inverse_sqrt(n: f32) -> f32 { pub fn regular_inverse_sqrt(n: f32) -> f32 {
1f32 / n.sqrt() n.sqrt().recip()
} }
/// Helper union that lets us convert between [`u32`] and [`f32`] for the fast inverse
/// square root algorithm.
union MixedIntFloat { union MixedIntFloat {
f: f32, f: f32,
i: u32, i: u32,
} }
/// Interestingly, the benchmark shows that this function is not faster than the regular
/// inverse square root. This is probably because the CPU can calculate the inverse square
/// root directly with just two instructions: a square root and a division.
///
/// see https://en.wikipedia.org/wiki/Fast_inverse_square_root /// see https://en.wikipedia.org/wiki/Fast_inverse_square_root
///
/// This uses `unsafe`, but I've decided to keep it in rs-basic rather than move it to
/// rs-unsafe, as I only use this example for benchmarking with criterion.
#[inline] #[inline]
pub fn fast_inverse_sqrt(n: f32) -> f32 { pub fn fast_inverse_sqrt(n: f32) -> f32 {
let mut conv: MixedIntFloat = MixedIntFloat { f: n }; let mut conv: MixedIntFloat = MixedIntFloat { f: n };
unsafe { unsafe {
// reading from a union is unsafe in Rust.
conv.i = 0x5f3759df - (conv.i >> 1); conv.i = 0x5f3759df - (conv.i >> 1);
conv.f *= 1.5f32 - (n * 0.5f32 * conv.f * conv.f); conv.f * (1.5 - n * 0.5 * conv.f * conv.f)
conv.f
} }
} }