Diffstat (limited to 'rand/src/distributions/utils.rs')
-rw-r--r-- | rand/src/distributions/utils.rs | 488
1 file changed, 0 insertions, 488 deletions
diff --git a/rand/src/distributions/utils.rs b/rand/src/distributions/utils.rs
deleted file mode 100644
index 3af4e86..0000000
--- a/rand/src/distributions/utils.rs
+++ /dev/null
@@ -1,488 +0,0 @@
-// Copyright 2018 Developers of the Rand project.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-//! Math helper functions
-
-#[cfg(feature="simd_support")]
-use packed_simd::*;
-#[cfg(feature="std")]
-use crate::distributions::ziggurat_tables;
-#[cfg(feature="std")]
-use crate::Rng;
-
-
-pub trait WideningMultiply<RHS = Self> {
-    type Output;
-
-    fn wmul(self, x: RHS) -> Self::Output;
-}
-
-macro_rules! wmul_impl {
-    ($ty:ty, $wide:ty, $shift:expr) => {
-        impl WideningMultiply for $ty {
-            type Output = ($ty, $ty);
-
-            #[inline(always)]
-            fn wmul(self, x: $ty) -> Self::Output {
-                let tmp = (self as $wide) * (x as $wide);
-                ((tmp >> $shift) as $ty, tmp as $ty)
-            }
-        }
-    };
-
-    // simd bulk implementation
-    ($(($ty:ident, $wide:ident),)+, $shift:expr) => {
-        $(
-            impl WideningMultiply for $ty {
-                type Output = ($ty, $ty);
-
-                #[inline(always)]
-                fn wmul(self, x: $ty) -> Self::Output {
-                    // For supported vectors, this should compile to a couple
-                    // of supported multiply & swizzle instructions (no actual
-                    // casting).
-                    // TODO: optimize
-                    let y: $wide = self.cast();
-                    let x: $wide = x.cast();
-                    let tmp = y * x;
-                    let hi: $ty = (tmp >> $shift).cast();
-                    let lo: $ty = tmp.cast();
-                    (hi, lo)
-                }
-            }
-        )+
-    };
-}
-wmul_impl! { u8, u16, 8 }
-wmul_impl! { u16, u32, 16 }
-wmul_impl! { u32, u64, 32 }
-#[cfg(not(target_os = "emscripten"))]
-wmul_impl! { u64, u128, 64 }
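
The scalar `wmul` implementations above return the high and low halves of the full double-width product. A minimal standalone sketch of the same computation for `u64` (the `wmul_u64` name is illustrative, not part of the crate):

// Widening 64x64 -> 128-bit multiply, split back into two 64-bit halves.
fn wmul_u64(a: u64, b: u64) -> (u64, u64) {
    let wide = (a as u128) * (b as u128);
    ((wide >> 64) as u64, wide as u64) // (high, low)
}

fn main() {
    // 2^63 * 4 = 2^65, so the high word is 2 and the low word is 0.
    assert_eq!(wmul_u64(1 << 63, 4), (2, 0));
}
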
-
-// This code is a translation of the __mulddi3 function in LLVM's
-// compiler-rt. It is an optimised variant of the common method
-// `(a + b) * (c + d) = ac + ad + bc + bd`.
-//
-// For some reason LLVM can optimise the C version very well, but
-// keeps shuffling registers in this Rust translation.
-macro_rules! wmul_impl_large {
-    ($ty:ty, $half:expr) => {
-        impl WideningMultiply for $ty {
-            type Output = ($ty, $ty);
-
-            #[inline(always)]
-            fn wmul(self, b: $ty) -> Self::Output {
-                const LOWER_MASK: $ty = !0 >> $half;
-                let mut low = (self & LOWER_MASK).wrapping_mul(b & LOWER_MASK);
-                let mut t = low >> $half;
-                low &= LOWER_MASK;
-                t += (self >> $half).wrapping_mul(b & LOWER_MASK);
-                low += (t & LOWER_MASK) << $half;
-                let mut high = t >> $half;
-                t = low >> $half;
-                low &= LOWER_MASK;
-                t += (b >> $half).wrapping_mul(self & LOWER_MASK);
-                low += (t & LOWER_MASK) << $half;
-                high += t >> $half;
-                high += (self >> $half).wrapping_mul(b >> $half);
-
-                (high, low)
-            }
-        }
-    };
-
-    // simd bulk implementation
-    (($($ty:ty,)+) $scalar:ty, $half:expr) => {
-        $(
-            impl WideningMultiply for $ty {
-                type Output = ($ty, $ty);
-
-                #[inline(always)]
-                fn wmul(self, b: $ty) -> Self::Output {
-                    // needs wrapping multiplication
-                    const LOWER_MASK: $scalar = !0 >> $half;
-                    let mut low = (self & LOWER_MASK) * (b & LOWER_MASK);
-                    let mut t = low >> $half;
-                    low &= LOWER_MASK;
-                    t += (self >> $half) * (b & LOWER_MASK);
-                    low += (t & LOWER_MASK) << $half;
-                    let mut high = t >> $half;
-                    t = low >> $half;
-                    low &= LOWER_MASK;
-                    t += (b >> $half) * (self & LOWER_MASK);
-                    low += (t & LOWER_MASK) << $half;
-                    high += t >> $half;
-                    high += (self >> $half) * (b >> $half);
-
-                    (high, low)
-                }
-            }
-        )+
-    };
-}
-#[cfg(target_os = "emscripten")]
-wmul_impl_large! { u64, 32 }
-#[cfg(not(target_os = "emscripten"))]
-wmul_impl_large! { u128, 64 }
-
-macro_rules! wmul_impl_usize {
-    ($ty:ty) => {
-        impl WideningMultiply for usize {
-            type Output = (usize, usize);
-
-            #[inline(always)]
-            fn wmul(self, x: usize) -> Self::Output {
-                let (high, low) = (self as $ty).wmul(x as $ty);
-                (high as usize, low as usize)
-            }
-        }
-    }
-}
-#[cfg(target_pointer_width = "32")]
-wmul_impl_usize! { u32 }
-#[cfg(target_pointer_width = "64")]
-wmul_impl_usize! { u64 }
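
For targets without a wider integer type, `wmul_impl_large!` assembles the product from half-width pieces, as in `__mulddi3`. A sketch of the same idea in its plainer schoolbook form, checked against the native `u128` result (all names are illustrative):

// 64x64 -> (high, low) using only 32-bit half-words, following the
// schoolbook decomposition (a_hi*2^32 + a_lo) * (b_hi*2^32 + b_lo).
fn wmul_halves(a: u64, b: u64) -> (u64, u64) {
    const MASK: u64 = 0xffff_ffff;
    let (a_lo, a_hi) = (a & MASK, a >> 32);
    let (b_lo, b_hi) = (b & MASK, b >> 32);

    let ll = a_lo * b_lo; // contributes to bits 0..64
    let lh = a_lo * b_hi; // contributes to bits 32..96
    let hl = a_hi * b_lo; // contributes to bits 32..96
    let hh = a_hi * b_hi; // contributes to bits 64..128

    // Sum the middle terms; the overflow folds into the high word.
    let mid = (ll >> 32) + (lh & MASK) + (hl & MASK);
    let low = (mid << 32) | (ll & MASK);
    let high = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
    (high, low)
}

fn main() {
    let (a, b) = (0xdead_beef_dead_beef_u64, 0x1234_5678_9abc_def0_u64);
    let wide = (a as u128) * (b as u128);
    assert_eq!(wmul_halves(a, b), ((wide >> 64) as u64, wide as u64));
}
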
-
-#[cfg(all(feature = "simd_support", feature = "nightly"))]
-mod simd_wmul {
-    #[cfg(target_arch = "x86")]
-    use core::arch::x86::*;
-    #[cfg(target_arch = "x86_64")]
-    use core::arch::x86_64::*;
-    use super::*;
-
-    wmul_impl! {
-        (u8x2, u16x2),
-        (u8x4, u16x4),
-        (u8x8, u16x8),
-        (u8x16, u16x16),
-        (u8x32, u16x32),,
-        8
-    }
-
-    wmul_impl! { (u16x2, u32x2),, 16 }
-    #[cfg(not(target_feature = "sse2"))]
-    wmul_impl! { (u16x4, u32x4),, 16 }
-    #[cfg(not(target_feature = "sse4.2"))]
-    wmul_impl! { (u16x8, u32x8),, 16 }
-    #[cfg(not(target_feature = "avx2"))]
-    wmul_impl! { (u16x16, u32x16),, 16 }
-
-    // 16-bit lane widths allow use of the x86 `mulhi` instructions, which
-    // means `wmul` can be implemented with only two instructions.
-    #[allow(unused_macros)]
-    macro_rules! wmul_impl_16 {
-        ($ty:ident, $intrinsic:ident, $mulhi:ident, $mullo:ident) => {
-            impl WideningMultiply for $ty {
-                type Output = ($ty, $ty);
-
-                #[inline(always)]
-                fn wmul(self, x: $ty) -> Self::Output {
-                    let b = $intrinsic::from_bits(x);
-                    let a = $intrinsic::from_bits(self);
-                    let hi = $ty::from_bits(unsafe { $mulhi(a, b) });
-                    let lo = $ty::from_bits(unsafe { $mullo(a, b) });
-                    (hi, lo)
-                }
-            }
-        };
-    }
-
-    #[cfg(target_feature = "sse2")]
-    wmul_impl_16! { u16x4, __m64, _mm_mulhi_pu16, _mm_mullo_pi16 }
-    #[cfg(target_feature = "sse4.2")]
-    wmul_impl_16! { u16x8, __m128i, _mm_mulhi_epu16, _mm_mullo_epi16 }
-    #[cfg(target_feature = "avx2")]
-    wmul_impl_16! { u16x16, __m256i, _mm256_mulhi_epu16, _mm256_mullo_epi16 }
-    // FIXME: there are no `__m512i` types in stdsimd yet, so `wmul::<u16x32>`
-    // cannot use the same implementation.
-
-    wmul_impl! {
-        (u32x2, u64x2),
-        (u32x4, u64x4),
-        (u32x8, u64x8),,
-        32
-    }
-
-    // TODO: optimize, this seems to seriously slow things down
-    wmul_impl_large! { (u8x64,) u8, 4 }
-    wmul_impl_large! { (u16x32,) u16, 8 }
-    wmul_impl_large! { (u32x16,) u32, 16 }
-    wmul_impl_large! { (u64x2, u64x4, u64x8,) u64, 32 }
-}
-#[cfg(all(feature = "simd_support", feature = "nightly"))]
-pub use self::simd_wmul::*;
-
-
-/// Helper trait when dealing with scalar and SIMD floating point types.
-pub(crate) trait FloatSIMDUtils {
-    // `PartialOrd` for vectors compares lexicographically. We want to compare all
-    // the individual SIMD lanes instead, and get the combined result over all
-    // lanes. This is possible using something like `a.lt(b).all()`, but we
-    // implement it as a trait so we can write the same code for `f32` and `f64`.
-    // Only the comparison functions we need are implemented.
-    fn all_lt(self, other: Self) -> bool;
-    fn all_le(self, other: Self) -> bool;
-    fn all_finite(self) -> bool;
-
-    type Mask;
-    fn finite_mask(self) -> Self::Mask;
-    fn gt_mask(self, other: Self) -> Self::Mask;
-    fn ge_mask(self, other: Self) -> Self::Mask;
-
-    // Decrease all lanes where the mask is `true` to the next lower value
-    // representable by the floating-point type. At least one of the lanes
-    // must be set.
-    fn decrease_masked(self, mask: Self::Mask) -> Self;
-
-    // Convert from int value. Conversion is done while retaining the numerical
-    // value, not by retaining the binary representation.
-    type UInt;
-    fn cast_from_int(i: Self::UInt) -> Self;
-}
-
-/// Implement functions available in std builds but missing from core primitives
-#[cfg(not(std))]
-pub(crate) trait Float : Sized {
-    fn is_nan(self) -> bool;
-    fn is_infinite(self) -> bool;
-    fn is_finite(self) -> bool;
-}
-
-/// Implement functions on f32/f64 to give them APIs similar to SIMD types
-pub(crate) trait FloatAsSIMD : Sized {
-    #[inline(always)]
-    fn lanes() -> usize { 1 }
-    #[inline(always)]
-    fn splat(scalar: Self) -> Self { scalar }
-    #[inline(always)]
-    fn extract(self, index: usize) -> Self { debug_assert_eq!(index, 0); self }
-    #[inline(always)]
-    fn replace(self, index: usize, new_value: Self) -> Self { debug_assert_eq!(index, 0); new_value }
-}
-
-pub(crate) trait BoolAsSIMD : Sized {
-    fn any(self) -> bool;
-    fn all(self) -> bool;
-    fn none(self) -> bool;
-}
-
-impl BoolAsSIMD for bool {
-    #[inline(always)]
-    fn any(self) -> bool { self }
-    #[inline(always)]
-    fn all(self) -> bool { self }
-    #[inline(always)]
-    fn none(self) -> bool { !self }
-}
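
The traits above exist so that `f32`/`f64` can be treated as one-lane vectors by code that is generic over scalar and SIMD types. A reduced, hypothetical sketch of the pattern (the real traits carry more methods):

// Hypothetical single-method version of the scalar-as-SIMD pattern.
trait AllLt {
    fn all_lt(self, other: Self) -> bool;
}

impl AllLt for f64 {
    // A scalar is a "vector" with one lane, so the lane-wise `all`
    // reduction collapses to a plain comparison.
    fn all_lt(self, other: Self) -> bool {
        self < other
    }
}

// Generic code is written once; a SIMD impl would use `self.lt(other).all()`.
fn all_below<T: AllLt + Copy>(x: T, bound: T) -> bool {
    x.all_lt(bound)
}

fn main() {
    assert!(all_below(0.5_f64, 1.0));
}
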
-
-macro_rules! scalar_float_impl {
-    ($ty:ident, $uty:ident) => {
-        #[cfg(not(std))]
-        impl Float for $ty {
-            #[inline]
-            fn is_nan(self) -> bool {
-                self != self
-            }
-
-            #[inline]
-            fn is_infinite(self) -> bool {
-                self == ::core::$ty::INFINITY || self == ::core::$ty::NEG_INFINITY
-            }
-
-            #[inline]
-            fn is_finite(self) -> bool {
-                !(self.is_nan() || self.is_infinite())
-            }
-        }
-
-        impl FloatSIMDUtils for $ty {
-            type Mask = bool;
-            #[inline(always)]
-            fn all_lt(self, other: Self) -> bool { self < other }
-            #[inline(always)]
-            fn all_le(self, other: Self) -> bool { self <= other }
-            #[inline(always)]
-            fn all_finite(self) -> bool { self.is_finite() }
-            #[inline(always)]
-            fn finite_mask(self) -> Self::Mask { self.is_finite() }
-            #[inline(always)]
-            fn gt_mask(self, other: Self) -> Self::Mask { self > other }
-            #[inline(always)]
-            fn ge_mask(self, other: Self) -> Self::Mask { self >= other }
-            #[inline(always)]
-            fn decrease_masked(self, mask: Self::Mask) -> Self {
-                debug_assert!(mask, "At least one lane must be set");
-                <$ty>::from_bits(self.to_bits() - 1)
-            }
-            type UInt = $uty;
-            fn cast_from_int(i: Self::UInt) -> Self { i as $ty }
-        }
-
-        impl FloatAsSIMD for $ty {}
-    }
-}
-
-scalar_float_impl!(f32, u32);
-scalar_float_impl!(f64, u64);
-
-
-#[cfg(feature="simd_support")]
-macro_rules! simd_impl {
-    ($ty:ident, $f_scalar:ident, $mty:ident, $uty:ident) => {
-        impl FloatSIMDUtils for $ty {
-            type Mask = $mty;
-            #[inline(always)]
-            fn all_lt(self, other: Self) -> bool { self.lt(other).all() }
-            #[inline(always)]
-            fn all_le(self, other: Self) -> bool { self.le(other).all() }
-            #[inline(always)]
-            fn all_finite(self) -> bool { self.finite_mask().all() }
-            #[inline(always)]
-            fn finite_mask(self) -> Self::Mask {
-                // This can possibly be done faster by checking bit patterns
-                let neg_inf = $ty::splat(::core::$f_scalar::NEG_INFINITY);
-                let pos_inf = $ty::splat(::core::$f_scalar::INFINITY);
-                self.gt(neg_inf) & self.lt(pos_inf)
-            }
-            #[inline(always)]
-            fn gt_mask(self, other: Self) -> Self::Mask { self.gt(other) }
-            #[inline(always)]
-            fn ge_mask(self, other: Self) -> Self::Mask { self.ge(other) }
-            #[inline(always)]
-            fn decrease_masked(self, mask: Self::Mask) -> Self {
-                // Casting a mask into ints will produce all bits set for
-                // true, and 0 for false. Adding that to the binary
-                // representation of a float means subtracting one from
-                // the binary representation, resulting in the next lower
-                // value representable by $ty. This works even when the
-                // current value is infinity.
-                debug_assert!(mask.any(), "At least one lane must be set");
-                <$ty>::from_bits(<$uty>::from_bits(self) + <$uty>::from_bits(mask))
-            }
-            type UInt = $uty;
-            #[inline]
-            fn cast_from_int(i: Self::UInt) -> Self { i.cast() }
-        }
-    }
-}
-
-#[cfg(feature="simd_support")] simd_impl! { f32x2, f32, m32x2, u32x2 }
-#[cfg(feature="simd_support")] simd_impl! { f32x4, f32, m32x4, u32x4 }
-#[cfg(feature="simd_support")] simd_impl! { f32x8, f32, m32x8, u32x8 }
-#[cfg(feature="simd_support")] simd_impl! { f32x16, f32, m32x16, u32x16 }
-#[cfg(feature="simd_support")] simd_impl! { f64x2, f64, m64x2, u64x2 }
-#[cfg(feature="simd_support")] simd_impl! { f64x4, f64, m64x4, u64x4 }
-#[cfg(feature="simd_support")] simd_impl! { f64x8, f64, m64x8, u64x8 }
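
Both `decrease_masked` implementations rely on the same IEEE 754 property: for a positive, finite, non-zero float, decrementing the raw bit pattern yields the next lower representable value. A scalar sketch of that step (the helper name is illustrative):

// Step to the next lower representable f64 by decrementing the bit pattern
// (valid for positive, non-zero values, as used above).
fn next_lower(x: f64) -> f64 {
    f64::from_bits(x.to_bits() - 1)
}

fn main() {
    let below_one = next_lower(1.0);
    assert!(below_one < 1.0);
    // EPSILON is the gap just above 1.0; the gap just below is half that.
    assert_eq!(below_one, 1.0 - f64::EPSILON / 2.0);
}
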
-
-/// Calculates ln(gamma(x)) (natural logarithm of the gamma
-/// function) using the Lanczos approximation.
-///
-/// The approximation expresses the gamma function as:
-/// `gamma(z+1) = sqrt(2*pi)*(z+g+0.5)^(z+0.5)*exp(-z-g-0.5)*Ag(z)`
-/// `g` is an arbitrary constant; we use the approximation with `g=5`.
-///
-/// Noting that `gamma(z+1) = z*gamma(z)` and applying `ln` to both sides:
-/// `ln(gamma(z)) = (z+0.5)*ln(z+g+0.5)-(z+g+0.5) + ln(sqrt(2*pi)*Ag(z)/z)`
-///
-/// `Ag(z)` is an infinite series with coefficients that can be calculated
-/// ahead of time - we use just the first 6 terms, which is good enough
-/// for most purposes.
-#[cfg(feature="std")]
-pub fn log_gamma(x: f64) -> f64 {
-    // precalculated 6 coefficients for the first 6 terms of the series
-    let coefficients: [f64; 6] = [
-        76.18009172947146,
-        -86.50532032941677,
-        24.01409824083091,
-        -1.231739572450155,
-        0.1208650973866179e-2,
-        -0.5395239384953e-5,
-    ];
-
-    // (x+0.5)*ln(x+g+0.5)-(x+g+0.5)
-    let tmp = x + 5.5;
-    let log = (x + 0.5) * tmp.ln() - tmp;
-
-    // the first few terms of the series for Ag(x)
-    let mut a = 1.000000000190015;
-    let mut denom = x;
-    for coeff in &coefficients {
-        denom += 1.0;
-        a += coeff / denom;
-    }
-
-    // get everything together
-    // a is Ag(x)
-    // 2.5066... is sqrt(2pi)
-    log + (2.5066282746310005 * a / x).ln()
-}
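
Since `gamma(n) = (n-1)!` at positive integers, `log_gamma` can be sanity-checked against a factorial. A sketch assuming the function above is in scope (the loose tolerance reflects that only six series terms are used):

fn main() {
    // ln(gamma(5)) = ln(4!) = ln(24)
    let expected = 24.0_f64.ln();
    assert!((log_gamma(5.0) - expected).abs() < 1e-7);
}
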
-
-/// Sample a random number using the Ziggurat method (specifically the
-/// ZIGNOR variant from Doornik 2005). Most of the arguments are
-/// directly from the paper:
-///
-/// * `rng`: source of randomness
-/// * `symmetric`: whether this is a symmetric distribution, or one-sided with P(x < 0) = 0.
-/// * `x_tab`: the $x_i$ abscissae
-/// * `f_tab`: precomputed values of the PDF at the $x_i$ (i.e. $f(x_i)$)
-/// * `pdf`: the probability density function
-/// * `zero_case`: manual sampling from the tail when we choose the
-///   bottom box (i.e. i == 0)
-
-// the perf improvement (25-50%) is definitely worth the extra code
-// size from force-inlining.
-#[cfg(feature="std")]
-#[inline(always)]
-pub fn ziggurat<R: Rng + ?Sized, P, Z>(
-    rng: &mut R,
-    symmetric: bool,
-    x_tab: ziggurat_tables::ZigTable,
-    f_tab: ziggurat_tables::ZigTable,
-    mut pdf: P,
-    mut zero_case: Z)
-    -> f64 where P: FnMut(f64) -> f64, Z: FnMut(&mut R, f64) -> f64 {
-    use crate::distributions::float::IntoFloat;
-    loop {
-        // As an optimisation we re-implement the conversion to a f64.
-        // From the remaining 12 most significant bits we use 8 to construct `i`.
-        // This saves us generating a whole extra random number, while the added
-        // precision of using 64 bits for f64 does not buy us much.
-        let bits = rng.next_u64();
-        let i = bits as usize & 0xff;
-
-        let u = if symmetric {
-            // Convert to a value in the range [2,4) and subtract to get [-1,1).
-            // We can't convert to an open range directly; that would require
-            // subtracting `3.0 - EPSILON`, which is not representable.
-            // It is possible with an extra step, but an open range does not
-            // seem necessary for the ziggurat algorithm anyway.
-            (bits >> 12).into_float_with_exponent(1) - 3.0
-        } else {
-            // Convert to a value in the range [1,2) and subtract to get (0,1)
-            (bits >> 12).into_float_with_exponent(0)
-                - (1.0 - ::core::f64::EPSILON / 2.0)
-        };
-        let x = u * x_tab[i];
-
-        let test_x = if symmetric { x.abs() } else { x };
-
-        // algebraically equivalent to |u| < x_tab[i+1]/x_tab[i] (or u < x_tab[i+1]/x_tab[i])
-        if test_x < x_tab[i + 1] {
-            return x;
-        }
-        if i == 0 {
-            return zero_case(rng, u);
-        }
-        // algebraically equivalent to f1 + DRanU()*(f0 - f1) < 1
-        if f_tab[i + 1] + (f_tab[i] - f_tab[i + 1]) * rng.gen::<f64>() < pdf(x) {
-            return x;
-        }
-    }
-}
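
The conversion inside the loop uses a standard exponent trick: fixing the IEEE 754 exponent field and filling the 52 mantissa bits from the RNG yields a uniform value in [2^e, 2^(e+1)). A standalone sketch of the [1,2) case (a hypothetical helper mirroring what `into_float_with_exponent(0)` does; the deleted code subtracts `1.0 - EPSILON/2` rather than `1.0` so that zero is excluded):

// Build a float in [1,2) from the top 52 of 64 random bits, then shift the
// result down to [0,1). The exponent field value 1023 encodes 2^0.
fn bits_to_unit_interval(bits: u64) -> f64 {
    let exponent_one = 1023u64 << 52; // biased exponent for 2^0
    f64::from_bits(exponent_one | (bits >> 12)) - 1.0
}

fn main() {
    assert_eq!(bits_to_unit_interval(0), 0.0);
    assert!(bits_to_unit_interval(u64::MAX) < 1.0);
}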