From fd091b04316db9dc5fafadbd6bdbe60b127408a9 Mon Sep 17 00:00:00 2001 From: Daniel Mueller Date: Thu, 2 Jan 2020 08:32:06 -0800 Subject: Update nitrokey crate to 0.4.0 This change finally updates the version of the nitrokey crate that we consume to 0.4.0. Along with that we update rand_core, one of its dependencies, to 0.5.1. Further more we add cfg-if in version 0.1.10 and getrandom in version 0.1.13, both of which are now new (non-development) dependencies. Import subrepo nitrokey/:nitrokey at e81057037e9b4f370b64c0a030a725bc6bdfb870 Import subrepo cfg-if/:cfg-if at 4484a6faf816ff8058088ad857b0c6bb2f4b02b2 Import subrepo getrandom/:getrandom at d661aa7e1b8cc80b47dabe3d2135b3b47d2858af Import subrepo rand/:rand at d877ed528248b52d947e0484364a4e1ae59ca502 --- rand/src/seq/index.rs | 145 ++++++++------ rand/src/seq/mod.rs | 521 +++++++++++++++++++++++--------------------------- 2 files changed, 326 insertions(+), 340 deletions(-) (limited to 'rand/src/seq') diff --git a/rand/src/seq/index.rs b/rand/src/seq/index.rs index 3d4df3a..22a5733 100644 --- a/rand/src/seq/index.rs +++ b/rand/src/seq/index.rs @@ -6,18 +6,18 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Index sampling +//! Low-level API for sampling indices #[cfg(feature="alloc")] use core::slice; #[cfg(feature="std")] use std::vec; -#[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec::{self, Vec}; +#[cfg(all(feature="alloc", not(feature="std")))] use crate::alloc::vec::{self, Vec}; // BTreeMap is not as fast in tests, but better than nothing. #[cfg(feature="std")] use std::collections::{HashSet}; -#[cfg(all(feature="alloc", not(feature="std")))] use alloc::collections::BTreeSet; +#[cfg(all(feature="alloc", not(feature="std")))] use crate::alloc::collections::BTreeSet; -#[cfg(feature="alloc")] use distributions::{Distribution, Uniform}; -use Rng; +#[cfg(feature="alloc")] use crate::distributions::{Distribution, Uniform, uniform::SampleUniform}; +use crate::Rng; /// A vector of indices. /// @@ -30,25 +30,37 @@ pub enum IndexVec { impl IndexVec { /// Returns the number of indices + #[inline] pub fn len(&self) -> usize { - match self { - &IndexVec::U32(ref v) => v.len(), - &IndexVec::USize(ref v) => v.len(), + match *self { + IndexVec::U32(ref v) => v.len(), + IndexVec::USize(ref v) => v.len(), + } + } + + /// Returns `true` if the length is 0. + #[inline] + pub fn is_empty(&self) -> bool { + match *self { + IndexVec::U32(ref v) => v.is_empty(), + IndexVec::USize(ref v) => v.is_empty(), } } /// Return the value at the given `index`. /// - /// (Note: we cannot implement `std::ops::Index` because of lifetime + /// (Note: we cannot implement [`std::ops::Index`] because of lifetime /// restrictions.) + #[inline] pub fn index(&self, index: usize) -> usize { - match self { - &IndexVec::U32(ref v) => v[index] as usize, - &IndexVec::USize(ref v) => v[index], + match *self { + IndexVec::U32(ref v) => v[index] as usize, + IndexVec::USize(ref v) => v[index], } } /// Return result as a `Vec`. Conversion may or may not be trivial. + #[inline] pub fn into_vec(self) -> Vec { match self { IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(), @@ -57,14 +69,16 @@ impl IndexVec { } /// Iterate over the indices as a sequence of `usize` values - pub fn iter<'a>(&'a self) -> IndexVecIter<'a> { - match self { - &IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()), - &IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()), + #[inline] + pub fn iter(&self) -> IndexVecIter<'_> { + match *self { + IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()), + IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()), } } /// Convert into an iterator over the indices as a sequence of `usize` values + #[inline] pub fn into_iter(self) -> IndexVecIntoIter { match self { IndexVec::U32(v) => IndexVecIntoIter::U32(v.into_iter()), @@ -88,12 +102,14 @@ impl PartialEq for IndexVec { } impl From> for IndexVec { + #[inline] fn from(v: Vec) -> Self { IndexVec::U32(v) } } impl From> for IndexVec { + #[inline] fn from(v: Vec) -> Self { IndexVec::USize(v) } @@ -108,18 +124,20 @@ pub enum IndexVecIter<'a> { impl<'a> Iterator for IndexVecIter<'a> { type Item = usize; + #[inline] fn next(&mut self) -> Option { use self::IndexVecIter::*; - match self { - &mut U32(ref mut iter) => iter.next().map(|i| *i as usize), - &mut USize(ref mut iter) => iter.next().cloned(), + match *self { + U32(ref mut iter) => iter.next().map(|i| *i as usize), + USize(ref mut iter) => iter.next().cloned(), } } + #[inline] fn size_hint(&self) -> (usize, Option) { - match self { - &IndexVecIter::U32(ref v) => v.size_hint(), - &IndexVecIter::USize(ref v) => v.size_hint(), + match *self { + IndexVecIter::U32(ref v) => v.size_hint(), + IndexVecIter::USize(ref v) => v.size_hint(), } } } @@ -136,19 +154,21 @@ pub enum IndexVecIntoIter { impl Iterator for IndexVecIntoIter { type Item = usize; + #[inline] fn next(&mut self) -> Option { use self::IndexVecIntoIter::*; - match self { - &mut U32(ref mut v) => v.next().map(|i| i as usize), - &mut USize(ref mut v) => v.next(), + match *self { + U32(ref mut v) => v.next().map(|i| i as usize), + USize(ref mut v) => v.next(), } } + #[inline] fn size_hint(&self) -> (usize, Option) { use self::IndexVecIntoIter::*; - match self { - &U32(ref v) => v.size_hint(), - &USize(ref v) => v.size_hint(), + match *self { + U32(ref v) => v.size_hint(), + USize(ref v) => v.size_hint(), } } } @@ -173,14 +193,13 @@ impl ExactSizeIterator for IndexVecIntoIter {} /// Note that performance is significantly better over `u32` indices than over /// `u64` indices. Because of this we hide the underlying type behind an /// abstraction, `IndexVec`. -/// +/// /// If an allocation-free `no_std` function is required, it is suggested /// to adapt the internal `sample_floyd` implementation. /// /// Panics if `amount > length`. pub fn sample(rng: &mut R, length: usize, amount: usize) -> IndexVec - where R: Rng + ?Sized, -{ +where R: Rng + ?Sized { if amount > length { panic!("`amount` of samples must be less than or equal to `length`"); } @@ -213,9 +232,7 @@ pub fn sample(rng: &mut R, length: usize, amount: usize) -> IndexVec if (length as f32) < C[j] * (amount as f32) { sample_inplace(rng, length, amount) } else { - // note: could have a specific u32 impl, but I'm lazy and - // generics don't have usable conversions - sample_rejection(rng, length as usize, amount as usize) + sample_rejection(rng, length, amount) } } } @@ -227,8 +244,7 @@ pub fn sample(rng: &mut R, length: usize, amount: usize) -> IndexVec /// /// This implementation uses `O(amount)` memory and `O(amount^2)` time. fn sample_floyd(rng: &mut R, length: u32, amount: u32) -> IndexVec - where R: Rng + ?Sized, -{ +where R: Rng + ?Sized { // For small amount we use Floyd's fully-shuffled variant. For larger // amounts this is slow due to Vec::insert performance, so we shuffle // afterwards. Benchmarks show little overhead from extra logic. @@ -243,11 +259,9 @@ fn sample_floyd(rng: &mut R, length: u32, amount: u32) -> IndexVec indices.insert(pos, j); continue; } - } else { - if indices.contains(&t) { - indices.push(j); - continue; - } + } else if indices.contains(&t) { + indices.push(j); + continue; } indices.push(t); } @@ -274,8 +288,7 @@ fn sample_floyd(rng: &mut R, length: u32, amount: u32) -> IndexVec /// /// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time. fn sample_inplace(rng: &mut R, length: u32, amount: u32) -> IndexVec - where R: Rng + ?Sized, -{ +where R: Rng + ?Sized { debug_assert!(amount <= length); let mut indices: Vec = Vec::with_capacity(length as usize); indices.extend(0..length); @@ -288,21 +301,36 @@ fn sample_inplace(rng: &mut R, length: u32, amount: u32) -> IndexVec IndexVec::from(indices) } +trait UInt: Copy + PartialOrd + Ord + PartialEq + Eq + SampleUniform + core::hash::Hash { + fn zero() -> Self; + fn as_usize(self) -> usize; +} +impl UInt for u32 { + #[inline] fn zero() -> Self { 0 } + #[inline] fn as_usize(self) -> usize { self as usize } +} +impl UInt for usize { + #[inline] fn zero() -> Self { 0 } + #[inline] fn as_usize(self) -> usize { self } +} + /// Randomly sample exactly `amount` indices from `0..length`, using rejection /// sampling. -/// +/// /// Since `amount <<< length` there is a low chance of a random sample in /// `0..length` being a duplicate. We test for duplicates and resample where /// necessary. The algorithm is `O(amount)` time and memory. -fn sample_rejection(rng: &mut R, length: usize, amount: usize) -> IndexVec - where R: Rng + ?Sized, -{ +/// +/// This function is generic over X primarily so that results are value-stable +/// over 32-bit and 64-bit platforms. +fn sample_rejection(rng: &mut R, length: X, amount: X) -> IndexVec +where R: Rng + ?Sized, IndexVec: From> { debug_assert!(amount < length); - #[cfg(feature="std")] let mut cache = HashSet::with_capacity(amount); + #[cfg(feature="std")] let mut cache = HashSet::with_capacity(amount.as_usize()); #[cfg(not(feature="std"))] let mut cache = BTreeSet::new(); - let distr = Uniform::new(0, length); - let mut indices = Vec::with_capacity(amount); - for _ in 0..amount { + let distr = Uniform::new(X::zero(), length); + let mut indices = Vec::with_capacity(amount.as_usize()); + for _ in 0..amount.as_usize() { let mut pos = distr.sample(rng); while !cache.insert(pos) { pos = distr.sample(rng); @@ -310,30 +338,32 @@ fn sample_rejection(rng: &mut R, length: usize, amount: usize) -> IndexVec indices.push(pos); } - debug_assert_eq!(indices.len(), amount); + debug_assert_eq!(indices.len(), amount.as_usize()); IndexVec::from(indices) } #[cfg(test)] mod test { + #[cfg(feature="std")] use std::vec; + #[cfg(all(feature="alloc", not(feature="std")))] use crate::alloc::vec; use super::*; #[test] fn test_sample_boundaries() { - let mut r = ::test::rng(404); + let mut r = crate::test::rng(404); assert_eq!(sample_inplace(&mut r, 0, 0).len(), 0); assert_eq!(sample_inplace(&mut r, 1, 0).len(), 0); assert_eq!(sample_inplace(&mut r, 1, 1).into_vec(), vec![0]); - assert_eq!(sample_rejection(&mut r, 1, 0).len(), 0); + assert_eq!(sample_rejection(&mut r, 1u32, 0).len(), 0); assert_eq!(sample_floyd(&mut r, 0, 0).len(), 0); assert_eq!(sample_floyd(&mut r, 1, 0).len(), 0); assert_eq!(sample_floyd(&mut r, 1, 1).into_vec(), vec![0]); // These algorithms should be fast with big numbers. Test average. - let sum: usize = sample_rejection(&mut r, 1 << 25, 10) + let sum: usize = sample_rejection(&mut r, 1 << 25, 10u32) .into_iter().sum(); assert!(1 << 25 < sum && sum < (1 << 25) * 25); @@ -343,8 +373,9 @@ mod test { } #[test] + #[cfg(not(miri))] // Miri is too slow fn test_sample_alg() { - let seed_rng = ::test::rng; + let seed_rng = crate::test::rng; // We can't test which algorithm is used directly, but Floyd's alg // should produce different results from the others. (Also, `inplace` @@ -371,7 +402,7 @@ mod test { // A large length and larger amount should use cache let (length, amount): (usize, usize) = (1<<20, 600); let v1 = sample(&mut seed_rng(422), length, amount); - let v2 = sample_rejection(&mut seed_rng(422), length, amount); + let v2 = sample_rejection(&mut seed_rng(422), length as u32, amount as u32); assert!(v1.iter().all(|e| e < length)); assert_eq!(v1, v2); } diff --git a/rand/src/seq/mod.rs b/rand/src/seq/mod.rs index 9959602..cec9bb1 100644 --- a/rand/src/seq/mod.rs +++ b/rand/src/seq/mod.rs @@ -6,25 +6,55 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -//! Functions for randomly accessing and sampling sequences. +//! Sequence-related functionality //! -//! TODO: module doc +//! This module provides: +//! +//! * [`seq::SliceRandom`] slice sampling and mutation +//! * [`seq::IteratorRandom`] iterator sampling +//! * [`seq::index::sample`] low-level API to choose multiple indices from +//! `0..length` +//! +//! Also see: +//! +//! * [`distributions::weighted`] module which provides implementations of +//! weighted index sampling. +//! +//! In order to make results reproducible across 32-64 bit architectures, all +//! `usize` indices are sampled as a `u32` where possible (also providing a +//! small performance boost in some cases). #[cfg(feature="alloc")] pub mod index; #[cfg(feature="alloc")] use core::ops::Index; -#[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec::Vec; +#[cfg(all(feature="alloc", not(feature="std")))] use crate::alloc::vec::Vec; -use Rng; -#[cfg(feature="alloc")] use distributions::WeightedError; -#[cfg(feature="alloc")] use distributions::uniform::{SampleUniform, SampleBorrow}; +use crate::Rng; +#[cfg(feature="alloc")] use crate::distributions::WeightedError; +#[cfg(feature="alloc")] use crate::distributions::uniform::{SampleUniform, SampleBorrow}; /// Extension trait on slices, providing random mutation and sampling methods. /// -/// An implementation is provided for slices. This may also be implementable for -/// other types. +/// This trait is implemented on all `[T]` slice types, providing several +/// methods for choosing and shuffling elements. You must `use` this trait: +/// +/// ``` +/// use rand::seq::SliceRandom; +/// +/// fn main() { +/// let mut rng = rand::thread_rng(); +/// let mut bytes = "Hello, random!".to_string().into_bytes(); +/// bytes.shuffle(&mut rng); +/// let str = String::from_utf8(bytes).unwrap(); +/// println!("{}", str); +/// } +/// ``` +/// Example output (non-deterministic): +/// ```none +/// l,nmroHado !le +/// ``` pub trait SliceRandom { /// The element type. type Item; @@ -32,7 +62,7 @@ pub trait SliceRandom { /// Returns a reference to one random element of the slice, or `None` if the /// slice is empty. /// - /// Depending on the implementation, complexity is expected to be `O(1)`. + /// For slices, complexity is `O(1)`. /// /// # Example /// @@ -46,33 +76,33 @@ pub trait SliceRandom { /// assert_eq!(choices[..0].choose(&mut rng), None); /// ``` fn choose(&self, rng: &mut R) -> Option<&Self::Item> - where R: Rng + ?Sized; + where R: Rng + ?Sized; /// Returns a mutable reference to one random element of the slice, or /// `None` if the slice is empty. - /// - /// Depending on the implementation, complexity is expected to be `O(1)`. + /// + /// For slices, complexity is `O(1)`. fn choose_mut(&mut self, rng: &mut R) -> Option<&mut Self::Item> - where R: Rng + ?Sized; + where R: Rng + ?Sized; - /// Produces an iterator that chooses `amount` elements from the slice at - /// random without repeating any, and returns them in random order. - /// - /// In case this API is not sufficiently flexible, use `index::sample` then - /// apply the indices to the slice. - /// - /// Complexity is expected to be the same as `index::sample`. - /// + /// Chooses `amount` elements from the slice at random, without repetition, + /// and in random order. The returned iterator is appropriate both for + /// collection into a `Vec` and filling an existing buffer (see example). + /// + /// In case this API is not sufficiently flexible, use [`index::sample`]. + /// + /// For slices, complexity is the same as [`index::sample`]. + /// /// # Example /// ``` /// use rand::seq::SliceRandom; - /// + /// /// let mut rng = &mut rand::thread_rng(); /// let sample = "Hello, audience!".as_bytes(); - /// + /// /// // collect the results into a vector: /// let v: Vec = sample.choose_multiple(&mut rng, 3).cloned().collect(); - /// + /// /// // store in a buffer: /// let mut buf = [0u8; 5]; /// for (b, slot) in sample.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) { @@ -81,13 +111,18 @@ pub trait SliceRandom { /// ``` #[cfg(feature = "alloc")] fn choose_multiple(&self, rng: &mut R, amount: usize) -> SliceChooseIter - where R: Rng + ?Sized; + where R: Rng + ?Sized; - /// Similar to [`choose`], where the likelihood of each outcome may be - /// specified. The specified function `weight` maps items `x` to a relative + /// Similar to [`choose`], but where the likelihood of each outcome may be + /// specified. + /// + /// The specified function `weight` maps each item `x` to a relative /// likelihood `weight(x)`. The probability of each item being selected is /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`. /// + /// For slices of length `n`, complexity is `O(n)`. + /// See also [`choose_weighted_mut`], [`distributions::weighted`]. + /// /// # Example /// /// ``` @@ -98,47 +133,59 @@ pub trait SliceRandom { /// // 50% chance to print 'a', 25% chance to print 'b', 25% chance to print 'c' /// println!("{:?}", choices.choose_weighted(&mut rng, |item| item.1).unwrap().0); /// ``` - /// [`choose`]: trait.SliceRandom.html#method.choose + /// [`choose`]: SliceRandom::choose + /// [`choose_weighted_mut`]: SliceRandom::choose_weighted_mut + /// [`distributions::weighted`]: crate::distributions::weighted #[cfg(feature = "alloc")] - fn choose_weighted(&self, rng: &mut R, weight: F) -> Result<&Self::Item, WeightedError> - where R: Rng + ?Sized, - F: Fn(&Self::Item) -> B, - B: SampleBorrow, - X: SampleUniform + - for<'a> ::core::ops::AddAssign<&'a X> + - ::core::cmp::PartialOrd + - Clone + - Default; - - /// Similar to [`choose_mut`], where the likelihood of each outcome may be - /// specified. The specified function `weight` maps items `x` to a relative + fn choose_weighted( + &self, rng: &mut R, weight: F, + ) -> Result<&Self::Item, WeightedError> + where + R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd + + Clone + + Default; + + /// Similar to [`choose_mut`], but where the likelihood of each outcome may + /// be specified. + /// + /// The specified function `weight` maps each item `x` to a relative /// likelihood `weight(x)`. The probability of each item being selected is /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`. /// - /// See also [`choose_weighted`]. + /// For slices of length `n`, complexity is `O(n)`. + /// See also [`choose_weighted`], [`distributions::weighted`]. /// - /// [`choose_mut`]: trait.SliceRandom.html#method.choose_mut - /// [`choose_weighted`]: trait.SliceRandom.html#method.choose_weighted + /// [`choose_mut`]: SliceRandom::choose_mut + /// [`choose_weighted`]: SliceRandom::choose_weighted + /// [`distributions::weighted`]: crate::distributions::weighted #[cfg(feature = "alloc")] - fn choose_weighted_mut(&mut self, rng: &mut R, weight: F) -> Result<&mut Self::Item, WeightedError> - where R: Rng + ?Sized, - F: Fn(&Self::Item) -> B, - B: SampleBorrow, - X: SampleUniform + - for<'a> ::core::ops::AddAssign<&'a X> + - ::core::cmp::PartialOrd + - Clone + - Default; + fn choose_weighted_mut( + &mut self, rng: &mut R, weight: F, + ) -> Result<&mut Self::Item, WeightedError> + where + R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd + + Clone + + Default; /// Shuffle a mutable slice in place. - /// - /// Depending on the implementation, complexity is expected to be `O(1)`. + /// + /// For slices of length `n`, complexity is `O(n)`. /// /// # Example /// /// ``` - /// use rand::thread_rng; /// use rand::seq::SliceRandom; + /// use rand::thread_rng; /// /// let mut rng = thread_rng(); /// let mut y = [1, 2, 3, 4, 5]; @@ -146,7 +193,8 @@ pub trait SliceRandom { /// y.shuffle(&mut rng); /// println!("Shuffled: {:?}", y); /// ``` - fn shuffle(&mut self, rng: &mut R) where R: Rng + ?Sized; + fn shuffle(&mut self, rng: &mut R) + where R: Rng + ?Sized; /// Shuffle a slice in place, but exit early. /// @@ -164,47 +212,65 @@ pub trait SliceRandom { /// If `amount` is greater than the number of elements in the slice, this /// will perform a full shuffle. /// - /// Complexity is expected to be `O(m)` where `m = amount`. - fn partial_shuffle(&mut self, rng: &mut R, amount: usize) - -> (&mut [Self::Item], &mut [Self::Item]) where R: Rng + ?Sized; + /// For slices, complexity is `O(m)` where `m = amount`. + fn partial_shuffle( + &mut self, rng: &mut R, amount: usize, + ) -> (&mut [Self::Item], &mut [Self::Item]) + where R: Rng + ?Sized; } /// Extension trait on iterators, providing random sampling methods. +/// +/// This trait is implemented on all sized iterators, providing methods for +/// choosing one or more elements. You must `use` this trait: +/// +/// ``` +/// use rand::seq::IteratorRandom; +/// +/// fn main() { +/// let mut rng = rand::thread_rng(); +/// +/// let faces = "πŸ˜€πŸ˜ŽπŸ˜πŸ˜•πŸ˜ πŸ˜’"; +/// println!("I am {}!", faces.chars().choose(&mut rng).unwrap()); +/// } +/// ``` +/// Example output (non-deterministic): +/// ```none +/// I am πŸ˜€! +/// ``` pub trait IteratorRandom: Iterator + Sized { - /// Choose one element at random from the iterator. If you have a slice, - /// it's significantly faster to call the [`choose`] or [`choose_mut`] - /// functions using the slice instead. - /// - /// Returns `None` if and only if the iterator is empty. + /// Choose one element at random from the iterator. /// - /// Complexity is `O(n)`, where `n` is the length of the iterator. - /// This likely consumes multiple random numbers, but the exact number - /// is unspecified. + /// Returns `None` if and only if the iterator is empty. /// - /// [`choose`]: trait.SliceRandom.html#method.choose - /// [`choose_mut`]: trait.SliceRandom.html#method.choose_mut + /// This method uses [`Iterator::size_hint`] for optimisation. With an + /// accurate hint and where [`Iterator::nth`] is a constant-time operation + /// this method can offer `O(1)` performance. Where no size hint is + /// available, complexity is `O(n)` where `n` is the iterator length. + /// Partial hints (where `lower > 0`) also improve performance. + /// + /// For slices, prefer [`SliceRandom::choose`] which guarantees `O(1)` + /// performance. fn choose(mut self, rng: &mut R) -> Option - where R: Rng + ?Sized - { + where R: Rng + ?Sized { let (mut lower, mut upper) = self.size_hint(); let mut consumed = 0; let mut result = None; if upper == Some(lower) { - return if lower == 0 { None } else { self.nth(rng.gen_range(0, lower)) }; + return if lower == 0 { None } else { self.nth(gen_index(rng, lower)) }; } // Continue until the iterator is exhausted loop { if lower > 1 { - let ix = rng.gen_range(0, lower + consumed); - let skip; - if ix < lower { + let ix = gen_index(rng, lower + consumed); + let skip = if ix < lower { result = self.nth(ix); - skip = lower - (ix + 1); + lower - (ix + 1) } else { - skip = lower; - } + lower + }; if upper == Some(lower) { return result; } @@ -230,21 +296,21 @@ pub trait IteratorRandom: Iterator + Sized { } } - /// Collects `amount` values at random from the iterator into a supplied - /// buffer. - /// + /// Collects values at random from the iterator into a supplied buffer + /// until that buffer is filled. + /// /// Although the elements are selected randomly, the order of elements in /// the buffer is neither stable nor fully random. If random ordering is /// desired, shuffle the result. - /// - /// Returns the number of elements added to the buffer. This equals `amount` - /// unless the iterator contains insufficient elements, in which case this - /// equals the number of elements available. - /// + /// + /// Returns the number of elements added to the buffer. This equals the length + /// of the buffer unless the iterator contains insufficient elements, in which + /// case this equals the number of elements available. + /// /// Complexity is `O(n)` where `n` is the length of the iterator. - fn choose_multiple_fill(mut self, rng: &mut R, buf: &mut [Self::Item]) - -> usize where R: Rng + ?Sized - { + /// For slices, prefer [`SliceRandom::choose_multiple`]. + fn choose_multiple_fill(mut self, rng: &mut R, buf: &mut [Self::Item]) -> usize + where R: Rng + ?Sized { let amount = buf.len(); let mut len = 0; while len < amount { @@ -259,7 +325,7 @@ pub trait IteratorRandom: Iterator + Sized { // Continue, since the iterator was not exhausted for (i, elem) in self.enumerate() { - let k = rng.gen_range(0, i + 1 + amount); + let k = gen_index(rng, i + 1 + amount); if let Some(slot) = buf.get_mut(k) { *slot = elem; } @@ -274,16 +340,16 @@ pub trait IteratorRandom: Iterator + Sized { /// Although the elements are selected randomly, the order of elements in /// the buffer is neither stable nor fully random. If random ordering is /// desired, shuffle the result. - /// + /// /// The length of the returned vector equals `amount` unless the iterator /// contains insufficient elements, in which case it equals the number of /// elements available. - /// + /// /// Complexity is `O(n)` where `n` is the length of the iterator. + /// For slices, prefer [`SliceRandom::choose_multiple`]. #[cfg(feature = "alloc")] fn choose_multiple(mut self, rng: &mut R, amount: usize) -> Vec - where R: Rng + ?Sized - { + where R: Rng + ?Sized { let mut reservoir = Vec::with_capacity(amount); reservoir.extend(self.by_ref().take(amount)); @@ -293,7 +359,7 @@ pub trait IteratorRandom: Iterator + Sized { // If the iterator stops once, then so do we. if reservoir.len() == amount { for (i, elem) in self.enumerate() { - let k = rng.gen_range(0, i + 1 + amount); + let k = gen_index(rng, i + 1 + amount); if let Some(slot) = reservoir.get_mut(k) { *slot = elem; } @@ -312,31 +378,27 @@ impl SliceRandom for [T] { type Item = T; fn choose(&self, rng: &mut R) -> Option<&Self::Item> - where R: Rng + ?Sized - { + where R: Rng + ?Sized { if self.is_empty() { None } else { - Some(&self[rng.gen_range(0, self.len())]) + Some(&self[gen_index(rng, self.len())]) } } fn choose_mut(&mut self, rng: &mut R) -> Option<&mut Self::Item> - where R: Rng + ?Sized - { + where R: Rng + ?Sized { if self.is_empty() { None } else { let len = self.len(); - Some(&mut self[rng.gen_range(0, len)]) + Some(&mut self[gen_index(rng, len)]) } } #[cfg(feature = "alloc")] - fn choose_multiple(&self, rng: &mut R, amount: usize) - -> SliceChooseIter - where R: Rng + ?Sized - { + fn choose_multiple(&self, rng: &mut R, amount: usize) -> SliceChooseIter + where R: Rng + ?Sized { let amount = ::core::cmp::min(amount, self.len()); SliceChooseIter { slice: self, @@ -346,57 +408,66 @@ impl SliceRandom for [T] { } #[cfg(feature = "alloc")] - fn choose_weighted(&self, rng: &mut R, weight: F) -> Result<&Self::Item, WeightedError> - where R: Rng + ?Sized, - F: Fn(&Self::Item) -> B, - B: SampleBorrow, - X: SampleUniform + - for<'a> ::core::ops::AddAssign<&'a X> + - ::core::cmp::PartialOrd + - Clone + - Default { - use distributions::{Distribution, WeightedIndex}; + fn choose_weighted( + &self, rng: &mut R, weight: F, + ) -> Result<&Self::Item, WeightedError> + where + R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd + + Clone + + Default, + { + use crate::distributions::{Distribution, WeightedIndex}; let distr = WeightedIndex::new(self.iter().map(weight))?; Ok(&self[distr.sample(rng)]) } #[cfg(feature = "alloc")] - fn choose_weighted_mut(&mut self, rng: &mut R, weight: F) -> Result<&mut Self::Item, WeightedError> - where R: Rng + ?Sized, - F: Fn(&Self::Item) -> B, - B: SampleBorrow, - X: SampleUniform + - for<'a> ::core::ops::AddAssign<&'a X> + - ::core::cmp::PartialOrd + - Clone + - Default { - use distributions::{Distribution, WeightedIndex}; + fn choose_weighted_mut( + &mut self, rng: &mut R, weight: F, + ) -> Result<&mut Self::Item, WeightedError> + where + R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd + + Clone + + Default, + { + use crate::distributions::{Distribution, WeightedIndex}; let distr = WeightedIndex::new(self.iter().map(weight))?; Ok(&mut self[distr.sample(rng)]) } - fn shuffle(&mut self, rng: &mut R) where R: Rng + ?Sized - { + fn shuffle(&mut self, rng: &mut R) + where R: Rng + ?Sized { for i in (1..self.len()).rev() { // invariant: elements with index > i have been locked in place. - self.swap(i, rng.gen_range(0, i + 1)); + self.swap(i, gen_index(rng, i + 1)); } } - fn partial_shuffle(&mut self, rng: &mut R, amount: usize) - -> (&mut [Self::Item], &mut [Self::Item]) where R: Rng + ?Sized - { + fn partial_shuffle( + &mut self, rng: &mut R, amount: usize, + ) -> (&mut [Self::Item], &mut [Self::Item]) + where R: Rng + ?Sized { // This applies Durstenfeld's algorithm for the // [Fisher–Yates shuffle](https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm) // for an unbiased permutation, but exits early after choosing `amount` // elements. - + let len = self.len(); let end = if amount >= len { 0 } else { len - amount }; - + for i in (end..len).rev() { // invariant: elements with index > i have been locked in place. - self.swap(i, rng.gen_range(0, i + 1)); + self.swap(i, gen_index(rng, i + 1)); } let r = self.split_at_mut(end); (r.1, r.0) @@ -406,8 +477,10 @@ impl SliceRandom for [T] { impl IteratorRandom for I where I: Iterator + Sized {} -/// Iterator over multiple choices, as returned by [`SliceRandom::choose_multiple]( -/// trait.SliceRandom.html#method.choose_multiple). +/// An iterator over multiple slice elements. +/// +/// This struct is created by +/// [`SliceRandom::choose_multiple`](trait.SliceRandom.html#tymethod.choose_multiple). #[cfg(feature = "alloc")] #[derive(Debug)] pub struct SliceChooseIter<'a, S: ?Sized + 'a, T: 'a> { @@ -424,7 +497,7 @@ impl<'a, S: Index + ?Sized + 'a, T: 'a> Iterator for SliceCho // TODO: investigate using SliceIndex::get_unchecked when stable self.indices.next().map(|i| &self.slice[i as usize]) } - + fn size_hint(&self) -> (usize, Option) { (self.indices.len(), Some(self.indices.len())) } @@ -440,94 +513,39 @@ impl<'a, S: Index + ?Sized + 'a, T: 'a> ExactSizeIterator } -/// Randomly sample `amount` elements from a finite iterator. -/// -/// Deprecated: use [`IteratorRandom::choose_multiple`] instead. -/// -/// [`IteratorRandom::choose_multiple`]: trait.IteratorRandom.html#method.choose_multiple -#[cfg(feature = "alloc")] -#[deprecated(since="0.6.0", note="use IteratorRandom::choose_multiple instead")] -pub fn sample_iter(rng: &mut R, iterable: I, amount: usize) -> Result, Vec> - where I: IntoIterator, - R: Rng + ?Sized, -{ - use seq::IteratorRandom; - let iter = iterable.into_iter(); - let result = iter.choose_multiple(rng, amount); - if result.len() == amount { - Ok(result) +// Sample a number uniformly between 0 and `ubound`. Uses 32-bit sampling where +// possible, primarily in order to produce the same output on 32-bit and 64-bit +// platforms. +#[inline] +fn gen_index(rng: &mut R, ubound: usize) -> usize { + if ubound <= (core::u32::MAX as usize) { + rng.gen_range(0, ubound as u32) as usize } else { - Err(result) + rng.gen_range(0, ubound) } } -/// Randomly sample exactly `amount` values from `slice`. -/// -/// The values are non-repeating and in random order. -/// -/// This implementation uses `O(amount)` time and memory. -/// -/// Panics if `amount > slice.len()` -/// -/// Deprecated: use [`SliceRandom::choose_multiple`] instead. -/// -/// [`SliceRandom::choose_multiple`]: trait.SliceRandom.html#method.choose_multiple -#[cfg(feature = "alloc")] -#[deprecated(since="0.6.0", note="use SliceRandom::choose_multiple instead")] -pub fn sample_slice(rng: &mut R, slice: &[T], amount: usize) -> Vec - where R: Rng + ?Sized, - T: Clone -{ - let indices = index::sample(rng, slice.len(), amount).into_iter(); - - let mut out = Vec::with_capacity(amount); - out.extend(indices.map(|i| slice[i].clone())); - out -} - -/// Randomly sample exactly `amount` references from `slice`. -/// -/// The references are non-repeating and in random order. -/// -/// This implementation uses `O(amount)` time and memory. -/// -/// Panics if `amount > slice.len()` -/// -/// Deprecated: use [`SliceRandom::choose_multiple`] instead. -/// -/// [`SliceRandom::choose_multiple`]: trait.SliceRandom.html#method.choose_multiple -#[cfg(feature = "alloc")] -#[deprecated(since="0.6.0", note="use SliceRandom::choose_multiple instead")] -pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T> - where R: Rng + ?Sized -{ - let indices = index::sample(rng, slice.len(), amount).into_iter(); - - let mut out = Vec::with_capacity(amount); - out.extend(indices.map(|i| &slice[i])); - out -} #[cfg(test)] mod test { use super::*; - #[cfg(feature = "alloc")] use {Rng, SeedableRng}; - #[cfg(feature = "alloc")] use rngs::SmallRng; + #[cfg(feature = "alloc")] use crate::Rng; #[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec::Vec; #[test] fn test_slice_choose() { - let mut r = ::test::rng(107); + let mut r = crate::test::rng(107); let chars = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n']; let mut chosen = [0i32; 14]; + // The below all use a binomial distribution with n=1000, p=1/14. + // binocdf(40, 1000, 1/14) ~= 2e-5; 1-binocdf(106, ..) ~= 2e-5 for _ in 0..1000 { let picked = *chars.choose(&mut r).unwrap(); chosen[(picked as usize) - ('a' as usize)] += 1; } for count in chosen.iter() { - let err = *count - (1000 / (chars.len() as i32)); - assert!(-20 <= err && err <= 20); + assert!(40 < *count && *count < 106); } chosen.iter_mut().for_each(|x| *x = 0); @@ -535,8 +553,7 @@ mod test { *chosen.choose_mut(&mut r).unwrap() += 1; } for count in chosen.iter() { - let err = *count - (1000 / (chosen.len() as i32)); - assert!(-20 <= err && err <= 20); + assert!(40 < *count && *count < 106); } let mut v: [isize; 0] = []; @@ -597,8 +614,9 @@ mod test { } #[test] + #[cfg(not(miri))] // Miri is too slow fn test_iterator_choose() { - let r = &mut ::test::rng(109); + let r = &mut crate::test::rng(109); fn test_iter + Clone>(r: &mut R, iter: Iter) { let mut chosen = [0i32; 9]; for _ in 0..1000 { @@ -628,8 +646,9 @@ mod test { } #[test] + #[cfg(not(miri))] // Miri is too slow fn test_shuffle() { - let mut r = ::test::rng(108); + let mut r = crate::test::rng(108); let empty: &mut [isize] = &mut []; empty.shuffle(&mut r); let mut one = [1]; @@ -669,14 +688,16 @@ mod test { counts[permutation] += 1; } for count in counts.iter() { - let err = *count - 10000i32 / 24; - assert!(-50 <= err && err <= 50); + // Binomial(10000, 1/24) with average 416.667 + // Octave: binocdf(n, 10000, 1/24) + // 99.9% chance samples lie within this range: + assert!(352 <= *count && *count <= 483, "count: {}", count); } } #[test] fn test_partial_shuffle() { - let mut r = ::test::rng(118); + let mut r = crate::test::rng(118); let mut empty: [u32; 0] = []; let res = empty.partial_shuffle(&mut r, 10); @@ -696,7 +717,7 @@ mod test { let min_val = 1; let max_val = 100; - let mut r = ::test::rng(401); + let mut r = crate::test::rng(401); let vals = (min_val..max_val).collect::>(); let small_sample = vals.iter().choose_multiple(&mut r, 5); let large_sample = vals.iter().choose_multiple(&mut r, vals.len() + 5); @@ -713,75 +734,9 @@ mod test { #[test] #[cfg(feature = "alloc")] - #[allow(deprecated)] - fn test_sample_slice_boundaries() { - let empty: &[u8] = &[]; - - let mut r = ::test::rng(402); - - // sample 0 items - assert_eq!(&sample_slice(&mut r, empty, 0)[..], [0u8; 0]); - assert_eq!(&sample_slice(&mut r, &[42, 2, 42], 0)[..], [0u8; 0]); - - // sample 1 item - assert_eq!(&sample_slice(&mut r, &[42], 1)[..], [42]); - let v = sample_slice(&mut r, &[1, 42], 1)[0]; - assert!(v == 1 || v == 42); - - // sample "all" the items - let v = sample_slice(&mut r, &[42, 133], 2); - assert!(&v[..] == [42, 133] || v[..] == [133, 42]); - - // Make sure lucky 777's aren't lucky - let slice = &[42, 777]; - let mut num_42 = 0; - let total = 1000; - for _ in 0..total { - let v = sample_slice(&mut r, slice, 1); - assert_eq!(v.len(), 1); - let v = v[0]; - assert!(v == 42 || v == 777); - if v == 42 { - num_42 += 1; - } - } - let ratio_42 = num_42 as f64 / 1000 as f64; - assert!(0.4 <= ratio_42 || ratio_42 <= 0.6, "{}", ratio_42); - } - - #[test] - #[cfg(feature = "alloc")] - #[allow(deprecated)] - fn test_sample_slice() { - let seeded_rng = SmallRng::from_seed; - - let mut r = ::test::rng(403); - - for n in 1..20 { - let length = 5*n - 4; // 1, 6, ... - let amount = r.gen_range(0, length); - let mut seed = [0u8; 16]; - r.fill(&mut seed); - - // assert the basics work - let regular = index::sample(&mut seeded_rng(seed), length, amount); - assert_eq!(regular.len(), amount); - assert!(regular.iter().all(|e| e < length)); - - // also test that sampling the slice works - let vec: Vec = (0..(length as u32)).collect(); - let result = sample_slice(&mut seeded_rng(seed), &vec, amount); - assert_eq!(result, regular.iter().map(|i| i as u32).collect::>()); - - let result = sample_slice_ref(&mut seeded_rng(seed), &vec, amount); - assert!(result.iter().zip(regular.iter()).all(|(i,j)| **i == j as u32)); - } - } - - #[test] - #[cfg(feature = "alloc")] + #[cfg(not(miri))] // Miri is too slow fn test_weighted() { - let mut r = ::test::rng(406); + let mut r = crate::test::rng(406); const N_REPS: u32 = 3000; let weights = [1u32, 2, 3, 0, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7]; let total_weight = weights.iter().sum::() as f32; @@ -830,7 +785,7 @@ mod test { assert_eq!(empty_slice.choose_weighted(&mut r, |_| 1), Err(WeightedError::NoItem)); assert_eq!(empty_slice.choose_weighted_mut(&mut r, |_| 1), Err(WeightedError::NoItem)); assert_eq!(['x'].choose_weighted_mut(&mut r, |_| 0), Err(WeightedError::AllWeightsZero)); - assert_eq!([0, -1].choose_weighted_mut(&mut r, |x| *x), Err(WeightedError::NegativeWeight)); - assert_eq!([-1, 0].choose_weighted_mut(&mut r, |x| *x), Err(WeightedError::NegativeWeight)); + assert_eq!([0, -1].choose_weighted_mut(&mut r, |x| *x), Err(WeightedError::InvalidWeight)); + assert_eq!([-1, 0].choose_weighted_mut(&mut r, |x| *x), Err(WeightedError::InvalidWeight)); } } -- cgit v1.2.1