From ecf3474223ca3d16a10f12dc2272e3b0ed72c1bb Mon Sep 17 00:00:00 2001 From: Daniel Mueller Date: Wed, 2 Jan 2019 21:14:10 -0800 Subject: Update nitrokey crate to 0.2.3 This change updates the nitrokey crate to version 0.2.3. This version bumps the rand crate used to 0.6.1, which in turn requires an additional set of dependencies. Import subrepo nitrokey/:nitrokey at b3e2adc5bb1300441ca74cc7672617c042f3ea31 Import subrepo rand/:rand at 73613ff903512e9503e41cc8ba9eae76269dc598 Import subrepo rustc_version/:rustc_version at 0294f2ba2018bf7be672abd53db351ce5055fa02 Import subrepo semver-parser/:semver-parser at 750da9b11a04125231b1fb293866ca036845acee Import subrepo semver/:semver at 5eb6db94fa03f4d5c64a625a56188f496be47598 --- rand/src/seq/index.rs | 378 +++++++++++++++++++++++ rand/src/seq/mod.rs | 836 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1214 insertions(+) create mode 100644 rand/src/seq/index.rs create mode 100644 rand/src/seq/mod.rs (limited to 'rand/src/seq') diff --git a/rand/src/seq/index.rs b/rand/src/seq/index.rs new file mode 100644 index 0000000..3d4df3a --- /dev/null +++ b/rand/src/seq/index.rs @@ -0,0 +1,378 @@ +// Copyright 2018 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Index sampling + +#[cfg(feature="alloc")] use core::slice; + +#[cfg(feature="std")] use std::vec; +#[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec::{self, Vec}; +// BTreeMap is not as fast in tests, but better than nothing. +#[cfg(feature="std")] use std::collections::{HashSet}; +#[cfg(all(feature="alloc", not(feature="std")))] use alloc::collections::BTreeSet; + +#[cfg(feature="alloc")] use distributions::{Distribution, Uniform}; +use Rng; + +/// A vector of indices. +/// +/// Multiple internal representations are possible. +#[derive(Clone, Debug)] +pub enum IndexVec { + #[doc(hidden)] U32(Vec), + #[doc(hidden)] USize(Vec), +} + +impl IndexVec { + /// Returns the number of indices + pub fn len(&self) -> usize { + match self { + &IndexVec::U32(ref v) => v.len(), + &IndexVec::USize(ref v) => v.len(), + } + } + + /// Return the value at the given `index`. + /// + /// (Note: we cannot implement `std::ops::Index` because of lifetime + /// restrictions.) + pub fn index(&self, index: usize) -> usize { + match self { + &IndexVec::U32(ref v) => v[index] as usize, + &IndexVec::USize(ref v) => v[index], + } + } + + /// Return result as a `Vec`. Conversion may or may not be trivial. + pub fn into_vec(self) -> Vec { + match self { + IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(), + IndexVec::USize(v) => v, + } + } + + /// Iterate over the indices as a sequence of `usize` values + pub fn iter<'a>(&'a self) -> IndexVecIter<'a> { + match self { + &IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()), + &IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()), + } + } + + /// Convert into an iterator over the indices as a sequence of `usize` values + pub fn into_iter(self) -> IndexVecIntoIter { + match self { + IndexVec::U32(v) => IndexVecIntoIter::U32(v.into_iter()), + IndexVec::USize(v) => IndexVecIntoIter::USize(v.into_iter()), + } + } +} + +impl PartialEq for IndexVec { + fn eq(&self, other: &IndexVec) -> bool { + use self::IndexVec::*; + match (self, other) { + (&U32(ref v1), &U32(ref v2)) => v1 == v2, + (&USize(ref v1), &USize(ref v2)) => v1 == v2, + (&U32(ref v1), &USize(ref v2)) => (v1.len() == v2.len()) + && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as usize == *y)), + (&USize(ref v1), &U32(ref v2)) => (v1.len() == v2.len()) + && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as usize)), + } + } +} + +impl From> for IndexVec { + fn from(v: Vec) -> Self { + IndexVec::U32(v) + } +} + +impl From> for IndexVec { + fn from(v: Vec) -> Self { + IndexVec::USize(v) + } +} + +/// Return type of `IndexVec::iter`. +#[derive(Debug)] +pub enum IndexVecIter<'a> { + #[doc(hidden)] U32(slice::Iter<'a, u32>), + #[doc(hidden)] USize(slice::Iter<'a, usize>), +} + +impl<'a> Iterator for IndexVecIter<'a> { + type Item = usize; + fn next(&mut self) -> Option { + use self::IndexVecIter::*; + match self { + &mut U32(ref mut iter) => iter.next().map(|i| *i as usize), + &mut USize(ref mut iter) => iter.next().cloned(), + } + } + + fn size_hint(&self) -> (usize, Option) { + match self { + &IndexVecIter::U32(ref v) => v.size_hint(), + &IndexVecIter::USize(ref v) => v.size_hint(), + } + } +} + +impl<'a> ExactSizeIterator for IndexVecIter<'a> {} + +/// Return type of `IndexVec::into_iter`. +#[derive(Clone, Debug)] +pub enum IndexVecIntoIter { + #[doc(hidden)] U32(vec::IntoIter), + #[doc(hidden)] USize(vec::IntoIter), +} + +impl Iterator for IndexVecIntoIter { + type Item = usize; + + fn next(&mut self) -> Option { + use self::IndexVecIntoIter::*; + match self { + &mut U32(ref mut v) => v.next().map(|i| i as usize), + &mut USize(ref mut v) => v.next(), + } + } + + fn size_hint(&self) -> (usize, Option) { + use self::IndexVecIntoIter::*; + match self { + &U32(ref v) => v.size_hint(), + &USize(ref v) => v.size_hint(), + } + } +} + +impl ExactSizeIterator for IndexVecIntoIter {} + + +/// Randomly sample exactly `amount` distinct indices from `0..length`, and +/// return them in random order (fully shuffled). +/// +/// This method is used internally by the slice sampling methods, but it can +/// sometimes be useful to have the indices themselves so this is provided as +/// an alternative. +/// +/// The implementation used is not specified; we automatically select the +/// fastest available algorithm for the `length` and `amount` parameters +/// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking, +/// complexity is `O(amount)`, except that when `amount` is small, performance +/// is closer to `O(amount^2)`, and when `length` is close to `amount` then +/// `O(length)`. +/// +/// Note that performance is significantly better over `u32` indices than over +/// `u64` indices. Because of this we hide the underlying type behind an +/// abstraction, `IndexVec`. +/// +/// If an allocation-free `no_std` function is required, it is suggested +/// to adapt the internal `sample_floyd` implementation. +/// +/// Panics if `amount > length`. +pub fn sample(rng: &mut R, length: usize, amount: usize) -> IndexVec + where R: Rng + ?Sized, +{ + if amount > length { + panic!("`amount` of samples must be less than or equal to `length`"); + } + if length > (::core::u32::MAX as usize) { + // We never want to use inplace here, but could use floyd's alg + // Lazy version: always use the cache alg. + return sample_rejection(rng, length, amount); + } + let amount = amount as u32; + let length = length as u32; + + // Choice of algorithm here depends on both length and amount. See: + // https://github.com/rust-random/rand/pull/479 + // We do some calculations with f32. Accuracy is not very important. + + if amount < 163 { + const C: [[f32; 2]; 2] = [[1.6, 8.0/45.0], [10.0, 70.0/9.0]]; + let j = if length < 500_000 { 0 } else { 1 }; + let amount_fp = amount as f32; + let m4 = C[0][j] * amount_fp; + // Short-cut: when amount < 12, floyd's is always faster + if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp { + sample_inplace(rng, length, amount) + } else { + sample_floyd(rng, length, amount) + } + } else { + const C: [f32; 2] = [270.0, 330.0/9.0]; + let j = if length < 500_000 { 0 } else { 1 }; + if (length as f32) < C[j] * (amount as f32) { + sample_inplace(rng, length, amount) + } else { + // note: could have a specific u32 impl, but I'm lazy and + // generics don't have usable conversions + sample_rejection(rng, length as usize, amount as usize) + } + } +} + +/// Randomly sample exactly `amount` indices from `0..length`, using Floyd's +/// combination algorithm. +/// +/// The output values are fully shuffled. (Overhead is under 50%.) +/// +/// This implementation uses `O(amount)` memory and `O(amount^2)` time. +fn sample_floyd(rng: &mut R, length: u32, amount: u32) -> IndexVec + where R: Rng + ?Sized, +{ + // For small amount we use Floyd's fully-shuffled variant. For larger + // amounts this is slow due to Vec::insert performance, so we shuffle + // afterwards. Benchmarks show little overhead from extra logic. + let floyd_shuffle = amount < 50; + + debug_assert!(amount <= length); + let mut indices = Vec::with_capacity(amount as usize); + for j in length - amount .. length { + let t = rng.gen_range(0, j + 1); + if floyd_shuffle { + if let Some(pos) = indices.iter().position(|&x| x == t) { + indices.insert(pos, j); + continue; + } + } else { + if indices.contains(&t) { + indices.push(j); + continue; + } + } + indices.push(t); + } + if !floyd_shuffle { + // Reimplement SliceRandom::shuffle with smaller indices + for i in (1..amount).rev() { + // invariant: elements with index > i have been locked in place. + indices.swap(i as usize, rng.gen_range(0, i + 1) as usize); + } + } + IndexVec::from(indices) +} + +/// Randomly sample exactly `amount` indices from `0..length`, using an inplace +/// partial Fisher-Yates method. +/// Sample an amount of indices using an inplace partial fisher yates method. +/// +/// This allocates the entire `length` of indices and randomizes only the first `amount`. +/// It then truncates to `amount` and returns. +/// +/// This method is not appropriate for large `length` and potentially uses a lot +/// of memory; because of this we only implement for `u32` index (which improves +/// performance in all cases). +/// +/// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time. +fn sample_inplace(rng: &mut R, length: u32, amount: u32) -> IndexVec + where R: Rng + ?Sized, +{ + debug_assert!(amount <= length); + let mut indices: Vec = Vec::with_capacity(length as usize); + indices.extend(0..length); + for i in 0..amount { + let j: u32 = rng.gen_range(i, length); + indices.swap(i as usize, j as usize); + } + indices.truncate(amount as usize); + debug_assert_eq!(indices.len(), amount as usize); + IndexVec::from(indices) +} + +/// Randomly sample exactly `amount` indices from `0..length`, using rejection +/// sampling. +/// +/// Since `amount <<< length` there is a low chance of a random sample in +/// `0..length` being a duplicate. We test for duplicates and resample where +/// necessary. The algorithm is `O(amount)` time and memory. +fn sample_rejection(rng: &mut R, length: usize, amount: usize) -> IndexVec + where R: Rng + ?Sized, +{ + debug_assert!(amount < length); + #[cfg(feature="std")] let mut cache = HashSet::with_capacity(amount); + #[cfg(not(feature="std"))] let mut cache = BTreeSet::new(); + let distr = Uniform::new(0, length); + let mut indices = Vec::with_capacity(amount); + for _ in 0..amount { + let mut pos = distr.sample(rng); + while !cache.insert(pos) { + pos = distr.sample(rng); + } + indices.push(pos); + } + + debug_assert_eq!(indices.len(), amount); + IndexVec::from(indices) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_sample_boundaries() { + let mut r = ::test::rng(404); + + assert_eq!(sample_inplace(&mut r, 0, 0).len(), 0); + assert_eq!(sample_inplace(&mut r, 1, 0).len(), 0); + assert_eq!(sample_inplace(&mut r, 1, 1).into_vec(), vec![0]); + + assert_eq!(sample_rejection(&mut r, 1, 0).len(), 0); + + assert_eq!(sample_floyd(&mut r, 0, 0).len(), 0); + assert_eq!(sample_floyd(&mut r, 1, 0).len(), 0); + assert_eq!(sample_floyd(&mut r, 1, 1).into_vec(), vec![0]); + + // These algorithms should be fast with big numbers. Test average. + let sum: usize = sample_rejection(&mut r, 1 << 25, 10) + .into_iter().sum(); + assert!(1 << 25 < sum && sum < (1 << 25) * 25); + + let sum: usize = sample_floyd(&mut r, 1 << 25, 10) + .into_iter().sum(); + assert!(1 << 25 < sum && sum < (1 << 25) * 25); + } + + #[test] + fn test_sample_alg() { + let seed_rng = ::test::rng; + + // We can't test which algorithm is used directly, but Floyd's alg + // should produce different results from the others. (Also, `inplace` + // and `cached` currently use different sizes thus produce different results.) + + // A small length and relatively large amount should use inplace + let (length, amount): (usize, usize) = (100, 50); + let v1 = sample(&mut seed_rng(420), length, amount); + let v2 = sample_inplace(&mut seed_rng(420), length as u32, amount as u32); + assert!(v1.iter().all(|e| e < length)); + assert_eq!(v1, v2); + + // Test Floyd's alg does produce different results + let v3 = sample_floyd(&mut seed_rng(420), length as u32, amount as u32); + assert!(v1 != v3); + + // A large length and small amount should use Floyd + let (length, amount): (usize, usize) = (1<<20, 50); + let v1 = sample(&mut seed_rng(421), length, amount); + let v2 = sample_floyd(&mut seed_rng(421), length as u32, amount as u32); + assert!(v1.iter().all(|e| e < length)); + assert_eq!(v1, v2); + + // A large length and larger amount should use cache + let (length, amount): (usize, usize) = (1<<20, 600); + let v1 = sample(&mut seed_rng(422), length, amount); + let v2 = sample_rejection(&mut seed_rng(422), length, amount); + assert!(v1.iter().all(|e| e < length)); + assert_eq!(v1, v2); + } +} diff --git a/rand/src/seq/mod.rs b/rand/src/seq/mod.rs new file mode 100644 index 0000000..9959602 --- /dev/null +++ b/rand/src/seq/mod.rs @@ -0,0 +1,836 @@ +// Copyright 2018 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Functions for randomly accessing and sampling sequences. +//! +//! TODO: module doc + + +#[cfg(feature="alloc")] pub mod index; + +#[cfg(feature="alloc")] use core::ops::Index; + +#[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec::Vec; + +use Rng; +#[cfg(feature="alloc")] use distributions::WeightedError; +#[cfg(feature="alloc")] use distributions::uniform::{SampleUniform, SampleBorrow}; + +/// Extension trait on slices, providing random mutation and sampling methods. +/// +/// An implementation is provided for slices. This may also be implementable for +/// other types. +pub trait SliceRandom { + /// The element type. + type Item; + + /// Returns a reference to one random element of the slice, or `None` if the + /// slice is empty. + /// + /// Depending on the implementation, complexity is expected to be `O(1)`. + /// + /// # Example + /// + /// ``` + /// use rand::thread_rng; + /// use rand::seq::SliceRandom; + /// + /// let choices = [1, 2, 4, 8, 16, 32]; + /// let mut rng = thread_rng(); + /// println!("{:?}", choices.choose(&mut rng)); + /// assert_eq!(choices[..0].choose(&mut rng), None); + /// ``` + fn choose(&self, rng: &mut R) -> Option<&Self::Item> + where R: Rng + ?Sized; + + /// Returns a mutable reference to one random element of the slice, or + /// `None` if the slice is empty. + /// + /// Depending on the implementation, complexity is expected to be `O(1)`. + fn choose_mut(&mut self, rng: &mut R) -> Option<&mut Self::Item> + where R: Rng + ?Sized; + + /// Produces an iterator that chooses `amount` elements from the slice at + /// random without repeating any, and returns them in random order. + /// + /// In case this API is not sufficiently flexible, use `index::sample` then + /// apply the indices to the slice. + /// + /// Complexity is expected to be the same as `index::sample`. + /// + /// # Example + /// ``` + /// use rand::seq::SliceRandom; + /// + /// let mut rng = &mut rand::thread_rng(); + /// let sample = "Hello, audience!".as_bytes(); + /// + /// // collect the results into a vector: + /// let v: Vec = sample.choose_multiple(&mut rng, 3).cloned().collect(); + /// + /// // store in a buffer: + /// let mut buf = [0u8; 5]; + /// for (b, slot) in sample.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) { + /// *slot = *b; + /// } + /// ``` + #[cfg(feature = "alloc")] + fn choose_multiple(&self, rng: &mut R, amount: usize) -> SliceChooseIter + where R: Rng + ?Sized; + + /// Similar to [`choose`], where the likelihood of each outcome may be + /// specified. The specified function `weight` maps items `x` to a relative + /// likelihood `weight(x)`. The probability of each item being selected is + /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`. + /// + /// # Example + /// + /// ``` + /// use rand::prelude::*; + /// + /// let choices = [('a', 2), ('b', 1), ('c', 1)]; + /// let mut rng = thread_rng(); + /// // 50% chance to print 'a', 25% chance to print 'b', 25% chance to print 'c' + /// println!("{:?}", choices.choose_weighted(&mut rng, |item| item.1).unwrap().0); + /// ``` + /// [`choose`]: trait.SliceRandom.html#method.choose + #[cfg(feature = "alloc")] + fn choose_weighted(&self, rng: &mut R, weight: F) -> Result<&Self::Item, WeightedError> + where R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd + + Clone + + Default; + + /// Similar to [`choose_mut`], where the likelihood of each outcome may be + /// specified. The specified function `weight` maps items `x` to a relative + /// likelihood `weight(x)`. The probability of each item being selected is + /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`. + /// + /// See also [`choose_weighted`]. + /// + /// [`choose_mut`]: trait.SliceRandom.html#method.choose_mut + /// [`choose_weighted`]: trait.SliceRandom.html#method.choose_weighted + #[cfg(feature = "alloc")] + fn choose_weighted_mut(&mut self, rng: &mut R, weight: F) -> Result<&mut Self::Item, WeightedError> + where R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd + + Clone + + Default; + + /// Shuffle a mutable slice in place. + /// + /// Depending on the implementation, complexity is expected to be `O(1)`. + /// + /// # Example + /// + /// ``` + /// use rand::thread_rng; + /// use rand::seq::SliceRandom; + /// + /// let mut rng = thread_rng(); + /// let mut y = [1, 2, 3, 4, 5]; + /// println!("Unshuffled: {:?}", y); + /// y.shuffle(&mut rng); + /// println!("Shuffled: {:?}", y); + /// ``` + fn shuffle(&mut self, rng: &mut R) where R: Rng + ?Sized; + + /// Shuffle a slice in place, but exit early. + /// + /// Returns two mutable slices from the source slice. The first contains + /// `amount` elements randomly permuted. The second has the remaining + /// elements that are not fully shuffled. + /// + /// This is an efficient method to select `amount` elements at random from + /// the slice, provided the slice may be mutated. + /// + /// If you only need to choose elements randomly and `amount > self.len()/2` + /// then you may improve performance by taking + /// `amount = values.len() - amount` and using only the second slice. + /// + /// If `amount` is greater than the number of elements in the slice, this + /// will perform a full shuffle. + /// + /// Complexity is expected to be `O(m)` where `m = amount`. + fn partial_shuffle(&mut self, rng: &mut R, amount: usize) + -> (&mut [Self::Item], &mut [Self::Item]) where R: Rng + ?Sized; +} + +/// Extension trait on iterators, providing random sampling methods. +pub trait IteratorRandom: Iterator + Sized { + /// Choose one element at random from the iterator. If you have a slice, + /// it's significantly faster to call the [`choose`] or [`choose_mut`] + /// functions using the slice instead. + /// + /// Returns `None` if and only if the iterator is empty. + /// + /// Complexity is `O(n)`, where `n` is the length of the iterator. + /// This likely consumes multiple random numbers, but the exact number + /// is unspecified. + /// + /// [`choose`]: trait.SliceRandom.html#method.choose + /// [`choose_mut`]: trait.SliceRandom.html#method.choose_mut + fn choose(mut self, rng: &mut R) -> Option + where R: Rng + ?Sized + { + let (mut lower, mut upper) = self.size_hint(); + let mut consumed = 0; + let mut result = None; + + if upper == Some(lower) { + return if lower == 0 { None } else { self.nth(rng.gen_range(0, lower)) }; + } + + // Continue until the iterator is exhausted + loop { + if lower > 1 { + let ix = rng.gen_range(0, lower + consumed); + let skip; + if ix < lower { + result = self.nth(ix); + skip = lower - (ix + 1); + } else { + skip = lower; + } + if upper == Some(lower) { + return result; + } + consumed += lower; + if skip > 0 { + self.nth(skip - 1); + } + } else { + let elem = self.next(); + if elem.is_none() { + return result; + } + consumed += 1; + let denom = consumed as f64; // accurate to 2^53 elements + if rng.gen_bool(1.0 / denom) { + result = elem; + } + } + + let hint = self.size_hint(); + lower = hint.0; + upper = hint.1; + } + } + + /// Collects `amount` values at random from the iterator into a supplied + /// buffer. + /// + /// Although the elements are selected randomly, the order of elements in + /// the buffer is neither stable nor fully random. If random ordering is + /// desired, shuffle the result. + /// + /// Returns the number of elements added to the buffer. This equals `amount` + /// unless the iterator contains insufficient elements, in which case this + /// equals the number of elements available. + /// + /// Complexity is `O(n)` where `n` is the length of the iterator. + fn choose_multiple_fill(mut self, rng: &mut R, buf: &mut [Self::Item]) + -> usize where R: Rng + ?Sized + { + let amount = buf.len(); + let mut len = 0; + while len < amount { + if let Some(elem) = self.next() { + buf[len] = elem; + len += 1; + } else { + // Iterator exhausted; stop early + return len; + } + } + + // Continue, since the iterator was not exhausted + for (i, elem) in self.enumerate() { + let k = rng.gen_range(0, i + 1 + amount); + if let Some(slot) = buf.get_mut(k) { + *slot = elem; + } + } + len + } + + /// Collects `amount` values at random from the iterator into a vector. + /// + /// This is equivalent to `choose_multiple_fill` except for the result type. + /// + /// Although the elements are selected randomly, the order of elements in + /// the buffer is neither stable nor fully random. If random ordering is + /// desired, shuffle the result. + /// + /// The length of the returned vector equals `amount` unless the iterator + /// contains insufficient elements, in which case it equals the number of + /// elements available. + /// + /// Complexity is `O(n)` where `n` is the length of the iterator. + #[cfg(feature = "alloc")] + fn choose_multiple(mut self, rng: &mut R, amount: usize) -> Vec + where R: Rng + ?Sized + { + let mut reservoir = Vec::with_capacity(amount); + reservoir.extend(self.by_ref().take(amount)); + + // Continue unless the iterator was exhausted + // + // note: this prevents iterators that "restart" from causing problems. + // If the iterator stops once, then so do we. + if reservoir.len() == amount { + for (i, elem) in self.enumerate() { + let k = rng.gen_range(0, i + 1 + amount); + if let Some(slot) = reservoir.get_mut(k) { + *slot = elem; + } + } + } else { + // Don't hang onto extra memory. There is a corner case where + // `amount` was much less than `self.len()`. + reservoir.shrink_to_fit(); + } + reservoir + } +} + + +impl SliceRandom for [T] { + type Item = T; + + fn choose(&self, rng: &mut R) -> Option<&Self::Item> + where R: Rng + ?Sized + { + if self.is_empty() { + None + } else { + Some(&self[rng.gen_range(0, self.len())]) + } + } + + fn choose_mut(&mut self, rng: &mut R) -> Option<&mut Self::Item> + where R: Rng + ?Sized + { + if self.is_empty() { + None + } else { + let len = self.len(); + Some(&mut self[rng.gen_range(0, len)]) + } + } + + #[cfg(feature = "alloc")] + fn choose_multiple(&self, rng: &mut R, amount: usize) + -> SliceChooseIter + where R: Rng + ?Sized + { + let amount = ::core::cmp::min(amount, self.len()); + SliceChooseIter { + slice: self, + _phantom: Default::default(), + indices: index::sample(rng, self.len(), amount).into_iter(), + } + } + + #[cfg(feature = "alloc")] + fn choose_weighted(&self, rng: &mut R, weight: F) -> Result<&Self::Item, WeightedError> + where R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd + + Clone + + Default { + use distributions::{Distribution, WeightedIndex}; + let distr = WeightedIndex::new(self.iter().map(weight))?; + Ok(&self[distr.sample(rng)]) + } + + #[cfg(feature = "alloc")] + fn choose_weighted_mut(&mut self, rng: &mut R, weight: F) -> Result<&mut Self::Item, WeightedError> + where R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd + + Clone + + Default { + use distributions::{Distribution, WeightedIndex}; + let distr = WeightedIndex::new(self.iter().map(weight))?; + Ok(&mut self[distr.sample(rng)]) + } + + fn shuffle(&mut self, rng: &mut R) where R: Rng + ?Sized + { + for i in (1..self.len()).rev() { + // invariant: elements with index > i have been locked in place. + self.swap(i, rng.gen_range(0, i + 1)); + } + } + + fn partial_shuffle(&mut self, rng: &mut R, amount: usize) + -> (&mut [Self::Item], &mut [Self::Item]) where R: Rng + ?Sized + { + // This applies Durstenfeld's algorithm for the + // [Fisher–Yates shuffle](https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm) + // for an unbiased permutation, but exits early after choosing `amount` + // elements. + + let len = self.len(); + let end = if amount >= len { 0 } else { len - amount }; + + for i in (end..len).rev() { + // invariant: elements with index > i have been locked in place. + self.swap(i, rng.gen_range(0, i + 1)); + } + let r = self.split_at_mut(end); + (r.1, r.0) + } +} + +impl IteratorRandom for I where I: Iterator + Sized {} + + +/// Iterator over multiple choices, as returned by [`SliceRandom::choose_multiple]( +/// trait.SliceRandom.html#method.choose_multiple). +#[cfg(feature = "alloc")] +#[derive(Debug)] +pub struct SliceChooseIter<'a, S: ?Sized + 'a, T: 'a> { + slice: &'a S, + _phantom: ::core::marker::PhantomData, + indices: index::IndexVecIntoIter, +} + +#[cfg(feature = "alloc")] +impl<'a, S: Index + ?Sized + 'a, T: 'a> Iterator for SliceChooseIter<'a, S, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + // TODO: investigate using SliceIndex::get_unchecked when stable + self.indices.next().map(|i| &self.slice[i as usize]) + } + + fn size_hint(&self) -> (usize, Option) { + (self.indices.len(), Some(self.indices.len())) + } +} + +#[cfg(feature = "alloc")] +impl<'a, S: Index + ?Sized + 'a, T: 'a> ExactSizeIterator + for SliceChooseIter<'a, S, T> +{ + fn len(&self) -> usize { + self.indices.len() + } +} + + +/// Randomly sample `amount` elements from a finite iterator. +/// +/// Deprecated: use [`IteratorRandom::choose_multiple`] instead. +/// +/// [`IteratorRandom::choose_multiple`]: trait.IteratorRandom.html#method.choose_multiple +#[cfg(feature = "alloc")] +#[deprecated(since="0.6.0", note="use IteratorRandom::choose_multiple instead")] +pub fn sample_iter(rng: &mut R, iterable: I, amount: usize) -> Result, Vec> + where I: IntoIterator, + R: Rng + ?Sized, +{ + use seq::IteratorRandom; + let iter = iterable.into_iter(); + let result = iter.choose_multiple(rng, amount); + if result.len() == amount { + Ok(result) + } else { + Err(result) + } +} + +/// Randomly sample exactly `amount` values from `slice`. +/// +/// The values are non-repeating and in random order. +/// +/// This implementation uses `O(amount)` time and memory. +/// +/// Panics if `amount > slice.len()` +/// +/// Deprecated: use [`SliceRandom::choose_multiple`] instead. +/// +/// [`SliceRandom::choose_multiple`]: trait.SliceRandom.html#method.choose_multiple +#[cfg(feature = "alloc")] +#[deprecated(since="0.6.0", note="use SliceRandom::choose_multiple instead")] +pub fn sample_slice(rng: &mut R, slice: &[T], amount: usize) -> Vec + where R: Rng + ?Sized, + T: Clone +{ + let indices = index::sample(rng, slice.len(), amount).into_iter(); + + let mut out = Vec::with_capacity(amount); + out.extend(indices.map(|i| slice[i].clone())); + out +} + +/// Randomly sample exactly `amount` references from `slice`. +/// +/// The references are non-repeating and in random order. +/// +/// This implementation uses `O(amount)` time and memory. +/// +/// Panics if `amount > slice.len()` +/// +/// Deprecated: use [`SliceRandom::choose_multiple`] instead. +/// +/// [`SliceRandom::choose_multiple`]: trait.SliceRandom.html#method.choose_multiple +#[cfg(feature = "alloc")] +#[deprecated(since="0.6.0", note="use SliceRandom::choose_multiple instead")] +pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T> + where R: Rng + ?Sized +{ + let indices = index::sample(rng, slice.len(), amount).into_iter(); + + let mut out = Vec::with_capacity(amount); + out.extend(indices.map(|i| &slice[i])); + out +} + +#[cfg(test)] +mod test { + use super::*; + #[cfg(feature = "alloc")] use {Rng, SeedableRng}; + #[cfg(feature = "alloc")] use rngs::SmallRng; + #[cfg(all(feature="alloc", not(feature="std")))] + use alloc::vec::Vec; + + #[test] + fn test_slice_choose() { + let mut r = ::test::rng(107); + let chars = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n']; + let mut chosen = [0i32; 14]; + for _ in 0..1000 { + let picked = *chars.choose(&mut r).unwrap(); + chosen[(picked as usize) - ('a' as usize)] += 1; + } + for count in chosen.iter() { + let err = *count - (1000 / (chars.len() as i32)); + assert!(-20 <= err && err <= 20); + } + + chosen.iter_mut().for_each(|x| *x = 0); + for _ in 0..1000 { + *chosen.choose_mut(&mut r).unwrap() += 1; + } + for count in chosen.iter() { + let err = *count - (1000 / (chosen.len() as i32)); + assert!(-20 <= err && err <= 20); + } + + let mut v: [isize; 0] = []; + assert_eq!(v.choose(&mut r), None); + assert_eq!(v.choose_mut(&mut r), None); + } + + #[derive(Clone)] + struct UnhintedIterator { + iter: I, + } + impl Iterator for UnhintedIterator { + type Item = I::Item; + fn next(&mut self) -> Option { + self.iter.next() + } + } + + #[derive(Clone)] + struct ChunkHintedIterator { + iter: I, + chunk_remaining: usize, + chunk_size: usize, + hint_total_size: bool, + } + impl Iterator for ChunkHintedIterator { + type Item = I::Item; + fn next(&mut self) -> Option { + if self.chunk_remaining == 0 { + self.chunk_remaining = ::core::cmp::min(self.chunk_size, + self.iter.len()); + } + self.chunk_remaining = self.chunk_remaining.saturating_sub(1); + + self.iter.next() + } + fn size_hint(&self) -> (usize, Option) { + (self.chunk_remaining, + if self.hint_total_size { Some(self.iter.len()) } else { None }) + } + } + + #[derive(Clone)] + struct WindowHintedIterator { + iter: I, + window_size: usize, + hint_total_size: bool, + } + impl Iterator for WindowHintedIterator { + type Item = I::Item; + fn next(&mut self) -> Option { + self.iter.next() + } + fn size_hint(&self) -> (usize, Option) { + (::core::cmp::min(self.iter.len(), self.window_size), + if self.hint_total_size { Some(self.iter.len()) } else { None }) + } + } + + #[test] + fn test_iterator_choose() { + let r = &mut ::test::rng(109); + fn test_iter + Clone>(r: &mut R, iter: Iter) { + let mut chosen = [0i32; 9]; + for _ in 0..1000 { + let picked = iter.clone().choose(r).unwrap(); + chosen[picked] += 1; + } + for count in chosen.iter() { + // Samples should follow Binomial(1000, 1/9) + // Octave: binopdf(x, 1000, 1/9) gives the prob of *count == x + // Note: have seen 153, which is unlikely but not impossible. + assert!(72 < *count && *count < 154, "count not close to 1000/9: {}", count); + } + } + + test_iter(r, 0..9); + test_iter(r, [0, 1, 2, 3, 4, 5, 6, 7, 8].iter().cloned()); + #[cfg(feature = "alloc")] + test_iter(r, (0..9).collect::>().into_iter()); + test_iter(r, UnhintedIterator { iter: 0..9 }); + test_iter(r, ChunkHintedIterator { iter: 0..9, chunk_size: 4, chunk_remaining: 4, hint_total_size: false }); + test_iter(r, ChunkHintedIterator { iter: 0..9, chunk_size: 4, chunk_remaining: 4, hint_total_size: true }); + test_iter(r, WindowHintedIterator { iter: 0..9, window_size: 2, hint_total_size: false }); + test_iter(r, WindowHintedIterator { iter: 0..9, window_size: 2, hint_total_size: true }); + + assert_eq!((0..0).choose(r), None); + assert_eq!(UnhintedIterator{ iter: 0..0 }.choose(r), None); + } + + #[test] + fn test_shuffle() { + let mut r = ::test::rng(108); + let empty: &mut [isize] = &mut []; + empty.shuffle(&mut r); + let mut one = [1]; + one.shuffle(&mut r); + let b: &[_] = &[1]; + assert_eq!(one, b); + + let mut two = [1, 2]; + two.shuffle(&mut r); + assert!(two == [1, 2] || two == [2, 1]); + + fn move_last(slice: &mut [usize], pos: usize) { + // use slice[pos..].rotate_left(1); once we can use that + let last_val = slice[pos]; + for i in pos..slice.len() - 1 { + slice[i] = slice[i + 1]; + } + *slice.last_mut().unwrap() = last_val; + } + let mut counts = [0i32; 24]; + for _ in 0..10000 { + let mut arr: [usize; 4] = [0, 1, 2, 3]; + arr.shuffle(&mut r); + let mut permutation = 0usize; + let mut pos_value = counts.len(); + for i in 0..4 { + pos_value /= 4 - i; + let pos = arr.iter().position(|&x| x == i).unwrap(); + assert!(pos < (4 - i)); + permutation += pos * pos_value; + move_last(&mut arr, pos); + assert_eq!(arr[3], i); + } + for i in 0..4 { + assert_eq!(arr[i], i); + } + counts[permutation] += 1; + } + for count in counts.iter() { + let err = *count - 10000i32 / 24; + assert!(-50 <= err && err <= 50); + } + } + + #[test] + fn test_partial_shuffle() { + let mut r = ::test::rng(118); + + let mut empty: [u32; 0] = []; + let res = empty.partial_shuffle(&mut r, 10); + assert_eq!((res.0.len(), res.1.len()), (0, 0)); + + let mut v = [1, 2, 3, 4, 5]; + let res = v.partial_shuffle(&mut r, 2); + assert_eq!((res.0.len(), res.1.len()), (2, 3)); + assert!(res.0[0] != res.0[1]); + // First elements are only modified if selected, so at least one isn't modified: + assert!(res.1[0] == 1 || res.1[1] == 2 || res.1[2] == 3); + } + + #[test] + #[cfg(feature = "alloc")] + fn test_sample_iter() { + let min_val = 1; + let max_val = 100; + + let mut r = ::test::rng(401); + let vals = (min_val..max_val).collect::>(); + let small_sample = vals.iter().choose_multiple(&mut r, 5); + let large_sample = vals.iter().choose_multiple(&mut r, vals.len() + 5); + + assert_eq!(small_sample.len(), 5); + assert_eq!(large_sample.len(), vals.len()); + // no randomization happens when amount >= len + assert_eq!(large_sample, vals.iter().collect::>()); + + assert!(small_sample.iter().all(|e| { + **e >= min_val && **e <= max_val + })); + } + + #[test] + #[cfg(feature = "alloc")] + #[allow(deprecated)] + fn test_sample_slice_boundaries() { + let empty: &[u8] = &[]; + + let mut r = ::test::rng(402); + + // sample 0 items + assert_eq!(&sample_slice(&mut r, empty, 0)[..], [0u8; 0]); + assert_eq!(&sample_slice(&mut r, &[42, 2, 42], 0)[..], [0u8; 0]); + + // sample 1 item + assert_eq!(&sample_slice(&mut r, &[42], 1)[..], [42]); + let v = sample_slice(&mut r, &[1, 42], 1)[0]; + assert!(v == 1 || v == 42); + + // sample "all" the items + let v = sample_slice(&mut r, &[42, 133], 2); + assert!(&v[..] == [42, 133] || v[..] == [133, 42]); + + // Make sure lucky 777's aren't lucky + let slice = &[42, 777]; + let mut num_42 = 0; + let total = 1000; + for _ in 0..total { + let v = sample_slice(&mut r, slice, 1); + assert_eq!(v.len(), 1); + let v = v[0]; + assert!(v == 42 || v == 777); + if v == 42 { + num_42 += 1; + } + } + let ratio_42 = num_42 as f64 / 1000 as f64; + assert!(0.4 <= ratio_42 || ratio_42 <= 0.6, "{}", ratio_42); + } + + #[test] + #[cfg(feature = "alloc")] + #[allow(deprecated)] + fn test_sample_slice() { + let seeded_rng = SmallRng::from_seed; + + let mut r = ::test::rng(403); + + for n in 1..20 { + let length = 5*n - 4; // 1, 6, ... + let amount = r.gen_range(0, length); + let mut seed = [0u8; 16]; + r.fill(&mut seed); + + // assert the basics work + let regular = index::sample(&mut seeded_rng(seed), length, amount); + assert_eq!(regular.len(), amount); + assert!(regular.iter().all(|e| e < length)); + + // also test that sampling the slice works + let vec: Vec = (0..(length as u32)).collect(); + let result = sample_slice(&mut seeded_rng(seed), &vec, amount); + assert_eq!(result, regular.iter().map(|i| i as u32).collect::>()); + + let result = sample_slice_ref(&mut seeded_rng(seed), &vec, amount); + assert!(result.iter().zip(regular.iter()).all(|(i,j)| **i == j as u32)); + } + } + + #[test] + #[cfg(feature = "alloc")] + fn test_weighted() { + let mut r = ::test::rng(406); + const N_REPS: u32 = 3000; + let weights = [1u32, 2, 3, 0, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7]; + let total_weight = weights.iter().sum::() as f32; + + let verify = |result: [i32; 14]| { + for (i, count) in result.iter().enumerate() { + let exp = (weights[i] * N_REPS) as f32 / total_weight; + let mut err = (*count as f32 - exp).abs(); + if err != 0.0 { + err /= exp; + } + assert!(err <= 0.25); + } + }; + + // choose_weighted + fn get_weight(item: &(u32, T)) -> u32 { + item.0 + } + let mut chosen = [0i32; 14]; + let mut items = [(0u32, 0usize); 14]; // (weight, index) + for (i, item) in items.iter_mut().enumerate() { + *item = (weights[i], i); + } + for _ in 0..N_REPS { + let item = items.choose_weighted(&mut r, get_weight).unwrap(); + chosen[item.1] += 1; + } + verify(chosen); + + // choose_weighted_mut + let mut items = [(0u32, 0i32); 14]; // (weight, count) + for (i, item) in items.iter_mut().enumerate() { + *item = (weights[i], 0); + } + for _ in 0..N_REPS { + items.choose_weighted_mut(&mut r, get_weight).unwrap().1 += 1; + } + for (ch, item) in chosen.iter_mut().zip(items.iter()) { + *ch = item.1; + } + verify(chosen); + + // Check error cases + let empty_slice = &mut [10][0..0]; + assert_eq!(empty_slice.choose_weighted(&mut r, |_| 1), Err(WeightedError::NoItem)); + assert_eq!(empty_slice.choose_weighted_mut(&mut r, |_| 1), Err(WeightedError::NoItem)); + assert_eq!(['x'].choose_weighted_mut(&mut r, |_| 0), Err(WeightedError::AllWeightsZero)); + assert_eq!([0, -1].choose_weighted_mut(&mut r, |x| *x), Err(WeightedError::NegativeWeight)); + assert_eq!([-1, 0].choose_weighted_mut(&mut r, |x| *x), Err(WeightedError::NegativeWeight)); + } +} -- cgit v1.2.3