aboutsummaryrefslogtreecommitdiff
path: root/rand/src/seq
diff options
context:
space:
mode:
Diffstat (limited to 'rand/src/seq')
-rw-r--r--rand/src/seq/index.rs378
-rw-r--r--rand/src/seq/mod.rs836
2 files changed, 1214 insertions, 0 deletions
diff --git a/rand/src/seq/index.rs b/rand/src/seq/index.rs
new file mode 100644
index 0000000..3d4df3a
--- /dev/null
+++ b/rand/src/seq/index.rs
@@ -0,0 +1,378 @@
+// Copyright 2018 Developers of the Rand project.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Index sampling
+
+#[cfg(feature="alloc")] use core::slice;
+
+#[cfg(feature="std")] use std::vec;
+#[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec::{self, Vec};
+// BTreeMap is not as fast in tests, but better than nothing.
+#[cfg(feature="std")] use std::collections::{HashSet};
+#[cfg(all(feature="alloc", not(feature="std")))] use alloc::collections::BTreeSet;
+
+#[cfg(feature="alloc")] use distributions::{Distribution, Uniform};
+use Rng;
+
+/// A vector of indices.
+///
+/// Multiple internal representations are possible.
+#[derive(Clone, Debug)]
+pub enum IndexVec {
+ #[doc(hidden)] U32(Vec<u32>),
+ #[doc(hidden)] USize(Vec<usize>),
+}
+
+impl IndexVec {
+ /// Returns the number of indices
+ pub fn len(&self) -> usize {
+ match self {
+ &IndexVec::U32(ref v) => v.len(),
+ &IndexVec::USize(ref v) => v.len(),
+ }
+ }
+
+ /// Return the value at the given `index`.
+ ///
+ /// (Note: we cannot implement `std::ops::Index` because of lifetime
+ /// restrictions.)
+ pub fn index(&self, index: usize) -> usize {
+ match self {
+ &IndexVec::U32(ref v) => v[index] as usize,
+ &IndexVec::USize(ref v) => v[index],
+ }
+ }
+
+ /// Return result as a `Vec<usize>`. Conversion may or may not be trivial.
+ pub fn into_vec(self) -> Vec<usize> {
+ match self {
+ IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(),
+ IndexVec::USize(v) => v,
+ }
+ }
+
+ /// Iterate over the indices as a sequence of `usize` values
+ pub fn iter<'a>(&'a self) -> IndexVecIter<'a> {
+ match self {
+ &IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()),
+ &IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()),
+ }
+ }
+
+ /// Convert into an iterator over the indices as a sequence of `usize` values
+ pub fn into_iter(self) -> IndexVecIntoIter {
+ match self {
+ IndexVec::U32(v) => IndexVecIntoIter::U32(v.into_iter()),
+ IndexVec::USize(v) => IndexVecIntoIter::USize(v.into_iter()),
+ }
+ }
+}
+
+impl PartialEq for IndexVec {
+ fn eq(&self, other: &IndexVec) -> bool {
+ use self::IndexVec::*;
+ match (self, other) {
+ (&U32(ref v1), &U32(ref v2)) => v1 == v2,
+ (&USize(ref v1), &USize(ref v2)) => v1 == v2,
+ (&U32(ref v1), &USize(ref v2)) => (v1.len() == v2.len())
+ && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as usize == *y)),
+ (&USize(ref v1), &U32(ref v2)) => (v1.len() == v2.len())
+ && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as usize)),
+ }
+ }
+}
+
+impl From<Vec<u32>> for IndexVec {
+ fn from(v: Vec<u32>) -> Self {
+ IndexVec::U32(v)
+ }
+}
+
+impl From<Vec<usize>> for IndexVec {
+ fn from(v: Vec<usize>) -> Self {
+ IndexVec::USize(v)
+ }
+}
+
+/// Return type of `IndexVec::iter`.
+#[derive(Debug)]
+pub enum IndexVecIter<'a> {
+ #[doc(hidden)] U32(slice::Iter<'a, u32>),
+ #[doc(hidden)] USize(slice::Iter<'a, usize>),
+}
+
+impl<'a> Iterator for IndexVecIter<'a> {
+ type Item = usize;
+ fn next(&mut self) -> Option<usize> {
+ use self::IndexVecIter::*;
+ match self {
+ &mut U32(ref mut iter) => iter.next().map(|i| *i as usize),
+ &mut USize(ref mut iter) => iter.next().cloned(),
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ match self {
+ &IndexVecIter::U32(ref v) => v.size_hint(),
+ &IndexVecIter::USize(ref v) => v.size_hint(),
+ }
+ }
+}
+
+impl<'a> ExactSizeIterator for IndexVecIter<'a> {}
+
+/// Return type of `IndexVec::into_iter`.
+#[derive(Clone, Debug)]
+pub enum IndexVecIntoIter {
+ #[doc(hidden)] U32(vec::IntoIter<u32>),
+ #[doc(hidden)] USize(vec::IntoIter<usize>),
+}
+
+impl Iterator for IndexVecIntoIter {
+ type Item = usize;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ use self::IndexVecIntoIter::*;
+ match self {
+ &mut U32(ref mut v) => v.next().map(|i| i as usize),
+ &mut USize(ref mut v) => v.next(),
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ use self::IndexVecIntoIter::*;
+ match self {
+ &U32(ref v) => v.size_hint(),
+ &USize(ref v) => v.size_hint(),
+ }
+ }
+}
+
+impl ExactSizeIterator for IndexVecIntoIter {}
+
+
+/// Randomly sample exactly `amount` distinct indices from `0..length`, and
+/// return them in random order (fully shuffled).
+///
+/// This method is used internally by the slice sampling methods, but it can
+/// sometimes be useful to have the indices themselves so this is provided as
+/// an alternative.
+///
+/// The implementation used is not specified; we automatically select the
+/// fastest available algorithm for the `length` and `amount` parameters
+/// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking,
+/// complexity is `O(amount)`, except that when `amount` is small, performance
+/// is closer to `O(amount^2)`, and when `length` is close to `amount` then
+/// `O(length)`.
+///
+/// Note that performance is significantly better over `u32` indices than over
+/// `u64` indices. Because of this we hide the underlying type behind an
+/// abstraction, `IndexVec`.
+///
+/// If an allocation-free `no_std` function is required, it is suggested
+/// to adapt the internal `sample_floyd` implementation.
+///
+/// Panics if `amount > length`.
+pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
+ where R: Rng + ?Sized,
+{
+ if amount > length {
+ panic!("`amount` of samples must be less than or equal to `length`");
+ }
+ if length > (::core::u32::MAX as usize) {
+ // We never want to use inplace here, but could use floyd's alg
+ // Lazy version: always use the cache alg.
+ return sample_rejection(rng, length, amount);
+ }
+ let amount = amount as u32;
+ let length = length as u32;
+
+ // Choice of algorithm here depends on both length and amount. See:
+ // https://github.com/rust-random/rand/pull/479
+ // We do some calculations with f32. Accuracy is not very important.
+
+ if amount < 163 {
+ const C: [[f32; 2]; 2] = [[1.6, 8.0/45.0], [10.0, 70.0/9.0]];
+ let j = if length < 500_000 { 0 } else { 1 };
+ let amount_fp = amount as f32;
+ let m4 = C[0][j] * amount_fp;
+ // Short-cut: when amount < 12, floyd's is always faster
+ if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp {
+ sample_inplace(rng, length, amount)
+ } else {
+ sample_floyd(rng, length, amount)
+ }
+ } else {
+ const C: [f32; 2] = [270.0, 330.0/9.0];
+ let j = if length < 500_000 { 0 } else { 1 };
+ if (length as f32) < C[j] * (amount as f32) {
+ sample_inplace(rng, length, amount)
+ } else {
+ // note: could have a specific u32 impl, but I'm lazy and
+ // generics don't have usable conversions
+ sample_rejection(rng, length as usize, amount as usize)
+ }
+ }
+}
+
+/// Randomly sample exactly `amount` indices from `0..length`, using Floyd's
+/// combination algorithm.
+///
+/// The output values are fully shuffled. (Overhead is under 50%.)
+///
+/// This implementation uses `O(amount)` memory and `O(amount^2)` time.
+fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
+ where R: Rng + ?Sized,
+{
+ // For small amount we use Floyd's fully-shuffled variant. For larger
+ // amounts this is slow due to Vec::insert performance, so we shuffle
+ // afterwards. Benchmarks show little overhead from extra logic.
+ let floyd_shuffle = amount < 50;
+
+ debug_assert!(amount <= length);
+ let mut indices = Vec::with_capacity(amount as usize);
+ for j in length - amount .. length {
+ let t = rng.gen_range(0, j + 1);
+ if floyd_shuffle {
+ if let Some(pos) = indices.iter().position(|&x| x == t) {
+ indices.insert(pos, j);
+ continue;
+ }
+ } else {
+ if indices.contains(&t) {
+ indices.push(j);
+ continue;
+ }
+ }
+ indices.push(t);
+ }
+ if !floyd_shuffle {
+ // Reimplement SliceRandom::shuffle with smaller indices
+ for i in (1..amount).rev() {
+ // invariant: elements with index > i have been locked in place.
+ indices.swap(i as usize, rng.gen_range(0, i + 1) as usize);
+ }
+ }
+ IndexVec::from(indices)
+}
+
+/// Randomly sample exactly `amount` indices from `0..length`, using an inplace
+/// partial Fisher-Yates method.
+/// Sample an amount of indices using an inplace partial fisher yates method.
+///
+/// This allocates the entire `length` of indices and randomizes only the first `amount`.
+/// It then truncates to `amount` and returns.
+///
+/// This method is not appropriate for large `length` and potentially uses a lot
+/// of memory; because of this we only implement for `u32` index (which improves
+/// performance in all cases).
+///
+/// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time.
+fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
+ where R: Rng + ?Sized,
+{
+ debug_assert!(amount <= length);
+ let mut indices: Vec<u32> = Vec::with_capacity(length as usize);
+ indices.extend(0..length);
+ for i in 0..amount {
+ let j: u32 = rng.gen_range(i, length);
+ indices.swap(i as usize, j as usize);
+ }
+ indices.truncate(amount as usize);
+ debug_assert_eq!(indices.len(), amount as usize);
+ IndexVec::from(indices)
+}
+
+/// Randomly sample exactly `amount` indices from `0..length`, using rejection
+/// sampling.
+///
+/// Since `amount <<< length` there is a low chance of a random sample in
+/// `0..length` being a duplicate. We test for duplicates and resample where
+/// necessary. The algorithm is `O(amount)` time and memory.
+fn sample_rejection<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
+ where R: Rng + ?Sized,
+{
+ debug_assert!(amount < length);
+ #[cfg(feature="std")] let mut cache = HashSet::with_capacity(amount);
+ #[cfg(not(feature="std"))] let mut cache = BTreeSet::new();
+ let distr = Uniform::new(0, length);
+ let mut indices = Vec::with_capacity(amount);
+ for _ in 0..amount {
+ let mut pos = distr.sample(rng);
+ while !cache.insert(pos) {
+ pos = distr.sample(rng);
+ }
+ indices.push(pos);
+ }
+
+ debug_assert_eq!(indices.len(), amount);
+ IndexVec::from(indices)
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_sample_boundaries() {
+ let mut r = ::test::rng(404);
+
+ assert_eq!(sample_inplace(&mut r, 0, 0).len(), 0);
+ assert_eq!(sample_inplace(&mut r, 1, 0).len(), 0);
+ assert_eq!(sample_inplace(&mut r, 1, 1).into_vec(), vec![0]);
+
+ assert_eq!(sample_rejection(&mut r, 1, 0).len(), 0);
+
+ assert_eq!(sample_floyd(&mut r, 0, 0).len(), 0);
+ assert_eq!(sample_floyd(&mut r, 1, 0).len(), 0);
+ assert_eq!(sample_floyd(&mut r, 1, 1).into_vec(), vec![0]);
+
+ // These algorithms should be fast with big numbers. Test average.
+ let sum: usize = sample_rejection(&mut r, 1 << 25, 10)
+ .into_iter().sum();
+ assert!(1 << 25 < sum && sum < (1 << 25) * 25);
+
+ let sum: usize = sample_floyd(&mut r, 1 << 25, 10)
+ .into_iter().sum();
+ assert!(1 << 25 < sum && sum < (1 << 25) * 25);
+ }
+
+ #[test]
+ fn test_sample_alg() {
+ let seed_rng = ::test::rng;
+
+ // We can't test which algorithm is used directly, but Floyd's alg
+ // should produce different results from the others. (Also, `inplace`
+ // and `cached` currently use different sizes thus produce different results.)
+
+ // A small length and relatively large amount should use inplace
+ let (length, amount): (usize, usize) = (100, 50);
+ let v1 = sample(&mut seed_rng(420), length, amount);
+ let v2 = sample_inplace(&mut seed_rng(420), length as u32, amount as u32);
+ assert!(v1.iter().all(|e| e < length));
+ assert_eq!(v1, v2);
+
+ // Test Floyd's alg does produce different results
+ let v3 = sample_floyd(&mut seed_rng(420), length as u32, amount as u32);
+ assert!(v1 != v3);
+
+ // A large length and small amount should use Floyd
+ let (length, amount): (usize, usize) = (1<<20, 50);
+ let v1 = sample(&mut seed_rng(421), length, amount);
+ let v2 = sample_floyd(&mut seed_rng(421), length as u32, amount as u32);
+ assert!(v1.iter().all(|e| e < length));
+ assert_eq!(v1, v2);
+
+ // A large length and larger amount should use cache
+ let (length, amount): (usize, usize) = (1<<20, 600);
+ let v1 = sample(&mut seed_rng(422), length, amount);
+ let v2 = sample_rejection(&mut seed_rng(422), length, amount);
+ assert!(v1.iter().all(|e| e < length));
+ assert_eq!(v1, v2);
+ }
+}
diff --git a/rand/src/seq/mod.rs b/rand/src/seq/mod.rs
new file mode 100644
index 0000000..9959602
--- /dev/null
+++ b/rand/src/seq/mod.rs
@@ -0,0 +1,836 @@
+// Copyright 2018 Developers of the Rand project.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Functions for randomly accessing and sampling sequences.
+//!
+//! TODO: module doc
+
+
+#[cfg(feature="alloc")] pub mod index;
+
+#[cfg(feature="alloc")] use core::ops::Index;
+
+#[cfg(all(feature="alloc", not(feature="std")))] use alloc::vec::Vec;
+
+use Rng;
+#[cfg(feature="alloc")] use distributions::WeightedError;
+#[cfg(feature="alloc")] use distributions::uniform::{SampleUniform, SampleBorrow};
+
+/// Extension trait on slices, providing random mutation and sampling methods.
+///
+/// An implementation is provided for slices. This may also be implementable for
+/// other types.
+pub trait SliceRandom {
+ /// The element type.
+ type Item;
+
+ /// Returns a reference to one random element of the slice, or `None` if the
+ /// slice is empty.
+ ///
+ /// Depending on the implementation, complexity is expected to be `O(1)`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use rand::thread_rng;
+ /// use rand::seq::SliceRandom;
+ ///
+ /// let choices = [1, 2, 4, 8, 16, 32];
+ /// let mut rng = thread_rng();
+ /// println!("{:?}", choices.choose(&mut rng));
+ /// assert_eq!(choices[..0].choose(&mut rng), None);
+ /// ```
+ fn choose<R>(&self, rng: &mut R) -> Option<&Self::Item>
+ where R: Rng + ?Sized;
+
+ /// Returns a mutable reference to one random element of the slice, or
+ /// `None` if the slice is empty.
+ ///
+ /// Depending on the implementation, complexity is expected to be `O(1)`.
+ fn choose_mut<R>(&mut self, rng: &mut R) -> Option<&mut Self::Item>
+ where R: Rng + ?Sized;
+
+ /// Produces an iterator that chooses `amount` elements from the slice at
+ /// random without repeating any, and returns them in random order.
+ ///
+ /// In case this API is not sufficiently flexible, use `index::sample` then
+ /// apply the indices to the slice.
+ ///
+ /// Complexity is expected to be the same as `index::sample`.
+ ///
+ /// # Example
+ /// ```
+ /// use rand::seq::SliceRandom;
+ ///
+ /// let mut rng = &mut rand::thread_rng();
+ /// let sample = "Hello, audience!".as_bytes();
+ ///
+ /// // collect the results into a vector:
+ /// let v: Vec<u8> = sample.choose_multiple(&mut rng, 3).cloned().collect();
+ ///
+ /// // store in a buffer:
+ /// let mut buf = [0u8; 5];
+ /// for (b, slot) in sample.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) {
+ /// *slot = *b;
+ /// }
+ /// ```
+ #[cfg(feature = "alloc")]
+ fn choose_multiple<R>(&self, rng: &mut R, amount: usize) -> SliceChooseIter<Self, Self::Item>
+ where R: Rng + ?Sized;
+
+ /// Similar to [`choose`], where the likelihood of each outcome may be
+ /// specified. The specified function `weight` maps items `x` to a relative
+ /// likelihood `weight(x)`. The probability of each item being selected is
+ /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use rand::prelude::*;
+ ///
+ /// let choices = [('a', 2), ('b', 1), ('c', 1)];
+ /// let mut rng = thread_rng();
+ /// // 50% chance to print 'a', 25% chance to print 'b', 25% chance to print 'c'
+ /// println!("{:?}", choices.choose_weighted(&mut rng, |item| item.1).unwrap().0);
+ /// ```
+ /// [`choose`]: trait.SliceRandom.html#method.choose
+ #[cfg(feature = "alloc")]
+ fn choose_weighted<R, F, B, X>(&self, rng: &mut R, weight: F) -> Result<&Self::Item, WeightedError>
+ where R: Rng + ?Sized,
+ F: Fn(&Self::Item) -> B,
+ B: SampleBorrow<X>,
+ X: SampleUniform +
+ for<'a> ::core::ops::AddAssign<&'a X> +
+ ::core::cmp::PartialOrd<X> +
+ Clone +
+ Default;
+
+ /// Similar to [`choose_mut`], where the likelihood of each outcome may be
+ /// specified. The specified function `weight` maps items `x` to a relative
+ /// likelihood `weight(x)`. The probability of each item being selected is
+ /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`.
+ ///
+ /// See also [`choose_weighted`].
+ ///
+ /// [`choose_mut`]: trait.SliceRandom.html#method.choose_mut
+ /// [`choose_weighted`]: trait.SliceRandom.html#method.choose_weighted
+ #[cfg(feature = "alloc")]
+ fn choose_weighted_mut<R, F, B, X>(&mut self, rng: &mut R, weight: F) -> Result<&mut Self::Item, WeightedError>
+ where R: Rng + ?Sized,
+ F: Fn(&Self::Item) -> B,
+ B: SampleBorrow<X>,
+ X: SampleUniform +
+ for<'a> ::core::ops::AddAssign<&'a X> +
+ ::core::cmp::PartialOrd<X> +
+ Clone +
+ Default;
+
+ /// Shuffle a mutable slice in place.
+ ///
+ /// Depending on the implementation, complexity is expected to be `O(1)`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use rand::thread_rng;
+ /// use rand::seq::SliceRandom;
+ ///
+ /// let mut rng = thread_rng();
+ /// let mut y = [1, 2, 3, 4, 5];
+ /// println!("Unshuffled: {:?}", y);
+ /// y.shuffle(&mut rng);
+ /// println!("Shuffled: {:?}", y);
+ /// ```
+ fn shuffle<R>(&mut self, rng: &mut R) where R: Rng + ?Sized;
+
+ /// Shuffle a slice in place, but exit early.
+ ///
+ /// Returns two mutable slices from the source slice. The first contains
+ /// `amount` elements randomly permuted. The second has the remaining
+ /// elements that are not fully shuffled.
+ ///
+ /// This is an efficient method to select `amount` elements at random from
+ /// the slice, provided the slice may be mutated.
+ ///
+ /// If you only need to choose elements randomly and `amount > self.len()/2`
+ /// then you may improve performance by taking
+ /// `amount = values.len() - amount` and using only the second slice.
+ ///
+ /// If `amount` is greater than the number of elements in the slice, this
+ /// will perform a full shuffle.
+ ///
+ /// Complexity is expected to be `O(m)` where `m = amount`.
+ fn partial_shuffle<R>(&mut self, rng: &mut R, amount: usize)
+ -> (&mut [Self::Item], &mut [Self::Item]) where R: Rng + ?Sized;
+}
+
+/// Extension trait on iterators, providing random sampling methods.
+pub trait IteratorRandom: Iterator + Sized {
+ /// Choose one element at random from the iterator. If you have a slice,
+ /// it's significantly faster to call the [`choose`] or [`choose_mut`]
+ /// functions using the slice instead.
+ ///
+ /// Returns `None` if and only if the iterator is empty.
+ ///
+ /// Complexity is `O(n)`, where `n` is the length of the iterator.
+ /// This likely consumes multiple random numbers, but the exact number
+ /// is unspecified.
+ ///
+ /// [`choose`]: trait.SliceRandom.html#method.choose
+ /// [`choose_mut`]: trait.SliceRandom.html#method.choose_mut
+ fn choose<R>(mut self, rng: &mut R) -> Option<Self::Item>
+ where R: Rng + ?Sized
+ {
+ let (mut lower, mut upper) = self.size_hint();
+ let mut consumed = 0;
+ let mut result = None;
+
+ if upper == Some(lower) {
+ return if lower == 0 { None } else { self.nth(rng.gen_range(0, lower)) };
+ }
+
+ // Continue until the iterator is exhausted
+ loop {
+ if lower > 1 {
+ let ix = rng.gen_range(0, lower + consumed);
+ let skip;
+ if ix < lower {
+ result = self.nth(ix);
+ skip = lower - (ix + 1);
+ } else {
+ skip = lower;
+ }
+ if upper == Some(lower) {
+ return result;
+ }
+ consumed += lower;
+ if skip > 0 {
+ self.nth(skip - 1);
+ }
+ } else {
+ let elem = self.next();
+ if elem.is_none() {
+ return result;
+ }
+ consumed += 1;
+ let denom = consumed as f64; // accurate to 2^53 elements
+ if rng.gen_bool(1.0 / denom) {
+ result = elem;
+ }
+ }
+
+ let hint = self.size_hint();
+ lower = hint.0;
+ upper = hint.1;
+ }
+ }
+
+ /// Collects `amount` values at random from the iterator into a supplied
+ /// buffer.
+ ///
+ /// Although the elements are selected randomly, the order of elements in
+ /// the buffer is neither stable nor fully random. If random ordering is
+ /// desired, shuffle the result.
+ ///
+ /// Returns the number of elements added to the buffer. This equals `amount`
+ /// unless the iterator contains insufficient elements, in which case this
+ /// equals the number of elements available.
+ ///
+ /// Complexity is `O(n)` where `n` is the length of the iterator.
+ fn choose_multiple_fill<R>(mut self, rng: &mut R, buf: &mut [Self::Item])
+ -> usize where R: Rng + ?Sized
+ {
+ let amount = buf.len();
+ let mut len = 0;
+ while len < amount {
+ if let Some(elem) = self.next() {
+ buf[len] = elem;
+ len += 1;
+ } else {
+ // Iterator exhausted; stop early
+ return len;
+ }
+ }
+
+ // Continue, since the iterator was not exhausted
+ for (i, elem) in self.enumerate() {
+ let k = rng.gen_range(0, i + 1 + amount);
+ if let Some(slot) = buf.get_mut(k) {
+ *slot = elem;
+ }
+ }
+ len
+ }
+
+ /// Collects `amount` values at random from the iterator into a vector.
+ ///
+ /// This is equivalent to `choose_multiple_fill` except for the result type.
+ ///
+ /// Although the elements are selected randomly, the order of elements in
+ /// the buffer is neither stable nor fully random. If random ordering is
+ /// desired, shuffle the result.
+ ///
+ /// The length of the returned vector equals `amount` unless the iterator
+ /// contains insufficient elements, in which case it equals the number of
+ /// elements available.
+ ///
+ /// Complexity is `O(n)` where `n` is the length of the iterator.
+ #[cfg(feature = "alloc")]
+ fn choose_multiple<R>(mut self, rng: &mut R, amount: usize) -> Vec<Self::Item>
+ where R: Rng + ?Sized
+ {
+ let mut reservoir = Vec::with_capacity(amount);
+ reservoir.extend(self.by_ref().take(amount));
+
+ // Continue unless the iterator was exhausted
+ //
+ // note: this prevents iterators that "restart" from causing problems.
+ // If the iterator stops once, then so do we.
+ if reservoir.len() == amount {
+ for (i, elem) in self.enumerate() {
+ let k = rng.gen_range(0, i + 1 + amount);
+ if let Some(slot) = reservoir.get_mut(k) {
+ *slot = elem;
+ }
+ }
+ } else {
+ // Don't hang onto extra memory. There is a corner case where
+ // `amount` was much less than `self.len()`.
+ reservoir.shrink_to_fit();
+ }
+ reservoir
+ }
+}
+
+
+impl<T> SliceRandom for [T] {
+ type Item = T;
+
+ fn choose<R>(&self, rng: &mut R) -> Option<&Self::Item>
+ where R: Rng + ?Sized
+ {
+ if self.is_empty() {
+ None
+ } else {
+ Some(&self[rng.gen_range(0, self.len())])
+ }
+ }
+
+ fn choose_mut<R>(&mut self, rng: &mut R) -> Option<&mut Self::Item>
+ where R: Rng + ?Sized
+ {
+ if self.is_empty() {
+ None
+ } else {
+ let len = self.len();
+ Some(&mut self[rng.gen_range(0, len)])
+ }
+ }
+
+ #[cfg(feature = "alloc")]
+ fn choose_multiple<R>(&self, rng: &mut R, amount: usize)
+ -> SliceChooseIter<Self, Self::Item>
+ where R: Rng + ?Sized
+ {
+ let amount = ::core::cmp::min(amount, self.len());
+ SliceChooseIter {
+ slice: self,
+ _phantom: Default::default(),
+ indices: index::sample(rng, self.len(), amount).into_iter(),
+ }
+ }
+
+ #[cfg(feature = "alloc")]
+ fn choose_weighted<R, F, B, X>(&self, rng: &mut R, weight: F) -> Result<&Self::Item, WeightedError>
+ where R: Rng + ?Sized,
+ F: Fn(&Self::Item) -> B,
+ B: SampleBorrow<X>,
+ X: SampleUniform +
+ for<'a> ::core::ops::AddAssign<&'a X> +
+ ::core::cmp::PartialOrd<X> +
+ Clone +
+ Default {
+ use distributions::{Distribution, WeightedIndex};
+ let distr = WeightedIndex::new(self.iter().map(weight))?;
+ Ok(&self[distr.sample(rng)])
+ }
+
+ #[cfg(feature = "alloc")]
+ fn choose_weighted_mut<R, F, B, X>(&mut self, rng: &mut R, weight: F) -> Result<&mut Self::Item, WeightedError>
+ where R: Rng + ?Sized,
+ F: Fn(&Self::Item) -> B,
+ B: SampleBorrow<X>,
+ X: SampleUniform +
+ for<'a> ::core::ops::AddAssign<&'a X> +
+ ::core::cmp::PartialOrd<X> +
+ Clone +
+ Default {
+ use distributions::{Distribution, WeightedIndex};
+ let distr = WeightedIndex::new(self.iter().map(weight))?;
+ Ok(&mut self[distr.sample(rng)])
+ }
+
+ fn shuffle<R>(&mut self, rng: &mut R) where R: Rng + ?Sized
+ {
+ for i in (1..self.len()).rev() {
+ // invariant: elements with index > i have been locked in place.
+ self.swap(i, rng.gen_range(0, i + 1));
+ }
+ }
+
+ fn partial_shuffle<R>(&mut self, rng: &mut R, amount: usize)
+ -> (&mut [Self::Item], &mut [Self::Item]) where R: Rng + ?Sized
+ {
+ // This applies Durstenfeld's algorithm for the
+ // [Fisher–Yates shuffle](https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm)
+ // for an unbiased permutation, but exits early after choosing `amount`
+ // elements.
+
+ let len = self.len();
+ let end = if amount >= len { 0 } else { len - amount };
+
+ for i in (end..len).rev() {
+ // invariant: elements with index > i have been locked in place.
+ self.swap(i, rng.gen_range(0, i + 1));
+ }
+ let r = self.split_at_mut(end);
+ (r.1, r.0)
+ }
+}
+
+impl<I> IteratorRandom for I where I: Iterator + Sized {}
+
+
+/// Iterator over multiple choices, as returned by [`SliceRandom::choose_multiple](
+/// trait.SliceRandom.html#method.choose_multiple).
+#[cfg(feature = "alloc")]
+#[derive(Debug)]
+pub struct SliceChooseIter<'a, S: ?Sized + 'a, T: 'a> {
+ slice: &'a S,
+ _phantom: ::core::marker::PhantomData<T>,
+ indices: index::IndexVecIntoIter,
+}
+
+#[cfg(feature = "alloc")]
+impl<'a, S: Index<usize, Output = T> + ?Sized + 'a, T: 'a> Iterator for SliceChooseIter<'a, S, T> {
+ type Item = &'a T;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ // TODO: investigate using SliceIndex::get_unchecked when stable
+ self.indices.next().map(|i| &self.slice[i as usize])
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (self.indices.len(), Some(self.indices.len()))
+ }
+}
+
+#[cfg(feature = "alloc")]
+impl<'a, S: Index<usize, Output = T> + ?Sized + 'a, T: 'a> ExactSizeIterator
+ for SliceChooseIter<'a, S, T>
+{
+ fn len(&self) -> usize {
+ self.indices.len()
+ }
+}
+
+
+/// Randomly sample `amount` elements from a finite iterator.
+///
+/// Deprecated: use [`IteratorRandom::choose_multiple`] instead.
+///
+/// [`IteratorRandom::choose_multiple`]: trait.IteratorRandom.html#method.choose_multiple
+#[cfg(feature = "alloc")]
+#[deprecated(since="0.6.0", note="use IteratorRandom::choose_multiple instead")]
+pub fn sample_iter<T, I, R>(rng: &mut R, iterable: I, amount: usize) -> Result<Vec<T>, Vec<T>>
+ where I: IntoIterator<Item=T>,
+ R: Rng + ?Sized,
+{
+ use seq::IteratorRandom;
+ let iter = iterable.into_iter();
+ let result = iter.choose_multiple(rng, amount);
+ if result.len() == amount {
+ Ok(result)
+ } else {
+ Err(result)
+ }
+}
+
+/// Randomly sample exactly `amount` values from `slice`.
+///
+/// The values are non-repeating and in random order.
+///
+/// This implementation uses `O(amount)` time and memory.
+///
+/// Panics if `amount > slice.len()`
+///
+/// Deprecated: use [`SliceRandom::choose_multiple`] instead.
+///
+/// [`SliceRandom::choose_multiple`]: trait.SliceRandom.html#method.choose_multiple
+#[cfg(feature = "alloc")]
+#[deprecated(since="0.6.0", note="use SliceRandom::choose_multiple instead")]
+pub fn sample_slice<R, T>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T>
+ where R: Rng + ?Sized,
+ T: Clone
+{
+ let indices = index::sample(rng, slice.len(), amount).into_iter();
+
+ let mut out = Vec::with_capacity(amount);
+ out.extend(indices.map(|i| slice[i].clone()));
+ out
+}
+
+/// Randomly sample exactly `amount` references from `slice`.
+///
+/// The references are non-repeating and in random order.
+///
+/// This implementation uses `O(amount)` time and memory.
+///
+/// Panics if `amount > slice.len()`
+///
+/// Deprecated: use [`SliceRandom::choose_multiple`] instead.
+///
+/// [`SliceRandom::choose_multiple`]: trait.SliceRandom.html#method.choose_multiple
+#[cfg(feature = "alloc")]
+#[deprecated(since="0.6.0", note="use SliceRandom::choose_multiple instead")]
+pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T>
+ where R: Rng + ?Sized
+{
+ let indices = index::sample(rng, slice.len(), amount).into_iter();
+
+ let mut out = Vec::with_capacity(amount);
+ out.extend(indices.map(|i| &slice[i]));
+ out
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ #[cfg(feature = "alloc")] use {Rng, SeedableRng};
+ #[cfg(feature = "alloc")] use rngs::SmallRng;
+ #[cfg(all(feature="alloc", not(feature="std")))]
+ use alloc::vec::Vec;
+
+ #[test]
+ fn test_slice_choose() {
+ let mut r = ::test::rng(107);
+ let chars = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n'];
+ let mut chosen = [0i32; 14];
+ for _ in 0..1000 {
+ let picked = *chars.choose(&mut r).unwrap();
+ chosen[(picked as usize) - ('a' as usize)] += 1;
+ }
+ for count in chosen.iter() {
+ let err = *count - (1000 / (chars.len() as i32));
+ assert!(-20 <= err && err <= 20);
+ }
+
+ chosen.iter_mut().for_each(|x| *x = 0);
+ for _ in 0..1000 {
+ *chosen.choose_mut(&mut r).unwrap() += 1;
+ }
+ for count in chosen.iter() {
+ let err = *count - (1000 / (chosen.len() as i32));
+ assert!(-20 <= err && err <= 20);
+ }
+
+ let mut v: [isize; 0] = [];
+ assert_eq!(v.choose(&mut r), None);
+ assert_eq!(v.choose_mut(&mut r), None);
+ }
+
+ #[derive(Clone)]
+ struct UnhintedIterator<I: Iterator + Clone> {
+ iter: I,
+ }
+ impl<I: Iterator + Clone> Iterator for UnhintedIterator<I> {
+ type Item = I::Item;
+ fn next(&mut self) -> Option<Self::Item> {
+ self.iter.next()
+ }
+ }
+
+ #[derive(Clone)]
+ struct ChunkHintedIterator<I: ExactSizeIterator + Iterator + Clone> {
+ iter: I,
+ chunk_remaining: usize,
+ chunk_size: usize,
+ hint_total_size: bool,
+ }
+ impl<I: ExactSizeIterator + Iterator + Clone> Iterator for ChunkHintedIterator<I> {
+ type Item = I::Item;
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.chunk_remaining == 0 {
+ self.chunk_remaining = ::core::cmp::min(self.chunk_size,
+ self.iter.len());
+ }
+ self.chunk_remaining = self.chunk_remaining.saturating_sub(1);
+
+ self.iter.next()
+ }
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (self.chunk_remaining,
+ if self.hint_total_size { Some(self.iter.len()) } else { None })
+ }
+ }
+
+ #[derive(Clone)]
+ struct WindowHintedIterator<I: ExactSizeIterator + Iterator + Clone> {
+ iter: I,
+ window_size: usize,
+ hint_total_size: bool,
+ }
+ impl<I: ExactSizeIterator + Iterator + Clone> Iterator for WindowHintedIterator<I> {
+ type Item = I::Item;
+ fn next(&mut self) -> Option<Self::Item> {
+ self.iter.next()
+ }
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (::core::cmp::min(self.iter.len(), self.window_size),
+ if self.hint_total_size { Some(self.iter.len()) } else { None })
+ }
+ }
+
+ #[test]
+ fn test_iterator_choose() {
+ let r = &mut ::test::rng(109);
+ fn test_iter<R: Rng + ?Sized, Iter: Iterator<Item=usize> + Clone>(r: &mut R, iter: Iter) {
+ let mut chosen = [0i32; 9];
+ for _ in 0..1000 {
+ let picked = iter.clone().choose(r).unwrap();
+ chosen[picked] += 1;
+ }
+ for count in chosen.iter() {
+ // Samples should follow Binomial(1000, 1/9)
+ // Octave: binopdf(x, 1000, 1/9) gives the prob of *count == x
+ // Note: have seen 153, which is unlikely but not impossible.
+ assert!(72 < *count && *count < 154, "count not close to 1000/9: {}", count);
+ }
+ }
+
+ test_iter(r, 0..9);
+ test_iter(r, [0, 1, 2, 3, 4, 5, 6, 7, 8].iter().cloned());
+ #[cfg(feature = "alloc")]
+ test_iter(r, (0..9).collect::<Vec<_>>().into_iter());
+ test_iter(r, UnhintedIterator { iter: 0..9 });
+ test_iter(r, ChunkHintedIterator { iter: 0..9, chunk_size: 4, chunk_remaining: 4, hint_total_size: false });
+ test_iter(r, ChunkHintedIterator { iter: 0..9, chunk_size: 4, chunk_remaining: 4, hint_total_size: true });
+ test_iter(r, WindowHintedIterator { iter: 0..9, window_size: 2, hint_total_size: false });
+ test_iter(r, WindowHintedIterator { iter: 0..9, window_size: 2, hint_total_size: true });
+
+ assert_eq!((0..0).choose(r), None);
+ assert_eq!(UnhintedIterator{ iter: 0..0 }.choose(r), None);
+ }
+
+ #[test]
+ fn test_shuffle() {
+ let mut r = ::test::rng(108);
+ let empty: &mut [isize] = &mut [];
+ empty.shuffle(&mut r);
+ let mut one = [1];
+ one.shuffle(&mut r);
+ let b: &[_] = &[1];
+ assert_eq!(one, b);
+
+ let mut two = [1, 2];
+ two.shuffle(&mut r);
+ assert!(two == [1, 2] || two == [2, 1]);
+
+ fn move_last(slice: &mut [usize], pos: usize) {
+ // use slice[pos..].rotate_left(1); once we can use that
+ let last_val = slice[pos];
+ for i in pos..slice.len() - 1 {
+ slice[i] = slice[i + 1];
+ }
+ *slice.last_mut().unwrap() = last_val;
+ }
+ let mut counts = [0i32; 24];
+ for _ in 0..10000 {
+ let mut arr: [usize; 4] = [0, 1, 2, 3];
+ arr.shuffle(&mut r);
+ let mut permutation = 0usize;
+ let mut pos_value = counts.len();
+ for i in 0..4 {
+ pos_value /= 4 - i;
+ let pos = arr.iter().position(|&x| x == i).unwrap();
+ assert!(pos < (4 - i));
+ permutation += pos * pos_value;
+ move_last(&mut arr, pos);
+ assert_eq!(arr[3], i);
+ }
+ for i in 0..4 {
+ assert_eq!(arr[i], i);
+ }
+ counts[permutation] += 1;
+ }
+ for count in counts.iter() {
+ let err = *count - 10000i32 / 24;
+ assert!(-50 <= err && err <= 50);
+ }
+ }
+
+ #[test]
+ fn test_partial_shuffle() {
+ let mut r = ::test::rng(118);
+
+ let mut empty: [u32; 0] = [];
+ let res = empty.partial_shuffle(&mut r, 10);
+ assert_eq!((res.0.len(), res.1.len()), (0, 0));
+
+ let mut v = [1, 2, 3, 4, 5];
+ let res = v.partial_shuffle(&mut r, 2);
+ assert_eq!((res.0.len(), res.1.len()), (2, 3));
+ assert!(res.0[0] != res.0[1]);
+ // First elements are only modified if selected, so at least one isn't modified:
+ assert!(res.1[0] == 1 || res.1[1] == 2 || res.1[2] == 3);
+ }
+
+ #[test]
+ #[cfg(feature = "alloc")]
+ fn test_sample_iter() {
+ let min_val = 1;
+ let max_val = 100;
+
+ let mut r = ::test::rng(401);
+ let vals = (min_val..max_val).collect::<Vec<i32>>();
+ let small_sample = vals.iter().choose_multiple(&mut r, 5);
+ let large_sample = vals.iter().choose_multiple(&mut r, vals.len() + 5);
+
+ assert_eq!(small_sample.len(), 5);
+ assert_eq!(large_sample.len(), vals.len());
+ // no randomization happens when amount >= len
+ assert_eq!(large_sample, vals.iter().collect::<Vec<_>>());
+
+ assert!(small_sample.iter().all(|e| {
+ **e >= min_val && **e <= max_val
+ }));
+ }
+
+ #[test]
+ #[cfg(feature = "alloc")]
+ #[allow(deprecated)]
+ fn test_sample_slice_boundaries() {
+ let empty: &[u8] = &[];
+
+ let mut r = ::test::rng(402);
+
+ // sample 0 items
+ assert_eq!(&sample_slice(&mut r, empty, 0)[..], [0u8; 0]);
+ assert_eq!(&sample_slice(&mut r, &[42, 2, 42], 0)[..], [0u8; 0]);
+
+ // sample 1 item
+ assert_eq!(&sample_slice(&mut r, &[42], 1)[..], [42]);
+ let v = sample_slice(&mut r, &[1, 42], 1)[0];
+ assert!(v == 1 || v == 42);
+
+ // sample "all" the items
+ let v = sample_slice(&mut r, &[42, 133], 2);
+ assert!(&v[..] == [42, 133] || v[..] == [133, 42]);
+
+ // Make sure lucky 777's aren't lucky
+ let slice = &[42, 777];
+ let mut num_42 = 0;
+ let total = 1000;
+ for _ in 0..total {
+ let v = sample_slice(&mut r, slice, 1);
+ assert_eq!(v.len(), 1);
+ let v = v[0];
+ assert!(v == 42 || v == 777);
+ if v == 42 {
+ num_42 += 1;
+ }
+ }
+ let ratio_42 = num_42 as f64 / 1000 as f64;
+ assert!(0.4 <= ratio_42 || ratio_42 <= 0.6, "{}", ratio_42);
+ }
+
+ #[test]
+ #[cfg(feature = "alloc")]
+ #[allow(deprecated)]
+ fn test_sample_slice() {
+ let seeded_rng = SmallRng::from_seed;
+
+ let mut r = ::test::rng(403);
+
+ for n in 1..20 {
+ let length = 5*n - 4; // 1, 6, ...
+ let amount = r.gen_range(0, length);
+ let mut seed = [0u8; 16];
+ r.fill(&mut seed);
+
+ // assert the basics work
+ let regular = index::sample(&mut seeded_rng(seed), length, amount);
+ assert_eq!(regular.len(), amount);
+ assert!(regular.iter().all(|e| e < length));
+
+ // also test that sampling the slice works
+ let vec: Vec<u32> = (0..(length as u32)).collect();
+ let result = sample_slice(&mut seeded_rng(seed), &vec, amount);
+ assert_eq!(result, regular.iter().map(|i| i as u32).collect::<Vec<_>>());
+
+ let result = sample_slice_ref(&mut seeded_rng(seed), &vec, amount);
+ assert!(result.iter().zip(regular.iter()).all(|(i,j)| **i == j as u32));
+ }
+ }
+
+ #[test]
+ #[cfg(feature = "alloc")]
+ fn test_weighted() {
+ let mut r = ::test::rng(406);
+ const N_REPS: u32 = 3000;
+ let weights = [1u32, 2, 3, 0, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7];
+ let total_weight = weights.iter().sum::<u32>() as f32;
+
+ let verify = |result: [i32; 14]| {
+ for (i, count) in result.iter().enumerate() {
+ let exp = (weights[i] * N_REPS) as f32 / total_weight;
+ let mut err = (*count as f32 - exp).abs();
+ if err != 0.0 {
+ err /= exp;
+ }
+ assert!(err <= 0.25);
+ }
+ };
+
+ // choose_weighted
+ fn get_weight<T>(item: &(u32, T)) -> u32 {
+ item.0
+ }
+ let mut chosen = [0i32; 14];
+ let mut items = [(0u32, 0usize); 14]; // (weight, index)
+ for (i, item) in items.iter_mut().enumerate() {
+ *item = (weights[i], i);
+ }
+ for _ in 0..N_REPS {
+ let item = items.choose_weighted(&mut r, get_weight).unwrap();
+ chosen[item.1] += 1;
+ }
+ verify(chosen);
+
+ // choose_weighted_mut
+ let mut items = [(0u32, 0i32); 14]; // (weight, count)
+ for (i, item) in items.iter_mut().enumerate() {
+ *item = (weights[i], 0);
+ }
+ for _ in 0..N_REPS {
+ items.choose_weighted_mut(&mut r, get_weight).unwrap().1 += 1;
+ }
+ for (ch, item) in chosen.iter_mut().zip(items.iter()) {
+ *ch = item.1;
+ }
+ verify(chosen);
+
+ // Check error cases
+ let empty_slice = &mut [10][0..0];
+ assert_eq!(empty_slice.choose_weighted(&mut r, |_| 1), Err(WeightedError::NoItem));
+ assert_eq!(empty_slice.choose_weighted_mut(&mut r, |_| 1), Err(WeightedError::NoItem));
+ assert_eq!(['x'].choose_weighted_mut(&mut r, |_| 0), Err(WeightedError::AllWeightsZero));
+ assert_eq!([0, -1].choose_weighted_mut(&mut r, |x| *x), Err(WeightedError::NegativeWeight));
+ assert_eq!([-1, 0].choose_weighted_mut(&mut r, |x| *x), Err(WeightedError::NegativeWeight));
+ }
+}