summaryrefslogtreecommitdiff
path: root/rand/src/seq
diff options
context:
space:
mode:
Diffstat (limited to 'rand/src/seq')
-rw-r--r--rand/src/seq/index.rs409
-rw-r--r--rand/src/seq/mod.rs791
2 files changed, 0 insertions, 1200 deletions
diff --git a/rand/src/seq/index.rs b/rand/src/seq/index.rs
deleted file mode 100644
index 22a5733..0000000
--- a/rand/src/seq/index.rs
+++ /dev/null
@@ -1,409 +0,0 @@
-// Copyright 2018 Developers of the Rand project.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-//! Low-level API for sampling indices
-
-#[cfg(feature="alloc")] use core::slice;
-
-#[cfg(feature="std")] use std::vec;
-#[cfg(all(feature="alloc", not(feature="std")))] use crate::alloc::vec::{self, Vec};
-// BTreeMap is not as fast in tests, but better than nothing.
-#[cfg(feature="std")] use std::collections::{HashSet};
-#[cfg(all(feature="alloc", not(feature="std")))] use crate::alloc::collections::BTreeSet;
-
-#[cfg(feature="alloc")] use crate::distributions::{Distribution, Uniform, uniform::SampleUniform};
-use crate::Rng;
-
-/// A vector of indices.
-///
-/// Multiple internal representations are possible.
-#[derive(Clone, Debug)]
-pub enum IndexVec {
- #[doc(hidden)] U32(Vec<u32>),
- #[doc(hidden)] USize(Vec<usize>),
-}
-
-impl IndexVec {
- /// Returns the number of indices
- #[inline]
- pub fn len(&self) -> usize {
- match *self {
- IndexVec::U32(ref v) => v.len(),
- IndexVec::USize(ref v) => v.len(),
- }
- }
-
- /// Returns `true` if the length is 0.
- #[inline]
- pub fn is_empty(&self) -> bool {
- match *self {
- IndexVec::U32(ref v) => v.is_empty(),
- IndexVec::USize(ref v) => v.is_empty(),
- }
- }
-
- /// Return the value at the given `index`.
- ///
- /// (Note: we cannot implement [`std::ops::Index`] because of lifetime
- /// restrictions.)
- #[inline]
- pub fn index(&self, index: usize) -> usize {
- match *self {
- IndexVec::U32(ref v) => v[index] as usize,
- IndexVec::USize(ref v) => v[index],
- }
- }
-
- /// Return result as a `Vec<usize>`. Conversion may or may not be trivial.
- #[inline]
- pub fn into_vec(self) -> Vec<usize> {
- match self {
- IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(),
- IndexVec::USize(v) => v,
- }
- }
-
- /// Iterate over the indices as a sequence of `usize` values
- #[inline]
- pub fn iter(&self) -> IndexVecIter<'_> {
- match *self {
- IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()),
- IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()),
- }
- }
-
- /// Convert into an iterator over the indices as a sequence of `usize` values
- #[inline]
- pub fn into_iter(self) -> IndexVecIntoIter {
- match self {
- IndexVec::U32(v) => IndexVecIntoIter::U32(v.into_iter()),
- IndexVec::USize(v) => IndexVecIntoIter::USize(v.into_iter()),
- }
- }
-}
-
-impl PartialEq for IndexVec {
- fn eq(&self, other: &IndexVec) -> bool {
- use self::IndexVec::*;
- match (self, other) {
- (&U32(ref v1), &U32(ref v2)) => v1 == v2,
- (&USize(ref v1), &USize(ref v2)) => v1 == v2,
- (&U32(ref v1), &USize(ref v2)) => (v1.len() == v2.len())
- && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as usize == *y)),
- (&USize(ref v1), &U32(ref v2)) => (v1.len() == v2.len())
- && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as usize)),
- }
- }
-}
-
-impl From<Vec<u32>> for IndexVec {
- #[inline]
- fn from(v: Vec<u32>) -> Self {
- IndexVec::U32(v)
- }
-}
-
-impl From<Vec<usize>> for IndexVec {
- #[inline]
- fn from(v: Vec<usize>) -> Self {
- IndexVec::USize(v)
- }
-}
-
-/// Return type of `IndexVec::iter`.
-#[derive(Debug)]
-pub enum IndexVecIter<'a> {
- #[doc(hidden)] U32(slice::Iter<'a, u32>),
- #[doc(hidden)] USize(slice::Iter<'a, usize>),
-}
-
-impl<'a> Iterator for IndexVecIter<'a> {
- type Item = usize;
- #[inline]
- fn next(&mut self) -> Option<usize> {
- use self::IndexVecIter::*;
- match *self {
- U32(ref mut iter) => iter.next().map(|i| *i as usize),
- USize(ref mut iter) => iter.next().cloned(),
- }
- }
-
- #[inline]
- fn size_hint(&self) -> (usize, Option<usize>) {
- match *self {
- IndexVecIter::U32(ref v) => v.size_hint(),
- IndexVecIter::USize(ref v) => v.size_hint(),
- }
- }
-}
-
-impl<'a> ExactSizeIterator for IndexVecIter<'a> {}
-
-/// Return type of `IndexVec::into_iter`.
-#[derive(Clone, Debug)]
-pub enum IndexVecIntoIter {
- #[doc(hidden)] U32(vec::IntoIter<u32>),
- #[doc(hidden)] USize(vec::IntoIter<usize>),
-}
-
-impl Iterator for IndexVecIntoIter {
- type Item = usize;
-
- #[inline]
- fn next(&mut self) -> Option<Self::Item> {
- use self::IndexVecIntoIter::*;
- match *self {
- U32(ref mut v) => v.next().map(|i| i as usize),
- USize(ref mut v) => v.next(),
- }
- }
-
- #[inline]
- fn size_hint(&self) -> (usize, Option<usize>) {
- use self::IndexVecIntoIter::*;
- match *self {
- U32(ref v) => v.size_hint(),
- USize(ref v) => v.size_hint(),
- }
- }
-}
-
-impl ExactSizeIterator for IndexVecIntoIter {}
-
-
-/// Randomly sample exactly `amount` distinct indices from `0..length`, and
-/// return them in random order (fully shuffled).
-///
-/// This method is used internally by the slice sampling methods, but it can
-/// sometimes be useful to have the indices themselves so this is provided as
-/// an alternative.
-///
-/// The implementation used is not specified; we automatically select the
-/// fastest available algorithm for the `length` and `amount` parameters
-/// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking,
-/// complexity is `O(amount)`, except that when `amount` is small, performance
-/// is closer to `O(amount^2)`, and when `length` is close to `amount` then
-/// `O(length)`.
-///
-/// Note that performance is significantly better over `u32` indices than over
-/// `u64` indices. Because of this we hide the underlying type behind an
-/// abstraction, `IndexVec`.
-///
-/// If an allocation-free `no_std` function is required, it is suggested
-/// to adapt the internal `sample_floyd` implementation.
-///
-/// Panics if `amount > length`.
-pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec
-where R: Rng + ?Sized {
- if amount > length {
- panic!("`amount` of samples must be less than or equal to `length`");
- }
- if length > (::core::u32::MAX as usize) {
- // We never want to use inplace here, but could use floyd's alg
- // Lazy version: always use the cache alg.
- return sample_rejection(rng, length, amount);
- }
- let amount = amount as u32;
- let length = length as u32;
-
- // Choice of algorithm here depends on both length and amount. See:
- // https://github.com/rust-random/rand/pull/479
- // We do some calculations with f32. Accuracy is not very important.
-
- if amount < 163 {
- const C: [[f32; 2]; 2] = [[1.6, 8.0/45.0], [10.0, 70.0/9.0]];
- let j = if length < 500_000 { 0 } else { 1 };
- let amount_fp = amount as f32;
- let m4 = C[0][j] * amount_fp;
- // Short-cut: when amount < 12, floyd's is always faster
- if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp {
- sample_inplace(rng, length, amount)
- } else {
- sample_floyd(rng, length, amount)
- }
- } else {
- const C: [f32; 2] = [270.0, 330.0/9.0];
- let j = if length < 500_000 { 0 } else { 1 };
- if (length as f32) < C[j] * (amount as f32) {
- sample_inplace(rng, length, amount)
- } else {
- sample_rejection(rng, length, amount)
- }
- }
-}
-
-/// Randomly sample exactly `amount` indices from `0..length`, using Floyd's
-/// combination algorithm.
-///
-/// The output values are fully shuffled. (Overhead is under 50%.)
-///
-/// This implementation uses `O(amount)` memory and `O(amount^2)` time.
-fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
-where R: Rng + ?Sized {
- // For small amount we use Floyd's fully-shuffled variant. For larger
- // amounts this is slow due to Vec::insert performance, so we shuffle
- // afterwards. Benchmarks show little overhead from extra logic.
- let floyd_shuffle = amount < 50;
-
- debug_assert!(amount <= length);
- let mut indices = Vec::with_capacity(amount as usize);
- for j in length - amount .. length {
- let t = rng.gen_range(0, j + 1);
- if floyd_shuffle {
- if let Some(pos) = indices.iter().position(|&x| x == t) {
- indices.insert(pos, j);
- continue;
- }
- } else if indices.contains(&t) {
- indices.push(j);
- continue;
- }
- indices.push(t);
- }
- if !floyd_shuffle {
- // Reimplement SliceRandom::shuffle with smaller indices
- for i in (1..amount).rev() {
- // invariant: elements with index > i have been locked in place.
- indices.swap(i as usize, rng.gen_range(0, i + 1) as usize);
- }
- }
- IndexVec::from(indices)
-}
-
-/// Randomly sample exactly `amount` indices from `0..length`, using an inplace
-/// partial Fisher-Yates method.
-/// Sample an amount of indices using an inplace partial fisher yates method.
-///
-/// This allocates the entire `length` of indices and randomizes only the first `amount`.
-/// It then truncates to `amount` and returns.
-///
-/// This method is not appropriate for large `length` and potentially uses a lot
-/// of memory; because of this we only implement for `u32` index (which improves
-/// performance in all cases).
-///
-/// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time.
-fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec
-where R: Rng + ?Sized {
- debug_assert!(amount <= length);
- let mut indices: Vec<u32> = Vec::with_capacity(length as usize);
- indices.extend(0..length);
- for i in 0..amount {
- let j: u32 = rng.gen_range(i, length);
- indices.swap(i as usize, j as usize);
- }
- indices.truncate(amount as usize);
- debug_assert_eq!(indices.len(), amount as usize);
- IndexVec::from(indices)
-}
-
-trait UInt: Copy + PartialOrd + Ord + PartialEq + Eq + SampleUniform + core::hash::Hash {
- fn zero() -> Self;
- fn as_usize(self) -> usize;
-}
-impl UInt for u32 {
- #[inline] fn zero() -> Self { 0 }
- #[inline] fn as_usize(self) -> usize { self as usize }
-}
-impl UInt for usize {
- #[inline] fn zero() -> Self { 0 }
- #[inline] fn as_usize(self) -> usize { self }
-}
-
-/// Randomly sample exactly `amount` indices from `0..length`, using rejection
-/// sampling.
-///
-/// Since `amount <<< length` there is a low chance of a random sample in
-/// `0..length` being a duplicate. We test for duplicates and resample where
-/// necessary. The algorithm is `O(amount)` time and memory.
-///
-/// This function is generic over X primarily so that results are value-stable
-/// over 32-bit and 64-bit platforms.
-fn sample_rejection<X: UInt, R>(rng: &mut R, length: X, amount: X) -> IndexVec
-where R: Rng + ?Sized, IndexVec: From<Vec<X>> {
- debug_assert!(amount < length);
- #[cfg(feature="std")] let mut cache = HashSet::with_capacity(amount.as_usize());
- #[cfg(not(feature="std"))] let mut cache = BTreeSet::new();
- let distr = Uniform::new(X::zero(), length);
- let mut indices = Vec::with_capacity(amount.as_usize());
- for _ in 0..amount.as_usize() {
- let mut pos = distr.sample(rng);
- while !cache.insert(pos) {
- pos = distr.sample(rng);
- }
- indices.push(pos);
- }
-
- debug_assert_eq!(indices.len(), amount.as_usize());
- IndexVec::from(indices)
-}
-
-#[cfg(test)]
-mod test {
- #[cfg(feature="std")] use std::vec;
- #[cfg(all(feature="alloc", not(feature="std")))] use crate::alloc::vec;
- use super::*;
-
- #[test]
- fn test_sample_boundaries() {
- let mut r = crate::test::rng(404);
-
- assert_eq!(sample_inplace(&mut r, 0, 0).len(), 0);
- assert_eq!(sample_inplace(&mut r, 1, 0).len(), 0);
- assert_eq!(sample_inplace(&mut r, 1, 1).into_vec(), vec![0]);
-
- assert_eq!(sample_rejection(&mut r, 1u32, 0).len(), 0);
-
- assert_eq!(sample_floyd(&mut r, 0, 0).len(), 0);
- assert_eq!(sample_floyd(&mut r, 1, 0).len(), 0);
- assert_eq!(sample_floyd(&mut r, 1, 1).into_vec(), vec![0]);
-
- // These algorithms should be fast with big numbers. Test average.
- let sum: usize = sample_rejection(&mut r, 1 << 25, 10u32)
- .into_iter().sum();
- assert!(1 << 25 < sum && sum < (1 << 25) * 25);
-
- let sum: usize = sample_floyd(&mut r, 1 << 25, 10)
- .into_iter().sum();
- assert!(1 << 25 < sum && sum < (1 << 25) * 25);
- }
-
- #[test]
- #[cfg(not(miri))] // Miri is too slow
- fn test_sample_alg() {
- let seed_rng = crate::test::rng;
-
- // We can't test which algorithm is used directly, but Floyd's alg
- // should produce different results from the others. (Also, `inplace`
- // and `cached` currently use different sizes thus produce different results.)
-
- // A small length and relatively large amount should use inplace
- let (length, amount): (usize, usize) = (100, 50);
- let v1 = sample(&mut seed_rng(420), length, amount);
- let v2 = sample_inplace(&mut seed_rng(420), length as u32, amount as u32);
- assert!(v1.iter().all(|e| e < length));
- assert_eq!(v1, v2);
-
- // Test Floyd's alg does produce different results
- let v3 = sample_floyd(&mut seed_rng(420), length as u32, amount as u32);
- assert!(v1 != v3);
-
- // A large length and small amount should use Floyd
- let (length, amount): (usize, usize) = (1<<20, 50);
- let v1 = sample(&mut seed_rng(421), length, amount);
- let v2 = sample_floyd(&mut seed_rng(421), length as u32, amount as u32);
- assert!(v1.iter().all(|e| e < length));
- assert_eq!(v1, v2);
-
- // A large length and larger amount should use cache
- let (length, amount): (usize, usize) = (1<<20, 600);
- let v1 = sample(&mut seed_rng(422), length, amount);
- let v2 = sample_rejection(&mut seed_rng(422), length as u32, amount as u32);
- assert!(v1.iter().all(|e| e < length));
- assert_eq!(v1, v2);
- }
-}
diff --git a/rand/src/seq/mod.rs b/rand/src/seq/mod.rs
deleted file mode 100644
index cec9bb1..0000000
--- a/rand/src/seq/mod.rs
+++ /dev/null
@@ -1,791 +0,0 @@
-// Copyright 2018 Developers of the Rand project.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-//! Sequence-related functionality
-//!
-//! This module provides:
-//!
-//! * [`seq::SliceRandom`] slice sampling and mutation
-//! * [`seq::IteratorRandom`] iterator sampling
-//! * [`seq::index::sample`] low-level API to choose multiple indices from
-//! `0..length`
-//!
-//! Also see:
-//!
-//! * [`distributions::weighted`] module which provides implementations of
-//! weighted index sampling.
-//!
-//! In order to make results reproducible across 32-64 bit architectures, all
-//! `usize` indices are sampled as a `u32` where possible (also providing a
-//! small performance boost in some cases).
-
-
-#[cfg(feature="alloc")] pub mod index;
-
-#[cfg(feature="alloc")] use core::ops::Index;
-
-#[cfg(all(feature="alloc", not(feature="std")))] use crate::alloc::vec::Vec;
-
-use crate::Rng;
-#[cfg(feature="alloc")] use crate::distributions::WeightedError;
-#[cfg(feature="alloc")] use crate::distributions::uniform::{SampleUniform, SampleBorrow};
-
-/// Extension trait on slices, providing random mutation and sampling methods.
-///
-/// This trait is implemented on all `[T]` slice types, providing several
-/// methods for choosing and shuffling elements. You must `use` this trait:
-///
-/// ```
-/// use rand::seq::SliceRandom;
-///
-/// fn main() {
-/// let mut rng = rand::thread_rng();
-/// let mut bytes = "Hello, random!".to_string().into_bytes();
-/// bytes.shuffle(&mut rng);
-/// let str = String::from_utf8(bytes).unwrap();
-/// println!("{}", str);
-/// }
-/// ```
-/// Example output (non-deterministic):
-/// ```none
-/// l,nmroHado !le
-/// ```
-pub trait SliceRandom {
- /// The element type.
- type Item;
-
- /// Returns a reference to one random element of the slice, or `None` if the
- /// slice is empty.
- ///
- /// For slices, complexity is `O(1)`.
- ///
- /// # Example
- ///
- /// ```
- /// use rand::thread_rng;
- /// use rand::seq::SliceRandom;
- ///
- /// let choices = [1, 2, 4, 8, 16, 32];
- /// let mut rng = thread_rng();
- /// println!("{:?}", choices.choose(&mut rng));
- /// assert_eq!(choices[..0].choose(&mut rng), None);
- /// ```
- fn choose<R>(&self, rng: &mut R) -> Option<&Self::Item>
- where R: Rng + ?Sized;
-
- /// Returns a mutable reference to one random element of the slice, or
- /// `None` if the slice is empty.
- ///
- /// For slices, complexity is `O(1)`.
- fn choose_mut<R>(&mut self, rng: &mut R) -> Option<&mut Self::Item>
- where R: Rng + ?Sized;
-
- /// Chooses `amount` elements from the slice at random, without repetition,
- /// and in random order. The returned iterator is appropriate both for
- /// collection into a `Vec` and filling an existing buffer (see example).
- ///
- /// In case this API is not sufficiently flexible, use [`index::sample`].
- ///
- /// For slices, complexity is the same as [`index::sample`].
- ///
- /// # Example
- /// ```
- /// use rand::seq::SliceRandom;
- ///
- /// let mut rng = &mut rand::thread_rng();
- /// let sample = "Hello, audience!".as_bytes();
- ///
- /// // collect the results into a vector:
- /// let v: Vec<u8> = sample.choose_multiple(&mut rng, 3).cloned().collect();
- ///
- /// // store in a buffer:
- /// let mut buf = [0u8; 5];
- /// for (b, slot) in sample.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) {
- /// *slot = *b;
- /// }
- /// ```
- #[cfg(feature = "alloc")]
- fn choose_multiple<R>(&self, rng: &mut R, amount: usize) -> SliceChooseIter<Self, Self::Item>
- where R: Rng + ?Sized;
-
- /// Similar to [`choose`], but where the likelihood of each outcome may be
- /// specified.
- ///
- /// The specified function `weight` maps each item `x` to a relative
- /// likelihood `weight(x)`. The probability of each item being selected is
- /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`.
- ///
- /// For slices of length `n`, complexity is `O(n)`.
- /// See also [`choose_weighted_mut`], [`distributions::weighted`].
- ///
- /// # Example
- ///
- /// ```
- /// use rand::prelude::*;
- ///
- /// let choices = [('a', 2), ('b', 1), ('c', 1)];
- /// let mut rng = thread_rng();
- /// // 50% chance to print 'a', 25% chance to print 'b', 25% chance to print 'c'
- /// println!("{:?}", choices.choose_weighted(&mut rng, |item| item.1).unwrap().0);
- /// ```
- /// [`choose`]: SliceRandom::choose
- /// [`choose_weighted_mut`]: SliceRandom::choose_weighted_mut
- /// [`distributions::weighted`]: crate::distributions::weighted
- #[cfg(feature = "alloc")]
- fn choose_weighted<R, F, B, X>(
- &self, rng: &mut R, weight: F,
- ) -> Result<&Self::Item, WeightedError>
- where
- R: Rng + ?Sized,
- F: Fn(&Self::Item) -> B,
- B: SampleBorrow<X>,
- X: SampleUniform
- + for<'a> ::core::ops::AddAssign<&'a X>
- + ::core::cmp::PartialOrd<X>
- + Clone
- + Default;
-
- /// Similar to [`choose_mut`], but where the likelihood of each outcome may
- /// be specified.
- ///
- /// The specified function `weight` maps each item `x` to a relative
- /// likelihood `weight(x)`. The probability of each item being selected is
- /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`.
- ///
- /// For slices of length `n`, complexity is `O(n)`.
- /// See also [`choose_weighted`], [`distributions::weighted`].
- ///
- /// [`choose_mut`]: SliceRandom::choose_mut
- /// [`choose_weighted`]: SliceRandom::choose_weighted
- /// [`distributions::weighted`]: crate::distributions::weighted
- #[cfg(feature = "alloc")]
- fn choose_weighted_mut<R, F, B, X>(
- &mut self, rng: &mut R, weight: F,
- ) -> Result<&mut Self::Item, WeightedError>
- where
- R: Rng + ?Sized,
- F: Fn(&Self::Item) -> B,
- B: SampleBorrow<X>,
- X: SampleUniform
- + for<'a> ::core::ops::AddAssign<&'a X>
- + ::core::cmp::PartialOrd<X>
- + Clone
- + Default;
-
- /// Shuffle a mutable slice in place.
- ///
- /// For slices of length `n`, complexity is `O(n)`.
- ///
- /// # Example
- ///
- /// ```
- /// use rand::seq::SliceRandom;
- /// use rand::thread_rng;
- ///
- /// let mut rng = thread_rng();
- /// let mut y = [1, 2, 3, 4, 5];
- /// println!("Unshuffled: {:?}", y);
- /// y.shuffle(&mut rng);
- /// println!("Shuffled: {:?}", y);
- /// ```
- fn shuffle<R>(&mut self, rng: &mut R)
- where R: Rng + ?Sized;
-
- /// Shuffle a slice in place, but exit early.
- ///
- /// Returns two mutable slices from the source slice. The first contains
- /// `amount` elements randomly permuted. The second has the remaining
- /// elements that are not fully shuffled.
- ///
- /// This is an efficient method to select `amount` elements at random from
- /// the slice, provided the slice may be mutated.
- ///
- /// If you only need to choose elements randomly and `amount > self.len()/2`
- /// then you may improve performance by taking
- /// `amount = values.len() - amount` and using only the second slice.
- ///
- /// If `amount` is greater than the number of elements in the slice, this
- /// will perform a full shuffle.
- ///
- /// For slices, complexity is `O(m)` where `m = amount`.
- fn partial_shuffle<R>(
- &mut self, rng: &mut R, amount: usize,
- ) -> (&mut [Self::Item], &mut [Self::Item])
- where R: Rng + ?Sized;
-}
-
-/// Extension trait on iterators, providing random sampling methods.
-///
-/// This trait is implemented on all sized iterators, providing methods for
-/// choosing one or more elements. You must `use` this trait:
-///
-/// ```
-/// use rand::seq::IteratorRandom;
-///
-/// fn main() {
-/// let mut rng = rand::thread_rng();
-///
-/// let faces = "πŸ˜€πŸ˜ŽπŸ˜πŸ˜•πŸ˜ πŸ˜’";
-/// println!("I am {}!", faces.chars().choose(&mut rng).unwrap());
-/// }
-/// ```
-/// Example output (non-deterministic):
-/// ```none
-/// I am πŸ˜€!
-/// ```
-pub trait IteratorRandom: Iterator + Sized {
- /// Choose one element at random from the iterator.
- ///
- /// Returns `None` if and only if the iterator is empty.
- ///
- /// This method uses [`Iterator::size_hint`] for optimisation. With an
- /// accurate hint and where [`Iterator::nth`] is a constant-time operation
- /// this method can offer `O(1)` performance. Where no size hint is
- /// available, complexity is `O(n)` where `n` is the iterator length.
- /// Partial hints (where `lower > 0`) also improve performance.
- ///
- /// For slices, prefer [`SliceRandom::choose`] which guarantees `O(1)`
- /// performance.
- fn choose<R>(mut self, rng: &mut R) -> Option<Self::Item>
- where R: Rng + ?Sized {
- let (mut lower, mut upper) = self.size_hint();
- let mut consumed = 0;
- let mut result = None;
-
- if upper == Some(lower) {
- return if lower == 0 { None } else { self.nth(gen_index(rng, lower)) };
- }
-
- // Continue until the iterator is exhausted
- loop {
- if lower > 1 {
- let ix = gen_index(rng, lower + consumed);
- let skip = if ix < lower {
- result = self.nth(ix);
- lower - (ix + 1)
- } else {
- lower
- };
- if upper == Some(lower) {
- return result;
- }
- consumed += lower;
- if skip > 0 {
- self.nth(skip - 1);
- }
- } else {
- let elem = self.next();
- if elem.is_none() {
- return result;
- }
- consumed += 1;
- let denom = consumed as f64; // accurate to 2^53 elements
- if rng.gen_bool(1.0 / denom) {
- result = elem;
- }
- }
-
- let hint = self.size_hint();
- lower = hint.0;
- upper = hint.1;
- }
- }
-
- /// Collects values at random from the iterator into a supplied buffer
- /// until that buffer is filled.
- ///
- /// Although the elements are selected randomly, the order of elements in
- /// the buffer is neither stable nor fully random. If random ordering is
- /// desired, shuffle the result.
- ///
- /// Returns the number of elements added to the buffer. This equals the length
- /// of the buffer unless the iterator contains insufficient elements, in which
- /// case this equals the number of elements available.
- ///
- /// Complexity is `O(n)` where `n` is the length of the iterator.
- /// For slices, prefer [`SliceRandom::choose_multiple`].
- fn choose_multiple_fill<R>(mut self, rng: &mut R, buf: &mut [Self::Item]) -> usize
- where R: Rng + ?Sized {
- let amount = buf.len();
- let mut len = 0;
- while len < amount {
- if let Some(elem) = self.next() {
- buf[len] = elem;
- len += 1;
- } else {
- // Iterator exhausted; stop early
- return len;
- }
- }
-
- // Continue, since the iterator was not exhausted
- for (i, elem) in self.enumerate() {
- let k = gen_index(rng, i + 1 + amount);
- if let Some(slot) = buf.get_mut(k) {
- *slot = elem;
- }
- }
- len
- }
-
- /// Collects `amount` values at random from the iterator into a vector.
- ///
- /// This is equivalent to `choose_multiple_fill` except for the result type.
- ///
- /// Although the elements are selected randomly, the order of elements in
- /// the buffer is neither stable nor fully random. If random ordering is
- /// desired, shuffle the result.
- ///
- /// The length of the returned vector equals `amount` unless the iterator
- /// contains insufficient elements, in which case it equals the number of
- /// elements available.
- ///
- /// Complexity is `O(n)` where `n` is the length of the iterator.
- /// For slices, prefer [`SliceRandom::choose_multiple`].
- #[cfg(feature = "alloc")]
- fn choose_multiple<R>(mut self, rng: &mut R, amount: usize) -> Vec<Self::Item>
- where R: Rng + ?Sized {
- let mut reservoir = Vec::with_capacity(amount);
- reservoir.extend(self.by_ref().take(amount));
-
- // Continue unless the iterator was exhausted
- //
- // note: this prevents iterators that "restart" from causing problems.
- // If the iterator stops once, then so do we.
- if reservoir.len() == amount {
- for (i, elem) in self.enumerate() {
- let k = gen_index(rng, i + 1 + amount);
- if let Some(slot) = reservoir.get_mut(k) {
- *slot = elem;
- }
- }
- } else {
- // Don't hang onto extra memory. There is a corner case where
- // `amount` was much less than `self.len()`.
- reservoir.shrink_to_fit();
- }
- reservoir
- }
-}
-
-
-impl<T> SliceRandom for [T] {
- type Item = T;
-
- fn choose<R>(&self, rng: &mut R) -> Option<&Self::Item>
- where R: Rng + ?Sized {
- if self.is_empty() {
- None
- } else {
- Some(&self[gen_index(rng, self.len())])
- }
- }
-
- fn choose_mut<R>(&mut self, rng: &mut R) -> Option<&mut Self::Item>
- where R: Rng + ?Sized {
- if self.is_empty() {
- None
- } else {
- let len = self.len();
- Some(&mut self[gen_index(rng, len)])
- }
- }
-
- #[cfg(feature = "alloc")]
- fn choose_multiple<R>(&self, rng: &mut R, amount: usize) -> SliceChooseIter<Self, Self::Item>
- where R: Rng + ?Sized {
- let amount = ::core::cmp::min(amount, self.len());
- SliceChooseIter {
- slice: self,
- _phantom: Default::default(),
- indices: index::sample(rng, self.len(), amount).into_iter(),
- }
- }
-
- #[cfg(feature = "alloc")]
- fn choose_weighted<R, F, B, X>(
- &self, rng: &mut R, weight: F,
- ) -> Result<&Self::Item, WeightedError>
- where
- R: Rng + ?Sized,
- F: Fn(&Self::Item) -> B,
- B: SampleBorrow<X>,
- X: SampleUniform
- + for<'a> ::core::ops::AddAssign<&'a X>
- + ::core::cmp::PartialOrd<X>
- + Clone
- + Default,
- {
- use crate::distributions::{Distribution, WeightedIndex};
- let distr = WeightedIndex::new(self.iter().map(weight))?;
- Ok(&self[distr.sample(rng)])
- }
-
- #[cfg(feature = "alloc")]
- fn choose_weighted_mut<R, F, B, X>(
- &mut self, rng: &mut R, weight: F,
- ) -> Result<&mut Self::Item, WeightedError>
- where
- R: Rng + ?Sized,
- F: Fn(&Self::Item) -> B,
- B: SampleBorrow<X>,
- X: SampleUniform
- + for<'a> ::core::ops::AddAssign<&'a X>
- + ::core::cmp::PartialOrd<X>
- + Clone
- + Default,
- {
- use crate::distributions::{Distribution, WeightedIndex};
- let distr = WeightedIndex::new(self.iter().map(weight))?;
- Ok(&mut self[distr.sample(rng)])
- }
-
- fn shuffle<R>(&mut self, rng: &mut R)
- where R: Rng + ?Sized {
- for i in (1..self.len()).rev() {
- // invariant: elements with index > i have been locked in place.
- self.swap(i, gen_index(rng, i + 1));
- }
- }
-
- fn partial_shuffle<R>(
- &mut self, rng: &mut R, amount: usize,
- ) -> (&mut [Self::Item], &mut [Self::Item])
- where R: Rng + ?Sized {
- // This applies Durstenfeld's algorithm for the
- // [Fisher–Yates shuffle](https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm)
- // for an unbiased permutation, but exits early after choosing `amount`
- // elements.
-
- let len = self.len();
- let end = if amount >= len { 0 } else { len - amount };
-
- for i in (end..len).rev() {
- // invariant: elements with index > i have been locked in place.
- self.swap(i, gen_index(rng, i + 1));
- }
- let r = self.split_at_mut(end);
- (r.1, r.0)
- }
-}
-
-impl<I> IteratorRandom for I where I: Iterator + Sized {}
-
-
-/// An iterator over multiple slice elements.
-///
-/// This struct is created by
-/// [`SliceRandom::choose_multiple`](trait.SliceRandom.html#tymethod.choose_multiple).
-#[cfg(feature = "alloc")]
-#[derive(Debug)]
-pub struct SliceChooseIter<'a, S: ?Sized + 'a, T: 'a> {
- slice: &'a S,
- _phantom: ::core::marker::PhantomData<T>,
- indices: index::IndexVecIntoIter,
-}
-
-#[cfg(feature = "alloc")]
-impl<'a, S: Index<usize, Output = T> + ?Sized + 'a, T: 'a> Iterator for SliceChooseIter<'a, S, T> {
- type Item = &'a T;
-
- fn next(&mut self) -> Option<Self::Item> {
- // TODO: investigate using SliceIndex::get_unchecked when stable
- self.indices.next().map(|i| &self.slice[i as usize])
- }
-
- fn size_hint(&self) -> (usize, Option<usize>) {
- (self.indices.len(), Some(self.indices.len()))
- }
-}
-
-#[cfg(feature = "alloc")]
-impl<'a, S: Index<usize, Output = T> + ?Sized + 'a, T: 'a> ExactSizeIterator
- for SliceChooseIter<'a, S, T>
-{
- fn len(&self) -> usize {
- self.indices.len()
- }
-}
-
-
-// Sample a number uniformly between 0 and `ubound`. Uses 32-bit sampling where
-// possible, primarily in order to produce the same output on 32-bit and 64-bit
-// platforms.
-#[inline]
-fn gen_index<R: Rng + ?Sized>(rng: &mut R, ubound: usize) -> usize {
- if ubound <= (core::u32::MAX as usize) {
- rng.gen_range(0, ubound as u32) as usize
- } else {
- rng.gen_range(0, ubound)
- }
-}
-
-
-#[cfg(test)]
-mod test {
- use super::*;
- #[cfg(feature = "alloc")] use crate::Rng;
- #[cfg(all(feature="alloc", not(feature="std")))]
- use alloc::vec::Vec;
-
- #[test]
- fn test_slice_choose() {
- let mut r = crate::test::rng(107);
- let chars = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n'];
- let mut chosen = [0i32; 14];
- // The below all use a binomial distribution with n=1000, p=1/14.
- // binocdf(40, 1000, 1/14) ~= 2e-5; 1-binocdf(106, ..) ~= 2e-5
- for _ in 0..1000 {
- let picked = *chars.choose(&mut r).unwrap();
- chosen[(picked as usize) - ('a' as usize)] += 1;
- }
- for count in chosen.iter() {
- assert!(40 < *count && *count < 106);
- }
-
- chosen.iter_mut().for_each(|x| *x = 0);
- for _ in 0..1000 {
- *chosen.choose_mut(&mut r).unwrap() += 1;
- }
- for count in chosen.iter() {
- assert!(40 < *count && *count < 106);
- }
-
- let mut v: [isize; 0] = [];
- assert_eq!(v.choose(&mut r), None);
- assert_eq!(v.choose_mut(&mut r), None);
- }
-
- #[derive(Clone)]
- struct UnhintedIterator<I: Iterator + Clone> {
- iter: I,
- }
- impl<I: Iterator + Clone> Iterator for UnhintedIterator<I> {
- type Item = I::Item;
- fn next(&mut self) -> Option<Self::Item> {
- self.iter.next()
- }
- }
-
- #[derive(Clone)]
- struct ChunkHintedIterator<I: ExactSizeIterator + Iterator + Clone> {
- iter: I,
- chunk_remaining: usize,
- chunk_size: usize,
- hint_total_size: bool,
- }
- impl<I: ExactSizeIterator + Iterator + Clone> Iterator for ChunkHintedIterator<I> {
- type Item = I::Item;
- fn next(&mut self) -> Option<Self::Item> {
- if self.chunk_remaining == 0 {
- self.chunk_remaining = ::core::cmp::min(self.chunk_size,
- self.iter.len());
- }
- self.chunk_remaining = self.chunk_remaining.saturating_sub(1);
-
- self.iter.next()
- }
- fn size_hint(&self) -> (usize, Option<usize>) {
- (self.chunk_remaining,
- if self.hint_total_size { Some(self.iter.len()) } else { None })
- }
- }
-
- #[derive(Clone)]
- struct WindowHintedIterator<I: ExactSizeIterator + Iterator + Clone> {
- iter: I,
- window_size: usize,
- hint_total_size: bool,
- }
- impl<I: ExactSizeIterator + Iterator + Clone> Iterator for WindowHintedIterator<I> {
- type Item = I::Item;
- fn next(&mut self) -> Option<Self::Item> {
- self.iter.next()
- }
- fn size_hint(&self) -> (usize, Option<usize>) {
- (::core::cmp::min(self.iter.len(), self.window_size),
- if self.hint_total_size { Some(self.iter.len()) } else { None })
- }
- }
-
- #[test]
- #[cfg(not(miri))] // Miri is too slow
- fn test_iterator_choose() {
- let r = &mut crate::test::rng(109);
- fn test_iter<R: Rng + ?Sized, Iter: Iterator<Item=usize> + Clone>(r: &mut R, iter: Iter) {
- let mut chosen = [0i32; 9];
- for _ in 0..1000 {
- let picked = iter.clone().choose(r).unwrap();
- chosen[picked] += 1;
- }
- for count in chosen.iter() {
- // Samples should follow Binomial(1000, 1/9)
- // Octave: binopdf(x, 1000, 1/9) gives the prob of *count == x
- // Note: have seen 153, which is unlikely but not impossible.
- assert!(72 < *count && *count < 154, "count not close to 1000/9: {}", count);
- }
- }
-
- test_iter(r, 0..9);
- test_iter(r, [0, 1, 2, 3, 4, 5, 6, 7, 8].iter().cloned());
- #[cfg(feature = "alloc")]
- test_iter(r, (0..9).collect::<Vec<_>>().into_iter());
- test_iter(r, UnhintedIterator { iter: 0..9 });
- test_iter(r, ChunkHintedIterator { iter: 0..9, chunk_size: 4, chunk_remaining: 4, hint_total_size: false });
- test_iter(r, ChunkHintedIterator { iter: 0..9, chunk_size: 4, chunk_remaining: 4, hint_total_size: true });
- test_iter(r, WindowHintedIterator { iter: 0..9, window_size: 2, hint_total_size: false });
- test_iter(r, WindowHintedIterator { iter: 0..9, window_size: 2, hint_total_size: true });
-
- assert_eq!((0..0).choose(r), None);
- assert_eq!(UnhintedIterator{ iter: 0..0 }.choose(r), None);
- }
-
- #[test]
- #[cfg(not(miri))] // Miri is too slow
- fn test_shuffle() {
- let mut r = crate::test::rng(108);
- let empty: &mut [isize] = &mut [];
- empty.shuffle(&mut r);
- let mut one = [1];
- one.shuffle(&mut r);
- let b: &[_] = &[1];
- assert_eq!(one, b);
-
- let mut two = [1, 2];
- two.shuffle(&mut r);
- assert!(two == [1, 2] || two == [2, 1]);
-
- fn move_last(slice: &mut [usize], pos: usize) {
- // use slice[pos..].rotate_left(1); once we can use that
- let last_val = slice[pos];
- for i in pos..slice.len() - 1 {
- slice[i] = slice[i + 1];
- }
- *slice.last_mut().unwrap() = last_val;
- }
- let mut counts = [0i32; 24];
- for _ in 0..10000 {
- let mut arr: [usize; 4] = [0, 1, 2, 3];
- arr.shuffle(&mut r);
- let mut permutation = 0usize;
- let mut pos_value = counts.len();
- for i in 0..4 {
- pos_value /= 4 - i;
- let pos = arr.iter().position(|&x| x == i).unwrap();
- assert!(pos < (4 - i));
- permutation += pos * pos_value;
- move_last(&mut arr, pos);
- assert_eq!(arr[3], i);
- }
- for i in 0..4 {
- assert_eq!(arr[i], i);
- }
- counts[permutation] += 1;
- }
- for count in counts.iter() {
- // Binomial(10000, 1/24) with average 416.667
- // Octave: binocdf(n, 10000, 1/24)
- // 99.9% chance samples lie within this range:
- assert!(352 <= *count && *count <= 483, "count: {}", count);
- }
- }
-
- #[test]
- fn test_partial_shuffle() {
- let mut r = crate::test::rng(118);
-
- let mut empty: [u32; 0] = [];
- let res = empty.partial_shuffle(&mut r, 10);
- assert_eq!((res.0.len(), res.1.len()), (0, 0));
-
- let mut v = [1, 2, 3, 4, 5];
- let res = v.partial_shuffle(&mut r, 2);
- assert_eq!((res.0.len(), res.1.len()), (2, 3));
- assert!(res.0[0] != res.0[1]);
- // First elements are only modified if selected, so at least one isn't modified:
- assert!(res.1[0] == 1 || res.1[1] == 2 || res.1[2] == 3);
- }
-
- #[test]
- #[cfg(feature = "alloc")]
- fn test_sample_iter() {
- let min_val = 1;
- let max_val = 100;
-
- let mut r = crate::test::rng(401);
- let vals = (min_val..max_val).collect::<Vec<i32>>();
- let small_sample = vals.iter().choose_multiple(&mut r, 5);
- let large_sample = vals.iter().choose_multiple(&mut r, vals.len() + 5);
-
- assert_eq!(small_sample.len(), 5);
- assert_eq!(large_sample.len(), vals.len());
- // no randomization happens when amount >= len
- assert_eq!(large_sample, vals.iter().collect::<Vec<_>>());
-
- assert!(small_sample.iter().all(|e| {
- **e >= min_val && **e <= max_val
- }));
- }
-
- #[test]
- #[cfg(feature = "alloc")]
- #[cfg(not(miri))] // Miri is too slow
- fn test_weighted() {
- let mut r = crate::test::rng(406);
- const N_REPS: u32 = 3000;
- let weights = [1u32, 2, 3, 0, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7];
- let total_weight = weights.iter().sum::<u32>() as f32;
-
- let verify = |result: [i32; 14]| {
- for (i, count) in result.iter().enumerate() {
- let exp = (weights[i] * N_REPS) as f32 / total_weight;
- let mut err = (*count as f32 - exp).abs();
- if err != 0.0 {
- err /= exp;
- }
- assert!(err <= 0.25);
- }
- };
-
- // choose_weighted
- fn get_weight<T>(item: &(u32, T)) -> u32 {
- item.0
- }
- let mut chosen = [0i32; 14];
- let mut items = [(0u32, 0usize); 14]; // (weight, index)
- for (i, item) in items.iter_mut().enumerate() {
- *item = (weights[i], i);
- }
- for _ in 0..N_REPS {
- let item = items.choose_weighted(&mut r, get_weight).unwrap();
- chosen[item.1] += 1;
- }
- verify(chosen);
-
- // choose_weighted_mut
- let mut items = [(0u32, 0i32); 14]; // (weight, count)
- for (i, item) in items.iter_mut().enumerate() {
- *item = (weights[i], 0);
- }
- for _ in 0..N_REPS {
- items.choose_weighted_mut(&mut r, get_weight).unwrap().1 += 1;
- }
- for (ch, item) in chosen.iter_mut().zip(items.iter()) {
- *ch = item.1;
- }
- verify(chosen);
-
- // Check error cases
- let empty_slice = &mut [10][0..0];
- assert_eq!(empty_slice.choose_weighted(&mut r, |_| 1), Err(WeightedError::NoItem));
- assert_eq!(empty_slice.choose_weighted_mut(&mut r, |_| 1), Err(WeightedError::NoItem));
- assert_eq!(['x'].choose_weighted_mut(&mut r, |_| 0), Err(WeightedError::AllWeightsZero));
- assert_eq!([0, -1].choose_weighted_mut(&mut r, |x| *x), Err(WeightedError::InvalidWeight));
- assert_eq!([-1, 0].choose_weighted_mut(&mut r, |x| *x), Err(WeightedError::InvalidWeight));
- }
-}