diff options
Diffstat (limited to 'unicode-segmentation/src/test.rs')
-rw-r--r-- | unicode-segmentation/src/test.rs | 197 |
1 files changed, 0 insertions, 197 deletions
diff --git a/unicode-segmentation/src/test.rs b/unicode-segmentation/src/test.rs deleted file mode 100644 index 75b77c5..0000000 --- a/unicode-segmentation/src/test.rs +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -use super::UnicodeSegmentation; - -use std::prelude::v1::*; - -#[test] -fn test_graphemes() { - use testdata::{TEST_SAME, TEST_DIFF}; - - pub const EXTRA_DIFF: &'static [(&'static str, - &'static [&'static str], - &'static [&'static str])] = &[ - // Official test suite doesn't include two Prepend chars between two other chars. - ("\u{20}\u{600}\u{600}\u{20}", - &["\u{20}", "\u{600}\u{600}\u{20}"], - &["\u{20}", "\u{600}", "\u{600}", "\u{20}"]), - - // Test for Prepend followed by two Any chars - ("\u{600}\u{20}\u{20}", - &["\u{600}\u{20}", "\u{20}"], - &["\u{600}", "\u{20}", "\u{20}"]), - ]; - - pub const EXTRA_SAME: &'static [(&'static str, &'static [&'static str])] = &[ - // family emoji (more than two emoji joined by ZWJ) - ("\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}", - &["\u{1f468}\u{200d}\u{1f467}\u{200d}\u{1f466}"]), - // cartwheel emoji followed by two fitzpatrick skin tone modifiers - // (test case from issue #19) - ("\u{1F938}\u{1F3FE}\u{1F3FE}", - &["\u{1F938}\u{1F3FE}\u{1F3FE}"]), - ]; - - for &(s, g) in TEST_SAME.iter().chain(EXTRA_SAME) { - // test forward iterator - assert!(UnicodeSegmentation::graphemes(s, true).eq(g.iter().cloned())); - assert!(UnicodeSegmentation::graphemes(s, false).eq(g.iter().cloned())); - - // test reverse iterator - assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(g.iter().rev().cloned())); - assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(g.iter().rev().cloned())); - } - - for &(s, gt, gf) in TEST_DIFF.iter().chain(EXTRA_DIFF) { - // test forward iterator - assert!(UnicodeSegmentation::graphemes(s, true).eq(gt.iter().cloned())); - assert!(UnicodeSegmentation::graphemes(s, false).eq(gf.iter().cloned())); - - // test reverse iterator - assert!(UnicodeSegmentation::graphemes(s, true).rev().eq(gt.iter().rev().cloned())); - assert!(UnicodeSegmentation::graphemes(s, false).rev().eq(gf.iter().rev().cloned())); - } - - // test the indices iterators - let s = "a̐éö̲\r\n"; - let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).collect::<Vec<(usize, &str)>>(); - let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]; - assert_eq!(gr_inds, b); - let gr_inds = UnicodeSegmentation::grapheme_indices(s, true).rev().collect::<Vec<(usize, &str)>>(); - let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0, "a̐")]; - assert_eq!(gr_inds, b); - let mut gr_inds_iter = UnicodeSegmentation::grapheme_indices(s, true); - { - let gr_inds = gr_inds_iter.by_ref(); - let e1 = gr_inds.size_hint(); - assert_eq!(e1, (1, Some(13))); - let c = gr_inds.count(); - assert_eq!(c, 4); - } - let e2 = gr_inds_iter.size_hint(); - assert_eq!(e2, (0, Some(0))); - - // make sure the reverse iterator does the right thing with "\n" at beginning of string - let s = "\n\r\n\r"; - let gr = UnicodeSegmentation::graphemes(s, true).rev().collect::<Vec<&str>>(); - let b: &[_] = &["\r", "\r\n", "\n"]; - assert_eq!(gr, b); -} - -#[test] -fn test_words() { - use testdata::TEST_WORD; - - // Unicode's official tests don't really test longer chains of flag emoji - // TODO This could be improved with more tests like flag emoji with interspersed Extend chars and ZWJ - const EXTRA_TESTS: &'static [(&'static str, &'static [&'static str])] = &[ - ("🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦🇴", &["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦🇴"]), - ("🇦🇫🇦🇽🇦🇱🇩🇿🇦🇸🇦🇩🇦", &["🇦🇫", "🇦🇽", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"]), - ("🇦a🇫🇦🇽a🇦🇱🇩🇿🇦🇸🇦🇩🇦", &["🇦", "a", "🇫🇦", "🇽", "a", "🇦🇱", "🇩🇿", "🇦🇸", "🇦🇩", "🇦"]), - ("\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}", &["\u{1f468}\u{200d}\u{1f468}\u{200d}\u{1f466}"]), - ("😌👎🏼", &["😌", "👎🏼"]), - // perhaps wrong, spaces should not be included? - ("hello world", &["hello", " ", "world"]), - ("🇨🇦🇨🇭🇿🇲🇿 hi", &["🇨🇦", "🇨🇭", "🇿🇲", "🇿", " ", "hi"]), - ]; - for &(s, w) in TEST_WORD.iter().chain(EXTRA_TESTS.iter()) { - macro_rules! assert_ { - ($test:expr, $exp:expr, $name:expr) => { - // collect into vector for better diagnostics in failure case - let testing = $test.collect::<Vec<_>>(); - let expected = $exp.collect::<Vec<_>>(); - assert_eq!(testing, expected, "{} test for testcase ({:?}, {:?}) failed.", $name, s, w) - } - } - // test forward iterator - assert_!(s.split_word_bounds(), - w.iter().cloned(), - "Forward word boundaries"); - - // test reverse iterator - assert_!(s.split_word_bounds().rev(), - w.iter().rev().cloned(), - "Reverse word boundaries"); - - // generate offsets from word string lengths - let mut indices = vec![0]; - for i in w.iter().cloned().map(|s| s.len()).scan(0, |t, n| { *t += n; Some(*t) }) { - indices.push(i); - } - indices.pop(); - let indices = indices; - - // test forward indices iterator - assert_!(s.split_word_bound_indices().map(|(l,_)| l), - indices.iter().cloned(), - "Forward word indices"); - - // test backward indices iterator - assert_!(s.split_word_bound_indices().rev().map(|(l,_)| l), - indices.iter().rev().cloned(), - "Reverse word indices"); - } -} - - -#[test] -fn test_sentences() { - use testdata::TEST_SENTENCE; - - for &(s, w) in TEST_SENTENCE.iter() { - macro_rules! assert_ { - ($test:expr, $exp:expr, $name:expr) => { - // collect into vector for better diagnostics in failure case - let testing = $test.collect::<Vec<_>>(); - let expected = $exp.collect::<Vec<_>>(); - assert_eq!(testing, expected, "{} test for testcase ({:?}, {:?}) failed.", $name, s, w) - } - } - - assert_!(s.split_sentence_bounds(), - w.iter().cloned(), - "Forward sentence boundaries"); - } -} - -quickcheck! { - fn quickcheck_forward_reverse_graphemes_extended(s: String) -> bool { - let a = s.graphemes(true).collect::<Vec<_>>(); - let mut b = s.graphemes(true).rev().collect::<Vec<_>>(); - b.reverse(); - a == b - } - - fn quickcheck_forward_reverse_graphemes_legacy(s: String) -> bool { - let a = s.graphemes(false).collect::<Vec<_>>(); - let mut b = s.graphemes(false).rev().collect::<Vec<_>>(); - b.reverse(); - a == b - } - - fn quickcheck_join_graphemes(s: String) -> bool { - let a = s.graphemes(true).collect::<String>(); - let b = s.graphemes(false).collect::<String>(); - a == s && b == s - } - - fn quickcheck_forward_reverse_words(s: String) -> bool { - let a = s.split_word_bounds().collect::<Vec<_>>(); - let mut b = s.split_word_bounds().rev().collect::<Vec<_>>(); - b.reverse(); - a == b - } - - fn quickcheck_join_words(s: String) -> bool { - let a = s.split_word_bounds().collect::<String>(); - a == s - } -} |