diff options
Diffstat (limited to 'textwrap/src')
| -rw-r--r-- | textwrap/src/indentation.rs | 294 | ||||
| -rw-r--r-- | textwrap/src/lib.rs | 987 | ||||
| -rw-r--r-- | textwrap/src/splitting.rs | 139 | 
3 files changed, 1420 insertions, 0 deletions
// | diff --git a/textwrap/src/indentation.rs b/textwrap/src/indentation.rs new file mode 100644 index 0000000..276ba10 --- /dev/null +++ b/textwrap/src/indentation.rs @@ -0,0 +1,294 @@
//! Functions related to adding and removing indentation from lines of
//! text.
//!
//! The functions here can be used to uniformly indent or dedent
//! (unindent) word wrapped lines of text.

/// Add prefix to each non-empty line.
///
/// ```
/// use textwrap::indent;
///
/// assert_eq!(indent("
/// Foo
/// Bar
/// ", "  "), "
///   Foo
///   Bar
/// ");
/// ```
///
/// Empty lines (lines consisting only of whitespace) are not indented
/// and the whitespace is replaced by a single newline (`\n`):
///
/// ```
/// use textwrap::indent;
///
/// assert_eq!(indent("
/// Foo
///
/// Bar
///   \t
/// Baz
/// ", "->"), "
/// ->Foo
///
/// ->Bar
///
/// ->Baz
/// ");
/// ```
///
/// Leading and trailing whitespace on non-empty lines is kept
/// unchanged:
///
/// ```
/// use textwrap::indent;
///
/// assert_eq!(indent(" \t  Foo   ", "->"), "-> \t  Foo   \n");
/// ```
pub fn indent(s: &str, prefix: &str) -> String {
    let mut result = String::new();
    for line in s.lines() {
        // Whitespace-only lines are emitted as a bare newline.
        if line.chars().any(|c| !c.is_whitespace()) {
            result.push_str(prefix);
            result.push_str(line);
        }
        // Every input line is terminated, so the output always ends
        // with a newline even when the input did not.
        result.push('\n');
    }
    result
}

/// Removes common leading whitespace from each line.
///
/// This function will look at each non-empty line and determine the
/// maximum amount of whitespace that can be removed from all lines:
///
/// ```
/// use textwrap::dedent;
///
/// assert_eq!(dedent("
///     1st line
///       2nd line
///     3rd line
/// "), "
/// 1st line
///   2nd line
/// 3rd line
/// ");
/// ```
///
/// Lines consisting only of whitespace never influence the common
/// prefix, whatever whitespace characters they contain. They are
/// replaced by a single newline (`\n`) in the output. A missing
/// final newline in the input stays missing in the output.
pub fn dedent(s: &str) -> String {
    let mut prefix = "";
    let mut lines = s.lines();

    // We first search for a non-empty line to find a prefix: the
    // whitespace in front of its first non-whitespace character.
    for line in &mut lines {
        if let Some(idx) = line.find(|c: char| !c.is_whitespace()) {
            prefix = &line[..idx];
            break;
        }
    }

    // We then continue looking through the remaining lines to
    // possibly shorten the prefix.
    for line in &mut lines {
        if prefix.is_empty() {
            // Nothing left to shorten.
            break;
        }

        // Whitespace-only lines must not constrain the prefix. Without
        // this check, a line such as " " between tab-indented lines
        // would mismatch at index 0 and wipe out the prefix.
        if line.chars().all(char::is_whitespace) {
            continue;
        }

        // Byte index of the first character where the line deviates
        // from the prefix. Everything before it is identical in both
        // strings, so the index is valid for either of them.
        let mismatch = line
            .char_indices()
            .zip(prefix.chars())
            .find(|&((_, a), b)| a != b)
            .map(|((idx, _), _)| idx);

        if let Some(idx) = mismatch {
            prefix = &line[..idx];
        }
    }

    // We now go over the lines a second time to build the result.
    let mut result = String::with_capacity(s.len());
    for line in s.lines() {
        if line.starts_with(prefix) && line.chars().any(|c| !c.is_whitespace()) {
            result.push_str(&line[prefix.len()..]);
        }
        result.push('\n');
    }

    // Do not invent a terminating newline the input did not have.
    if result.ends_with('\n') && !s.ends_with('\n') {
        result.pop();
    }

    result
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Add newlines. Ensures that the final line in the vector also
    /// has a newline.
    fn add_nl(lines: &[&str]) -> String {
        lines.join("\n") + "\n"
    }

    #[test]
    fn indent_empty() {
        assert_eq!(indent("\n", "  "), "\n");
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn indent_nonempty() {
        let x = vec!["  foo",
                     "bar",
                     "  baz"];
        let y = vec!["//  foo",
                     "//bar",
                     "//  baz"];
        assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn indent_empty_line() {
        let x = vec!["  foo",
                     "bar",
                     "",
                     "  baz"];
        let y = vec!["//  foo",
                     "//bar",
                     "",
                     "//  baz"];
        assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
    }

    #[test]
    fn dedent_empty() {
        assert_eq!(dedent(""), "");
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn dedent_multi_line() {
        let x = vec!["    foo",
                     "  bar",
                     "    baz"];
        let y = vec!["  foo",
                     "bar",
                     "  baz"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn dedent_empty_line() {
        let x = vec!["    foo",
                     "  bar",
                     "   ",
                     "    baz"];
        let y = vec!["  foo",
                     "bar",
                     "",
                     "  baz"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn dedent_blank_line() {
        let x = vec!["      foo",
                     "",
                     "        bar",
                     "          foo",
                     "          bar",
                     "          baz"];
        let y = vec!["foo",
                     "",
                     "  bar",
                     "    foo",
                     "    bar",
                     "    baz"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn dedent_whitespace_line() {
        let x = vec!["      foo",
                     " ",
                     "        bar",
                     "          foo",
                     "          bar",
                     "          baz"];
        let y = vec!["foo",
                     "",
                     "  bar",
                     "    foo",
                     "    bar",
                     "    baz"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn dedent_whitespace_line_with_other_whitespace() {
        let x = vec!["\t\tfoo",
                     " ",
                     "\t\tbar"];
        let y = vec!["foo",
                     "",
                     "bar"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn dedent_mixed_whitespace() {
        let x = vec!["\tfoo",
                     "  bar"];
        let y = vec!["\tfoo",
                     "  bar"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn dedent_tabbed_whitespace() {
        let x = vec!["\t\tfoo",
                     "\t\t\tbar"];
        let y = vec!["foo",
                     "\tbar"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn dedent_mixed_tabbed_whitespace() {
        let x = vec!["\t  \tfoo",
                     "\t  \t\tbar"];
        let y = vec!["foo",
                     "\tbar"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn dedent_mixed_tabbed_whitespace2() {
        let x = vec!["\t  \tfoo",
                     "\t    \tbar"];
        let y = vec!["\tfoo",
                     "  \tbar"];
        assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
    }

    #[test]
    #[cfg_attr(rustfmt, rustfmt_skip)]
    fn dedent_preserve_no_terminating_newline() {
        let x = vec!["  foo",
                     "    bar"].join("\n");
        let y = vec!["foo",
                     "  bar"].join("\n");
        assert_eq!(dedent(&x), y);
    }
}
// diff --git a/textwrap/src/lib.rs b/textwrap/src/lib.rs new file mode 100644 index 0000000..2f82325 --- /dev/null +++ b/textwrap/src/lib.rs @@ -0,0 +1,987 @@ +//! `textwrap` provides functions for word wrapping and filling text. +//! +//! Wrapping text can be very useful in commandline programs where you +//! want to format dynamic output nicely so it looks good in a +//! terminal. A quick example: +//! +//! ```no_run +//! extern crate textwrap; +//! use textwrap::fill; +//! +//! fn main() { +//!     let text = "textwrap: a small library for wrapping text."; +//!     println!("{}", fill(text, 18)); +//! } +//! ``` +//! +//! This will display the following output: +//! +//! ```text +//! textwrap: a small +//! library for +//! wrapping text. +//! ``` +//! +//! # Displayed Width vs Byte Size +//! +//! To word wrap text, one must know the width of each word so one can +//! know when to break lines. This library measures the width of text +//! using the [displayed width][unicode-width], not the size in bytes. +//! +//! This is important for non-ASCII text. ASCII characters such as `a` +//! and `!` are simple and take up one column each. This means that +//! the displayed width is equal to the string length in bytes. +//! However, non-ASCII characters and symbols take up more than one +//! byte when UTF-8 encoded: `é` is `0xc3 0xa9` (two bytes) and `⚙` is +//! `0xe2 0x9a 0x99` (three bytes) in UTF-8, respectively. +//! +//! This is why we take care to use the displayed width instead of the +//! byte count when computing line lengths. All functions in this +//! library handle Unicode characters like this. +//! +//! 
[unicode-width]: https://docs.rs/unicode-width/ + +#![doc(html_root_url = "https://docs.rs/textwrap/0.11.0")] +#![deny(missing_docs)] +#![deny(missing_debug_implementations)] + +#[cfg(feature = "hyphenation")] +extern crate hyphenation; +#[cfg(feature = "term_size")] +extern crate term_size; +extern crate unicode_width; + +use std::borrow::Cow; +use std::str::CharIndices; + +use unicode_width::UnicodeWidthChar; +use unicode_width::UnicodeWidthStr; + +/// A non-breaking space. +const NBSP: char = '\u{a0}'; + +mod indentation; +pub use indentation::dedent; +pub use indentation::indent; + +mod splitting; +pub use splitting::{HyphenSplitter, NoHyphenation, WordSplitter}; + +/// A Wrapper holds settings for wrapping and filling text. Use it +/// when the convenience [`wrap_iter`], [`wrap`] and [`fill`] functions +/// are not flexible enough. +/// +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`wrap`]: fn.wrap.html +/// [`fill`]: fn.fill.html +/// +/// The algorithm used by the `WrapIter` iterator (returned from the +/// `wrap_iter` method)  works by doing successive partial scans over +/// words in the input string (where each single scan yields a single +/// line) so that the overall time and memory complexity is O(*n*) where +/// *n* is the length of the input string. +#[derive(Clone, Debug)] +pub struct Wrapper<'a, S: WordSplitter> { +    /// The width in columns at which the text will be wrapped. +    pub width: usize, +    /// Indentation used for the first line of output. +    pub initial_indent: &'a str, +    /// Indentation used for subsequent lines of output. +    pub subsequent_indent: &'a str, +    /// Allow long words to be broken if they cannot fit on a line. +    /// When set to `false`, some lines may be longer than +    /// `self.width`. +    pub break_words: bool, +    /// The method for splitting words. If the `hyphenation` feature +    /// is enabled, you can use a `hyphenation::Standard` dictionary +    /// here to get language-aware hyphenation. 
+    pub splitter: S, +} + +impl<'a> Wrapper<'a, HyphenSplitter> { +    /// Create a new Wrapper for wrapping at the specified width. By +    /// default, we allow words longer than `width` to be broken. A +    /// [`HyphenSplitter`] will be used by default for splitting +    /// words. See the [`WordSplitter`] trait for other options. +    /// +    /// [`HyphenSplitter`]: struct.HyphenSplitter.html +    /// [`WordSplitter`]: trait.WordSplitter.html +    pub fn new(width: usize) -> Wrapper<'a, HyphenSplitter> { +        Wrapper::with_splitter(width, HyphenSplitter) +    } + +    /// Create a new Wrapper for wrapping text at the current terminal +    /// width. If the terminal width cannot be determined (typically +    /// because the standard input and output is not connected to a +    /// terminal), a width of 80 characters will be used. Other +    /// settings use the same defaults as `Wrapper::new`. +    /// +    /// Equivalent to: +    /// +    /// ```no_run +    /// # #![allow(unused_variables)] +    /// use textwrap::{Wrapper, termwidth}; +    /// +    /// let wrapper = Wrapper::new(termwidth()); +    /// ``` +    #[cfg(feature = "term_size")] +    pub fn with_termwidth() -> Wrapper<'a, HyphenSplitter> { +        Wrapper::new(termwidth()) +    } +} + +impl<'a, S: WordSplitter> Wrapper<'a, S> { +    /// Use the given [`WordSplitter`] to create a new Wrapper for +    /// wrapping at the specified width. By default, we allow words +    /// longer than `width` to be broken. +    /// +    /// [`WordSplitter`]: trait.WordSplitter.html +    pub fn with_splitter(width: usize, splitter: S) -> Wrapper<'a, S> { +        Wrapper { +            width: width, +            initial_indent: "", +            subsequent_indent: "", +            break_words: true, +            splitter: splitter, +        } +    } + +    /// Change [`self.initial_indent`]. The initial indentation is +    /// used on the very first line of output. 
+    /// +    /// # Examples +    /// +    /// Classic paragraph indentation can be achieved by specifying an +    /// initial indentation and wrapping each paragraph by itself: +    /// +    /// ```no_run +    /// # #![allow(unused_variables)] +    /// use textwrap::Wrapper; +    /// +    /// let wrapper = Wrapper::new(15).initial_indent("    "); +    /// ``` +    /// +    /// [`self.initial_indent`]: #structfield.initial_indent +    pub fn initial_indent(self, indent: &'a str) -> Wrapper<'a, S> { +        Wrapper { +            initial_indent: indent, +            ..self +        } +    } + +    /// Change [`self.subsequent_indent`]. The subsequent indentation +    /// is used on lines following the first line of output. +    /// +    /// # Examples +    /// +    /// Combining initial and subsequent indentation lets you format a +    /// single paragraph as a bullet list: +    /// +    /// ```no_run +    /// # #![allow(unused_variables)] +    /// use textwrap::Wrapper; +    /// +    /// let wrapper = Wrapper::new(15) +    ///     .initial_indent("* ") +    ///     .subsequent_indent("  "); +    /// ``` +    /// +    /// [`self.subsequent_indent`]: #structfield.subsequent_indent +    pub fn subsequent_indent(self, indent: &'a str) -> Wrapper<'a, S> { +        Wrapper { +            subsequent_indent: indent, +            ..self +        } +    } + +    /// Change [`self.break_words`]. This controls if words longer +    /// than `self.width` can be broken, or if they will be left +    /// sticking out into the right margin. +    /// +    /// [`self.break_words`]: #structfield.break_words +    pub fn break_words(self, setting: bool) -> Wrapper<'a, S> { +        Wrapper { +            break_words: setting, +            ..self +        } +    } + +    /// Fill a line of text at `self.width` characters. Strings are +    /// wrapped based on their displayed width, not their size in +    /// bytes. +    /// +    /// The result is a string with newlines between each line. 
Use +    /// the `wrap` method if you need access to the individual lines. +    /// +    /// # Complexities +    /// +    /// This method simply joins the lines produced by `wrap_iter`. As +    /// such, it inherits the O(*n*) time and memory complexity where +    /// *n* is the input string length. +    /// +    /// # Examples +    /// +    /// ``` +    /// use textwrap::Wrapper; +    /// +    /// let wrapper = Wrapper::new(15); +    /// assert_eq!(wrapper.fill("Memory safety without garbage collection."), +    ///            "Memory safety\nwithout garbage\ncollection."); +    /// ``` +    pub fn fill(&self, s: &str) -> String { +        // This will avoid reallocation in simple cases (no +        // indentation, no hyphenation). +        let mut result = String::with_capacity(s.len()); + +        for (i, line) in self.wrap_iter(s).enumerate() { +            if i > 0 { +                result.push('\n'); +            } +            result.push_str(&line); +        } + +        result +    } + +    /// Wrap a line of text at `self.width` characters. Strings are +    /// wrapped based on their displayed width, not their size in +    /// bytes. +    /// +    /// # Complexities +    /// +    /// This method simply collects the lines produced by `wrap_iter`. +    /// As such, it inherits the O(*n*) overall time and memory +    /// complexity where *n* is the input string length. 
+    /// +    /// # Examples +    /// +    /// ``` +    /// use textwrap::Wrapper; +    /// +    /// let wrap15 = Wrapper::new(15); +    /// assert_eq!(wrap15.wrap("Concurrency without data races."), +    ///            vec!["Concurrency", +    ///                 "without data", +    ///                 "races."]); +    /// +    /// let wrap20 = Wrapper::new(20); +    /// assert_eq!(wrap20.wrap("Concurrency without data races."), +    ///            vec!["Concurrency without", +    ///                 "data races."]); +    /// ``` +    /// +    /// Notice that newlines in the input are preserved. This means +    /// that they force a line break, regardless of how long the +    /// current line is: +    /// +    /// ``` +    /// use textwrap::Wrapper; +    /// +    /// let wrapper = Wrapper::new(40); +    /// assert_eq!(wrapper.wrap("First line.\nSecond line."), +    ///            vec!["First line.", "Second line."]); +    /// ``` +    /// +    pub fn wrap(&self, s: &'a str) -> Vec<Cow<'a, str>> { +        self.wrap_iter(s).collect::<Vec<_>>() +    } + +    /// Lazily wrap a line of text at `self.width` characters. Strings +    /// are wrapped based on their displayed width, not their size in +    /// bytes. +    /// +    /// The [`WordSplitter`] stored in [`self.splitter`] is used +    /// whenever a word is too large to fit on the current line. +    /// By changing the field, different hyphenation strategies can be +    /// implemented. +    /// +    /// # Complexities +    /// +    /// This method returns a [`WrapIter`] iterator which borrows this +    /// `Wrapper`. The algorithm used has a linear complexity, so +    /// getting the next line from the iterator will take O(*w*) time, +    /// where *w* is the wrapping width. Fully processing the iterator +    /// will take O(*n*) time for an input string of length *n*. 
+    /// +    /// When no indentation is used, each line returned is a slice of +    /// the input string and the memory overhead is thus constant. +    /// Otherwise new memory is allocated for each line returned. +    /// +    /// # Examples +    /// +    /// ``` +    /// use std::borrow::Cow; +    /// use textwrap::Wrapper; +    /// +    /// let wrap20 = Wrapper::new(20); +    /// let mut wrap20_iter = wrap20.wrap_iter("Zero-cost abstractions."); +    /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost"))); +    /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions."))); +    /// assert_eq!(wrap20_iter.next(), None); +    /// +    /// let wrap25 = Wrapper::new(25); +    /// let mut wrap25_iter = wrap25.wrap_iter("Zero-cost abstractions."); +    /// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions."))); +    /// assert_eq!(wrap25_iter.next(), None); +    /// ``` +    /// +    /// [`self.splitter`]: #structfield.splitter +    /// [`WordSplitter`]: trait.WordSplitter.html +    /// [`WrapIter`]: struct.WrapIter.html +    pub fn wrap_iter<'w>(&'w self, s: &'a str) -> WrapIter<'w, 'a, S> { +        WrapIter { +            wrapper: self, +            inner: WrapIterImpl::new(self, s), +        } +    } + +    /// Lazily wrap a line of text at `self.width` characters. Strings +    /// are wrapped based on their displayed width, not their size in +    /// bytes. +    /// +    /// The [`WordSplitter`] stored in [`self.splitter`] is used +    /// whenever a word is too large to fit on the current line. +    /// By changing the field, different hyphenation strategies can be +    /// implemented. +    /// +    /// # Complexities +    /// +    /// This method consumes the `Wrapper` and returns a +    /// [`IntoWrapIter`] iterator. Fully processing the iterator has +    /// the same O(*n*) time complexity as [`wrap_iter`], where *n* is +    /// the length of the input string. 
+    /// +    /// # Examples +    /// +    /// ``` +    /// use std::borrow::Cow; +    /// use textwrap::Wrapper; +    /// +    /// let wrap20 = Wrapper::new(20); +    /// let mut wrap20_iter = wrap20.into_wrap_iter("Zero-cost abstractions."); +    /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost"))); +    /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions."))); +    /// assert_eq!(wrap20_iter.next(), None); +    /// ``` +    /// +    /// [`self.splitter`]: #structfield.splitter +    /// [`WordSplitter`]: trait.WordSplitter.html +    /// [`IntoWrapIter`]: struct.IntoWrapIter.html +    /// [`wrap_iter`]: #method.wrap_iter +    pub fn into_wrap_iter(self, s: &'a str) -> IntoWrapIter<'a, S> { +        let inner = WrapIterImpl::new(&self, s); + +        IntoWrapIter { +            wrapper: self, +            inner: inner, +        } +    } +} + +/// An iterator over the lines of the input string which owns a +/// `Wrapper`. An instance of `IntoWrapIter` is typically obtained +/// through either [`wrap_iter`] or [`Wrapper::into_wrap_iter`]. +/// +/// Each call of `.next()` method yields a line wrapped in `Some` if the +/// input hasn't been fully processed yet. Otherwise it returns `None`. +/// +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`Wrapper::into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter +#[derive(Debug)] +pub struct IntoWrapIter<'a, S: WordSplitter> { +    wrapper: Wrapper<'a, S>, +    inner: WrapIterImpl<'a>, +} + +impl<'a, S: WordSplitter> Iterator for IntoWrapIter<'a, S> { +    type Item = Cow<'a, str>; + +    fn next(&mut self) -> Option<Cow<'a, str>> { +        self.inner.next(&self.wrapper) +    } +} + +/// An iterator over the lines of the input string which borrows a +/// `Wrapper`. An instance of `WrapIter` is typically obtained +/// through the [`Wrapper::wrap_iter`] method. +/// +/// Each call of `.next()` method yields a line wrapped in `Some` if the +/// input hasn't been fully processed yet. 
Otherwise it returns `None`. +/// +/// [`Wrapper::wrap_iter`]: struct.Wrapper.html#method.wrap_iter +#[derive(Debug)] +pub struct WrapIter<'w, 'a: 'w, S: WordSplitter + 'w> { +    wrapper: &'w Wrapper<'a, S>, +    inner: WrapIterImpl<'a>, +} + +impl<'w, 'a: 'w, S: WordSplitter> Iterator for WrapIter<'w, 'a, S> { +    type Item = Cow<'a, str>; + +    fn next(&mut self) -> Option<Cow<'a, str>> { +        self.inner.next(self.wrapper) +    } +} + +/// Like `char::is_whitespace`, but non-breaking spaces don't count. +#[inline] +fn is_whitespace(ch: char) -> bool { +    ch.is_whitespace() && ch != NBSP +} + +/// Common implementation details for `WrapIter` and `IntoWrapIter`. +#[derive(Debug)] +struct WrapIterImpl<'a> { +    // String to wrap. +    source: &'a str, +    // CharIndices iterator over self.source. +    char_indices: CharIndices<'a>, +    // Byte index where the current line starts. +    start: usize, +    // Byte index of the last place where the string can be split. +    split: usize, +    // Size in bytes of the character at self.source[self.split]. +    split_len: usize, +    // Width of self.source[self.start..idx]. +    line_width: usize, +    // Width of self.source[self.start..self.split]. +    line_width_at_split: usize, +    // Tracking runs of whitespace characters. +    in_whitespace: bool, +    // Has iterator finished producing elements? 
+    finished: bool, +} + +impl<'a> WrapIterImpl<'a> { +    fn new<S: WordSplitter>(wrapper: &Wrapper<'a, S>, s: &'a str) -> WrapIterImpl<'a> { +        WrapIterImpl { +            source: s, +            char_indices: s.char_indices(), +            start: 0, +            split: 0, +            split_len: 0, +            line_width: wrapper.initial_indent.width(), +            line_width_at_split: wrapper.initial_indent.width(), +            in_whitespace: false, +            finished: false, +        } +    } + +    fn create_result_line<S: WordSplitter>(&self, wrapper: &Wrapper<'a, S>) -> Cow<'a, str> { +        if self.start == 0 { +            Cow::from(wrapper.initial_indent) +        } else { +            Cow::from(wrapper.subsequent_indent) +        } +    } + +    fn next<S: WordSplitter>(&mut self, wrapper: &Wrapper<'a, S>) -> Option<Cow<'a, str>> { +        if self.finished { +            return None; +        } + +        while let Some((idx, ch)) = self.char_indices.next() { +            let char_width = ch.width().unwrap_or(0); +            let char_len = ch.len_utf8(); + +            if ch == '\n' { +                self.split = idx; +                self.split_len = char_len; +                self.line_width_at_split = self.line_width; +                self.in_whitespace = false; + +                // If this is not the final line, return the current line. 
Otherwise, +                // we will return the line with its line break after exiting the loop +                if self.split + self.split_len < self.source.len() { +                    let mut line = self.create_result_line(wrapper); +                    line += &self.source[self.start..self.split]; + +                    self.start = self.split + self.split_len; +                    self.line_width = wrapper.subsequent_indent.width(); + +                    return Some(line); +                } +            } else if is_whitespace(ch) { +                // Extend the previous split or create a new one. +                if self.in_whitespace { +                    self.split_len += char_len; +                } else { +                    self.split = idx; +                    self.split_len = char_len; +                } +                self.line_width_at_split = self.line_width + char_width; +                self.in_whitespace = true; +            } else if self.line_width + char_width > wrapper.width { +                // There is no room for this character on the current +                // line. Try to split the final word. +                self.in_whitespace = false; +                let remaining_text = &self.source[self.split + self.split_len..]; +                let final_word = match remaining_text.find(is_whitespace) { +                    Some(i) => &remaining_text[..i], +                    None => remaining_text, +                }; + +                let mut hyphen = ""; +                let splits = wrapper.splitter.split(final_word); +                for &(head, hyp, _) in splits.iter().rev() { +                    if self.line_width_at_split + head.width() + hyp.width() <= wrapper.width { +                        // We can fit head into the current line. +                        // Advance the split point by the width of the +                        // whitespace and the head length. 
+                        self.split += self.split_len + head.len(); +                        self.split_len = 0; +                        hyphen = hyp; +                        break; +                    } +                } + +                if self.start >= self.split { +                    // The word is too big to fit on a single line, so we +                    // need to split it at the current index. +                    if wrapper.break_words { +                        // Break word at current index. +                        self.split = idx; +                        self.split_len = 0; +                        self.line_width_at_split = self.line_width; +                    } else { +                        // Add smallest split. +                        self.split = self.start + splits[0].0.len(); +                        self.split_len = 0; +                        self.line_width_at_split = self.line_width; +                    } +                } + +                if self.start < self.split { +                    let mut line = self.create_result_line(wrapper); +                    line += &self.source[self.start..self.split]; +                    line += hyphen; + +                    self.start = self.split + self.split_len; +                    self.line_width += wrapper.subsequent_indent.width(); +                    self.line_width -= self.line_width_at_split; +                    self.line_width += char_width; + +                    return Some(line); +                } +            } else { +                self.in_whitespace = false; +            } +            self.line_width += char_width; +        } + +        self.finished = true; + +        // Add final line. +        if self.start < self.source.len() { +            let mut line = self.create_result_line(wrapper); +            line += &self.source[self.start..]; +            return Some(line); +        } + +        None +    } +} + +/// Return the current terminal width. 
If the terminal width cannot be +/// determined (typically because the standard output is not connected +/// to a terminal), a default width of 80 characters will be used. +/// +/// # Examples +/// +/// Create a `Wrapper` for the current terminal with a two column +/// margin: +/// +/// ```no_run +/// # #![allow(unused_variables)] +/// use textwrap::{Wrapper, NoHyphenation, termwidth}; +/// +/// let width = termwidth() - 4; // Two columns on each side. +/// let wrapper = Wrapper::with_splitter(width, NoHyphenation) +///     .initial_indent("  ") +///     .subsequent_indent("  "); +/// ``` +#[cfg(feature = "term_size")] +pub fn termwidth() -> usize { +    term_size::dimensions_stdout().map_or(80, |(w, _)| w) +} + +/// Fill a line of text at `width` characters. Strings are wrapped +/// based on their displayed width, not their size in bytes. +/// +/// The result is a string with newlines between each line. Use +/// [`wrap`] if you need access to the individual lines or +/// [`wrap_iter`] for its iterator counterpart. +/// +/// ``` +/// use textwrap::fill; +/// +/// assert_eq!(fill("Memory safety without garbage collection.", 15), +///            "Memory safety\nwithout garbage\ncollection."); +/// ``` +/// +/// This function creates a Wrapper on the fly with default settings. +/// If you need to set a language corpus for automatic hyphenation, or +/// need to fill many strings, then it is suggested to create a Wrapper +/// and call its [`fill` method]. +/// +/// [`wrap`]: fn.wrap.html +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`fill` method]: struct.Wrapper.html#method.fill +pub fn fill(s: &str, width: usize) -> String { +    Wrapper::new(width).fill(s) +} + +/// Wrap a line of text at `width` characters. Strings are wrapped +/// based on their displayed width, not their size in bytes. +/// +/// This function creates a Wrapper on the fly with default settings. 
+/// If you need to set a language corpus for automatic hyphenation, or +/// need to wrap many strings, then it is suggested to create a Wrapper +/// and call its [`wrap` method]. +/// +/// The result is a vector of strings. Use [`wrap_iter`] if you need an +/// iterator version. +/// +/// # Examples +/// +/// ``` +/// use textwrap::wrap; +/// +/// assert_eq!(wrap("Concurrency without data races.", 15), +///            vec!["Concurrency", +///                 "without data", +///                 "races."]); +/// +/// assert_eq!(wrap("Concurrency without data races.", 20), +///            vec!["Concurrency without", +///                 "data races."]); +/// ``` +/// +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`wrap` method]: struct.Wrapper.html#method.wrap +pub fn wrap(s: &str, width: usize) -> Vec<Cow<str>> { +    Wrapper::new(width).wrap(s) +} + +/// Lazily wrap a line of text at `width` characters. Strings are +/// wrapped based on their displayed width, not their size in bytes. +/// +/// This function creates a Wrapper on the fly with default settings. +/// It then calls the [`into_wrap_iter`] method. Hence, the return +/// value is an [`IntoWrapIter`], not a [`WrapIter`] as the function +/// name would otherwise suggest. +/// +/// If you need to set a language corpus for automatic hyphenation, or +/// need to wrap many strings, then it is suggested to create a Wrapper +/// and call its [`wrap_iter`] or [`into_wrap_iter`] methods. 
+/// +/// # Examples +/// +/// ``` +/// use std::borrow::Cow; +/// use textwrap::wrap_iter; +/// +/// let mut wrap20_iter = wrap_iter("Zero-cost abstractions.", 20); +/// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost"))); +/// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions."))); +/// assert_eq!(wrap20_iter.next(), None); +/// +/// let mut wrap25_iter = wrap_iter("Zero-cost abstractions.", 25); +/// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions."))); +/// assert_eq!(wrap25_iter.next(), None); +/// ``` +/// +/// [`wrap_iter`]: struct.Wrapper.html#method.wrap_iter +/// [`into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter +/// [`IntoWrapIter`]: struct.IntoWrapIter.html +/// [`WrapIter`]: struct.WrapIter.html +pub fn wrap_iter(s: &str, width: usize) -> IntoWrapIter<HyphenSplitter> { +    Wrapper::new(width).into_wrap_iter(s) +} + +#[cfg(test)] +mod tests { +    #[cfg(feature = "hyphenation")] +    extern crate hyphenation; + +    use super::*; +    #[cfg(feature = "hyphenation")] +    use hyphenation::{Language, Load, Standard}; + +    #[test] +    fn no_wrap() { +        assert_eq!(wrap("foo", 10), vec!["foo"]); +    } + +    #[test] +    fn simple() { +        assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]); +    } + +    #[test] +    fn multi_word_on_line() { +        assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]); +    } + +    #[test] +    fn long_word() { +        assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]); +    } + +    #[test] +    fn long_words() { +        assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]); +    } + +    #[test] +    fn max_width() { +        assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]); +    } + +    #[test] +    fn leading_whitespace() { +        assert_eq!(wrap("  foo bar", 6), vec!["  foo", "bar"]); +    } + +    #[test] +    fn trailing_whitespace() { +        assert_eq!(wrap("foo bar  ", 6), vec!["foo", "bar  "]); 
+    } + +    #[test] +    fn interior_whitespace() { +        assert_eq!(wrap("foo:   bar baz", 10), vec!["foo:   bar", "baz"]); +    } + +    #[test] +    fn extra_whitespace_start_of_line() { +        // Whitespace is only significant inside a line. After a line +        // gets too long and is broken, the first word starts in +        // column zero and is not indented. The line before might end +        // up with trailing whitespace. +        assert_eq!(wrap("foo               bar", 5), vec!["foo", "bar"]); +    } + +    #[test] +    fn issue_99() { +        // We did not reset the in_whitespace flag correctly and did +        // not handle single-character words after a line break. +        assert_eq!( +            wrap("aaabbbccc x yyyzzzwww", 9), +            vec!["aaabbbccc", "x", "yyyzzzwww"] +        ); +    } + +    #[test] +    fn issue_129() { +        // The dash is an en-dash which takes up three bytes. We used +        // to panic since we tried to index into the character. 
+        assert_eq!(wrap("x – x", 1), vec!["x", "–", "x"]); +    } + +    #[test] +    fn wide_character_handling() { +        assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]); +        assert_eq!( +            wrap("Hello, World!", 15), +            vec!["Hello,", "World!"] +        ); +    } + +    #[test] +    fn empty_input_not_indented() { +        let wrapper = Wrapper::new(10).initial_indent("!!!"); +        assert_eq!(wrapper.fill(""), ""); +    } + +    #[test] +    fn indent_single_line() { +        let wrapper = Wrapper::new(10).initial_indent(">>>"); // No trailing space +        assert_eq!(wrapper.fill("foo"), ">>>foo"); +    } + +    #[test] +    fn indent_multiple_lines() { +        let wrapper = Wrapper::new(6).initial_indent("* ").subsequent_indent("  "); +        assert_eq!(wrapper.wrap("foo bar baz"), vec!["* foo", "  bar", "  baz"]); +    } + +    #[test] +    fn indent_break_words() { +        let wrapper = Wrapper::new(5).initial_indent("* ").subsequent_indent("  "); +        assert_eq!(wrapper.wrap("foobarbaz"), vec!["* foo", "  bar", "  baz"]); +    } + +    #[test] +    fn hyphens() { +        assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]); +    } + +    #[test] +    fn trailing_hyphen() { +        let wrapper = Wrapper::new(5).break_words(false); +        assert_eq!(wrapper.wrap("foobar-"), vec!["foobar-"]); +    } + +    #[test] +    fn multiple_hyphens() { +        assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]); +    } + +    #[test] +    fn hyphens_flag() { +        let wrapper = Wrapper::new(5).break_words(false); +        assert_eq!( +            wrapper.wrap("The --foo-bar flag."), +            vec!["The", "--foo-", "bar", "flag."] +        ); +    } + +    #[test] +    fn repeated_hyphens() { +        let wrapper = Wrapper::new(4).break_words(false); +        assert_eq!(wrapper.wrap("foo--bar"), vec!["foo--bar"]); +    } + +    #[test] +    fn hyphens_alphanumeric() { +        
assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]); +    } + +    #[test] +    fn hyphens_non_alphanumeric() { +        let wrapper = Wrapper::new(5).break_words(false); +        assert_eq!(wrapper.wrap("foo(-)bar"), vec!["foo(-)bar"]); +    } + +    #[test] +    fn multiple_splits() { +        assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]); +    } + +    #[test] +    fn forced_split() { +        let wrapper = Wrapper::new(5).break_words(false); +        assert_eq!(wrapper.wrap("foobar-baz"), vec!["foobar-", "baz"]); +    } + +    #[test] +    fn no_hyphenation() { +        let wrapper = Wrapper::with_splitter(8, NoHyphenation); +        assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]); +    } + +    #[test] +    #[cfg(feature = "hyphenation")] +    fn auto_hyphenation() { +        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); +        let wrapper = Wrapper::new(10); +        assert_eq!( +            wrapper.wrap("Internationalization"), +            vec!["Internatio", "nalization"] +        ); + +        let wrapper = Wrapper::with_splitter(10, dictionary); +        assert_eq!( +            wrapper.wrap("Internationalization"), +            vec!["Interna-", "tionaliza-", "tion"] +        ); +    } + +    #[test] +    #[cfg(feature = "hyphenation")] +    fn split_len_hyphenation() { +        // Test that hyphenation takes the width of the whitespace +        // into account. +        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); +        let wrapper = Wrapper::with_splitter(15, dictionary); +        assert_eq!( +            wrapper.wrap("garbage   collection"), +            vec!["garbage   col-", "lection"] +        ); +    } + +    #[test] +    #[cfg(feature = "hyphenation")] +    fn borrowed_lines() { +        // Lines that end with an extra hyphen are owned, the final +        // line is borrowed. 
+        use std::borrow::Cow::{Borrowed, Owned}; +        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); +        let wrapper = Wrapper::with_splitter(10, dictionary); +        let lines = wrapper.wrap("Internationalization"); +        if let Borrowed(s) = lines[0] { +            assert!(false, "should not have been borrowed: {:?}", s); +        } +        if let Borrowed(s) = lines[1] { +            assert!(false, "should not have been borrowed: {:?}", s); +        } +        if let Owned(ref s) = lines[2] { +            assert!(false, "should not have been owned: {:?}", s); +        } +    } + +    #[test] +    #[cfg(feature = "hyphenation")] +    fn auto_hyphenation_with_hyphen() { +        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); +        let wrapper = Wrapper::new(8).break_words(false); +        assert_eq!(wrapper.wrap("over-caffinated"), vec!["over-", "caffinated"]); + +        let wrapper = Wrapper::with_splitter(8, dictionary).break_words(false); +        assert_eq!( +            wrapper.wrap("over-caffinated"), +            vec!["over-", "caffi-", "nated"] +        ); +    } + +    #[test] +    fn break_words() { +        assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]); +    } + +    #[test] +    fn break_words_wide_characters() { +        assert_eq!(wrap("Hello", 5), vec!["He", "ll", "o"]); +    } + +    #[test] +    fn break_words_zero_width() { +        assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]); +    } + +    #[test] +    fn break_words_line_breaks() { +        assert_eq!(fill("ab\ncdefghijkl", 5), "ab\ncdefg\nhijkl"); +        assert_eq!(fill("abcdefgh\nijkl", 5), "abcde\nfgh\nijkl"); +    } + +    #[test] +    fn preserve_line_breaks() { +        assert_eq!(fill("test\n", 11), "test\n"); +        assert_eq!(fill("test\n\na\n\n", 11), "test\n\na\n\n"); +        assert_eq!(fill("1 3 5 7\n1 3 5 7", 7), "1 3 5 7\n1 3 5 7"); +    } + +    #[test] +    fn 
wrap_preserve_line_breaks() { +        assert_eq!(fill("1 3 5 7\n1 3 5 7", 5), "1 3 5\n7\n1 3 5\n7"); +    } + +    #[test] +    fn non_breaking_space() { +        let wrapper = Wrapper::new(5).break_words(false); +        assert_eq!(wrapper.fill("foo bar baz"), "foo bar baz"); +    } + +    #[test] +    fn non_breaking_hyphen() { +        let wrapper = Wrapper::new(5).break_words(false); +        assert_eq!(wrapper.fill("foo‑bar‑baz"), "foo‑bar‑baz"); +    } + +    #[test] +    fn fill_simple() { +        assert_eq!(fill("foo bar baz", 10), "foo bar\nbaz"); +    } +} diff --git a/textwrap/src/splitting.rs b/textwrap/src/splitting.rs new file mode 100644 index 0000000..f6b65af --- /dev/null +++ b/textwrap/src/splitting.rs @@ -0,0 +1,139 @@ +//! Word splitting functionality. +//! +//! To wrap text into lines, long words sometimes need to be split +//! across lines. The [`WordSplitter`] trait defines this +//! functionality. [`HyphenSplitter`] is the default implementation of +//! this trait: it will simply split words on existing hyphens. + +#[cfg(feature = "hyphenation")] +use hyphenation::{Hyphenator, Standard}; + +/// An interface for splitting words. +/// +/// When the [`wrap_iter`] method will try to fit text into a line, it +/// will eventually find a word that is too large for the current text +/// width. It will then call the currently configured `WordSplitter` to +/// have it attempt to split the word into smaller parts. This trait +/// describes that functionality via the [`split`] method. +/// +/// If the `textwrap` crate has been compiled with the `hyphenation` +/// feature enabled, you will find an implementation of `WordSplitter` +/// by the `hyphenation::Standard` struct. Use this struct for +/// language-aware hyphenation. See the [`hyphenation` documentation] +/// for details. 
///
/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter
/// [`split`]: #tymethod.split
/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
pub trait WordSplitter {
    /// Return all possible splits of `word`.
    ///
    /// Each split is a `(head, hyphen, tail)` triple where
    /// `head + tail == word`. The `hyphen` element is the hyphen that
    /// goes with that split; it is empty when `head` already ends
    /// with a hyphen and for the final "no split" triple.
    ///
    /// The splits should go from smallest to longest and should
    /// include no split at all. So the word "technology" could be
    /// split into
    ///
    /// ```no_run
    /// vec![("tech", "-", "nology"),
    ///      ("technol", "-", "ogy"),
    ///      ("technolo", "-", "gy"),
    ///      ("technology", "", "")];
    /// ```
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
}

/// Use this as a [`Wrapper.splitter`] to avoid any kind of
/// hyphenation:
///
/// ```
/// use textwrap::{Wrapper, NoHyphenation};
///
/// let wrapper = Wrapper::with_splitter(8, NoHyphenation);
/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
/// ```
///
/// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter
#[derive(Clone, Debug)]
pub struct NoHyphenation;

/// `NoHyphenation` implements `WordSplitter` by not splitting the
/// word at all.
impl WordSplitter for NoHyphenation {
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        // The only "split" offered is the whole word with no hyphen
        // and an empty tail.
        vec![(word, "", "")]
    }
}

/// Simple and default way to split words: splitting on existing
/// hyphens only.
///
/// You probably don't need to use this type since it's already used
/// by default by `Wrapper::new`.
#[derive(Clone, Debug)]
pub struct HyphenSplitter;

/// `HyphenSplitter` is the default `WordSplitter` used by
/// `Wrapper::new`. It will split words on any existing hyphens in the
/// word.
///
/// It will only use hyphens that are surrounded by alphanumeric
/// characters, which prevents a word like "--foo-bar" from being
/// split on the first or second hyphen.
impl WordSplitter for HyphenSplitter {
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        let mut triples = Vec::new();

        // Slide a three-character window (prev, cur, next) over the
        // word, smallest split first. A hyphen is only used when both
        // of its neighbours are alphanumeric, which avoids splitting
        // on repeated hyphens such as those found in --foo-bar. Words
        // shorter than three characters produce no window at all, so
        // they fall straight through to the "no split" entry below.
        let prevs = word.chars();
        let curs = word.char_indices().skip(1);
        let nexts = word.chars().skip(2);
        for ((prev, (idx, cur)), next) in prevs.zip(curs).zip(nexts) {
            if cur == '-' && prev.is_alphanumeric() && next.is_alphanumeric() {
                // '-' is a single byte in UTF-8, so `idx + 1` is the
                // character boundary right after the hyphen.
                let (head, tail) = word.split_at(idx + 1);
                triples.push((head, "", tail));
            }
        }

        // The final option is no split at all.
        triples.push((word, "", ""));

        triples
    }
}

/// A hyphenation dictionary can be used to do language-specific
/// hyphenation using patterns from the hyphenation crate.
#[cfg(feature = "hyphenation")]
impl WordSplitter for Standard {
    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
        // Find splits based on language dictionary.
        let mut triples = Vec::new();
        for n in self.hyphenate(word).breaks {
            let (head, tail) = word.split_at(n);
            // Do not add a second hyphen when the head already ends
            // with one.
            let hyphen = if head.ends_with('-') { "" } else { "-" };
            triples.push((head, hyphen, tail));
        }
        // The final option is no split at all.
        triples.push((word, "", ""));

        triples
    }
}
