aboutsummaryrefslogtreecommitdiff
path: root/textwrap/src
diff options
context:
space:
mode:
Diffstat (limited to 'textwrap/src')
-rw-r--r--textwrap/src/indentation.rs294
-rw-r--r--textwrap/src/lib.rs987
-rw-r--r--textwrap/src/splitting.rs139
3 files changed, 0 insertions, 1420 deletions
diff --git a/textwrap/src/indentation.rs b/textwrap/src/indentation.rs
deleted file mode 100644
index 276ba10..0000000
--- a/textwrap/src/indentation.rs
+++ /dev/null
@@ -1,294 +0,0 @@
-//! Functions related to adding and removing indentation from lines of
-//! text.
-//!
-//! The functions here can be used to uniformly indent or dedent
-//! (unindent) word wrapped lines of text.
-
-/// Add prefix to each non-empty line.
-///
-/// ```
-/// use textwrap::indent;
-///
-/// assert_eq!(indent("
-/// Foo
-/// Bar
-/// ", " "), "
-/// Foo
-/// Bar
-/// ");
-/// ```
-///
-/// Empty lines (lines consisting only of whitespace) are not indented
-/// and the whitespace is replaced by a single newline (`\n`):
-///
-/// ```
-/// use textwrap::indent;
-///
-/// assert_eq!(indent("
-/// Foo
-///
-/// Bar
-/// \t
-/// Baz
-/// ", "->"), "
-/// ->Foo
-///
-/// ->Bar
-///
-/// ->Baz
-/// ");
-/// ```
-///
-/// Leading and trailing whitespace on non-empty lines is kept
-/// unchanged:
-///
-/// ```
-/// use textwrap::indent;
-///
-/// assert_eq!(indent(" \t Foo ", "->"), "-> \t Foo \n");
-/// ```
-pub fn indent(s: &str, prefix: &str) -> String {
- let mut result = String::new();
- for line in s.lines() {
- if line.chars().any(|c| !c.is_whitespace()) {
- result.push_str(prefix);
- result.push_str(line);
- }
- result.push('\n');
- }
- result
-}
-
-/// Removes common leading whitespace from each line.
-///
-/// This function will look at each non-empty line and determine the
-/// maximum amount of whitespace that can be removed from all lines:
-///
-/// ```
-/// use textwrap::dedent;
-///
-/// assert_eq!(dedent("
-/// 1st line
-/// 2nd line
-/// 3rd line
-/// "), "
-/// 1st line
-/// 2nd line
-/// 3rd line
-/// ");
-/// ```
-pub fn dedent(s: &str) -> String {
- let mut prefix = "";
- let mut lines = s.lines();
-
- // We first search for a non-empty line to find a prefix.
- for line in &mut lines {
- let mut whitespace_idx = line.len();
- for (idx, ch) in line.char_indices() {
- if !ch.is_whitespace() {
- whitespace_idx = idx;
- break;
- }
- }
-
- // Check if the line had anything but whitespace
- if whitespace_idx < line.len() {
- prefix = &line[..whitespace_idx];
- break;
- }
- }
-
- // We then continue looking through the remaining lines to
- // possibly shorten the prefix.
- for line in &mut lines {
- let mut whitespace_idx = line.len();
- for ((idx, a), b) in line.char_indices().zip(prefix.chars()) {
- if a != b {
- whitespace_idx = idx;
- break;
- }
- }
-
- // Check if the line had anything but whitespace and if we
- // have found a shorter prefix
- if whitespace_idx < line.len() && whitespace_idx < prefix.len() {
- prefix = &line[..whitespace_idx];
- }
- }
-
- // We now go over the lines a second time to build the result.
- let mut result = String::new();
- for line in s.lines() {
- if line.starts_with(&prefix) && line.chars().any(|c| !c.is_whitespace()) {
- let (_, tail) = line.split_at(prefix.len());
- result.push_str(tail);
- }
- result.push('\n');
- }
-
- if result.ends_with('\n') && !s.ends_with('\n') {
- let new_len = result.len() - 1;
- result.truncate(new_len);
- }
-
- result
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- /// Add newlines. Ensures that the final line in the vector also
- /// has a newline.
- fn add_nl(lines: &[&str]) -> String {
- lines.join("\n") + "\n"
- }
-
- #[test]
- fn indent_empty() {
- assert_eq!(indent("\n", " "), "\n");
- }
-
- #[test]
- #[cfg_attr(rustfmt, rustfmt_skip)]
- fn indent_nonempty() {
- let x = vec![" foo",
- "bar",
- " baz"];
- let y = vec!["// foo",
- "//bar",
- "// baz"];
- assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
- }
-
- #[test]
- #[cfg_attr(rustfmt, rustfmt_skip)]
- fn indent_empty_line() {
- let x = vec![" foo",
- "bar",
- "",
- " baz"];
- let y = vec!["// foo",
- "//bar",
- "",
- "// baz"];
- assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
- }
-
- #[test]
- fn dedent_empty() {
- assert_eq!(dedent(""), "");
- }
-
- #[test]
- #[cfg_attr(rustfmt, rustfmt_skip)]
- fn dedent_multi_line() {
- let x = vec![" foo",
- " bar",
- " baz"];
- let y = vec![" foo",
- "bar",
- " baz"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
- }
-
- #[test]
- #[cfg_attr(rustfmt, rustfmt_skip)]
- fn dedent_empty_line() {
- let x = vec![" foo",
- " bar",
- " ",
- " baz"];
- let y = vec![" foo",
- "bar",
- "",
- " baz"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
- }
-
- #[test]
- #[cfg_attr(rustfmt, rustfmt_skip)]
- fn dedent_blank_line() {
- let x = vec![" foo",
- "",
- " bar",
- " foo",
- " bar",
- " baz"];
- let y = vec!["foo",
- "",
- " bar",
- " foo",
- " bar",
- " baz"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
- }
-
- #[test]
- #[cfg_attr(rustfmt, rustfmt_skip)]
- fn dedent_whitespace_line() {
- let x = vec![" foo",
- " ",
- " bar",
- " foo",
- " bar",
- " baz"];
- let y = vec!["foo",
- "",
- " bar",
- " foo",
- " bar",
- " baz"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
- }
-
- #[test]
- #[cfg_attr(rustfmt, rustfmt_skip)]
- fn dedent_mixed_whitespace() {
- let x = vec!["\tfoo",
- " bar"];
- let y = vec!["\tfoo",
- " bar"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
- }
-
- #[test]
- #[cfg_attr(rustfmt, rustfmt_skip)]
- fn dedent_tabbed_whitespace() {
- let x = vec!["\t\tfoo",
- "\t\t\tbar"];
- let y = vec!["foo",
- "\tbar"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
- }
-
- #[test]
- #[cfg_attr(rustfmt, rustfmt_skip)]
- fn dedent_mixed_tabbed_whitespace() {
- let x = vec!["\t \tfoo",
- "\t \t\tbar"];
- let y = vec!["foo",
- "\tbar"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
- }
-
- #[test]
- #[cfg_attr(rustfmt, rustfmt_skip)]
- fn dedent_mixed_tabbed_whitespace2() {
- let x = vec!["\t \tfoo",
- "\t \tbar"];
- let y = vec!["\tfoo",
- " \tbar"];
- assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
- }
-
- #[test]
- #[cfg_attr(rustfmt, rustfmt_skip)]
- fn dedent_preserve_no_terminating_newline() {
- let x = vec![" foo",
- " bar"].join("\n");
- let y = vec!["foo",
- " bar"].join("\n");
- assert_eq!(dedent(&x), y);
- }
-}
diff --git a/textwrap/src/lib.rs b/textwrap/src/lib.rs
deleted file mode 100644
index 2f82325..0000000
--- a/textwrap/src/lib.rs
+++ /dev/null
@@ -1,987 +0,0 @@
-//! `textwrap` provides functions for word wrapping and filling text.
-//!
-//! Wrapping text can be very useful in commandline programs where you
-//! want to format dynamic output nicely so it looks good in a
-//! terminal. A quick example:
-//!
-//! ```no_run
-//! extern crate textwrap;
-//! use textwrap::fill;
-//!
-//! fn main() {
-//! let text = "textwrap: a small library for wrapping text.";
-//! println!("{}", fill(text, 18));
-//! }
-//! ```
-//!
-//! This will display the following output:
-//!
-//! ```text
-//! textwrap: a small
-//! library for
-//! wrapping text.
-//! ```
-//!
-//! # Displayed Width vs Byte Size
-//!
-//! To word wrap text, one must know the width of each word so one can
-//! know when to break lines. This library measures the width of text
-//! using the [displayed width][unicode-width], not the size in bytes.
-//!
-//! This is important for non-ASCII text. ASCII characters such as `a`
-//! and `!` are simple and take up one column each. This means that
-//! the displayed width is equal to the string length in bytes.
-//! However, non-ASCII characters and symbols take up more than one
-//! byte when UTF-8 encoded: `é` is `0xc3 0xa9` (two bytes) and `⚙` is
-//! `0xe2 0x9a 0x99` (three bytes) in UTF-8, respectively.
-//!
-//! This is why we take care to use the displayed width instead of the
-//! byte count when computing line lengths. All functions in this
-//! library handle Unicode characters like this.
-//!
-//! [unicode-width]: https://docs.rs/unicode-width/
-
-#![doc(html_root_url = "https://docs.rs/textwrap/0.11.0")]
-#![deny(missing_docs)]
-#![deny(missing_debug_implementations)]
-
-#[cfg(feature = "hyphenation")]
-extern crate hyphenation;
-#[cfg(feature = "term_size")]
-extern crate term_size;
-extern crate unicode_width;
-
-use std::borrow::Cow;
-use std::str::CharIndices;
-
-use unicode_width::UnicodeWidthChar;
-use unicode_width::UnicodeWidthStr;
-
-/// A non-breaking space.
-const NBSP: char = '\u{a0}';
-
-mod indentation;
-pub use indentation::dedent;
-pub use indentation::indent;
-
-mod splitting;
-pub use splitting::{HyphenSplitter, NoHyphenation, WordSplitter};
-
-/// A Wrapper holds settings for wrapping and filling text. Use it
-/// when the convenience [`wrap_iter`], [`wrap`] and [`fill`] functions
-/// are not flexible enough.
-///
-/// [`wrap_iter`]: fn.wrap_iter.html
-/// [`wrap`]: fn.wrap.html
-/// [`fill`]: fn.fill.html
-///
-/// The algorithm used by the `WrapIter` iterator (returned from the
-/// `wrap_iter` method) works by doing successive partial scans over
-/// words in the input string (where each single scan yields a single
-/// line) so that the overall time and memory complexity is O(*n*) where
-/// *n* is the length of the input string.
-#[derive(Clone, Debug)]
-pub struct Wrapper<'a, S: WordSplitter> {
- /// The width in columns at which the text will be wrapped.
- pub width: usize,
- /// Indentation used for the first line of output.
- pub initial_indent: &'a str,
- /// Indentation used for subsequent lines of output.
- pub subsequent_indent: &'a str,
- /// Allow long words to be broken if they cannot fit on a line.
- /// When set to `false`, some lines may be longer than
- /// `self.width`.
- pub break_words: bool,
- /// The method for splitting words. If the `hyphenation` feature
- /// is enabled, you can use a `hyphenation::Standard` dictionary
- /// here to get language-aware hyphenation.
- pub splitter: S,
-}
-
-impl<'a> Wrapper<'a, HyphenSplitter> {
- /// Create a new Wrapper for wrapping at the specified width. By
- /// default, we allow words longer than `width` to be broken. A
- /// [`HyphenSplitter`] will be used by default for splitting
- /// words. See the [`WordSplitter`] trait for other options.
- ///
- /// [`HyphenSplitter`]: struct.HyphenSplitter.html
- /// [`WordSplitter`]: trait.WordSplitter.html
- pub fn new(width: usize) -> Wrapper<'a, HyphenSplitter> {
- Wrapper::with_splitter(width, HyphenSplitter)
- }
-
- /// Create a new Wrapper for wrapping text at the current terminal
- /// width. If the terminal width cannot be determined (typically
- /// because the standard input and output is not connected to a
- /// terminal), a width of 80 characters will be used. Other
- /// settings use the same defaults as `Wrapper::new`.
- ///
- /// Equivalent to:
- ///
- /// ```no_run
- /// # #![allow(unused_variables)]
- /// use textwrap::{Wrapper, termwidth};
- ///
- /// let wrapper = Wrapper::new(termwidth());
- /// ```
- #[cfg(feature = "term_size")]
- pub fn with_termwidth() -> Wrapper<'a, HyphenSplitter> {
- Wrapper::new(termwidth())
- }
-}
-
-impl<'a, S: WordSplitter> Wrapper<'a, S> {
- /// Use the given [`WordSplitter`] to create a new Wrapper for
- /// wrapping at the specified width. By default, we allow words
- /// longer than `width` to be broken.
- ///
- /// [`WordSplitter`]: trait.WordSplitter.html
- pub fn with_splitter(width: usize, splitter: S) -> Wrapper<'a, S> {
- Wrapper {
- width: width,
- initial_indent: "",
- subsequent_indent: "",
- break_words: true,
- splitter: splitter,
- }
- }
-
- /// Change [`self.initial_indent`]. The initial indentation is
- /// used on the very first line of output.
- ///
- /// # Examples
- ///
- /// Classic paragraph indentation can be achieved by specifying an
- /// initial indentation and wrapping each paragraph by itself:
- ///
- /// ```no_run
- /// # #![allow(unused_variables)]
- /// use textwrap::Wrapper;
- ///
- /// let wrapper = Wrapper::new(15).initial_indent(" ");
- /// ```
- ///
- /// [`self.initial_indent`]: #structfield.initial_indent
- pub fn initial_indent(self, indent: &'a str) -> Wrapper<'a, S> {
- Wrapper {
- initial_indent: indent,
- ..self
- }
- }
-
- /// Change [`self.subsequent_indent`]. The subsequent indentation
- /// is used on lines following the first line of output.
- ///
- /// # Examples
- ///
- /// Combining initial and subsequent indentation lets you format a
- /// single paragraph as a bullet list:
- ///
- /// ```no_run
- /// # #![allow(unused_variables)]
- /// use textwrap::Wrapper;
- ///
- /// let wrapper = Wrapper::new(15)
- /// .initial_indent("* ")
- /// .subsequent_indent(" ");
- /// ```
- ///
- /// [`self.subsequent_indent`]: #structfield.subsequent_indent
- pub fn subsequent_indent(self, indent: &'a str) -> Wrapper<'a, S> {
- Wrapper {
- subsequent_indent: indent,
- ..self
- }
- }
-
- /// Change [`self.break_words`]. This controls if words longer
- /// than `self.width` can be broken, or if they will be left
- /// sticking out into the right margin.
- ///
- /// [`self.break_words`]: #structfield.break_words
- pub fn break_words(self, setting: bool) -> Wrapper<'a, S> {
- Wrapper {
- break_words: setting,
- ..self
- }
- }
-
- /// Fill a line of text at `self.width` characters. Strings are
- /// wrapped based on their displayed width, not their size in
- /// bytes.
- ///
- /// The result is a string with newlines between each line. Use
- /// the `wrap` method if you need access to the individual lines.
- ///
- /// # Complexities
- ///
- /// This method simply joins the lines produced by `wrap_iter`. As
- /// such, it inherits the O(*n*) time and memory complexity where
- /// *n* is the input string length.
- ///
- /// # Examples
- ///
- /// ```
- /// use textwrap::Wrapper;
- ///
- /// let wrapper = Wrapper::new(15);
- /// assert_eq!(wrapper.fill("Memory safety without garbage collection."),
- /// "Memory safety\nwithout garbage\ncollection.");
- /// ```
- pub fn fill(&self, s: &str) -> String {
- // This will avoid reallocation in simple cases (no
- // indentation, no hyphenation).
- let mut result = String::with_capacity(s.len());
-
- for (i, line) in self.wrap_iter(s).enumerate() {
- if i > 0 {
- result.push('\n');
- }
- result.push_str(&line);
- }
-
- result
- }
-
- /// Wrap a line of text at `self.width` characters. Strings are
- /// wrapped based on their displayed width, not their size in
- /// bytes.
- ///
- /// # Complexities
- ///
- /// This method simply collects the lines produced by `wrap_iter`.
- /// As such, it inherits the O(*n*) overall time and memory
- /// complexity where *n* is the input string length.
- ///
- /// # Examples
- ///
- /// ```
- /// use textwrap::Wrapper;
- ///
- /// let wrap15 = Wrapper::new(15);
- /// assert_eq!(wrap15.wrap("Concurrency without data races."),
- /// vec!["Concurrency",
- /// "without data",
- /// "races."]);
- ///
- /// let wrap20 = Wrapper::new(20);
- /// assert_eq!(wrap20.wrap("Concurrency without data races."),
- /// vec!["Concurrency without",
- /// "data races."]);
- /// ```
- ///
- /// Notice that newlines in the input are preserved. This means
- /// that they force a line break, regardless of how long the
- /// current line is:
- ///
- /// ```
- /// use textwrap::Wrapper;
- ///
- /// let wrapper = Wrapper::new(40);
- /// assert_eq!(wrapper.wrap("First line.\nSecond line."),
- /// vec!["First line.", "Second line."]);
- /// ```
- ///
- pub fn wrap(&self, s: &'a str) -> Vec<Cow<'a, str>> {
- self.wrap_iter(s).collect::<Vec<_>>()
- }
-
- /// Lazily wrap a line of text at `self.width` characters. Strings
- /// are wrapped based on their displayed width, not their size in
- /// bytes.
- ///
- /// The [`WordSplitter`] stored in [`self.splitter`] is used
- /// whenever when a word is too large to fit on the current line.
- /// By changing the field, different hyphenation strategies can be
- /// implemented.
- ///
- /// # Complexities
- ///
- /// This method returns a [`WrapIter`] iterator which borrows this
- /// `Wrapper`. The algorithm used has a linear complexity, so
- /// getting the next line from the iterator will take O(*w*) time,
- /// where *w* is the wrapping width. Fully processing the iterator
- /// will take O(*n*) time for an input string of length *n*.
- ///
- /// When no indentation is used, each line returned is a slice of
- /// the input string and the memory overhead is thus constant.
- /// Otherwise new memory is allocated for each line returned.
- ///
- /// # Examples
- ///
- /// ```
- /// use std::borrow::Cow;
- /// use textwrap::Wrapper;
- ///
- /// let wrap20 = Wrapper::new(20);
- /// let mut wrap20_iter = wrap20.wrap_iter("Zero-cost abstractions.");
- /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost")));
- /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions.")));
- /// assert_eq!(wrap20_iter.next(), None);
- ///
- /// let wrap25 = Wrapper::new(25);
- /// let mut wrap25_iter = wrap25.wrap_iter("Zero-cost abstractions.");
- /// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions.")));
- /// assert_eq!(wrap25_iter.next(), None);
- /// ```
- ///
- /// [`self.splitter`]: #structfield.splitter
- /// [`WordSplitter`]: trait.WordSplitter.html
- /// [`WrapIter`]: struct.WrapIter.html
- pub fn wrap_iter<'w>(&'w self, s: &'a str) -> WrapIter<'w, 'a, S> {
- WrapIter {
- wrapper: self,
- inner: WrapIterImpl::new(self, s),
- }
- }
-
- /// Lazily wrap a line of text at `self.width` characters. Strings
- /// are wrapped based on their displayed width, not their size in
- /// bytes.
- ///
- /// The [`WordSplitter`] stored in [`self.splitter`] is used
- /// whenever when a word is too large to fit on the current line.
- /// By changing the field, different hyphenation strategies can be
- /// implemented.
- ///
- /// # Complexities
- ///
- /// This method consumes the `Wrapper` and returns a
- /// [`IntoWrapIter`] iterator. Fully processing the iterator has
- /// the same O(*n*) time complexity as [`wrap_iter`], where *n* is
- /// the length of the input string.
- ///
- /// # Examples
- ///
- /// ```
- /// use std::borrow::Cow;
- /// use textwrap::Wrapper;
- ///
- /// let wrap20 = Wrapper::new(20);
- /// let mut wrap20_iter = wrap20.into_wrap_iter("Zero-cost abstractions.");
- /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost")));
- /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions.")));
- /// assert_eq!(wrap20_iter.next(), None);
- /// ```
- ///
- /// [`self.splitter`]: #structfield.splitter
- /// [`WordSplitter`]: trait.WordSplitter.html
- /// [`IntoWrapIter`]: struct.IntoWrapIter.html
- /// [`wrap_iter`]: #method.wrap_iter
- pub fn into_wrap_iter(self, s: &'a str) -> IntoWrapIter<'a, S> {
- let inner = WrapIterImpl::new(&self, s);
-
- IntoWrapIter {
- wrapper: self,
- inner: inner,
- }
- }
-}
-
-/// An iterator over the lines of the input string which owns a
-/// `Wrapper`. An instance of `IntoWrapIter` is typically obtained
-/// through either [`wrap_iter`] or [`Wrapper::into_wrap_iter`].
-///
-/// Each call of `.next()` method yields a line wrapped in `Some` if the
-/// input hasn't been fully processed yet. Otherwise it returns `None`.
-///
-/// [`wrap_iter`]: fn.wrap_iter.html
-/// [`Wrapper::into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter
-#[derive(Debug)]
-pub struct IntoWrapIter<'a, S: WordSplitter> {
- wrapper: Wrapper<'a, S>,
- inner: WrapIterImpl<'a>,
-}
-
-impl<'a, S: WordSplitter> Iterator for IntoWrapIter<'a, S> {
- type Item = Cow<'a, str>;
-
- fn next(&mut self) -> Option<Cow<'a, str>> {
- self.inner.next(&self.wrapper)
- }
-}
-
-/// An iterator over the lines of the input string which borrows a
-/// `Wrapper`. An instance of `WrapIter` is typically obtained
-/// through the [`Wrapper::wrap_iter`] method.
-///
-/// Each call of `.next()` method yields a line wrapped in `Some` if the
-/// input hasn't been fully processed yet. Otherwise it returns `None`.
-///
-/// [`Wrapper::wrap_iter`]: struct.Wrapper.html#method.wrap_iter
-#[derive(Debug)]
-pub struct WrapIter<'w, 'a: 'w, S: WordSplitter + 'w> {
- wrapper: &'w Wrapper<'a, S>,
- inner: WrapIterImpl<'a>,
-}
-
-impl<'w, 'a: 'w, S: WordSplitter> Iterator for WrapIter<'w, 'a, S> {
- type Item = Cow<'a, str>;
-
- fn next(&mut self) -> Option<Cow<'a, str>> {
- self.inner.next(self.wrapper)
- }
-}
-
-/// Like `char::is_whitespace`, but non-breaking spaces don't count.
-#[inline]
-fn is_whitespace(ch: char) -> bool {
- ch.is_whitespace() && ch != NBSP
-}
-
-/// Common implementation details for `WrapIter` and `IntoWrapIter`.
-#[derive(Debug)]
-struct WrapIterImpl<'a> {
- // String to wrap.
- source: &'a str,
- // CharIndices iterator over self.source.
- char_indices: CharIndices<'a>,
- // Byte index where the current line starts.
- start: usize,
- // Byte index of the last place where the string can be split.
- split: usize,
- // Size in bytes of the character at self.source[self.split].
- split_len: usize,
- // Width of self.source[self.start..idx].
- line_width: usize,
- // Width of self.source[self.start..self.split].
- line_width_at_split: usize,
- // Tracking runs of whitespace characters.
- in_whitespace: bool,
- // Has iterator finished producing elements?
- finished: bool,
-}
-
-impl<'a> WrapIterImpl<'a> {
- fn new<S: WordSplitter>(wrapper: &Wrapper<'a, S>, s: &'a str) -> WrapIterImpl<'a> {
- WrapIterImpl {
- source: s,
- char_indices: s.char_indices(),
- start: 0,
- split: 0,
- split_len: 0,
- line_width: wrapper.initial_indent.width(),
- line_width_at_split: wrapper.initial_indent.width(),
- in_whitespace: false,
- finished: false,
- }
- }
-
- fn create_result_line<S: WordSplitter>(&self, wrapper: &Wrapper<'a, S>) -> Cow<'a, str> {
- if self.start == 0 {
- Cow::from(wrapper.initial_indent)
- } else {
- Cow::from(wrapper.subsequent_indent)
- }
- }
-
- fn next<S: WordSplitter>(&mut self, wrapper: &Wrapper<'a, S>) -> Option<Cow<'a, str>> {
- if self.finished {
- return None;
- }
-
- while let Some((idx, ch)) = self.char_indices.next() {
- let char_width = ch.width().unwrap_or(0);
- let char_len = ch.len_utf8();
-
- if ch == '\n' {
- self.split = idx;
- self.split_len = char_len;
- self.line_width_at_split = self.line_width;
- self.in_whitespace = false;
-
- // If this is not the final line, return the current line. Otherwise,
- // we will return the line with its line break after exiting the loop
- if self.split + self.split_len < self.source.len() {
- let mut line = self.create_result_line(wrapper);
- line += &self.source[self.start..self.split];
-
- self.start = self.split + self.split_len;
- self.line_width = wrapper.subsequent_indent.width();
-
- return Some(line);
- }
- } else if is_whitespace(ch) {
- // Extend the previous split or create a new one.
- if self.in_whitespace {
- self.split_len += char_len;
- } else {
- self.split = idx;
- self.split_len = char_len;
- }
- self.line_width_at_split = self.line_width + char_width;
- self.in_whitespace = true;
- } else if self.line_width + char_width > wrapper.width {
- // There is no room for this character on the current
- // line. Try to split the final word.
- self.in_whitespace = false;
- let remaining_text = &self.source[self.split + self.split_len..];
- let final_word = match remaining_text.find(is_whitespace) {
- Some(i) => &remaining_text[..i],
- None => remaining_text,
- };
-
- let mut hyphen = "";
- let splits = wrapper.splitter.split(final_word);
- for &(head, hyp, _) in splits.iter().rev() {
- if self.line_width_at_split + head.width() + hyp.width() <= wrapper.width {
- // We can fit head into the current line.
- // Advance the split point by the width of the
- // whitespace and the head length.
- self.split += self.split_len + head.len();
- self.split_len = 0;
- hyphen = hyp;
- break;
- }
- }
-
- if self.start >= self.split {
- // The word is too big to fit on a single line, so we
- // need to split it at the current index.
- if wrapper.break_words {
- // Break work at current index.
- self.split = idx;
- self.split_len = 0;
- self.line_width_at_split = self.line_width;
- } else {
- // Add smallest split.
- self.split = self.start + splits[0].0.len();
- self.split_len = 0;
- self.line_width_at_split = self.line_width;
- }
- }
-
- if self.start < self.split {
- let mut line = self.create_result_line(wrapper);
- line += &self.source[self.start..self.split];
- line += hyphen;
-
- self.start = self.split + self.split_len;
- self.line_width += wrapper.subsequent_indent.width();
- self.line_width -= self.line_width_at_split;
- self.line_width += char_width;
-
- return Some(line);
- }
- } else {
- self.in_whitespace = false;
- }
- self.line_width += char_width;
- }
-
- self.finished = true;
-
- // Add final line.
- if self.start < self.source.len() {
- let mut line = self.create_result_line(wrapper);
- line += &self.source[self.start..];
- return Some(line);
- }
-
- None
- }
-}
-
-/// Return the current terminal width. If the terminal width cannot be
-/// determined (typically because the standard output is not connected
-/// to a terminal), a default width of 80 characters will be used.
-///
-/// # Examples
-///
-/// Create a `Wrapper` for the current terminal with a two column
-/// margin:
-///
-/// ```no_run
-/// # #![allow(unused_variables)]
-/// use textwrap::{Wrapper, NoHyphenation, termwidth};
-///
-/// let width = termwidth() - 4; // Two columns on each side.
-/// let wrapper = Wrapper::with_splitter(width, NoHyphenation)
-/// .initial_indent(" ")
-/// .subsequent_indent(" ");
-/// ```
-#[cfg(feature = "term_size")]
-pub fn termwidth() -> usize {
- term_size::dimensions_stdout().map_or(80, |(w, _)| w)
-}
-
-/// Fill a line of text at `width` characters. Strings are wrapped
-/// based on their displayed width, not their size in bytes.
-///
-/// The result is a string with newlines between each line. Use
-/// [`wrap`] if you need access to the individual lines or
-/// [`wrap_iter`] for its iterator counterpart.
-///
-/// ```
-/// use textwrap::fill;
-///
-/// assert_eq!(fill("Memory safety without garbage collection.", 15),
-/// "Memory safety\nwithout garbage\ncollection.");
-/// ```
-///
-/// This function creates a Wrapper on the fly with default settings.
-/// If you need to set a language corpus for automatic hyphenation, or
-/// need to fill many strings, then it is suggested to create a Wrapper
-/// and call its [`fill` method].
-///
-/// [`wrap`]: fn.wrap.html
-/// [`wrap_iter`]: fn.wrap_iter.html
-/// [`fill` method]: struct.Wrapper.html#method.fill
-pub fn fill(s: &str, width: usize) -> String {
- Wrapper::new(width).fill(s)
-}
-
-/// Wrap a line of text at `width` characters. Strings are wrapped
-/// based on their displayed width, not their size in bytes.
-///
-/// This function creates a Wrapper on the fly with default settings.
-/// If you need to set a language corpus for automatic hyphenation, or
-/// need to wrap many strings, then it is suggested to create a Wrapper
-/// and call its [`wrap` method].
-///
-/// The result is a vector of strings. Use [`wrap_iter`] if you need an
-/// iterator version.
-///
-/// # Examples
-///
-/// ```
-/// use textwrap::wrap;
-///
-/// assert_eq!(wrap("Concurrency without data races.", 15),
-/// vec!["Concurrency",
-/// "without data",
-/// "races."]);
-///
-/// assert_eq!(wrap("Concurrency without data races.", 20),
-/// vec!["Concurrency without",
-/// "data races."]);
-/// ```
-///
-/// [`wrap_iter`]: fn.wrap_iter.html
-/// [`wrap` method]: struct.Wrapper.html#method.wrap
-pub fn wrap(s: &str, width: usize) -> Vec<Cow<str>> {
- Wrapper::new(width).wrap(s)
-}
-
-/// Lazily wrap a line of text at `width` characters. Strings are
-/// wrapped based on their displayed width, not their size in bytes.
-///
-/// This function creates a Wrapper on the fly with default settings.
-/// It then calls the [`into_wrap_iter`] method. Hence, the return
-/// value is an [`IntoWrapIter`], not a [`WrapIter`] as the function
-/// name would otherwise suggest.
-///
-/// If you need to set a language corpus for automatic hyphenation, or
-/// need to wrap many strings, then it is suggested to create a Wrapper
-/// and call its [`wrap_iter`] or [`into_wrap_iter`] methods.
-///
-/// # Examples
-///
-/// ```
-/// use std::borrow::Cow;
-/// use textwrap::wrap_iter;
-///
-/// let mut wrap20_iter = wrap_iter("Zero-cost abstractions.", 20);
-/// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost")));
-/// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions.")));
-/// assert_eq!(wrap20_iter.next(), None);
-///
-/// let mut wrap25_iter = wrap_iter("Zero-cost abstractions.", 25);
-/// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions.")));
-/// assert_eq!(wrap25_iter.next(), None);
-/// ```
-///
-/// [`wrap_iter`]: struct.Wrapper.html#method.wrap_iter
-/// [`into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter
-/// [`IntoWrapIter`]: struct.IntoWrapIter.html
-/// [`WrapIter`]: struct.WrapIter.html
-pub fn wrap_iter(s: &str, width: usize) -> IntoWrapIter<HyphenSplitter> {
- Wrapper::new(width).into_wrap_iter(s)
-}
-
-#[cfg(test)]
-mod tests {
- #[cfg(feature = "hyphenation")]
- extern crate hyphenation;
-
- use super::*;
- #[cfg(feature = "hyphenation")]
- use hyphenation::{Language, Load, Standard};
-
- #[test]
- fn no_wrap() {
- assert_eq!(wrap("foo", 10), vec!["foo"]);
- }
-
- #[test]
- fn simple() {
- assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]);
- }
-
- #[test]
- fn multi_word_on_line() {
- assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]);
- }
-
- #[test]
- fn long_word() {
- assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]);
- }
-
- #[test]
- fn long_words() {
- assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]);
- }
-
- #[test]
- fn max_width() {
- assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]);
- }
-
- #[test]
- fn leading_whitespace() {
- assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]);
- }
-
- #[test]
- fn trailing_whitespace() {
- assert_eq!(wrap("foo bar ", 6), vec!["foo", "bar "]);
- }
-
- #[test]
- fn interior_whitespace() {
- assert_eq!(wrap("foo: bar baz", 10), vec!["foo: bar", "baz"]);
- }
-
- #[test]
- fn extra_whitespace_start_of_line() {
- // Whitespace is only significant inside a line. After a line
- // gets too long and is broken, the first word starts in
- // column zero and is not indented. The line before might end
- // up with trailing whitespace.
- assert_eq!(wrap("foo bar", 5), vec!["foo", "bar"]);
- }
-
- #[test]
- fn issue_99() {
- // We did not reset the in_whitespace flag correctly and did
- // not handle single-character words after a line break.
- assert_eq!(
- wrap("aaabbbccc x yyyzzzwww", 9),
- vec!["aaabbbccc", "x", "yyyzzzwww"]
- );
- }
-
- #[test]
- fn issue_129() {
- // The dash is an em-dash which takes up four bytes. We used
- // to panic since we tried to index into the character.
- assert_eq!(wrap("x – x", 1), vec!["x", "–", "x"]);
- }
-
- #[test]
- fn wide_character_handling() {
- assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]);
- assert_eq!(
- wrap("Hello, World!", 15),
- vec!["Hello,", "World!"]
- );
- }
-
- #[test]
- fn empty_input_not_indented() {
- let wrapper = Wrapper::new(10).initial_indent("!!!");
- assert_eq!(wrapper.fill(""), "");
- }
-
- #[test]
- fn indent_single_line() {
- let wrapper = Wrapper::new(10).initial_indent(">>>"); // No trailing space
- assert_eq!(wrapper.fill("foo"), ">>>foo");
- }
-
- #[test]
- fn indent_multiple_lines() {
- let wrapper = Wrapper::new(6).initial_indent("* ").subsequent_indent(" ");
- assert_eq!(wrapper.wrap("foo bar baz"), vec!["* foo", " bar", " baz"]);
- }
-
- #[test]
- fn indent_break_words() {
- let wrapper = Wrapper::new(5).initial_indent("* ").subsequent_indent(" ");
- assert_eq!(wrapper.wrap("foobarbaz"), vec!["* foo", " bar", " baz"]);
- }
-
- #[test]
- fn hyphens() {
- assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]);
- }
-
- #[test]
- fn trailing_hyphen() {
- let wrapper = Wrapper::new(5).break_words(false);
- assert_eq!(wrapper.wrap("foobar-"), vec!["foobar-"]);
- }
-
- #[test]
- fn multiple_hyphens() {
- assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]);
- }
-
- #[test]
- fn hyphens_flag() {
- let wrapper = Wrapper::new(5).break_words(false);
- assert_eq!(
- wrapper.wrap("The --foo-bar flag."),
- vec!["The", "--foo-", "bar", "flag."]
- );
- }
-
- #[test]
- fn repeated_hyphens() {
- let wrapper = Wrapper::new(4).break_words(false);
- assert_eq!(wrapper.wrap("foo--bar"), vec!["foo--bar"]);
- }
-
- #[test]
- fn hyphens_alphanumeric() {
- assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]);
- }
-
- #[test]
- fn hyphens_non_alphanumeric() {
- let wrapper = Wrapper::new(5).break_words(false);
- assert_eq!(wrapper.wrap("foo(-)bar"), vec!["foo(-)bar"]);
- }
-
- #[test]
- fn multiple_splits() {
- assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]);
- }
-
- #[test]
- fn forced_split() {
- let wrapper = Wrapper::new(5).break_words(false);
- assert_eq!(wrapper.wrap("foobar-baz"), vec!["foobar-", "baz"]);
- }
-
- #[test]
- fn no_hyphenation() {
- let wrapper = Wrapper::with_splitter(8, NoHyphenation);
- assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
- }
-
- #[test]
- #[cfg(feature = "hyphenation")]
- fn auto_hyphenation() {
- let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let wrapper = Wrapper::new(10);
- assert_eq!(
- wrapper.wrap("Internationalization"),
- vec!["Internatio", "nalization"]
- );
-
- let wrapper = Wrapper::with_splitter(10, dictionary);
- assert_eq!(
- wrapper.wrap("Internationalization"),
- vec!["Interna-", "tionaliza-", "tion"]
- );
- }
-
- #[test]
- #[cfg(feature = "hyphenation")]
- fn split_len_hyphenation() {
- // Test that hyphenation takes the width of the wihtespace
- // into account.
- let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let wrapper = Wrapper::with_splitter(15, dictionary);
- assert_eq!(
- wrapper.wrap("garbage collection"),
- vec!["garbage col-", "lection"]
- );
- }
-
- #[test]
- #[cfg(feature = "hyphenation")]
- fn borrowed_lines() {
- // Lines that end with an extra hyphen are owned, the final
- // line is borrowed.
- use std::borrow::Cow::{Borrowed, Owned};
- let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let wrapper = Wrapper::with_splitter(10, dictionary);
- let lines = wrapper.wrap("Internationalization");
- if let Borrowed(s) = lines[0] {
- assert!(false, "should not have been borrowed: {:?}", s);
- }
- if let Borrowed(s) = lines[1] {
- assert!(false, "should not have been borrowed: {:?}", s);
- }
- if let Owned(ref s) = lines[2] {
- assert!(false, "should not have been owned: {:?}", s);
- }
- }
-
- #[test]
- #[cfg(feature = "hyphenation")]
- fn auto_hyphenation_with_hyphen() {
- let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
- let wrapper = Wrapper::new(8).break_words(false);
- assert_eq!(wrapper.wrap("over-caffinated"), vec!["over-", "caffinated"]);
-
- let wrapper = Wrapper::with_splitter(8, dictionary).break_words(false);
- assert_eq!(
- wrapper.wrap("over-caffinated"),
- vec!["over-", "caffi-", "nated"]
- );
- }
-
- #[test]
- fn break_words() {
- assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]);
- }
-
- #[test]
- fn break_words_wide_characters() {
- assert_eq!(wrap("Hello", 5), vec!["He", "ll", "o"]);
- }
-
- #[test]
- fn break_words_zero_width() {
- assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]);
- }
-
- #[test]
- fn break_words_line_breaks() {
- assert_eq!(fill("ab\ncdefghijkl", 5), "ab\ncdefg\nhijkl");
- assert_eq!(fill("abcdefgh\nijkl", 5), "abcde\nfgh\nijkl");
- }
-
- #[test]
- fn preserve_line_breaks() {
- assert_eq!(fill("test\n", 11), "test\n");
- assert_eq!(fill("test\n\na\n\n", 11), "test\n\na\n\n");
- assert_eq!(fill("1 3 5 7\n1 3 5 7", 7), "1 3 5 7\n1 3 5 7");
- }
-
- #[test]
- fn wrap_preserve_line_breaks() {
- assert_eq!(fill("1 3 5 7\n1 3 5 7", 5), "1 3 5\n7\n1 3 5\n7");
- }
-
- #[test]
- fn non_breaking_space() {
- let wrapper = Wrapper::new(5).break_words(false);
- assert_eq!(wrapper.fill("foo bar baz"), "foo bar baz");
- }
-
- #[test]
- fn non_breaking_hyphen() {
- let wrapper = Wrapper::new(5).break_words(false);
- assert_eq!(wrapper.fill("foo‑bar‑baz"), "foo‑bar‑baz");
- }
-
- #[test]
- fn fill_simple() {
- assert_eq!(fill("foo bar baz", 10), "foo bar\nbaz");
- }
-}
diff --git a/textwrap/src/splitting.rs b/textwrap/src/splitting.rs
deleted file mode 100644
index f6b65af..0000000
--- a/textwrap/src/splitting.rs
+++ /dev/null
@@ -1,139 +0,0 @@
-//! Word splitting functionality.
-//!
-//! To wrap text into lines, long words sometimes need to be split
-//! across lines. The [`WordSplitter`] trait defines this
-//! functionality. [`HyphenSplitter`] is the default implementation of
-//! this treat: it will simply split words on existing hyphens.
-
-#[cfg(feature = "hyphenation")]
-use hyphenation::{Hyphenator, Standard};
-
-/// An interface for splitting words.
-///
-/// When the [`wrap_iter`] method will try to fit text into a line, it
-/// will eventually find a word that it too large the current text
-/// width. It will then call the currently configured `WordSplitter` to
-/// have it attempt to split the word into smaller parts. This trait
-/// describes that functionality via the [`split`] method.
-///
-/// If the `textwrap` crate has been compiled with the `hyphenation`
-/// feature enabled, you will find an implementation of `WordSplitter`
-/// by the `hyphenation::language::Corpus` struct. Use this struct for
-/// language-aware hyphenation. See the [`hyphenation` documentation]
-/// for details.
-///
-/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter
-/// [`split`]: #tymethod.split
-/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
-pub trait WordSplitter {
- /// Return all possible splits of word. Each split is a triple
- /// with a head, a hyphen, and a tail where `head + &hyphen +
- /// &tail == word`. The hyphen can be empty if there is already a
- /// hyphen in the head.
- ///
- /// The splits should go from smallest to longest and should
- /// include no split at all. So the word "technology" could be
- /// split into
- ///
- /// ```no_run
- /// vec![("tech", "-", "nology"),
- /// ("technol", "-", "ogy"),
- /// ("technolo", "-", "gy"),
- /// ("technology", "", "")];
- /// ```
- fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
-}
-
-/// Use this as a [`Wrapper.splitter`] to avoid any kind of
-/// hyphenation:
-///
-/// ```
-/// use textwrap::{Wrapper, NoHyphenation};
-///
-/// let wrapper = Wrapper::with_splitter(8, NoHyphenation);
-/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
-/// ```
-///
-/// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter
-#[derive(Clone, Debug)]
-pub struct NoHyphenation;
-
-/// `NoHyphenation` implements `WordSplitter` by not splitting the
-/// word at all.
-impl WordSplitter for NoHyphenation {
- fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
- vec![(word, "", "")]
- }
-}
-
-/// Simple and default way to split words: splitting on existing
-/// hyphens only.
-///
-/// You probably don't need to use this type since it's already used
-/// by default by `Wrapper::new`.
-#[derive(Clone, Debug)]
-pub struct HyphenSplitter;
-
-/// `HyphenSplitter` is the default `WordSplitter` used by
-/// `Wrapper::new`. It will split words on any existing hyphens in the
-/// word.
-///
-/// It will only use hyphens that are surrounded by alphanumeric
-/// characters, which prevents a word like "--foo-bar" from being
-/// split on the first or second hyphen.
-impl WordSplitter for HyphenSplitter {
- fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
- let mut triples = Vec::new();
- // Split on hyphens, smallest split first. We only use hyphens
- // that are surrounded by alphanumeric characters. This is to
- // avoid splitting on repeated hyphens, such as those found in
- // --foo-bar.
- let mut char_indices = word.char_indices();
- // Early return if the word is empty.
- let mut prev = match char_indices.next() {
- None => return vec![(word, "", "")],
- Some((_, ch)) => ch,
- };
-
- // Find current word, or return early if the word only has a
- // single character.
- let (mut idx, mut cur) = match char_indices.next() {
- None => return vec![(word, "", "")],
- Some((idx, cur)) => (idx, cur),
- };
-
- for (i, next) in char_indices {
- if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() {
- let (head, tail) = word.split_at(idx + 1);
- triples.push((head, "", tail));
- }
- prev = cur;
- idx = i;
- cur = next;
- }
-
- // Finally option is no split at all.
- triples.push((word, "", ""));
-
- triples
- }
-}
-
-/// A hyphenation dictionary can be used to do language-specific
-/// hyphenation using patterns from the hyphenation crate.
-#[cfg(feature = "hyphenation")]
-impl WordSplitter for Standard {
- fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
- // Find splits based on language dictionary.
- let mut triples = Vec::new();
- for n in self.hyphenate(word).breaks {
- let (head, tail) = word.split_at(n);
- let hyphen = if head.ends_with('-') { "" } else { "-" };
- triples.push((head, hyphen, tail));
- }
- // Finally option is no split at all.
- triples.push((word, "", ""));
-
- triples
- }
-}