about summary refs log tree commit diff
path: root/textwrap/src
diff options
context:
space:
mode:
author Robin Krahl <robin.krahl@ireas.org> 2020-01-07 11:18:04 +0000
committer Daniel Mueller <deso@posteo.net> 2020-01-08 09:20:25 -0800
commit 5e20a29b4fdc8a2d442d1093681b396dcb4b816b (patch)
tree 55ab083fa8999d2ccbb5e921c1ffe52560dca152 /textwrap/src
parent 203e691f46d591a2cc8acdfd850fa9f5b0fb8a98 (diff)
download nitrocli-5e20a29b4fdc8a2d442d1093681b396dcb4b816b.tar.gz
nitrocli-5e20a29b4fdc8a2d442d1093681b396dcb4b816b.tar.bz2
Add structopt dependency in version 0.3.7
This patch series replaces argparse with structopt in the argument handling code. As a first step, we need structopt as a dependency.

Import subrepo structopt/:structopt at efbdda4753592e27bc430fb01f7b9650b2f3174d
Import subrepo bitflags/:bitflags at 30668016aca6bd3b02c766e8347e0b4080d4c296
Import subrepo clap/:clap at 784524f7eb193e35f81082cc69454c8c21b948f7
Import subrepo heck/:heck at 093d56fbf001e1506e56dbfa38631d99b1066df1
Import subrepo proc-macro-error/:proc-macro-error at 6c4cfe79a622c5de8ae68557993542be46eacae2
Import subrepo proc-macro2/:proc-macro2 at d5d48eddca4566e5438e8a2cbed4a74e049544de
Import subrepo quote/:quote at 727436c6c137b20f0f34dde5d8fda2679b9747ad
Import subrepo rustversion/:rustversion at 0c5663313516263059ce9059ef81fc7a1cf655ca
Import subrepo syn-mid/:syn-mid at 5d3d85414a9e6674e1857ec22a87b96e04a6851a
Import subrepo syn/:syn at e87c27e87f6f4ef8919d0372bdb056d53ef0d8f3
Import subrepo textwrap/:textwrap at abcd618beae3f74841032aa5b53c1086b0a57ca2
Import subrepo unicode-segmentation/:unicode-segmentation at 637c9874c4fe0c205ff27787faf150a40295c6c3
Import subrepo unicode-width/:unicode-width at 3033826f8bf05e82724140a981d5941e48fce393
Import subrepo unicode-xid/:unicode-xid at 4baae9fffb156ba229665b972a9cd5991787ceb7
Diffstat (limited to 'textwrap/src')
-rw-r--r-- textwrap/src/indentation.rs 294
-rw-r--r-- textwrap/src/lib.rs 987
-rw-r--r-- textwrap/src/splitting.rs 139
3 files changed, 1420 insertions, 0 deletions
diff --git a/textwrap/src/indentation.rs b/textwrap/src/indentation.rs
new file mode 100644
index 0000000..276ba10
--- /dev/null
+++ b/textwrap/src/indentation.rs
@@ -0,0 +1,294 @@
+//! Functions related to adding and removing indentation from lines of
+//! text.
+//!
+//! The functions here can be used to uniformly indent or dedent
+//! (unindent) word wrapped lines of text.
+
+/// Add prefix to each non-empty line.
+///
+/// ```
+/// use textwrap::indent;
+///
+/// assert_eq!(indent("
+/// Foo
+/// Bar
+/// ", " "), "
+/// Foo
+/// Bar
+/// ");
+/// ```
+///
+/// Empty lines (lines consisting only of whitespace) are not indented
+/// and the whitespace is replaced by a single newline (`\n`):
+///
+/// ```
+/// use textwrap::indent;
+///
+/// assert_eq!(indent("
+/// Foo
+///
+/// Bar
+/// \t
+/// Baz
+/// ", "->"), "
+/// ->Foo
+///
+/// ->Bar
+///
+/// ->Baz
+/// ");
+/// ```
+///
+/// Leading and trailing whitespace on non-empty lines is kept
+/// unchanged:
+///
+/// ```
+/// use textwrap::indent;
+///
+/// assert_eq!(indent(" \t Foo ", "->"), "-> \t Foo \n");
+/// ```
+pub fn indent(s: &str, prefix: &str) -> String {
+ let mut result = String::new();
+ for line in s.lines() {
+ if line.chars().any(|c| !c.is_whitespace()) {
+ result.push_str(prefix);
+ result.push_str(line);
+ }
+ result.push('\n');
+ }
+ result
+}
+
+/// Removes common leading whitespace from each line.
+///
+/// This function will look at each non-empty line and determine the
+/// maximum amount of whitespace that can be removed from all lines:
+///
+/// ```
+/// use textwrap::dedent;
+///
+/// assert_eq!(dedent("
+/// 1st line
+/// 2nd line
+/// 3rd line
+/// "), "
+/// 1st line
+/// 2nd line
+/// 3rd line
+/// ");
+/// ```
+pub fn dedent(s: &str) -> String {
+ let mut prefix = "";
+ let mut lines = s.lines();
+
+ // We first search for a non-empty line to find a prefix.
+ for line in &mut lines {
+ let mut whitespace_idx = line.len();
+ for (idx, ch) in line.char_indices() {
+ if !ch.is_whitespace() {
+ whitespace_idx = idx;
+ break;
+ }
+ }
+
+ // Check if the line had anything but whitespace
+ if whitespace_idx < line.len() {
+ prefix = &line[..whitespace_idx];
+ break;
+ }
+ }
+
+ // We then continue looking through the remaining lines to
+ // possibly shorten the prefix.
+ for line in &mut lines {
+ let mut whitespace_idx = line.len();
+ for ((idx, a), b) in line.char_indices().zip(prefix.chars()) {
+ if a != b {
+ whitespace_idx = idx;
+ break;
+ }
+ }
+
+ // Check if the line had anything but whitespace and if we
+ // have found a shorter prefix
+ if whitespace_idx < line.len() && whitespace_idx < prefix.len() {
+ prefix = &line[..whitespace_idx];
+ }
+ }
+
+ // We now go over the lines a second time to build the result.
+ let mut result = String::new();
+ for line in s.lines() {
+ if line.starts_with(&prefix) && line.chars().any(|c| !c.is_whitespace()) {
+ let (_, tail) = line.split_at(prefix.len());
+ result.push_str(tail);
+ }
+ result.push('\n');
+ }
+
+ if result.ends_with('\n') && !s.ends_with('\n') {
+ let new_len = result.len() - 1;
+ result.truncate(new_len);
+ }
+
+ result
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ /// Add newlines. Ensures that the final line in the vector also
+ /// has a newline.
+ fn add_nl(lines: &[&str]) -> String {
+ lines.join("\n") + "\n"
+ }
+
+ #[test]
+ fn indent_empty() {
+ assert_eq!(indent("\n", " "), "\n");
+ }
+
+ #[test]
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ fn indent_nonempty() {
+ let x = vec![" foo",
+ "bar",
+ " baz"];
+ let y = vec!["// foo",
+ "//bar",
+ "// baz"];
+ assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
+ }
+
+ #[test]
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ fn indent_empty_line() {
+ let x = vec![" foo",
+ "bar",
+ "",
+ " baz"];
+ let y = vec!["// foo",
+ "//bar",
+ "",
+ "// baz"];
+ assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y));
+ }
+
+ #[test]
+ fn dedent_empty() {
+ assert_eq!(dedent(""), "");
+ }
+
+ #[test]
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ fn dedent_multi_line() {
+ let x = vec![" foo",
+ " bar",
+ " baz"];
+ let y = vec![" foo",
+ "bar",
+ " baz"];
+ assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ }
+
+ #[test]
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ fn dedent_empty_line() {
+ let x = vec![" foo",
+ " bar",
+ " ",
+ " baz"];
+ let y = vec![" foo",
+ "bar",
+ "",
+ " baz"];
+ assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ }
+
+ #[test]
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ fn dedent_blank_line() {
+ let x = vec![" foo",
+ "",
+ " bar",
+ " foo",
+ " bar",
+ " baz"];
+ let y = vec!["foo",
+ "",
+ " bar",
+ " foo",
+ " bar",
+ " baz"];
+ assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ }
+
+ #[test]
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ fn dedent_whitespace_line() {
+ let x = vec![" foo",
+ " ",
+ " bar",
+ " foo",
+ " bar",
+ " baz"];
+ let y = vec!["foo",
+ "",
+ " bar",
+ " foo",
+ " bar",
+ " baz"];
+ assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ }
+
+ #[test]
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ fn dedent_mixed_whitespace() {
+ let x = vec!["\tfoo",
+ " bar"];
+ let y = vec!["\tfoo",
+ " bar"];
+ assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ }
+
+ #[test]
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ fn dedent_tabbed_whitespace() {
+ let x = vec!["\t\tfoo",
+ "\t\t\tbar"];
+ let y = vec!["foo",
+ "\tbar"];
+ assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ }
+
+ #[test]
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ fn dedent_mixed_tabbed_whitespace() {
+ let x = vec!["\t \tfoo",
+ "\t \t\tbar"];
+ let y = vec!["foo",
+ "\tbar"];
+ assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ }
+
+ #[test]
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ fn dedent_mixed_tabbed_whitespace2() {
+ let x = vec!["\t \tfoo",
+ "\t \tbar"];
+ let y = vec!["\tfoo",
+ " \tbar"];
+ assert_eq!(dedent(&add_nl(&x)), add_nl(&y));
+ }
+
+ #[test]
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ fn dedent_preserve_no_terminating_newline() {
+ let x = vec![" foo",
+ " bar"].join("\n");
+ let y = vec!["foo",
+ " bar"].join("\n");
+ assert_eq!(dedent(&x), y);
+ }
+}
diff --git a/textwrap/src/lib.rs b/textwrap/src/lib.rs
new file mode 100644
index 0000000..2f82325
--- /dev/null
+++ b/textwrap/src/lib.rs
@@ -0,0 +1,987 @@
+//! `textwrap` provides functions for word wrapping and filling text.
+//!
+//! Wrapping text can be very useful in commandline programs where you
+//! want to format dynamic output nicely so it looks good in a
+//! terminal. A quick example:
+//!
+//! ```no_run
+//! extern crate textwrap;
+//! use textwrap::fill;
+//!
+//! fn main() {
+//! let text = "textwrap: a small library for wrapping text.";
+//! println!("{}", fill(text, 18));
+//! }
+//! ```
+//!
+//! This will display the following output:
+//!
+//! ```text
+//! textwrap: a small
+//! library for
+//! wrapping text.
+//! ```
+//!
+//! # Displayed Width vs Byte Size
+//!
+//! To word wrap text, one must know the width of each word so one can
+//! know when to break lines. This library measures the width of text
+//! using the [displayed width][unicode-width], not the size in bytes.
+//!
+//! This is important for non-ASCII text. ASCII characters such as `a`
+//! and `!` are simple and take up one column each. This means that
+//! the displayed width is equal to the string length in bytes.
+//! However, non-ASCII characters and symbols take up more than one
+//! byte when UTF-8 encoded: `é` is `0xc3 0xa9` (two bytes) and `⚙` is
+//! `0xe2 0x9a 0x99` (three bytes) in UTF-8, respectively.
+//!
+//! This is why we take care to use the displayed width instead of the
+//! byte count when computing line lengths. All functions in this
+//! library handle Unicode characters like this.
+//!
+//! [unicode-width]: https://docs.rs/unicode-width/
+
+#![doc(html_root_url = "https://docs.rs/textwrap/0.11.0")]
+#![deny(missing_docs)]
+#![deny(missing_debug_implementations)]
+
+#[cfg(feature = "hyphenation")]
+extern crate hyphenation;
+#[cfg(feature = "term_size")]
+extern crate term_size;
+extern crate unicode_width;
+
+use std::borrow::Cow;
+use std::str::CharIndices;
+
+use unicode_width::UnicodeWidthChar;
+use unicode_width::UnicodeWidthStr;
+
+/// A non-breaking space.
+const NBSP: char = '\u{a0}';
+
+mod indentation;
+pub use indentation::dedent;
+pub use indentation::indent;
+
+mod splitting;
+pub use splitting::{HyphenSplitter, NoHyphenation, WordSplitter};
+
+/// A Wrapper holds settings for wrapping and filling text. Use it
+/// when the convenience [`wrap_iter`], [`wrap`] and [`fill`] functions
+/// are not flexible enough.
+///
+/// [`wrap_iter`]: fn.wrap_iter.html
+/// [`wrap`]: fn.wrap.html
+/// [`fill`]: fn.fill.html
+///
+/// The algorithm used by the `WrapIter` iterator (returned from the
+/// `wrap_iter` method) works by doing successive partial scans over
+/// words in the input string (where each single scan yields a single
+/// line) so that the overall time and memory complexity is O(*n*) where
+/// *n* is the length of the input string.
+#[derive(Clone, Debug)]
+pub struct Wrapper<'a, S: WordSplitter> {
+ /// The width in columns at which the text will be wrapped.
+ pub width: usize,
+ /// Indentation used for the first line of output.
+ pub initial_indent: &'a str,
+ /// Indentation used for subsequent lines of output.
+ pub subsequent_indent: &'a str,
+ /// Allow long words to be broken if they cannot fit on a line.
+ /// When set to `false`, some lines may be longer than
+ /// `self.width`.
+ pub break_words: bool,
+ /// The method for splitting words. If the `hyphenation` feature
+ /// is enabled, you can use a `hyphenation::Standard` dictionary
+ /// here to get language-aware hyphenation.
+ pub splitter: S,
+}
+
+impl<'a> Wrapper<'a, HyphenSplitter> {
+ /// Create a new Wrapper for wrapping at the specified width. By
+ /// default, we allow words longer than `width` to be broken. A
+ /// [`HyphenSplitter`] will be used by default for splitting
+ /// words. See the [`WordSplitter`] trait for other options.
+ ///
+ /// [`HyphenSplitter`]: struct.HyphenSplitter.html
+ /// [`WordSplitter`]: trait.WordSplitter.html
+ pub fn new(width: usize) -> Wrapper<'a, HyphenSplitter> {
+ Wrapper::with_splitter(width, HyphenSplitter)
+ }
+
+ /// Create a new Wrapper for wrapping text at the current terminal
+ /// width. If the terminal width cannot be determined (typically
+ /// because the standard output is not connected to a
+ /// terminal), a width of 80 characters will be used. Other
+ /// settings use the same defaults as `Wrapper::new`.
+ ///
+ /// Equivalent to:
+ ///
+ /// ```no_run
+ /// # #![allow(unused_variables)]
+ /// use textwrap::{Wrapper, termwidth};
+ ///
+ /// let wrapper = Wrapper::new(termwidth());
+ /// ```
+ #[cfg(feature = "term_size")]
+ pub fn with_termwidth() -> Wrapper<'a, HyphenSplitter> {
+ Wrapper::new(termwidth())
+ }
+}
+
+impl<'a, S: WordSplitter> Wrapper<'a, S> {
+ /// Use the given [`WordSplitter`] to create a new Wrapper for
+ /// wrapping at the specified width. By default, we allow words
+ /// longer than `width` to be broken.
+ ///
+ /// [`WordSplitter`]: trait.WordSplitter.html
+ pub fn with_splitter(width: usize, splitter: S) -> Wrapper<'a, S> {
+ Wrapper {
+ width: width,
+ initial_indent: "",
+ subsequent_indent: "",
+ break_words: true,
+ splitter: splitter,
+ }
+ }
+
+ /// Change [`self.initial_indent`]. The initial indentation is
+ /// used on the very first line of output.
+ ///
+ /// # Examples
+ ///
+ /// Classic paragraph indentation can be achieved by specifying an
+ /// initial indentation and wrapping each paragraph by itself:
+ ///
+ /// ```no_run
+ /// # #![allow(unused_variables)]
+ /// use textwrap::Wrapper;
+ ///
+ /// let wrapper = Wrapper::new(15).initial_indent(" ");
+ /// ```
+ ///
+ /// [`self.initial_indent`]: #structfield.initial_indent
+ pub fn initial_indent(self, indent: &'a str) -> Wrapper<'a, S> {
+ Wrapper {
+ initial_indent: indent,
+ ..self
+ }
+ }
+
+ /// Change [`self.subsequent_indent`]. The subsequent indentation
+ /// is used on lines following the first line of output.
+ ///
+ /// # Examples
+ ///
+ /// Combining initial and subsequent indentation lets you format a
+ /// single paragraph as a bullet list:
+ ///
+ /// ```no_run
+ /// # #![allow(unused_variables)]
+ /// use textwrap::Wrapper;
+ ///
+ /// let wrapper = Wrapper::new(15)
+ /// .initial_indent("* ")
+ /// .subsequent_indent(" ");
+ /// ```
+ ///
+ /// [`self.subsequent_indent`]: #structfield.subsequent_indent
+ pub fn subsequent_indent(self, indent: &'a str) -> Wrapper<'a, S> {
+ Wrapper {
+ subsequent_indent: indent,
+ ..self
+ }
+ }
+
+ /// Change [`self.break_words`]. This controls if words longer
+ /// than `self.width` can be broken, or if they will be left
+ /// sticking out into the right margin.
+ ///
+ /// [`self.break_words`]: #structfield.break_words
+ pub fn break_words(self, setting: bool) -> Wrapper<'a, S> {
+ Wrapper {
+ break_words: setting,
+ ..self
+ }
+ }
+
+ /// Fill a line of text at `self.width` characters. Strings are
+ /// wrapped based on their displayed width, not their size in
+ /// bytes.
+ ///
+ /// The result is a string with newlines between each line. Use
+ /// the `wrap` method if you need access to the individual lines.
+ ///
+ /// # Complexities
+ ///
+ /// This method simply joins the lines produced by `wrap_iter`. As
+ /// such, it inherits the O(*n*) time and memory complexity where
+ /// *n* is the input string length.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use textwrap::Wrapper;
+ ///
+ /// let wrapper = Wrapper::new(15);
+ /// assert_eq!(wrapper.fill("Memory safety without garbage collection."),
+ /// "Memory safety\nwithout garbage\ncollection.");
+ /// ```
+ pub fn fill(&self, s: &str) -> String {
+ // This will avoid reallocation in simple cases (no
+ // indentation, no hyphenation).
+ let mut result = String::with_capacity(s.len());
+
+ for (i, line) in self.wrap_iter(s).enumerate() {
+ if i > 0 {
+ result.push('\n');
+ }
+ result.push_str(&line);
+ }
+
+ result
+ }
+
+ /// Wrap a line of text at `self.width` characters. Strings are
+ /// wrapped based on their displayed width, not their size in
+ /// bytes.
+ ///
+ /// # Complexities
+ ///
+ /// This method simply collects the lines produced by `wrap_iter`.
+ /// As such, it inherits the O(*n*) overall time and memory
+ /// complexity where *n* is the input string length.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use textwrap::Wrapper;
+ ///
+ /// let wrap15 = Wrapper::new(15);
+ /// assert_eq!(wrap15.wrap("Concurrency without data races."),
+ /// vec!["Concurrency",
+ /// "without data",
+ /// "races."]);
+ ///
+ /// let wrap20 = Wrapper::new(20);
+ /// assert_eq!(wrap20.wrap("Concurrency without data races."),
+ /// vec!["Concurrency without",
+ /// "data races."]);
+ /// ```
+ ///
+ /// Notice that newlines in the input are preserved. This means
+ /// that they force a line break, regardless of how long the
+ /// current line is:
+ ///
+ /// ```
+ /// use textwrap::Wrapper;
+ ///
+ /// let wrapper = Wrapper::new(40);
+ /// assert_eq!(wrapper.wrap("First line.\nSecond line."),
+ /// vec!["First line.", "Second line."]);
+ /// ```
+ ///
+ pub fn wrap(&self, s: &'a str) -> Vec<Cow<'a, str>> {
+ self.wrap_iter(s).collect::<Vec<_>>()
+ }
+
+ /// Lazily wrap a line of text at `self.width` characters. Strings
+ /// are wrapped based on their displayed width, not their size in
+ /// bytes.
+ ///
+ /// The [`WordSplitter`] stored in [`self.splitter`] is used
+ /// whenever a word is too large to fit on the current line.
+ /// By changing the field, different hyphenation strategies can be
+ /// implemented.
+ ///
+ /// # Complexities
+ ///
+ /// This method returns a [`WrapIter`] iterator which borrows this
+ /// `Wrapper`. The algorithm used has a linear complexity, so
+ /// getting the next line from the iterator will take O(*w*) time,
+ /// where *w* is the wrapping width. Fully processing the iterator
+ /// will take O(*n*) time for an input string of length *n*.
+ ///
+ /// When no indentation is used, each line returned is a slice of
+ /// the input string and the memory overhead is thus constant.
+ /// Otherwise new memory is allocated for each line returned.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::borrow::Cow;
+ /// use textwrap::Wrapper;
+ ///
+ /// let wrap20 = Wrapper::new(20);
+ /// let mut wrap20_iter = wrap20.wrap_iter("Zero-cost abstractions.");
+ /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost")));
+ /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions.")));
+ /// assert_eq!(wrap20_iter.next(), None);
+ ///
+ /// let wrap25 = Wrapper::new(25);
+ /// let mut wrap25_iter = wrap25.wrap_iter("Zero-cost abstractions.");
+ /// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions.")));
+ /// assert_eq!(wrap25_iter.next(), None);
+ /// ```
+ ///
+ /// [`self.splitter`]: #structfield.splitter
+ /// [`WordSplitter`]: trait.WordSplitter.html
+ /// [`WrapIter`]: struct.WrapIter.html
+ pub fn wrap_iter<'w>(&'w self, s: &'a str) -> WrapIter<'w, 'a, S> {
+ WrapIter {
+ wrapper: self,
+ inner: WrapIterImpl::new(self, s),
+ }
+ }
+
+ /// Lazily wrap a line of text at `self.width` characters. Strings
+ /// are wrapped based on their displayed width, not their size in
+ /// bytes.
+ ///
+ /// The [`WordSplitter`] stored in [`self.splitter`] is used
+ /// whenever a word is too large to fit on the current line.
+ /// By changing the field, different hyphenation strategies can be
+ /// implemented.
+ ///
+ /// # Complexities
+ ///
+ /// This method consumes the `Wrapper` and returns a
+ /// [`IntoWrapIter`] iterator. Fully processing the iterator has
+ /// the same O(*n*) time complexity as [`wrap_iter`], where *n* is
+ /// the length of the input string.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::borrow::Cow;
+ /// use textwrap::Wrapper;
+ ///
+ /// let wrap20 = Wrapper::new(20);
+ /// let mut wrap20_iter = wrap20.into_wrap_iter("Zero-cost abstractions.");
+ /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost")));
+ /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions.")));
+ /// assert_eq!(wrap20_iter.next(), None);
+ /// ```
+ ///
+ /// [`self.splitter`]: #structfield.splitter
+ /// [`WordSplitter`]: trait.WordSplitter.html
+ /// [`IntoWrapIter`]: struct.IntoWrapIter.html
+ /// [`wrap_iter`]: #method.wrap_iter
+ pub fn into_wrap_iter(self, s: &'a str) -> IntoWrapIter<'a, S> {
+ let inner = WrapIterImpl::new(&self, s);
+
+ IntoWrapIter {
+ wrapper: self,
+ inner: inner,
+ }
+ }
+}
+
+/// An iterator over the lines of the input string which owns a
+/// `Wrapper`. An instance of `IntoWrapIter` is typically obtained
+/// through either [`wrap_iter`] or [`Wrapper::into_wrap_iter`].
+///
+ /// Each call of the `.next()` method yields a line wrapped in `Some` if the
+/// input hasn't been fully processed yet. Otherwise it returns `None`.
+///
+/// [`wrap_iter`]: fn.wrap_iter.html
+/// [`Wrapper::into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter
+#[derive(Debug)]
+pub struct IntoWrapIter<'a, S: WordSplitter> {
+ wrapper: Wrapper<'a, S>,
+ inner: WrapIterImpl<'a>,
+}
+
+impl<'a, S: WordSplitter> Iterator for IntoWrapIter<'a, S> {
+ type Item = Cow<'a, str>;
+
+ fn next(&mut self) -> Option<Cow<'a, str>> {
+ self.inner.next(&self.wrapper)
+ }
+}
+
+/// An iterator over the lines of the input string which borrows a
+/// `Wrapper`. An instance of `WrapIter` is typically obtained
+/// through the [`Wrapper::wrap_iter`] method.
+///
+ /// Each call of the `.next()` method yields a line wrapped in `Some` if the
+/// input hasn't been fully processed yet. Otherwise it returns `None`.
+///
+/// [`Wrapper::wrap_iter`]: struct.Wrapper.html#method.wrap_iter
+#[derive(Debug)]
+pub struct WrapIter<'w, 'a: 'w, S: WordSplitter + 'w> {
+ wrapper: &'w Wrapper<'a, S>,
+ inner: WrapIterImpl<'a>,
+}
+
+impl<'w, 'a: 'w, S: WordSplitter> Iterator for WrapIter<'w, 'a, S> {
+ type Item = Cow<'a, str>;
+
+ fn next(&mut self) -> Option<Cow<'a, str>> {
+ self.inner.next(self.wrapper)
+ }
+}
+
+/// Like `char::is_whitespace`, but non-breaking spaces don't count.
+#[inline]
+fn is_whitespace(ch: char) -> bool {
+ ch.is_whitespace() && ch != NBSP
+}
+
+/// Common implementation details for `WrapIter` and `IntoWrapIter`.
+#[derive(Debug)]
+struct WrapIterImpl<'a> {
+ // String to wrap.
+ source: &'a str,
+ // CharIndices iterator over self.source.
+ char_indices: CharIndices<'a>,
+ // Byte index where the current line starts.
+ start: usize,
+ // Byte index of the last place where the string can be split.
+ split: usize,
+ // Size in bytes of the separator (newline or whitespace run) at self.source[self.split]; 0 for a split inside a word.
+ split_len: usize,
+ // Width of self.source[self.start..idx].
+ line_width: usize,
+ // Width of self.source[self.start..self.split].
+ line_width_at_split: usize,
+ // Tracking runs of whitespace characters.
+ in_whitespace: bool,
+ // Has iterator finished producing elements?
+ finished: bool,
+}
+
+impl<'a> WrapIterImpl<'a> {
+ fn new<S: WordSplitter>(wrapper: &Wrapper<'a, S>, s: &'a str) -> WrapIterImpl<'a> {
+ WrapIterImpl {
+ source: s,
+ char_indices: s.char_indices(),
+ start: 0,
+ split: 0,
+ split_len: 0,
+ line_width: wrapper.initial_indent.width(),
+ line_width_at_split: wrapper.initial_indent.width(),
+ in_whitespace: false,
+ finished: false,
+ }
+ }
+
+ fn create_result_line<S: WordSplitter>(&self, wrapper: &Wrapper<'a, S>) -> Cow<'a, str> {
+ if self.start == 0 {
+ Cow::from(wrapper.initial_indent)
+ } else {
+ Cow::from(wrapper.subsequent_indent)
+ }
+ }
+
+ fn next<S: WordSplitter>(&mut self, wrapper: &Wrapper<'a, S>) -> Option<Cow<'a, str>> {
+ if self.finished {
+ return None;
+ }
+
+ while let Some((idx, ch)) = self.char_indices.next() {
+ let char_width = ch.width().unwrap_or(0);
+ let char_len = ch.len_utf8();
+
+ if ch == '\n' {
+ self.split = idx;
+ self.split_len = char_len;
+ self.line_width_at_split = self.line_width;
+ self.in_whitespace = false;
+
+ // If this is not the final line, return the current line. Otherwise,
+ // we will return the line with its line break after exiting the loop
+ if self.split + self.split_len < self.source.len() {
+ let mut line = self.create_result_line(wrapper);
+ line += &self.source[self.start..self.split];
+
+ self.start = self.split + self.split_len;
+ self.line_width = wrapper.subsequent_indent.width();
+
+ return Some(line);
+ }
+ } else if is_whitespace(ch) {
+ // Extend the previous split or create a new one.
+ if self.in_whitespace {
+ self.split_len += char_len;
+ } else {
+ self.split = idx;
+ self.split_len = char_len;
+ }
+ self.line_width_at_split = self.line_width + char_width;
+ self.in_whitespace = true;
+ } else if self.line_width + char_width > wrapper.width {
+ // There is no room for this character on the current
+ // line. Try to split the final word.
+ self.in_whitespace = false;
+ let remaining_text = &self.source[self.split + self.split_len..];
+ let final_word = match remaining_text.find(is_whitespace) {
+ Some(i) => &remaining_text[..i],
+ None => remaining_text,
+ };
+
+ let mut hyphen = "";
+ let splits = wrapper.splitter.split(final_word);
+ for &(head, hyp, _) in splits.iter().rev() {
+ if self.line_width_at_split + head.width() + hyp.width() <= wrapper.width {
+ // We can fit head into the current line.
+ // Advance the split point by the byte length of the
+ // whitespace and of the head.
+ self.split += self.split_len + head.len();
+ self.split_len = 0;
+ hyphen = hyp;
+ break;
+ }
+ }
+
+ if self.start >= self.split {
+ // The word is too big to fit on a single line, so we
+ // need to split it at the current index.
+ if wrapper.break_words {
+ // Break word at current index.
+ self.split = idx;
+ self.split_len = 0;
+ self.line_width_at_split = self.line_width;
+ } else {
+ // Add smallest split.
+ self.split = self.start + splits[0].0.len();
+ self.split_len = 0;
+ self.line_width_at_split = self.line_width;
+ }
+ }
+
+ if self.start < self.split {
+ let mut line = self.create_result_line(wrapper);
+ line += &self.source[self.start..self.split];
+ line += hyphen;
+
+ self.start = self.split + self.split_len;
+ self.line_width += wrapper.subsequent_indent.width();
+ self.line_width -= self.line_width_at_split;
+ self.line_width += char_width;
+
+ return Some(line);
+ }
+ } else {
+ self.in_whitespace = false;
+ }
+ self.line_width += char_width;
+ }
+
+ self.finished = true;
+
+ // Add final line.
+ if self.start < self.source.len() {
+ let mut line = self.create_result_line(wrapper);
+ line += &self.source[self.start..];
+ return Some(line);
+ }
+
+ None
+ }
+}
+
+/// Return the current terminal width. If the terminal width cannot be
+/// determined (typically because the standard output is not connected
+/// to a terminal), a default width of 80 characters will be used.
+///
+/// # Examples
+///
+/// Create a `Wrapper` for the current terminal with a two column
+/// margin:
+///
+/// ```no_run
+/// # #![allow(unused_variables)]
+/// use textwrap::{Wrapper, NoHyphenation, termwidth};
+///
+/// let width = termwidth() - 4; // Two columns on each side.
+/// let wrapper = Wrapper::with_splitter(width, NoHyphenation)
+/// .initial_indent(" ")
+/// .subsequent_indent(" ");
+/// ```
+#[cfg(feature = "term_size")]
+pub fn termwidth() -> usize {
+ term_size::dimensions_stdout().map_or(80, |(w, _)| w)
+}
+
+/// Fill a line of text at `width` characters. Strings are wrapped
+/// based on their displayed width, not their size in bytes.
+///
+/// The result is a string with newlines between each line. Use
+/// [`wrap`] if you need access to the individual lines or
+/// [`wrap_iter`] for its iterator counterpart.
+///
+/// ```
+/// use textwrap::fill;
+///
+/// assert_eq!(fill("Memory safety without garbage collection.", 15),
+/// "Memory safety\nwithout garbage\ncollection.");
+/// ```
+///
+/// This function creates a Wrapper on the fly with default settings.
+/// If you need to set a language corpus for automatic hyphenation, or
+/// need to fill many strings, then it is suggested to create a Wrapper
+/// and call its [`fill` method].
+///
+/// [`wrap`]: fn.wrap.html
+/// [`wrap_iter`]: fn.wrap_iter.html
+/// [`fill` method]: struct.Wrapper.html#method.fill
+pub fn fill(s: &str, width: usize) -> String {
+ Wrapper::new(width).fill(s)
+}
+
+/// Wrap a line of text at `width` characters. Strings are wrapped
+/// based on their displayed width, not their size in bytes.
+///
+/// This function creates a Wrapper on the fly with default settings.
+/// If you need to set a language corpus for automatic hyphenation, or
+/// need to wrap many strings, then it is suggested to create a Wrapper
+/// and call its [`wrap` method].
+///
+/// The result is a vector of strings. Use [`wrap_iter`] if you need an
+/// iterator version.
+///
+/// # Examples
+///
+/// ```
+/// use textwrap::wrap;
+///
+/// assert_eq!(wrap("Concurrency without data races.", 15),
+/// vec!["Concurrency",
+/// "without data",
+/// "races."]);
+///
+/// assert_eq!(wrap("Concurrency without data races.", 20),
+/// vec!["Concurrency without",
+/// "data races."]);
+/// ```
+///
+/// [`wrap_iter`]: fn.wrap_iter.html
+/// [`wrap` method]: struct.Wrapper.html#method.wrap
+pub fn wrap(s: &str, width: usize) -> Vec<Cow<str>> {
+ Wrapper::new(width).wrap(s)
+}
+
+/// Lazily wrap a line of text at `width` characters. Strings are
+/// wrapped based on their displayed width, not their size in bytes.
+///
+/// This function creates a Wrapper on the fly with default settings.
+/// It then calls the [`into_wrap_iter`] method. Hence, the return
+/// value is an [`IntoWrapIter`], not a [`WrapIter`] as the function
+/// name would otherwise suggest.
+///
+/// If you need to set a language corpus for automatic hyphenation, or
+/// need to wrap many strings, then it is suggested to create a Wrapper
+/// and call its [`wrap_iter`] or [`into_wrap_iter`] methods.
+///
+/// # Examples
+///
+/// ```
+/// use std::borrow::Cow;
+/// use textwrap::wrap_iter;
+///
+/// let mut wrap20_iter = wrap_iter("Zero-cost abstractions.", 20);
+/// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost")));
+/// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions.")));
+/// assert_eq!(wrap20_iter.next(), None);
+///
+/// let mut wrap25_iter = wrap_iter("Zero-cost abstractions.", 25);
+/// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions.")));
+/// assert_eq!(wrap25_iter.next(), None);
+/// ```
+///
+/// [`wrap_iter`]: struct.Wrapper.html#method.wrap_iter
+/// [`into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter
+/// [`IntoWrapIter`]: struct.IntoWrapIter.html
+/// [`WrapIter`]: struct.WrapIter.html
+pub fn wrap_iter(s: &str, width: usize) -> IntoWrapIter<HyphenSplitter> {
+ Wrapper::new(width).into_wrap_iter(s)
+}
+
+// Unit tests for the `wrap`/`fill` convenience functions and the
+// `Wrapper` builder.
+//
+// Strings whose meaning depends on special characters (full-width
+// letters, non-breaking space, non-breaking hyphen, en-dash) are
+// spelled with `\u{...}` escapes so that the tests keep their meaning
+// even if a tool normalizes the Unicode text of this file.
+#[cfg(test)]
+mod tests {
+    #[cfg(feature = "hyphenation")]
+    extern crate hyphenation;
+
+    use super::*;
+    #[cfg(feature = "hyphenation")]
+    use hyphenation::{Language, Load, Standard};
+
+    #[test]
+    fn no_wrap() {
+        assert_eq!(wrap("foo", 10), vec!["foo"]);
+    }
+
+    #[test]
+    fn simple() {
+        assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]);
+    }
+
+    #[test]
+    fn multi_word_on_line() {
+        assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]);
+    }
+
+    #[test]
+    fn long_word() {
+        assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]);
+    }
+
+    #[test]
+    fn long_words() {
+        assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]);
+    }
+
+    #[test]
+    fn max_width() {
+        assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]);
+    }
+
+    #[test]
+    fn leading_whitespace() {
+        assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]);
+    }
+
+    #[test]
+    fn trailing_whitespace() {
+        assert_eq!(wrap("foo bar ", 6), vec!["foo", "bar "]);
+    }
+
+    #[test]
+    fn interior_whitespace() {
+        assert_eq!(wrap("foo: bar baz", 10), vec!["foo: bar", "baz"]);
+    }
+
+    #[test]
+    fn extra_whitespace_start_of_line() {
+        // Whitespace is only significant inside a line. After a line
+        // gets too long and is broken, the first word starts in
+        // column zero and is not indented. The line before might end
+        // up with trailing whitespace.
+        assert_eq!(wrap("foo bar", 5), vec!["foo", "bar"]);
+    }
+
+    #[test]
+    fn issue_99() {
+        // We did not reset the in_whitespace flag correctly and did
+        // not handle single-character words after a line break.
+        assert_eq!(
+            wrap("aaabbbccc x yyyzzzwww", 9),
+            vec!["aaabbbccc", "x", "yyyzzzwww"]
+        );
+    }
+
+    #[test]
+    fn issue_129() {
+        // The dash is an en-dash (U+2013), which takes up three bytes
+        // in UTF-8. We used to panic since we tried to index into the
+        // character.
+        assert_eq!(wrap("x \u{2013} x", 1), vec!["x", "\u{2013}", "x"]);
+    }
+
+    #[test]
+    fn wide_character_handling() {
+        assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]);
+
+        // The same text spelled with full-width letters (U+FF21..):
+        // every letter occupies two columns, so the two words no
+        // longer fit on one 15-column line.
+        let wide = "\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}, \
+                    \u{ff37}\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!";
+        assert_eq!(
+            wrap(wide, 15),
+            vec![
+                "\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f},",
+                "\u{ff37}\u{ff4f}\u{ff52}\u{ff4c}\u{ff44}!"
+            ]
+        );
+    }
+
+    #[test]
+    fn empty_input_not_indented() {
+        let wrapper = Wrapper::new(10).initial_indent("!!!");
+        assert_eq!(wrapper.fill(""), "");
+    }
+
+    #[test]
+    fn indent_single_line() {
+        let wrapper = Wrapper::new(10).initial_indent(">>>"); // No trailing space
+        assert_eq!(wrapper.fill("foo"), ">>>foo");
+    }
+
+    #[test]
+    fn indent_multiple_lines() {
+        let wrapper = Wrapper::new(6).initial_indent("* ").subsequent_indent(" ");
+        assert_eq!(wrapper.wrap("foo bar baz"), vec!["* foo", " bar", " baz"]);
+    }
+
+    #[test]
+    fn indent_break_words() {
+        // Both indents are two columns wide, which leaves room for
+        // exactly three letters on each 5-column line.
+        let wrapper = Wrapper::new(5).initial_indent("* ").subsequent_indent("  ");
+        assert_eq!(wrapper.wrap("foobarbaz"), vec!["* foo", "  bar", "  baz"]);
+    }
+
+    #[test]
+    fn hyphens() {
+        assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]);
+    }
+
+    #[test]
+    fn trailing_hyphen() {
+        let wrapper = Wrapper::new(5).break_words(false);
+        assert_eq!(wrapper.wrap("foobar-"), vec!["foobar-"]);
+    }
+
+    #[test]
+    fn multiple_hyphens() {
+        assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]);
+    }
+
+    #[test]
+    fn hyphens_flag() {
+        let wrapper = Wrapper::new(5).break_words(false);
+        assert_eq!(
+            wrapper.wrap("The --foo-bar flag."),
+            vec!["The", "--foo-", "bar", "flag."]
+        );
+    }
+
+    #[test]
+    fn repeated_hyphens() {
+        let wrapper = Wrapper::new(4).break_words(false);
+        assert_eq!(wrapper.wrap("foo--bar"), vec!["foo--bar"]);
+    }
+
+    #[test]
+    fn hyphens_alphanumeric() {
+        assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]);
+    }
+
+    #[test]
+    fn hyphens_non_alphanumeric() {
+        let wrapper = Wrapper::new(5).break_words(false);
+        assert_eq!(wrapper.wrap("foo(-)bar"), vec!["foo(-)bar"]);
+    }
+
+    #[test]
+    fn multiple_splits() {
+        assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]);
+    }
+
+    #[test]
+    fn forced_split() {
+        let wrapper = Wrapper::new(5).break_words(false);
+        assert_eq!(wrapper.wrap("foobar-baz"), vec!["foobar-", "baz"]);
+    }
+
+    #[test]
+    fn no_hyphenation() {
+        let wrapper = Wrapper::with_splitter(8, NoHyphenation);
+        assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
+    }
+
+    #[test]
+    #[cfg(feature = "hyphenation")]
+    fn auto_hyphenation() {
+        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+        let wrapper = Wrapper::new(10);
+        assert_eq!(
+            wrapper.wrap("Internationalization"),
+            vec!["Internatio", "nalization"]
+        );
+
+        let wrapper = Wrapper::with_splitter(10, dictionary);
+        assert_eq!(
+            wrapper.wrap("Internationalization"),
+            vec!["Interna-", "tionaliza-", "tion"]
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "hyphenation")]
+    fn split_len_hyphenation() {
+        // Test that hyphenation takes the width of the whitespace
+        // into account.
+        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+        let wrapper = Wrapper::with_splitter(15, dictionary);
+        assert_eq!(
+            wrapper.wrap("garbage collection"),
+            vec!["garbage col-", "lection"]
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "hyphenation")]
+    fn borrowed_lines() {
+        // Lines that end with an extra hyphen are owned, the final
+        // line is borrowed.
+        use std::borrow::Cow::{Borrowed, Owned};
+        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+        let wrapper = Wrapper::with_splitter(10, dictionary);
+        let lines = wrapper.wrap("Internationalization");
+        if let Borrowed(s) = lines[0] {
+            panic!("should not have been borrowed: {:?}", s);
+        }
+        if let Borrowed(s) = lines[1] {
+            panic!("should not have been borrowed: {:?}", s);
+        }
+        if let Owned(ref s) = lines[2] {
+            panic!("should not have been owned: {:?}", s);
+        }
+    }
+
+    #[test]
+    #[cfg(feature = "hyphenation")]
+    fn auto_hyphenation_with_hyphen() {
+        let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
+        let wrapper = Wrapper::new(8).break_words(false);
+        assert_eq!(wrapper.wrap("over-caffinated"), vec!["over-", "caffinated"]);
+
+        let wrapper = Wrapper::with_splitter(8, dictionary).break_words(false);
+        assert_eq!(
+            wrapper.wrap("over-caffinated"),
+            vec!["over-", "caffi-", "nated"]
+        );
+    }
+
+    #[test]
+    fn break_words() {
+        assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]);
+    }
+
+    #[test]
+    fn break_words_wide_characters() {
+        // "Hello" in full-width letters: each letter is two columns
+        // wide, so only two of them fit on a 5-column line.
+        assert_eq!(
+            wrap("\u{ff28}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}", 5),
+            vec!["\u{ff28}\u{ff45}", "\u{ff4c}\u{ff4c}", "\u{ff4f}"]
+        );
+    }
+
+    #[test]
+    fn break_words_zero_width() {
+        assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]);
+    }
+
+    #[test]
+    fn break_words_line_breaks() {
+        assert_eq!(fill("ab\ncdefghijkl", 5), "ab\ncdefg\nhijkl");
+        assert_eq!(fill("abcdefgh\nijkl", 5), "abcde\nfgh\nijkl");
+    }
+
+    #[test]
+    fn preserve_line_breaks() {
+        assert_eq!(fill("test\n", 11), "test\n");
+        assert_eq!(fill("test\n\na\n\n", 11), "test\n\na\n\n");
+        assert_eq!(fill("1 3 5 7\n1 3 5 7", 7), "1 3 5 7\n1 3 5 7");
+    }
+
+    #[test]
+    fn wrap_preserve_line_breaks() {
+        assert_eq!(fill("1 3 5 7\n1 3 5 7", 5), "1 3 5\n7\n1 3 5\n7");
+    }
+
+    #[test]
+    fn non_breaking_space() {
+        // The words are joined by U+00A0 NO-BREAK SPACE, which must
+        // not be treated as a place to break the line.
+        let wrapper = Wrapper::new(5).break_words(false);
+        assert_eq!(
+            wrapper.fill("foo\u{a0}bar\u{a0}baz"),
+            "foo\u{a0}bar\u{a0}baz"
+        );
+    }
+
+    #[test]
+    fn non_breaking_hyphen() {
+        // The words are joined by U+2011 NON-BREAKING HYPHEN, which
+        // must not be treated as a place to split the word.
+        let wrapper = Wrapper::new(5).break_words(false);
+        assert_eq!(
+            wrapper.fill("foo\u{2011}bar\u{2011}baz"),
+            "foo\u{2011}bar\u{2011}baz"
+        );
+    }
+
+    #[test]
+    fn fill_simple() {
+        assert_eq!(fill("foo bar baz", 10), "foo bar\nbaz");
+    }
+}
diff --git a/textwrap/src/splitting.rs b/textwrap/src/splitting.rs
new file mode 100644
index 0000000..f6b65af
--- /dev/null
+++ b/textwrap/src/splitting.rs
@@ -0,0 +1,139 @@
+//! Word splitting functionality.
+//!
+//! To wrap text into lines, long words sometimes need to be split
+//! across lines. The [`WordSplitter`] trait defines this
+//! functionality. [`HyphenSplitter`] is the default implementation of
+//! this trait: it will simply split words on existing hyphens.
+
+#[cfg(feature = "hyphenation")]
+use hyphenation::{Hyphenator, Standard};
+
+/// An interface for splitting words.
+///
+/// When the [`wrap_iter`] method tries to fit text into a line, it
+/// will eventually find a word that is too large for the current
+/// text width. It will then call the currently configured
+/// `WordSplitter` to have it attempt to split the word into smaller
+/// parts. This trait describes that functionality via the [`split`]
+/// method.
+///
+/// If the `textwrap` crate has been compiled with the `hyphenation`
+/// feature enabled, you will find an implementation of `WordSplitter`
+/// by the `hyphenation::Standard` struct. Use this struct for
+/// language-aware hyphenation. See the [`hyphenation` documentation]
+/// for details.
+///
+/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter
+/// [`split`]: #tymethod.split
+/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
+pub trait WordSplitter {
+ /// Return all possible splits of word. Each split is a triple
+ /// with a head, a hyphen, and a tail where `head + tail == word`.
+ /// The hyphen is the text to insert after the head when the word
+ /// is broken at that point; it can be empty if there is already
+ /// a hyphen in the head.
+ ///
+ /// The splits should go from the shortest head to the longest
+ /// and should include no split at all (the whole word as head,
+ /// with an empty hyphen and tail). So the word "technology" could
+ /// be split into
+ ///
+ /// ```no_run
+ /// vec![("tech", "-", "nology"),
+ /// ("technol", "-", "ogy"),
+ /// ("technolo", "-", "gy"),
+ /// ("technology", "", "")];
+ /// ```
+ fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
+}
+
+/// A word splitter that never splits. Use it as a
+/// [`Wrapper.splitter`] to avoid any kind of hyphenation:
+///
+/// ```
+/// use textwrap::{Wrapper, NoHyphenation};
+///
+/// let wrapper = Wrapper::with_splitter(8, NoHyphenation);
+/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
+/// ```
+///
+/// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter
+#[derive(Clone, Debug)]
+pub struct NoHyphenation;
+
+/// `NoHyphenation` implements `WordSplitter` by offering the whole
+/// word as the only possible "split".
+impl WordSplitter for NoHyphenation {
+    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
+        // The single candidate keeps the word intact: the head is the
+        // whole word, the hyphen and the tail are empty.
+        let unsplit = (word, "", "");
+        vec![unsplit]
+    }
+}
+
+/// The simple, default way to split words: only at hyphens that are
+/// already present in the word.
+///
+/// You rarely need to name this type yourself, since `Wrapper::new`
+/// already uses it by default.
+#[derive(Clone, Debug)]
+pub struct HyphenSplitter;
+
+/// `HyphenSplitter` is the default `WordSplitter` used by
+/// `Wrapper::new`. It splits words after existing hyphens.
+///
+/// Only hyphens that have an alphanumeric character on both sides
+/// are used, which prevents a word like "--foo-bar" from being split
+/// on the first or second hyphen.
+impl WordSplitter for HyphenSplitter {
+    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
+        // Look at the word through a sliding window of three
+        // consecutive characters (before, middle, after). Zipping
+        // stops at the shortest iterator, so words with fewer than
+        // three characters produce no window and thus no hyphen split.
+        let before = word.chars();
+        let middle = word.char_indices().skip(1);
+        let after = word.chars().skip(2);
+
+        // Candidates come out in order of increasing head length.
+        let mut splits: Vec<_> = before
+            .zip(middle)
+            .zip(after)
+            .filter(|&((lhs, (_, mid)), rhs)| {
+                lhs.is_alphanumeric() && mid == '-' && rhs.is_alphanumeric()
+            })
+            .map(|((_, (pos, _)), _)| {
+                // `pos` is the byte offset of the one-byte '-', so
+                // cutting at `pos + 1` leaves the hyphen at the end of
+                // the head and no extra hyphen has to be inserted.
+                let (head, tail) = word.split_at(pos + 1);
+                (head, "", tail)
+            })
+            .collect();
+
+        // The final option is no split at all.
+        splits.push((word, "", ""));
+        splits
+    }
+}
+
+/// Language-specific hyphenation: a dictionary from the hyphenation
+/// crate supplies the break points based on its patterns.
+#[cfg(feature = "hyphenation")]
+impl WordSplitter for Standard {
+    fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
+        // One candidate per break offset reported by the dictionary.
+        let mut splits: Vec<_> = self
+            .hyphenate(word)
+            .breaks
+            .into_iter()
+            .map(|at| {
+                let (head, tail) = word.split_at(at);
+                // Ask for an extra "-" only when the head does not
+                // already end with one.
+                let hyphen = if head.ends_with('-') { "" } else { "-" };
+                (head, hyphen, tail)
+            })
+            .collect();
+
+        // The final option is no split at all.
+        splits.push((word, "", ""));
+        splits
+    }
+}