diff options
Diffstat (limited to 'textwrap')
-rw-r--r-- | textwrap/.appveyor.yml | 23 | ||||
-rw-r--r-- | textwrap/.circleci/config.yml | 13 | ||||
-rw-r--r-- | textwrap/.codecov.yml | 13 | ||||
-rw-r--r-- | textwrap/.dir-locals.el | 3 | ||||
-rw-r--r-- | textwrap/.gitignore | 5 | ||||
-rw-r--r-- | textwrap/.travis.yml | 17 | ||||
-rw-r--r-- | textwrap/Cargo.toml | 38 | ||||
-rw-r--r-- | textwrap/LICENSE | 21 | ||||
-rw-r--r-- | textwrap/README.md | 337 | ||||
-rw-r--r-- | textwrap/benches/linear.rs | 122 | ||||
-rw-r--r-- | textwrap/examples/layout.rs | 38 | ||||
-rw-r--r-- | textwrap/examples/termwidth.rs | 41 | ||||
-rw-r--r-- | textwrap/src/indentation.rs | 294 | ||||
-rw-r--r-- | textwrap/src/lib.rs | 987 | ||||
-rw-r--r-- | textwrap/src/splitting.rs | 139 | ||||
-rw-r--r-- | textwrap/tests/version-numbers.rs | 17 |
16 files changed, 2108 insertions, 0 deletions
diff --git a/textwrap/.appveyor.yml b/textwrap/.appveyor.yml new file mode 100644 index 0000000..3555d73 --- /dev/null +++ b/textwrap/.appveyor.yml @@ -0,0 +1,23 @@ +environment: + matrix: + - TOOLCHAIN: stable + - TOOLCHAIN: nightly + +matrix: + allow_failures: + - TOOLCHAIN: nightly + +install: + - ps: Start-FileDownload 'https://static.rust-lang.org/rustup/dist/i686-pc-windows-gnu/rustup-init.exe' + - rustup-init.exe -y --default-toolchain %TOOLCHAIN% + - set PATH=%PATH%;%USERPROFILE%\.cargo\bin + +build_script: + - cargo build --verbose --all-features + +test_script: + - cargo test --verbose --all-features + +cache: + - '%USERPROFILE%\.cargo' + - target diff --git a/textwrap/.circleci/config.yml b/textwrap/.circleci/config.yml new file mode 100644 index 0000000..4f928a6 --- /dev/null +++ b/textwrap/.circleci/config.yml @@ -0,0 +1,13 @@ +version: 2 +jobs: + build: + docker: + - image: xd009642/tarpaulin + steps: + - checkout + - run: + name: Generate coverage report + command: cargo tarpaulin --out Xml --all-features + - run: + name: Upload to codecov.io + command: bash <(curl -s https://codecov.io/bash) -Z -f cobertura.xml diff --git a/textwrap/.codecov.yml b/textwrap/.codecov.yml new file mode 100644 index 0000000..a9b0bd2 --- /dev/null +++ b/textwrap/.codecov.yml @@ -0,0 +1,13 @@ +codecov: + # Do not wait for these CI providers since they will not upload any + # coverage reports. + ci: + - !appveyor + - !travis + +coverage: + status: + project: + default: + # Allow a 5% drop in overall project coverage on a PR. + threshold: 5% diff --git a/textwrap/.dir-locals.el b/textwrap/.dir-locals.el new file mode 100644 index 0000000..0faa664 --- /dev/null +++ b/textwrap/.dir-locals.el @@ -0,0 +1,3 @@ +((nil + (bug-reference-bug-regexp . "#\\(?2:[0-9]+\\)") + (bug-reference-url-format . "https://github.com/mgeisler/textwrap/issues/%s"))) diff --git a/textwrap/.gitignore b/textwrap/.gitignore new file mode 100644 index 0000000..6513848 --- /dev/null +++ b/textwrap/.gitignore @@ -0,0 +1,5 @@ +Cargo.lock +target/ + +*~ +*.orig diff --git a/textwrap/.travis.yml b/textwrap/.travis.yml new file mode 100644 index 0000000..156418b --- /dev/null +++ b/textwrap/.travis.yml @@ -0,0 +1,17 @@ +language: rust + +rust: + - stable + - beta + - nightly + - 1.22.0 + +cache: cargo + +script: + - cargo build --verbose --all-features + - cargo test --verbose --all-features + +matrix: + allow_failures: + - rust: nightly diff --git a/textwrap/Cargo.toml b/textwrap/Cargo.toml new file mode 100644 index 0000000..9d82ca5 --- /dev/null +++ b/textwrap/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "textwrap" +version = "0.11.0" +authors = ["Martin Geisler <martin@geisler.net>"] +description = """ +Textwrap is a small library for word wrapping, indenting, and +dedenting strings. + +You can use it to format strings (such as help and error messages) for +display in commandline applications. It is designed to be efficient +and handle Unicode characters correctly. +""" +documentation = "https://docs.rs/textwrap/" +repository = "https://github.com/mgeisler/textwrap" +readme = "README.md" +keywords = ["text", "formatting", "wrap", "typesetting", "hyphenation"] +categories = ["text-processing", "command-line-interface"] +license = "MIT" +exclude = [".dir-locals.el"] + +[package.metadata.docs.rs] +all-features = true + +[badges] +travis-ci = { repository = "mgeisler/textwrap" } +appveyor = { repository = "mgeisler/textwrap" } +codecov = { repository = "mgeisler/textwrap" } + +[dependencies] +unicode-width = "0.1.3" +term_size = { version = "0.3.0", optional = true } +hyphenation = { version = "0.7.1", optional = true, features = ["embed_all"] } + +[dev-dependencies] +lipsum = "0.6" +rand = "0.6" +rand_xorshift = "0.1" +version-sync = "0.6" diff --git a/textwrap/LICENSE b/textwrap/LICENSE new file mode 100644 index 0000000..0d37ec3 --- /dev/null +++ b/textwrap/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016 Martin Geisler + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/textwrap/README.md b/textwrap/README.md new file mode 100644 index 0000000..23a5439 --- /dev/null +++ b/textwrap/README.md @@ -0,0 +1,337 @@ +# Textwrap + +[![](https://img.shields.io/crates/v/textwrap.svg)][crates-io] +[![](https://docs.rs/textwrap/badge.svg)][api-docs] +[![](https://travis-ci.org/mgeisler/textwrap.svg?branch=master)][travis-ci] +[![](https://ci.appveyor.com/api/projects/status/github/mgeisler/textwrap?branch=master&svg=true)][appveyor] +[![](https://codecov.io/gh/mgeisler/textwrap/branch/master/graph/badge.svg)][codecov] + +Textwrap is a small Rust crate for word wrapping text. You can use it +to format strings for display in commandline applications. The crate +name and interface is inspired by +the [Python textwrap module][py-textwrap]. + +## Usage + +Add this to your `Cargo.toml`: +```toml +[dependencies] +textwrap = "0.11" +``` + +and this to your crate root: +```rust +extern crate textwrap; +``` + +If you would like to have automatic hyphenation, specify the +dependency as: +```toml +[dependencies] +textwrap = { version = "0.11", features = ["hyphenation"] } +``` + +To conveniently wrap text at the current terminal width, enable the +`term_size` feature: + +```toml +[dependencies] +textwrap = { version = "0.11", features = ["term_size"] } +``` + +## Documentation + +**[API documentation][api-docs]** + +## Getting Started + +Word wrapping single strings is easy using the `fill` function: +```rust +extern crate textwrap; +use textwrap::fill; + +fn main() { + let text = "textwrap: a small library for wrapping text."; + println!("{}", fill(text, 18)); +} +``` +The output is +``` +textwrap: a small +library for +wrapping text. +``` + +With the `hyphenation` feature, you can get automatic hyphenation +for [about 70 languages][patterns]. Your program must load and +configure the hyphenation patterns to use: +```rust +extern crate hyphenation; +extern crate textwrap; + +use hyphenation::{Language, Load, Standard}; +use textwrap::Wrapper; + +fn main() { + let hyphenator = Standard::from_embedded(Language::EnglishUS).unwrap(); + let wrapper = Wrapper::with_splitter(18, hyphenator); + let text = "textwrap: a small library for wrapping text."; + println!("{}", wrapper.fill(text)) +} +``` + +The output now looks like this: +``` +textwrap: a small +library for wrap- +ping text. +``` + +The hyphenation uses high-quality TeX hyphenation patterns. + +## Examples + +The library comes with some small example programs that shows various +features. + +### Layout Example + +The `layout` example shows how a fixed example string is wrapped at +different widths. Run the example with: + +```shell +$ cargo run --features hyphenation --example layout +``` + +The program will use the following string: + +> Memory safety without garbage collection. Concurrency without data +> races. Zero-cost abstractions. + +The string is wrapped at all widths between 15 and 60 columns. With +narrow columns the output looks like this: + +``` +.--- Width: 15 ---. +| Memory safety | +| without garbage | +| collection. | +| Concurrency | +| without data | +| races. Zero- | +| cost abstrac- | +| tions. | +.--- Width: 16 ----. +| Memory safety | +| without garbage | +| collection. Con- | +| currency without | +| data races. Ze- | +| ro-cost abstrac- | +| tions. | +``` + +Later, longer lines are used and the output now looks like this: + +``` +.-------------------- Width: 49 --------------------. +| Memory safety without garbage collection. Concur- | +| rency without data races. Zero-cost abstractions. | +.---------------------- Width: 53 ----------------------. +| Memory safety without garbage collection. Concurrency | +| without data races. Zero-cost abstractions. | +.------------------------- Width: 59 -------------------------. +| Memory safety without garbage collection. Concurrency with- | +| out data races. Zero-cost abstractions. | +``` + +Notice how words are split at hyphens (such as "zero-cost") but also +how words are hyphenated using automatic/machine hyphenation. + +### Terminal Width Example + +The `termwidth` example simply shows how the width can be set +automatically to the current terminal width. Run it with this command: + +``` +$ cargo run --example termwidth +``` + +If you run it in a narrow terminal, you'll see output like this: +``` +Formatted in within 60 columns: +---- +Memory safety without garbage collection. Concurrency +without data races. Zero-cost abstractions. +---- +``` + +If `stdout` is not connected to the terminal, the program will use a +default of 80 columns for the width: + +``` +$ cargo run --example termwidth | cat +Formatted in within 80 columns: +---- +Memory safety without garbage collection. Concurrency without data races. Zero- +cost abstractions. +---- +``` + +## Release History + +This section lists the largest changes per release. + +### Version 0.11.0 — December 9th, 2018 + +Due to our dependencies bumping their minimum supported version of +Rust, the minimum version of Rust we test against is now 1.22.0. + +* Merged [#141][issue-141]: Fix `dedent` handling of empty lines and + trailing newlines. Thanks @bbqsrc! +* Fixed [#151][issue-151]: Release of version with hyphenation 0.7. + +### Version 0.10.0 — April 28th, 2018 + +Due to our dependencies bumping their minimum supported version of +Rust, the minimum version of Rust we test against is now 1.17.0. + +* Fixed [#99][issue-99]: Word broken even though it would fit on line. +* Fixed [#107][issue-107]: Automatic hyphenation is off by one. +* Fixed [#122][issue-122]: Take newlines into account when wrapping. +* Fixed [#129][issue-129]: Panic on string with em-dash. + +### Version 0.9.0 — October 5th, 2017 + +The dependency on `term_size` is now optional, and by default this +feature is not enabled. This is a *breaking change* for users of +`Wrapper::with_termwidth`. Enable the `term_size` feature to restore +the old functionality. + +Added a regression test for the case where `width` is set to +`usize::MAX`, thanks @Fraser999! All public structs now implement +`Debug`, thanks @hcpl! + +* Fixed [#101][issue-101]: Make `term_size` an optional dependency. + +### Version 0.8.0 — September 4th, 2017 + +The `Wrapper` stuct is now generic over the type of word splitter +being used. This means less boxing and a nicer API. The +`Wrapper::word_splitter` method has been removed. This is a *breaking +API change* if you used the method to change the word splitter. + +The `Wrapper` struct has two new methods that will wrap the input text +lazily: `Wrapper::wrap_iter` and `Wrapper::into_wrap_iter`. Use those +if you will be iterating over the wrapped lines one by one. + +* Fixed [#59][issue-59]: `wrap` could return an iterator. Thanks + @hcpl! +* Fixed [#81][issue-81]: Set `html_root_url`. + +### Version 0.7.0 — July 20th, 2017 + +Version 0.7.0 changes the return type of `Wrapper::wrap` from +`Vec<String>` to `Vec<Cow<'a, str>>`. This means that the output lines +borrow data from the input string. This is a *breaking API change* if +you relied on the exact return type of `Wrapper::wrap`. Callers of the +`textwrap::fill` convenience function will see no breakage. + +The above change and other optimizations makes version 0.7.0 roughly +15-30% faster than version 0.6.0. + +The `squeeze_whitespace` option has been removed since it was +complicating the above optimization. Let us know if this option is +important for you so we can provide a work around. + +* Fixed [#58][issue-58]: Add a "fast_wrap" function. +* Fixed [#61][issue-61]: Documentation errors. + +### Version 0.6.0 — May 22nd, 2017 + +Version 0.6.0 adds builder methods to `Wrapper` for easy one-line +initialization and configuration: + +```rust +let wrapper = Wrapper::new(60).break_words(false); +``` + +It also add a new `NoHyphenation` word splitter that will never split +words, not even at existing hyphens. + +* Fixed [#28][issue-28]: Support not squeezing whitespace. + +### Version 0.5.0 — May 15th, 2017 + +Version 0.5.0 has *breaking API changes*. However, this only affects +code using the hyphenation feature. The feature is now optional, so +you will first need to enable the `hyphenation` feature as described +above. Afterwards, please change your code from +```rust +wrapper.corpus = Some(&corpus); +``` +to +```rust +wrapper.splitter = Box::new(corpus); +``` + +Other changes include optimizations, so version 0.5.0 is roughly +10-15% faster than version 0.4.0. + +* Fixed [#19][issue-19]: Add support for finding terminal size. +* Fixed [#25][issue-25]: Handle words longer than `self.width`. +* Fixed [#26][issue-26]: Support custom indentation. +* Fixed [#36][issue-36]: Support building without `hyphenation`. +* Fixed [#39][issue-39]: Respect non-breaking spaces. + +### Version 0.4.0 — January 24th, 2017 + +Documented complexities and tested these via `cargo bench`. + +* Fixed [#13][issue-13]: Immediatedly add word if it fits. +* Fixed [#14][issue-14]: Avoid splitting on initial hyphens. + +### Version 0.3.0 — January 7th, 2017 + +Added support for automatic hyphenation. + +### Version 0.2.0 — December 28th, 2016 + +Introduced `Wrapper` struct. Added support for wrapping on hyphens. + +### Version 0.1.0 — December 17th, 2016 + +First public release with support for wrapping strings on whitespace. + +## License + +Textwrap can be distributed according to the [MIT license][mit]. +Contributions will be accepted under the same license. + +[crates-io]: https://crates.io/crates/textwrap +[travis-ci]: https://travis-ci.org/mgeisler/textwrap +[appveyor]: https://ci.appveyor.com/project/mgeisler/textwrap +[codecov]: https://codecov.io/gh/mgeisler/textwrap +[py-textwrap]: https://docs.python.org/library/textwrap +[patterns]: https://github.com/tapeinosyne/hyphenation/tree/master/patterns-tex +[api-docs]: https://docs.rs/textwrap/ +[issue-13]: https://github.com/mgeisler/textwrap/issues/13 +[issue-14]: https://github.com/mgeisler/textwrap/issues/14 +[issue-19]: https://github.com/mgeisler/textwrap/issues/19 +[issue-25]: https://github.com/mgeisler/textwrap/issues/25 +[issue-26]: https://github.com/mgeisler/textwrap/issues/26 +[issue-28]: https://github.com/mgeisler/textwrap/issues/28 +[issue-36]: https://github.com/mgeisler/textwrap/issues/36 +[issue-39]: https://github.com/mgeisler/textwrap/issues/39 +[issue-58]: https://github.com/mgeisler/textwrap/issues/58 +[issue-59]: https://github.com/mgeisler/textwrap/issues/59 +[issue-61]: https://github.com/mgeisler/textwrap/issues/61 +[issue-81]: https://github.com/mgeisler/textwrap/issues/81 +[issue-99]: https://github.com/mgeisler/textwrap/issues/99 +[issue-101]: https://github.com/mgeisler/textwrap/issues/101 +[issue-107]: https://github.com/mgeisler/textwrap/issues/107 +[issue-122]: https://github.com/mgeisler/textwrap/issues/122 +[issue-129]: https://github.com/mgeisler/textwrap/issues/129 +[issue-141]: https://github.com/mgeisler/textwrap/issues/141 +[issue-151]: https://github.com/mgeisler/textwrap/issues/151 +[mit]: LICENSE diff --git a/textwrap/benches/linear.rs b/textwrap/benches/linear.rs new file mode 100644 index 0000000..104398a --- /dev/null +++ b/textwrap/benches/linear.rs @@ -0,0 +1,122 @@ +#![feature(test)] + +// The benchmarks here verify that the complexity grows as O(*n*) +// where *n* is the number of characters in the text to be wrapped. + +#[cfg(feature = "hyphenation")] +extern crate hyphenation; +extern crate lipsum; +extern crate rand; +extern crate rand_xorshift; +extern crate test; +extern crate textwrap; + +#[cfg(feature = "hyphenation")] +use hyphenation::{Language, Load, Standard}; +use lipsum::MarkovChain; +use rand::SeedableRng; +use rand_xorshift::XorShiftRng; +use test::Bencher; +#[cfg(feature = "hyphenation")] +use textwrap::Wrapper; + +const LINE_LENGTH: usize = 60; + +/// Generate a lorem ipsum text with the given number of characters. +fn lorem_ipsum(length: usize) -> String { + // The average word length in the lorem ipsum text is somewhere + // between 6 and 7. So we conservatively divide by 5 to have a + // long enough text that we can truncate below. + let rng = XorShiftRng::seed_from_u64(0); + let mut chain = MarkovChain::new_with_rng(rng); + chain.learn(lipsum::LOREM_IPSUM); + chain.learn(lipsum::LIBER_PRIMUS); + + let mut text = chain.generate_from(length / 5, ("Lorem", "ipsum")); + text.truncate(length); + text +} + +#[bench] +fn fill_100(b: &mut Bencher) { + let text = &lorem_ipsum(100); + b.iter(|| textwrap::fill(text, LINE_LENGTH)) +} + +#[bench] +fn fill_200(b: &mut Bencher) { + let text = &lorem_ipsum(200); + b.iter(|| textwrap::fill(text, LINE_LENGTH)) +} + +#[bench] +fn fill_400(b: &mut Bencher) { + let text = &lorem_ipsum(400); + b.iter(|| textwrap::fill(text, LINE_LENGTH)) +} + +#[bench] +fn fill_800(b: &mut Bencher) { + let text = &lorem_ipsum(800); + b.iter(|| textwrap::fill(text, LINE_LENGTH)) +} + +#[bench] +fn wrap_100(b: &mut Bencher) { + let text = &lorem_ipsum(100); + b.iter(|| textwrap::wrap(text, LINE_LENGTH)) +} + +#[bench] +fn wrap_200(b: &mut Bencher) { + let text = &lorem_ipsum(200); + b.iter(|| textwrap::wrap(text, LINE_LENGTH)) +} + +#[bench] +fn wrap_400(b: &mut Bencher) { + let text = &lorem_ipsum(400); + b.iter(|| textwrap::wrap(text, LINE_LENGTH)) +} + +#[bench] +fn wrap_800(b: &mut Bencher) { + let text = &lorem_ipsum(800); + b.iter(|| textwrap::wrap(text, LINE_LENGTH)) +} + +#[bench] +#[cfg(feature = "hyphenation")] +fn hyphenation_fill_100(b: &mut Bencher) { + let text = &lorem_ipsum(100); + let dictionary = Standard::from_embedded(Language::Latin).unwrap(); + let wrapper = Wrapper::with_splitter(LINE_LENGTH, dictionary); + b.iter(|| wrapper.fill(text)) +} + +#[bench] +#[cfg(feature = "hyphenation")] +fn hyphenation_fill_200(b: &mut Bencher) { + let text = &lorem_ipsum(200); + let dictionary = Standard::from_embedded(Language::Latin).unwrap(); + let wrapper = Wrapper::with_splitter(LINE_LENGTH, dictionary); + b.iter(|| wrapper.fill(text)) +} + +#[bench] +#[cfg(feature = "hyphenation")] +fn hyphenation_fill_400(b: &mut Bencher) { + let text = &lorem_ipsum(400); + let dictionary = Standard::from_embedded(Language::Latin).unwrap(); + let wrapper = Wrapper::with_splitter(LINE_LENGTH, dictionary); + b.iter(|| wrapper.fill(text)) +} + +#[bench] +#[cfg(feature = "hyphenation")] +fn hyphenation_fill_800(b: &mut Bencher) { + let text = &lorem_ipsum(800); + let dictionary = Standard::from_embedded(Language::Latin).unwrap(); + let wrapper = Wrapper::with_splitter(LINE_LENGTH, dictionary); + b.iter(|| wrapper.fill(text)) +} diff --git a/textwrap/examples/layout.rs b/textwrap/examples/layout.rs new file mode 100644 index 0000000..d36cb3a --- /dev/null +++ b/textwrap/examples/layout.rs @@ -0,0 +1,38 @@ +#[cfg(feature = "hyphenation")] +extern crate hyphenation; +extern crate textwrap; + +#[cfg(feature = "hyphenation")] +use hyphenation::{Language, Load}; +use textwrap::Wrapper; + +#[cfg(not(feature = "hyphenation"))] +fn new_wrapper<'a>() -> Wrapper<'a, textwrap::HyphenSplitter> { + Wrapper::new(0) +} + +#[cfg(feature = "hyphenation")] +fn new_wrapper<'a>() -> Wrapper<'a, hyphenation::Standard> { + let dictionary = hyphenation::Standard::from_embedded(Language::EnglishUS).unwrap(); + Wrapper::with_splitter(0, dictionary) +} + +fn main() { + let example = "Memory safety without garbage collection. \ + Concurrency without data races. \ + Zero-cost abstractions."; + let mut prev_lines = vec![]; + let mut wrapper = new_wrapper(); + for width in 15..60 { + wrapper.width = width; + let lines = wrapper.wrap(example); + if lines != prev_lines { + let title = format!(" Width: {} ", width); + println!(".{:-^1$}.", title, width + 2); + for line in &lines { + println!("| {:1$} |", line, width); + } + prev_lines = lines; + } + } +} diff --git a/textwrap/examples/termwidth.rs b/textwrap/examples/termwidth.rs new file mode 100644 index 0000000..75db3aa --- /dev/null +++ b/textwrap/examples/termwidth.rs @@ -0,0 +1,41 @@ +#[cfg(feature = "hyphenation")] +extern crate hyphenation; +extern crate textwrap; + +#[cfg(feature = "hyphenation")] +use hyphenation::{Language, Load, Standard}; +#[cfg(feature = "term_size")] +use textwrap::Wrapper; + +#[cfg(not(feature = "term_size"))] +fn main() { + println!("Please enable the term_size feature to run this example."); +} + +#[cfg(feature = "term_size")] +fn main() { + #[cfg(not(feature = "hyphenation"))] + fn new_wrapper<'a>() -> (&'static str, Wrapper<'a, textwrap::HyphenSplitter>) { + ("without hyphenation", Wrapper::with_termwidth()) + } + + #[cfg(feature = "hyphenation")] + fn new_wrapper<'a>() -> (&'static str, Wrapper<'a, Standard>) { + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + ( + "with hyphenation", + Wrapper::with_splitter(textwrap::termwidth(), dictionary), + ) + } + + let example = "Memory safety without garbage collection. \ + Concurrency without data races. \ + Zero-cost abstractions."; + // Create a new Wrapper -- automatically set the width to the + // current terminal width. + let (msg, wrapper) = new_wrapper(); + println!("Formatted {} in {} columns:", msg, wrapper.width); + println!("----"); + println!("{}", wrapper.fill(example)); + println!("----"); +} diff --git a/textwrap/src/indentation.rs b/textwrap/src/indentation.rs new file mode 100644 index 0000000..276ba10 --- /dev/null +++ b/textwrap/src/indentation.rs @@ -0,0 +1,294 @@ +//! Functions related to adding and removing indentation from lines of +//! text. +//! +//! The functions here can be used to uniformly indent or dedent +//! (unindent) word wrapped lines of text. + +/// Add prefix to each non-empty line. +/// +/// ``` +/// use textwrap::indent; +/// +/// assert_eq!(indent(" +/// Foo +/// Bar +/// ", " "), " +/// Foo +/// Bar +/// "); +/// ``` +/// +/// Empty lines (lines consisting only of whitespace) are not indented +/// and the whitespace is replaced by a single newline (`\n`): +/// +/// ``` +/// use textwrap::indent; +/// +/// assert_eq!(indent(" +/// Foo +/// +/// Bar +/// \t +/// Baz +/// ", "->"), " +/// ->Foo +/// +/// ->Bar +/// +/// ->Baz +/// "); +/// ``` +/// +/// Leading and trailing whitespace on non-empty lines is kept +/// unchanged: +/// +/// ``` +/// use textwrap::indent; +/// +/// assert_eq!(indent(" \t Foo ", "->"), "-> \t Foo \n"); +/// ``` +pub fn indent(s: &str, prefix: &str) -> String { + let mut result = String::new(); + for line in s.lines() { + if line.chars().any(|c| !c.is_whitespace()) { + result.push_str(prefix); + result.push_str(line); + } + result.push('\n'); + } + result +} + +/// Removes common leading whitespace from each line. +/// +/// This function will look at each non-empty line and determine the +/// maximum amount of whitespace that can be removed from all lines: +/// +/// ``` +/// use textwrap::dedent; +/// +/// assert_eq!(dedent(" +/// 1st line +/// 2nd line +/// 3rd line +/// "), " +/// 1st line +/// 2nd line +/// 3rd line +/// "); +/// ``` +pub fn dedent(s: &str) -> String { + let mut prefix = ""; + let mut lines = s.lines(); + + // We first search for a non-empty line to find a prefix. + for line in &mut lines { + let mut whitespace_idx = line.len(); + for (idx, ch) in line.char_indices() { + if !ch.is_whitespace() { + whitespace_idx = idx; + break; + } + } + + // Check if the line had anything but whitespace + if whitespace_idx < line.len() { + prefix = &line[..whitespace_idx]; + break; + } + } + + // We then continue looking through the remaining lines to + // possibly shorten the prefix. + for line in &mut lines { + let mut whitespace_idx = line.len(); + for ((idx, a), b) in line.char_indices().zip(prefix.chars()) { + if a != b { + whitespace_idx = idx; + break; + } + } + + // Check if the line had anything but whitespace and if we + // have found a shorter prefix + if whitespace_idx < line.len() && whitespace_idx < prefix.len() { + prefix = &line[..whitespace_idx]; + } + } + + // We now go over the lines a second time to build the result. + let mut result = String::new(); + for line in s.lines() { + if line.starts_with(&prefix) && line.chars().any(|c| !c.is_whitespace()) { + let (_, tail) = line.split_at(prefix.len()); + result.push_str(tail); + } + result.push('\n'); + } + + if result.ends_with('\n') && !s.ends_with('\n') { + let new_len = result.len() - 1; + result.truncate(new_len); + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Add newlines. Ensures that the final line in the vector also + /// has a newline. + fn add_nl(lines: &[&str]) -> String { + lines.join("\n") + "\n" + } + + #[test] + fn indent_empty() { + assert_eq!(indent("\n", " "), "\n"); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn indent_nonempty() { + let x = vec![" foo", + "bar", + " baz"]; + let y = vec!["// foo", + "//bar", + "// baz"]; + assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn indent_empty_line() { + let x = vec![" foo", + "bar", + "", + " baz"]; + let y = vec!["// foo", + "//bar", + "", + "// baz"]; + assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y)); + } + + #[test] + fn dedent_empty() { + assert_eq!(dedent(""), ""); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn dedent_multi_line() { + let x = vec![" foo", + " bar", + " baz"]; + let y = vec![" foo", + "bar", + " baz"]; + assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn dedent_empty_line() { + let x = vec![" foo", + " bar", + " ", + " baz"]; + let y = vec![" foo", + "bar", + "", + " baz"]; + assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn dedent_blank_line() { + let x = vec![" foo", + "", + " bar", + " foo", + " bar", + " baz"]; + let y = vec!["foo", + "", + " bar", + " foo", + " bar", + " baz"]; + assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn dedent_whitespace_line() { + let x = vec![" foo", + " ", + " bar", + " foo", + " bar", + " baz"]; + let y = vec!["foo", + "", + " bar", + " foo", + " bar", + " baz"]; + assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn dedent_mixed_whitespace() { + let x = vec!["\tfoo", + " bar"]; + let y = vec!["\tfoo", + " bar"]; + assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn dedent_tabbed_whitespace() { + let x = vec!["\t\tfoo", + "\t\t\tbar"]; + let y = vec!["foo", + "\tbar"]; + assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn dedent_mixed_tabbed_whitespace() { + let x = vec!["\t \tfoo", + "\t \t\tbar"]; + let y = vec!["foo", + "\tbar"]; + assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn dedent_mixed_tabbed_whitespace2() { + let x = vec!["\t \tfoo", + "\t \tbar"]; + let y = vec!["\tfoo", + " \tbar"]; + assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + } + + #[test] + #[cfg_attr(rustfmt, rustfmt_skip)] + fn dedent_preserve_no_terminating_newline() { + let x = vec![" foo", + " bar"].join("\n"); + let y = vec!["foo", + " bar"].join("\n"); + assert_eq!(dedent(&x), y); + } +} diff --git a/textwrap/src/lib.rs b/textwrap/src/lib.rs new file mode 100644 index 0000000..2f82325 --- /dev/null +++ b/textwrap/src/lib.rs @@ -0,0 +1,987 @@ +//! `textwrap` provides functions for word wrapping and filling text. +//! +//! Wrapping text can be very useful in commandline programs where you +//! want to format dynamic output nicely so it looks good in a +//! terminal. A quick example: +//! +//! ```no_run +//! extern crate textwrap; +//! use textwrap::fill; +//! +//! fn main() { +//! let text = "textwrap: a small library for wrapping text."; +//! println!("{}", fill(text, 18)); +//! } +//! ``` +//! +//! This will display the following output: +//! +//! ```text +//! textwrap: a small +//! library for +//! wrapping text. +//! ``` +//! +//! # Displayed Width vs Byte Size +//! +//! To word wrap text, one must know the width of each word so one can +//! know when to break lines. This library measures the width of text +//! using the [displayed width][unicode-width], not the size in bytes. +//! +//! This is important for non-ASCII text. ASCII characters such as `a` +//! and `!` are simple and take up one column each. This means that +//! the displayed width is equal to the string length in bytes. +//! However, non-ASCII characters and symbols take up more than one +//! byte when UTF-8 encoded: `é` is `0xc3 0xa9` (two bytes) and `⚙` is +//! `0xe2 0x9a 0x99` (three bytes) in UTF-8, respectively. +//! +//! This is why we take care to use the displayed width instead of the +//! byte count when computing line lengths. All functions in this +//! library handle Unicode characters like this. +//! +//! [unicode-width]: https://docs.rs/unicode-width/ + +#![doc(html_root_url = "https://docs.rs/textwrap/0.11.0")] +#![deny(missing_docs)] +#![deny(missing_debug_implementations)] + +#[cfg(feature = "hyphenation")] +extern crate hyphenation; +#[cfg(feature = "term_size")] +extern crate term_size; +extern crate unicode_width; + +use std::borrow::Cow; +use std::str::CharIndices; + +use unicode_width::UnicodeWidthChar; +use unicode_width::UnicodeWidthStr; + +/// A non-breaking space. +const NBSP: char = '\u{a0}'; + +mod indentation; +pub use indentation::dedent; +pub use indentation::indent; + +mod splitting; +pub use splitting::{HyphenSplitter, NoHyphenation, WordSplitter}; + +/// A Wrapper holds settings for wrapping and filling text. Use it +/// when the convenience [`wrap_iter`], [`wrap`] and [`fill`] functions +/// are not flexible enough. +/// +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`wrap`]: fn.wrap.html +/// [`fill`]: fn.fill.html +/// +/// The algorithm used by the `WrapIter` iterator (returned from the +/// `wrap_iter` method) works by doing successive partial scans over +/// words in the input string (where each single scan yields a single +/// line) so that the overall time and memory complexity is O(*n*) where +/// *n* is the length of the input string. +#[derive(Clone, Debug)] +pub struct Wrapper<'a, S: WordSplitter> { + /// The width in columns at which the text will be wrapped. + pub width: usize, + /// Indentation used for the first line of output. + pub initial_indent: &'a str, + /// Indentation used for subsequent lines of output. + pub subsequent_indent: &'a str, + /// Allow long words to be broken if they cannot fit on a line. + /// When set to `false`, some lines may be longer than + /// `self.width`. + pub break_words: bool, + /// The method for splitting words. If the `hyphenation` feature + /// is enabled, you can use a `hyphenation::Standard` dictionary + /// here to get language-aware hyphenation. + pub splitter: S, +} + +impl<'a> Wrapper<'a, HyphenSplitter> { + /// Create a new Wrapper for wrapping at the specified width. By + /// default, we allow words longer than `width` to be broken. A + /// [`HyphenSplitter`] will be used by default for splitting + /// words. See the [`WordSplitter`] trait for other options. + /// + /// [`HyphenSplitter`]: struct.HyphenSplitter.html + /// [`WordSplitter`]: trait.WordSplitter.html + pub fn new(width: usize) -> Wrapper<'a, HyphenSplitter> { + Wrapper::with_splitter(width, HyphenSplitter) + } + + /// Create a new Wrapper for wrapping text at the current terminal + /// width. If the terminal width cannot be determined (typically + /// because the standard input and output is not connected to a + /// terminal), a width of 80 characters will be used. Other + /// settings use the same defaults as `Wrapper::new`. + /// + /// Equivalent to: + /// + /// ```no_run + /// # #![allow(unused_variables)] + /// use textwrap::{Wrapper, termwidth}; + /// + /// let wrapper = Wrapper::new(termwidth()); + /// ``` + #[cfg(feature = "term_size")] + pub fn with_termwidth() -> Wrapper<'a, HyphenSplitter> { + Wrapper::new(termwidth()) + } +} + +impl<'a, S: WordSplitter> Wrapper<'a, S> { + /// Use the given [`WordSplitter`] to create a new Wrapper for + /// wrapping at the specified width. By default, we allow words + /// longer than `width` to be broken. + /// + /// [`WordSplitter`]: trait.WordSplitter.html + pub fn with_splitter(width: usize, splitter: S) -> Wrapper<'a, S> { + Wrapper { + width: width, + initial_indent: "", + subsequent_indent: "", + break_words: true, + splitter: splitter, + } + } + + /// Change [`self.initial_indent`]. The initial indentation is + /// used on the very first line of output. + /// + /// # Examples + /// + /// Classic paragraph indentation can be achieved by specifying an + /// initial indentation and wrapping each paragraph by itself: + /// + /// ```no_run + /// # #![allow(unused_variables)] + /// use textwrap::Wrapper; + /// + /// let wrapper = Wrapper::new(15).initial_indent(" "); + /// ``` + /// + /// [`self.initial_indent`]: #structfield.initial_indent + pub fn initial_indent(self, indent: &'a str) -> Wrapper<'a, S> { + Wrapper { + initial_indent: indent, + ..self + } + } + + /// Change [`self.subsequent_indent`]. The subsequent indentation + /// is used on lines following the first line of output. + /// + /// # Examples + /// + /// Combining initial and subsequent indentation lets you format a + /// single paragraph as a bullet list: + /// + /// ```no_run + /// # #![allow(unused_variables)] + /// use textwrap::Wrapper; + /// + /// let wrapper = Wrapper::new(15) + /// .initial_indent("* ") + /// .subsequent_indent(" "); + /// ``` + /// + /// [`self.subsequent_indent`]: #structfield.subsequent_indent + pub fn subsequent_indent(self, indent: &'a str) -> Wrapper<'a, S> { + Wrapper { + subsequent_indent: indent, + ..self + } + } + + /// Change [`self.break_words`]. This controls if words longer + /// than `self.width` can be broken, or if they will be left + /// sticking out into the right margin. + /// + /// [`self.break_words`]: #structfield.break_words + pub fn break_words(self, setting: bool) -> Wrapper<'a, S> { + Wrapper { + break_words: setting, + ..self + } + } + + /// Fill a line of text at `self.width` characters. Strings are + /// wrapped based on their displayed width, not their size in + /// bytes. + /// + /// The result is a string with newlines between each line. Use + /// the `wrap` method if you need access to the individual lines. + /// + /// # Complexities + /// + /// This method simply joins the lines produced by `wrap_iter`. As + /// such, it inherits the O(*n*) time and memory complexity where + /// *n* is the input string length. + /// + /// # Examples + /// + /// ``` + /// use textwrap::Wrapper; + /// + /// let wrapper = Wrapper::new(15); + /// assert_eq!(wrapper.fill("Memory safety without garbage collection."), + /// "Memory safety\nwithout garbage\ncollection."); + /// ``` + pub fn fill(&self, s: &str) -> String { + // This will avoid reallocation in simple cases (no + // indentation, no hyphenation). + let mut result = String::with_capacity(s.len()); + + for (i, line) in self.wrap_iter(s).enumerate() { + if i > 0 { + result.push('\n'); + } + result.push_str(&line); + } + + result + } + + /// Wrap a line of text at `self.width` characters. Strings are + /// wrapped based on their displayed width, not their size in + /// bytes. + /// + /// # Complexities + /// + /// This method simply collects the lines produced by `wrap_iter`. + /// As such, it inherits the O(*n*) overall time and memory + /// complexity where *n* is the input string length. + /// + /// # Examples + /// + /// ``` + /// use textwrap::Wrapper; + /// + /// let wrap15 = Wrapper::new(15); + /// assert_eq!(wrap15.wrap("Concurrency without data races."), + /// vec!["Concurrency", + /// "without data", + /// "races."]); + /// + /// let wrap20 = Wrapper::new(20); + /// assert_eq!(wrap20.wrap("Concurrency without data races."), + /// vec!["Concurrency without", + /// "data races."]); + /// ``` + /// + /// Notice that newlines in the input are preserved. This means + /// that they force a line break, regardless of how long the + /// current line is: + /// + /// ``` + /// use textwrap::Wrapper; + /// + /// let wrapper = Wrapper::new(40); + /// assert_eq!(wrapper.wrap("First line.\nSecond line."), + /// vec!["First line.", "Second line."]); + /// ``` + /// + pub fn wrap(&self, s: &'a str) -> Vec<Cow<'a, str>> { + self.wrap_iter(s).collect::<Vec<_>>() + } + + /// Lazily wrap a line of text at `self.width` characters. Strings + /// are wrapped based on their displayed width, not their size in + /// bytes. + /// + /// The [`WordSplitter`] stored in [`self.splitter`] is used + /// whenever when a word is too large to fit on the current line. + /// By changing the field, different hyphenation strategies can be + /// implemented. + /// + /// # Complexities + /// + /// This method returns a [`WrapIter`] iterator which borrows this + /// `Wrapper`. The algorithm used has a linear complexity, so + /// getting the next line from the iterator will take O(*w*) time, + /// where *w* is the wrapping width. Fully processing the iterator + /// will take O(*n*) time for an input string of length *n*. + /// + /// When no indentation is used, each line returned is a slice of + /// the input string and the memory overhead is thus constant. + /// Otherwise new memory is allocated for each line returned. + /// + /// # Examples + /// + /// ``` + /// use std::borrow::Cow; + /// use textwrap::Wrapper; + /// + /// let wrap20 = Wrapper::new(20); + /// let mut wrap20_iter = wrap20.wrap_iter("Zero-cost abstractions."); + /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost"))); + /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions."))); + /// assert_eq!(wrap20_iter.next(), None); + /// + /// let wrap25 = Wrapper::new(25); + /// let mut wrap25_iter = wrap25.wrap_iter("Zero-cost abstractions."); + /// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions."))); + /// assert_eq!(wrap25_iter.next(), None); + /// ``` + /// + /// [`self.splitter`]: #structfield.splitter + /// [`WordSplitter`]: trait.WordSplitter.html + /// [`WrapIter`]: struct.WrapIter.html + pub fn wrap_iter<'w>(&'w self, s: &'a str) -> WrapIter<'w, 'a, S> { + WrapIter { + wrapper: self, + inner: WrapIterImpl::new(self, s), + } + } + + /// Lazily wrap a line of text at `self.width` characters. Strings + /// are wrapped based on their displayed width, not their size in + /// bytes. + /// + /// The [`WordSplitter`] stored in [`self.splitter`] is used + /// whenever when a word is too large to fit on the current line. + /// By changing the field, different hyphenation strategies can be + /// implemented. + /// + /// # Complexities + /// + /// This method consumes the `Wrapper` and returns a + /// [`IntoWrapIter`] iterator. Fully processing the iterator has + /// the same O(*n*) time complexity as [`wrap_iter`], where *n* is + /// the length of the input string. + /// + /// # Examples + /// + /// ``` + /// use std::borrow::Cow; + /// use textwrap::Wrapper; + /// + /// let wrap20 = Wrapper::new(20); + /// let mut wrap20_iter = wrap20.into_wrap_iter("Zero-cost abstractions."); + /// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost"))); + /// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions."))); + /// assert_eq!(wrap20_iter.next(), None); + /// ``` + /// + /// [`self.splitter`]: #structfield.splitter + /// [`WordSplitter`]: trait.WordSplitter.html + /// [`IntoWrapIter`]: struct.IntoWrapIter.html + /// [`wrap_iter`]: #method.wrap_iter + pub fn into_wrap_iter(self, s: &'a str) -> IntoWrapIter<'a, S> { + let inner = WrapIterImpl::new(&self, s); + + IntoWrapIter { + wrapper: self, + inner: inner, + } + } +} + +/// An iterator over the lines of the input string which owns a +/// `Wrapper`. An instance of `IntoWrapIter` is typically obtained +/// through either [`wrap_iter`] or [`Wrapper::into_wrap_iter`]. +/// +/// Each call of `.next()` method yields a line wrapped in `Some` if the +/// input hasn't been fully processed yet. Otherwise it returns `None`. +/// +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`Wrapper::into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter +#[derive(Debug)] +pub struct IntoWrapIter<'a, S: WordSplitter> { + wrapper: Wrapper<'a, S>, + inner: WrapIterImpl<'a>, +} + +impl<'a, S: WordSplitter> Iterator for IntoWrapIter<'a, S> { + type Item = Cow<'a, str>; + + fn next(&mut self) -> Option<Cow<'a, str>> { + self.inner.next(&self.wrapper) + } +} + +/// An iterator over the lines of the input string which borrows a +/// `Wrapper`. An instance of `WrapIter` is typically obtained +/// through the [`Wrapper::wrap_iter`] method. +/// +/// Each call of `.next()` method yields a line wrapped in `Some` if the +/// input hasn't been fully processed yet. Otherwise it returns `None`. +/// +/// [`Wrapper::wrap_iter`]: struct.Wrapper.html#method.wrap_iter +#[derive(Debug)] +pub struct WrapIter<'w, 'a: 'w, S: WordSplitter + 'w> { + wrapper: &'w Wrapper<'a, S>, + inner: WrapIterImpl<'a>, +} + +impl<'w, 'a: 'w, S: WordSplitter> Iterator for WrapIter<'w, 'a, S> { + type Item = Cow<'a, str>; + + fn next(&mut self) -> Option<Cow<'a, str>> { + self.inner.next(self.wrapper) + } +} + +/// Like `char::is_whitespace`, but non-breaking spaces don't count. +#[inline] +fn is_whitespace(ch: char) -> bool { + ch.is_whitespace() && ch != NBSP +} + +/// Common implementation details for `WrapIter` and `IntoWrapIter`. +#[derive(Debug)] +struct WrapIterImpl<'a> { + // String to wrap. + source: &'a str, + // CharIndices iterator over self.source. + char_indices: CharIndices<'a>, + // Byte index where the current line starts. + start: usize, + // Byte index of the last place where the string can be split. + split: usize, + // Size in bytes of the character at self.source[self.split]. + split_len: usize, + // Width of self.source[self.start..idx]. + line_width: usize, + // Width of self.source[self.start..self.split]. + line_width_at_split: usize, + // Tracking runs of whitespace characters. + in_whitespace: bool, + // Has iterator finished producing elements? + finished: bool, +} + +impl<'a> WrapIterImpl<'a> { + fn new<S: WordSplitter>(wrapper: &Wrapper<'a, S>, s: &'a str) -> WrapIterImpl<'a> { + WrapIterImpl { + source: s, + char_indices: s.char_indices(), + start: 0, + split: 0, + split_len: 0, + line_width: wrapper.initial_indent.width(), + line_width_at_split: wrapper.initial_indent.width(), + in_whitespace: false, + finished: false, + } + } + + fn create_result_line<S: WordSplitter>(&self, wrapper: &Wrapper<'a, S>) -> Cow<'a, str> { + if self.start == 0 { + Cow::from(wrapper.initial_indent) + } else { + Cow::from(wrapper.subsequent_indent) + } + } + + fn next<S: WordSplitter>(&mut self, wrapper: &Wrapper<'a, S>) -> Option<Cow<'a, str>> { + if self.finished { + return None; + } + + while let Some((idx, ch)) = self.char_indices.next() { + let char_width = ch.width().unwrap_or(0); + let char_len = ch.len_utf8(); + + if ch == '\n' { + self.split = idx; + self.split_len = char_len; + self.line_width_at_split = self.line_width; + self.in_whitespace = false; + + // If this is not the final line, return the current line. Otherwise, + // we will return the line with its line break after exiting the loop + if self.split + self.split_len < self.source.len() { + let mut line = self.create_result_line(wrapper); + line += &self.source[self.start..self.split]; + + self.start = self.split + self.split_len; + self.line_width = wrapper.subsequent_indent.width(); + + return Some(line); + } + } else if is_whitespace(ch) { + // Extend the previous split or create a new one. + if self.in_whitespace { + self.split_len += char_len; + } else { + self.split = idx; + self.split_len = char_len; + } + self.line_width_at_split = self.line_width + char_width; + self.in_whitespace = true; + } else if self.line_width + char_width > wrapper.width { + // There is no room for this character on the current + // line. Try to split the final word. + self.in_whitespace = false; + let remaining_text = &self.source[self.split + self.split_len..]; + let final_word = match remaining_text.find(is_whitespace) { + Some(i) => &remaining_text[..i], + None => remaining_text, + }; + + let mut hyphen = ""; + let splits = wrapper.splitter.split(final_word); + for &(head, hyp, _) in splits.iter().rev() { + if self.line_width_at_split + head.width() + hyp.width() <= wrapper.width { + // We can fit head into the current line. + // Advance the split point by the width of the + // whitespace and the head length. + self.split += self.split_len + head.len(); + self.split_len = 0; + hyphen = hyp; + break; + } + } + + if self.start >= self.split { + // The word is too big to fit on a single line, so we + // need to split it at the current index. + if wrapper.break_words { + // Break work at current index. + self.split = idx; + self.split_len = 0; + self.line_width_at_split = self.line_width; + } else { + // Add smallest split. + self.split = self.start + splits[0].0.len(); + self.split_len = 0; + self.line_width_at_split = self.line_width; + } + } + + if self.start < self.split { + let mut line = self.create_result_line(wrapper); + line += &self.source[self.start..self.split]; + line += hyphen; + + self.start = self.split + self.split_len; + self.line_width += wrapper.subsequent_indent.width(); + self.line_width -= self.line_width_at_split; + self.line_width += char_width; + + return Some(line); + } + } else { + self.in_whitespace = false; + } + self.line_width += char_width; + } + + self.finished = true; + + // Add final line. + if self.start < self.source.len() { + let mut line = self.create_result_line(wrapper); + line += &self.source[self.start..]; + return Some(line); + } + + None + } +} + +/// Return the current terminal width. If the terminal width cannot be +/// determined (typically because the standard output is not connected +/// to a terminal), a default width of 80 characters will be used. +/// +/// # Examples +/// +/// Create a `Wrapper` for the current terminal with a two column +/// margin: +/// +/// ```no_run +/// # #![allow(unused_variables)] +/// use textwrap::{Wrapper, NoHyphenation, termwidth}; +/// +/// let width = termwidth() - 4; // Two columns on each side. +/// let wrapper = Wrapper::with_splitter(width, NoHyphenation) +/// .initial_indent(" ") +/// .subsequent_indent(" "); +/// ``` +#[cfg(feature = "term_size")] +pub fn termwidth() -> usize { + term_size::dimensions_stdout().map_or(80, |(w, _)| w) +} + +/// Fill a line of text at `width` characters. Strings are wrapped +/// based on their displayed width, not their size in bytes. +/// +/// The result is a string with newlines between each line. Use +/// [`wrap`] if you need access to the individual lines or +/// [`wrap_iter`] for its iterator counterpart. +/// +/// ``` +/// use textwrap::fill; +/// +/// assert_eq!(fill("Memory safety without garbage collection.", 15), +/// "Memory safety\nwithout garbage\ncollection."); +/// ``` +/// +/// This function creates a Wrapper on the fly with default settings. +/// If you need to set a language corpus for automatic hyphenation, or +/// need to fill many strings, then it is suggested to create a Wrapper +/// and call its [`fill` method]. +/// +/// [`wrap`]: fn.wrap.html +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`fill` method]: struct.Wrapper.html#method.fill +pub fn fill(s: &str, width: usize) -> String { + Wrapper::new(width).fill(s) +} + +/// Wrap a line of text at `width` characters. Strings are wrapped +/// based on their displayed width, not their size in bytes. +/// +/// This function creates a Wrapper on the fly with default settings. +/// If you need to set a language corpus for automatic hyphenation, or +/// need to wrap many strings, then it is suggested to create a Wrapper +/// and call its [`wrap` method]. +/// +/// The result is a vector of strings. Use [`wrap_iter`] if you need an +/// iterator version. +/// +/// # Examples +/// +/// ``` +/// use textwrap::wrap; +/// +/// assert_eq!(wrap("Concurrency without data races.", 15), +/// vec!["Concurrency", +/// "without data", +/// "races."]); +/// +/// assert_eq!(wrap("Concurrency without data races.", 20), +/// vec!["Concurrency without", +/// "data races."]); +/// ``` +/// +/// [`wrap_iter`]: fn.wrap_iter.html +/// [`wrap` method]: struct.Wrapper.html#method.wrap +pub fn wrap(s: &str, width: usize) -> Vec<Cow<str>> { + Wrapper::new(width).wrap(s) +} + +/// Lazily wrap a line of text at `width` characters. Strings are +/// wrapped based on their displayed width, not their size in bytes. +/// +/// This function creates a Wrapper on the fly with default settings. +/// It then calls the [`into_wrap_iter`] method. Hence, the return +/// value is an [`IntoWrapIter`], not a [`WrapIter`] as the function +/// name would otherwise suggest. +/// +/// If you need to set a language corpus for automatic hyphenation, or +/// need to wrap many strings, then it is suggested to create a Wrapper +/// and call its [`wrap_iter`] or [`into_wrap_iter`] methods. +/// +/// # Examples +/// +/// ``` +/// use std::borrow::Cow; +/// use textwrap::wrap_iter; +/// +/// let mut wrap20_iter = wrap_iter("Zero-cost abstractions.", 20); +/// assert_eq!(wrap20_iter.next(), Some(Cow::from("Zero-cost"))); +/// assert_eq!(wrap20_iter.next(), Some(Cow::from("abstractions."))); +/// assert_eq!(wrap20_iter.next(), None); +/// +/// let mut wrap25_iter = wrap_iter("Zero-cost abstractions.", 25); +/// assert_eq!(wrap25_iter.next(), Some(Cow::from("Zero-cost abstractions."))); +/// assert_eq!(wrap25_iter.next(), None); +/// ``` +/// +/// [`wrap_iter`]: struct.Wrapper.html#method.wrap_iter +/// [`into_wrap_iter`]: struct.Wrapper.html#method.into_wrap_iter +/// [`IntoWrapIter`]: struct.IntoWrapIter.html +/// [`WrapIter`]: struct.WrapIter.html +pub fn wrap_iter(s: &str, width: usize) -> IntoWrapIter<HyphenSplitter> { + Wrapper::new(width).into_wrap_iter(s) +} + +#[cfg(test)] +mod tests { + #[cfg(feature = "hyphenation")] + extern crate hyphenation; + + use super::*; + #[cfg(feature = "hyphenation")] + use hyphenation::{Language, Load, Standard}; + + #[test] + fn no_wrap() { + assert_eq!(wrap("foo", 10), vec!["foo"]); + } + + #[test] + fn simple() { + assert_eq!(wrap("foo bar baz", 5), vec!["foo", "bar", "baz"]); + } + + #[test] + fn multi_word_on_line() { + assert_eq!(wrap("foo bar baz", 10), vec!["foo bar", "baz"]); + } + + #[test] + fn long_word() { + assert_eq!(wrap("foo", 0), vec!["f", "o", "o"]); + } + + #[test] + fn long_words() { + assert_eq!(wrap("foo bar", 0), vec!["f", "o", "o", "b", "a", "r"]); + } + + #[test] + fn max_width() { + assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]); + } + + #[test] + fn leading_whitespace() { + assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]); + } + + #[test] + fn trailing_whitespace() { + assert_eq!(wrap("foo bar ", 6), vec!["foo", "bar "]); + } + + #[test] + fn interior_whitespace() { + assert_eq!(wrap("foo: bar baz", 10), vec!["foo: bar", "baz"]); + } + + #[test] + fn extra_whitespace_start_of_line() { + // Whitespace is only significant inside a line. After a line + // gets too long and is broken, the first word starts in + // column zero and is not indented. The line before might end + // up with trailing whitespace. + assert_eq!(wrap("foo bar", 5), vec!["foo", "bar"]); + } + + #[test] + fn issue_99() { + // We did not reset the in_whitespace flag correctly and did + // not handle single-character words after a line break. + assert_eq!( + wrap("aaabbbccc x yyyzzzwww", 9), + vec!["aaabbbccc", "x", "yyyzzzwww"] + ); + } + + #[test] + fn issue_129() { + // The dash is an em-dash which takes up four bytes. We used + // to panic since we tried to index into the character. + assert_eq!(wrap("x – x", 1), vec!["x", "–", "x"]); + } + + #[test] + fn wide_character_handling() { + assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]); + assert_eq!( + wrap("Hello, World!", 15), + vec!["Hello,", "World!"] + ); + } + + #[test] + fn empty_input_not_indented() { + let wrapper = Wrapper::new(10).initial_indent("!!!"); + assert_eq!(wrapper.fill(""), ""); + } + + #[test] + fn indent_single_line() { + let wrapper = Wrapper::new(10).initial_indent(">>>"); // No trailing space + assert_eq!(wrapper.fill("foo"), ">>>foo"); + } + + #[test] + fn indent_multiple_lines() { + let wrapper = Wrapper::new(6).initial_indent("* ").subsequent_indent(" "); + assert_eq!(wrapper.wrap("foo bar baz"), vec!["* foo", " bar", " baz"]); + } + + #[test] + fn indent_break_words() { + let wrapper = Wrapper::new(5).initial_indent("* ").subsequent_indent(" "); + assert_eq!(wrapper.wrap("foobarbaz"), vec!["* foo", " bar", " baz"]); + } + + #[test] + fn hyphens() { + assert_eq!(wrap("foo-bar", 5), vec!["foo-", "bar"]); + } + + #[test] + fn trailing_hyphen() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!(wrapper.wrap("foobar-"), vec!["foobar-"]); + } + + #[test] + fn multiple_hyphens() { + assert_eq!(wrap("foo-bar-baz", 5), vec!["foo-", "bar-", "baz"]); + } + + #[test] + fn hyphens_flag() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!( + wrapper.wrap("The --foo-bar flag."), + vec!["The", "--foo-", "bar", "flag."] + ); + } + + #[test] + fn repeated_hyphens() { + let wrapper = Wrapper::new(4).break_words(false); + assert_eq!(wrapper.wrap("foo--bar"), vec!["foo--bar"]); + } + + #[test] + fn hyphens_alphanumeric() { + assert_eq!(wrap("Na2-CH4", 5), vec!["Na2-", "CH4"]); + } + + #[test] + fn hyphens_non_alphanumeric() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!(wrapper.wrap("foo(-)bar"), vec!["foo(-)bar"]); + } + + #[test] + fn multiple_splits() { + assert_eq!(wrap("foo-bar-baz", 9), vec!["foo-bar-", "baz"]); + } + + #[test] + fn forced_split() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!(wrapper.wrap("foobar-baz"), vec!["foobar-", "baz"]); + } + + #[test] + fn no_hyphenation() { + let wrapper = Wrapper::with_splitter(8, NoHyphenation); + assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn auto_hyphenation() { + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let wrapper = Wrapper::new(10); + assert_eq!( + wrapper.wrap("Internationalization"), + vec!["Internatio", "nalization"] + ); + + let wrapper = Wrapper::with_splitter(10, dictionary); + assert_eq!( + wrapper.wrap("Internationalization"), + vec!["Interna-", "tionaliza-", "tion"] + ); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn split_len_hyphenation() { + // Test that hyphenation takes the width of the wihtespace + // into account. + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let wrapper = Wrapper::with_splitter(15, dictionary); + assert_eq!( + wrapper.wrap("garbage collection"), + vec!["garbage col-", "lection"] + ); + } + + #[test] + #[cfg(feature = "hyphenation")] + fn borrowed_lines() { + // Lines that end with an extra hyphen are owned, the final + // line is borrowed. + use std::borrow::Cow::{Borrowed, Owned}; + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let wrapper = Wrapper::with_splitter(10, dictionary); + let lines = wrapper.wrap("Internationalization"); + if let Borrowed(s) = lines[0] { + assert!(false, "should not have been borrowed: {:?}", s); + } + if let Borrowed(s) = lines[1] { + assert!(false, "should not have been borrowed: {:?}", s); + } + if let Owned(ref s) = lines[2] { + assert!(false, "should not have been owned: {:?}", s); + } + } + + #[test] + #[cfg(feature = "hyphenation")] + fn auto_hyphenation_with_hyphen() { + let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap(); + let wrapper = Wrapper::new(8).break_words(false); + assert_eq!(wrapper.wrap("over-caffinated"), vec!["over-", "caffinated"]); + + let wrapper = Wrapper::with_splitter(8, dictionary).break_words(false); + assert_eq!( + wrapper.wrap("over-caffinated"), + vec!["over-", "caffi-", "nated"] + ); + } + + #[test] + fn break_words() { + assert_eq!(wrap("foobarbaz", 3), vec!["foo", "bar", "baz"]); + } + + #[test] + fn break_words_wide_characters() { + assert_eq!(wrap("Hello", 5), vec!["He", "ll", "o"]); + } + + #[test] + fn break_words_zero_width() { + assert_eq!(wrap("foobar", 0), vec!["f", "o", "o", "b", "a", "r"]); + } + + #[test] + fn break_words_line_breaks() { + assert_eq!(fill("ab\ncdefghijkl", 5), "ab\ncdefg\nhijkl"); + assert_eq!(fill("abcdefgh\nijkl", 5), "abcde\nfgh\nijkl"); + } + + #[test] + fn preserve_line_breaks() { + assert_eq!(fill("test\n", 11), "test\n"); + assert_eq!(fill("test\n\na\n\n", 11), "test\n\na\n\n"); + assert_eq!(fill("1 3 5 7\n1 3 5 7", 7), "1 3 5 7\n1 3 5 7"); + } + + #[test] + fn wrap_preserve_line_breaks() { + assert_eq!(fill("1 3 5 7\n1 3 5 7", 5), "1 3 5\n7\n1 3 5\n7"); + } + + #[test] + fn non_breaking_space() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!(wrapper.fill("foo bar baz"), "foo bar baz"); + } + + #[test] + fn non_breaking_hyphen() { + let wrapper = Wrapper::new(5).break_words(false); + assert_eq!(wrapper.fill("foo‑bar‑baz"), "foo‑bar‑baz"); + } + + #[test] + fn fill_simple() { + assert_eq!(fill("foo bar baz", 10), "foo bar\nbaz"); + } +} diff --git a/textwrap/src/splitting.rs b/textwrap/src/splitting.rs new file mode 100644 index 0000000..f6b65af --- /dev/null +++ b/textwrap/src/splitting.rs @@ -0,0 +1,139 @@ +//! Word splitting functionality. +//! +//! To wrap text into lines, long words sometimes need to be split +//! across lines. The [`WordSplitter`] trait defines this +//! functionality. [`HyphenSplitter`] is the default implementation of +//! this treat: it will simply split words on existing hyphens. + +#[cfg(feature = "hyphenation")] +use hyphenation::{Hyphenator, Standard}; + +/// An interface for splitting words. +/// +/// When the [`wrap_iter`] method will try to fit text into a line, it +/// will eventually find a word that it too large the current text +/// width. It will then call the currently configured `WordSplitter` to +/// have it attempt to split the word into smaller parts. This trait +/// describes that functionality via the [`split`] method. +/// +/// If the `textwrap` crate has been compiled with the `hyphenation` +/// feature enabled, you will find an implementation of `WordSplitter` +/// by the `hyphenation::language::Corpus` struct. Use this struct for +/// language-aware hyphenation. See the [`hyphenation` documentation] +/// for details. +/// +/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter +/// [`split`]: #tymethod.split +/// [`hyphenation` documentation]: https://docs.rs/hyphenation/ +pub trait WordSplitter { + /// Return all possible splits of word. Each split is a triple + /// with a head, a hyphen, and a tail where `head + &hyphen + + /// &tail == word`. The hyphen can be empty if there is already a + /// hyphen in the head. + /// + /// The splits should go from smallest to longest and should + /// include no split at all. So the word "technology" could be + /// split into + /// + /// ```no_run + /// vec![("tech", "-", "nology"), + /// ("technol", "-", "ogy"), + /// ("technolo", "-", "gy"), + /// ("technology", "", "")]; + /// ``` + fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>; +} + +/// Use this as a [`Wrapper.splitter`] to avoid any kind of +/// hyphenation: +/// +/// ``` +/// use textwrap::{Wrapper, NoHyphenation}; +/// +/// let wrapper = Wrapper::with_splitter(8, NoHyphenation); +/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]); +/// ``` +/// +/// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter +#[derive(Clone, Debug)] +pub struct NoHyphenation; + +/// `NoHyphenation` implements `WordSplitter` by not splitting the +/// word at all. +impl WordSplitter for NoHyphenation { + fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { + vec![(word, "", "")] + } +} + +/// Simple and default way to split words: splitting on existing +/// hyphens only. +/// +/// You probably don't need to use this type since it's already used +/// by default by `Wrapper::new`. +#[derive(Clone, Debug)] +pub struct HyphenSplitter; + +/// `HyphenSplitter` is the default `WordSplitter` used by +/// `Wrapper::new`. It will split words on any existing hyphens in the +/// word. +/// +/// It will only use hyphens that are surrounded by alphanumeric +/// characters, which prevents a word like "--foo-bar" from being +/// split on the first or second hyphen. +impl WordSplitter for HyphenSplitter { + fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { + let mut triples = Vec::new(); + // Split on hyphens, smallest split first. We only use hyphens + // that are surrounded by alphanumeric characters. This is to + // avoid splitting on repeated hyphens, such as those found in + // --foo-bar. + let mut char_indices = word.char_indices(); + // Early return if the word is empty. + let mut prev = match char_indices.next() { + None => return vec![(word, "", "")], + Some((_, ch)) => ch, + }; + + // Find current word, or return early if the word only has a + // single character. + let (mut idx, mut cur) = match char_indices.next() { + None => return vec![(word, "", "")], + Some((idx, cur)) => (idx, cur), + }; + + for (i, next) in char_indices { + if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() { + let (head, tail) = word.split_at(idx + 1); + triples.push((head, "", tail)); + } + prev = cur; + idx = i; + cur = next; + } + + // Finally option is no split at all. + triples.push((word, "", "")); + + triples + } +} + +/// A hyphenation dictionary can be used to do language-specific +/// hyphenation using patterns from the hyphenation crate. +#[cfg(feature = "hyphenation")] +impl WordSplitter for Standard { + fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { + // Find splits based on language dictionary. + let mut triples = Vec::new(); + for n in self.hyphenate(word).breaks { + let (head, tail) = word.split_at(n); + let hyphen = if head.ends_with('-') { "" } else { "-" }; + triples.push((head, hyphen, tail)); + } + // Finally option is no split at all. + triples.push((word, "", "")); + + triples + } +} diff --git a/textwrap/tests/version-numbers.rs b/textwrap/tests/version-numbers.rs new file mode 100644 index 0000000..85c52e3 --- /dev/null +++ b/textwrap/tests/version-numbers.rs @@ -0,0 +1,17 @@ +#[macro_use] +extern crate version_sync; + +#[test] +fn test_readme_deps() { + assert_markdown_deps_updated!("README.md"); +} + +#[test] +fn test_readme_changelog() { + assert_contains_regex!("README.md", r"^### Version {version} — .* \d\d?.., 20\d\d$"); +} + +#[test] +fn test_html_root_url() { + assert_html_root_url_updated!("src/lib.rs"); +} |