aboutsummaryrefslogtreecommitdiff
path: root/textwrap/src/splitting.rs
diff options
context:
space:
mode:
authorRobin Krahl <robin.krahl@ireas.org>2020-01-07 11:18:04 +0000
committerDaniel Mueller <deso@posteo.net>2020-01-08 09:20:25 -0800
commit5e20a29b4fdc8a2d442d1093681b396dcb4b816b (patch)
tree55ab083fa8999d2ccbb5e921c1ffe52560dca152 /textwrap/src/splitting.rs
parent203e691f46d591a2cc8acdfd850fa9f5b0fb8a98 (diff)
downloadnitrocli-5e20a29b4fdc8a2d442d1093681b396dcb4b816b.tar.gz
nitrocli-5e20a29b4fdc8a2d442d1093681b396dcb4b816b.tar.bz2
Add structopt dependency in version 0.3.7
This patch series replaces argparse with structopt in the argument handling code. As a first step, we need structopt as a dependency. Import subrepo structopt/:structopt at efbdda4753592e27bc430fb01f7b9650b2f3174d Import subrepo bitflags/:bitflags at 30668016aca6bd3b02c766e8347e0b4080d4c296 Import subrepo clap/:clap at 784524f7eb193e35f81082cc69454c8c21b948f7 Import subrepo heck/:heck at 093d56fbf001e1506e56dbfa38631d99b1066df1 Import subrepo proc-macro-error/:proc-macro-error at 6c4cfe79a622c5de8ae68557993542be46eacae2 Import subrepo proc-macro2/:proc-macro2 at d5d48eddca4566e5438e8a2cbed4a74e049544de Import subrepo quote/:quote at 727436c6c137b20f0f34dde5d8fda2679b9747ad Import subrepo rustversion/:rustversion at 0c5663313516263059ce9059ef81fc7a1cf655ca Import subrepo syn-mid/:syn-mid at 5d3d85414a9e6674e1857ec22a87b96e04a6851a Import subrepo syn/:syn at e87c27e87f6f4ef8919d0372bdb056d53ef0d8f3 Import subrepo textwrap/:textwrap at abcd618beae3f74841032aa5b53c1086b0a57ca2 Import subrepo unicode-segmentation/:unicode-segmentation at 637c9874c4fe0c205ff27787faf150a40295c6c3 Import subrepo unicode-width/:unicode-width at 3033826f8bf05e82724140a981d5941e48fce393 Import subrepo unicode-xid/:unicode-xid at 4baae9fffb156ba229665b972a9cd5991787ceb7
Diffstat (limited to 'textwrap/src/splitting.rs')
-rw-r--r--textwrap/src/splitting.rs139
1 files changed, 139 insertions, 0 deletions
diff --git a/textwrap/src/splitting.rs b/textwrap/src/splitting.rs
new file mode 100644
index 0000000..f6b65af
--- /dev/null
+++ b/textwrap/src/splitting.rs
@@ -0,0 +1,139 @@
+//! Word splitting functionality.
+//!
+//! To wrap text into lines, long words sometimes need to be split
+//! across lines. The [`WordSplitter`] trait defines this
+//! functionality. [`HyphenSplitter`] is the default implementation of
+//! this trait: it will simply split words on existing hyphens.
+
+#[cfg(feature = "hyphenation")]
+use hyphenation::{Hyphenator, Standard};
+
+/// An interface for splitting words.
+///
+/// When the [`wrap_iter`] method tries to fit text into a line, it
+/// will eventually find a word that is too large for the current text
+/// width. It will then call the currently configured `WordSplitter` to
+/// have it attempt to split the word into smaller parts. This trait
+/// describes that functionality via the [`split`] method.
+///
+/// If the `textwrap` crate has been compiled with the `hyphenation`
+/// feature enabled, you will find an implementation of `WordSplitter`
+/// for the `hyphenation::Standard` struct. Use this struct for
+/// language-aware hyphenation. See the [`hyphenation` documentation]
+/// for details.
+///
+/// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter
+/// [`split`]: #tymethod.split
+/// [`hyphenation` documentation]: https://docs.rs/hyphenation/
+pub trait WordSplitter {
+ /// Return all possible splits of `word`. Each split is a triple
+ /// with a head, a hyphen, and a tail where `head + &tail == word`
+ /// (the hyphen is not part of `word`). The hyphen can be empty if
+ /// there is already a hyphen in the head.
+ ///
+ /// The splits should go from the shortest head to the longest and
+ /// should include no split at all. So the word "technology" could
+ /// be split into
+ ///
+ /// ```no_run
+ /// vec![("tech", "-", "nology"),
+ /// ("technol", "-", "ogy"),
+ /// ("technolo", "-", "gy"),
+ /// ("technology", "", "")];
+ /// ```
+ fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>;
+}
+
+/// A `WordSplitter` that never splits a word. Use this as a
+/// [`Wrapper.splitter`] to avoid any kind of hyphenation:
+///
+/// ```
+/// use textwrap::{Wrapper, NoHyphenation};
+///
+/// let wrapper = Wrapper::with_splitter(8, NoHyphenation);
+/// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]);
+/// ```
+///
+/// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter
+#[derive(Clone, Debug)]
+pub struct NoHyphenation;
+
+/// `NoHyphenation` implements `WordSplitter` by not splitting the
+/// word at all: the whole word is the only candidate it returns.
+impl WordSplitter for NoHyphenation {
+ fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
+ vec![(word, "", "")]
+ }
+}
+
+/// Simple and default way to split words: splitting on existing
+/// hyphens only.
+///
+/// You probably don't need to use this type since it's already used
+/// by default by `Wrapper::new`.
+#[derive(Clone, Debug)]
+pub struct HyphenSplitter;
+
+/// `HyphenSplitter` is the default `WordSplitter` used by
+/// `Wrapper::new`. It splits words after existing hyphens; the hyphen
+/// stays in the head, so the hyphen element of each triple is empty.
+///
+/// It will only use hyphens that are surrounded by alphanumeric
+/// characters, which prevents a word like "--foo-bar" from being
+/// split on the first or second hyphen.
+impl WordSplitter for HyphenSplitter {
+ fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
+ let mut triples = Vec::new();
+ // Split on hyphens, shortest head first. Only a hyphen with an
+ // alphanumeric character on both sides is used, which avoids
+ // splitting on repeated hyphens such as those in --foo-bar. The
+ // scan keeps a three-character window: prev, cur and next.
+ let mut char_indices = word.char_indices();
+ // Early return (no split) if the word is empty.
+ let mut prev = match char_indices.next() {
+ None => return vec![(word, "", "")],
+ Some((_, ch)) => ch,
+ };
+
+ // Find the current character and its byte index, or return
+ // early if the word only has a single character.
+ let (mut idx, mut cur) = match char_indices.next() {
+ None => return vec![(word, "", "")],
+ Some((idx, cur)) => (idx, cur),
+ };
+
+ for (i, next) in char_indices {
+ if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() {
+ let (head, tail) = word.split_at(idx + 1); // hyphen stays in head
+ triples.push((head, "", tail));
+ }
+ prev = cur;
+ idx = i;
+ cur = next;
+ }
+
+ // Final option is no split at all.
+ triples.push((word, "", ""));
+
+ triples
+ }
+}
+
+/// A hyphenation dictionary can be used to do language-specific
+/// hyphenation using patterns from the `hyphenation` crate.
+#[cfg(feature = "hyphenation")]
+impl WordSplitter for Standard {
+ fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> {
+ // One candidate split per break offset found by the dictionary.
+ let mut triples = Vec::new();
+ for n in self.hyphenate(word).breaks {
+ let (head, tail) = word.split_at(n);
+ let hyphen = if head.ends_with('-') { "" } else { "-" }; // avoid a doubled hyphen
+ triples.push((head, hyphen, tail));
+ }
+ // Final option is no split at all.
+ triples.push((word, "", ""));
+
+ triples
+ }
+}