From 5e20a29b4fdc8a2d442d1093681b396dcb4b816b Mon Sep 17 00:00:00 2001 From: Robin Krahl Date: Tue, 7 Jan 2020 11:18:04 +0000 Subject: Add structopt dependency in version 0.3.7 This patch series replaces argparse with structopt in the argument handling code. As a first step, we need structopt as a dependency. Import subrepo structopt/:structopt at efbdda4753592e27bc430fb01f7b9650b2f3174d Import subrepo bitflags/:bitflags at 30668016aca6bd3b02c766e8347e0b4080d4c296 Import subrepo clap/:clap at 784524f7eb193e35f81082cc69454c8c21b948f7 Import subrepo heck/:heck at 093d56fbf001e1506e56dbfa38631d99b1066df1 Import subrepo proc-macro-error/:proc-macro-error at 6c4cfe79a622c5de8ae68557993542be46eacae2 Import subrepo proc-macro2/:proc-macro2 at d5d48eddca4566e5438e8a2cbed4a74e049544de Import subrepo quote/:quote at 727436c6c137b20f0f34dde5d8fda2679b9747ad Import subrepo rustversion/:rustversion at 0c5663313516263059ce9059ef81fc7a1cf655ca Import subrepo syn-mid/:syn-mid at 5d3d85414a9e6674e1857ec22a87b96e04a6851a Import subrepo syn/:syn at e87c27e87f6f4ef8919d0372bdb056d53ef0d8f3 Import subrepo textwrap/:textwrap at abcd618beae3f74841032aa5b53c1086b0a57ca2 Import subrepo unicode-segmentation/:unicode-segmentation at 637c9874c4fe0c205ff27787faf150a40295c6c3 Import subrepo unicode-width/:unicode-width at 3033826f8bf05e82724140a981d5941e48fce393 Import subrepo unicode-xid/:unicode-xid at 4baae9fffb156ba229665b972a9cd5991787ceb7 --- unicode-width/.gitignore | 3 + unicode-width/.travis.yml | 28 ++++ unicode-width/COPYRIGHT | 7 + unicode-width/Cargo.toml | 29 ++++ unicode-width/LICENSE-APACHE | 201 ++++++++++++++++++++++++ unicode-width/LICENSE-MIT | 25 +++ unicode-width/README.md | 58 +++++++ unicode-width/scripts/unicode.py | 321 +++++++++++++++++++++++++++++++++++++++ unicode-width/src/lib.rs | 131 ++++++++++++++++ unicode-width/src/tables.rs | 284 ++++++++++++++++++++++++++++++++++ unicode-width/src/tests.rs | 175 +++++++++++++++++++++ 11 files changed, 1262 insertions(+) create mode 100644 unicode-width/.gitignore create mode 100644 unicode-width/.travis.yml create mode 100644 unicode-width/COPYRIGHT create mode 100644 unicode-width/Cargo.toml create mode 100644 unicode-width/LICENSE-APACHE create mode 100644 unicode-width/LICENSE-MIT create mode 100644 unicode-width/README.md create mode 100755 unicode-width/scripts/unicode.py create mode 100644 unicode-width/src/lib.rs create mode 100644 unicode-width/src/tables.rs create mode 100644 unicode-width/src/tests.rs (limited to 'unicode-width') diff --git a/unicode-width/.gitignore b/unicode-width/.gitignore new file mode 100644 index 0000000..5cdcdba --- /dev/null +++ b/unicode-width/.gitignore @@ -0,0 +1,3 @@ +target +Cargo.lock +scripts/tmp diff --git a/unicode-width/.travis.yml b/unicode-width/.travis.yml new file mode 100644 index 0000000..64196fa --- /dev/null +++ b/unicode-width/.travis.yml @@ -0,0 +1,28 @@ +language: rust +rust: 'nightly' +sudo: false +script: + - cargo build --verbose --features bench + - cargo test --verbose --features bench + - cargo bench --verbose --features bench + - cargo clean + - cargo build --verbose + - cargo test --verbose +# next line is an ugly hack to fix an annoying bug where rustdoc tries to use the rustc_private unicode_width crate +# (there is probably a better fix than this) + - rm $(find /home/travis/.rustup -type f -name 'libunicode_width*') + - rustdoc --test README.md -L target/debug -L target/debug/deps + - cargo doc +after_success: | + [ $TRAVIS_BRANCH = master ] && + [ $TRAVIS_PULL_REQUEST = false ] && + echo '' > target/doc/index.html && + pip install ghp-import --user $USER && + $HOME/.local/bin/ghp-import -n target/doc && + git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages +env: + global: + secure: vHL3zrN8AF+H79jrB8OfzuPqsUHevo6ECzwqXPj2dMSqcSXEeCY/ENAfiyFg+oW8yEVP8X2BS1a/C9yvVQRLqLbm1HbZ/5vUpoggT9S0IhKqZMyAcLYXfIEUDMDQuaSdFndDaHvq8275ScgX1LRv1kcPjQoZHuaXWMH8y/Suvyo= +notifications: + email: + on_success: never diff --git a/unicode-width/COPYRIGHT b/unicode-width/COPYRIGHT new file mode 100644 index 0000000..b286ec1 --- /dev/null +++ b/unicode-width/COPYRIGHT @@ -0,0 +1,7 @@ +Licensed under the Apache License, Version 2.0 + or the MIT +license , +at your option. All files in the project carrying such +notice may not be copied, modified, or distributed except +according to those terms. diff --git a/unicode-width/Cargo.toml b/unicode-width/Cargo.toml new file mode 100644 index 0000000..35228f2 --- /dev/null +++ b/unicode-width/Cargo.toml @@ -0,0 +1,29 @@ +[package] + +name = "unicode-width" +version = "0.1.7" +authors = ["kwantam ", "Manish Goregaokar "] + +homepage = "https://github.com/unicode-rs/unicode-width" +repository = "https://github.com/unicode-rs/unicode-width" +documentation = "https://unicode-rs.github.io/unicode-width" +license = "MIT/Apache-2.0" +keywords = ["text", "width", "unicode"] +readme = "README.md" +description = """ +Determine displayed width of `char` and `str` types +according to Unicode Standard Annex #11 rules. +""" + +exclude = [ "target/*", "Cargo.lock" ] + +[dependencies] +std = { version = "1.0", package = "rustc-std-workspace-std", optional = true } +core = { version = "1.0", package = "rustc-std-workspace-core", optional = true } +compiler_builtins = { version = "0.1", optional = true } + +[features] +default = [] +no_std = [] +bench = [] +rustc-dep-of-std = ['std', 'core', 'compiler_builtins'] diff --git a/unicode-width/LICENSE-APACHE b/unicode-width/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/unicode-width/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/unicode-width/LICENSE-MIT b/unicode-width/LICENSE-MIT new file mode 100644 index 0000000..e69282e --- /dev/null +++ b/unicode-width/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2015 The Rust Project Developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/unicode-width/README.md b/unicode-width/README.md new file mode 100644 index 0000000..595e163 --- /dev/null +++ b/unicode-width/README.md @@ -0,0 +1,58 @@ +# unicode-width + +Determine displayed width of `char` and `str` types according to +[Unicode Standard Annex #11][UAX11] rules. + +[UAX11]: http://www.unicode.org/reports/tr11/ + +[![Build Status](https://travis-ci.org/unicode-rs/unicode-width.svg)](https://travis-ci.org/unicode-rs/unicode-width) + +[Documentation](https://unicode-rs.github.io/unicode-width/unicode_width/index.html) + +```rust +extern crate unicode_width; + +use unicode_width::UnicodeWidthStr; + +fn main() { + let teststr = "Hello, world!"; + let width = UnicodeWidthStr::width(teststr); + println!("{}", teststr); + println!("The above string is {} columns wide.", width); + let width = teststr.width_cjk(); + println!("The above string is {} columns wide (CJK).", width); +} +``` + +**NOTE:** The computed width values may not match the actual rendered column +width. For example, the woman scientist emoji comprises of a woman emoji, a +zero-width joiner and a microscope emoji. + +```rust +extern crate unicode_width; +use unicode_width::UnicodeWidthStr; + +fn main() { + assert_eq!(UnicodeWidthStr::width("👩"), 2); // Woman + assert_eq!(UnicodeWidthStr::width("🔬"), 2); // Microscope + assert_eq!(UnicodeWidthStr::width("👩‍🔬"), 4); // Woman scientist +} +``` + +See [Unicode Standard Annex #11][UAX11] for precise details on what is and isn't +covered by this crate. + +## features + +unicode-width does not depend on libstd, so it can be used in crates +with the `#![no_std]` attribute. + +## crates.io + +You can use this package in your project by adding the following +to your `Cargo.toml`: + +```toml +[dependencies] +unicode-width = "0.1.7" +``` diff --git a/unicode-width/scripts/unicode.py b/unicode-width/scripts/unicode.py new file mode 100755 index 0000000..5456ee3 --- /dev/null +++ b/unicode-width/scripts/unicode.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +# +# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. + +# This script uses the following Unicode tables: +# - EastAsianWidth.txt +# - ReadMe.txt +# - UnicodeData.txt +# +# Since this should not require frequent updates, we just store this +# out-of-line and check the unicode.rs file into git. + +import fileinput, re, os, sys, operator + +preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly + +#![allow(missing_docs, non_upper_case_globals, non_snake_case)] +''' + +# Mapping taken from Table 12 from: +# http://www.unicode.org/reports/tr44/#General_Category_Values +expanded_categories = { + 'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'], + 'Lm': ['L'], 'Lo': ['L'], + 'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'], + 'Nd': ['N'], 'Nl': ['N'], 'No': ['No'], + 'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'], + 'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'], + 'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'], + 'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'], + 'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'], +} + +# these are the surrogate codepoints, which are not valid rust characters +surrogate_codepoints = (0xd800, 0xdfff) + +def fetch(f): + if not os.path.exists(os.path.basename(f)): + os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s" + % f) + + if not os.path.exists(os.path.basename(f)): + sys.stderr.write("cannot load %s" % f) + exit(1) + +def is_surrogate(n): + return surrogate_codepoints[0] <= n <= surrogate_codepoints[1] + +def load_unicode_data(f): + fetch(f) + gencats = {} + + udict = {} + range_start = -1 + for line in fileinput.input(f): + data = line.split(';') + if len(data) != 15: + continue + cp = int(data[0], 16) + if is_surrogate(cp): + continue + if range_start >= 0: + for i in range(range_start, cp): + udict[i] = data + range_start = -1 + if data[1].endswith(", First>"): + range_start = cp + continue + udict[cp] = data + + for code in udict: + [code_org, name, gencat, combine, bidi, + decomp, deci, digit, num, mirror, + old, iso, upcase, lowcase, titlecase ] = udict[code] + + # place letter in categories as appropriate + for cat in [gencat, "Assigned"] + expanded_categories.get(gencat, []): + if cat not in gencats: + gencats[cat] = [] + gencats[cat].append(code) + + gencats = group_cats(gencats) + + return gencats + +def group_cats(cats): + cats_out = {} + for cat in cats: + cats_out[cat] = group_cat(cats[cat]) + return cats_out + +def group_cat(cat): + cat_out = [] + letters = sorted(set(cat)) + cur_start = letters.pop(0) + cur_end = cur_start + for letter in letters: + assert letter > cur_end, \ + "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter)) + if letter == cur_end + 1: + cur_end = letter + else: + cat_out.append((cur_start, cur_end)) + cur_start = cur_end = letter + cat_out.append((cur_start, cur_end)) + return cat_out + +def format_table_content(f, content, indent): + line = " "*indent + first = True + for chunk in content.split(","): + if len(line) + len(chunk) < 98: + if first: + line += chunk + else: + line += ", " + chunk + first = False + else: + f.write(line + ",\n") + line = " "*indent + chunk + f.write(line) + +# load all widths of want_widths, except those in except_cats +def load_east_asian_width(want_widths, except_cats): + f = "EastAsianWidth.txt" + fetch(f) + widths = {} + re1 = re.compile("^([0-9A-F]+);(\w+) +# (\w+)") + re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+);(\w+) +# (\w+)") + + for line in fileinput.input(f): + width = None + d_lo = 0 + d_hi = 0 + cat = None + m = re1.match(line) + if m: + d_lo = m.group(1) + d_hi = m.group(1) + width = m.group(2) + cat = m.group(3) + else: + m = re2.match(line) + if m: + d_lo = m.group(1) + d_hi = m.group(2) + width = m.group(3) + cat = m.group(4) + else: + continue + if cat in except_cats or width not in want_widths: + continue + d_lo = int(d_lo, 16) + d_hi = int(d_hi, 16) + if width not in widths: + widths[width] = [] + widths[width].append((d_lo, d_hi)) + return widths + +def escape_char(c): + return "'\\u{%x}'" % c + +def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True, + pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])), is_const=True): + pub_string = "const" + if not is_const: + pub_string = "let" + if is_pub: + pub_string = "pub " + pub_string + f.write(" %s %s: %s = &[\n" % (pub_string, name, t_type)) + data = "" + first = True + for dat in t_data: + if not first: + data += "," + first = False + data += pfun(dat) + format_table_content(f, data, 8) + f.write("\n ];\n\n") + +def emit_charwidth_module(f, width_table): + f.write("pub mod charwidth {") + f.write(""" + use core::option::Option::{self, Some, None}; + use core::result::Result::{Ok, Err}; + + #[inline] + fn bsearch_range_value_table(c: char, is_cjk: bool, r: &'static [(char, char, u8, u8)]) -> u8 { + use core::cmp::Ordering::{Equal, Less, Greater}; + match r.binary_search_by(|&(lo, hi, _, _)| { + if lo <= c && c <= hi { Equal } + else if hi < c { Less } + else { Greater } + }) { + Ok(idx) => { + let (_, _, r_ncjk, r_cjk) = r[idx]; + if is_cjk { r_cjk } else { r_ncjk } + } + Err(_) => 1 + } + } +""") + + f.write(""" + #[inline] + pub fn width(c: char, is_cjk: bool) -> Option { + match c as usize { + _c @ 0 => Some(0), // null is zero width + cu if cu < 0x20 => None, // control sequences have no width + cu if cu < 0x7F => Some(1), // ASCII + cu if cu < 0xA0 => None, // more control sequences + _ => Some(bsearch_range_value_table(c, is_cjk, charwidth_table) as usize) + } + } + +""") + + f.write(" // character width table. Based on Markus Kuhn's free wcwidth() implementation,\n") + f.write(" // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c\n") + emit_table(f, "charwidth_table", width_table, "&'static [(char, char, u8, u8)]", is_pub=False, + pfun=lambda x: "(%s,%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2], x[3])) + f.write("}\n\n") + +def remove_from_wtable(wtable, val): + wtable_out = [] + while wtable: + if wtable[0][1] < val: + wtable_out.append(wtable.pop(0)) + elif wtable[0][0] > val: + break + else: + (wt_lo, wt_hi, width, width_cjk) = wtable.pop(0) + if wt_lo == wt_hi == val: + continue + elif wt_lo == val: + wtable_out.append((wt_lo+1, wt_hi, width, width_cjk)) + elif wt_hi == val: + wtable_out.append((wt_lo, wt_hi-1, width, width_cjk)) + else: + wtable_out.append((wt_lo, val-1, width, width_cjk)) + wtable_out.append((val+1, wt_hi, width, width_cjk)) + if wtable: + wtable_out.extend(wtable) + return wtable_out + + + +def optimize_width_table(wtable): + wtable_out = [] + w_this = wtable.pop(0) + while wtable: + if w_this[1] == wtable[0][0] - 1 and w_this[2:3] == wtable[0][2:3]: + w_tmp = wtable.pop(0) + w_this = (w_this[0], w_tmp[1], w_tmp[2], w_tmp[3]) + else: + wtable_out.append(w_this) + w_this = wtable.pop(0) + wtable_out.append(w_this) + return wtable_out + +if __name__ == "__main__": + r = "tables.rs" + if os.path.exists(r): + os.remove(r) + with open(r, "w") as rf: + # write the file's preamble + rf.write(preamble) + + # download and parse all the data + fetch("ReadMe.txt") + with open("ReadMe.txt") as readme: + pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode" + unicode_version = re.search(pattern, readme.read()).groups() + rf.write(""" +/// The version of [Unicode](http://www.unicode.org/) +/// that this version of unicode-width is based on. +pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s); + +""" % unicode_version) + gencats = load_unicode_data("UnicodeData.txt") + + ### character width module + width_table = [] + for zwcat in ["Me", "Mn", "Cf"]: + width_table.extend([(lo_hi[0], lo_hi[1], 0, 0) for lo_hi in gencats[zwcat]]) + width_table.append((4448, 4607, 0, 0)) + + # get widths, except those that are explicitly marked zero-width above + ea_widths = load_east_asian_width(["W", "F", "A"], ["Me", "Mn", "Cf"]) + # these are doublewidth + for dwcat in ["W", "F"]: + width_table.extend([(lo_hi1[0], lo_hi1[1], 2, 2) for lo_hi1 in ea_widths[dwcat]]) + width_table.extend([(lo_hi2[0], lo_hi2[1], 1, 2) for lo_hi2 in ea_widths["A"]]) + + width_table.sort(key=lambda w: w[0]) + + # soft hyphen is not zero width in preformatted text; it's used to indicate + # a hyphen inserted to facilitate a linebreak. + width_table = remove_from_wtable(width_table, 173) + + # optimize the width table by collapsing adjacent entities when possible + width_table = optimize_width_table(width_table) + emit_charwidth_module(rf, width_table) diff --git a/unicode-width/src/lib.rs b/unicode-width/src/lib.rs new file mode 100644 index 0000000..1ee35c8 --- /dev/null +++ b/unicode-width/src/lib.rs @@ -0,0 +1,131 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Determine displayed width of `char` and `str` types according to +//! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) +//! rules. +//! +//! ```rust +//! extern crate unicode_width; +//! +//! use unicode_width::UnicodeWidthStr; +//! +//! fn main() { +//! let teststr = "Hello, world!"; +//! let width = UnicodeWidthStr::width(teststr); +//! println!("{}", teststr); +//! println!("The above string is {} columns wide.", width); +//! let width = teststr.width_cjk(); +//! println!("The above string is {} columns wide (CJK).", width); +//! } +//! ``` +//! +//! # features +//! +//! unicode-width supports a `no_std` feature. This eliminates dependence +//! on std, and instead uses equivalent functions from core. +//! +//! # crates.io +//! +//! You can use this package in your project by adding the following +//! to your `Cargo.toml`: +//! +//! ```toml +//! [dependencies] +//! unicode-width = "0.1.5" +//! ``` + +#![deny(missing_docs, unsafe_code)] +#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png", + html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")] + +#![cfg_attr(feature = "bench", feature(test))] +#![no_std] + +#[cfg(test)] +#[macro_use] +extern crate std; + +#[cfg(feature = "bench")] +extern crate test; + +use tables::charwidth as cw; +pub use tables::UNICODE_VERSION; + +use core::ops::Add; + +mod tables; + +#[cfg(test)] +mod tests; + +/// Methods for determining displayed width of Unicode characters. +pub trait UnicodeWidthChar { + /// Returns the character's displayed width in columns, or `None` if the + /// character is a control character other than `'\x00'`. + /// + /// This function treats characters in the Ambiguous category according + /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) + /// as 1 column wide. This is consistent with the recommendations for non-CJK + /// contexts, or when the context cannot be reliably determined. + fn width(self) -> Option; + + /// Returns the character's displayed width in columns, or `None` if the + /// character is a control character other than `'\x00'`. + /// + /// This function treats characters in the Ambiguous category according + /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) + /// as 2 columns wide. This is consistent with the recommendations for + /// CJK contexts. + fn width_cjk(self) -> Option; +} + +impl UnicodeWidthChar for char { + #[inline] + fn width(self) -> Option { cw::width(self, false) } + + #[inline] + fn width_cjk(self) -> Option { cw::width(self, true) } +} + +/// Methods for determining displayed width of Unicode strings. +pub trait UnicodeWidthStr { + /// Returns the string's displayed width in columns. + /// + /// Control characters are treated as having zero width. + /// + /// This function treats characters in the Ambiguous category according + /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) + /// as 1 column wide. This is consistent with the recommendations for + /// non-CJK contexts, or when the context cannot be reliably determined. + fn width<'a>(&'a self) -> usize; + + /// Returns the string's displayed width in columns. + /// + /// Control characters are treated as having zero width. + /// + /// This function treats characters in the Ambiguous category according + /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) + /// as 2 column wide. This is consistent with the recommendations for + /// CJK contexts. + fn width_cjk<'a>(&'a self) -> usize; +} + +impl UnicodeWidthStr for str { + #[inline] + fn width(&self) -> usize { + self.chars().map(|c| cw::width(c, false).unwrap_or(0)).fold(0, Add::add) + } + + #[inline] + fn width_cjk(&self) -> usize { + self.chars().map(|c| cw::width(c, true).unwrap_or(0)).fold(0, Add::add) + } +} diff --git a/unicode-width/src/tables.rs b/unicode-width/src/tables.rs new file mode 100644 index 0000000..7ae450b --- /dev/null +++ b/unicode-width/src/tables.rs @@ -0,0 +1,284 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly + +#![allow(missing_docs, non_upper_case_globals, non_snake_case)] + +/// The version of [Unicode](http://www.unicode.org/) +/// that this version of unicode-width is based on. +pub const UNICODE_VERSION: (u64, u64, u64) = (12, 1, 0); + +pub mod charwidth { + use core::option::Option::{self, Some, None}; + use core::result::Result::{Ok, Err}; + + #[inline] + fn bsearch_range_value_table(c: char, is_cjk: bool, r: &'static [(char, char, u8, u8)]) -> u8 { + use core::cmp::Ordering::{Equal, Less, Greater}; + match r.binary_search_by(|&(lo, hi, _, _)| { + if lo <= c && c <= hi { Equal } + else if hi < c { Less } + else { Greater } + }) { + Ok(idx) => { + let (_, _, r_ncjk, r_cjk) = r[idx]; + if is_cjk { r_cjk } else { r_ncjk } + } + Err(_) => 1 + } + } + + #[inline] + pub fn width(c: char, is_cjk: bool) -> Option { + match c as usize { + _c @ 0 => Some(0), // null is zero width + cu if cu < 0x20 => None, // control sequences have no width + cu if cu < 0x7F => Some(1), // ASCII + cu if cu < 0xA0 => None, // more control sequences + _ => Some(bsearch_range_value_table(c, is_cjk, charwidth_table) as usize) + } + } + + // character width table. Based on Markus Kuhn's free wcwidth() implementation, + // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + const charwidth_table: &'static [(char, char, u8, u8)] = &[ + ('\u{a1}', '\u{a1}', 1, 2), ('\u{a4}', '\u{a4}', 1, 2), ('\u{a7}', '\u{a8}', 1, 2), + ('\u{aa}', '\u{aa}', 1, 2), ('\u{ae}', '\u{ae}', 1, 2), ('\u{b0}', '\u{b4}', 1, 2), + ('\u{b6}', '\u{ba}', 1, 2), ('\u{bc}', '\u{bf}', 1, 2), ('\u{c6}', '\u{c6}', 1, 2), + ('\u{d0}', '\u{d0}', 1, 2), ('\u{d7}', '\u{d8}', 1, 2), ('\u{de}', '\u{e1}', 1, 2), + ('\u{e6}', '\u{e6}', 1, 2), ('\u{e8}', '\u{ea}', 1, 2), ('\u{ec}', '\u{ed}', 1, 2), + ('\u{f0}', '\u{f0}', 1, 2), ('\u{f2}', '\u{f3}', 1, 2), ('\u{f7}', '\u{fa}', 1, 2), + ('\u{fc}', '\u{fc}', 1, 2), ('\u{fe}', '\u{fe}', 1, 2), ('\u{101}', '\u{101}', 1, 2), + ('\u{111}', '\u{111}', 1, 2), ('\u{113}', '\u{113}', 1, 2), ('\u{11b}', '\u{11b}', 1, 2), + ('\u{126}', '\u{127}', 1, 2), ('\u{12b}', '\u{12b}', 1, 2), ('\u{131}', '\u{133}', 1, 2), + ('\u{138}', '\u{138}', 1, 2), ('\u{13f}', '\u{142}', 1, 2), ('\u{144}', '\u{144}', 1, 2), + ('\u{148}', '\u{14b}', 1, 2), ('\u{14d}', '\u{14d}', 1, 2), ('\u{152}', '\u{153}', 1, 2), + ('\u{166}', '\u{167}', 1, 2), ('\u{16b}', '\u{16b}', 1, 2), ('\u{1ce}', '\u{1ce}', 1, 2), + ('\u{1d0}', '\u{1d0}', 1, 2), ('\u{1d2}', '\u{1d2}', 1, 2), ('\u{1d4}', '\u{1d4}', 1, 2), + ('\u{1d6}', '\u{1d6}', 1, 2), ('\u{1d8}', '\u{1d8}', 1, 2), ('\u{1da}', '\u{1da}', 1, 2), + ('\u{1dc}', '\u{1dc}', 1, 2), ('\u{251}', '\u{251}', 1, 2), ('\u{261}', '\u{261}', 1, 2), + ('\u{2c4}', '\u{2c4}', 1, 2), ('\u{2c7}', '\u{2c7}', 1, 2), ('\u{2c9}', '\u{2cb}', 1, 2), + ('\u{2cd}', '\u{2cd}', 1, 2), ('\u{2d0}', '\u{2d0}', 1, 2), ('\u{2d8}', '\u{2db}', 1, 2), + ('\u{2dd}', '\u{2dd}', 1, 2), ('\u{2df}', '\u{2df}', 1, 2), ('\u{300}', '\u{36f}', 0, 0), + ('\u{391}', '\u{3a1}', 1, 2), ('\u{3a3}', '\u{3a9}', 1, 2), ('\u{3b1}', '\u{3c1}', 1, 2), + ('\u{3c3}', '\u{3c9}', 1, 2), ('\u{401}', '\u{401}', 1, 2), ('\u{410}', '\u{44f}', 1, 2), + ('\u{451}', '\u{451}', 1, 2), ('\u{483}', '\u{489}', 0, 0), ('\u{591}', '\u{5bd}', 0, 0), + ('\u{5bf}', '\u{5bf}', 0, 0), ('\u{5c1}', '\u{5c2}', 0, 0), ('\u{5c4}', '\u{5c5}', 0, 0), + ('\u{5c7}', '\u{5c7}', 0, 0), ('\u{600}', '\u{605}', 0, 0), ('\u{610}', '\u{61a}', 0, 0), + ('\u{61c}', '\u{61c}', 0, 0), ('\u{64b}', '\u{65f}', 0, 0), ('\u{670}', '\u{670}', 0, 0), + ('\u{6d6}', '\u{6dd}', 0, 0), ('\u{6df}', '\u{6e4}', 0, 0), ('\u{6e7}', '\u{6e8}', 0, 0), + ('\u{6ea}', '\u{6ed}', 0, 0), ('\u{70f}', '\u{70f}', 0, 0), ('\u{711}', '\u{711}', 0, 0), + ('\u{730}', '\u{74a}', 0, 0), ('\u{7a6}', '\u{7b0}', 0, 0), ('\u{7eb}', '\u{7f3}', 0, 0), + ('\u{7fd}', '\u{7fd}', 0, 0), ('\u{816}', '\u{819}', 0, 0), ('\u{81b}', '\u{823}', 0, 0), + ('\u{825}', '\u{827}', 0, 0), ('\u{829}', '\u{82d}', 0, 0), ('\u{859}', '\u{85b}', 0, 0), + ('\u{8d3}', '\u{902}', 0, 0), ('\u{93a}', '\u{93a}', 0, 0), ('\u{93c}', '\u{93c}', 0, 0), + ('\u{941}', '\u{948}', 0, 0), ('\u{94d}', '\u{94d}', 0, 0), ('\u{951}', '\u{957}', 0, 0), + ('\u{962}', '\u{963}', 0, 0), ('\u{981}', '\u{981}', 0, 0), ('\u{9bc}', '\u{9bc}', 0, 0), + ('\u{9c1}', '\u{9c4}', 0, 0), ('\u{9cd}', '\u{9cd}', 0, 0), ('\u{9e2}', '\u{9e3}', 0, 0), + ('\u{9fe}', '\u{9fe}', 0, 0), ('\u{a01}', '\u{a02}', 0, 0), ('\u{a3c}', '\u{a3c}', 0, 0), + ('\u{a41}', '\u{a42}', 0, 0), ('\u{a47}', '\u{a48}', 0, 0), ('\u{a4b}', '\u{a4d}', 0, 0), + ('\u{a51}', '\u{a51}', 0, 0), ('\u{a70}', '\u{a71}', 0, 0), ('\u{a75}', '\u{a75}', 0, 0), + ('\u{a81}', '\u{a82}', 0, 0), ('\u{abc}', '\u{abc}', 0, 0), ('\u{ac1}', '\u{ac5}', 0, 0), + ('\u{ac7}', '\u{ac8}', 0, 0), ('\u{acd}', '\u{acd}', 0, 0), ('\u{ae2}', '\u{ae3}', 0, 0), + ('\u{afa}', '\u{aff}', 0, 0), ('\u{b01}', '\u{b01}', 0, 0), ('\u{b3c}', '\u{b3c}', 0, 0), + ('\u{b3f}', '\u{b3f}', 0, 0), ('\u{b41}', '\u{b44}', 0, 0), ('\u{b4d}', '\u{b4d}', 0, 0), + ('\u{b56}', '\u{b56}', 0, 0), ('\u{b62}', '\u{b63}', 0, 0), ('\u{b82}', '\u{b82}', 0, 0), + ('\u{bc0}', '\u{bc0}', 0, 0), ('\u{bcd}', '\u{bcd}', 0, 0), ('\u{c00}', '\u{c00}', 0, 0), + ('\u{c04}', '\u{c04}', 0, 0), ('\u{c3e}', '\u{c40}', 0, 0), ('\u{c46}', '\u{c48}', 0, 0), + ('\u{c4a}', '\u{c4d}', 0, 0), ('\u{c55}', '\u{c56}', 0, 0), ('\u{c62}', '\u{c63}', 0, 0), + ('\u{c81}', '\u{c81}', 0, 0), ('\u{cbc}', '\u{cbc}', 0, 0), ('\u{cbf}', '\u{cbf}', 0, 0), + ('\u{cc6}', '\u{cc6}', 0, 0), ('\u{ccc}', '\u{ccd}', 0, 0), ('\u{ce2}', '\u{ce3}', 0, 0), + ('\u{d00}', '\u{d01}', 0, 0), ('\u{d3b}', '\u{d3c}', 0, 0), ('\u{d41}', '\u{d44}', 0, 0), + ('\u{d4d}', '\u{d4d}', 0, 0), ('\u{d62}', '\u{d63}', 0, 0), ('\u{dca}', '\u{dca}', 0, 0), + ('\u{dd2}', '\u{dd4}', 0, 0), ('\u{dd6}', '\u{dd6}', 0, 0), ('\u{e31}', '\u{e31}', 0, 0), + ('\u{e34}', '\u{e3a}', 0, 0), ('\u{e47}', '\u{e4e}', 0, 0), ('\u{eb1}', '\u{eb1}', 0, 0), + ('\u{eb4}', '\u{ebc}', 0, 0), ('\u{ec8}', '\u{ecd}', 0, 0), ('\u{f18}', '\u{f19}', 0, 0), + ('\u{f35}', '\u{f35}', 0, 0), ('\u{f37}', '\u{f37}', 0, 0), ('\u{f39}', '\u{f39}', 0, 0), + ('\u{f71}', '\u{f7e}', 0, 0), ('\u{f80}', '\u{f84}', 0, 0), ('\u{f86}', '\u{f87}', 0, 0), + ('\u{f8d}', '\u{f97}', 0, 0), ('\u{f99}', '\u{fbc}', 0, 0), ('\u{fc6}', '\u{fc6}', 0, 0), + ('\u{102d}', '\u{1030}', 0, 0), ('\u{1032}', '\u{1037}', 0, 0), ('\u{1039}', '\u{103a}', 0, + 0), ('\u{103d}', '\u{103e}', 0, 0), ('\u{1058}', '\u{1059}', 0, 0), ('\u{105e}', '\u{1060}', + 0, 0), ('\u{1071}', '\u{1074}', 0, 0), ('\u{1082}', '\u{1082}', 0, 0), ('\u{1085}', + '\u{1086}', 0, 0), ('\u{108d}', '\u{108d}', 0, 0), ('\u{109d}', '\u{109d}', 0, 0), + ('\u{1100}', '\u{115f}', 2, 2), ('\u{1160}', '\u{11ff}', 0, 0), ('\u{135d}', '\u{135f}', 0, + 0), ('\u{1712}', '\u{1714}', 0, 0), ('\u{1732}', '\u{1734}', 0, 0), ('\u{1752}', '\u{1753}', + 0, 0), ('\u{1772}', '\u{1773}', 0, 0), ('\u{17b4}', '\u{17b5}', 0, 0), ('\u{17b7}', + '\u{17bd}', 0, 0), ('\u{17c6}', '\u{17c6}', 0, 0), ('\u{17c9}', '\u{17d3}', 0, 0), + ('\u{17dd}', '\u{17dd}', 0, 0), ('\u{180b}', '\u{180e}', 0, 0), ('\u{1885}', '\u{1886}', 0, + 0), ('\u{18a9}', '\u{18a9}', 0, 0), ('\u{1920}', '\u{1922}', 0, 0), ('\u{1927}', '\u{1928}', + 0, 0), ('\u{1932}', '\u{1932}', 0, 0), ('\u{1939}', '\u{193b}', 0, 0), ('\u{1a17}', + '\u{1a18}', 0, 0), ('\u{1a1b}', '\u{1a1b}', 0, 0), ('\u{1a56}', '\u{1a56}', 0, 0), + ('\u{1a58}', '\u{1a5e}', 0, 0), ('\u{1a60}', '\u{1a60}', 0, 0), ('\u{1a62}', '\u{1a62}', 0, + 0), ('\u{1a65}', '\u{1a6c}', 0, 0), ('\u{1a73}', '\u{1a7c}', 0, 0), ('\u{1a7f}', '\u{1a7f}', + 0, 0), ('\u{1ab0}', '\u{1abe}', 0, 0), ('\u{1b00}', '\u{1b03}', 0, 0), ('\u{1b34}', + '\u{1b34}', 0, 0), ('\u{1b36}', '\u{1b3a}', 0, 0), ('\u{1b3c}', '\u{1b3c}', 0, 0), + ('\u{1b42}', '\u{1b42}', 0, 0), ('\u{1b6b}', '\u{1b73}', 0, 0), ('\u{1b80}', '\u{1b81}', 0, + 0), ('\u{1ba2}', '\u{1ba5}', 0, 0), ('\u{1ba8}', '\u{1ba9}', 0, 0), ('\u{1bab}', '\u{1bad}', + 0, 0), ('\u{1be6}', '\u{1be6}', 0, 0), ('\u{1be8}', '\u{1be9}', 0, 0), ('\u{1bed}', + '\u{1bed}', 0, 0), ('\u{1bef}', '\u{1bf1}', 0, 0), ('\u{1c2c}', '\u{1c33}', 0, 0), + ('\u{1c36}', '\u{1c37}', 0, 0), ('\u{1cd0}', '\u{1cd2}', 0, 0), ('\u{1cd4}', '\u{1ce0}', 0, + 0), ('\u{1ce2}', '\u{1ce8}', 0, 0), ('\u{1ced}', '\u{1ced}', 0, 0), ('\u{1cf4}', '\u{1cf4}', + 0, 0), ('\u{1cf8}', '\u{1cf9}', 0, 0), ('\u{1dc0}', '\u{1df9}', 0, 0), ('\u{1dfb}', + '\u{1dff}', 0, 0), ('\u{200b}', '\u{200f}', 0, 0), ('\u{2010}', '\u{2010}', 1, 2), + ('\u{2013}', '\u{2016}', 1, 2), ('\u{2018}', '\u{2019}', 1, 2), ('\u{201c}', '\u{201d}', 1, + 2), ('\u{2020}', '\u{2022}', 1, 2), ('\u{2024}', '\u{2027}', 1, 2), ('\u{202a}', '\u{202e}', + 0, 0), ('\u{2030}', '\u{2030}', 1, 2), ('\u{2032}', '\u{2033}', 1, 2), ('\u{2035}', + '\u{2035}', 1, 2), ('\u{203b}', '\u{203b}', 1, 2), ('\u{203e}', '\u{203e}', 1, 2), + ('\u{2060}', '\u{2064}', 0, 0), ('\u{2066}', '\u{206f}', 0, 0), ('\u{2074}', '\u{2074}', 1, + 2), ('\u{207f}', '\u{207f}', 1, 2), ('\u{2081}', '\u{2084}', 1, 2), ('\u{20ac}', '\u{20ac}', + 1, 2), ('\u{20d0}', '\u{20f0}', 0, 0), ('\u{2103}', '\u{2103}', 1, 2), ('\u{2105}', + '\u{2105}', 1, 2), ('\u{2109}', '\u{2109}', 1, 2), ('\u{2113}', '\u{2113}', 1, 2), + ('\u{2116}', '\u{2116}', 1, 2), ('\u{2121}', '\u{2122}', 1, 2), ('\u{2126}', '\u{2126}', 1, + 2), ('\u{212b}', '\u{212b}', 1, 2), ('\u{2153}', '\u{2154}', 1, 2), ('\u{215b}', '\u{215e}', + 1, 2), ('\u{2160}', '\u{216b}', 1, 2), ('\u{2170}', '\u{2179}', 1, 2), ('\u{2189}', + '\u{2189}', 1, 2), ('\u{2190}', '\u{2199}', 1, 2), ('\u{21b8}', '\u{21b9}', 1, 2), + ('\u{21d2}', '\u{21d2}', 1, 2), ('\u{21d4}', '\u{21d4}', 1, 2), ('\u{21e7}', '\u{21e7}', 1, + 2), ('\u{2200}', '\u{2200}', 1, 2), ('\u{2202}', '\u{2203}', 1, 2), ('\u{2207}', '\u{2208}', + 1, 2), ('\u{220b}', '\u{220b}', 1, 2), ('\u{220f}', '\u{220f}', 1, 2), ('\u{2211}', + '\u{2211}', 1, 2), ('\u{2215}', '\u{2215}', 1, 2), ('\u{221a}', '\u{221a}', 1, 2), + ('\u{221d}', '\u{2220}', 1, 2), ('\u{2223}', '\u{2223}', 1, 2), ('\u{2225}', '\u{2225}', 1, + 2), ('\u{2227}', '\u{222c}', 1, 2), ('\u{222e}', '\u{222e}', 1, 2), ('\u{2234}', '\u{2237}', + 1, 2), ('\u{223c}', '\u{223d}', 1, 2), ('\u{2248}', '\u{2248}', 1, 2), ('\u{224c}', + '\u{224c}', 1, 2), ('\u{2252}', '\u{2252}', 1, 2), ('\u{2260}', '\u{2261}', 1, 2), + ('\u{2264}', '\u{2267}', 1, 2), ('\u{226a}', '\u{226b}', 1, 2), ('\u{226e}', '\u{226f}', 1, + 2), ('\u{2282}', '\u{2283}', 1, 2), ('\u{2286}', '\u{2287}', 1, 2), ('\u{2295}', '\u{2295}', + 1, 2), ('\u{2299}', '\u{2299}', 1, 2), ('\u{22a5}', '\u{22a5}', 1, 2), ('\u{22bf}', + '\u{22bf}', 1, 2), ('\u{2312}', '\u{2312}', 1, 2), ('\u{231a}', '\u{231b}', 2, 2), + ('\u{2329}', '\u{232a}', 2, 2), ('\u{23e9}', '\u{23ec}', 2, 2), ('\u{23f0}', '\u{23f0}', 2, + 2), ('\u{23f3}', '\u{23f3}', 2, 2), ('\u{2460}', '\u{24e9}', 1, 2), ('\u{24eb}', '\u{254b}', + 1, 2), ('\u{2550}', '\u{2573}', 1, 2), ('\u{2580}', '\u{258f}', 1, 2), ('\u{2592}', + '\u{2595}', 1, 2), ('\u{25a0}', '\u{25a1}', 1, 2), ('\u{25a3}', '\u{25a9}', 1, 2), + ('\u{25b2}', '\u{25b3}', 1, 2), ('\u{25b6}', '\u{25b7}', 1, 2), ('\u{25bc}', '\u{25bd}', 1, + 2), ('\u{25c0}', '\u{25c1}', 1, 2), ('\u{25c6}', '\u{25c8}', 1, 2), ('\u{25cb}', '\u{25cb}', + 1, 2), ('\u{25ce}', '\u{25d1}', 1, 2), ('\u{25e2}', '\u{25e5}', 1, 2), ('\u{25ef}', + '\u{25ef}', 1, 2), ('\u{25fd}', '\u{25fe}', 2, 2), ('\u{2605}', '\u{2606}', 1, 2), + ('\u{2609}', '\u{2609}', 1, 2), ('\u{260e}', '\u{260f}', 1, 2), ('\u{2614}', '\u{2615}', 2, + 2), ('\u{261c}', '\u{261c}', 1, 2), ('\u{261e}', '\u{261e}', 1, 2), ('\u{2640}', '\u{2640}', + 1, 2), ('\u{2642}', '\u{2642}', 1, 2), ('\u{2648}', '\u{2653}', 2, 2), ('\u{2660}', + '\u{2661}', 1, 2), ('\u{2663}', '\u{2665}', 1, 2), ('\u{2667}', '\u{266a}', 1, 2), + ('\u{266c}', '\u{266d}', 1, 2), ('\u{266f}', '\u{266f}', 1, 2), ('\u{267f}', '\u{267f}', 2, + 2), ('\u{2693}', '\u{2693}', 2, 2), ('\u{269e}', '\u{269f}', 1, 2), ('\u{26a1}', '\u{26a1}', + 2, 2), ('\u{26aa}', '\u{26ab}', 2, 2), ('\u{26bd}', '\u{26be}', 2, 2), ('\u{26bf}', + '\u{26bf}', 1, 2), ('\u{26c4}', '\u{26c5}', 2, 2), ('\u{26c6}', '\u{26cd}', 1, 2), + ('\u{26ce}', '\u{26ce}', 2, 2), ('\u{26cf}', '\u{26d3}', 1, 2), ('\u{26d4}', '\u{26d4}', 2, + 2), ('\u{26d5}', '\u{26e1}', 1, 2), ('\u{26e3}', '\u{26e3}', 1, 2), ('\u{26e8}', '\u{26e9}', + 1, 2), ('\u{26ea}', '\u{26ea}', 2, 2), ('\u{26eb}', '\u{26f1}', 1, 2), ('\u{26f2}', + '\u{26f3}', 2, 2), ('\u{26f4}', '\u{26f4}', 1, 2), ('\u{26f5}', '\u{26f5}', 2, 2), + ('\u{26f6}', '\u{26f9}', 1, 2), ('\u{26fa}', '\u{26fa}', 2, 2), ('\u{26fb}', '\u{26fc}', 1, + 2), ('\u{26fd}', '\u{26fd}', 2, 2), ('\u{26fe}', '\u{26ff}', 1, 2), ('\u{2705}', '\u{2705}', + 2, 2), ('\u{270a}', '\u{270b}', 2, 2), ('\u{2728}', '\u{2728}', 2, 2), ('\u{273d}', + '\u{273d}', 1, 2), ('\u{274c}', '\u{274c}', 2, 2), ('\u{274e}', '\u{274e}', 2, 2), + ('\u{2753}', '\u{2755}', 2, 2), ('\u{2757}', '\u{2757}', 2, 2), ('\u{2776}', '\u{277f}', 1, + 2), ('\u{2795}', '\u{2797}', 2, 2), ('\u{27b0}', '\u{27b0}', 2, 2), ('\u{27bf}', '\u{27bf}', + 2, 2), ('\u{2b1b}', '\u{2b1c}', 2, 2), ('\u{2b50}', '\u{2b50}', 2, 2), ('\u{2b55}', + '\u{2b55}', 2, 2), ('\u{2b56}', '\u{2b59}', 1, 2), ('\u{2cef}', '\u{2cf1}', 0, 0), + ('\u{2d7f}', '\u{2d7f}', 0, 0), ('\u{2de0}', '\u{2dff}', 0, 0), ('\u{2e80}', '\u{2e99}', 2, + 2), ('\u{2e9b}', '\u{2ef3}', 2, 2), ('\u{2f00}', '\u{2fd5}', 2, 2), ('\u{2ff0}', '\u{2ffb}', + 2, 2), ('\u{3000}', '\u{3029}', 2, 2), ('\u{302a}', '\u{302d}', 0, 0), ('\u{302e}', + '\u{303e}', 2, 2), ('\u{3041}', '\u{3096}', 2, 2), ('\u{3099}', '\u{309a}', 0, 0), + ('\u{309b}', '\u{30ff}', 2, 2), ('\u{3105}', '\u{312f}', 2, 2), ('\u{3131}', '\u{318e}', 2, + 2), ('\u{3190}', '\u{31ba}', 2, 2), ('\u{31c0}', '\u{31e3}', 2, 2), ('\u{31f0}', '\u{321e}', + 2, 2), ('\u{3220}', '\u{3247}', 2, 2), ('\u{3248}', '\u{324f}', 1, 2), ('\u{3250}', + '\u{4dbf}', 2, 2), ('\u{4e00}', '\u{a48c}', 2, 2), ('\u{a490}', '\u{a4c6}', 2, 2), + ('\u{a66f}', '\u{a672}', 0, 0), ('\u{a674}', '\u{a67d}', 0, 0), ('\u{a69e}', '\u{a69f}', 0, + 0), ('\u{a6f0}', '\u{a6f1}', 0, 0), ('\u{a802}', '\u{a802}', 0, 0), ('\u{a806}', '\u{a806}', + 0, 0), ('\u{a80b}', '\u{a80b}', 0, 0), ('\u{a825}', '\u{a826}', 0, 0), ('\u{a8c4}', + '\u{a8c5}', 0, 0), ('\u{a8e0}', '\u{a8f1}', 0, 0), ('\u{a8ff}', '\u{a8ff}', 0, 0), + ('\u{a926}', '\u{a92d}', 0, 0), ('\u{a947}', '\u{a951}', 0, 0), ('\u{a960}', '\u{a97c}', 2, + 2), ('\u{a980}', '\u{a982}', 0, 0), ('\u{a9b3}', '\u{a9b3}', 0, 0), ('\u{a9b6}', '\u{a9b9}', + 0, 0), ('\u{a9bc}', '\u{a9bd}', 0, 0), ('\u{a9e5}', '\u{a9e5}', 0, 0), ('\u{aa29}', + '\u{aa2e}', 0, 0), ('\u{aa31}', '\u{aa32}', 0, 0), ('\u{aa35}', '\u{aa36}', 0, 0), + ('\u{aa43}', '\u{aa43}', 0, 0), ('\u{aa4c}', '\u{aa4c}', 0, 0), ('\u{aa7c}', '\u{aa7c}', 0, + 0), ('\u{aab0}', '\u{aab0}', 0, 0), ('\u{aab2}', '\u{aab4}', 0, 0), ('\u{aab7}', '\u{aab8}', + 0, 0), ('\u{aabe}', '\u{aabf}', 0, 0), ('\u{aac1}', '\u{aac1}', 0, 0), ('\u{aaec}', + '\u{aaed}', 0, 0), ('\u{aaf6}', '\u{aaf6}', 0, 0), ('\u{abe5}', '\u{abe5}', 0, 0), + ('\u{abe8}', '\u{abe8}', 0, 0), ('\u{abed}', '\u{abed}', 0, 0), ('\u{ac00}', '\u{d7a3}', 2, + 2), ('\u{e000}', '\u{f8ff}', 1, 2), ('\u{f900}', '\u{faff}', 2, 2), ('\u{fb1e}', '\u{fb1e}', + 0, 0), ('\u{fe00}', '\u{fe0f}', 0, 0), ('\u{fe10}', '\u{fe19}', 2, 2), ('\u{fe20}', + '\u{fe2f}', 0, 0), ('\u{fe30}', '\u{fe52}', 2, 2), ('\u{fe54}', '\u{fe66}', 2, 2), + ('\u{fe68}', '\u{fe6b}', 2, 2), ('\u{feff}', '\u{feff}', 0, 0), ('\u{ff01}', '\u{ff60}', 2, + 2), ('\u{ffe0}', '\u{ffe6}', 2, 2), ('\u{fff9}', '\u{fffb}', 0, 0), ('\u{fffd}', '\u{fffd}', + 1, 2), ('\u{101fd}', '\u{101fd}', 0, 0), ('\u{102e0}', '\u{102e0}', 0, 0), ('\u{10376}', + '\u{1037a}', 0, 0), ('\u{10a01}', '\u{10a03}', 0, 0), ('\u{10a05}', '\u{10a06}', 0, 0), + ('\u{10a0c}', '\u{10a0f}', 0, 0), ('\u{10a38}', '\u{10a3a}', 0, 0), ('\u{10a3f}', + '\u{10a3f}', 0, 0), ('\u{10ae5}', '\u{10ae6}', 0, 0), ('\u{10d24}', '\u{10d27}', 0, 0), + ('\u{10f46}', '\u{10f50}', 0, 0), ('\u{11001}', '\u{11001}', 0, 0), ('\u{11038}', + '\u{11046}', 0, 0), ('\u{1107f}', '\u{11081}', 0, 0), ('\u{110b3}', '\u{110b6}', 0, 0), + ('\u{110b9}', '\u{110ba}', 0, 0), ('\u{110bd}', '\u{110bd}', 0, 0), ('\u{110cd}', + '\u{110cd}', 0, 0), ('\u{11100}', '\u{11102}', 0, 0), ('\u{11127}', '\u{1112b}', 0, 0), + ('\u{1112d}', '\u{11134}', 0, 0), ('\u{11173}', '\u{11173}', 0, 0), ('\u{11180}', + '\u{11181}', 0, 0), ('\u{111b6}', '\u{111be}', 0, 0), ('\u{111c9}', '\u{111cc}', 0, 0), + ('\u{1122f}', '\u{11231}', 0, 0), ('\u{11234}', '\u{11234}', 0, 0), ('\u{11236}', + '\u{11237}', 0, 0), ('\u{1123e}', '\u{1123e}', 0, 0), ('\u{112df}', '\u{112df}', 0, 0), + ('\u{112e3}', '\u{112ea}', 0, 0), ('\u{11300}', '\u{11301}', 0, 0), ('\u{1133b}', + '\u{1133c}', 0, 0), ('\u{11340}', '\u{11340}', 0, 0), ('\u{11366}', '\u{1136c}', 0, 0), + ('\u{11370}', '\u{11374}', 0, 0), ('\u{11438}', '\u{1143f}', 0, 0), ('\u{11442}', + '\u{11444}', 0, 0), ('\u{11446}', '\u{11446}', 0, 0), ('\u{1145e}', '\u{1145e}', 0, 0), + ('\u{114b3}', '\u{114b8}', 0, 0), ('\u{114ba}', '\u{114ba}', 0, 0), ('\u{114bf}', + '\u{114c0}', 0, 0), ('\u{114c2}', '\u{114c3}', 0, 0), ('\u{115b2}', '\u{115b5}', 0, 0), + ('\u{115bc}', '\u{115bd}', 0, 0), ('\u{115bf}', '\u{115c0}', 0, 0), ('\u{115dc}', + '\u{115dd}', 0, 0), ('\u{11633}', '\u{1163a}', 0, 0), ('\u{1163d}', '\u{1163d}', 0, 0), + ('\u{1163f}', '\u{11640}', 0, 0), ('\u{116ab}', '\u{116ab}', 0, 0), ('\u{116ad}', + '\u{116ad}', 0, 0), ('\u{116b0}', '\u{116b5}', 0, 0), ('\u{116b7}', '\u{116b7}', 0, 0), + ('\u{1171d}', '\u{1171f}', 0, 0), ('\u{11722}', '\u{11725}', 0, 0), ('\u{11727}', + '\u{1172b}', 0, 0), ('\u{1182f}', '\u{11837}', 0, 0), ('\u{11839}', '\u{1183a}', 0, 0), + ('\u{119d4}', '\u{119d7}', 0, 0), ('\u{119da}', '\u{119db}', 0, 0), ('\u{119e0}', + '\u{119e0}', 0, 0), ('\u{11a01}', '\u{11a0a}', 0, 0), ('\u{11a33}', '\u{11a38}', 0, 0), + ('\u{11a3b}', '\u{11a3e}', 0, 0), ('\u{11a47}', '\u{11a47}', 0, 0), ('\u{11a51}', + '\u{11a56}', 0, 0), ('\u{11a59}', '\u{11a5b}', 0, 0), ('\u{11a8a}', '\u{11a96}', 0, 0), + ('\u{11a98}', '\u{11a99}', 0, 0), ('\u{11c30}', '\u{11c36}', 0, 0), ('\u{11c38}', + '\u{11c3d}', 0, 0), ('\u{11c3f}', '\u{11c3f}', 0, 0), ('\u{11c92}', '\u{11ca7}', 0, 0), + ('\u{11caa}', '\u{11cb0}', 0, 0), ('\u{11cb2}', '\u{11cb3}', 0, 0), ('\u{11cb5}', + '\u{11cb6}', 0, 0), ('\u{11d31}', '\u{11d36}', 0, 0), ('\u{11d3a}', '\u{11d3a}', 0, 0), + ('\u{11d3c}', '\u{11d3d}', 0, 0), ('\u{11d3f}', '\u{11d45}', 0, 0), ('\u{11d47}', + '\u{11d47}', 0, 0), ('\u{11d90}', '\u{11d91}', 0, 0), ('\u{11d95}', '\u{11d95}', 0, 0), + ('\u{11d97}', '\u{11d97}', 0, 0), ('\u{11ef3}', '\u{11ef4}', 0, 0), ('\u{13430}', + '\u{13438}', 0, 0), ('\u{16af0}', '\u{16af4}', 0, 0), ('\u{16b30}', '\u{16b36}', 0, 0), + ('\u{16f4f}', '\u{16f4f}', 0, 0), ('\u{16f8f}', '\u{16f92}', 0, 0), ('\u{16fe0}', + '\u{16fe3}', 2, 2), ('\u{17000}', '\u{187f7}', 2, 2), ('\u{18800}', '\u{18af2}', 2, 2), + ('\u{1b000}', '\u{1b11e}', 2, 2), ('\u{1b150}', '\u{1b152}', 2, 2), ('\u{1b164}', + '\u{1b167}', 2, 2), ('\u{1b170}', '\u{1b2fb}', 2, 2), ('\u{1bc9d}', '\u{1bc9e}', 0, 0), + ('\u{1bca0}', '\u{1bca3}', 0, 0), ('\u{1d167}', '\u{1d169}', 0, 0), ('\u{1d173}', + '\u{1d182}', 0, 0), ('\u{1d185}', '\u{1d18b}', 0, 0), ('\u{1d1aa}', '\u{1d1ad}', 0, 0), + ('\u{1d242}', '\u{1d244}', 0, 0), ('\u{1da00}', '\u{1da36}', 0, 0), ('\u{1da3b}', + '\u{1da6c}', 0, 0), ('\u{1da75}', '\u{1da75}', 0, 0), ('\u{1da84}', '\u{1da84}', 0, 0), + ('\u{1da9b}', '\u{1da9f}', 0, 0), ('\u{1daa1}', '\u{1daaf}', 0, 0), ('\u{1e000}', + '\u{1e006}', 0, 0), ('\u{1e008}', '\u{1e018}', 0, 0), ('\u{1e01b}', '\u{1e021}', 0, 0), + ('\u{1e023}', '\u{1e024}', 0, 0), ('\u{1e026}', '\u{1e02a}', 0, 0), ('\u{1e130}', + '\u{1e136}', 0, 0), ('\u{1e2ec}', '\u{1e2ef}', 0, 0), ('\u{1e8d0}', '\u{1e8d6}', 0, 0), + ('\u{1e944}', '\u{1e94a}', 0, 0), ('\u{1f004}', '\u{1f004}', 2, 2), ('\u{1f0cf}', + '\u{1f0cf}', 2, 2), ('\u{1f100}', '\u{1f10a}', 1, 2), ('\u{1f110}', '\u{1f12d}', 1, 2), + ('\u{1f130}', '\u{1f169}', 1, 2), ('\u{1f170}', '\u{1f18d}', 1, 2), ('\u{1f18e}', + '\u{1f18e}', 2, 2), ('\u{1f18f}', '\u{1f190}', 1, 2), ('\u{1f191}', '\u{1f19a}', 2, 2), + ('\u{1f19b}', '\u{1f1ac}', 1, 2), ('\u{1f200}', '\u{1f202}', 2, 2), ('\u{1f210}', + '\u{1f23b}', 2, 2), ('\u{1f240}', '\u{1f248}', 2, 2), ('\u{1f250}', '\u{1f251}', 2, 2), + ('\u{1f260}', '\u{1f265}', 2, 2), ('\u{1f300}', '\u{1f320}', 2, 2), ('\u{1f32d}', + '\u{1f335}', 2, 2), ('\u{1f337}', '\u{1f37c}', 2, 2), ('\u{1f37e}', '\u{1f393}', 2, 2), + ('\u{1f3a0}', '\u{1f3ca}', 2, 2), ('\u{1f3cf}', '\u{1f3d3}', 2, 2), ('\u{1f3e0}', + '\u{1f3f0}', 2, 2), ('\u{1f3f4}', '\u{1f3f4}', 2, 2), ('\u{1f3f8}', '\u{1f43e}', 2, 2), + ('\u{1f440}', '\u{1f440}', 2, 2), ('\u{1f442}', '\u{1f4fc}', 2, 2), ('\u{1f4ff}', + '\u{1f53d}', 2, 2), ('\u{1f54b}', '\u{1f54e}', 2, 2), ('\u{1f550}', '\u{1f567}', 2, 2), + ('\u{1f57a}', '\u{1f57a}', 2, 2), ('\u{1f595}', '\u{1f596}', 2, 2), ('\u{1f5a4}', + '\u{1f5a4}', 2, 2), ('\u{1f5fb}', '\u{1f64f}', 2, 2), ('\u{1f680}', '\u{1f6c5}', 2, 2), + ('\u{1f6cc}', '\u{1f6cc}', 2, 2), ('\u{1f6d0}', '\u{1f6d2}', 2, 2), ('\u{1f6d5}', + '\u{1f6d5}', 2, 2), ('\u{1f6eb}', '\u{1f6ec}', 2, 2), ('\u{1f6f4}', '\u{1f6fa}', 2, 2), + ('\u{1f7e0}', '\u{1f7eb}', 2, 2), ('\u{1f90d}', '\u{1f971}', 2, 2), ('\u{1f973}', + '\u{1f976}', 2, 2), ('\u{1f97a}', '\u{1f9a2}', 2, 2), ('\u{1f9a5}', '\u{1f9aa}', 2, 2), + ('\u{1f9ae}', '\u{1f9ca}', 2, 2), ('\u{1f9cd}', '\u{1f9ff}', 2, 2), ('\u{1fa70}', + '\u{1fa73}', 2, 2), ('\u{1fa78}', '\u{1fa7a}', 2, 2), ('\u{1fa80}', '\u{1fa82}', 2, 2), + ('\u{1fa90}', '\u{1fa95}', 2, 2), ('\u{20000}', '\u{2fffd}', 2, 2), ('\u{30000}', + '\u{3fffd}', 2, 2), ('\u{e0001}', '\u{e0001}', 0, 0), ('\u{e0020}', '\u{e007f}', 0, 0), + ('\u{e0100}', '\u{e01ef}', 0, 0), ('\u{f0000}', '\u{ffffd}', 1, 2), ('\u{100000}', + '\u{10fffd}', 1, 2) + ]; + +} + diff --git a/unicode-width/src/tests.rs b/unicode-width/src/tests.rs new file mode 100644 index 0000000..72808c6 --- /dev/null +++ b/unicode-width/src/tests.rs @@ -0,0 +1,175 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[cfg(feature = "bench")] +use std::iter; +#[cfg(feature = "bench")] +use test::{self, Bencher}; +#[cfg(feature = "bench")] +use super::UnicodeWidthChar; + +use std::prelude::v1::*; + +#[cfg(feature = "bench")] +#[bench] +fn cargo(b: &mut Bencher) { + let string = iter::repeat('a').take(4096).collect::(); + + b.iter(|| { + for c in string.chars() { + test::black_box(UnicodeWidthChar::width(c)); + } + }); +} + +#[cfg(feature = "bench")] +#[bench] +#[allow(deprecated)] +fn stdlib(b: &mut Bencher) { + let string = iter::repeat('a').take(4096).collect::(); + + b.iter(|| { + for c in string.chars() { + test::black_box(c.width()); + } + }); +} + +#[cfg(feature = "bench")] +#[bench] +fn simple_if(b: &mut Bencher) { + let string = iter::repeat('a').take(4096).collect::(); + + b.iter(|| { + for c in string.chars() { + test::black_box(simple_width_if(c)); + } + }); +} + +#[cfg(feature = "bench")] +#[bench] +fn simple_match(b: &mut Bencher) { + let string = iter::repeat('a').take(4096).collect::(); + + b.iter(|| { + for c in string.chars() { + test::black_box(simple_width_match(c)); + } + }); +} + +#[cfg(feature = "bench")] +#[inline] +fn simple_width_if(c: char) -> Option { + let cu = c as u32; + if cu < 127 { + if cu > 31 { + Some(1) + } else if cu == 0 { + Some(0) + } else { + None + } + } else { + UnicodeWidthChar::width(c) + } +} + +#[cfg(feature = "bench")] +#[inline] +fn simple_width_match(c: char) -> Option { + match c as u32 { + cu if cu == 0 => Some(0), + cu if cu < 0x20 => None, + cu if cu < 0x7f => Some(1), + _ => UnicodeWidthChar::width(c) + } +} + +#[test] +fn test_str() { + use super::UnicodeWidthStr; + + assert_eq!(UnicodeWidthStr::width("hello"), 10); + assert_eq!("hello".width_cjk(), 10); + assert_eq!(UnicodeWidthStr::width("\0\0\0\x01\x01"), 0); + assert_eq!("\0\0\0\x01\x01".width_cjk(), 0); + assert_eq!(UnicodeWidthStr::width(""), 0); + assert_eq!("".width_cjk(), 0); + assert_eq!(UnicodeWidthStr::width("\u{2081}\u{2082}\u{2083}\u{2084}"), 4); + assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width_cjk(), 8); +} + +#[test] +fn test_emoji() { + // Example from the README. + use super::UnicodeWidthStr; + + assert_eq!(UnicodeWidthStr::width("👩"), 2); // Woman + assert_eq!(UnicodeWidthStr::width("🔬"), 2); // Microscope + assert_eq!(UnicodeWidthStr::width("👩‍🔬"), 4); // Woman scientist +} + +#[test] +fn test_char() { + use super::UnicodeWidthChar; + #[cfg(feature = "no_std")] + use core::option::Option::{Some, None}; + + assert_eq!(UnicodeWidthChar::width('h'), Some(2)); + assert_eq!('h'.width_cjk(), Some(2)); + assert_eq!(UnicodeWidthChar::width('\x00'), Some(0)); + assert_eq!('\x00'.width_cjk(), Some(0)); + assert_eq!(UnicodeWidthChar::width('\x01'), None); + assert_eq!('\x01'.width_cjk(), None); + assert_eq!(UnicodeWidthChar::width('\u{2081}'), Some(1)); + assert_eq!('\u{2081}'.width_cjk(), Some(2)); +} + +#[test] +fn test_char2() { + use super::UnicodeWidthChar; + #[cfg(feature = "no_std")] + use core::option::Option::{Some, None}; + + assert_eq!(UnicodeWidthChar::width('\x00'),Some(0)); + assert_eq!('\x00'.width_cjk(),Some(0)); + + assert_eq!(UnicodeWidthChar::width('\x0A'),None); + assert_eq!('\x0A'.width_cjk(),None); + + assert_eq!(UnicodeWidthChar::width('w'),Some(1)); + assert_eq!('w'.width_cjk(),Some(1)); + + assert_eq!(UnicodeWidthChar::width('h'),Some(2)); + assert_eq!('h'.width_cjk(),Some(2)); + + assert_eq!(UnicodeWidthChar::width('\u{AD}'),Some(1)); + assert_eq!('\u{AD}'.width_cjk(),Some(1)); + + assert_eq!(UnicodeWidthChar::width('\u{1160}'),Some(0)); + assert_eq!('\u{1160}'.width_cjk(),Some(0)); + + assert_eq!(UnicodeWidthChar::width('\u{a1}'),Some(1)); + assert_eq!('\u{a1}'.width_cjk(),Some(2)); + + assert_eq!(UnicodeWidthChar::width('\u{300}'),Some(0)); + assert_eq!('\u{300}'.width_cjk(),Some(0)); +} + +#[test] +fn unicode_12() { + use super::UnicodeWidthChar; + #[cfg(feature = "no_std")] + use core::option::Option::{Some, None}; + + assert_eq!(UnicodeWidthChar::width('\u{1F971}'), Some(2)); +} -- cgit v1.2.1