aboutsummaryrefslogtreecommitdiff
path: root/unicode-width
diff options
context:
space:
mode:
Diffstat (limited to 'unicode-width')
-rw-r--r--unicode-width/.gitignore3
-rw-r--r--unicode-width/.travis.yml28
-rw-r--r--unicode-width/COPYRIGHT7
-rw-r--r--unicode-width/Cargo.toml29
-rw-r--r--unicode-width/LICENSE-APACHE201
-rw-r--r--unicode-width/LICENSE-MIT25
-rw-r--r--unicode-width/README.md58
-rwxr-xr-xunicode-width/scripts/unicode.py321
-rw-r--r--unicode-width/src/lib.rs131
-rw-r--r--unicode-width/src/tables.rs284
-rw-r--r--unicode-width/src/tests.rs175
11 files changed, 1262 insertions, 0 deletions
diff --git a/unicode-width/.gitignore b/unicode-width/.gitignore
new file mode 100644
index 0000000..5cdcdba
--- /dev/null
+++ b/unicode-width/.gitignore
@@ -0,0 +1,3 @@
+target
+Cargo.lock
+scripts/tmp
diff --git a/unicode-width/.travis.yml b/unicode-width/.travis.yml
new file mode 100644
index 0000000..64196fa
--- /dev/null
+++ b/unicode-width/.travis.yml
@@ -0,0 +1,28 @@
+language: rust
+rust: 'nightly'
+sudo: false
+script:
+ - cargo build --verbose --features bench
+ - cargo test --verbose --features bench
+ - cargo bench --verbose --features bench
+ - cargo clean
+ - cargo build --verbose
+ - cargo test --verbose
+# next line is an ugly hack to fix an annoying bug where rustdoc tries to use the rustc_private unicode_width crate
+# (there is probably a better fix than this)
+ - rm $(find /home/travis/.rustup -type f -name 'libunicode_width*')
+ - rustdoc --test README.md -L target/debug -L target/debug/deps
+ - cargo doc
+after_success: |
+ [ $TRAVIS_BRANCH = master ] &&
+ [ $TRAVIS_PULL_REQUEST = false ] &&
+ echo '<meta http-equiv=refresh content=0;url=unicode_width/index.html>' > target/doc/index.html &&
+ pip install ghp-import --user $USER &&
+ $HOME/.local/bin/ghp-import -n target/doc &&
+ git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
+env:
+ global:
+ secure: vHL3zrN8AF+H79jrB8OfzuPqsUHevo6ECzwqXPj2dMSqcSXEeCY/ENAfiyFg+oW8yEVP8X2BS1a/C9yvVQRLqLbm1HbZ/5vUpoggT9S0IhKqZMyAcLYXfIEUDMDQuaSdFndDaHvq8275ScgX1LRv1kcPjQoZHuaXWMH8y/Suvyo=
+notifications:
+ email:
+ on_success: never
diff --git a/unicode-width/COPYRIGHT b/unicode-width/COPYRIGHT
new file mode 100644
index 0000000..b286ec1
--- /dev/null
+++ b/unicode-width/COPYRIGHT
@@ -0,0 +1,7 @@
+Licensed under the Apache License, Version 2.0
+<LICENSE-APACHE or
+http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
+at your option. All files in the project carrying such
+notice may not be copied, modified, or distributed except
+according to those terms.
diff --git a/unicode-width/Cargo.toml b/unicode-width/Cargo.toml
new file mode 100644
index 0000000..35228f2
--- /dev/null
+++ b/unicode-width/Cargo.toml
@@ -0,0 +1,29 @@
+[package]
+
+name = "unicode-width"
+version = "0.1.7"
+authors = ["kwantam <kwantam@gmail.com>", "Manish Goregaokar <manishsmail@gmail.com>"]
+
+homepage = "https://github.com/unicode-rs/unicode-width"
+repository = "https://github.com/unicode-rs/unicode-width"
+documentation = "https://unicode-rs.github.io/unicode-width"
+license = "MIT/Apache-2.0"
+keywords = ["text", "width", "unicode"]
+readme = "README.md"
+description = """
+Determine displayed width of `char` and `str` types
+according to Unicode Standard Annex #11 rules.
+"""
+
+exclude = [ "target/*", "Cargo.lock" ]
+
+[dependencies]
+std = { version = "1.0", package = "rustc-std-workspace-std", optional = true }
+core = { version = "1.0", package = "rustc-std-workspace-core", optional = true }
+compiler_builtins = { version = "0.1", optional = true }
+
+[features]
+default = []
+no_std = []
+bench = []
+rustc-dep-of-std = ['std', 'core', 'compiler_builtins']
diff --git a/unicode-width/LICENSE-APACHE b/unicode-width/LICENSE-APACHE
new file mode 100644
index 0000000..16fe87b
--- /dev/null
+++ b/unicode-width/LICENSE-APACHE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/unicode-width/LICENSE-MIT b/unicode-width/LICENSE-MIT
new file mode 100644
index 0000000..e69282e
--- /dev/null
+++ b/unicode-width/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2015 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/unicode-width/README.md b/unicode-width/README.md
new file mode 100644
index 0000000..595e163
--- /dev/null
+++ b/unicode-width/README.md
@@ -0,0 +1,58 @@
+# unicode-width
+
+Determine displayed width of `char` and `str` types according to
+[Unicode Standard Annex #11][UAX11] rules.
+
+[UAX11]: http://www.unicode.org/reports/tr11/
+
+[![Build Status](https://travis-ci.org/unicode-rs/unicode-width.svg)](https://travis-ci.org/unicode-rs/unicode-width)
+
+[Documentation](https://unicode-rs.github.io/unicode-width/unicode_width/index.html)
+
+```rust
+extern crate unicode_width;
+
+use unicode_width::UnicodeWidthStr;
+
+fn main() {
+ let teststr = "Hello, world!";
+ let width = UnicodeWidthStr::width(teststr);
+ println!("{}", teststr);
+ println!("The above string is {} columns wide.", width);
+ let width = teststr.width_cjk();
+ println!("The above string is {} columns wide (CJK).", width);
+}
+```
+
+**NOTE:** The computed width values may not match the actual rendered column
+width. For example, the woman scientist emoji comprises of a woman emoji, a
+zero-width joiner and a microscope emoji.
+
+```rust
+extern crate unicode_width;
+use unicode_width::UnicodeWidthStr;
+
+fn main() {
+ assert_eq!(UnicodeWidthStr::width("👩"), 2); // Woman
+ assert_eq!(UnicodeWidthStr::width("🔬"), 2); // Microscope
+ assert_eq!(UnicodeWidthStr::width("👩‍🔬"), 4); // Woman scientist
+}
+```
+
+See [Unicode Standard Annex #11][UAX11] for precise details on what is and isn't
+covered by this crate.
+
+## features
+
+unicode-width does not depend on libstd, so it can be used in crates
+with the `#![no_std]` attribute.
+
+## crates.io
+
+You can use this package in your project by adding the following
+to your `Cargo.toml`:
+
+```toml
+[dependencies]
+unicode-width = "0.1.7"
+```
diff --git a/unicode-width/scripts/unicode.py b/unicode-width/scripts/unicode.py
new file mode 100755
index 0000000..5456ee3
--- /dev/null
+++ b/unicode-width/scripts/unicode.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+#
+# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+# This script uses the following Unicode tables:
+# - EastAsianWidth.txt
+# - ReadMe.txt
+# - UnicodeData.txt
+#
+# Since this should not require frequent updates, we just store this
+# out-of-line and check the unicode.rs file into git.
+
+import fileinput, re, os, sys, operator
+
+preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
+
+#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
+'''
+
+# Mapping taken from Table 12 from:
+# http://www.unicode.org/reports/tr44/#General_Category_Values
+expanded_categories = {
+ 'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
+ 'Lm': ['L'], 'Lo': ['L'],
+ 'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
+ 'Nd': ['N'], 'Nl': ['N'], 'No': ['No'],
+ 'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
+ 'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
+ 'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
+ 'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
+ 'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
+}
+
+# these are the surrogate codepoints, which are not valid rust characters
+surrogate_codepoints = (0xd800, 0xdfff)
+
+def fetch(f):
+ if not os.path.exists(os.path.basename(f)):
+ os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s"
+ % f)
+
+ if not os.path.exists(os.path.basename(f)):
+ sys.stderr.write("cannot load %s" % f)
+ exit(1)
+
+def is_surrogate(n):
+ return surrogate_codepoints[0] <= n <= surrogate_codepoints[1]
+
+def load_unicode_data(f):
+ fetch(f)
+ gencats = {}
+
+ udict = {}
+ range_start = -1
+ for line in fileinput.input(f):
+ data = line.split(';')
+ if len(data) != 15:
+ continue
+ cp = int(data[0], 16)
+ if is_surrogate(cp):
+ continue
+ if range_start >= 0:
+ for i in range(range_start, cp):
+ udict[i] = data
+ range_start = -1
+ if data[1].endswith(", First>"):
+ range_start = cp
+ continue
+ udict[cp] = data
+
+ for code in udict:
+ [code_org, name, gencat, combine, bidi,
+ decomp, deci, digit, num, mirror,
+ old, iso, upcase, lowcase, titlecase ] = udict[code]
+
+ # place letter in categories as appropriate
+ for cat in [gencat, "Assigned"] + expanded_categories.get(gencat, []):
+ if cat not in gencats:
+ gencats[cat] = []
+ gencats[cat].append(code)
+
+ gencats = group_cats(gencats)
+
+ return gencats
+
+def group_cats(cats):
+ cats_out = {}
+ for cat in cats:
+ cats_out[cat] = group_cat(cats[cat])
+ return cats_out
+
+def group_cat(cat):
+ cat_out = []
+ letters = sorted(set(cat))
+ cur_start = letters.pop(0)
+ cur_end = cur_start
+ for letter in letters:
+ assert letter > cur_end, \
+ "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter))
+ if letter == cur_end + 1:
+ cur_end = letter
+ else:
+ cat_out.append((cur_start, cur_end))
+ cur_start = cur_end = letter
+ cat_out.append((cur_start, cur_end))
+ return cat_out
+
+def format_table_content(f, content, indent):
+ line = " "*indent
+ first = True
+ for chunk in content.split(","):
+ if len(line) + len(chunk) < 98:
+ if first:
+ line += chunk
+ else:
+ line += ", " + chunk
+ first = False
+ else:
+ f.write(line + ",\n")
+ line = " "*indent + chunk
+ f.write(line)
+
+# load all widths of want_widths, except those in except_cats
+def load_east_asian_width(want_widths, except_cats):
+ f = "EastAsianWidth.txt"
+ fetch(f)
+ widths = {}
+ re1 = re.compile("^([0-9A-F]+);(\w+) +# (\w+)")
+ re2 = re.compile("^([0-9A-F]+)\.\.([0-9A-F]+);(\w+) +# (\w+)")
+
+ for line in fileinput.input(f):
+ width = None
+ d_lo = 0
+ d_hi = 0
+ cat = None
+ m = re1.match(line)
+ if m:
+ d_lo = m.group(1)
+ d_hi = m.group(1)
+ width = m.group(2)
+ cat = m.group(3)
+ else:
+ m = re2.match(line)
+ if m:
+ d_lo = m.group(1)
+ d_hi = m.group(2)
+ width = m.group(3)
+ cat = m.group(4)
+ else:
+ continue
+ if cat in except_cats or width not in want_widths:
+ continue
+ d_lo = int(d_lo, 16)
+ d_hi = int(d_hi, 16)
+ if width not in widths:
+ widths[width] = []
+ widths[width].append((d_lo, d_hi))
+ return widths
+
+def escape_char(c):
+ return "'\\u{%x}'" % c
+
+def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
+ pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])), is_const=True):
+ pub_string = "const"
+ if not is_const:
+ pub_string = "let"
+ if is_pub:
+ pub_string = "pub " + pub_string
+ f.write(" %s %s: %s = &[\n" % (pub_string, name, t_type))
+ data = ""
+ first = True
+ for dat in t_data:
+ if not first:
+ data += ","
+ first = False
+ data += pfun(dat)
+ format_table_content(f, data, 8)
+ f.write("\n ];\n\n")
+
+def emit_charwidth_module(f, width_table):
+ f.write("pub mod charwidth {")
+ f.write("""
+ use core::option::Option::{self, Some, None};
+ use core::result::Result::{Ok, Err};
+
+ #[inline]
+ fn bsearch_range_value_table(c: char, is_cjk: bool, r: &'static [(char, char, u8, u8)]) -> u8 {
+ use core::cmp::Ordering::{Equal, Less, Greater};
+ match r.binary_search_by(|&(lo, hi, _, _)| {
+ if lo <= c && c <= hi { Equal }
+ else if hi < c { Less }
+ else { Greater }
+ }) {
+ Ok(idx) => {
+ let (_, _, r_ncjk, r_cjk) = r[idx];
+ if is_cjk { r_cjk } else { r_ncjk }
+ }
+ Err(_) => 1
+ }
+ }
+""")
+
+ f.write("""
+ #[inline]
+ pub fn width(c: char, is_cjk: bool) -> Option<usize> {
+ match c as usize {
+ _c @ 0 => Some(0), // null is zero width
+ cu if cu < 0x20 => None, // control sequences have no width
+ cu if cu < 0x7F => Some(1), // ASCII
+ cu if cu < 0xA0 => None, // more control sequences
+ _ => Some(bsearch_range_value_table(c, is_cjk, charwidth_table) as usize)
+ }
+ }
+
+""")
+
+ f.write(" // character width table. Based on Markus Kuhn's free wcwidth() implementation,\n")
+ f.write(" // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c\n")
+ emit_table(f, "charwidth_table", width_table, "&'static [(char, char, u8, u8)]", is_pub=False,
+ pfun=lambda x: "(%s,%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2], x[3]))
+ f.write("}\n\n")
+
+def remove_from_wtable(wtable, val):
+ wtable_out = []
+ while wtable:
+ if wtable[0][1] < val:
+ wtable_out.append(wtable.pop(0))
+ elif wtable[0][0] > val:
+ break
+ else:
+ (wt_lo, wt_hi, width, width_cjk) = wtable.pop(0)
+ if wt_lo == wt_hi == val:
+ continue
+ elif wt_lo == val:
+ wtable_out.append((wt_lo+1, wt_hi, width, width_cjk))
+ elif wt_hi == val:
+ wtable_out.append((wt_lo, wt_hi-1, width, width_cjk))
+ else:
+ wtable_out.append((wt_lo, val-1, width, width_cjk))
+ wtable_out.append((val+1, wt_hi, width, width_cjk))
+ if wtable:
+ wtable_out.extend(wtable)
+ return wtable_out
+
+
+
+def optimize_width_table(wtable):
+ wtable_out = []
+ w_this = wtable.pop(0)
+ while wtable:
+ if w_this[1] == wtable[0][0] - 1 and w_this[2:3] == wtable[0][2:3]:
+ w_tmp = wtable.pop(0)
+ w_this = (w_this[0], w_tmp[1], w_tmp[2], w_tmp[3])
+ else:
+ wtable_out.append(w_this)
+ w_this = wtable.pop(0)
+ wtable_out.append(w_this)
+ return wtable_out
+
+if __name__ == "__main__":
+ r = "tables.rs"
+ if os.path.exists(r):
+ os.remove(r)
+ with open(r, "w") as rf:
+ # write the file's preamble
+ rf.write(preamble)
+
+ # download and parse all the data
+ fetch("ReadMe.txt")
+ with open("ReadMe.txt") as readme:
+ pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
+ unicode_version = re.search(pattern, readme.read()).groups()
+ rf.write("""
+/// The version of [Unicode](http://www.unicode.org/)
+/// that this version of unicode-width is based on.
+pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
+
+""" % unicode_version)
+ gencats = load_unicode_data("UnicodeData.txt")
+
+ ### character width module
+ width_table = []
+ for zwcat in ["Me", "Mn", "Cf"]:
+ width_table.extend([(lo_hi[0], lo_hi[1], 0, 0) for lo_hi in gencats[zwcat]])
+ width_table.append((4448, 4607, 0, 0))
+
+ # get widths, except those that are explicitly marked zero-width above
+ ea_widths = load_east_asian_width(["W", "F", "A"], ["Me", "Mn", "Cf"])
+ # these are doublewidth
+ for dwcat in ["W", "F"]:
+ width_table.extend([(lo_hi1[0], lo_hi1[1], 2, 2) for lo_hi1 in ea_widths[dwcat]])
+ width_table.extend([(lo_hi2[0], lo_hi2[1], 1, 2) for lo_hi2 in ea_widths["A"]])
+
+ width_table.sort(key=lambda w: w[0])
+
+ # soft hyphen is not zero width in preformatted text; it's used to indicate
+ # a hyphen inserted to facilitate a linebreak.
+ width_table = remove_from_wtable(width_table, 173)
+
+ # optimize the width table by collapsing adjacent entities when possible
+ width_table = optimize_width_table(width_table)
+ emit_charwidth_module(rf, width_table)
diff --git a/unicode-width/src/lib.rs b/unicode-width/src/lib.rs
new file mode 100644
index 0000000..1ee35c8
--- /dev/null
+++ b/unicode-width/src/lib.rs
@@ -0,0 +1,131 @@
+// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Determine displayed width of `char` and `str` types according to
+//! [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
+//! rules.
+//!
+//! ```rust
+//! extern crate unicode_width;
+//!
+//! use unicode_width::UnicodeWidthStr;
+//!
+//! fn main() {
+//! let teststr = "Hello, world!";
+//! let width = UnicodeWidthStr::width(teststr);
+//! println!("{}", teststr);
+//! println!("The above string is {} columns wide.", width);
+//! let width = teststr.width_cjk();
+//! println!("The above string is {} columns wide (CJK).", width);
+//! }
+//! ```
+//!
+//! # features
+//!
+//! unicode-width supports a `no_std` feature. This eliminates dependence
+//! on std, and instead uses equivalent functions from core.
+//!
+//! # crates.io
+//!
+//! You can use this package in your project by adding the following
+//! to your `Cargo.toml`:
+//!
+//! ```toml
+//! [dependencies]
+//! unicode-width = "0.1.5"
+//! ```
+
+#![deny(missing_docs, unsafe_code)]
+#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
+ html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
+
+#![cfg_attr(feature = "bench", feature(test))]
+#![no_std]
+
+#[cfg(test)]
+#[macro_use]
+extern crate std;
+
+#[cfg(feature = "bench")]
+extern crate test;
+
+use tables::charwidth as cw;
+pub use tables::UNICODE_VERSION;
+
+use core::ops::Add;
+
+mod tables;
+
+#[cfg(test)]
+mod tests;
+
+/// Methods for determining displayed width of Unicode characters.
+pub trait UnicodeWidthChar {
+ /// Returns the character's displayed width in columns, or `None` if the
+ /// character is a control character other than `'\x00'`.
+ ///
+ /// This function treats characters in the Ambiguous category according
+ /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
+ /// as 1 column wide. This is consistent with the recommendations for non-CJK
+ /// contexts, or when the context cannot be reliably determined.
+ fn width(self) -> Option<usize>;
+
+ /// Returns the character's displayed width in columns, or `None` if the
+ /// character is a control character other than `'\x00'`.
+ ///
+ /// This function treats characters in the Ambiguous category according
+ /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
+ /// as 2 columns wide. This is consistent with the recommendations for
+ /// CJK contexts.
+ fn width_cjk(self) -> Option<usize>;
+}
+
+impl UnicodeWidthChar for char {
+ #[inline]
+ fn width(self) -> Option<usize> { cw::width(self, false) }
+
+ #[inline]
+ fn width_cjk(self) -> Option<usize> { cw::width(self, true) }
+}
+
+/// Methods for determining displayed width of Unicode strings.
+pub trait UnicodeWidthStr {
+ /// Returns the string's displayed width in columns.
+ ///
+ /// Control characters are treated as having zero width.
+ ///
+ /// This function treats characters in the Ambiguous category according
+ /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
+ /// as 1 column wide. This is consistent with the recommendations for
+ /// non-CJK contexts, or when the context cannot be reliably determined.
+ fn width<'a>(&'a self) -> usize;
+
+ /// Returns the string's displayed width in columns.
+ ///
+ /// Control characters are treated as having zero width.
+ ///
+ /// This function treats characters in the Ambiguous category according
+ /// to [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
+ /// as 2 column wide. This is consistent with the recommendations for
+ /// CJK contexts.
+ fn width_cjk<'a>(&'a self) -> usize;
+}
+
+impl UnicodeWidthStr for str {
+ #[inline]
+ fn width(&self) -> usize {
+ self.chars().map(|c| cw::width(c, false).unwrap_or(0)).fold(0, Add::add)
+ }
+
+ #[inline]
+ fn width_cjk(&self) -> usize {
+ self.chars().map(|c| cw::width(c, true).unwrap_or(0)).fold(0, Add::add)
+ }
+}
diff --git a/unicode-width/src/tables.rs b/unicode-width/src/tables.rs
new file mode 100644
index 0000000..7ae450b
--- /dev/null
+++ b/unicode-width/src/tables.rs
@@ -0,0 +1,284 @@
+// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
+
+#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
+
+/// The version of [Unicode](http://www.unicode.org/)
+/// that this version of unicode-width is based on.
+pub const UNICODE_VERSION: (u64, u64, u64) = (12, 1, 0);
+
+pub mod charwidth {
+ use core::option::Option::{self, Some, None};
+ use core::result::Result::{Ok, Err};
+
+ #[inline]
+ fn bsearch_range_value_table(c: char, is_cjk: bool, r: &'static [(char, char, u8, u8)]) -> u8 {
+ use core::cmp::Ordering::{Equal, Less, Greater};
+ match r.binary_search_by(|&(lo, hi, _, _)| {
+ if lo <= c && c <= hi { Equal }
+ else if hi < c { Less }
+ else { Greater }
+ }) {
+ Ok(idx) => {
+ let (_, _, r_ncjk, r_cjk) = r[idx];
+ if is_cjk { r_cjk } else { r_ncjk }
+ }
+ Err(_) => 1
+ }
+ }
+
+ #[inline]
+ pub fn width(c: char, is_cjk: bool) -> Option<usize> {
+ match c as usize {
+ _c @ 0 => Some(0), // null is zero width
+ cu if cu < 0x20 => None, // control sequences have no width
+ cu if cu < 0x7F => Some(1), // ASCII
+ cu if cu < 0xA0 => None, // more control sequences
+ _ => Some(bsearch_range_value_table(c, is_cjk, charwidth_table) as usize)
+ }
+ }
+
+ // character width table. Based on Markus Kuhn's free wcwidth() implementation,
+ // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+ const charwidth_table: &'static [(char, char, u8, u8)] = &[
+ ('\u{a1}', '\u{a1}', 1, 2), ('\u{a4}', '\u{a4}', 1, 2), ('\u{a7}', '\u{a8}', 1, 2),
+ ('\u{aa}', '\u{aa}', 1, 2), ('\u{ae}', '\u{ae}', 1, 2), ('\u{b0}', '\u{b4}', 1, 2),
+ ('\u{b6}', '\u{ba}', 1, 2), ('\u{bc}', '\u{bf}', 1, 2), ('\u{c6}', '\u{c6}', 1, 2),
+ ('\u{d0}', '\u{d0}', 1, 2), ('\u{d7}', '\u{d8}', 1, 2), ('\u{de}', '\u{e1}', 1, 2),
+ ('\u{e6}', '\u{e6}', 1, 2), ('\u{e8}', '\u{ea}', 1, 2), ('\u{ec}', '\u{ed}', 1, 2),
+ ('\u{f0}', '\u{f0}', 1, 2), ('\u{f2}', '\u{f3}', 1, 2), ('\u{f7}', '\u{fa}', 1, 2),
+ ('\u{fc}', '\u{fc}', 1, 2), ('\u{fe}', '\u{fe}', 1, 2), ('\u{101}', '\u{101}', 1, 2),
+ ('\u{111}', '\u{111}', 1, 2), ('\u{113}', '\u{113}', 1, 2), ('\u{11b}', '\u{11b}', 1, 2),
+ ('\u{126}', '\u{127}', 1, 2), ('\u{12b}', '\u{12b}', 1, 2), ('\u{131}', '\u{133}', 1, 2),
+ ('\u{138}', '\u{138}', 1, 2), ('\u{13f}', '\u{142}', 1, 2), ('\u{144}', '\u{144}', 1, 2),
+ ('\u{148}', '\u{14b}', 1, 2), ('\u{14d}', '\u{14d}', 1, 2), ('\u{152}', '\u{153}', 1, 2),
+ ('\u{166}', '\u{167}', 1, 2), ('\u{16b}', '\u{16b}', 1, 2), ('\u{1ce}', '\u{1ce}', 1, 2),
+ ('\u{1d0}', '\u{1d0}', 1, 2), ('\u{1d2}', '\u{1d2}', 1, 2), ('\u{1d4}', '\u{1d4}', 1, 2),
+ ('\u{1d6}', '\u{1d6}', 1, 2), ('\u{1d8}', '\u{1d8}', 1, 2), ('\u{1da}', '\u{1da}', 1, 2),
+ ('\u{1dc}', '\u{1dc}', 1, 2), ('\u{251}', '\u{251}', 1, 2), ('\u{261}', '\u{261}', 1, 2),
+ ('\u{2c4}', '\u{2c4}', 1, 2), ('\u{2c7}', '\u{2c7}', 1, 2), ('\u{2c9}', '\u{2cb}', 1, 2),
+ ('\u{2cd}', '\u{2cd}', 1, 2), ('\u{2d0}', '\u{2d0}', 1, 2), ('\u{2d8}', '\u{2db}', 1, 2),
+ ('\u{2dd}', '\u{2dd}', 1, 2), ('\u{2df}', '\u{2df}', 1, 2), ('\u{300}', '\u{36f}', 0, 0),
+ ('\u{391}', '\u{3a1}', 1, 2), ('\u{3a3}', '\u{3a9}', 1, 2), ('\u{3b1}', '\u{3c1}', 1, 2),
+ ('\u{3c3}', '\u{3c9}', 1, 2), ('\u{401}', '\u{401}', 1, 2), ('\u{410}', '\u{44f}', 1, 2),
+ ('\u{451}', '\u{451}', 1, 2), ('\u{483}', '\u{489}', 0, 0), ('\u{591}', '\u{5bd}', 0, 0),
+ ('\u{5bf}', '\u{5bf}', 0, 0), ('\u{5c1}', '\u{5c2}', 0, 0), ('\u{5c4}', '\u{5c5}', 0, 0),
+ ('\u{5c7}', '\u{5c7}', 0, 0), ('\u{600}', '\u{605}', 0, 0), ('\u{610}', '\u{61a}', 0, 0),
+ ('\u{61c}', '\u{61c}', 0, 0), ('\u{64b}', '\u{65f}', 0, 0), ('\u{670}', '\u{670}', 0, 0),
+ ('\u{6d6}', '\u{6dd}', 0, 0), ('\u{6df}', '\u{6e4}', 0, 0), ('\u{6e7}', '\u{6e8}', 0, 0),
+ ('\u{6ea}', '\u{6ed}', 0, 0), ('\u{70f}', '\u{70f}', 0, 0), ('\u{711}', '\u{711}', 0, 0),
+ ('\u{730}', '\u{74a}', 0, 0), ('\u{7a6}', '\u{7b0}', 0, 0), ('\u{7eb}', '\u{7f3}', 0, 0),
+ ('\u{7fd}', '\u{7fd}', 0, 0), ('\u{816}', '\u{819}', 0, 0), ('\u{81b}', '\u{823}', 0, 0),
+ ('\u{825}', '\u{827}', 0, 0), ('\u{829}', '\u{82d}', 0, 0), ('\u{859}', '\u{85b}', 0, 0),
+ ('\u{8d3}', '\u{902}', 0, 0), ('\u{93a}', '\u{93a}', 0, 0), ('\u{93c}', '\u{93c}', 0, 0),
+ ('\u{941}', '\u{948}', 0, 0), ('\u{94d}', '\u{94d}', 0, 0), ('\u{951}', '\u{957}', 0, 0),
+ ('\u{962}', '\u{963}', 0, 0), ('\u{981}', '\u{981}', 0, 0), ('\u{9bc}', '\u{9bc}', 0, 0),
+ ('\u{9c1}', '\u{9c4}', 0, 0), ('\u{9cd}', '\u{9cd}', 0, 0), ('\u{9e2}', '\u{9e3}', 0, 0),
+ ('\u{9fe}', '\u{9fe}', 0, 0), ('\u{a01}', '\u{a02}', 0, 0), ('\u{a3c}', '\u{a3c}', 0, 0),
+ ('\u{a41}', '\u{a42}', 0, 0), ('\u{a47}', '\u{a48}', 0, 0), ('\u{a4b}', '\u{a4d}', 0, 0),
+ ('\u{a51}', '\u{a51}', 0, 0), ('\u{a70}', '\u{a71}', 0, 0), ('\u{a75}', '\u{a75}', 0, 0),
+ ('\u{a81}', '\u{a82}', 0, 0), ('\u{abc}', '\u{abc}', 0, 0), ('\u{ac1}', '\u{ac5}', 0, 0),
+ ('\u{ac7}', '\u{ac8}', 0, 0), ('\u{acd}', '\u{acd}', 0, 0), ('\u{ae2}', '\u{ae3}', 0, 0),
+ ('\u{afa}', '\u{aff}', 0, 0), ('\u{b01}', '\u{b01}', 0, 0), ('\u{b3c}', '\u{b3c}', 0, 0),
+ ('\u{b3f}', '\u{b3f}', 0, 0), ('\u{b41}', '\u{b44}', 0, 0), ('\u{b4d}', '\u{b4d}', 0, 0),
+ ('\u{b56}', '\u{b56}', 0, 0), ('\u{b62}', '\u{b63}', 0, 0), ('\u{b82}', '\u{b82}', 0, 0),
+ ('\u{bc0}', '\u{bc0}', 0, 0), ('\u{bcd}', '\u{bcd}', 0, 0), ('\u{c00}', '\u{c00}', 0, 0),
+ ('\u{c04}', '\u{c04}', 0, 0), ('\u{c3e}', '\u{c40}', 0, 0), ('\u{c46}', '\u{c48}', 0, 0),
+ ('\u{c4a}', '\u{c4d}', 0, 0), ('\u{c55}', '\u{c56}', 0, 0), ('\u{c62}', '\u{c63}', 0, 0),
+ ('\u{c81}', '\u{c81}', 0, 0), ('\u{cbc}', '\u{cbc}', 0, 0), ('\u{cbf}', '\u{cbf}', 0, 0),
+ ('\u{cc6}', '\u{cc6}', 0, 0), ('\u{ccc}', '\u{ccd}', 0, 0), ('\u{ce2}', '\u{ce3}', 0, 0),
+ ('\u{d00}', '\u{d01}', 0, 0), ('\u{d3b}', '\u{d3c}', 0, 0), ('\u{d41}', '\u{d44}', 0, 0),
+ ('\u{d4d}', '\u{d4d}', 0, 0), ('\u{d62}', '\u{d63}', 0, 0), ('\u{dca}', '\u{dca}', 0, 0),
+ ('\u{dd2}', '\u{dd4}', 0, 0), ('\u{dd6}', '\u{dd6}', 0, 0), ('\u{e31}', '\u{e31}', 0, 0),
+ ('\u{e34}', '\u{e3a}', 0, 0), ('\u{e47}', '\u{e4e}', 0, 0), ('\u{eb1}', '\u{eb1}', 0, 0),
+ ('\u{eb4}', '\u{ebc}', 0, 0), ('\u{ec8}', '\u{ecd}', 0, 0), ('\u{f18}', '\u{f19}', 0, 0),
+ ('\u{f35}', '\u{f35}', 0, 0), ('\u{f37}', '\u{f37}', 0, 0), ('\u{f39}', '\u{f39}', 0, 0),
+ ('\u{f71}', '\u{f7e}', 0, 0), ('\u{f80}', '\u{f84}', 0, 0), ('\u{f86}', '\u{f87}', 0, 0),
+ ('\u{f8d}', '\u{f97}', 0, 0), ('\u{f99}', '\u{fbc}', 0, 0), ('\u{fc6}', '\u{fc6}', 0, 0),
+ ('\u{102d}', '\u{1030}', 0, 0), ('\u{1032}', '\u{1037}', 0, 0), ('\u{1039}', '\u{103a}', 0,
+ 0), ('\u{103d}', '\u{103e}', 0, 0), ('\u{1058}', '\u{1059}', 0, 0), ('\u{105e}', '\u{1060}',
+ 0, 0), ('\u{1071}', '\u{1074}', 0, 0), ('\u{1082}', '\u{1082}', 0, 0), ('\u{1085}',
+ '\u{1086}', 0, 0), ('\u{108d}', '\u{108d}', 0, 0), ('\u{109d}', '\u{109d}', 0, 0),
+ ('\u{1100}', '\u{115f}', 2, 2), ('\u{1160}', '\u{11ff}', 0, 0), ('\u{135d}', '\u{135f}', 0,
+ 0), ('\u{1712}', '\u{1714}', 0, 0), ('\u{1732}', '\u{1734}', 0, 0), ('\u{1752}', '\u{1753}',
+ 0, 0), ('\u{1772}', '\u{1773}', 0, 0), ('\u{17b4}', '\u{17b5}', 0, 0), ('\u{17b7}',
+ '\u{17bd}', 0, 0), ('\u{17c6}', '\u{17c6}', 0, 0), ('\u{17c9}', '\u{17d3}', 0, 0),
+ ('\u{17dd}', '\u{17dd}', 0, 0), ('\u{180b}', '\u{180e}', 0, 0), ('\u{1885}', '\u{1886}', 0,
+ 0), ('\u{18a9}', '\u{18a9}', 0, 0), ('\u{1920}', '\u{1922}', 0, 0), ('\u{1927}', '\u{1928}',
+ 0, 0), ('\u{1932}', '\u{1932}', 0, 0), ('\u{1939}', '\u{193b}', 0, 0), ('\u{1a17}',
+ '\u{1a18}', 0, 0), ('\u{1a1b}', '\u{1a1b}', 0, 0), ('\u{1a56}', '\u{1a56}', 0, 0),
+ ('\u{1a58}', '\u{1a5e}', 0, 0), ('\u{1a60}', '\u{1a60}', 0, 0), ('\u{1a62}', '\u{1a62}', 0,
+ 0), ('\u{1a65}', '\u{1a6c}', 0, 0), ('\u{1a73}', '\u{1a7c}', 0, 0), ('\u{1a7f}', '\u{1a7f}',
+ 0, 0), ('\u{1ab0}', '\u{1abe}', 0, 0), ('\u{1b00}', '\u{1b03}', 0, 0), ('\u{1b34}',
+ '\u{1b34}', 0, 0), ('\u{1b36}', '\u{1b3a}', 0, 0), ('\u{1b3c}', '\u{1b3c}', 0, 0),
+ ('\u{1b42}', '\u{1b42}', 0, 0), ('\u{1b6b}', '\u{1b73}', 0, 0), ('\u{1b80}', '\u{1b81}', 0,
+ 0), ('\u{1ba2}', '\u{1ba5}', 0, 0), ('\u{1ba8}', '\u{1ba9}', 0, 0), ('\u{1bab}', '\u{1bad}',
+ 0, 0), ('\u{1be6}', '\u{1be6}', 0, 0), ('\u{1be8}', '\u{1be9}', 0, 0), ('\u{1bed}',
+ '\u{1bed}', 0, 0), ('\u{1bef}', '\u{1bf1}', 0, 0), ('\u{1c2c}', '\u{1c33}', 0, 0),
+ ('\u{1c36}', '\u{1c37}', 0, 0), ('\u{1cd0}', '\u{1cd2}', 0, 0), ('\u{1cd4}', '\u{1ce0}', 0,
+ 0), ('\u{1ce2}', '\u{1ce8}', 0, 0), ('\u{1ced}', '\u{1ced}', 0, 0), ('\u{1cf4}', '\u{1cf4}',
+ 0, 0), ('\u{1cf8}', '\u{1cf9}', 0, 0), ('\u{1dc0}', '\u{1df9}', 0, 0), ('\u{1dfb}',
+ '\u{1dff}', 0, 0), ('\u{200b}', '\u{200f}', 0, 0), ('\u{2010}', '\u{2010}', 1, 2),
+ ('\u{2013}', '\u{2016}', 1, 2), ('\u{2018}', '\u{2019}', 1, 2), ('\u{201c}', '\u{201d}', 1,
+ 2), ('\u{2020}', '\u{2022}', 1, 2), ('\u{2024}', '\u{2027}', 1, 2), ('\u{202a}', '\u{202e}',
+ 0, 0), ('\u{2030}', '\u{2030}', 1, 2), ('\u{2032}', '\u{2033}', 1, 2), ('\u{2035}',
+ '\u{2035}', 1, 2), ('\u{203b}', '\u{203b}', 1, 2), ('\u{203e}', '\u{203e}', 1, 2),
+ ('\u{2060}', '\u{2064}', 0, 0), ('\u{2066}', '\u{206f}', 0, 0), ('\u{2074}', '\u{2074}', 1,
+ 2), ('\u{207f}', '\u{207f}', 1, 2), ('\u{2081}', '\u{2084}', 1, 2), ('\u{20ac}', '\u{20ac}',
+ 1, 2), ('\u{20d0}', '\u{20f0}', 0, 0), ('\u{2103}', '\u{2103}', 1, 2), ('\u{2105}',
+ '\u{2105}', 1, 2), ('\u{2109}', '\u{2109}', 1, 2), ('\u{2113}', '\u{2113}', 1, 2),
+ ('\u{2116}', '\u{2116}', 1, 2), ('\u{2121}', '\u{2122}', 1, 2), ('\u{2126}', '\u{2126}', 1,
+ 2), ('\u{212b}', '\u{212b}', 1, 2), ('\u{2153}', '\u{2154}', 1, 2), ('\u{215b}', '\u{215e}',
+ 1, 2), ('\u{2160}', '\u{216b}', 1, 2), ('\u{2170}', '\u{2179}', 1, 2), ('\u{2189}',
+ '\u{2189}', 1, 2), ('\u{2190}', '\u{2199}', 1, 2), ('\u{21b8}', '\u{21b9}', 1, 2),
+ ('\u{21d2}', '\u{21d2}', 1, 2), ('\u{21d4}', '\u{21d4}', 1, 2), ('\u{21e7}', '\u{21e7}', 1,
+ 2), ('\u{2200}', '\u{2200}', 1, 2), ('\u{2202}', '\u{2203}', 1, 2), ('\u{2207}', '\u{2208}',
+ 1, 2), ('\u{220b}', '\u{220b}', 1, 2), ('\u{220f}', '\u{220f}', 1, 2), ('\u{2211}',
+ '\u{2211}', 1, 2), ('\u{2215}', '\u{2215}', 1, 2), ('\u{221a}', '\u{221a}', 1, 2),
+ ('\u{221d}', '\u{2220}', 1, 2), ('\u{2223}', '\u{2223}', 1, 2), ('\u{2225}', '\u{2225}', 1,
+ 2), ('\u{2227}', '\u{222c}', 1, 2), ('\u{222e}', '\u{222e}', 1, 2), ('\u{2234}', '\u{2237}',
+ 1, 2), ('\u{223c}', '\u{223d}', 1, 2), ('\u{2248}', '\u{2248}', 1, 2), ('\u{224c}',
+ '\u{224c}', 1, 2), ('\u{2252}', '\u{2252}', 1, 2), ('\u{2260}', '\u{2261}', 1, 2),
+ ('\u{2264}', '\u{2267}', 1, 2), ('\u{226a}', '\u{226b}', 1, 2), ('\u{226e}', '\u{226f}', 1,
+ 2), ('\u{2282}', '\u{2283}', 1, 2), ('\u{2286}', '\u{2287}', 1, 2), ('\u{2295}', '\u{2295}',
+ 1, 2), ('\u{2299}', '\u{2299}', 1, 2), ('\u{22a5}', '\u{22a5}', 1, 2), ('\u{22bf}',
+ '\u{22bf}', 1, 2), ('\u{2312}', '\u{2312}', 1, 2), ('\u{231a}', '\u{231b}', 2, 2),
+ ('\u{2329}', '\u{232a}', 2, 2), ('\u{23e9}', '\u{23ec}', 2, 2), ('\u{23f0}', '\u{23f0}', 2,
+ 2), ('\u{23f3}', '\u{23f3}', 2, 2), ('\u{2460}', '\u{24e9}', 1, 2), ('\u{24eb}', '\u{254b}',
+ 1, 2), ('\u{2550}', '\u{2573}', 1, 2), ('\u{2580}', '\u{258f}', 1, 2), ('\u{2592}',
+ '\u{2595}', 1, 2), ('\u{25a0}', '\u{25a1}', 1, 2), ('\u{25a3}', '\u{25a9}', 1, 2),
+ ('\u{25b2}', '\u{25b3}', 1, 2), ('\u{25b6}', '\u{25b7}', 1, 2), ('\u{25bc}', '\u{25bd}', 1,
+ 2), ('\u{25c0}', '\u{25c1}', 1, 2), ('\u{25c6}', '\u{25c8}', 1, 2), ('\u{25cb}', '\u{25cb}',
+ 1, 2), ('\u{25ce}', '\u{25d1}', 1, 2), ('\u{25e2}', '\u{25e5}', 1, 2), ('\u{25ef}',
+ '\u{25ef}', 1, 2), ('\u{25fd}', '\u{25fe}', 2, 2), ('\u{2605}', '\u{2606}', 1, 2),
+ ('\u{2609}', '\u{2609}', 1, 2), ('\u{260e}', '\u{260f}', 1, 2), ('\u{2614}', '\u{2615}', 2,
+ 2), ('\u{261c}', '\u{261c}', 1, 2), ('\u{261e}', '\u{261e}', 1, 2), ('\u{2640}', '\u{2640}',
+ 1, 2), ('\u{2642}', '\u{2642}', 1, 2), ('\u{2648}', '\u{2653}', 2, 2), ('\u{2660}',
+ '\u{2661}', 1, 2), ('\u{2663}', '\u{2665}', 1, 2), ('\u{2667}', '\u{266a}', 1, 2),
+ ('\u{266c}', '\u{266d}', 1, 2), ('\u{266f}', '\u{266f}', 1, 2), ('\u{267f}', '\u{267f}', 2,
+ 2), ('\u{2693}', '\u{2693}', 2, 2), ('\u{269e}', '\u{269f}', 1, 2), ('\u{26a1}', '\u{26a1}',
+ 2, 2), ('\u{26aa}', '\u{26ab}', 2, 2), ('\u{26bd}', '\u{26be}', 2, 2), ('\u{26bf}',
+ '\u{26bf}', 1, 2), ('\u{26c4}', '\u{26c5}', 2, 2), ('\u{26c6}', '\u{26cd}', 1, 2),
+ ('\u{26ce}', '\u{26ce}', 2, 2), ('\u{26cf}', '\u{26d3}', 1, 2), ('\u{26d4}', '\u{26d4}', 2,
+ 2), ('\u{26d5}', '\u{26e1}', 1, 2), ('\u{26e3}', '\u{26e3}', 1, 2), ('\u{26e8}', '\u{26e9}',
+ 1, 2), ('\u{26ea}', '\u{26ea}', 2, 2), ('\u{26eb}', '\u{26f1}', 1, 2), ('\u{26f2}',
+ '\u{26f3}', 2, 2), ('\u{26f4}', '\u{26f4}', 1, 2), ('\u{26f5}', '\u{26f5}', 2, 2),
+ ('\u{26f6}', '\u{26f9}', 1, 2), ('\u{26fa}', '\u{26fa}', 2, 2), ('\u{26fb}', '\u{26fc}', 1,
+ 2), ('\u{26fd}', '\u{26fd}', 2, 2), ('\u{26fe}', '\u{26ff}', 1, 2), ('\u{2705}', '\u{2705}',
+ 2, 2), ('\u{270a}', '\u{270b}', 2, 2), ('\u{2728}', '\u{2728}', 2, 2), ('\u{273d}',
+ '\u{273d}', 1, 2), ('\u{274c}', '\u{274c}', 2, 2), ('\u{274e}', '\u{274e}', 2, 2),
+ ('\u{2753}', '\u{2755}', 2, 2), ('\u{2757}', '\u{2757}', 2, 2), ('\u{2776}', '\u{277f}', 1,
+ 2), ('\u{2795}', '\u{2797}', 2, 2), ('\u{27b0}', '\u{27b0}', 2, 2), ('\u{27bf}', '\u{27bf}',
+ 2, 2), ('\u{2b1b}', '\u{2b1c}', 2, 2), ('\u{2b50}', '\u{2b50}', 2, 2), ('\u{2b55}',
+ '\u{2b55}', 2, 2), ('\u{2b56}', '\u{2b59}', 1, 2), ('\u{2cef}', '\u{2cf1}', 0, 0),
+ ('\u{2d7f}', '\u{2d7f}', 0, 0), ('\u{2de0}', '\u{2dff}', 0, 0), ('\u{2e80}', '\u{2e99}', 2,
+ 2), ('\u{2e9b}', '\u{2ef3}', 2, 2), ('\u{2f00}', '\u{2fd5}', 2, 2), ('\u{2ff0}', '\u{2ffb}',
+ 2, 2), ('\u{3000}', '\u{3029}', 2, 2), ('\u{302a}', '\u{302d}', 0, 0), ('\u{302e}',
+ '\u{303e}', 2, 2), ('\u{3041}', '\u{3096}', 2, 2), ('\u{3099}', '\u{309a}', 0, 0),
+ ('\u{309b}', '\u{30ff}', 2, 2), ('\u{3105}', '\u{312f}', 2, 2), ('\u{3131}', '\u{318e}', 2,
+ 2), ('\u{3190}', '\u{31ba}', 2, 2), ('\u{31c0}', '\u{31e3}', 2, 2), ('\u{31f0}', '\u{321e}',
+ 2, 2), ('\u{3220}', '\u{3247}', 2, 2), ('\u{3248}', '\u{324f}', 1, 2), ('\u{3250}',
+ '\u{4dbf}', 2, 2), ('\u{4e00}', '\u{a48c}', 2, 2), ('\u{a490}', '\u{a4c6}', 2, 2),
+ ('\u{a66f}', '\u{a672}', 0, 0), ('\u{a674}', '\u{a67d}', 0, 0), ('\u{a69e}', '\u{a69f}', 0,
+ 0), ('\u{a6f0}', '\u{a6f1}', 0, 0), ('\u{a802}', '\u{a802}', 0, 0), ('\u{a806}', '\u{a806}',
+ 0, 0), ('\u{a80b}', '\u{a80b}', 0, 0), ('\u{a825}', '\u{a826}', 0, 0), ('\u{a8c4}',
+ '\u{a8c5}', 0, 0), ('\u{a8e0}', '\u{a8f1}', 0, 0), ('\u{a8ff}', '\u{a8ff}', 0, 0),
+ ('\u{a926}', '\u{a92d}', 0, 0), ('\u{a947}', '\u{a951}', 0, 0), ('\u{a960}', '\u{a97c}', 2,
+ 2), ('\u{a980}', '\u{a982}', 0, 0), ('\u{a9b3}', '\u{a9b3}', 0, 0), ('\u{a9b6}', '\u{a9b9}',
+ 0, 0), ('\u{a9bc}', '\u{a9bd}', 0, 0), ('\u{a9e5}', '\u{a9e5}', 0, 0), ('\u{aa29}',
+ '\u{aa2e}', 0, 0), ('\u{aa31}', '\u{aa32}', 0, 0), ('\u{aa35}', '\u{aa36}', 0, 0),
+ ('\u{aa43}', '\u{aa43}', 0, 0), ('\u{aa4c}', '\u{aa4c}', 0, 0), ('\u{aa7c}', '\u{aa7c}', 0,
+ 0), ('\u{aab0}', '\u{aab0}', 0, 0), ('\u{aab2}', '\u{aab4}', 0, 0), ('\u{aab7}', '\u{aab8}',
+ 0, 0), ('\u{aabe}', '\u{aabf}', 0, 0), ('\u{aac1}', '\u{aac1}', 0, 0), ('\u{aaec}',
+ '\u{aaed}', 0, 0), ('\u{aaf6}', '\u{aaf6}', 0, 0), ('\u{abe5}', '\u{abe5}', 0, 0),
+ ('\u{abe8}', '\u{abe8}', 0, 0), ('\u{abed}', '\u{abed}', 0, 0), ('\u{ac00}', '\u{d7a3}', 2,
+ 2), ('\u{e000}', '\u{f8ff}', 1, 2), ('\u{f900}', '\u{faff}', 2, 2), ('\u{fb1e}', '\u{fb1e}',
+ 0, 0), ('\u{fe00}', '\u{fe0f}', 0, 0), ('\u{fe10}', '\u{fe19}', 2, 2), ('\u{fe20}',
+ '\u{fe2f}', 0, 0), ('\u{fe30}', '\u{fe52}', 2, 2), ('\u{fe54}', '\u{fe66}', 2, 2),
+ ('\u{fe68}', '\u{fe6b}', 2, 2), ('\u{feff}', '\u{feff}', 0, 0), ('\u{ff01}', '\u{ff60}', 2,
+ 2), ('\u{ffe0}', '\u{ffe6}', 2, 2), ('\u{fff9}', '\u{fffb}', 0, 0), ('\u{fffd}', '\u{fffd}',
+ 1, 2), ('\u{101fd}', '\u{101fd}', 0, 0), ('\u{102e0}', '\u{102e0}', 0, 0), ('\u{10376}',
+ '\u{1037a}', 0, 0), ('\u{10a01}', '\u{10a03}', 0, 0), ('\u{10a05}', '\u{10a06}', 0, 0),
+ ('\u{10a0c}', '\u{10a0f}', 0, 0), ('\u{10a38}', '\u{10a3a}', 0, 0), ('\u{10a3f}',
+ '\u{10a3f}', 0, 0), ('\u{10ae5}', '\u{10ae6}', 0, 0), ('\u{10d24}', '\u{10d27}', 0, 0),
+ ('\u{10f46}', '\u{10f50}', 0, 0), ('\u{11001}', '\u{11001}', 0, 0), ('\u{11038}',
+ '\u{11046}', 0, 0), ('\u{1107f}', '\u{11081}', 0, 0), ('\u{110b3}', '\u{110b6}', 0, 0),
+ ('\u{110b9}', '\u{110ba}', 0, 0), ('\u{110bd}', '\u{110bd}', 0, 0), ('\u{110cd}',
+ '\u{110cd}', 0, 0), ('\u{11100}', '\u{11102}', 0, 0), ('\u{11127}', '\u{1112b}', 0, 0),
+ ('\u{1112d}', '\u{11134}', 0, 0), ('\u{11173}', '\u{11173}', 0, 0), ('\u{11180}',
+ '\u{11181}', 0, 0), ('\u{111b6}', '\u{111be}', 0, 0), ('\u{111c9}', '\u{111cc}', 0, 0),
+ ('\u{1122f}', '\u{11231}', 0, 0), ('\u{11234}', '\u{11234}', 0, 0), ('\u{11236}',
+ '\u{11237}', 0, 0), ('\u{1123e}', '\u{1123e}', 0, 0), ('\u{112df}', '\u{112df}', 0, 0),
+ ('\u{112e3}', '\u{112ea}', 0, 0), ('\u{11300}', '\u{11301}', 0, 0), ('\u{1133b}',
+ '\u{1133c}', 0, 0), ('\u{11340}', '\u{11340}', 0, 0), ('\u{11366}', '\u{1136c}', 0, 0),
+ ('\u{11370}', '\u{11374}', 0, 0), ('\u{11438}', '\u{1143f}', 0, 0), ('\u{11442}',
+ '\u{11444}', 0, 0), ('\u{11446}', '\u{11446}', 0, 0), ('\u{1145e}', '\u{1145e}', 0, 0),
+ ('\u{114b3}', '\u{114b8}', 0, 0), ('\u{114ba}', '\u{114ba}', 0, 0), ('\u{114bf}',
+ '\u{114c0}', 0, 0), ('\u{114c2}', '\u{114c3}', 0, 0), ('\u{115b2}', '\u{115b5}', 0, 0),
+ ('\u{115bc}', '\u{115bd}', 0, 0), ('\u{115bf}', '\u{115c0}', 0, 0), ('\u{115dc}',
+ '\u{115dd}', 0, 0), ('\u{11633}', '\u{1163a}', 0, 0), ('\u{1163d}', '\u{1163d}', 0, 0),
+ ('\u{1163f}', '\u{11640}', 0, 0), ('\u{116ab}', '\u{116ab}', 0, 0), ('\u{116ad}',
+ '\u{116ad}', 0, 0), ('\u{116b0}', '\u{116b5}', 0, 0), ('\u{116b7}', '\u{116b7}', 0, 0),
+ ('\u{1171d}', '\u{1171f}', 0, 0), ('\u{11722}', '\u{11725}', 0, 0), ('\u{11727}',
+ '\u{1172b}', 0, 0), ('\u{1182f}', '\u{11837}', 0, 0), ('\u{11839}', '\u{1183a}', 0, 0),
+ ('\u{119d4}', '\u{119d7}', 0, 0), ('\u{119da}', '\u{119db}', 0, 0), ('\u{119e0}',
+ '\u{119e0}', 0, 0), ('\u{11a01}', '\u{11a0a}', 0, 0), ('\u{11a33}', '\u{11a38}', 0, 0),
+ ('\u{11a3b}', '\u{11a3e}', 0, 0), ('\u{11a47}', '\u{11a47}', 0, 0), ('\u{11a51}',
+ '\u{11a56}', 0, 0), ('\u{11a59}', '\u{11a5b}', 0, 0), ('\u{11a8a}', '\u{11a96}', 0, 0),
+ ('\u{11a98}', '\u{11a99}', 0, 0), ('\u{11c30}', '\u{11c36}', 0, 0), ('\u{11c38}',
+ '\u{11c3d}', 0, 0), ('\u{11c3f}', '\u{11c3f}', 0, 0), ('\u{11c92}', '\u{11ca7}', 0, 0),
+ ('\u{11caa}', '\u{11cb0}', 0, 0), ('\u{11cb2}', '\u{11cb3}', 0, 0), ('\u{11cb5}',
+ '\u{11cb6}', 0, 0), ('\u{11d31}', '\u{11d36}', 0, 0), ('\u{11d3a}', '\u{11d3a}', 0, 0),
+ ('\u{11d3c}', '\u{11d3d}', 0, 0), ('\u{11d3f}', '\u{11d45}', 0, 0), ('\u{11d47}',
+ '\u{11d47}', 0, 0), ('\u{11d90}', '\u{11d91}', 0, 0), ('\u{11d95}', '\u{11d95}', 0, 0),
+ ('\u{11d97}', '\u{11d97}', 0, 0), ('\u{11ef3}', '\u{11ef4}', 0, 0), ('\u{13430}',
+ '\u{13438}', 0, 0), ('\u{16af0}', '\u{16af4}', 0, 0), ('\u{16b30}', '\u{16b36}', 0, 0),
+ ('\u{16f4f}', '\u{16f4f}', 0, 0), ('\u{16f8f}', '\u{16f92}', 0, 0), ('\u{16fe0}',
+ '\u{16fe3}', 2, 2), ('\u{17000}', '\u{187f7}', 2, 2), ('\u{18800}', '\u{18af2}', 2, 2),
+ ('\u{1b000}', '\u{1b11e}', 2, 2), ('\u{1b150}', '\u{1b152}', 2, 2), ('\u{1b164}',
+ '\u{1b167}', 2, 2), ('\u{1b170}', '\u{1b2fb}', 2, 2), ('\u{1bc9d}', '\u{1bc9e}', 0, 0),
+ ('\u{1bca0}', '\u{1bca3}', 0, 0), ('\u{1d167}', '\u{1d169}', 0, 0), ('\u{1d173}',
+ '\u{1d182}', 0, 0), ('\u{1d185}', '\u{1d18b}', 0, 0), ('\u{1d1aa}', '\u{1d1ad}', 0, 0),
+ ('\u{1d242}', '\u{1d244}', 0, 0), ('\u{1da00}', '\u{1da36}', 0, 0), ('\u{1da3b}',
+ '\u{1da6c}', 0, 0), ('\u{1da75}', '\u{1da75}', 0, 0), ('\u{1da84}', '\u{1da84}', 0, 0),
+ ('\u{1da9b}', '\u{1da9f}', 0, 0), ('\u{1daa1}', '\u{1daaf}', 0, 0), ('\u{1e000}',
+ '\u{1e006}', 0, 0), ('\u{1e008}', '\u{1e018}', 0, 0), ('\u{1e01b}', '\u{1e021}', 0, 0),
+ ('\u{1e023}', '\u{1e024}', 0, 0), ('\u{1e026}', '\u{1e02a}', 0, 0), ('\u{1e130}',
+ '\u{1e136}', 0, 0), ('\u{1e2ec}', '\u{1e2ef}', 0, 0), ('\u{1e8d0}', '\u{1e8d6}', 0, 0),
+ ('\u{1e944}', '\u{1e94a}', 0, 0), ('\u{1f004}', '\u{1f004}', 2, 2), ('\u{1f0cf}',
+ '\u{1f0cf}', 2, 2), ('\u{1f100}', '\u{1f10a}', 1, 2), ('\u{1f110}', '\u{1f12d}', 1, 2),
+ ('\u{1f130}', '\u{1f169}', 1, 2), ('\u{1f170}', '\u{1f18d}', 1, 2), ('\u{1f18e}',
+ '\u{1f18e}', 2, 2), ('\u{1f18f}', '\u{1f190}', 1, 2), ('\u{1f191}', '\u{1f19a}', 2, 2),
+ ('\u{1f19b}', '\u{1f1ac}', 1, 2), ('\u{1f200}', '\u{1f202}', 2, 2), ('\u{1f210}',
+ '\u{1f23b}', 2, 2), ('\u{1f240}', '\u{1f248}', 2, 2), ('\u{1f250}', '\u{1f251}', 2, 2),
+ ('\u{1f260}', '\u{1f265}', 2, 2), ('\u{1f300}', '\u{1f320}', 2, 2), ('\u{1f32d}',
+ '\u{1f335}', 2, 2), ('\u{1f337}', '\u{1f37c}', 2, 2), ('\u{1f37e}', '\u{1f393}', 2, 2),
+ ('\u{1f3a0}', '\u{1f3ca}', 2, 2), ('\u{1f3cf}', '\u{1f3d3}', 2, 2), ('\u{1f3e0}',
+ '\u{1f3f0}', 2, 2), ('\u{1f3f4}', '\u{1f3f4}', 2, 2), ('\u{1f3f8}', '\u{1f43e}', 2, 2),
+ ('\u{1f440}', '\u{1f440}', 2, 2), ('\u{1f442}', '\u{1f4fc}', 2, 2), ('\u{1f4ff}',
+ '\u{1f53d}', 2, 2), ('\u{1f54b}', '\u{1f54e}', 2, 2), ('\u{1f550}', '\u{1f567}', 2, 2),
+ ('\u{1f57a}', '\u{1f57a}', 2, 2), ('\u{1f595}', '\u{1f596}', 2, 2), ('\u{1f5a4}',
+ '\u{1f5a4}', 2, 2), ('\u{1f5fb}', '\u{1f64f}', 2, 2), ('\u{1f680}', '\u{1f6c5}', 2, 2),
+ ('\u{1f6cc}', '\u{1f6cc}', 2, 2), ('\u{1f6d0}', '\u{1f6d2}', 2, 2), ('\u{1f6d5}',
+ '\u{1f6d5}', 2, 2), ('\u{1f6eb}', '\u{1f6ec}', 2, 2), ('\u{1f6f4}', '\u{1f6fa}', 2, 2),
+ ('\u{1f7e0}', '\u{1f7eb}', 2, 2), ('\u{1f90d}', '\u{1f971}', 2, 2), ('\u{1f973}',
+ '\u{1f976}', 2, 2), ('\u{1f97a}', '\u{1f9a2}', 2, 2), ('\u{1f9a5}', '\u{1f9aa}', 2, 2),
+ ('\u{1f9ae}', '\u{1f9ca}', 2, 2), ('\u{1f9cd}', '\u{1f9ff}', 2, 2), ('\u{1fa70}',
+ '\u{1fa73}', 2, 2), ('\u{1fa78}', '\u{1fa7a}', 2, 2), ('\u{1fa80}', '\u{1fa82}', 2, 2),
+ ('\u{1fa90}', '\u{1fa95}', 2, 2), ('\u{20000}', '\u{2fffd}', 2, 2), ('\u{30000}',
+ '\u{3fffd}', 2, 2), ('\u{e0001}', '\u{e0001}', 0, 0), ('\u{e0020}', '\u{e007f}', 0, 0),
+ ('\u{e0100}', '\u{e01ef}', 0, 0), ('\u{f0000}', '\u{ffffd}', 1, 2), ('\u{100000}',
+ '\u{10fffd}', 1, 2)
+ ];
+
+}
+
diff --git a/unicode-width/src/tests.rs b/unicode-width/src/tests.rs
new file mode 100644
index 0000000..72808c6
--- /dev/null
+++ b/unicode-width/src/tests.rs
@@ -0,0 +1,175 @@
+// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#[cfg(feature = "bench")]
+use std::iter;
+#[cfg(feature = "bench")]
+use test::{self, Bencher};
+#[cfg(feature = "bench")]
+use super::UnicodeWidthChar;
+
+use std::prelude::v1::*;
+
+#[cfg(feature = "bench")]
+#[bench]
+fn cargo(b: &mut Bencher) {
+ let string = iter::repeat('a').take(4096).collect::<String>();
+
+ b.iter(|| {
+ for c in string.chars() {
+ test::black_box(UnicodeWidthChar::width(c));
+ }
+ });
+}
+
+#[cfg(feature = "bench")]
+#[bench]
+#[allow(deprecated)]
+fn stdlib(b: &mut Bencher) {
+ let string = iter::repeat('a').take(4096).collect::<String>();
+
+ b.iter(|| {
+ for c in string.chars() {
+ test::black_box(c.width());
+ }
+ });
+}
+
+#[cfg(feature = "bench")]
+#[bench]
+fn simple_if(b: &mut Bencher) {
+ let string = iter::repeat('a').take(4096).collect::<String>();
+
+ b.iter(|| {
+ for c in string.chars() {
+ test::black_box(simple_width_if(c));
+ }
+ });
+}
+
+#[cfg(feature = "bench")]
+#[bench]
+fn simple_match(b: &mut Bencher) {
+ let string = iter::repeat('a').take(4096).collect::<String>();
+
+ b.iter(|| {
+ for c in string.chars() {
+ test::black_box(simple_width_match(c));
+ }
+ });
+}
+
+#[cfg(feature = "bench")]
+#[inline]
+fn simple_width_if(c: char) -> Option<usize> {
+ let cu = c as u32;
+ if cu < 127 {
+ if cu > 31 {
+ Some(1)
+ } else if cu == 0 {
+ Some(0)
+ } else {
+ None
+ }
+ } else {
+ UnicodeWidthChar::width(c)
+ }
+}
+
+#[cfg(feature = "bench")]
+#[inline]
+fn simple_width_match(c: char) -> Option<usize> {
+ match c as u32 {
+ cu if cu == 0 => Some(0),
+ cu if cu < 0x20 => None,
+ cu if cu < 0x7f => Some(1),
+ _ => UnicodeWidthChar::width(c)
+ }
+}
+
+#[test]
+fn test_str() {
+ use super::UnicodeWidthStr;
+
+ assert_eq!(UnicodeWidthStr::width("hello"), 10);
+ assert_eq!("hello".width_cjk(), 10);
+ assert_eq!(UnicodeWidthStr::width("\0\0\0\x01\x01"), 0);
+ assert_eq!("\0\0\0\x01\x01".width_cjk(), 0);
+ assert_eq!(UnicodeWidthStr::width(""), 0);
+ assert_eq!("".width_cjk(), 0);
+ assert_eq!(UnicodeWidthStr::width("\u{2081}\u{2082}\u{2083}\u{2084}"), 4);
+ assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width_cjk(), 8);
+}
+
+#[test]
+fn test_emoji() {
+ // Example from the README.
+ use super::UnicodeWidthStr;
+
+ assert_eq!(UnicodeWidthStr::width("👩"), 2); // Woman
+ assert_eq!(UnicodeWidthStr::width("🔬"), 2); // Microscope
+ assert_eq!(UnicodeWidthStr::width("👩‍🔬"), 4); // Woman scientist
+}
+
+#[test]
+fn test_char() {
+ use super::UnicodeWidthChar;
+ #[cfg(feature = "no_std")]
+ use core::option::Option::{Some, None};
+
+ assert_eq!(UnicodeWidthChar::width('h'), Some(2));
+ assert_eq!('h'.width_cjk(), Some(2));
+ assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
+ assert_eq!('\x00'.width_cjk(), Some(0));
+ assert_eq!(UnicodeWidthChar::width('\x01'), None);
+ assert_eq!('\x01'.width_cjk(), None);
+ assert_eq!(UnicodeWidthChar::width('\u{2081}'), Some(1));
+ assert_eq!('\u{2081}'.width_cjk(), Some(2));
+}
+
+#[test]
+fn test_char2() {
+ use super::UnicodeWidthChar;
+ #[cfg(feature = "no_std")]
+ use core::option::Option::{Some, None};
+
+ assert_eq!(UnicodeWidthChar::width('\x00'),Some(0));
+ assert_eq!('\x00'.width_cjk(),Some(0));
+
+ assert_eq!(UnicodeWidthChar::width('\x0A'),None);
+ assert_eq!('\x0A'.width_cjk(),None);
+
+ assert_eq!(UnicodeWidthChar::width('w'),Some(1));
+ assert_eq!('w'.width_cjk(),Some(1));
+
+ assert_eq!(UnicodeWidthChar::width('h'),Some(2));
+ assert_eq!('h'.width_cjk(),Some(2));
+
+ assert_eq!(UnicodeWidthChar::width('\u{AD}'),Some(1));
+ assert_eq!('\u{AD}'.width_cjk(),Some(1));
+
+ assert_eq!(UnicodeWidthChar::width('\u{1160}'),Some(0));
+ assert_eq!('\u{1160}'.width_cjk(),Some(0));
+
+ assert_eq!(UnicodeWidthChar::width('\u{a1}'),Some(1));
+ assert_eq!('\u{a1}'.width_cjk(),Some(2));
+
+ assert_eq!(UnicodeWidthChar::width('\u{300}'),Some(0));
+ assert_eq!('\u{300}'.width_cjk(),Some(0));
+}
+
+#[test]
+fn unicode_12() {
+ use super::UnicodeWidthChar;
+ #[cfg(feature = "no_std")]
+ use core::option::Option::{Some, None};
+
+ assert_eq!(UnicodeWidthChar::width('\u{1F971}'), Some(2));
+}