diff options
Diffstat (limited to 'unicode-xid')
| -rw-r--r-- | unicode-xid/.gitignore | 3 | ||||
| -rw-r--r-- | unicode-xid/.travis.yml | 25 | ||||
| -rw-r--r-- | unicode-xid/COPYRIGHT | 7 | ||||
| -rw-r--r-- | unicode-xid/Cargo.toml | 28 | ||||
| -rw-r--r-- | unicode-xid/LICENSE-APACHE | 201 | ||||
| -rw-r--r-- | unicode-xid/LICENSE-MIT | 25 | ||||
| -rw-r--r-- | unicode-xid/README.md | 44 | ||||
| -rwxr-xr-x | unicode-xid/scripts/unicode.py | 187 | ||||
| -rw-r--r-- | unicode-xid/src/lib.rs | 87 | ||||
| -rw-r--r-- | unicode-xid/src/tables.rs | 451 | ||||
| -rw-r--r-- | unicode-xid/src/tests.rs | 111 | 
11 files changed, 1169 insertions, 0 deletions
| diff --git a/unicode-xid/.gitignore b/unicode-xid/.gitignore new file mode 100644 index 0000000..5cdcdba --- /dev/null +++ b/unicode-xid/.gitignore @@ -0,0 +1,3 @@ +target +Cargo.lock +scripts/tmp diff --git a/unicode-xid/.travis.yml b/unicode-xid/.travis.yml new file mode 100644 index 0000000..d9c5c2a --- /dev/null +++ b/unicode-xid/.travis.yml @@ -0,0 +1,25 @@ +language: rust +rust: 'nightly' +sudo: false +script: +  - cargo build --verbose --features bench +  - cargo test --verbose --features bench +  - cargo bench --verbose --features bench +  - cargo clean +  - cargo build --verbose +  - cargo test --verbose +  - rustdoc --test README.md -L target/debug -L target/debug/deps +  - cargo doc +after_success: | +  [ $TRAVIS_BRANCH = master ] && +  [ $TRAVIS_PULL_REQUEST = false ] && +  echo '<meta http-equiv=refresh content=0;url=unicode_xid/index.html>' > target/doc/index.html && +  pip install ghp-import --user $USER && +  $HOME/.local/bin/ghp-import -n target/doc && +  git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages +env: +  global: +    secure: gTlge+/OQlVkV0R+RThWXeN0aknmS7iUTPBMYKJyRdLz7T2vubw3w80a2CVE87JlpV87A5cVGD+LgR+AhYrhKtvqHb1brMDd99gylBBi2DfV7YapDSwSCuFgVR+FjZfJRcXBtI8po5urUZ84V0WLzRX8SyWqWgoD3oCkSL3Wp3w= +notifications: +  email: +    on_success: never diff --git a/unicode-xid/COPYRIGHT b/unicode-xid/COPYRIGHT new file mode 100644 index 0000000..b286ec1 --- /dev/null +++ b/unicode-xid/COPYRIGHT @@ -0,0 +1,7 @@ +Licensed under the Apache License, Version 2.0 +<LICENSE-APACHE or +http://www.apache.org/licenses/LICENSE-2.0> or the MIT +license <LICENSE-MIT or http://opensource.org/licenses/MIT>, +at your option. All files in the project carrying such +notice may not be copied, modified, or distributed except +according to those terms. diff --git a/unicode-xid/Cargo.toml b/unicode-xid/Cargo.toml new file mode 100644 index 0000000..e813762 --- /dev/null +++ b/unicode-xid/Cargo.toml @@ -0,0 +1,28 @@ +[package] + +name = "unicode-xid" +version = "0.2.0" +authors = ["erick.tryzelaar <erick.tryzelaar@gmail.com>", +           "kwantam <kwantam@gmail.com>", +           ] + +homepage = "https://github.com/unicode-rs/unicode-xid" +repository = "https://github.com/unicode-rs/unicode-xid" +documentation = "https://unicode-rs.github.io/unicode-xid" +license = "MIT OR Apache-2.0" +keywords = ["text", "unicode", "xid"] +readme = "README.md" +description = """ +Determine whether characters have the XID_Start +or XID_Continue properties according to +Unicode Standard Annex #31. +""" +exclude = ["/scripts/*", "/.travis.yml"] + +[badges] +travis-ci = { repository = "unicode-rs/unicode-xid" } + +[features] +default = [] +no_std = [] +bench = [] diff --git a/unicode-xid/LICENSE-APACHE b/unicode-xid/LICENSE-APACHE new file mode 100644 index 0000000..16fe87b --- /dev/null +++ b/unicode-xid/LICENSE-APACHE @@ -0,0 +1,201 @@ +                              Apache License +                        Version 2.0, January 2004 +                     http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +   "License" shall mean the terms and conditions for use, reproduction, +   and distribution as defined by Sections 1 through 9 of this document. + +   "Licensor" shall mean the copyright owner or entity authorized by +   the copyright owner that is granting the License. + +   "Legal Entity" shall mean the union of the acting entity and all +   other entities that control, are controlled by, or are under common +   control with that entity. For the purposes of this definition, +   "control" means (i) the power, direct or indirect, to cause the +   direction or management of such entity, whether by contract or +   otherwise, or (ii) ownership of fifty percent (50%) or more of the +   outstanding shares, or (iii) beneficial ownership of such entity. + +   "You" (or "Your") shall mean an individual or Legal Entity +   exercising permissions granted by this License. + +   "Source" form shall mean the preferred form for making modifications, +   including but not limited to software source code, documentation +   source, and configuration files. + +   "Object" form shall mean any form resulting from mechanical +   transformation or translation of a Source form, including but +   not limited to compiled object code, generated documentation, +   and conversions to other media types. + +   "Work" shall mean the work of authorship, whether in Source or +   Object form, made available under the License, as indicated by a +   copyright notice that is included in or attached to the work +   (an example is provided in the Appendix below). + +   "Derivative Works" shall mean any work, whether in Source or Object +   form, that is based on (or derived from) the Work and for which the +   editorial revisions, annotations, elaborations, or other modifications +   represent, as a whole, an original work of authorship. For the purposes +   of this License, Derivative Works shall not include works that remain +   separable from, or merely link (or bind by name) to the interfaces of, +   the Work and Derivative Works thereof. + +   "Contribution" shall mean any work of authorship, including +   the original version of the Work and any modifications or additions +   to that Work or Derivative Works thereof, that is intentionally +   submitted to Licensor for inclusion in the Work by the copyright owner +   or by an individual or Legal Entity authorized to submit on behalf of +   the copyright owner. For the purposes of this definition, "submitted" +   means any form of electronic, verbal, or written communication sent +   to the Licensor or its representatives, including but not limited to +   communication on electronic mailing lists, source code control systems, +   and issue tracking systems that are managed by, or on behalf of, the +   Licensor for the purpose of discussing and improving the Work, but +   excluding communication that is conspicuously marked or otherwise +   designated in writing by the copyright owner as "Not a Contribution." + +   "Contributor" shall mean Licensor and any individual or Legal Entity +   on behalf of whom a Contribution has been received by Licensor and +   subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of +   this License, each Contributor hereby grants to You a perpetual, +   worldwide, non-exclusive, no-charge, royalty-free, irrevocable +   copyright license to reproduce, prepare Derivative Works of, +   publicly display, publicly perform, sublicense, and distribute the +   Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of +   this License, each Contributor hereby grants to You a perpetual, +   worldwide, non-exclusive, no-charge, royalty-free, irrevocable +   (except as stated in this section) patent license to make, have made, +   use, offer to sell, sell, import, and otherwise transfer the Work, +   where such license applies only to those patent claims licensable +   by such Contributor that are necessarily infringed by their +   Contribution(s) alone or by combination of their Contribution(s) +   with the Work to which such Contribution(s) was submitted. If You +   institute patent litigation against any entity (including a +   cross-claim or counterclaim in a lawsuit) alleging that the Work +   or a Contribution incorporated within the Work constitutes direct +   or contributory patent infringement, then any patent licenses +   granted to You under this License for that Work shall terminate +   as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the +   Work or Derivative Works thereof in any medium, with or without +   modifications, and in Source or Object form, provided that You +   meet the following conditions: + +   (a) You must give any other recipients of the Work or +       Derivative Works a copy of this License; and + +   (b) You must cause any modified files to carry prominent notices +       stating that You changed the files; and + +   (c) You must retain, in the Source form of any Derivative Works +       that You distribute, all copyright, patent, trademark, and +       attribution notices from the Source form of the Work, +       excluding those notices that do not pertain to any part of +       the Derivative Works; and + +   (d) If the Work includes a "NOTICE" text file as part of its +       distribution, then any Derivative Works that You distribute must +       include a readable copy of the attribution notices contained +       within such NOTICE file, excluding those notices that do not +       pertain to any part of the Derivative Works, in at least one +       of the following places: within a NOTICE text file distributed +       as part of the Derivative Works; within the Source form or +       documentation, if provided along with the Derivative Works; or, +       within a display generated by the Derivative Works, if and +       wherever such third-party notices normally appear. The contents +       of the NOTICE file are for informational purposes only and +       do not modify the License. You may add Your own attribution +       notices within Derivative Works that You distribute, alongside +       or as an addendum to the NOTICE text from the Work, provided +       that such additional attribution notices cannot be construed +       as modifying the License. + +   You may add Your own copyright statement to Your modifications and +   may provide additional or different license terms and conditions +   for use, reproduction, or distribution of Your modifications, or +   for any such Derivative Works as a whole, provided Your use, +   reproduction, and distribution of the Work otherwise complies with +   the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, +   any Contribution intentionally submitted for inclusion in the Work +   by You to the Licensor shall be under the terms and conditions of +   this License, without any additional terms or conditions. +   Notwithstanding the above, nothing herein shall supersede or modify +   the terms of any separate license agreement you may have executed +   with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade +   names, trademarks, service marks, or product names of the Licensor, +   except as required for reasonable and customary use in describing the +   origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or +   agreed to in writing, Licensor provides the Work (and each +   Contributor provides its Contributions) on an "AS IS" BASIS, +   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +   implied, including, without limitation, any warranties or conditions +   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +   PARTICULAR PURPOSE. You are solely responsible for determining the +   appropriateness of using or redistributing the Work and assume any +   risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, +   whether in tort (including negligence), contract, or otherwise, +   unless required by applicable law (such as deliberate and grossly +   negligent acts) or agreed to in writing, shall any Contributor be +   liable to You for damages, including any direct, indirect, special, +   incidental, or consequential damages of any character arising as a +   result of this License or out of the use or inability to use the +   Work (including but not limited to damages for loss of goodwill, +   work stoppage, computer failure or malfunction, or any and all +   other commercial damages or losses), even if such Contributor +   has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing +   the Work or Derivative Works thereof, You may choose to offer, +   and charge a fee for, acceptance of support, warranty, indemnity, +   or other liability obligations and/or rights consistent with this +   License. However, in accepting such obligations, You may act only +   on Your own behalf and on Your sole responsibility, not on behalf +   of any other Contributor, and only if You agree to indemnify, +   defend, and hold each Contributor harmless for any liability +   incurred by, or claims asserted against, such Contributor by reason +   of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + +   To apply the Apache License to your work, attach the following +   boilerplate notice, with the fields enclosed by brackets "[]" +   replaced with your own identifying information. (Don't include +   the brackets!)  The text should be enclosed in the appropriate +   comment syntax for the file format. We also recommend that a +   file or class name and description of purpose be included on the +   same "printed page" as the copyright notice for easier +   identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +	http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/unicode-xid/LICENSE-MIT b/unicode-xid/LICENSE-MIT new file mode 100644 index 0000000..e69282e --- /dev/null +++ b/unicode-xid/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2015 The Rust Project Developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/unicode-xid/README.md b/unicode-xid/README.md new file mode 100644 index 0000000..3c1ef2a --- /dev/null +++ b/unicode-xid/README.md @@ -0,0 +1,44 @@ +# unicode-xid + +Determine if a `char` is a valid identifier for a parser and/or lexer according to +[Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/) rules. + +[](https://travis-ci.org/unicode-rs/unicode-xid) + +[Documentation](https://unicode-rs.github.io/unicode-xid/unicode_xid/index.html) + +```rust +extern crate unicode_xid; + +use unicode_xid::UnicodeXID; + +fn main() { +    let ch = 'a'; +    println!("Is {} a valid start of an identifier? {}", ch, UnicodeXID::is_xid_start(ch)); +} +``` + +# features + +unicode-xid supports a `no_std` feature. This eliminates dependence +on std, and instead uses equivalent functions from core. + +# crates.io + +You can use this package in your project by adding the following +to your `Cargo.toml`: + +```toml +[dependencies] +unicode-xid = "0.1.0" +``` + +# changelog + +## 0.2.0 + +- Update to Unicode 12.1.0. + +## 0.1.0 + +- Initial release. diff --git a/unicode-xid/scripts/unicode.py b/unicode-xid/scripts/unicode.py new file mode 100755 index 0000000..393f901 --- /dev/null +++ b/unicode-xid/scripts/unicode.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +# +# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. + +# This script uses the following Unicode tables: +# - DerivedCoreProperties.txt +# - ReadMe.txt +# +# Since this should not require frequent updates, we just store this +# out-of-line and check the unicode.rs file into git. + +import fileinput, re, os, sys + +preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly + +#![allow(missing_docs, non_upper_case_globals, non_snake_case)] +''' + +def fetch(f): +    if not os.path.exists(os.path.basename(f)): +        os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s" +                  % f) + +    if not os.path.exists(os.path.basename(f)): +        sys.stderr.write("cannot load %s" % f) +        exit(1) + +def group_cat(cat): +    cat_out = [] +    letters = sorted(set(cat)) +    cur_start = letters.pop(0) +    cur_end = cur_start +    for letter in letters: +        assert letter > cur_end, \ +            "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter)) +        if letter == cur_end + 1: +            cur_end = letter +        else: +            cat_out.append((cur_start, cur_end)) +            cur_start = cur_end = letter +    cat_out.append((cur_start, cur_end)) +    return cat_out + +def ungroup_cat(cat): +    cat_out = [] +    for (lo, hi) in cat: +        while lo <= hi: +            cat_out.append(lo) +            lo += 1 +    return cat_out + +def format_table_content(f, content, indent): +    line = " "*indent +    first = True +    for chunk in content.split(","): +        if len(line) + len(chunk) < 98: +            if first: +                line += chunk +            else: +                line += ", " + chunk +            first = False +        else: +            f.write(line + ",\n") +            line = " "*indent + chunk +    f.write(line) + +def load_properties(f, interestingprops): +    fetch(f) +    props = {} +    re1 = re.compile("^ *([0-9A-F]+) *; *(\w+)") +    re2 = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)") + +    for line in fileinput.input(os.path.basename(f)): +        prop = None +        d_lo = 0 +        d_hi = 0 +        m = re1.match(line) +        if m: +            d_lo = m.group(1) +            d_hi = m.group(1) +            prop = m.group(2) +        else: +            m = re2.match(line) +            if m: +                d_lo = m.group(1) +                d_hi = m.group(2) +                prop = m.group(3) +            else: +                continue +        if interestingprops and prop not in interestingprops: +            continue +        d_lo = int(d_lo, 16) +        d_hi = int(d_hi, 16) +        if prop not in props: +            props[prop] = [] +        props[prop].append((d_lo, d_hi)) + +    # optimize if possible +    for prop in props: +        props[prop] = group_cat(ungroup_cat(props[prop])) + +    return props + +def escape_char(c): +    return "'\\u{%x}'" % c + +def emit_bsearch_range_table(f): +    f.write(""" +fn bsearch_range_table(c: char, r: &[(char,char)]) -> bool { +    use core::cmp::Ordering::{Equal, Less, Greater}; + +    r.binary_search_by(|&(lo,hi)| { +        if lo <= c && c <= hi { Equal } +        else if hi < c { Less } +        else { Greater } +    }).is_ok() +}\n +""") + +def emit_table(f, name, t_data, t_type = "&[(char, char)]", is_pub=True, +        pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])), is_const=True): +    pub_string = "const" +    if not is_const: +        pub_string = "let" +    if is_pub: +        pub_string = "pub " + pub_string +    f.write("    %s %s: %s = &[\n" % (pub_string, name, t_type)) +    data = "" +    first = True +    for dat in t_data: +        if not first: +            data += "," +        first = False +        data += pfun(dat) +    format_table_content(f, data, 8) +    f.write("\n    ];\n\n") + +def emit_property_module(f, mod, tbl, emit): +    f.write("pub mod %s {\n" % mod) +    for cat in sorted(emit): +        emit_table(f, "%s_table" % cat, tbl[cat]) +        f.write("    pub fn %s(c: char) -> bool {\n" % cat) +        f.write("        super::bsearch_range_table(c, %s_table)\n" % cat) +        f.write("    }\n\n") +    f.write("}\n\n") + +if __name__ == "__main__": +    r = "tables.rs" +    if os.path.exists(r): +        os.remove(r) +    with open(r, "w") as rf: +        # write the file's preamble +        rf.write(preamble) + +        # download and parse all the data +        fetch("ReadMe.txt") +        with open("ReadMe.txt") as readme: +            pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode" +            unicode_version = re.search(pattern, readme.read()).groups() +        rf.write(""" +/// The version of [Unicode](http://www.unicode.org/) +/// that this version of unicode-xid is based on. +pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s); +""" % unicode_version) +        emit_bsearch_range_table(rf) + +        want_derived = ["XID_Start", "XID_Continue"] +        derived = load_properties("DerivedCoreProperties.txt", want_derived) +        emit_property_module(rf, "derived_property", derived, want_derived) diff --git a/unicode-xid/src/lib.rs b/unicode-xid/src/lib.rs new file mode 100644 index 0000000..7dd95ba --- /dev/null +++ b/unicode-xid/src/lib.rs @@ -0,0 +1,87 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Determine if a `char` is a valid identifier for a parser and/or lexer according to +//! [Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/) rules. +//! +//! ```rust +//! extern crate unicode_xid; +//! +//! use unicode_xid::UnicodeXID; +//! +//! fn main() { +//!     let ch = 'a'; +//!     println!("Is {} a valid start of an identifier? {}", ch, UnicodeXID::is_xid_start(ch)); +//! } +//! ``` +//! +//! # features +//! +//! unicode-xid supports a `no_std` feature. This eliminates dependence +//! on std, and instead uses equivalent functions from core. +//! +//! # crates.io +//! +//! You can use this package in your project by adding the following +//! to your `Cargo.toml`: +//! +//! ```toml +//! [dependencies] +//! unicode-xid = "0.0.4" +//! ``` + +#![deny(missing_docs, unsafe_code)] +#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png", +       html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")] + +#![no_std] +#![cfg_attr(feature = "bench", feature(test, unicode_internals))] + +#[cfg(test)] +#[macro_use] +extern crate std; + +#[cfg(feature = "bench")] +extern crate test; + +use tables::derived_property; +pub use tables::UNICODE_VERSION; + +mod tables; + +#[cfg(test)] +mod tests; + +/// Methods for determining if a character is a valid identifier character. +pub trait UnicodeXID { +    /// Returns whether the specified character satisfies the 'XID_Start' +    /// Unicode property. +    /// +    /// 'XID_Start' is a Unicode Derived Property specified in +    /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), +    /// mostly similar to ID_Start but modified for closure under NFKx. +    fn is_xid_start(self) -> bool; + +    /// Returns whether the specified `char` satisfies the 'XID_Continue' +    /// Unicode property. +    /// +    /// 'XID_Continue' is a Unicode Derived Property specified in +    /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), +    /// mostly similar to 'ID_Continue' but modified for closure under NFKx. +    fn is_xid_continue(self) -> bool; +} + +impl UnicodeXID for char { +    #[inline] +    fn is_xid_start(self) -> bool { derived_property::XID_Start(self) } + +    #[inline] +    fn is_xid_continue(self) -> bool { derived_property::XID_Continue(self) } +} diff --git a/unicode-xid/src/tables.rs b/unicode-xid/src/tables.rs new file mode 100644 index 0000000..edd51d0 --- /dev/null +++ b/unicode-xid/src/tables.rs @@ -0,0 +1,451 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly + +#![allow(missing_docs, non_upper_case_globals, non_snake_case)] + +/// The version of [Unicode](http://www.unicode.org/) +/// that this version of unicode-xid is based on. +pub const UNICODE_VERSION: (u64, u64, u64) = (12, 1, 0); + +fn bsearch_range_table(c: char, r: &[(char,char)]) -> bool { +    use core::cmp::Ordering::{Equal, Less, Greater}; + +    r.binary_search_by(|&(lo,hi)| { +        if lo <= c && c <= hi { Equal } +        else if hi < c { Less } +        else { Greater } +    }).is_ok() +} + +pub mod derived_property { +    pub const XID_Continue_table: &[(char, char)] = &[ +        ('\u{30}', '\u{39}'), ('\u{41}', '\u{5a}'), ('\u{5f}', '\u{5f}'), ('\u{61}', '\u{7a}'), +        ('\u{aa}', '\u{aa}'), ('\u{b5}', '\u{b5}'), ('\u{b7}', '\u{b7}'), ('\u{ba}', '\u{ba}'), +        ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'), +        ('\u{2e0}', '\u{2e4}'), ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'), ('\u{300}', +        '\u{374}'), ('\u{376}', '\u{377}'), ('\u{37b}', '\u{37d}'), ('\u{37f}', '\u{37f}'), +        ('\u{386}', '\u{38a}'), ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'), ('\u{3a3}', +        '\u{3f5}'), ('\u{3f7}', '\u{481}'), ('\u{483}', '\u{487}'), ('\u{48a}', '\u{52f}'), +        ('\u{531}', '\u{556}'), ('\u{559}', '\u{559}'), ('\u{560}', '\u{588}'), ('\u{591}', +        '\u{5bd}'), ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'), +        ('\u{5c7}', '\u{5c7}'), ('\u{5d0}', '\u{5ea}'), ('\u{5ef}', '\u{5f2}'), ('\u{610}', +        '\u{61a}'), ('\u{620}', '\u{669}'), ('\u{66e}', '\u{6d3}'), ('\u{6d5}', '\u{6dc}'), +        ('\u{6df}', '\u{6e8}'), ('\u{6ea}', '\u{6fc}'), ('\u{6ff}', '\u{6ff}'), ('\u{710}', +        '\u{74a}'), ('\u{74d}', '\u{7b1}'), ('\u{7c0}', '\u{7f5}'), ('\u{7fa}', '\u{7fa}'), +        ('\u{7fd}', '\u{7fd}'), ('\u{800}', '\u{82d}'), ('\u{840}', '\u{85b}'), ('\u{860}', +        '\u{86a}'), ('\u{8a0}', '\u{8b4}'), ('\u{8b6}', '\u{8bd}'), ('\u{8d3}', '\u{8e1}'), +        ('\u{8e3}', '\u{963}'), ('\u{966}', '\u{96f}'), ('\u{971}', '\u{983}'), ('\u{985}', +        '\u{98c}'), ('\u{98f}', '\u{990}'), ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'), +        ('\u{9b2}', '\u{9b2}'), ('\u{9b6}', '\u{9b9}'), ('\u{9bc}', '\u{9c4}'), ('\u{9c7}', +        '\u{9c8}'), ('\u{9cb}', '\u{9ce}'), ('\u{9d7}', '\u{9d7}'), ('\u{9dc}', '\u{9dd}'), +        ('\u{9df}', '\u{9e3}'), ('\u{9e6}', '\u{9f1}'), ('\u{9fc}', '\u{9fc}'), ('\u{9fe}', +        '\u{9fe}'), ('\u{a01}', '\u{a03}'), ('\u{a05}', '\u{a0a}'), ('\u{a0f}', '\u{a10}'), +        ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'), ('\u{a32}', '\u{a33}'), ('\u{a35}', +        '\u{a36}'), ('\u{a38}', '\u{a39}'), ('\u{a3c}', '\u{a3c}'), ('\u{a3e}', '\u{a42}'), +        ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4d}'), ('\u{a51}', '\u{a51}'), ('\u{a59}', +        '\u{a5c}'), ('\u{a5e}', '\u{a5e}'), ('\u{a66}', '\u{a75}'), ('\u{a81}', '\u{a83}'), +        ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'), ('\u{a93}', '\u{aa8}'), ('\u{aaa}', +        '\u{ab0}'), ('\u{ab2}', '\u{ab3}'), ('\u{ab5}', '\u{ab9}'), ('\u{abc}', '\u{ac5}'), +        ('\u{ac7}', '\u{ac9}'), ('\u{acb}', '\u{acd}'), ('\u{ad0}', '\u{ad0}'), ('\u{ae0}', +        '\u{ae3}'), ('\u{ae6}', '\u{aef}'), ('\u{af9}', '\u{aff}'), ('\u{b01}', '\u{b03}'), +        ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'), ('\u{b13}', '\u{b28}'), ('\u{b2a}', +        '\u{b30}'), ('\u{b32}', '\u{b33}'), ('\u{b35}', '\u{b39}'), ('\u{b3c}', '\u{b44}'), +        ('\u{b47}', '\u{b48}'), ('\u{b4b}', '\u{b4d}'), ('\u{b56}', '\u{b57}'), ('\u{b5c}', +        '\u{b5d}'), ('\u{b5f}', '\u{b63}'), ('\u{b66}', '\u{b6f}'), ('\u{b71}', '\u{b71}'), +        ('\u{b82}', '\u{b83}'), ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'), ('\u{b92}', +        '\u{b95}'), ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'), ('\u{b9e}', '\u{b9f}'), +        ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'), ('\u{bae}', '\u{bb9}'), ('\u{bbe}', +        '\u{bc2}'), ('\u{bc6}', '\u{bc8}'), ('\u{bca}', '\u{bcd}'), ('\u{bd0}', '\u{bd0}'), +        ('\u{bd7}', '\u{bd7}'), ('\u{be6}', '\u{bef}'), ('\u{c00}', '\u{c0c}'), ('\u{c0e}', +        '\u{c10}'), ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'), ('\u{c3d}', '\u{c44}'), +        ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('\u{c58}', +        '\u{c5a}'), ('\u{c60}', '\u{c63}'), ('\u{c66}', '\u{c6f}'), ('\u{c80}', '\u{c83}'), +        ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'), ('\u{c92}', '\u{ca8}'), ('\u{caa}', +        '\u{cb3}'), ('\u{cb5}', '\u{cb9}'), ('\u{cbc}', '\u{cc4}'), ('\u{cc6}', '\u{cc8}'), +        ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{cde}', '\u{cde}'), ('\u{ce0}', +        '\u{ce3}'), ('\u{ce6}', '\u{cef}'), ('\u{cf1}', '\u{cf2}'), ('\u{d00}', '\u{d03}'), +        ('\u{d05}', '\u{d0c}'), ('\u{d0e}', '\u{d10}'), ('\u{d12}', '\u{d44}'), ('\u{d46}', +        '\u{d48}'), ('\u{d4a}', '\u{d4e}'), ('\u{d54}', '\u{d57}'), ('\u{d5f}', '\u{d63}'), +        ('\u{d66}', '\u{d6f}'), ('\u{d7a}', '\u{d7f}'), ('\u{d82}', '\u{d83}'), ('\u{d85}', +        '\u{d96}'), ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'), ('\u{dbd}', '\u{dbd}'), +        ('\u{dc0}', '\u{dc6}'), ('\u{dca}', '\u{dca}'), ('\u{dcf}', '\u{dd4}'), ('\u{dd6}', +        '\u{dd6}'), ('\u{dd8}', '\u{ddf}'), ('\u{de6}', '\u{def}'), ('\u{df2}', '\u{df3}'), +        ('\u{e01}', '\u{e3a}'), ('\u{e40}', '\u{e4e}'), ('\u{e50}', '\u{e59}'), ('\u{e81}', +        '\u{e82}'), ('\u{e84}', '\u{e84}'), ('\u{e86}', '\u{e8a}'), ('\u{e8c}', '\u{ea3}'), +        ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', +        '\u{ec6}'), ('\u{ec8}', '\u{ecd}'), ('\u{ed0}', '\u{ed9}'), ('\u{edc}', '\u{edf}'), +        ('\u{f00}', '\u{f00}'), ('\u{f18}', '\u{f19}'), ('\u{f20}', '\u{f29}'), ('\u{f35}', +        '\u{f35}'), ('\u{f37}', '\u{f37}'), ('\u{f39}', '\u{f39}'), ('\u{f3e}', '\u{f47}'), +        ('\u{f49}', '\u{f6c}'), ('\u{f71}', '\u{f84}'), ('\u{f86}', '\u{f97}'), ('\u{f99}', +        '\u{fbc}'), ('\u{fc6}', '\u{fc6}'), ('\u{1000}', '\u{1049}'), ('\u{1050}', '\u{109d}'), +        ('\u{10a0}', '\u{10c5}'), ('\u{10c7}', '\u{10c7}'), ('\u{10cd}', '\u{10cd}'), ('\u{10d0}', +        '\u{10fa}'), ('\u{10fc}', '\u{1248}'), ('\u{124a}', '\u{124d}'), ('\u{1250}', '\u{1256}'), +        ('\u{1258}', '\u{1258}'), ('\u{125a}', '\u{125d}'), ('\u{1260}', '\u{1288}'), ('\u{128a}', +        '\u{128d}'), ('\u{1290}', '\u{12b0}'), ('\u{12b2}', '\u{12b5}'), ('\u{12b8}', '\u{12be}'), +        ('\u{12c0}', '\u{12c0}'), ('\u{12c2}', '\u{12c5}'), ('\u{12c8}', '\u{12d6}'), ('\u{12d8}', +        '\u{1310}'), ('\u{1312}', '\u{1315}'), ('\u{1318}', '\u{135a}'), ('\u{135d}', '\u{135f}'), +        ('\u{1369}', '\u{1371}'), ('\u{1380}', '\u{138f}'), ('\u{13a0}', '\u{13f5}'), ('\u{13f8}', +        '\u{13fd}'), ('\u{1401}', '\u{166c}'), ('\u{166f}', '\u{167f}'), ('\u{1681}', '\u{169a}'), +        ('\u{16a0}', '\u{16ea}'), ('\u{16ee}', '\u{16f8}'), ('\u{1700}', '\u{170c}'), ('\u{170e}', +        '\u{1714}'), ('\u{1720}', '\u{1734}'), ('\u{1740}', '\u{1753}'), ('\u{1760}', '\u{176c}'), +        ('\u{176e}', '\u{1770}'), ('\u{1772}', '\u{1773}'), ('\u{1780}', '\u{17d3}'), ('\u{17d7}', +        '\u{17d7}'), ('\u{17dc}', '\u{17dd}'), ('\u{17e0}', '\u{17e9}'), ('\u{180b}', '\u{180d}'), +        ('\u{1810}', '\u{1819}'), ('\u{1820}', '\u{1878}'), ('\u{1880}', '\u{18aa}'), ('\u{18b0}', +        '\u{18f5}'), ('\u{1900}', '\u{191e}'), ('\u{1920}', '\u{192b}'), ('\u{1930}', '\u{193b}'), +        ('\u{1946}', '\u{196d}'), ('\u{1970}', '\u{1974}'), ('\u{1980}', '\u{19ab}'), ('\u{19b0}', +        '\u{19c9}'), ('\u{19d0}', '\u{19da}'), ('\u{1a00}', '\u{1a1b}'), ('\u{1a20}', '\u{1a5e}'), +        ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a89}'), ('\u{1a90}', '\u{1a99}'), ('\u{1aa7}', +        '\u{1aa7}'), ('\u{1ab0}', '\u{1abd}'), ('\u{1b00}', '\u{1b4b}'), ('\u{1b50}', '\u{1b59}'), +        ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '\u{1bf3}'), ('\u{1c00}', '\u{1c37}'), ('\u{1c40}', +        '\u{1c49}'), ('\u{1c4d}', '\u{1c7d}'), ('\u{1c80}', '\u{1c88}'), ('\u{1c90}', '\u{1cba}'), +        ('\u{1cbd}', '\u{1cbf}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1cfa}'), ('\u{1d00}', +        '\u{1df9}'), ('\u{1dfb}', '\u{1f15}'), ('\u{1f18}', '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'), +        ('\u{1f48}', '\u{1f4d}'), ('\u{1f50}', '\u{1f57}'), ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}', +        '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}', '\u{1f7d}'), ('\u{1f80}', '\u{1fb4}'), +        ('\u{1fb6}', '\u{1fbc}'), ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}', +        '\u{1fcc}'), ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}', '\u{1fdb}'), ('\u{1fe0}', '\u{1fec}'), +        ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}', '\u{1ffc}'), ('\u{203f}', '\u{2040}'), ('\u{2054}', +        '\u{2054}'), ('\u{2071}', '\u{2071}'), ('\u{207f}', '\u{207f}'), ('\u{2090}', '\u{209c}'), +        ('\u{20d0}', '\u{20dc}'), ('\u{20e1}', '\u{20e1}'), ('\u{20e5}', '\u{20f0}'), ('\u{2102}', +        '\u{2102}'), ('\u{2107}', '\u{2107}'), ('\u{210a}', '\u{2113}'), ('\u{2115}', '\u{2115}'), +        ('\u{2118}', '\u{211d}'), ('\u{2124}', '\u{2124}'), ('\u{2126}', '\u{2126}'), ('\u{2128}', +        '\u{2128}'), ('\u{212a}', '\u{2139}'), ('\u{213c}', '\u{213f}'), ('\u{2145}', '\u{2149}'), +        ('\u{214e}', '\u{214e}'), ('\u{2160}', '\u{2188}'), ('\u{2c00}', '\u{2c2e}'), ('\u{2c30}', +        '\u{2c5e}'), ('\u{2c60}', '\u{2ce4}'), ('\u{2ceb}', '\u{2cf3}'), ('\u{2d00}', '\u{2d25}'), +        ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'), ('\u{2d30}', '\u{2d67}'), ('\u{2d6f}', +        '\u{2d6f}'), ('\u{2d7f}', '\u{2d96}'), ('\u{2da0}', '\u{2da6}'), ('\u{2da8}', '\u{2dae}'), +        ('\u{2db0}', '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'), ('\u{2dc0}', '\u{2dc6}'), ('\u{2dc8}', +        '\u{2dce}'), ('\u{2dd0}', '\u{2dd6}'), ('\u{2dd8}', '\u{2dde}'), ('\u{2de0}', '\u{2dff}'), +        ('\u{3005}', '\u{3007}'), ('\u{3021}', '\u{302f}'), ('\u{3031}', '\u{3035}'), ('\u{3038}', +        '\u{303c}'), ('\u{3041}', '\u{3096}'), ('\u{3099}', '\u{309a}'), ('\u{309d}', '\u{309f}'), +        ('\u{30a1}', '\u{30fa}'), ('\u{30fc}', '\u{30ff}'), ('\u{3105}', '\u{312f}'), ('\u{3131}', +        '\u{318e}'), ('\u{31a0}', '\u{31ba}'), ('\u{31f0}', '\u{31ff}'), ('\u{3400}', '\u{4db5}'), +        ('\u{4e00}', '\u{9fef}'), ('\u{a000}', '\u{a48c}'), ('\u{a4d0}', '\u{a4fd}'), ('\u{a500}', +        '\u{a60c}'), ('\u{a610}', '\u{a62b}'), ('\u{a640}', '\u{a66f}'), ('\u{a674}', '\u{a67d}'), +        ('\u{a67f}', '\u{a6f1}'), ('\u{a717}', '\u{a71f}'), ('\u{a722}', '\u{a788}'), ('\u{a78b}', +        '\u{a7bf}'), ('\u{a7c2}', '\u{a7c6}'), ('\u{a7f7}', '\u{a827}'), ('\u{a840}', '\u{a873}'), +        ('\u{a880}', '\u{a8c5}'), ('\u{a8d0}', '\u{a8d9}'), ('\u{a8e0}', '\u{a8f7}'), ('\u{a8fb}', +        '\u{a8fb}'), ('\u{a8fd}', '\u{a92d}'), ('\u{a930}', '\u{a953}'), ('\u{a960}', '\u{a97c}'), +        ('\u{a980}', '\u{a9c0}'), ('\u{a9cf}', '\u{a9d9}'), ('\u{a9e0}', '\u{a9fe}'), ('\u{aa00}', +        '\u{aa36}'), ('\u{aa40}', '\u{aa4d}'), ('\u{aa50}', '\u{aa59}'), ('\u{aa60}', '\u{aa76}'), +        ('\u{aa7a}', '\u{aac2}'), ('\u{aadb}', '\u{aadd}'), ('\u{aae0}', '\u{aaef}'), ('\u{aaf2}', +        '\u{aaf6}'), ('\u{ab01}', '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'), +        ('\u{ab20}', '\u{ab26}'), ('\u{ab28}', '\u{ab2e}'), ('\u{ab30}', '\u{ab5a}'), ('\u{ab5c}', +        '\u{ab67}'), ('\u{ab70}', '\u{abea}'), ('\u{abec}', '\u{abed}'), ('\u{abf0}', '\u{abf9}'), +        ('\u{ac00}', '\u{d7a3}'), ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'), ('\u{f900}', +        '\u{fa6d}'), ('\u{fa70}', '\u{fad9}'), ('\u{fb00}', '\u{fb06}'), ('\u{fb13}', '\u{fb17}'), +        ('\u{fb1d}', '\u{fb28}'), ('\u{fb2a}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}', +        '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'), ('\u{fb46}', '\u{fbb1}'), +        ('\u{fbd3}', '\u{fc5d}'), ('\u{fc64}', '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'), ('\u{fd92}', +        '\u{fdc7}'), ('\u{fdf0}', '\u{fdf9}'), ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2f}'), +        ('\u{fe33}', '\u{fe34}'), ('\u{fe4d}', '\u{fe4f}'), ('\u{fe71}', '\u{fe71}'), ('\u{fe73}', +        '\u{fe73}'), ('\u{fe77}', '\u{fe77}'), ('\u{fe79}', '\u{fe79}'), ('\u{fe7b}', '\u{fe7b}'), +        ('\u{fe7d}', '\u{fe7d}'), ('\u{fe7f}', '\u{fefc}'), ('\u{ff10}', '\u{ff19}'), ('\u{ff21}', +        '\u{ff3a}'), ('\u{ff3f}', '\u{ff3f}'), ('\u{ff41}', '\u{ff5a}'), ('\u{ff66}', '\u{ffbe}'), +        ('\u{ffc2}', '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'), ('\u{ffda}', +        '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), ('\u{1000d}', '\u{10026}'), ('\u{10028}', +        '\u{1003a}'), ('\u{1003c}', '\u{1003d}'), ('\u{1003f}', '\u{1004d}'), ('\u{10050}', +        '\u{1005d}'), ('\u{10080}', '\u{100fa}'), ('\u{10140}', '\u{10174}'), ('\u{101fd}', +        '\u{101fd}'), ('\u{10280}', '\u{1029c}'), ('\u{102a0}', '\u{102d0}'), ('\u{102e0}', +        '\u{102e0}'), ('\u{10300}', '\u{1031f}'), ('\u{1032d}', '\u{1034a}'), ('\u{10350}', +        '\u{1037a}'), ('\u{10380}', '\u{1039d}'), ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', +        '\u{103cf}'), ('\u{103d1}', '\u{103d5}'), ('\u{10400}', '\u{1049d}'), ('\u{104a0}', +        '\u{104a9}'), ('\u{104b0}', '\u{104d3}'), ('\u{104d8}', '\u{104fb}'), ('\u{10500}', +        '\u{10527}'), ('\u{10530}', '\u{10563}'), ('\u{10600}', '\u{10736}'), ('\u{10740}', +        '\u{10755}'), ('\u{10760}', '\u{10767}'), ('\u{10800}', '\u{10805}'), ('\u{10808}', +        '\u{10808}'), ('\u{1080a}', '\u{10835}'), ('\u{10837}', '\u{10838}'), ('\u{1083c}', +        '\u{1083c}'), ('\u{1083f}', '\u{10855}'), ('\u{10860}', '\u{10876}'), ('\u{10880}', +        '\u{1089e}'), ('\u{108e0}', '\u{108f2}'), ('\u{108f4}', '\u{108f5}'), ('\u{10900}', +        '\u{10915}'), ('\u{10920}', '\u{10939}'), ('\u{10980}', '\u{109b7}'), ('\u{109be}', +        '\u{109bf}'), ('\u{10a00}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}', +        '\u{10a13}'), ('\u{10a15}', '\u{10a17}'), ('\u{10a19}', '\u{10a35}'), ('\u{10a38}', +        '\u{10a3a}'), ('\u{10a3f}', '\u{10a3f}'), ('\u{10a60}', '\u{10a7c}'), ('\u{10a80}', +        '\u{10a9c}'), ('\u{10ac0}', '\u{10ac7}'), ('\u{10ac9}', '\u{10ae6}'), ('\u{10b00}', +        '\u{10b35}'), ('\u{10b40}', '\u{10b55}'), ('\u{10b60}', '\u{10b72}'), ('\u{10b80}', +        '\u{10b91}'), ('\u{10c00}', '\u{10c48}'), ('\u{10c80}', '\u{10cb2}'), ('\u{10cc0}', +        '\u{10cf2}'), ('\u{10d00}', '\u{10d27}'), ('\u{10d30}', '\u{10d39}'), ('\u{10f00}', +        '\u{10f1c}'), ('\u{10f27}', '\u{10f27}'), ('\u{10f30}', '\u{10f50}'), ('\u{10fe0}', +        '\u{10ff6}'), ('\u{11000}', '\u{11046}'), ('\u{11066}', '\u{1106f}'), ('\u{1107f}', +        '\u{110ba}'), ('\u{110d0}', '\u{110e8}'), ('\u{110f0}', '\u{110f9}'), ('\u{11100}', +        '\u{11134}'), ('\u{11136}', '\u{1113f}'), ('\u{11144}', '\u{11146}'), ('\u{11150}', +        '\u{11173}'), ('\u{11176}', '\u{11176}'), ('\u{11180}', '\u{111c4}'), ('\u{111c9}', +        '\u{111cc}'), ('\u{111d0}', '\u{111da}'), ('\u{111dc}', '\u{111dc}'), ('\u{11200}', +        '\u{11211}'), ('\u{11213}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), ('\u{11280}', +        '\u{11286}'), ('\u{11288}', '\u{11288}'), ('\u{1128a}', '\u{1128d}'), ('\u{1128f}', +        '\u{1129d}'), ('\u{1129f}', '\u{112a8}'), ('\u{112b0}', '\u{112ea}'), ('\u{112f0}', +        '\u{112f9}'), ('\u{11300}', '\u{11303}'), ('\u{11305}', '\u{1130c}'), ('\u{1130f}', +        '\u{11310}'), ('\u{11313}', '\u{11328}'), ('\u{1132a}', '\u{11330}'), ('\u{11332}', +        '\u{11333}'), ('\u{11335}', '\u{11339}'), ('\u{1133b}', '\u{11344}'), ('\u{11347}', +        '\u{11348}'), ('\u{1134b}', '\u{1134d}'), ('\u{11350}', '\u{11350}'), ('\u{11357}', +        '\u{11357}'), ('\u{1135d}', '\u{11363}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', +        '\u{11374}'), ('\u{11400}', '\u{1144a}'), ('\u{11450}', '\u{11459}'), ('\u{1145e}', +        '\u{1145f}'), ('\u{11480}', '\u{114c5}'), ('\u{114c7}', '\u{114c7}'), ('\u{114d0}', +        '\u{114d9}'), ('\u{11580}', '\u{115b5}'), ('\u{115b8}', '\u{115c0}'), ('\u{115d8}', +        '\u{115dd}'), ('\u{11600}', '\u{11640}'), ('\u{11644}', '\u{11644}'), ('\u{11650}', +        '\u{11659}'), ('\u{11680}', '\u{116b8}'), ('\u{116c0}', '\u{116c9}'), ('\u{11700}', +        '\u{1171a}'), ('\u{1171d}', '\u{1172b}'), ('\u{11730}', '\u{11739}'), ('\u{11800}', +        '\u{1183a}'), ('\u{118a0}', '\u{118e9}'), ('\u{118ff}', '\u{118ff}'), ('\u{119a0}', +        '\u{119a7}'), ('\u{119aa}', '\u{119d7}'), ('\u{119da}', '\u{119e1}'), ('\u{119e3}', +        '\u{119e4}'), ('\u{11a00}', '\u{11a3e}'), ('\u{11a47}', '\u{11a47}'), ('\u{11a50}', +        '\u{11a99}'), ('\u{11a9d}', '\u{11a9d}'), ('\u{11ac0}', '\u{11af8}'), ('\u{11c00}', +        '\u{11c08}'), ('\u{11c0a}', '\u{11c36}'), ('\u{11c38}', '\u{11c40}'), ('\u{11c50}', +        '\u{11c59}'), ('\u{11c72}', '\u{11c8f}'), ('\u{11c92}', '\u{11ca7}'), ('\u{11ca9}', +        '\u{11cb6}'), ('\u{11d00}', '\u{11d06}'), ('\u{11d08}', '\u{11d09}'), ('\u{11d0b}', +        '\u{11d36}'), ('\u{11d3a}', '\u{11d3a}'), ('\u{11d3c}', '\u{11d3d}'), ('\u{11d3f}', +        '\u{11d47}'), ('\u{11d50}', '\u{11d59}'), ('\u{11d60}', '\u{11d65}'), ('\u{11d67}', +        '\u{11d68}'), ('\u{11d6a}', '\u{11d8e}'), ('\u{11d90}', '\u{11d91}'), ('\u{11d93}', +        '\u{11d98}'), ('\u{11da0}', '\u{11da9}'), ('\u{11ee0}', '\u{11ef6}'), ('\u{12000}', +        '\u{12399}'), ('\u{12400}', '\u{1246e}'), ('\u{12480}', '\u{12543}'), ('\u{13000}', +        '\u{1342e}'), ('\u{14400}', '\u{14646}'), ('\u{16800}', '\u{16a38}'), ('\u{16a40}', +        '\u{16a5e}'), ('\u{16a60}', '\u{16a69}'), ('\u{16ad0}', '\u{16aed}'), ('\u{16af0}', +        '\u{16af4}'), ('\u{16b00}', '\u{16b36}'), ('\u{16b40}', '\u{16b43}'), ('\u{16b50}', +        '\u{16b59}'), ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}', '\u{16b8f}'), ('\u{16e40}', +        '\u{16e7f}'), ('\u{16f00}', '\u{16f4a}'), ('\u{16f4f}', '\u{16f87}'), ('\u{16f8f}', +        '\u{16f9f}'), ('\u{16fe0}', '\u{16fe1}'), ('\u{16fe3}', '\u{16fe3}'), ('\u{17000}', +        '\u{187f7}'), ('\u{18800}', '\u{18af2}'), ('\u{1b000}', '\u{1b11e}'), ('\u{1b150}', +        '\u{1b152}'), ('\u{1b164}', '\u{1b167}'), ('\u{1b170}', '\u{1b2fb}'), ('\u{1bc00}', +        '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'), ('\u{1bc80}', '\u{1bc88}'), ('\u{1bc90}', +        '\u{1bc99}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1d165}', '\u{1d169}'), ('\u{1d16d}', +        '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', +        '\u{1d1ad}'), ('\u{1d242}', '\u{1d244}'), ('\u{1d400}', '\u{1d454}'), ('\u{1d456}', +        '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}', '\u{1d4a2}'), ('\u{1d4a5}', +        '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'), ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', +        '\u{1d4bb}'), ('\u{1d4bd}', '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}', +        '\u{1d50a}'), ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}', +        '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'), ('\u{1d546}', +        '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}', '\u{1d6a5}'), ('\u{1d6a8}', +        '\u{1d6c0}'), ('\u{1d6c2}', '\u{1d6da}'), ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}', +        '\u{1d714}'), ('\u{1d716}', '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}', +        '\u{1d76e}'), ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'), ('\u{1d7aa}', +        '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1d7ce}', '\u{1d7ff}'), ('\u{1da00}', +        '\u{1da36}'), ('\u{1da3b}', '\u{1da6c}'), ('\u{1da75}', '\u{1da75}'), ('\u{1da84}', +        '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), ('\u{1e000}', +        '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', +        '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), ('\u{1e100}', '\u{1e12c}'), ('\u{1e130}', +        '\u{1e13d}'), ('\u{1e140}', '\u{1e149}'), ('\u{1e14e}', '\u{1e14e}'), ('\u{1e2c0}', +        '\u{1e2f9}'), ('\u{1e800}', '\u{1e8c4}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e900}', +        '\u{1e94b}'), ('\u{1e950}', '\u{1e959}'), ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}', +        '\u{1ee1f}'), ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'), ('\u{1ee27}', +        '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}', '\u{1ee37}'), ('\u{1ee39}', +        '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'), ('\u{1ee42}', '\u{1ee42}'), ('\u{1ee47}', +        '\u{1ee47}'), ('\u{1ee49}', '\u{1ee49}'), ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}', +        '\u{1ee4f}'), ('\u{1ee51}', '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}', +        '\u{1ee57}'), ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'), ('\u{1ee5d}', +        '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}', '\u{1ee62}'), ('\u{1ee64}', +        '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'), ('\u{1ee6c}', '\u{1ee72}'), ('\u{1ee74}', +        '\u{1ee77}'), ('\u{1ee79}', '\u{1ee7c}'), ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}', +        '\u{1ee89}'), ('\u{1ee8b}', '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}', +        '\u{1eea9}'), ('\u{1eeab}', '\u{1eebb}'), ('\u{20000}', '\u{2a6d6}'), ('\u{2a700}', +        '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}', '\u{2cea1}'), ('\u{2ceb0}', +        '\u{2ebe0}'), ('\u{2f800}', '\u{2fa1d}'), ('\u{e0100}', '\u{e01ef}') +    ]; + +    pub fn XID_Continue(c: char) -> bool { +        super::bsearch_range_table(c, XID_Continue_table) +    } + +    pub const XID_Start_table: &[(char, char)] = &[ +        ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'), ('\u{b5}', '\u{b5}'), +        ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{2c1}'), +        ('\u{2c6}', '\u{2d1}'), ('\u{2e0}', '\u{2e4}'), ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', +        '\u{2ee}'), ('\u{370}', '\u{374}'), ('\u{376}', '\u{377}'), ('\u{37b}', '\u{37d}'), +        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'), ('\u{38c}', +        '\u{38c}'), ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'), ('\u{3f7}', '\u{481}'), +        ('\u{48a}', '\u{52f}'), ('\u{531}', '\u{556}'), ('\u{559}', '\u{559}'), ('\u{560}', +        '\u{588}'), ('\u{5d0}', '\u{5ea}'), ('\u{5ef}', '\u{5f2}'), ('\u{620}', '\u{64a}'), +        ('\u{66e}', '\u{66f}'), ('\u{671}', '\u{6d3}'), ('\u{6d5}', '\u{6d5}'), ('\u{6e5}', +        '\u{6e6}'), ('\u{6ee}', '\u{6ef}'), ('\u{6fa}', '\u{6fc}'), ('\u{6ff}', '\u{6ff}'), +        ('\u{710}', '\u{710}'), ('\u{712}', '\u{72f}'), ('\u{74d}', '\u{7a5}'), ('\u{7b1}', +        '\u{7b1}'), ('\u{7ca}', '\u{7ea}'), ('\u{7f4}', '\u{7f5}'), ('\u{7fa}', '\u{7fa}'), +        ('\u{800}', '\u{815}'), ('\u{81a}', '\u{81a}'), ('\u{824}', '\u{824}'), ('\u{828}', +        '\u{828}'), ('\u{840}', '\u{858}'), ('\u{860}', '\u{86a}'), ('\u{8a0}', '\u{8b4}'), +        ('\u{8b6}', '\u{8bd}'), ('\u{904}', '\u{939}'), ('\u{93d}', '\u{93d}'), ('\u{950}', +        '\u{950}'), ('\u{958}', '\u{961}'), ('\u{971}', '\u{980}'), ('\u{985}', '\u{98c}'), +        ('\u{98f}', '\u{990}'), ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', +        '\u{9b2}'), ('\u{9b6}', '\u{9b9}'), ('\u{9bd}', '\u{9bd}'), ('\u{9ce}', '\u{9ce}'), +        ('\u{9dc}', '\u{9dd}'), ('\u{9df}', '\u{9e1}'), ('\u{9f0}', '\u{9f1}'), ('\u{9fc}', +        '\u{9fc}'), ('\u{a05}', '\u{a0a}'), ('\u{a0f}', '\u{a10}'), ('\u{a13}', '\u{a28}'), +        ('\u{a2a}', '\u{a30}'), ('\u{a32}', '\u{a33}'), ('\u{a35}', '\u{a36}'), ('\u{a38}', +        '\u{a39}'), ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'), ('\u{a72}', '\u{a74}'), +        ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'), ('\u{a93}', '\u{aa8}'), ('\u{aaa}', +        '\u{ab0}'), ('\u{ab2}', '\u{ab3}'), ('\u{ab5}', '\u{ab9}'), ('\u{abd}', '\u{abd}'), +        ('\u{ad0}', '\u{ad0}'), ('\u{ae0}', '\u{ae1}'), ('\u{af9}', '\u{af9}'), ('\u{b05}', +        '\u{b0c}'), ('\u{b0f}', '\u{b10}'), ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'), +        ('\u{b32}', '\u{b33}'), ('\u{b35}', '\u{b39}'), ('\u{b3d}', '\u{b3d}'), ('\u{b5c}', +        '\u{b5d}'), ('\u{b5f}', '\u{b61}'), ('\u{b71}', '\u{b71}'), ('\u{b83}', '\u{b83}'), +        ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'), ('\u{b92}', '\u{b95}'), ('\u{b99}', +        '\u{b9a}'), ('\u{b9c}', '\u{b9c}'), ('\u{b9e}', '\u{b9f}'), ('\u{ba3}', '\u{ba4}'), +        ('\u{ba8}', '\u{baa}'), ('\u{bae}', '\u{bb9}'), ('\u{bd0}', '\u{bd0}'), ('\u{c05}', +        '\u{c0c}'), ('\u{c0e}', '\u{c10}'), ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'), +        ('\u{c3d}', '\u{c3d}'), ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c61}'), ('\u{c80}', +        '\u{c80}'), ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'), ('\u{c92}', '\u{ca8}'), +        ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'), ('\u{cbd}', '\u{cbd}'), ('\u{cde}', +        '\u{cde}'), ('\u{ce0}', '\u{ce1}'), ('\u{cf1}', '\u{cf2}'), ('\u{d05}', '\u{d0c}'), +        ('\u{d0e}', '\u{d10}'), ('\u{d12}', '\u{d3a}'), ('\u{d3d}', '\u{d3d}'), ('\u{d4e}', +        '\u{d4e}'), ('\u{d54}', '\u{d56}'), ('\u{d5f}', '\u{d61}'), ('\u{d7a}', '\u{d7f}'), +        ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'), ('\u{dbd}', +        '\u{dbd}'), ('\u{dc0}', '\u{dc6}'), ('\u{e01}', '\u{e30}'), ('\u{e32}', '\u{e32}'), +        ('\u{e40}', '\u{e46}'), ('\u{e81}', '\u{e82}'), ('\u{e84}', '\u{e84}'), ('\u{e86}', +        '\u{e8a}'), ('\u{e8c}', '\u{ea3}'), ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{eb0}'), +        ('\u{eb2}', '\u{eb2}'), ('\u{ebd}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', +        '\u{ec6}'), ('\u{edc}', '\u{edf}'), ('\u{f00}', '\u{f00}'), ('\u{f40}', '\u{f47}'), +        ('\u{f49}', '\u{f6c}'), ('\u{f88}', '\u{f8c}'), ('\u{1000}', '\u{102a}'), ('\u{103f}', +        '\u{103f}'), ('\u{1050}', '\u{1055}'), ('\u{105a}', '\u{105d}'), ('\u{1061}', '\u{1061}'), +        ('\u{1065}', '\u{1066}'), ('\u{106e}', '\u{1070}'), ('\u{1075}', '\u{1081}'), ('\u{108e}', +        '\u{108e}'), ('\u{10a0}', '\u{10c5}'), ('\u{10c7}', '\u{10c7}'), ('\u{10cd}', '\u{10cd}'), +        ('\u{10d0}', '\u{10fa}'), ('\u{10fc}', '\u{1248}'), ('\u{124a}', '\u{124d}'), ('\u{1250}', +        '\u{1256}'), ('\u{1258}', '\u{1258}'), ('\u{125a}', '\u{125d}'), ('\u{1260}', '\u{1288}'), +        ('\u{128a}', '\u{128d}'), ('\u{1290}', '\u{12b0}'), ('\u{12b2}', '\u{12b5}'), ('\u{12b8}', +        '\u{12be}'), ('\u{12c0}', '\u{12c0}'), ('\u{12c2}', '\u{12c5}'), ('\u{12c8}', '\u{12d6}'), +        ('\u{12d8}', '\u{1310}'), ('\u{1312}', '\u{1315}'), ('\u{1318}', '\u{135a}'), ('\u{1380}', +        '\u{138f}'), ('\u{13a0}', '\u{13f5}'), ('\u{13f8}', '\u{13fd}'), ('\u{1401}', '\u{166c}'), +        ('\u{166f}', '\u{167f}'), ('\u{1681}', '\u{169a}'), ('\u{16a0}', '\u{16ea}'), ('\u{16ee}', +        '\u{16f8}'), ('\u{1700}', '\u{170c}'), ('\u{170e}', '\u{1711}'), ('\u{1720}', '\u{1731}'), +        ('\u{1740}', '\u{1751}'), ('\u{1760}', '\u{176c}'), ('\u{176e}', '\u{1770}'), ('\u{1780}', +        '\u{17b3}'), ('\u{17d7}', '\u{17d7}'), ('\u{17dc}', '\u{17dc}'), ('\u{1820}', '\u{1878}'), +        ('\u{1880}', '\u{18a8}'), ('\u{18aa}', '\u{18aa}'), ('\u{18b0}', '\u{18f5}'), ('\u{1900}', +        '\u{191e}'), ('\u{1950}', '\u{196d}'), ('\u{1970}', '\u{1974}'), ('\u{1980}', '\u{19ab}'), +        ('\u{19b0}', '\u{19c9}'), ('\u{1a00}', '\u{1a16}'), ('\u{1a20}', '\u{1a54}'), ('\u{1aa7}', +        '\u{1aa7}'), ('\u{1b05}', '\u{1b33}'), ('\u{1b45}', '\u{1b4b}'), ('\u{1b83}', '\u{1ba0}'), +        ('\u{1bae}', '\u{1baf}'), ('\u{1bba}', '\u{1be5}'), ('\u{1c00}', '\u{1c23}'), ('\u{1c4d}', +        '\u{1c4f}'), ('\u{1c5a}', '\u{1c7d}'), ('\u{1c80}', '\u{1c88}'), ('\u{1c90}', '\u{1cba}'), +        ('\u{1cbd}', '\u{1cbf}'), ('\u{1ce9}', '\u{1cec}'), ('\u{1cee}', '\u{1cf3}'), ('\u{1cf5}', +        '\u{1cf6}'), ('\u{1cfa}', '\u{1cfa}'), ('\u{1d00}', '\u{1dbf}'), ('\u{1e00}', '\u{1f15}'), +        ('\u{1f18}', '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'), ('\u{1f48}', '\u{1f4d}'), ('\u{1f50}', +        '\u{1f57}'), ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'), +        ('\u{1f5f}', '\u{1f7d}'), ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fbc}'), ('\u{1fbe}', +        '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}', '\u{1fcc}'), ('\u{1fd0}', '\u{1fd3}'), +        ('\u{1fd6}', '\u{1fdb}'), ('\u{1fe0}', '\u{1fec}'), ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}', +        '\u{1ffc}'), ('\u{2071}', '\u{2071}'), ('\u{207f}', '\u{207f}'), ('\u{2090}', '\u{209c}'), +        ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'), ('\u{210a}', '\u{2113}'), ('\u{2115}', +        '\u{2115}'), ('\u{2118}', '\u{211d}'), ('\u{2124}', '\u{2124}'), ('\u{2126}', '\u{2126}'), +        ('\u{2128}', '\u{2128}'), ('\u{212a}', '\u{2139}'), ('\u{213c}', '\u{213f}'), ('\u{2145}', +        '\u{2149}'), ('\u{214e}', '\u{214e}'), ('\u{2160}', '\u{2188}'), ('\u{2c00}', '\u{2c2e}'), +        ('\u{2c30}', '\u{2c5e}'), ('\u{2c60}', '\u{2ce4}'), ('\u{2ceb}', '\u{2cee}'), ('\u{2cf2}', +        '\u{2cf3}'), ('\u{2d00}', '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'), +        ('\u{2d30}', '\u{2d67}'), ('\u{2d6f}', '\u{2d6f}'), ('\u{2d80}', '\u{2d96}'), ('\u{2da0}', +        '\u{2da6}'), ('\u{2da8}', '\u{2dae}'), ('\u{2db0}', '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'), +        ('\u{2dc0}', '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}', '\u{2dd6}'), ('\u{2dd8}', +        '\u{2dde}'), ('\u{3005}', '\u{3007}'), ('\u{3021}', '\u{3029}'), ('\u{3031}', '\u{3035}'), +        ('\u{3038}', '\u{303c}'), ('\u{3041}', '\u{3096}'), ('\u{309d}', '\u{309f}'), ('\u{30a1}', +        '\u{30fa}'), ('\u{30fc}', '\u{30ff}'), ('\u{3105}', '\u{312f}'), ('\u{3131}', '\u{318e}'), +        ('\u{31a0}', '\u{31ba}'), ('\u{31f0}', '\u{31ff}'), ('\u{3400}', '\u{4db5}'), ('\u{4e00}', +        '\u{9fef}'), ('\u{a000}', '\u{a48c}'), ('\u{a4d0}', '\u{a4fd}'), ('\u{a500}', '\u{a60c}'), +        ('\u{a610}', '\u{a61f}'), ('\u{a62a}', '\u{a62b}'), ('\u{a640}', '\u{a66e}'), ('\u{a67f}', +        '\u{a69d}'), ('\u{a6a0}', '\u{a6ef}'), ('\u{a717}', '\u{a71f}'), ('\u{a722}', '\u{a788}'), +        ('\u{a78b}', '\u{a7bf}'), ('\u{a7c2}', '\u{a7c6}'), ('\u{a7f7}', '\u{a801}'), ('\u{a803}', +        '\u{a805}'), ('\u{a807}', '\u{a80a}'), ('\u{a80c}', '\u{a822}'), ('\u{a840}', '\u{a873}'), +        ('\u{a882}', '\u{a8b3}'), ('\u{a8f2}', '\u{a8f7}'), ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}', +        '\u{a8fe}'), ('\u{a90a}', '\u{a925}'), ('\u{a930}', '\u{a946}'), ('\u{a960}', '\u{a97c}'), +        ('\u{a984}', '\u{a9b2}'), ('\u{a9cf}', '\u{a9cf}'), ('\u{a9e0}', '\u{a9e4}'), ('\u{a9e6}', +        '\u{a9ef}'), ('\u{a9fa}', '\u{a9fe}'), ('\u{aa00}', '\u{aa28}'), ('\u{aa40}', '\u{aa42}'), +        ('\u{aa44}', '\u{aa4b}'), ('\u{aa60}', '\u{aa76}'), ('\u{aa7a}', '\u{aa7a}'), ('\u{aa7e}', +        '\u{aaaf}'), ('\u{aab1}', '\u{aab1}'), ('\u{aab5}', '\u{aab6}'), ('\u{aab9}', '\u{aabd}'), +        ('\u{aac0}', '\u{aac0}'), ('\u{aac2}', '\u{aac2}'), ('\u{aadb}', '\u{aadd}'), ('\u{aae0}', +        '\u{aaea}'), ('\u{aaf2}', '\u{aaf4}'), ('\u{ab01}', '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'), +        ('\u{ab11}', '\u{ab16}'), ('\u{ab20}', '\u{ab26}'), ('\u{ab28}', '\u{ab2e}'), ('\u{ab30}', +        '\u{ab5a}'), ('\u{ab5c}', '\u{ab67}'), ('\u{ab70}', '\u{abe2}'), ('\u{ac00}', '\u{d7a3}'), +        ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'), ('\u{f900}', '\u{fa6d}'), ('\u{fa70}', +        '\u{fad9}'), ('\u{fb00}', '\u{fb06}'), ('\u{fb13}', '\u{fb17}'), ('\u{fb1d}', '\u{fb1d}'), +        ('\u{fb1f}', '\u{fb28}'), ('\u{fb2a}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}', +        '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'), ('\u{fb46}', '\u{fbb1}'), +        ('\u{fbd3}', '\u{fc5d}'), ('\u{fc64}', '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'), ('\u{fd92}', +        '\u{fdc7}'), ('\u{fdf0}', '\u{fdf9}'), ('\u{fe71}', '\u{fe71}'), ('\u{fe73}', '\u{fe73}'), +        ('\u{fe77}', '\u{fe77}'), ('\u{fe79}', '\u{fe79}'), ('\u{fe7b}', '\u{fe7b}'), ('\u{fe7d}', +        '\u{fe7d}'), ('\u{fe7f}', '\u{fefc}'), ('\u{ff21}', '\u{ff3a}'), ('\u{ff41}', '\u{ff5a}'), +        ('\u{ff66}', '\u{ff9d}'), ('\u{ffa0}', '\u{ffbe}'), ('\u{ffc2}', '\u{ffc7}'), ('\u{ffca}', +        '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'), ('\u{ffda}', '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), +        ('\u{1000d}', '\u{10026}'), ('\u{10028}', '\u{1003a}'), ('\u{1003c}', '\u{1003d}'), +        ('\u{1003f}', '\u{1004d}'), ('\u{10050}', '\u{1005d}'), ('\u{10080}', '\u{100fa}'), +        ('\u{10140}', '\u{10174}'), ('\u{10280}', '\u{1029c}'), ('\u{102a0}', '\u{102d0}'), +        ('\u{10300}', '\u{1031f}'), ('\u{1032d}', '\u{1034a}'), ('\u{10350}', '\u{10375}'), +        ('\u{10380}', '\u{1039d}'), ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', '\u{103cf}'), +        ('\u{103d1}', '\u{103d5}'), ('\u{10400}', '\u{1049d}'), ('\u{104b0}', '\u{104d3}'), +        ('\u{104d8}', '\u{104fb}'), ('\u{10500}', '\u{10527}'), ('\u{10530}', '\u{10563}'), +        ('\u{10600}', '\u{10736}'), ('\u{10740}', '\u{10755}'), ('\u{10760}', '\u{10767}'), +        ('\u{10800}', '\u{10805}'), ('\u{10808}', '\u{10808}'), ('\u{1080a}', '\u{10835}'), +        ('\u{10837}', '\u{10838}'), ('\u{1083c}', '\u{1083c}'), ('\u{1083f}', '\u{10855}'), +        ('\u{10860}', '\u{10876}'), ('\u{10880}', '\u{1089e}'), ('\u{108e0}', '\u{108f2}'), +        ('\u{108f4}', '\u{108f5}'), ('\u{10900}', '\u{10915}'), ('\u{10920}', '\u{10939}'), +        ('\u{10980}', '\u{109b7}'), ('\u{109be}', '\u{109bf}'), ('\u{10a00}', '\u{10a00}'), +        ('\u{10a10}', '\u{10a13}'), ('\u{10a15}', '\u{10a17}'), ('\u{10a19}', '\u{10a35}'), +        ('\u{10a60}', '\u{10a7c}'), ('\u{10a80}', '\u{10a9c}'), ('\u{10ac0}', '\u{10ac7}'), +        ('\u{10ac9}', '\u{10ae4}'), ('\u{10b00}', '\u{10b35}'), ('\u{10b40}', '\u{10b55}'), +        ('\u{10b60}', '\u{10b72}'), ('\u{10b80}', '\u{10b91}'), ('\u{10c00}', '\u{10c48}'), +        ('\u{10c80}', '\u{10cb2}'), ('\u{10cc0}', '\u{10cf2}'), ('\u{10d00}', '\u{10d23}'), +        ('\u{10f00}', '\u{10f1c}'), ('\u{10f27}', '\u{10f27}'), ('\u{10f30}', '\u{10f45}'), +        ('\u{10fe0}', '\u{10ff6}'), ('\u{11003}', '\u{11037}'), ('\u{11083}', '\u{110af}'), +        ('\u{110d0}', '\u{110e8}'), ('\u{11103}', '\u{11126}'), ('\u{11144}', '\u{11144}'), +        ('\u{11150}', '\u{11172}'), ('\u{11176}', '\u{11176}'), ('\u{11183}', '\u{111b2}'), +        ('\u{111c1}', '\u{111c4}'), ('\u{111da}', '\u{111da}'), ('\u{111dc}', '\u{111dc}'), +        ('\u{11200}', '\u{11211}'), ('\u{11213}', '\u{1122b}'), ('\u{11280}', '\u{11286}'), +        ('\u{11288}', '\u{11288}'), ('\u{1128a}', '\u{1128d}'), ('\u{1128f}', '\u{1129d}'), +        ('\u{1129f}', '\u{112a8}'), ('\u{112b0}', '\u{112de}'), ('\u{11305}', '\u{1130c}'), +        ('\u{1130f}', '\u{11310}'), ('\u{11313}', '\u{11328}'), ('\u{1132a}', '\u{11330}'), +        ('\u{11332}', '\u{11333}'), ('\u{11335}', '\u{11339}'), ('\u{1133d}', '\u{1133d}'), +        ('\u{11350}', '\u{11350}'), ('\u{1135d}', '\u{11361}'), ('\u{11400}', '\u{11434}'), +        ('\u{11447}', '\u{1144a}'), ('\u{1145f}', '\u{1145f}'), ('\u{11480}', '\u{114af}'), +        ('\u{114c4}', '\u{114c5}'), ('\u{114c7}', '\u{114c7}'), ('\u{11580}', '\u{115ae}'), +        ('\u{115d8}', '\u{115db}'), ('\u{11600}', '\u{1162f}'), ('\u{11644}', '\u{11644}'), +        ('\u{11680}', '\u{116aa}'), ('\u{116b8}', '\u{116b8}'), ('\u{11700}', '\u{1171a}'), +        ('\u{11800}', '\u{1182b}'), ('\u{118a0}', '\u{118df}'), ('\u{118ff}', '\u{118ff}'), +        ('\u{119a0}', '\u{119a7}'), ('\u{119aa}', '\u{119d0}'), ('\u{119e1}', '\u{119e1}'), +        ('\u{119e3}', '\u{119e3}'), ('\u{11a00}', '\u{11a00}'), ('\u{11a0b}', '\u{11a32}'), +        ('\u{11a3a}', '\u{11a3a}'), ('\u{11a50}', '\u{11a50}'), ('\u{11a5c}', '\u{11a89}'), +        ('\u{11a9d}', '\u{11a9d}'), ('\u{11ac0}', '\u{11af8}'), ('\u{11c00}', '\u{11c08}'), +        ('\u{11c0a}', '\u{11c2e}'), ('\u{11c40}', '\u{11c40}'), ('\u{11c72}', '\u{11c8f}'), +        ('\u{11d00}', '\u{11d06}'), ('\u{11d08}', '\u{11d09}'), ('\u{11d0b}', '\u{11d30}'), +        ('\u{11d46}', '\u{11d46}'), ('\u{11d60}', '\u{11d65}'), ('\u{11d67}', '\u{11d68}'), +        ('\u{11d6a}', '\u{11d89}'), ('\u{11d98}', '\u{11d98}'), ('\u{11ee0}', '\u{11ef2}'), +        ('\u{12000}', '\u{12399}'), ('\u{12400}', '\u{1246e}'), ('\u{12480}', '\u{12543}'), +        ('\u{13000}', '\u{1342e}'), ('\u{14400}', '\u{14646}'), ('\u{16800}', '\u{16a38}'), +        ('\u{16a40}', '\u{16a5e}'), ('\u{16ad0}', '\u{16aed}'), ('\u{16b00}', '\u{16b2f}'), +        ('\u{16b40}', '\u{16b43}'), ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}', '\u{16b8f}'), +        ('\u{16e40}', '\u{16e7f}'), ('\u{16f00}', '\u{16f4a}'), ('\u{16f50}', '\u{16f50}'), +        ('\u{16f93}', '\u{16f9f}'), ('\u{16fe0}', '\u{16fe1}'), ('\u{16fe3}', '\u{16fe3}'), +        ('\u{17000}', '\u{187f7}'), ('\u{18800}', '\u{18af2}'), ('\u{1b000}', '\u{1b11e}'), +        ('\u{1b150}', '\u{1b152}'), ('\u{1b164}', '\u{1b167}'), ('\u{1b170}', '\u{1b2fb}'), +        ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'), ('\u{1bc80}', '\u{1bc88}'), +        ('\u{1bc90}', '\u{1bc99}'), ('\u{1d400}', '\u{1d454}'), ('\u{1d456}', '\u{1d49c}'), +        ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}', '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), +        ('\u{1d4a9}', '\u{1d4ac}'), ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'), +        ('\u{1d4bd}', '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'), +        ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}', '\u{1d539}'), +        ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'), ('\u{1d546}', '\u{1d546}'), +        ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}', '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d6c0}'), +        ('\u{1d6c2}', '\u{1d6da}'), ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}', '\u{1d714}'), +        ('\u{1d716}', '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}', '\u{1d76e}'), +        ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'), ('\u{1d7aa}', '\u{1d7c2}'), +        ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1e100}', '\u{1e12c}'), ('\u{1e137}', '\u{1e13d}'), +        ('\u{1e14e}', '\u{1e14e}'), ('\u{1e2c0}', '\u{1e2eb}'), ('\u{1e800}', '\u{1e8c4}'), +        ('\u{1e900}', '\u{1e943}'), ('\u{1e94b}', '\u{1e94b}'), ('\u{1ee00}', '\u{1ee03}'), +        ('\u{1ee05}', '\u{1ee1f}'), ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'), +        ('\u{1ee27}', '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}', '\u{1ee37}'), +        ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'), ('\u{1ee42}', '\u{1ee42}'), +        ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}', '\u{1ee49}'), ('\u{1ee4b}', '\u{1ee4b}'), +        ('\u{1ee4d}', '\u{1ee4f}'), ('\u{1ee51}', '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'), +        ('\u{1ee57}', '\u{1ee57}'), ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'), +        ('\u{1ee5d}', '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}', '\u{1ee62}'), +        ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'), ('\u{1ee6c}', '\u{1ee72}'), +        ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}', '\u{1ee7c}'), ('\u{1ee7e}', '\u{1ee7e}'), +        ('\u{1ee80}', '\u{1ee89}'), ('\u{1ee8b}', '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'), +        ('\u{1eea5}', '\u{1eea9}'), ('\u{1eeab}', '\u{1eebb}'), ('\u{20000}', '\u{2a6d6}'), +        ('\u{2a700}', '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}', '\u{2cea1}'), +        ('\u{2ceb0}', '\u{2ebe0}'), ('\u{2f800}', '\u{2fa1d}') +    ]; + +    pub fn XID_Start(c: char) -> bool { +        super::bsearch_range_table(c, XID_Start_table) +    } + +} + diff --git a/unicode-xid/src/tests.rs b/unicode-xid/src/tests.rs new file mode 100644 index 0000000..3c9b731 --- /dev/null +++ b/unicode-xid/src/tests.rs @@ -0,0 +1,111 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[cfg(feature = "bench")] +use std::iter; +#[cfg(feature = "bench")] +use test::Bencher; +#[cfg(feature = "bench")] +use std::prelude::v1::*; + +#[cfg(feature = "bench")] +#[bench] +fn cargo_is_xid_start(b: &mut Bencher) { +    let string = iter::repeat('a').take(4096).collect::<String>(); + +    b.bytes = string.len() as u64; +    b.iter(|| { +        string.chars().all(super::UnicodeXID::is_xid_start) +    }); +} + +#[cfg(feature = "bench")] +#[bench] +fn stdlib_is_xid_start(b: &mut Bencher) { +    let string = iter::repeat('a').take(4096).collect::<String>(); + +    b.bytes = string.len() as u64; +    b.iter(|| { +        string.chars().all(char::is_xid_start) +    }); +} + +#[cfg(feature = "bench")] +#[bench] +fn cargo_xid_continue(b: &mut Bencher) { +    let string = iter::repeat('a').take(4096).collect::<String>(); + +    b.bytes = string.len() as u64; +    b.iter(|| { +        string.chars().all(super::UnicodeXID::is_xid_continue) +    }); +} + +#[cfg(feature = "bench")] +#[bench] +fn stdlib_xid_continue(b: &mut Bencher) { +    let string = iter::repeat('a').take(4096).collect::<String>(); + +    b.bytes = string.len() as u64; +    b.iter(|| { +        string.chars().all(char::is_xid_continue) +    }); +} + +#[test] +fn test_is_xid_start() { +    let chars = [ +        'A', 'Z', 'a', 'z', +        '\u{1000d}', '\u{10026}', +    ]; + +    for ch in &chars { +        assert!(super::UnicodeXID::is_xid_start(*ch), "{}", ch); +    } +} + +#[test] +fn test_is_not_xid_start() { +    let chars = [ +        '\x00', '\x01', +        '0', '9', +        ' ', '[', '<', '{', '(', +        '\u{02c2}', '\u{ffff}', +    ]; + +    for ch in &chars { +        assert!(!super::UnicodeXID::is_xid_start(*ch), "{}", ch); +    } +} + +#[test] +fn test_is_xid_continue() { +    let chars = [ +        '0', '9', 'A', 'Z', 'a', 'z', '_', +        '\u{1000d}', '\u{10026}', +    ]; + +    for ch in &chars { +        assert!(super::UnicodeXID::is_xid_continue(*ch), "{}", ch); +    } +} + +#[test] +fn test_is_not_xid_continue() { +    let chars = [ +        '\x00', '\x01', +        ' ', '[', '<', '{', '(', +        '\u{02c2}', '\u{ffff}', +    ]; + +    for &ch in &chars { +        assert!(!super::UnicodeXID::is_xid_continue(ch), "{}", ch); +    } +} | 
