From 5e20a29b4fdc8a2d442d1093681b396dcb4b816b Mon Sep 17 00:00:00 2001 From: Robin Krahl Date: Tue, 7 Jan 2020 11:18:04 +0000 Subject: Add structopt dependency in version 0.3.7 This patch series replaces argparse with structopt in the argument handling code. As a first step, we need structopt as a dependency. Import subrepo structopt/:structopt at efbdda4753592e27bc430fb01f7b9650b2f3174d Import subrepo bitflags/:bitflags at 30668016aca6bd3b02c766e8347e0b4080d4c296 Import subrepo clap/:clap at 784524f7eb193e35f81082cc69454c8c21b948f7 Import subrepo heck/:heck at 093d56fbf001e1506e56dbfa38631d99b1066df1 Import subrepo proc-macro-error/:proc-macro-error at 6c4cfe79a622c5de8ae68557993542be46eacae2 Import subrepo proc-macro2/:proc-macro2 at d5d48eddca4566e5438e8a2cbed4a74e049544de Import subrepo quote/:quote at 727436c6c137b20f0f34dde5d8fda2679b9747ad Import subrepo rustversion/:rustversion at 0c5663313516263059ce9059ef81fc7a1cf655ca Import subrepo syn-mid/:syn-mid at 5d3d85414a9e6674e1857ec22a87b96e04a6851a Import subrepo syn/:syn at e87c27e87f6f4ef8919d0372bdb056d53ef0d8f3 Import subrepo textwrap/:textwrap at abcd618beae3f74841032aa5b53c1086b0a57ca2 Import subrepo unicode-segmentation/:unicode-segmentation at 637c9874c4fe0c205ff27787faf150a40295c6c3 Import subrepo unicode-width/:unicode-width at 3033826f8bf05e82724140a981d5941e48fce393 Import subrepo unicode-xid/:unicode-xid at 4baae9fffb156ba229665b972a9cd5991787ceb7 --- unicode-xid/scripts/unicode.py | 187 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100755 unicode-xid/scripts/unicode.py (limited to 'unicode-xid/scripts') diff --git a/unicode-xid/scripts/unicode.py b/unicode-xid/scripts/unicode.py new file mode 100755 index 0000000..393f901 --- /dev/null +++ b/unicode-xid/scripts/unicode.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +# +# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. + +# This script uses the following Unicode tables: +# - DerivedCoreProperties.txt +# - ReadMe.txt +# +# Since this should not require frequent updates, we just store this +# out-of-line and check the unicode.rs file into git. + +import fileinput, re, os, sys + +preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly + +#![allow(missing_docs, non_upper_case_globals, non_snake_case)] +''' + +def fetch(f): + if not os.path.exists(os.path.basename(f)): + os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s" + % f) + + if not os.path.exists(os.path.basename(f)): + sys.stderr.write("cannot load %s" % f) + exit(1) + +def group_cat(cat): + cat_out = [] + letters = sorted(set(cat)) + cur_start = letters.pop(0) + cur_end = cur_start + for letter in letters: + assert letter > cur_end, \ + "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter)) + if letter == cur_end + 1: + cur_end = letter + else: + cat_out.append((cur_start, cur_end)) + cur_start = cur_end = letter + cat_out.append((cur_start, cur_end)) + return cat_out + +def ungroup_cat(cat): + cat_out = [] + for (lo, hi) in cat: + while lo <= hi: + cat_out.append(lo) + lo += 1 + return cat_out + +def format_table_content(f, content, indent): + line = " "*indent + first = True + for chunk in content.split(","): + if len(line) + len(chunk) < 98: + if first: + line += chunk + else: + line += ", " + chunk + first = False + else: + f.write(line + ",\n") + line = " "*indent + chunk + f.write(line) + +def load_properties(f, interestingprops): + fetch(f) + props = {} + re1 = re.compile("^ *([0-9A-F]+) *; *(\w+)") + re2 = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)") + + for line in fileinput.input(os.path.basename(f)): + prop = None + d_lo = 0 + d_hi = 0 + m = re1.match(line) + if m: + d_lo = m.group(1) + d_hi = m.group(1) + prop = m.group(2) + else: + m = re2.match(line) + if m: + d_lo = m.group(1) + d_hi = m.group(2) + prop = m.group(3) + else: + continue + if interestingprops and prop not in interestingprops: + continue + d_lo = int(d_lo, 16) + d_hi = int(d_hi, 16) + if prop not in props: + props[prop] = [] + props[prop].append((d_lo, d_hi)) + + # optimize if possible + for prop in props: + props[prop] = group_cat(ungroup_cat(props[prop])) + + return props + +def escape_char(c): + return "'\\u{%x}'" % c + +def emit_bsearch_range_table(f): + f.write(""" +fn bsearch_range_table(c: char, r: &[(char,char)]) -> bool { + use core::cmp::Ordering::{Equal, Less, Greater}; + + r.binary_search_by(|&(lo,hi)| { + if lo <= c && c <= hi { Equal } + else if hi < c { Less } + else { Greater } + }).is_ok() +}\n +""") + +def emit_table(f, name, t_data, t_type = "&[(char, char)]", is_pub=True, + pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])), is_const=True): + pub_string = "const" + if not is_const: + pub_string = "let" + if is_pub: + pub_string = "pub " + pub_string + f.write(" %s %s: %s = &[\n" % (pub_string, name, t_type)) + data = "" + first = True + for dat in t_data: + if not first: + data += "," + first = False + data += pfun(dat) + format_table_content(f, data, 8) + f.write("\n ];\n\n") + +def emit_property_module(f, mod, tbl, emit): + f.write("pub mod %s {\n" % mod) + for cat in sorted(emit): + emit_table(f, "%s_table" % cat, tbl[cat]) + f.write(" pub fn %s(c: char) -> bool {\n" % cat) + f.write(" super::bsearch_range_table(c, %s_table)\n" % cat) + f.write(" }\n\n") + f.write("}\n\n") + +if __name__ == "__main__": + r = "tables.rs" + if os.path.exists(r): + os.remove(r) + with open(r, "w") as rf: + # write the file's preamble + rf.write(preamble) + + # download and parse all the data + fetch("ReadMe.txt") + with open("ReadMe.txt") as readme: + pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode" + unicode_version = re.search(pattern, readme.read()).groups() + rf.write(""" +/// The version of [Unicode](http://www.unicode.org/) +/// that this version of unicode-xid is based on. +pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s); +""" % unicode_version) + emit_bsearch_range_table(rf) + + want_derived = ["XID_Start", "XID_Continue"] + derived = load_properties("DerivedCoreProperties.txt", want_derived) + emit_property_module(rf, "derived_property", derived, want_derived) -- cgit v1.2.3