1 files changed, 0 insertions, 212 deletions
diff --git a/unicode-segmentation/scripts/unicode_gen_breaktests.py b/unicode-segmentation/scripts/unicode_gen_breaktests.py
deleted file mode 100755
index 113afa9..0000000
--- a/unicode-segmentation/scripts/unicode_gen_breaktests.py
+++ /dev/null
@@ -1,212 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8
-#
-# Copyright 2015 The Rust Project Developers. See the COPYRIGHT
-# file at the top-level directory of this distribution and at
-# http://rust-lang.org/COPYRIGHT.
-#
-# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-# option. This file may not be copied, modified, or distributed
-# except according to those terms.
-
-# This script uses the following Unicode tables:
-# - auxiliary/GraphemeBreakTest.txt
-# - auxiliary/WordBreakTest.txt
-#
-# Since this should not require frequent updates, we just store this
-# out-of-line and check the unicode.rs file into git.
-from __future__ import print_function
-
-import unicode, re, os, fileinput
-
-def load_test_data(f, optsplit=[]):
-    testRe1 = re.compile(r"^÷\s+([^\s].*[^\s])\s+÷\s+#\s+÷\s+\[0.2\].*?([÷×].*)\s+÷\s+\[0.3\]\s*$")
-
-    unicode.fetch(f)
-    data = []
-    for line in fileinput.input(os.path.basename(f)):
-        # lines that include a test start with the ÷ character
-        if len(line) < 2 or not line.startswith('÷'):
-            continue
-
-        m = testRe1.match(line)
-        if not m:
-            print("error: no match on line where test was expected: %s" % line)
-            continue
-
-        # process the characters in this test case
-        chars = process_split_string(m.group(1))
-        # skip test case if it contains invalid characters (viz., surrogates)
-        if not chars:
-            continue
-
-        # now process test cases
-        (chars, info) = process_split_info(m.group(2), chars, optsplit)
-
-        # make sure that we have break info for each break!
-        assert len(chars) - 1 == len(info)
-
-        data.append((chars, info))
-
-    return data
-
-def process_split_info(s, c, o):
-    outcs = []
-    outis = []
-    workcs = c.pop(0)
-
-    # are we on a × or a ÷?
-    isX = False
-    if s.startswith('×'):
-        isX = True
-
-    # find each instance of '(÷|×) [x.y] '
-    while s:
-        # find the currently considered rule number
-        sInd = s.index('[') + 1
-        eInd = s.index(']')
-
-        # if it's '× [a.b]' where 'a.b' is in o, then
-        # we consider it a split even though it's not
-        # marked as one
-        # if it's ÷ then it's always a split
-        if not isX or s[sInd:eInd] in o:
-            outis.append(s[sInd:eInd])
-            outcs.append(workcs)
-            workcs = c.pop(0)
-        else:
-            workcs.extend(c.pop(0))
-
-        idx = 1
-        while idx < len(s):
-            if s[idx:].startswith('×'):
-                isX = True
-                break
-            if s[idx:].startswith('÷'):
-                isX = False
-                break
-            idx += 1
-        s = s[idx:]
-
-    outcs.append(workcs)
-    return (outcs, outis)
-
-def process_split_string(s):
-    outls = []
-    workls = []
-
-    inls = s.split()
-
-    for i in inls:
-        if i == '÷' or i == '×':
-            outls.append(workls)
-            workls = []
-            continue
-
-        ival = int(i,16)
-
-        if unicode.is_surrogate(ival):
-            return []
-
-        workls.append(ival)
-
-    if workls:
-        outls.append(workls)
-
-    return outls
-
-def showfun(x):
-    outstr = '("'
-    for c in x[0]:
-        outstr += "\\u{%x}" % c
-    outstr += '",&['
-    xfirst = True
-    for xx in x[1:]:
-        if not xfirst:
-            outstr += '],&['
-        xfirst = False
-        sfirst = True
-        for sp in xx:
-            if not sfirst:
-                outstr += ','
-            sfirst = False
-            outstr += '"'
-            for c in sp:
-                outstr += "\\u{%x}" % c
-            outstr += '"'
-    outstr += '])'
-    return outstr
-
-def create_grapheme_data(f):
-    # rules 9.1 and 9.2 are for extended graphemes only
-    optsplits = ['9.1','9.2']
-    d = load_test_data("auxiliary/GraphemeBreakTest.txt", optsplits)
-
-    test_same = []
-    test_diff = []
-
-    for (c, i) in d:
-        allchars = [cn for s in c for cn in s]
-        extgraphs = []
-        extwork = []
-
-        extwork.extend(c[0])
-        for n in range(0,len(i)):
-            if i[n] in optsplits:
-                extwork.extend(c[n+1])
-            else:
-                extgraphs.append(extwork)
-                extwork = []
-                extwork.extend(c[n+1])
-
-        # these are the extended grapheme clusters
-        extgraphs.append(extwork)
-
-        if extgraphs == c:
-            test_same.append((allchars, c))
-        else:
-            test_diff.append((allchars, extgraphs, c))
-
-    stype = "&'static [(&'static str, &'static [&'static str])]"
-    dtype = "&'static [(&'static str, &'static [&'static str], &'static [&'static str])]"
-    f.write("    // official Unicode test data\n")
-    f.write("    // http://www.unicode.org/Public/%s/ucd/auxiliary/GraphemeBreakTest.txt\n" % unicode.UNICODE_VERSION_NUMBER)
-    unicode.emit_table(f, "TEST_SAME", test_same, stype, True, showfun, True)
-    unicode.emit_table(f, "TEST_DIFF", test_diff, dtype, True, showfun, True)
-
-def create_words_data(f):
-    d = load_test_data("auxiliary/WordBreakTest.txt")
-
-    test = []
-
-    for (c, i) in d:
-        allchars = [cn for s in c for cn in s]
-        test.append((allchars, c))
-
-    wtype = "&'static [(&'static str, &'static [&'static str])]"
-    f.write("    // official Unicode test data\n")
-    f.write("    // http://www.unicode.org/Public/%s/ucd/auxiliary/WordBreakTest.txt\n" % unicode.UNICODE_VERSION_NUMBER)
-    unicode.emit_table(f, "TEST_WORD", test, wtype, True, showfun, True)
-
-def create_sentence_data(f):
-    d = load_test_data("auxiliary/SentenceBreakTest.txt")
-
-    test = []
-
-    for (c, i) in d:
-        allchars = [cn for s in c for cn in s]
-        test.append((allchars, c))
-
-    wtype = "&'static [(&'static str, &'static [&'static str])]"
-    f.write("    // official Unicode test data\n")
-    f.write("    // http://www.unicode.org/Public/%s/ucd/auxiliary/SentenceBreakTest.txt\n" % unicode.UNICODE_VERSION_NUMBER)
-    unicode.emit_table(f, "TEST_SENTENCE", test, wtype, True, showfun, True)
-
-if __name__ == "__main__":
-    with open("testdata.rs", "w") as rf:
-        rf.write(unicode.preamble)
-        create_grapheme_data(rf)
-        create_words_data(rf)
-        create_sentence_data(rf)