(index<- ) ./libstd/unicode.rs
git branch: * master 5200215 auto merge of #14035 : alexcrichton/rust/experimental, r=huonw
modified: Fri May 9 13:02:28 2014
1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 // The following code was generated by "src/etc/unicode.py"
12
13 #![allow(missing_doc, non_uppercase_statics)]
14
/// Lookup of Unicode canonical combining classes, backed by a static
/// range table.
pub mod decompose {
    use option::{Some, None};
    use slice::ImmutableVector;

    /// Binary-searches `r` for the inclusive range `(lo, hi, value)` that
    /// contains `c` and returns that range's `value`.
    ///
    /// Returns `0` when no range in `r` contains `c`. The search relies on
    /// `r` being ordered by ascending, non-overlapping ranges.
    fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
        use cmp::{Equal, Less, Greater};

        // The ordering reported to `bsearch` is the position of the range
        // relative to `c`: `Less` when the range ends before `c`, `Greater`
        // when it starts after `c`, and `Equal` when `c` falls inside it.
        let found = r.bsearch(|&(lo, hi, _)| {
            if hi < c { Less }
            else if c < lo { Greater }
            else { Equal }
        });

        match found {
            None => 0,
            Some(idx) => {
                let (_, _, class) = r[idx];
                class
            }
        }
    }


    /// Inclusive code point ranges `(first, last, class)` paired with their
    /// canonical combining class, listed in ascending code point order.
    ///
    /// Code points not covered by any entry are reported as class `0` by
    /// `canonical_combining_class`.
    static combining_class_table : &'static [(char, char, u8)] = &[
        ('\u0300', '\u0314', 230), ('\u0315', '\u0315', 232),
        ('\u0316', '\u0319', 220), ('\u031a', '\u031a', 232),
        ('\u031b', '\u031b', 216), ('\u031c', '\u0320', 220),
        ('\u0321', '\u0322', 202), ('\u0323', '\u0326', 220),
        ('\u0327', '\u0328', 202), ('\u0329', '\u0333', 220),
        ('\u0334', '\u0338', 1), ('\u0339', '\u033c', 220),
        ('\u033d', '\u0344', 230), ('\u0345', '\u0345', 240),
        ('\u0346', '\u0346', 230), ('\u0347', '\u0349', 220),
        ('\u034a', '\u034c', 230), ('\u034d', '\u034e', 220),
        ('\u0350', '\u0352', 230), ('\u0353', '\u0356', 220),
        ('\u0357', '\u0357', 230), ('\u0358', '\u0358', 232),
        ('\u0359', '\u035a', 220), ('\u035b', '\u035b', 230),
        ('\u035c', '\u035c', 233), ('\u035d', '\u035e', 234),
        ('\u035f', '\u035f', 233), ('\u0360', '\u0361', 234),
        ('\u0362', '\u0362', 233), ('\u0363', '\u036f', 230),
        ('\u0483', '\u0487', 230), ('\u0591', '\u0591', 220),
        ('\u0592', '\u0595', 230), ('\u0596', '\u0596', 220),
        ('\u0597', '\u0599', 230), ('\u059a', '\u059a', 222),
        ('\u059b', '\u059b', 220), ('\u059c', '\u05a1', 230),
        ('\u05a2', '\u05a7', 220), ('\u05a8', '\u05a9', 230),
        ('\u05aa', '\u05aa', 220), ('\u05ab', '\u05ac', 230),
        ('\u05ad', '\u05ad', 222), ('\u05ae', '\u05ae', 228),
        ('\u05af', '\u05af', 230), ('\u05b0', '\u05b0', 10),
        ('\u05b1', '\u05b1', 11), ('\u05b2', '\u05b2', 12),
        ('\u05b3', '\u05b3', 13), ('\u05b4', '\u05b4', 14),
        ('\u05b5', '\u05b5', 15), ('\u05b6', '\u05b6', 16),
        ('\u05b7', '\u05b7', 17), ('\u05b8', '\u05b8', 18),
        ('\u05b9', '\u05ba', 19), ('\u05bb', '\u05bb', 20),
        ('\u05bc', '\u05bc', 21), ('\u05bd', '\u05bd', 22),
        ('\u05bf', '\u05bf', 23), ('\u05c1', '\u05c1', 24),
        ('\u05c2', '\u05c2', 25), ('\u05c4', '\u05c4', 230),
        ('\u05c5', '\u05c5', 220), ('\u05c7', '\u05c7', 18),
        ('\u0610', '\u0617', 230), ('\u0618', '\u0618', 30),
        ('\u0619', '\u0619', 31), ('\u061a', '\u061a', 32),
        ('\u064b', '\u064b', 27), ('\u064c', '\u064c', 28),
        ('\u064d', '\u064d', 29), ('\u064e', '\u064e', 30),
        ('\u064f', '\u064f', 31), ('\u0650', '\u0650', 32),
        ('\u0651', '\u0651', 33), ('\u0652', '\u0652', 34),
        ('\u0653', '\u0654', 230), ('\u0655', '\u0656', 220),
        ('\u0657', '\u065b', 230), ('\u065c', '\u065c', 220),
        ('\u065d', '\u065e', 230), ('\u065f', '\u065f', 220),
        ('\u0670', '\u0670', 35), ('\u06d6', '\u06dc', 230),
        ('\u06df', '\u06e2', 230), ('\u06e3', '\u06e3', 220),
        ('\u06e4', '\u06e4', 230), ('\u06e7', '\u06e8', 230),
        ('\u06ea', '\u06ea', 220), ('\u06eb', '\u06ec', 230),
        ('\u06ed', '\u06ed', 220), ('\u0711', '\u0711', 36),
        ('\u0730', '\u0730', 230), ('\u0731', '\u0731', 220),
        ('\u0732', '\u0733', 230), ('\u0734', '\u0734', 220),
        ('\u0735', '\u0736', 230), ('\u0737', '\u0739', 220),
        ('\u073a', '\u073a', 230), ('\u073b', '\u073c', 220),
        ('\u073d', '\u073d', 230), ('\u073e', '\u073e', 220),
        ('\u073f', '\u0741', 230), ('\u0742', '\u0742', 220),
        ('\u0743', '\u0743', 230), ('\u0744', '\u0744', 220),
        ('\u0745', '\u0745', 230), ('\u0746', '\u0746', 220),
        ('\u0747', '\u0747', 230), ('\u0748', '\u0748', 220),
        ('\u0749', '\u074a', 230), ('\u07eb', '\u07f1', 230),
        ('\u07f2', '\u07f2', 220), ('\u07f3', '\u07f3', 230),
        ('\u0816', '\u0819', 230), ('\u081b', '\u0823', 230),
        ('\u0825', '\u0827', 230), ('\u0829', '\u082d', 230),
        ('\u0859', '\u085b', 220), ('\u08e4', '\u08e5', 230),
        ('\u08e6', '\u08e6', 220), ('\u08e7', '\u08e8', 230),
        ('\u08e9', '\u08e9', 220), ('\u08ea', '\u08ec', 230),
        ('\u08ed', '\u08ef', 220), ('\u08f0', '\u08f0', 27),
        ('\u08f1', '\u08f1', 28), ('\u08f2', '\u08f2', 29),
        ('\u08f3', '\u08f5', 230), ('\u08f6', '\u08f6', 220),
        ('\u08f7', '\u08f8', 230), ('\u08f9', '\u08fa', 220),
        ('\u08fb', '\u08fe', 230), ('\u093c', '\u093c', 7),
        ('\u094d', '\u094d', 9), ('\u0951', '\u0951', 230),
        ('\u0952', '\u0952', 220), ('\u0953', '\u0954', 230),
        ('\u09bc', '\u09bc', 7), ('\u09cd', '\u09cd', 9),
        ('\u0a3c', '\u0a3c', 7), ('\u0a4d', '\u0a4d', 9),
        ('\u0abc', '\u0abc', 7), ('\u0acd', '\u0acd', 9),
        ('\u0b3c', '\u0b3c', 7), ('\u0b4d', '\u0b4d', 9),
        ('\u0bcd', '\u0bcd', 9), ('\u0c4d', '\u0c4d', 9),
        ('\u0c55', '\u0c55', 84), ('\u0c56', '\u0c56', 91),
        ('\u0cbc', '\u0cbc', 7), ('\u0ccd', '\u0ccd', 9),
        ('\u0d4d', '\u0d4d', 9), ('\u0dca', '\u0dca', 9),
        ('\u0e38', '\u0e39', 103), ('\u0e3a', '\u0e3a', 9),
        ('\u0e48', '\u0e4b', 107), ('\u0eb8', '\u0eb9', 118),
        ('\u0ec8', '\u0ecb', 122), ('\u0f18', '\u0f19', 220),
        ('\u0f35', '\u0f35', 220), ('\u0f37', '\u0f37', 220),
        ('\u0f39', '\u0f39', 216), ('\u0f71', '\u0f71', 129),
        ('\u0f72', '\u0f72', 130), ('\u0f74', '\u0f74', 132),
        ('\u0f7a', '\u0f7d', 130), ('\u0f80', '\u0f80', 130),
        ('\u0f82', '\u0f83', 230), ('\u0f84', '\u0f84', 9),
        ('\u0f86', '\u0f87', 230), ('\u0fc6', '\u0fc6', 220),
        ('\u1037', '\u1037', 7), ('\u1039', '\u103a', 9),
        ('\u108d', '\u108d', 220), ('\u135d', '\u135f', 230),
        ('\u1714', '\u1714', 9), ('\u1734', '\u1734', 9),
        ('\u17d2', '\u17d2', 9), ('\u17dd', '\u17dd', 230),
        ('\u18a9', '\u18a9', 228), ('\u1939', '\u1939', 222),
        ('\u193a', '\u193a', 230), ('\u193b', '\u193b', 220),
        ('\u1a17', '\u1a17', 230), ('\u1a18', '\u1a18', 220),
        ('\u1a60', '\u1a60', 9), ('\u1a75', '\u1a7c', 230),
        ('\u1a7f', '\u1a7f', 220), ('\u1b34', '\u1b34', 7),
        ('\u1b44', '\u1b44', 9), ('\u1b6b', '\u1b6b', 230),
        ('\u1b6c', '\u1b6c', 220), ('\u1b6d', '\u1b73', 230),
        ('\u1baa', '\u1bab', 9), ('\u1be6', '\u1be6', 7),
        ('\u1bf2', '\u1bf3', 9), ('\u1c37', '\u1c37', 7),
        ('\u1cd0', '\u1cd2', 230), ('\u1cd4', '\u1cd4', 1),
        ('\u1cd5', '\u1cd9', 220), ('\u1cda', '\u1cdb', 230),
        ('\u1cdc', '\u1cdf', 220), ('\u1ce0', '\u1ce0', 230),
        ('\u1ce2', '\u1ce8', 1), ('\u1ced', '\u1ced', 220),
        ('\u1cf4', '\u1cf4', 230), ('\u1dc0', '\u1dc1', 230),
        ('\u1dc2', '\u1dc2', 220), ('\u1dc3', '\u1dc9', 230),
        ('\u1dca', '\u1dca', 220), ('\u1dcb', '\u1dcc', 230),
        ('\u1dcd', '\u1dcd', 234), ('\u1dce', '\u1dce', 214),
        ('\u1dcf', '\u1dcf', 220), ('\u1dd0', '\u1dd0', 202),
        ('\u1dd1', '\u1de6', 230), ('\u1dfc', '\u1dfc', 233),
        ('\u1dfd', '\u1dfd', 220), ('\u1dfe', '\u1dfe', 230),
        ('\u1dff', '\u1dff', 220), ('\u20d0', '\u20d1', 230),
        ('\u20d2', '\u20d3', 1), ('\u20d4', '\u20d7', 230),
        ('\u20d8', '\u20da', 1), ('\u20db', '\u20dc', 230),
        ('\u20e1', '\u20e1', 230), ('\u20e5', '\u20e6', 1),
        ('\u20e7', '\u20e7', 230), ('\u20e8', '\u20e8', 220),
        ('\u20e9', '\u20e9', 230), ('\u20ea', '\u20eb', 1),
        ('\u20ec', '\u20ef', 220), ('\u20f0', '\u20f0', 230),
        ('\u2cef', '\u2cf1', 230), ('\u2d7f', '\u2d7f', 9),
        ('\u2de0', '\u2dff', 230), ('\u302a', '\u302a', 218),
        ('\u302b', '\u302b', 228), ('\u302c', '\u302c', 232),
        ('\u302d', '\u302d', 222), ('\u302e', '\u302f', 224),
        ('\u3099', '\u309a', 8), ('\ua66f', '\ua66f', 230),
        ('\ua674', '\ua67d', 230), ('\ua69f', '\ua69f', 230),
        ('\ua6f0', '\ua6f1', 230), ('\ua806', '\ua806', 9),
        ('\ua8c4', '\ua8c4', 9), ('\ua8e0', '\ua8f1', 230),
        ('\ua92b', '\ua92d', 220), ('\ua953', '\ua953', 9),
        ('\ua9b3', '\ua9b3', 7), ('\ua9c0', '\ua9c0', 9),
        ('\uaab0', '\uaab0', 230), ('\uaab2', '\uaab3', 230),
        ('\uaab4', '\uaab4', 220), ('\uaab7', '\uaab8', 230),
        ('\uaabe', '\uaabf', 230), ('\uaac1', '\uaac1', 230),
        ('\uaaf6', '\uaaf6', 9), ('\uabed', '\uabed', 9),
        ('\ufb1e', '\ufb1e', 26), ('\ufe20', '\ufe26', 230),
        ('\U000101fd', '\U000101fd', 220), ('\U00010a0d', '\U00010a0d', 220),
        ('\U00010a0f', '\U00010a0f', 230), ('\U00010a38', '\U00010a38', 230),
        ('\U00010a39', '\U00010a39', 1), ('\U00010a3a', '\U00010a3a', 220),
        ('\U00010a3f', '\U00010a3f', 9), ('\U00011046', '\U00011046', 9),
        ('\U000110b9', '\U000110b9', 9), ('\U000110ba', '\U000110ba', 7),
        ('\U00011100', '\U00011102', 230), ('\U00011133', '\U00011134', 9),
        ('\U000111c0', '\U000111c0', 9), ('\U000116b6', '\U000116b6', 9),
        ('\U000116b7', '\U000116b7', 7), ('\U0001d165', '\U0001d166', 216),
        ('\U0001d167', '\U0001d169', 1), ('\U0001d16d', '\U0001d16d', 226),
        ('\U0001d16e', '\U0001d172', 216), ('\U0001d17b', '\U0001d182', 220),
        ('\U0001d185', '\U0001d189', 230), ('\U0001d18a', '\U0001d18b', 220),
        ('\U0001d1aa', '\U0001d1ad', 230), ('\U0001d242', '\U0001d244', 230)
    ];

    /// Returns the canonical combining class of `c`.
    ///
    /// The value is looked up in `combining_class_table`; a code point that
    /// lies in none of the table's ranges yields `0`.
    pub fn canonical_combining_class(c: char) -> u8 {
        let table = combining_class_table;
        bsearch_range_value_table(c, table)
    }
}