1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package unicode_test 6 7 import ( 8 "testing" 9 . "unicode" 10 ) 11 12 type T struct { 13 rune rune 14 script string 15 } 16 17 // Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0 and 8.0.0 18 // mostly to discover when new scripts and categories arise. 19 var inTest = []T{ 20 {0x11711, "Ahom"}, 21 {0x14646, "Anatolian_Hieroglyphs"}, 22 {0x06e2, "Arabic"}, 23 {0x0567, "Armenian"}, 24 {0x10b20, "Avestan"}, 25 {0x1b37, "Balinese"}, 26 {0xa6af, "Bamum"}, 27 {0x16ada, "Bassa_Vah"}, 28 {0x1be1, "Batak"}, 29 {0x09c2, "Bengali"}, 30 {0x3115, "Bopomofo"}, 31 {0x282d, "Braille"}, 32 {0x1a1a, "Buginese"}, 33 {0x1747, "Buhid"}, 34 {0x11011, "Brahmi"}, 35 {0x156d, "Canadian_Aboriginal"}, 36 {0x102a9, "Carian"}, 37 {0x10563, "Caucasian_Albanian"}, 38 {0x11111, "Chakma"}, 39 {0xaa4d, "Cham"}, 40 {0x13c2, "Cherokee"}, 41 {0x0020, "Common"}, 42 {0x1d4a5, "Common"}, 43 {0x2cfc, "Coptic"}, 44 {0x12420, "Cuneiform"}, 45 {0x1080c, "Cypriot"}, 46 {0xa663, "Cyrillic"}, 47 {0x10430, "Deseret"}, 48 {0x094a, "Devanagari"}, 49 {0x1BC00, "Duployan"}, 50 {0x13001, "Egyptian_Hieroglyphs"}, 51 {0x10500, "Elbasan"}, 52 {0x1271, "Ethiopic"}, 53 {0x10fc, "Georgian"}, 54 {0x2c40, "Glagolitic"}, 55 {0x10347, "Gothic"}, 56 {0x11303, "Grantha"}, 57 {0x03ae, "Greek"}, 58 {0x0abf, "Gujarati"}, 59 {0x0a24, "Gurmukhi"}, 60 {0x3028, "Han"}, 61 {0x11b8, "Hangul"}, 62 {0x1727, "Hanunoo"}, 63 {0x108FF, "Hatran"}, 64 {0x05a0, "Hebrew"}, 65 {0x3058, "Hiragana"}, 66 {0x10841, "Imperial_Aramaic"}, 67 {0x20e6, "Inherited"}, 68 {0x10b70, "Inscriptional_Pahlavi"}, 69 {0x10b5a, "Inscriptional_Parthian"}, 70 {0xa9d0, "Javanese"}, 71 {0x1109f, "Kaithi"}, 72 {0x0cbd, "Kannada"}, 73 {0x30a6, "Katakana"}, 74 {0xa928, "Kayah_Li"}, 75 {0x10a11, "Kharoshthi"}, 76 {0x17c6, "Khmer"}, 77 {0x11211, "Khojki"}, 78 {0x112df, "Khudawadi"}, 79 {0x0eaa, "Lao"}, 80 {0x1d79, "Latin"}, 81 {0x1c10, "Lepcha"}, 82 {0x1930, "Limbu"}, 83 {0x10755, "Linear_A"}, 84 {0x1003c, "Linear_B"}, 85 {0xa4e1, "Lisu"}, 86 {0x10290, "Lycian"}, 87 {0x10930, "Lydian"}, 88 {0x11173, "Mahajani"}, 89 {0x0d42, "Malayalam"}, 90 {0x0843, "Mandaic"}, 91 {0x10ac8, "Manichaean"}, 92 {0xabd0, "Meetei_Mayek"}, 93 {0x1e800, "Mende_Kikakui"}, 94 {0x1099f, "Meroitic_Hieroglyphs"}, 95 {0x109a0, "Meroitic_Cursive"}, 96 {0x16f00, "Miao"}, 97 {0x11611, "Modi"}, 98 {0x1822, "Mongolian"}, 99 {0x16a60, "Mro"}, 100 {0x11293, "Multani"}, 101 {0x104c, "Myanmar"}, 102 {0x10880, "Nabataean"}, 103 {0x19c3, "New_Tai_Lue"}, 104 {0x07f8, "Nko"}, 105 {0x169b, "Ogham"}, 106 {0x1c6a, "Ol_Chiki"}, 107 {0x10C80, "Old_Hungarian"}, 108 {0x10310, "Old_Italic"}, 109 {0x10a80, "Old_North_Arabian"}, 110 {0x10350, "Old_Permic"}, 111 {0x103c9, "Old_Persian"}, 112 {0x10a6f, "Old_South_Arabian"}, 113 {0x10c20, "Old_Turkic"}, 114 {0x0b3e, "Oriya"}, 115 {0x10491, "Osmanya"}, 116 {0x16b2b, "Pahawh_Hmong"}, 117 {0x10876, "Palmyrene"}, 118 {0x11ACE, "Pau_Cin_Hau"}, 119 {0xa860, "Phags_Pa"}, 120 {0x10918, "Phoenician"}, 121 {0x10baf, "Psalter_Pahlavi"}, 122 {0xa949, "Rejang"}, 123 {0x16c0, "Runic"}, 124 {0x081d, "Samaritan"}, 125 {0xa892, "Saurashtra"}, 126 {0x111a0, "Sharada"}, 127 {0x10463, "Shavian"}, 128 {0x115c1, "Siddham"}, 129 {0x1D920, "SignWriting"}, 130 {0x0dbd, "Sinhala"}, 131 {0x110d0, "Sora_Sompeng"}, 132 {0x1ba3, "Sundanese"}, 133 {0xa803, "Syloti_Nagri"}, 134 {0x070f, "Syriac"}, 135 {0x170f, "Tagalog"}, 136 {0x176f, "Tagbanwa"}, 137 {0x1972, "Tai_Le"}, 138 {0x1a62, "Tai_Tham"}, 139 {0xaadc, "Tai_Viet"}, 140 {0x116c9, "Takri"}, 141 {0x0bbf, "Tamil"}, 142 {0x0c55, "Telugu"}, 143 {0x07a7, "Thaana"}, 144 {0x0e46, "Thai"}, 145 {0x0f36, "Tibetan"}, 146 {0x2d55, "Tifinagh"}, 147 {0x114d9, "Tirhuta"}, 148 {0x10388, "Ugaritic"}, 149 {0xa60e, "Vai"}, 150 {0x118ff, "Warang_Citi"}, 151 {0xa216, "Yi"}, 152 } 153 154 var outTest = []T{ // not really worth being thorough 155 {0x20, "Telugu"}, 156 } 157 158 var inCategoryTest = []T{ 159 {0x0081, "Cc"}, 160 {0x200B, "Cf"}, 161 {0xf0000, "Co"}, 162 {0xdb80, "Cs"}, 163 {0x0236, "Ll"}, 164 {0x1d9d, "Lm"}, 165 {0x07cf, "Lo"}, 166 {0x1f8a, "Lt"}, 167 {0x03ff, "Lu"}, 168 {0x0bc1, "Mc"}, 169 {0x20df, "Me"}, 170 {0x07f0, "Mn"}, 171 {0x1bb2, "Nd"}, 172 {0x10147, "Nl"}, 173 {0x2478, "No"}, 174 {0xfe33, "Pc"}, 175 {0x2011, "Pd"}, 176 {0x301e, "Pe"}, 177 {0x2e03, "Pf"}, 178 {0x2e02, "Pi"}, 179 {0x0022, "Po"}, 180 {0x2770, "Ps"}, 181 {0x00a4, "Sc"}, 182 {0xa711, "Sk"}, 183 {0x25f9, "Sm"}, 184 {0x2108, "So"}, 185 {0x2028, "Zl"}, 186 {0x2029, "Zp"}, 187 {0x202f, "Zs"}, 188 // Unifieds. 189 {0x04aa, "L"}, 190 {0x0009, "C"}, 191 {0x1712, "M"}, 192 {0x0031, "N"}, 193 {0x00bb, "P"}, 194 {0x00a2, "S"}, 195 {0x00a0, "Z"}, 196 } 197 198 var inPropTest = []T{ 199 {0x0046, "ASCII_Hex_Digit"}, 200 {0x200F, "Bidi_Control"}, 201 {0x2212, "Dash"}, 202 {0xE0001, "Deprecated"}, 203 {0x00B7, "Diacritic"}, 204 {0x30FE, "Extender"}, 205 {0xFF46, "Hex_Digit"}, 206 {0x2E17, "Hyphen"}, 207 {0x2FFB, "IDS_Binary_Operator"}, 208 {0x2FF3, "IDS_Trinary_Operator"}, 209 {0xFA6A, "Ideographic"}, 210 {0x200D, "Join_Control"}, 211 {0x0EC4, "Logical_Order_Exception"}, 212 {0x2FFFF, "Noncharacter_Code_Point"}, 213 {0x065E, "Other_Alphabetic"}, 214 {0x2065, "Other_Default_Ignorable_Code_Point"}, 215 {0x0BD7, "Other_Grapheme_Extend"}, 216 {0x0387, "Other_ID_Continue"}, 217 {0x212E, "Other_ID_Start"}, 218 {0x2094, "Other_Lowercase"}, 219 {0x2040, "Other_Math"}, 220 {0x216F, "Other_Uppercase"}, 221 {0x0027, "Pattern_Syntax"}, 222 {0x0020, "Pattern_White_Space"}, 223 {0x300D, "Quotation_Mark"}, 224 {0x2EF3, "Radical"}, 225 {0x061F, "STerm"}, 226 {0x2071, "Soft_Dotted"}, 227 {0x003A, "Terminal_Punctuation"}, 228 {0x9FC3, "Unified_Ideograph"}, 229 {0xFE0F, "Variation_Selector"}, 230 {0x0020, "White_Space"}, 231 } 232 233 func TestScripts(t *testing.T) { 234 notTested := make(map[string]bool) 235 for k := range Scripts { 236 notTested[k] = true 237 } 238 for _, test := range inTest { 239 if _, ok := Scripts[test.script]; !ok { 240 t.Fatal(test.script, "not a known script") 241 } 242 if !Is(Scripts[test.script], test.rune) { 243 t.Errorf("IsScript(%U, %s) = false, want true", test.rune, test.script) 244 } 245 delete(notTested, test.script) 246 } 247 for _, test := range outTest { 248 if Is(Scripts[test.script], test.rune) { 249 t.Errorf("IsScript(%U, %s) = true, want false", test.rune, test.script) 250 } 251 } 252 for k := range notTested { 253 t.Error("script not tested:", k) 254 } 255 } 256 257 func TestCategories(t *testing.T) { 258 notTested := make(map[string]bool) 259 for k := range Categories { 260 notTested[k] = true 261 } 262 for _, test := range inCategoryTest { 263 if _, ok := Categories[test.script]; !ok { 264 t.Fatal(test.script, "not a known category") 265 } 266 if !Is(Categories[test.script], test.rune) { 267 t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script) 268 } 269 delete(notTested, test.script) 270 } 271 for k := range notTested { 272 t.Error("category not tested:", k) 273 } 274 } 275 276 func TestProperties(t *testing.T) { 277 notTested := make(map[string]bool) 278 for k := range Properties { 279 notTested[k] = true 280 } 281 for _, test := range inPropTest { 282 if _, ok := Properties[test.script]; !ok { 283 t.Fatal(test.script, "not a known prop") 284 } 285 if !Is(Properties[test.script], test.rune) { 286 t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script) 287 } 288 delete(notTested, test.script) 289 } 290 for k := range notTested { 291 t.Error("property not tested:", k) 292 } 293 } 294