1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package unicode_test 6 7 import ( 8 "testing" 9 . "unicode" 10 ) 11 12 type T struct { 13 rune rune 14 script string 15 } 16 17 // Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0 and 8.0.0 18 // mostly to discover when new scripts and categories arise. 19 var inTest = []T{ 20 {0x11711, "Ahom"}, 21 {0x1e900, "Adlam"}, 22 {0x14646, "Anatolian_Hieroglyphs"}, 23 {0x06e2, "Arabic"}, 24 {0x0567, "Armenian"}, 25 {0x10b20, "Avestan"}, 26 {0x11c00, "Bhaiksuki"}, 27 {0x1b37, "Balinese"}, 28 {0xa6af, "Bamum"}, 29 {0x16ada, "Bassa_Vah"}, 30 {0x1be1, "Batak"}, 31 {0x09c2, "Bengali"}, 32 {0x3115, "Bopomofo"}, 33 {0x282d, "Braille"}, 34 {0x1a1a, "Buginese"}, 35 {0x1747, "Buhid"}, 36 {0x11011, "Brahmi"}, 37 {0x156d, "Canadian_Aboriginal"}, 38 {0x102a9, "Carian"}, 39 {0x10563, "Caucasian_Albanian"}, 40 {0x11111, "Chakma"}, 41 {0xaa4d, "Cham"}, 42 {0x13c2, "Cherokee"}, 43 {0x0020, "Common"}, 44 {0x1d4a5, "Common"}, 45 {0x2cfc, "Coptic"}, 46 {0x12420, "Cuneiform"}, 47 {0x1080c, "Cypriot"}, 48 {0xa663, "Cyrillic"}, 49 {0x10430, "Deseret"}, 50 {0x094a, "Devanagari"}, 51 {0x1BC00, "Duployan"}, 52 {0x13001, "Egyptian_Hieroglyphs"}, 53 {0x10500, "Elbasan"}, 54 {0x1271, "Ethiopic"}, 55 {0x10fc, "Georgian"}, 56 {0x2c40, "Glagolitic"}, 57 {0x10347, "Gothic"}, 58 {0x11303, "Grantha"}, 59 {0x03ae, "Greek"}, 60 {0x0abf, "Gujarati"}, 61 {0x0a24, "Gurmukhi"}, 62 {0x3028, "Han"}, 63 {0x11b8, "Hangul"}, 64 {0x1727, "Hanunoo"}, 65 {0x108FF, "Hatran"}, 66 {0x05a0, "Hebrew"}, 67 {0x3058, "Hiragana"}, 68 {0x10841, "Imperial_Aramaic"}, 69 {0x20e6, "Inherited"}, 70 {0x10b70, "Inscriptional_Pahlavi"}, 71 {0x10b5a, "Inscriptional_Parthian"}, 72 {0xa9d0, "Javanese"}, 73 {0x1109f, "Kaithi"}, 74 {0x0cbd, "Kannada"}, 75 {0x30a6, "Katakana"}, 76 {0xa928, "Kayah_Li"}, 77 {0x10a11, "Kharoshthi"}, 78 {0x17c6, "Khmer"}, 79 {0x11211, "Khojki"}, 80 {0x112df, "Khudawadi"}, 81 {0x0eaa, "Lao"}, 82 {0x1d79, "Latin"}, 83 {0x1c10, "Lepcha"}, 84 {0x1930, "Limbu"}, 85 {0x10755, "Linear_A"}, 86 {0x1003c, "Linear_B"}, 87 {0xa4e1, "Lisu"}, 88 {0x10290, "Lycian"}, 89 {0x10930, "Lydian"}, 90 {0x11173, "Mahajani"}, 91 {0x0d42, "Malayalam"}, 92 {0x0843, "Mandaic"}, 93 {0x10ac8, "Manichaean"}, 94 {0x11cB6, "Marchen"}, 95 {0xabd0, "Meetei_Mayek"}, 96 {0x1e800, "Mende_Kikakui"}, 97 {0x1099f, "Meroitic_Hieroglyphs"}, 98 {0x109a0, "Meroitic_Cursive"}, 99 {0x16f00, "Miao"}, 100 {0x11611, "Modi"}, 101 {0x1822, "Mongolian"}, 102 {0x16a60, "Mro"}, 103 {0x11293, "Multani"}, 104 {0x104c, "Myanmar"}, 105 {0x10880, "Nabataean"}, 106 {0x11400, "Newa"}, 107 {0x19c3, "New_Tai_Lue"}, 108 {0x07f8, "Nko"}, 109 {0x169b, "Ogham"}, 110 {0x1c6a, "Ol_Chiki"}, 111 {0x10C80, "Old_Hungarian"}, 112 {0x10310, "Old_Italic"}, 113 {0x10a80, "Old_North_Arabian"}, 114 {0x10350, "Old_Permic"}, 115 {0x103c9, "Old_Persian"}, 116 {0x10a6f, "Old_South_Arabian"}, 117 {0x10c20, "Old_Turkic"}, 118 {0x0b3e, "Oriya"}, 119 {0x104d9, "Osage"}, 120 {0x10491, "Osmanya"}, 121 {0x16b2b, "Pahawh_Hmong"}, 122 {0x10876, "Palmyrene"}, 123 {0x11ACE, "Pau_Cin_Hau"}, 124 {0xa860, "Phags_Pa"}, 125 {0x10918, "Phoenician"}, 126 {0x10baf, "Psalter_Pahlavi"}, 127 {0xa949, "Rejang"}, 128 {0x16c0, "Runic"}, 129 {0x081d, "Samaritan"}, 130 {0xa892, "Saurashtra"}, 131 {0x111a0, "Sharada"}, 132 {0x10463, "Shavian"}, 133 {0x115c1, "Siddham"}, 134 {0x1D920, "SignWriting"}, 135 {0x0dbd, "Sinhala"}, 136 {0x110d0, "Sora_Sompeng"}, 137 {0x1ba3, "Sundanese"}, 138 {0xa803, "Syloti_Nagri"}, 139 {0x070f, "Syriac"}, 140 {0x170f, "Tagalog"}, 141 {0x176f, "Tagbanwa"}, 142 {0x1972, "Tai_Le"}, 143 {0x1a62, "Tai_Tham"}, 144 {0xaadc, "Tai_Viet"}, 145 {0x116c9, "Takri"}, 146 {0x0bbf, "Tamil"}, 147 {0x17000, "Tangut"}, 148 {0x0c55, "Telugu"}, 149 {0x07a7, "Thaana"}, 150 {0x0e46, "Thai"}, 151 {0x0f36, "Tibetan"}, 152 {0x2d55, "Tifinagh"}, 153 {0x114d9, "Tirhuta"}, 154 {0x10388, "Ugaritic"}, 155 {0xa60e, "Vai"}, 156 {0x118ff, "Warang_Citi"}, 157 {0xa216, "Yi"}, 158 } 159 160 var outTest = []T{ // not really worth being thorough 161 {0x20, "Telugu"}, 162 } 163 164 var inCategoryTest = []T{ 165 {0x0081, "Cc"}, 166 {0x200B, "Cf"}, 167 {0xf0000, "Co"}, 168 {0xdb80, "Cs"}, 169 {0x0236, "Ll"}, 170 {0x1d9d, "Lm"}, 171 {0x07cf, "Lo"}, 172 {0x1f8a, "Lt"}, 173 {0x03ff, "Lu"}, 174 {0x0bc1, "Mc"}, 175 {0x20df, "Me"}, 176 {0x07f0, "Mn"}, 177 {0x1bb2, "Nd"}, 178 {0x10147, "Nl"}, 179 {0x2478, "No"}, 180 {0xfe33, "Pc"}, 181 {0x2011, "Pd"}, 182 {0x301e, "Pe"}, 183 {0x2e03, "Pf"}, 184 {0x2e02, "Pi"}, 185 {0x0022, "Po"}, 186 {0x2770, "Ps"}, 187 {0x00a4, "Sc"}, 188 {0xa711, "Sk"}, 189 {0x25f9, "Sm"}, 190 {0x2108, "So"}, 191 {0x2028, "Zl"}, 192 {0x2029, "Zp"}, 193 {0x202f, "Zs"}, 194 // Unifieds. 195 {0x04aa, "L"}, 196 {0x0009, "C"}, 197 {0x1712, "M"}, 198 {0x0031, "N"}, 199 {0x00bb, "P"}, 200 {0x00a2, "S"}, 201 {0x00a0, "Z"}, 202 } 203 204 var inPropTest = []T{ 205 {0x0046, "ASCII_Hex_Digit"}, 206 {0x200F, "Bidi_Control"}, 207 {0x2212, "Dash"}, 208 {0xE0001, "Deprecated"}, 209 {0x00B7, "Diacritic"}, 210 {0x30FE, "Extender"}, 211 {0xFF46, "Hex_Digit"}, 212 {0x2E17, "Hyphen"}, 213 {0x2FFB, "IDS_Binary_Operator"}, 214 {0x2FF3, "IDS_Trinary_Operator"}, 215 {0xFA6A, "Ideographic"}, 216 {0x200D, "Join_Control"}, 217 {0x0EC4, "Logical_Order_Exception"}, 218 {0x2FFFF, "Noncharacter_Code_Point"}, 219 {0x065E, "Other_Alphabetic"}, 220 {0x2065, "Other_Default_Ignorable_Code_Point"}, 221 {0x0BD7, "Other_Grapheme_Extend"}, 222 {0x0387, "Other_ID_Continue"}, 223 {0x212E, "Other_ID_Start"}, 224 {0x2094, "Other_Lowercase"}, 225 {0x2040, "Other_Math"}, 226 {0x216F, "Other_Uppercase"}, 227 {0x0027, "Pattern_Syntax"}, 228 {0x0020, "Pattern_White_Space"}, 229 {0x06DD, "Prepended_Concatenation_Mark"}, 230 {0x300D, "Quotation_Mark"}, 231 {0x2EF3, "Radical"}, 232 {0x061F, "STerm"}, // Deprecated alias of Sentence_Terminal 233 {0x061F, "Sentence_Terminal"}, 234 {0x2071, "Soft_Dotted"}, 235 {0x003A, "Terminal_Punctuation"}, 236 {0x9FC3, "Unified_Ideograph"}, 237 {0xFE0F, "Variation_Selector"}, 238 {0x0020, "White_Space"}, 239 } 240 241 func TestScripts(t *testing.T) { 242 notTested := make(map[string]bool) 243 for k := range Scripts { 244 notTested[k] = true 245 } 246 for _, test := range inTest { 247 if _, ok := Scripts[test.script]; !ok { 248 t.Fatal(test.script, "not a known script") 249 } 250 if !Is(Scripts[test.script], test.rune) { 251 t.Errorf("IsScript(%U, %s) = false, want true", test.rune, test.script) 252 } 253 delete(notTested, test.script) 254 } 255 for _, test := range outTest { 256 if Is(Scripts[test.script], test.rune) { 257 t.Errorf("IsScript(%U, %s) = true, want false", test.rune, test.script) 258 } 259 } 260 for k := range notTested { 261 t.Error("script not tested:", k) 262 } 263 } 264 265 func TestCategories(t *testing.T) { 266 notTested := make(map[string]bool) 267 for k := range Categories { 268 notTested[k] = true 269 } 270 for _, test := range inCategoryTest { 271 if _, ok := Categories[test.script]; !ok { 272 t.Fatal(test.script, "not a known category") 273 } 274 if !Is(Categories[test.script], test.rune) { 275 t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script) 276 } 277 delete(notTested, test.script) 278 } 279 for k := range notTested { 280 t.Error("category not tested:", k) 281 } 282 } 283 284 func TestProperties(t *testing.T) { 285 notTested := make(map[string]bool) 286 for k := range Properties { 287 notTested[k] = true 288 } 289 for _, test := range inPropTest { 290 if _, ok := Properties[test.script]; !ok { 291 t.Fatal(test.script, "not a known prop") 292 } 293 if !Is(Properties[test.script], test.rune) { 294 t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script) 295 } 296 delete(notTested, test.script) 297 } 298 for k := range notTested { 299 t.Error("property not tested:", k) 300 } 301 } 302