Home | History | Annotate | Download | only in unicode
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package unicode_test
      6 
      7 import (
      8 	"testing"
      9 	. "unicode"
     10 )
     11 
// T is a single table-driven test case: a code point paired with the name of
// the range table it is expected to belong to. The tables in this file build
// it with positional literals, so the field order matters.
type T struct {
	rune   rune   // code point under test
	script string // key into Scripts, Categories or Properties, depending on the table
}
     16 
// inTest pairs one hand-chosen rune with the script it must belong to.
// The samples were taken from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0 and 8.0.0,
// mostly to discover when new scripts and categories arise: TestScripts
// reports every key of Scripts that has no entry in this table.
// NOTE(review): entries such as Adlam, Newa, Osage and Tangut look like
// Unicode 9.0.0 additions — confirm and extend the version list above.
var inTest = []T{
	{0x11711, "Ahom"},
	{0x1e900, "Adlam"},
	{0x14646, "Anatolian_Hieroglyphs"},
	{0x06e2, "Arabic"},
	{0x0567, "Armenian"},
	{0x10b20, "Avestan"},
	{0x11c00, "Bhaiksuki"},
	{0x1b37, "Balinese"},
	{0xa6af, "Bamum"},
	{0x16ada, "Bassa_Vah"},
	{0x1be1, "Batak"},
	{0x09c2, "Bengali"},
	{0x3115, "Bopomofo"},
	{0x282d, "Braille"},
	{0x1a1a, "Buginese"},
	{0x1747, "Buhid"},
	{0x11011, "Brahmi"},
	{0x156d, "Canadian_Aboriginal"},
	{0x102a9, "Carian"},
	{0x10563, "Caucasian_Albanian"},
	{0x11111, "Chakma"},
	{0xaa4d, "Cham"},
	{0x13c2, "Cherokee"},
	{0x0020, "Common"},
	{0x1d4a5, "Common"},
	{0x2cfc, "Coptic"},
	{0x12420, "Cuneiform"},
	{0x1080c, "Cypriot"},
	{0xa663, "Cyrillic"},
	{0x10430, "Deseret"},
	{0x094a, "Devanagari"},
	{0x1BC00, "Duployan"},
	{0x13001, "Egyptian_Hieroglyphs"},
	{0x10500, "Elbasan"},
	{0x1271, "Ethiopic"},
	{0x10fc, "Georgian"},
	{0x2c40, "Glagolitic"},
	{0x10347, "Gothic"},
	{0x11303, "Grantha"},
	{0x03ae, "Greek"},
	{0x0abf, "Gujarati"},
	{0x0a24, "Gurmukhi"},
	{0x3028, "Han"},
	{0x11b8, "Hangul"},
	{0x1727, "Hanunoo"},
	{0x108FF, "Hatran"},
	{0x05a0, "Hebrew"},
	{0x3058, "Hiragana"},
	{0x10841, "Imperial_Aramaic"},
	{0x20e6, "Inherited"},
	{0x10b70, "Inscriptional_Pahlavi"},
	{0x10b5a, "Inscriptional_Parthian"},
	{0xa9d0, "Javanese"},
	{0x1109f, "Kaithi"},
	{0x0cbd, "Kannada"},
	{0x30a6, "Katakana"},
	{0xa928, "Kayah_Li"},
	{0x10a11, "Kharoshthi"},
	{0x17c6, "Khmer"},
	{0x11211, "Khojki"},
	{0x112df, "Khudawadi"},
	{0x0eaa, "Lao"},
	{0x1d79, "Latin"},
	{0x1c10, "Lepcha"},
	{0x1930, "Limbu"},
	{0x10755, "Linear_A"},
	{0x1003c, "Linear_B"},
	{0xa4e1, "Lisu"},
	{0x10290, "Lycian"},
	{0x10930, "Lydian"},
	{0x11173, "Mahajani"},
	{0x0d42, "Malayalam"},
	{0x0843, "Mandaic"},
	{0x10ac8, "Manichaean"},
	{0x11cB6, "Marchen"},
	{0xabd0, "Meetei_Mayek"},
	{0x1e800, "Mende_Kikakui"},
	{0x1099f, "Meroitic_Hieroglyphs"},
	{0x109a0, "Meroitic_Cursive"},
	{0x16f00, "Miao"},
	{0x11611, "Modi"},
	{0x1822, "Mongolian"},
	{0x16a60, "Mro"},
	{0x11293, "Multani"},
	{0x104c, "Myanmar"},
	{0x10880, "Nabataean"},
	{0x11400, "Newa"},
	{0x19c3, "New_Tai_Lue"},
	{0x07f8, "Nko"},
	{0x169b, "Ogham"},
	{0x1c6a, "Ol_Chiki"},
	{0x10C80, "Old_Hungarian"},
	{0x10310, "Old_Italic"},
	{0x10a80, "Old_North_Arabian"},
	{0x10350, "Old_Permic"},
	{0x103c9, "Old_Persian"},
	{0x10a6f, "Old_South_Arabian"},
	{0x10c20, "Old_Turkic"},
	{0x0b3e, "Oriya"},
	{0x104d9, "Osage"},
	{0x10491, "Osmanya"},
	{0x16b2b, "Pahawh_Hmong"},
	{0x10876, "Palmyrene"},
	{0x11ACE, "Pau_Cin_Hau"},
	{0xa860, "Phags_Pa"},
	{0x10918, "Phoenician"},
	{0x10baf, "Psalter_Pahlavi"},
	{0xa949, "Rejang"},
	{0x16c0, "Runic"},
	{0x081d, "Samaritan"},
	{0xa892, "Saurashtra"},
	{0x111a0, "Sharada"},
	{0x10463, "Shavian"},
	{0x115c1, "Siddham"},
	{0x1D920, "SignWriting"},
	{0x0dbd, "Sinhala"},
	{0x110d0, "Sora_Sompeng"},
	{0x1ba3, "Sundanese"},
	{0xa803, "Syloti_Nagri"},
	{0x070f, "Syriac"},
	{0x170f, "Tagalog"},
	{0x176f, "Tagbanwa"},
	{0x1972, "Tai_Le"},
	{0x1a62, "Tai_Tham"},
	{0xaadc, "Tai_Viet"},
	{0x116c9, "Takri"},
	{0x0bbf, "Tamil"},
	{0x17000, "Tangut"},
	{0x0c55, "Telugu"},
	{0x07a7, "Thaana"},
	{0x0e46, "Thai"},
	{0x0f36, "Tibetan"},
	{0x2d55, "Tifinagh"},
	{0x114d9, "Tirhuta"},
	{0x10388, "Ugaritic"},
	{0xa60e, "Vai"},
	{0x118ff, "Warang_Citi"},
	{0xa216, "Yi"},
}
    159 
    160 var outTest = []T{ // not really worth being thorough
    161 	{0x20, "Telugu"},
    162 }
    163 
// inCategoryTest pairs one rune with a category it must belong to. In this
// table the script field of T holds a key of Categories, not a script name.
// The two-letter categories come first, followed by the one-letter unified
// classes. TestCategories reports every key of Categories that has no entry
// in this table.
var inCategoryTest = []T{
	{0x0081, "Cc"},
	{0x200B, "Cf"},
	{0xf0000, "Co"},
	{0xdb80, "Cs"},
	{0x0236, "Ll"},
	{0x1d9d, "Lm"},
	{0x07cf, "Lo"},
	{0x1f8a, "Lt"},
	{0x03ff, "Lu"},
	{0x0bc1, "Mc"},
	{0x20df, "Me"},
	{0x07f0, "Mn"},
	{0x1bb2, "Nd"},
	{0x10147, "Nl"},
	{0x2478, "No"},
	{0xfe33, "Pc"},
	{0x2011, "Pd"},
	{0x301e, "Pe"},
	{0x2e03, "Pf"},
	{0x2e02, "Pi"},
	{0x0022, "Po"},
	{0x2770, "Ps"},
	{0x00a4, "Sc"},
	{0xa711, "Sk"},
	{0x25f9, "Sm"},
	{0x2108, "So"},
	{0x2028, "Zl"},
	{0x2029, "Zp"},
	{0x202f, "Zs"},
	// Unifieds.
	{0x04aa, "L"},
	{0x0009, "C"},
	{0x1712, "M"},
	{0x0031, "N"},
	{0x00bb, "P"},
	{0x00a2, "S"},
	{0x00a0, "Z"},
}
    203 
// inPropTest pairs one rune with a property it must have. In this table the
// script field of T holds a key of Properties, not a script name.
// TestProperties reports every key of Properties that has no entry in this
// table.
var inPropTest = []T{
	{0x0046, "ASCII_Hex_Digit"},
	{0x200F, "Bidi_Control"},
	{0x2212, "Dash"},
	{0xE0001, "Deprecated"},
	{0x00B7, "Diacritic"},
	{0x30FE, "Extender"},
	{0xFF46, "Hex_Digit"},
	{0x2E17, "Hyphen"},
	{0x2FFB, "IDS_Binary_Operator"},
	{0x2FF3, "IDS_Trinary_Operator"},
	{0xFA6A, "Ideographic"},
	{0x200D, "Join_Control"},
	{0x0EC4, "Logical_Order_Exception"},
	{0x2FFFF, "Noncharacter_Code_Point"},
	{0x065E, "Other_Alphabetic"},
	{0x2065, "Other_Default_Ignorable_Code_Point"},
	{0x0BD7, "Other_Grapheme_Extend"},
	{0x0387, "Other_ID_Continue"},
	{0x212E, "Other_ID_Start"},
	{0x2094, "Other_Lowercase"},
	{0x2040, "Other_Math"},
	{0x216F, "Other_Uppercase"},
	{0x0027, "Pattern_Syntax"},
	{0x0020, "Pattern_White_Space"},
	{0x06DD, "Prepended_Concatenation_Mark"},
	{0x300D, "Quotation_Mark"},
	{0x2EF3, "Radical"},
	{0x061F, "STerm"}, // Deprecated alias of Sentence_Terminal
	{0x061F, "Sentence_Terminal"},
	{0x2071, "Soft_Dotted"},
	{0x003A, "Terminal_Punctuation"},
	{0x9FC3, "Unified_Ideograph"},
	{0xFE0F, "Variation_Selector"},
	{0x0020, "White_Space"},
}
    240 
    241 func TestScripts(t *testing.T) {
    242 	notTested := make(map[string]bool)
    243 	for k := range Scripts {
    244 		notTested[k] = true
    245 	}
    246 	for _, test := range inTest {
    247 		if _, ok := Scripts[test.script]; !ok {
    248 			t.Fatal(test.script, "not a known script")
    249 		}
    250 		if !Is(Scripts[test.script], test.rune) {
    251 			t.Errorf("IsScript(%U, %s) = false, want true", test.rune, test.script)
    252 		}
    253 		delete(notTested, test.script)
    254 	}
    255 	for _, test := range outTest {
    256 		if Is(Scripts[test.script], test.rune) {
    257 			t.Errorf("IsScript(%U, %s) = true, want false", test.rune, test.script)
    258 		}
    259 	}
    260 	for k := range notTested {
    261 		t.Error("script not tested:", k)
    262 	}
    263 }
    264 
    265 func TestCategories(t *testing.T) {
    266 	notTested := make(map[string]bool)
    267 	for k := range Categories {
    268 		notTested[k] = true
    269 	}
    270 	for _, test := range inCategoryTest {
    271 		if _, ok := Categories[test.script]; !ok {
    272 			t.Fatal(test.script, "not a known category")
    273 		}
    274 		if !Is(Categories[test.script], test.rune) {
    275 			t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script)
    276 		}
    277 		delete(notTested, test.script)
    278 	}
    279 	for k := range notTested {
    280 		t.Error("category not tested:", k)
    281 	}
    282 }
    283 
    284 func TestProperties(t *testing.T) {
    285 	notTested := make(map[string]bool)
    286 	for k := range Properties {
    287 		notTested[k] = true
    288 	}
    289 	for _, test := range inPropTest {
    290 		if _, ok := Properties[test.script]; !ok {
    291 			t.Fatal(test.script, "not a known prop")
    292 		}
    293 		if !Is(Properties[test.script], test.rune) {
    294 			t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script)
    295 		}
    296 		delete(notTested, test.script)
    297 	}
    298 	for k := range notTested {
    299 		t.Error("property not tested:", k)
    300 	}
    301 }
    302