1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 2011, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 // 7 // file: alphaindex.cpp 8 // Alphabetic Index Tests. 9 // 10 #include "intltest.h" 11 #include "alphaindextst.h" 12 13 #include "unicode/alphaindex.h" 14 #include "unicode/coll.h" 15 #include "unicode/tblcoll.h" 16 #include "unicode/uniset.h" 17 18 // #include <string> 19 // #include <iostream> 20 21 AlphabeticIndexTest::AlphabeticIndexTest() { 22 } 23 24 AlphabeticIndexTest::~AlphabeticIndexTest() { 25 } 26 27 void AlphabeticIndexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 28 { 29 if (exec) logln("TestSuite AlphabeticIndex: "); 30 switch (index) { 31 32 case 0: name = "APITest"; 33 if (exec) APITest(); 34 break; 35 36 case 1: name = "ManyLocales"; 37 if (exec) ManyLocalesTest(); 38 break; 39 40 case 2: name = "HackPinyinTest"; 41 if (exec) HackPinyinTest(); 42 break; 43 44 default: name = ""; 45 break; //needed to end loop 46 } 47 } 48 49 #define TEST_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: Test failure. status=%s", \ 50 __FILE__, __LINE__, u_errorName(status)); return;}} 51 52 #define TEST_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: Test failure \n", __FILE__, __LINE__);};} 53 54 // 55 // APITest. Invoke every function at least once, and check that it does something. 56 // Does not attempt to check complete functionality. 57 // 58 void AlphabeticIndexTest::APITest() { 59 60 // 61 // Simple constructor and destructor, getBucketCount() 62 // 63 UErrorCode status = U_ZERO_ERROR; 64 int32_t lc = 0; 65 int32_t i = 0; 66 AlphabeticIndex *index = new AlphabeticIndex(Locale::getEnglish(), status); 67 TEST_CHECK_STATUS; 68 lc = index->getBucketCount(status); 69 TEST_CHECK_STATUS; 70 TEST_ASSERT(28 == lc); // 26 letters plus two under/overflow labels. 71 //printf("getBucketCount() == %d\n", lc); 72 delete index; 73 74 // addLabels() 75 76 status = U_ZERO_ERROR; 77 index = new AlphabeticIndex(Locale::getEnglish(), status); 78 TEST_CHECK_STATUS; 79 UnicodeSet additions; 80 additions.add((UChar32)0x410).add((UChar32)0x415); // A couple of Cyrillic letters 81 index->addLabels(additions, status); 82 TEST_CHECK_STATUS; 83 lc = index->getBucketCount(status); 84 TEST_CHECK_STATUS; 85 // TODO: should get 31. Java also gives 30. Needs fixing 86 TEST_ASSERT(30 == lc); // 26 Latin letters plus 87 // TEST_ASSERT(31 == lc); // 26 Latin letters plus 88 // 2 Cyrillic letters plus 89 // 1 inflow label plus 90 // two under/overflow labels. 91 // std::cout << lc << std::endl; 92 delete index; 93 94 95 // addLabels(Locale) 96 97 status = U_ZERO_ERROR; 98 index = new AlphabeticIndex(Locale::getEnglish(), status); 99 TEST_CHECK_STATUS; 100 AlphabeticIndex &aip = index->addLabels(Locale::getJapanese(), status); 101 TEST_ASSERT(&aip == index); 102 TEST_CHECK_STATUS; 103 lc = index->getBucketCount(status); 104 TEST_CHECK_STATUS; 105 TEST_ASSERT(35 < lc); // Japanese should add a bunch. Don't rely on the exact value. 106 delete index; 107 108 // GetCollator(), Get under/in/over flow labels 109 110 status = U_ZERO_ERROR; 111 index = new AlphabeticIndex(Locale::getGerman(), status); 112 TEST_CHECK_STATUS; 113 Collator *germanCol = Collator::createInstance(Locale::getGerman(), status); 114 TEST_CHECK_STATUS; 115 const RuleBasedCollator &indexCol = index->getCollator(); 116 TEST_ASSERT(*germanCol == indexCol); 117 delete germanCol; 118 119 UnicodeString ELLIPSIS; ELLIPSIS.append((UChar32)0x2026); 120 UnicodeString s = index->getUnderflowLabel(); 121 TEST_ASSERT(ELLIPSIS == s); 122 s = index->getOverflowLabel(); 123 TEST_ASSERT(ELLIPSIS == s); 124 s = index->getInflowLabel(); 125 TEST_ASSERT(ELLIPSIS == s); 126 index->setOverflowLabel(UNICODE_STRING_SIMPLE("O"), status); 127 index->setUnderflowLabel(UNICODE_STRING_SIMPLE("U"), status).setInflowLabel(UNICODE_STRING_SIMPLE("I"), status); 128 s = index->getUnderflowLabel(); 129 TEST_ASSERT(UNICODE_STRING_SIMPLE("U") == s); 130 s = index->getOverflowLabel(); 131 TEST_ASSERT(UNICODE_STRING_SIMPLE("O") == s); 132 s = index->getInflowLabel(); 133 TEST_ASSERT(UNICODE_STRING_SIMPLE("I") == s); 134 135 136 137 138 delete index; 139 140 141 142 const UnicodeString adam = UNICODE_STRING_SIMPLE("Adam"); 143 const UnicodeString baker = UNICODE_STRING_SIMPLE("Baker"); 144 const UnicodeString charlie = UNICODE_STRING_SIMPLE("Charlie"); 145 const UnicodeString chad = UNICODE_STRING_SIMPLE("Chad"); 146 const UnicodeString zed = UNICODE_STRING_SIMPLE("Zed"); 147 const UnicodeString Cyrillic = UNICODE_STRING_SIMPLE("\\u0410\\u0443\\u0435").unescape(); 148 149 // addRecord(), verify that it comes back out. 150 // 151 status = U_ZERO_ERROR; 152 index = new AlphabeticIndex(Locale::getEnglish(), status); 153 TEST_CHECK_STATUS; 154 index->addRecord(UnicodeString("Adam"), this, status); 155 UBool b; 156 TEST_CHECK_STATUS; 157 index->resetBucketIterator(status); 158 TEST_CHECK_STATUS; 159 index->nextBucket(status); // Move to underflow label 160 index->nextBucket(status); // Move to "A" 161 TEST_CHECK_STATUS; 162 const UnicodeString &label2 = index->getBucketLabel(); 163 UnicodeString A_STR = UNICODE_STRING_SIMPLE("A"); 164 TEST_ASSERT(A_STR == label2); 165 166 b = index->nextRecord(status); 167 TEST_CHECK_STATUS; 168 TEST_ASSERT(b); 169 const UnicodeString &itemName = index->getRecordName(); 170 TEST_ASSERT(adam == itemName); 171 172 const void *itemContext = index->getRecordData(); 173 TEST_ASSERT(itemContext == this); 174 175 delete index; 176 177 // clearRecords, addRecord(), Iteration 178 179 status = U_ZERO_ERROR; 180 index = new AlphabeticIndex(Locale::getEnglish(), status); 181 TEST_CHECK_STATUS; 182 while (index->nextBucket(status)) { 183 TEST_CHECK_STATUS; 184 while (index->nextRecord(status)) { 185 TEST_CHECK_STATUS; 186 TEST_ASSERT(FALSE); // No items have been added. 187 } 188 TEST_CHECK_STATUS; 189 } 190 191 index->addRecord(adam, NULL, status); 192 index->addRecord(baker, NULL, status); 193 index->addRecord(charlie, NULL, status); 194 index->addRecord(chad, NULL, status); 195 TEST_CHECK_STATUS; 196 int itemCount = 0; 197 index->resetBucketIterator(status); 198 while (index->nextBucket(status)) { 199 TEST_CHECK_STATUS; 200 while (index->nextRecord(status)) { 201 TEST_CHECK_STATUS; 202 ++itemCount; 203 } 204 } 205 TEST_CHECK_STATUS; 206 TEST_ASSERT(itemCount == 4); 207 208 TEST_ASSERT(index->nextBucket(status) == FALSE); 209 index->resetBucketIterator(status); 210 TEST_CHECK_STATUS; 211 TEST_ASSERT(index->nextBucket(status) == TRUE); 212 213 index->clearRecords(status); 214 TEST_CHECK_STATUS; 215 index->resetBucketIterator(status); 216 while (index->nextBucket(status)) { 217 TEST_CHECK_STATUS; 218 while (index->nextRecord(status)) { 219 TEST_ASSERT(FALSE); // No items have been added. 220 } 221 } 222 TEST_CHECK_STATUS; 223 delete index; 224 225 // getBucketLabel(), getBucketType() 226 227 status = U_ZERO_ERROR; 228 index = new AlphabeticIndex(Locale::getEnglish(), status); 229 TEST_CHECK_STATUS; 230 index->setUnderflowLabel(adam, status).setOverflowLabel(charlie, status); 231 TEST_CHECK_STATUS; 232 for (i=0; index->nextBucket(status); i++) { 233 TEST_CHECK_STATUS; 234 UnicodeString label = index->getBucketLabel(); 235 UAlphabeticIndexLabelType type = index->getBucketLabelType(); 236 if (i == 0) { 237 TEST_ASSERT(type == U_ALPHAINDEX_UNDERFLOW); 238 TEST_ASSERT(label == adam); 239 } else if (i <= 26) { 240 // Labels A - Z for English locale 241 TEST_ASSERT(type == U_ALPHAINDEX_NORMAL); 242 UnicodeString expectedLabel((UChar)(0x40 + i)); 243 TEST_ASSERT(expectedLabel == label); 244 } else if (i == 27) { 245 TEST_ASSERT(type == U_ALPHAINDEX_OVERFLOW); 246 TEST_ASSERT(label == charlie); 247 } else { 248 TEST_ASSERT(FALSE); 249 } 250 } 251 TEST_ASSERT(i==28); 252 delete index; 253 254 // getBucketIndex() 255 256 status = U_ZERO_ERROR; 257 index = new AlphabeticIndex(Locale::getEnglish(), status); 258 TEST_CHECK_STATUS; 259 int32_t n = index->getBucketIndex(adam, status); 260 TEST_CHECK_STATUS; 261 TEST_ASSERT(n == 1); /* Label #0 is underflow, 1 is A, etc. */ 262 n = index->getBucketIndex(baker, status); 263 TEST_ASSERT(n == 2); 264 n = index->getBucketIndex(Cyrillic, status); 265 TEST_ASSERT(n == 27); // Overflow label 266 n = index->getBucketIndex(zed, status); 267 TEST_ASSERT(n == 26); 268 269 for (i=0; index->nextBucket(status); i++) { 270 n = index->getBucketIndex(); 271 TEST_ASSERT(n == i); 272 UnicodeString label = index->getBucketLabel(); 273 TEST_ASSERT(n == i); 274 } 275 TEST_ASSERT(i == 28); 276 277 delete index; 278 index = new AlphabeticIndex(Locale::createFromName("ru"), status); 279 //Locale loc = Locale::createFromName(localeName); 280 TEST_CHECK_STATUS; 281 n = index->getBucketIndex(adam, status); 282 TEST_CHECK_STATUS; 283 TEST_ASSERT(n == 0); // Label #0 is underflow 284 n = index->getBucketIndex(baker, status); 285 TEST_ASSERT(n == 0); 286 n = index->getBucketIndex(Cyrillic, status); 287 TEST_ASSERT(n == 1); // Overflow label 288 n = index->getBucketIndex(zed, status); 289 TEST_ASSERT(n == 0); 290 291 delete index; 292 293 } 294 295 296 static const char * KEY_LOCALES[] = { 297 "en", "es", "de", "fr", "ja", "it", "tr", "pt", "zh", "nl", 298 "pl", "ar", "ru", "zh_Hant", "ko", "th", "sv", "fi", "da", 299 "he", "nb", "el", "hr", "bg", "sk", "lt", "vi", "lv", "sr", 300 "pt_PT", "ro", "hu", "cs", "id", "sl", "fil", "fa", "uk", 301 "ca", "hi", "et", "eu", "is", "sw", "ms", "bn", "am", "ta", 302 "te", "mr", "ur", "ml", "kn", "gu", "or", ""}; 303 304 305 void AlphabeticIndexTest::ManyLocalesTest() { 306 UErrorCode status = U_ZERO_ERROR; 307 int32_t lc = 0; 308 AlphabeticIndex *index = NULL; 309 310 for (int i=0; ; ++i) { 311 status = U_ZERO_ERROR; 312 const char *localeName = KEY_LOCALES[i]; 313 if (localeName[0] == 0) { 314 break; 315 } 316 // std::cout << localeName << " "; 317 Locale loc = Locale::createFromName(localeName); 318 index = new AlphabeticIndex(loc, status); 319 TEST_CHECK_STATUS; 320 lc = index->getBucketCount(status); 321 TEST_CHECK_STATUS; 322 // std::cout << "getBucketCount() == " << lc << std::endl; 323 324 while (index->nextBucket(status)) { 325 TEST_CHECK_STATUS; 326 const UnicodeString &label = index->getBucketLabel(); 327 TEST_ASSERT(label.length()>0); 328 // std::string ss; 329 // std::cout << ":" << label.toUTF8String(ss); 330 } 331 // std::cout << ":" << std::endl; 332 333 334 delete index; 335 } 336 } 337 338 339 // Test data for Pinyin based indexes. 340 // The Chinese characters should be distributed under latin labels in 341 // an index. 342 343 static const char *pinyinTestData[] = { 344 "\\u0101", "\\u5416", "\\u58ba", // 345 "b", "\\u516b", "\\u62d4", "\\u8500", // 346 "c", "\\u5693", "\\u7938", "\\u9e7e", // 347 "d", "\\u5491", "\\u8fcf", "\\u964a", // 348 "\\u0113","\\u59b8", "\\u92e8", "\\u834b", // 349 "f", "\\u53d1", "\\u9197", "\\u99a5", // 350 "g", "\\u7324", "\\u91d3", "\\u8142", // 351 "h", "\\u598e", "\\u927f", "\\u593b", // 352 "j", "\\u4e0c", "\\u6785", "\\u9d58", // 353 "k", "\\u5494", "\\u958b", "\\u7a52", // 354 "l", "\\u5783", "\\u62c9", "\\u9ba5", // 355 "m", "\\u5638", "\\u9ebb", "\\u65c0", // 356 "n", "\\u62ff", "\\u80ad", "\\u685b", // 357 "\\u014D", "\\u5662", "\\u6bee", "\\u8bb4", // 358 "p", "\\u5991", "\\u8019", "\\u8c31", // 359 "q", "\\u4e03", "\\u6053", "\\u7f56", // 360 "r", "\\u5465", "\\u72aa", "\\u6e03", // 361 "s", "\\u4ee8", "\\u9491", "\\u93c1", // 362 "t", "\\u4ed6", "\\u9248", "\\u67dd", // 363 "w", "\\u5c72", "\\u5558", "\\u5a7a", // 364 "x", "\\u5915", "\\u5438", "\\u6bbe", // 365 "y", "\\u4e2b", "\\u82bd", "\\u8574", // 366 "z", "\\u5e00", "\\u707d", "\\u5c0a", 367 NULL 368 }; 369 370 void AlphabeticIndexTest::HackPinyinTest() { 371 UErrorCode status = U_ZERO_ERROR; 372 AlphabeticIndex aindex(Locale::createFromName("zh"), status); 373 TEST_CHECK_STATUS; 374 375 UnicodeString names[sizeof(pinyinTestData) / sizeof(pinyinTestData[0])]; 376 int32_t nameCount; 377 for (nameCount=0; pinyinTestData[nameCount] != NULL; nameCount++) { 378 names[nameCount] = UnicodeString(pinyinTestData[nameCount], -1, UnicodeString::kInvariant).unescape(); 379 aindex.addRecord(names[nameCount], &names[nameCount], status); 380 TEST_CHECK_STATUS; 381 if (U_FAILURE(status)) { 382 return; 383 } 384 } 385 TEST_ASSERT(nameCount == aindex.getRecordCount(status)); 386 387 // Weak checking: make sure that none of the Chinese names landed in the overflow bucket 388 // of the index, and that the names are distributed among several buckets. 389 // (Exact expected data would be subject to change with evolution of the collation rules.) 390 391 int32_t bucketCount = 0; 392 int32_t filledBucketCount = 0; 393 while (aindex.nextBucket(status)) { 394 bucketCount++; 395 UnicodeString label = aindex.getBucketLabel(); 396 // std::string s; 397 // std::cout << label.toUTF8String(s) << ": "; 398 399 UBool bucketHasContents = FALSE; 400 while (aindex.nextRecord(status)) { 401 bucketHasContents = TRUE; 402 UnicodeString name = aindex.getRecordName(); 403 if (aindex.getBucketLabelType() != U_ALPHAINDEX_NORMAL) { 404 errln("File %s, Line %d, Name \"\\u%x\" is in an under or overflow bucket.", 405 __FILE__, __LINE__, name.char32At(0)); 406 } 407 // s.clear(); 408 // std::cout << aindex.getRecordName().toUTF8String(s) << " "; 409 } 410 if (bucketHasContents) { 411 filledBucketCount++; 412 } 413 // std::cout << std::endl; 414 } 415 TEST_ASSERT(bucketCount > 25); 416 TEST_ASSERT(filledBucketCount > 15); 417 } 418