1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2014, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 9 #include "unicode/utypes.h" 10 11 #if !UCONFIG_NO_COLLATION 12 13 #include "unicode/coll.h" 14 #include "unicode/tblcoll.h" 15 #include "unicode/unistr.h" 16 #include "unicode/sortkey.h" 17 #include "g7coll.h" 18 #include "sfwdchit.h" 19 #include "cmemory.h" 20 21 static const UChar testCases[][G7CollationTest::MAX_TOKEN_LEN] = { 22 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, 23 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073 /*'s'*/, 0x0000}, /* 9 */ 24 { 0x0050 /*'P'*/, 0x0061 /*'a'*/, 0x0074/*'t'*/, 0x0000}, /* 1 */ 25 { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x00E9, 0x0000}, /* 2 */ 26 { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0000}, /* 3 */ 27 { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000}, /* 4 */ 28 { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000}, /* 5 */ 29 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0064 /*'d'*/, 0x0000}, /* 6 */ 30 { 0x0054 /*'T'*/, 0x00F6, 0x006e /*'n'*/, 0x0065 /*'e'*/, 0x0000}, /* 7 */ 31 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0066 /*'f'*/, 0x0075 /*'u'*/, 0x0000}, /* 8 */ 32 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, 33 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000}, /* 12 */ 34 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x006e /*'n'*/, 0x0000}, /* 10 */ 35 { 0x0050 /*'P'*/, 0x0041 /*'A'*/, 0x0054 /*'T'*/, 0x0000}, /* 11 */ 36 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, 37 0x002d /*'-'*/, 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000}, /* 13 */ 38 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, 39 0x002d /*'-'*/, 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073/*'s'*/, 0x0000}, /* 0 */ 40 {0x0070 /*'p'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x0000}, /* 14 */ 41 /* Additional tests */ 42 { 0x0063 /*'c'*/, 0x007a /*'z'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0000 }, /* 15 */ 43 { 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0075 /*'u'*/, 0x0072 /*'r'*/, 0x006f /*'o'*/, 0x0000 }, /* 16 */ 44 { 0x0063 /*'c'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x000 }, /* 17 */ 45 { 0x0064 /*'d'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x006e /*'n'*/, 0x0000 }, /* 18 */ 46 { 0x003f /*'?'*/, 0x0000 }, /* 19 */ 47 { 0x0071 /*'q'*/, 0x0075 /*'u'*/, 0x0069 /*'i'*/, 0x0063 /*'c'*/, 0x006b /*'k'*/, 0x0000 }, /* 20 */ 48 { 0x0023 /*'#'*/, 0x0000 }, /* 21 */ 49 { 0x0026 /*'&'*/, 0x0000 }, /* 22 */ 50 { 0x0061 /*'a'*/, 0x002d /*'-'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/, 51 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000}, /* 24 */ 52 { 0x0061 /*'a'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/, 53 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000}, /* 23 */ 54 { 0x0061 /*'a'*/, 0x0062 /*'b'*/, 0x0062 /*'b'*/, 0x006f /*'o'*/, 0x0074 /*'t'*/, 0x0000}, /* 25 */ 55 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x002d /*'-'*/, 0x0070 /*'p'*/, 0x0000}, /* 27 */ 56 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000}, /* 28 */ 57 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000}, /* 26 */ 58 { 0x007a /*'z'*/, 0x0065 /*'e'*/, 0x0062 /*'b'*/, 0x0072 /*'r'*/, 0x0061 /*'a'*/, 0x0000} /* 29 */ 59 }; 60 61 static const int32_t results[G7CollationTest::TESTLOCALES][G7CollationTest::TOTALTESTSET] = { 62 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_US */ 63 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_GB */ 64 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_CA */ 65 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_FR */ 66 { 12, 13, 9, 0, 14, 1, 11, 3, 2, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_CA */ 67 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* de_DE */ 68 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* it_IT */ 69 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* ja_JP */ 70 /* new table collation with rules "& Z < p, P" loop to FIXEDTESTSET */ 71 { 12, 13, 9, 0, 6, 8, 10, 7, 14, 1, 11, 2, 3, 4, 5, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, 72 /* new table collation with rules "& C < ch , cH, Ch, CH " loop to TOTALTESTSET */ 73 { 19, 22, 21, 23, 24, 25, 12, 13, 9, 0, 17, 26, 28, 27, 15, 16, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 }, 74 /* new table collation with rules "& Question-mark ; ? & Hash-mark ; # & Ampersand ; '&' " loop to TOTALTESTSET */ 75 { 23, 24, 25, 22, 12, 13, 9, 0, 17, 16, 26, 28, 27, 15, 18, 21, 14, 1, 11, 2, 3, 4, 5, 19, 20, 6, 8, 10, 7, 29 }, 76 /* analogous to Japanese rules " & aa ; a- & ee ; e- & ii ; i- & oo ; o- & uu ; u- " */ /* loop to TOTALTESTSET */ 77 { 19, 22, 21, 24, 23, 25, 12, 13, 9, 0, 17, 16, 28, 26, 27, 15, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 } 78 }; 79 80 G7CollationTest::~G7CollationTest() {} 81 82 void G7CollationTest::TestG7Locales(/* char* par */) 83 { 84 int32_t i; 85 const Locale locales[8] = { 86 Locale("en", "US", ""), 87 Locale("en", "GB", ""), 88 Locale("en", "CA", ""), 89 Locale("fr", "FR", ""), 90 Locale("fr", "CA", ""), 91 Locale("de", "DE", ""), 92 Locale("it", "IT", ""), 93 Locale("ja", "JP", "") 94 }; 95 96 for (i = 0; i < UPRV_LENGTHOF(locales); i++) 97 { 98 UnicodeString dispName; 99 UErrorCode status = U_ZERO_ERROR; 100 101 const Locale &locale = locales[i]; 102 LocalPointer<Collator> myCollation(Collator::createInstance(locale, status)); 103 if(U_FAILURE(status)) { 104 errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status)); 105 return; 106 } 107 myCollation->setStrength(Collator::QUATERNARY); 108 myCollation->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); 109 if (U_FAILURE(status)) { 110 errln("Locale %s creation failed - %s", locale.getName(), u_errorName(status)); 111 continue; 112 } 113 114 const UnicodeString &rules = ((RuleBasedCollator*)myCollation.getAlias())->getRules(); 115 if (rules.isEmpty() && 116 (locale == Locale::getCanadaFrench() || locale == Locale::getJapanese())) { 117 dataerrln("%s Collator missing rule string", locale.getName()); 118 if (logKnownIssue("10671", "TestG7Locales does not test ignore-punctuation")) { 119 continue; 120 } 121 } else { 122 status = U_ZERO_ERROR; 123 RuleBasedCollator *tblColl1 = new RuleBasedCollator(rules, status); 124 if (U_FAILURE(status)) { 125 errln("Recreate %s collation failed - %s", locale.getName(), u_errorName(status)); 126 continue; 127 } 128 myCollation.adoptInstead(tblColl1); 129 } 130 131 UnicodeString msg; 132 133 msg += "Locale "; 134 msg += locales[i].getDisplayName(dispName); 135 msg += "tests start :"; 136 logln(msg); 137 138 int32_t j, n; 139 for (j = 0; j < FIXEDTESTSET; j++) 140 { 141 for (n = j+1; n < FIXEDTESTSET; n++) 142 { 143 doTest(myCollation.getAlias(), testCases[results[i][j]], testCases[results[i][n]], Collator::LESS); 144 } 145 } 146 } 147 } 148 149 void G7CollationTest::TestDemo1(/* char* par */) 150 { 151 logln("Demo Test 1 : Create a new table collation with rules \"& Z < p, P\""); 152 UErrorCode status = U_ZERO_ERROR; 153 Collator *col = Collator::createInstance("en_US", status); 154 if(U_FAILURE(status)) { 155 delete col; 156 errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status)); 157 return; 158 } 159 const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules(); 160 UnicodeString newRules(" & Z < p, P"); 161 newRules.insert(0, baseRules); 162 RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status); 163 164 if (U_FAILURE(status)) 165 { 166 errln( "Demo Test 1 Table Collation object creation failed."); 167 return; 168 } 169 170 int32_t j, n; 171 for (j = 0; j < FIXEDTESTSET; j++) 172 { 173 for (n = j+1; n < FIXEDTESTSET; n++) 174 { 175 doTest(myCollation, testCases[results[8][j]], testCases[results[8][n]], Collator::LESS); 176 } 177 } 178 179 delete myCollation; 180 delete col; 181 } 182 183 void G7CollationTest::TestDemo2(/* char* par */) 184 { 185 logln("Demo Test 2 : Create a new table collation with rules \"& C < ch , cH, Ch, CH\""); 186 UErrorCode status = U_ZERO_ERROR; 187 Collator *col = Collator::createInstance("en_US", status); 188 if(U_FAILURE(status)) { 189 delete col; 190 errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status)); 191 return; 192 } 193 const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules(); 194 UnicodeString newRules("& C < ch , cH, Ch, CH"); 195 newRules.insert(0, baseRules); 196 RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status); 197 198 if (U_FAILURE(status)) 199 { 200 errln("Demo Test 2 Table Collation object creation failed."); 201 return; 202 } 203 204 int32_t j, n; 205 for (j = 0; j < TOTALTESTSET; j++) 206 { 207 for (n = j+1; n < TOTALTESTSET; n++) 208 { 209 doTest(myCollation, testCases[results[9][j]], testCases[results[9][n]], Collator::LESS); 210 } 211 } 212 213 delete myCollation; 214 delete col; 215 } 216 217 void G7CollationTest::TestDemo3(/* char* par */) 218 { 219 logln("Demo Test 3 : Create a new table collation with rules \"& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'\""); 220 UErrorCode status = U_ZERO_ERROR; 221 Collator *col = Collator::createInstance("en_US", status); 222 if(U_FAILURE(status)) { 223 errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status)); 224 delete col; 225 return; 226 } 227 const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules(); 228 UnicodeString newRules = "& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'"; 229 newRules.insert(0, baseRules); 230 RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status); 231 232 if (U_FAILURE(status)) 233 { 234 errln("Demo Test 3 Table Collation object creation failed."); 235 return; 236 } 237 238 int32_t j, n; 239 for (j = 0; j < TOTALTESTSET; j++) 240 { 241 for (n = j+1; n < TOTALTESTSET; n++) 242 { 243 doTest(myCollation, testCases[results[10][j]], testCases[results[10][n]], Collator::LESS); 244 } 245 } 246 247 delete myCollation; 248 delete col; 249 } 250 251 void G7CollationTest::TestDemo4(/* char* par */) 252 { 253 logln("Demo Test 4 : Create a new table collation with rules \" & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' \""); 254 UErrorCode status = U_ZERO_ERROR; 255 Collator *col = Collator::createInstance("en_US", status); 256 if(U_FAILURE(status)) { 257 delete col; 258 errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status)); 259 return; 260 } 261 262 const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules(); 263 UnicodeString newRules = " & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' "; 264 newRules.insert(0, baseRules); 265 RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status); 266 267 int32_t j, n; 268 for (j = 0; j < TOTALTESTSET; j++) 269 { 270 for (n = j+1; n < TOTALTESTSET; n++) 271 { 272 doTest(myCollation, testCases[results[11][j]], testCases[results[11][n]], Collator::LESS); 273 } 274 } 275 276 delete myCollation; 277 delete col; 278 } 279 280 void G7CollationTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 281 { 282 if (exec) logln("TestSuite G7CollationTest: "); 283 switch (index) { 284 case 0: name = "TestG7Locales"; if (exec) TestG7Locales(/* par */); break; 285 case 1: name = "TestDemo1"; if (exec) TestDemo1(/* par */); break; 286 case 2: name = "TestDemo2"; if (exec) TestDemo2(/* par */); break; 287 case 3: name = "TestDemo3"; if (exec) TestDemo3(/* par */); break; 288 case 4: name = "TestDemo4"; if (exec) TestDemo4(/* par */); break; 289 default: name = ""; break; 290 } 291 } 292 293 #endif /* #if !UCONFIG_NO_COLLATION */ 294