1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2014, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 /******************************************************************************** 9 * 10 * File CG7COLL.C 11 * 12 * Modification History: 13 * Name Description 14 * Madhu Katragadda Ported for C API 15 *********************************************************************************/ 16 /** 17 * G7CollationTest is a third level test class. This test performs the examples 18 * mentioned on the IBM Java international demos web site. 19 * Sample Rules: & Z < p , P 20 * Effect : Making P sort after Z. 21 * 22 * Sample Rules: & c < ch , cH, Ch, CH 23 * Effect : As well as adding sequences of characters that act as a single character (this is 24 * known as contraction), you can also add characters that act like a sequence of 25 * characters (this is known as expansion). 26 * 27 * Sample Rules: & Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&' 28 * Effect : Expansion and contraction can actually be combined. 29 * 30 * Sample Rules: & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' 31 * Effect : sorted sequence as the following, 32 * aardvark 33 * a-rdvark 34 * abbot 35 * coop 36 * co-p 37 * cop 38 */ 39 #include <stdlib.h> 40 #include <string.h> 41 #include <stdio.h> 42 43 #include "unicode/utypes.h" 44 45 #if !UCONFIG_NO_COLLATION 46 47 #include "unicode/ucol.h" 48 #include "unicode/uloc.h" 49 #include "cintltst.h" 50 #include "cg7coll.h" 51 #include "ccolltst.h" 52 #include "callcoll.h" 53 #include "unicode/ustring.h" 54 55 const char* locales[8] = { 56 "en_US", 57 "en_GB", 58 "en_CA", 59 "fr_FR", 60 "fr_CA", 61 "de_DE", 62 "it_IT", 63 "ja_JP" 64 }; 65 66 67 68 const static UChar testCases[][MAX_TOKEN_LEN] = { 69 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, 70 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073 /*'s'*/, 0x0000}, /* 9 */ 71 { 0x0050 /*'P'*/, 0x0061 /*'a'*/, 0x0074/*'t'*/, 0x0000}, /* 1 */ 72 { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x00E9, 0x0000}, /* 2 */ 73 { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0000}, /* 3 */ 74 { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000}, /* 4 */ 75 { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000}, /* 5 */ 76 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0064 /*'d'*/, 0x0000}, /* 6 */ 77 { 0x0054 /*'T'*/, 0x00F6, 0x006e /*'n'*/, 0x0065 /*'e'*/, 0x0000}, /* 7 */ 78 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0066 /*'f'*/, 0x0075 /*'u'*/, 0x0000}, /* 8 */ 79 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, 80 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000}, /* 12 */ 81 { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x006e /*'n'*/, 0x0000}, /* 10 */ 82 { 0x0050 /*'P'*/, 0x0041 /*'A'*/, 0x0054 /*'T'*/, 0x0000}, /* 11 */ 83 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, 84 0x002d /*'-'*/, 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000}, /* 13 */ 85 { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/, 86 0x002d /*'-'*/, 0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073/*'s'*/, 0x0000}, /* 0 */ 87 {0x0070 /*'p'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x0000}, /* 14 */ 88 /* Additional tests */ 89 { 0x0063 /*'c'*/, 0x007a /*'z'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0000 }, /* 15 */ 90 { 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0075 /*'u'*/, 0x0072 /*'r'*/, 0x006f /*'o'*/, 0x0000 }, /* 16 */ 91 { 0x0063 /*'c'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x000 }, /* 17 */ 92 { 0x0064 /*'d'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x006e /*'n'*/, 0x0000 }, /* 18 */ 93 { 0x003f /*'?'*/, 0x0000 }, /* 19 */ 94 { 0x0071 /*'q'*/, 0x0075 /*'u'*/, 0x0069 /*'i'*/, 0x0063 /*'c'*/, 0x006b /*'k'*/, 0x0000 }, /* 20 */ 95 { 0x0023 /*'#'*/, 0x0000 }, /* 21 */ 96 { 0x0026 /*'&'*/, 0x0000 }, /* 22 */ 97 { 0x0061 /*'a'*/, 0x002d /*'-'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/, 98 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000}, /* 24 */ 99 { 0x0061 /*'a'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/, 100 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000}, /* 23 */ 101 { 0x0061 /*'a'*/, 0x0062 /*'b'*/, 0x0062 /*'b'*/, 0x006f /*'o'*/, 0x0074 /*'t'*/, 0x0000}, /* 25 */ 102 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x002d /*'-'*/, 0x0070 /*'p'*/, 0x0000}, /* 27 */ 103 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000}, /* 28 */ 104 { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000}, /* 26 */ 105 { 0x007a /*'z'*/, 0x0065 /*'e'*/, 0x0062 /*'b'*/, 0x0072 /*'r'*/, 0x0061 /*'a'*/, 0x0000} /* 29 */ 106 }; 107 108 const static int32_t results[TESTLOCALES][TOTALTESTSET] = { 109 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_US */ 110 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_GB */ 111 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_CA */ 112 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_FR */ 113 { 12, 13, 9, 0, 14, 1, 11, 3, 2, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_CA */ 114 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* de_DE */ 115 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* it_IT */ 116 { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* ja_JP */ 117 /* new table collation with rules "& Z < p, P" loop to FIXEDTESTSET */ 118 { 12, 13, 9, 0, 6, 8, 10, 7, 14, 1, 11, 2, 3, 4, 5, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, 119 /* new table collation with rules "& C < ch , cH, Ch, CH " loop to TOTALTESTSET */ 120 { 19, 22, 21, 23, 24, 25, 12, 13, 9, 0, 17, 26, 28, 27, 15, 16, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 }, 121 /* new table collation with rules "& Question-mark ; ? & Hash-mark ; # & Ampersand ; '&' " loop to TOTALTESTSET */ 122 { 23, 24, 25, 22, 12, 13, 9, 0, 17, 16, 26, 28, 27, 15, 18, 21, 14, 1, 11, 2, 3, 4, 5, 19, 20, 6, 8, 10, 7, 29 }, 123 /* analogous to Japanese rules " & aa ; a- & ee ; e- & ii ; i- & oo ; o- & uu ; u- " */ /* loop to TOTALTESTSET */ 124 { 19, 22, 21, 24, 23, 25, 12, 13, 9, 0, 17, 16, 28, 26, 27, 15, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 } 125 }; 126 127 void addRuleBasedCollTest(TestNode** root) 128 { 129 addTest(root, &TestG7Locales, "tscoll/cg7coll/TestG7Locales"); 130 addTest(root, &TestDemo1, "tscoll/cg7coll/TestDemo1"); 131 addTest(root, &TestDemo2, "tscoll/cg7coll/TestDemo2"); 132 addTest(root, &TestDemo3, "tscoll/cg7coll/TestDemo3"); 133 addTest(root, &TestDemo4, "tscoll/cg7coll/TestDemo4"); 134 135 136 } 137 138 static void TestG7Locales() 139 { 140 UCollator *myCollation; 141 UErrorCode status = U_ZERO_ERROR; 142 const UChar *defRules; 143 int32_t i, rlen, j, n; 144 log_verbose("Testing ucol_openRules for all the locales\n"); 145 for (i = 0; i < UPRV_LENGTHOF(locales); i++) 146 { 147 const char *locale = locales[i]; 148 status = U_ZERO_ERROR; 149 myCollation = ucol_open(locale, &status); 150 ucol_setAttribute(myCollation, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 151 ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 152 153 if (U_FAILURE(status)) 154 { 155 log_err_status(status, "Error in creating collator in %s: %s\n", locale, myErrorName(status)); 156 ucol_close(myCollation); 157 continue; 158 } 159 160 defRules = ucol_getRules(myCollation, &rlen); 161 if (rlen == 0 && (strcmp(locale, "fr_CA") == 0 || strcmp(locale, "ja_JP") == 0)) { 162 log_data_err("%s UCollator missing rule string\n", locale); 163 if (log_knownIssue("10671", "TestG7Locales does not test ignore-punctuation")) { 164 ucol_close(myCollation); 165 continue; 166 } 167 } else { 168 UCollator *tblColl1; 169 status = U_ZERO_ERROR; 170 tblColl1 = ucol_openRules(defRules, rlen, UCOL_OFF, 171 UCOL_DEFAULT_STRENGTH,NULL, &status); 172 ucol_close(myCollation); 173 if (U_FAILURE(status)) 174 { 175 log_err_status(status, "Error in creating collator in %s: %s\n", locale, myErrorName(status)); 176 continue; 177 } 178 myCollation = tblColl1; 179 } 180 181 log_verbose("Locale %s\n", locales[i]); 182 log_verbose(" tests start...\n"); 183 184 j = 0; 185 n = 0; 186 for (j = 0; j < FIXEDTESTSET; j++) 187 { 188 for (n = j+1; n < FIXEDTESTSET; n++) 189 { 190 doTest(myCollation, testCases[results[i][j]], testCases[results[i][n]], UCOL_LESS); 191 } 192 } 193 194 ucol_close(myCollation); 195 } 196 } 197 198 static void TestDemo1() 199 { 200 UCollator *myCollation; 201 int32_t j, n; 202 static const char rules[] = "& Z < p, P"; 203 int32_t len=(int32_t)strlen(rules); 204 UChar temp[sizeof(rules)]; 205 UErrorCode status = U_ZERO_ERROR; 206 u_uastrcpy(temp, rules); 207 208 log_verbose("Demo Test 1 : Create a new table collation with rules \" & Z < p, P \" \n"); 209 210 myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); 211 212 if (U_FAILURE(status)) 213 { 214 log_err_status(status, "Demo Test 1 Rule collation object creation failed. : %s\n", myErrorName(status)); 215 return; 216 } 217 218 for (j = 0; j < FIXEDTESTSET; j++) 219 { 220 for (n = j+1; n < FIXEDTESTSET; n++) 221 { 222 doTest(myCollation, testCases[results[8][j]], testCases[results[8][n]], UCOL_LESS); 223 } 224 } 225 226 ucol_close(myCollation); 227 } 228 229 static void TestDemo2() 230 { 231 UCollator *myCollation; 232 int32_t j, n; 233 static const char rules[] = "& C < ch , cH, Ch, CH"; 234 int32_t len=(int32_t)strlen(rules); 235 UChar temp[sizeof(rules)]; 236 UErrorCode status = U_ZERO_ERROR; 237 u_uastrcpy(temp, rules); 238 239 log_verbose("Demo Test 2 : Create a new table collation with rules \"& C < ch , cH, Ch, CH\""); 240 241 myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); 242 243 if (U_FAILURE(status)) 244 { 245 log_err_status(status, "Demo Test 2 Rule collation object creation failed.: %s\n", myErrorName(status)); 246 return; 247 } 248 for (j = 0; j < TOTALTESTSET; j++) 249 { 250 for (n = j+1; n < TOTALTESTSET; n++) 251 { 252 doTest(myCollation, testCases[results[9][j]], testCases[results[9][n]], UCOL_LESS); 253 } 254 } 255 ucol_close(myCollation); 256 } 257 258 static void TestDemo3() 259 { 260 UCollator *myCollation; 261 int32_t j, n; 262 static const char rules[] = "& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'"; 263 int32_t len=(int32_t)strlen(rules); 264 UChar temp[sizeof(rules)]; 265 UErrorCode status = U_ZERO_ERROR; 266 u_uastrcpy(temp, rules); 267 268 log_verbose("Demo Test 3 : Create a new table collation with rules \"& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'\" \n"); 269 270 myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); 271 272 if (U_FAILURE(status)) 273 { 274 log_err_status(status, "Demo Test 3 Rule collation object creation failed.: %s\n", myErrorName(status)); 275 return; 276 } 277 278 for (j = 0; j < TOTALTESTSET; j++) 279 { 280 for (n = j+1; n < TOTALTESTSET; n++) 281 { 282 doTest(myCollation, testCases[results[10][j]], testCases[results[10][n]], UCOL_LESS); 283 } 284 } 285 ucol_close(myCollation); 286 } 287 288 static void TestDemo4() 289 { 290 UCollator *myCollation; 291 int32_t j, n; 292 static const char rules[] = " & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' "; 293 int32_t len=(int32_t)strlen(rules); 294 UChar temp[sizeof(rules)]; 295 UErrorCode status = U_ZERO_ERROR; 296 u_uastrcpy(temp, rules); 297 298 log_verbose("Demo Test 4 : Create a new table collation with rules \" & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' \"\n"); 299 300 myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status); 301 302 if (U_FAILURE(status)) 303 { 304 log_err_status(status, "Demo Test 4 Rule collation object creation failed.: %s\n", myErrorName(status)); 305 return; 306 } 307 for (j = 0; j < TOTALTESTSET; j++) 308 { 309 for (n = j+1; n < TOTALTESTSET; n++) 310 { 311 doTest(myCollation, testCases[results[11][j]], testCases[results[11][n]], UCOL_LESS); 312 } 313 } 314 ucol_close(myCollation); 315 } 316 317 #endif /* #if !UCONFIG_NO_COLLATION */ 318