1 /************************************************************************* 2 * 3 * 2016 and later: Unicode, Inc. and others. 4 * License & terms of use: http://www.unicode.org/copyright.html#License 5 * 6 ************************************************************************* 7 ************************************************************************* 8 * COPYRIGHT: 9 * Copyright (C) 2002-2006 IBM, Inc. All Rights Reserved. 10 * 11 *************************************************************************/ 12 13 /** 14 * This program demos string collation 15 */ 16 17 const char gHelpString[] = 18 "usage: coll [options*] -source source_string -target target_string\n" 19 "-help Display this message.\n" 20 "-locale name ICU locale to use. Default is en_US\n" 21 "-rules rule Collation rules file (overrides locale)\n" 22 "-french French accent ordering\n" 23 "-norm Normalizing mode on\n" 24 "-shifted Shifted mode\n" 25 "-lower Lower case first\n" 26 "-upper Upper case first\n" 27 "-case Enable separate case level\n" 28 "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n" 29 "-source string Source string for comparison\n" 30 "-target string Target string for comparison\n" 31 "Example coll -rules \\u0026b\\u003ca -source a -target b\n" 32 "The format \\uXXXX is supported for the rules and comparison strings\n" 33 ; 34 35 #include <stdio.h> 36 #include <string.h> 37 #include <stdlib.h> 38 39 #include <unicode/utypes.h> 40 #include <unicode/ucol.h> 41 #include <unicode/ustring.h> 42 43 /** 44 * Command line option variables 45 * These global variables are set according to the options specified 46 * on the command line by the user. 47 */ 48 char * opt_locale = "en_US"; 49 char * opt_rules = 0; 50 UBool opt_help = FALSE; 51 UBool opt_norm = FALSE; 52 UBool opt_french = FALSE; 53 UBool opt_shifted = FALSE; 54 UBool opt_lower = FALSE; 55 UBool opt_upper = FALSE; 56 UBool opt_case = FALSE; 57 int opt_level = 0; 58 char * opt_source = "abc"; 59 char * opt_target = "abd"; 60 UCollator * collator = 0; 61 62 /** 63 * Definitions for the command line options 64 */ 65 struct OptSpec { 66 const char *name; 67 enum {FLAG, NUM, STRING} type; 68 void *pVar; 69 }; 70 71 OptSpec opts[] = { 72 {"-locale", OptSpec::STRING, &opt_locale}, 73 {"-rules", OptSpec::STRING, &opt_rules}, 74 {"-source", OptSpec::STRING, &opt_source}, 75 {"-target", OptSpec::STRING, &opt_target}, 76 {"-norm", OptSpec::FLAG, &opt_norm}, 77 {"-french", OptSpec::FLAG, &opt_french}, 78 {"-shifted", OptSpec::FLAG, &opt_shifted}, 79 {"-lower", OptSpec::FLAG, &opt_lower}, 80 {"-upper", OptSpec::FLAG, &opt_upper}, 81 {"-case", OptSpec::FLAG, &opt_case}, 82 {"-level", OptSpec::NUM, &opt_level}, 83 {"-help", OptSpec::FLAG, &opt_help}, 84 {"-?", OptSpec::FLAG, &opt_help}, 85 {0, OptSpec::FLAG, 0} 86 }; 87 88 /** 89 * processOptions() Function to read the command line options. 90 */ 91 UBool processOptions(int argc, const char **argv, OptSpec opts[]) 92 { 93 for (int argNum = 1; argNum < argc; argNum ++) { 94 const char *pArgName = argv[argNum]; 95 OptSpec *pOpt; 96 for (pOpt = opts; pOpt->name != 0; pOpt ++) { 97 if (strcmp(pOpt->name, pArgName) == 0) { 98 switch (pOpt->type) { 99 case OptSpec::FLAG: 100 *(UBool *)(pOpt->pVar) = TRUE; 101 break; 102 case OptSpec::STRING: 103 argNum ++; 104 if (argNum >= argc) { 105 fprintf(stderr, "value expected for \"%s\" option.\n", 106 pOpt->name); 107 return FALSE; 108 } 109 *(const char **)(pOpt->pVar) = argv[argNum]; 110 break; 111 case OptSpec::NUM: 112 argNum ++; 113 if (argNum >= argc) { 114 fprintf(stderr, "value expected for \"%s\" option.\n", 115 pOpt->name); 116 return FALSE; 117 } 118 char *endp; 119 int i = strtol(argv[argNum], &endp, 0); 120 if (endp == argv[argNum]) { 121 fprintf(stderr, 122 "integer value expected for \"%s\" option.\n", 123 pOpt->name); 124 return FALSE; 125 } 126 *(int *)(pOpt->pVar) = i; 127 } 128 break; 129 } 130 } 131 if (pOpt->name == 0) 132 { 133 fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName); 134 return FALSE; 135 } 136 } 137 return TRUE; 138 } 139 140 /** 141 * ICU string comparison 142 */ 143 int strcmp() 144 { 145 UChar source[100]; 146 UChar target[100]; 147 u_unescape(opt_source, source, 100); 148 u_unescape(opt_target, target, 100); 149 UCollationResult result = ucol_strcoll(collator, source, -1, target, -1); 150 if (result == UCOL_LESS) { 151 return -1; 152 } 153 else if (result == UCOL_GREATER) { 154 return 1; 155 } 156 return 0; 157 } 158 159 /** 160 * Creates a collator 161 */ 162 UBool processCollator() 163 { 164 // Set up an ICU collator 165 UErrorCode status = U_ZERO_ERROR; 166 UChar rules[100]; 167 168 if (opt_rules != 0) { 169 u_unescape(opt_rules, rules, 100); 170 collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY, 171 NULL, &status); 172 } 173 else { 174 collator = ucol_open(opt_locale, &status); 175 } 176 if (U_FAILURE(status)) { 177 fprintf(stderr, "Collator creation failed.: %d\n", status); 178 return FALSE; 179 } 180 if (status == U_USING_DEFAULT_WARNING) { 181 fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", 182 opt_locale); 183 } 184 if (status == U_USING_FALLBACK_WARNING) { 185 fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", 186 opt_locale); 187 } 188 if (opt_norm) { 189 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 190 } 191 if (opt_french) { 192 ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status); 193 } 194 if (opt_lower) { 195 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, 196 &status); 197 } 198 if (opt_upper) { 199 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, 200 &status); 201 } 202 if (opt_case) { 203 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status); 204 } 205 if (opt_shifted) { 206 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, 207 &status); 208 } 209 if (opt_level != 0) { 210 switch (opt_level) { 211 case 1: 212 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status); 213 break; 214 case 2: 215 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY, 216 &status); 217 break; 218 case 3: 219 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status); 220 break; 221 case 4: 222 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY, 223 &status); 224 break; 225 case 5: 226 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL, 227 &status); 228 break; 229 default: 230 fprintf(stderr, "-level param must be between 1 and 5\n"); 231 return FALSE; 232 } 233 } 234 if (U_FAILURE(status)) { 235 fprintf(stderr, "Collator attribute setting failed.: %d\n", status); 236 return FALSE; 237 } 238 return TRUE; 239 } 240 241 /** 242 * Main -- process command line, read in and pre-process the test file, 243 * call other functions to do the actual tests. 244 */ 245 int main(int argc, const char** argv) 246 { 247 if (processOptions(argc, argv, opts) != TRUE || opt_help) { 248 printf(gHelpString); 249 return -1; 250 } 251 252 if (processCollator() != TRUE) { 253 fprintf(stderr, "Error creating collator for comparison\n"); 254 return -1; 255 } 256 257 fprintf(stdout, "Comparing source=%s and target=%s\n", opt_source, 258 opt_target); 259 int result = strcmp(); 260 if (result == 0) { 261 fprintf(stdout, "source is equals to target\n"); 262 } 263 else if (result < 0) { 264 fprintf(stdout, "source is less than target\n"); 265 } 266 else { 267 fprintf(stdout, "source is greater than target\n"); 268 } 269 270 ucol_close(collator); 271 return 0; 272 } 273