1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (C) 2002-2006 IBM, Inc. All Rights Reserved. 4 * 5 ********************************************************************/ 6 7 /** 8 * This program demos string collation 9 */ 10 11 const char gHelpString[] = 12 "usage: strsrch [options*] -source source_string -pattern pattern_string\n" 13 "-help Display this message.\n" 14 "-locale name ICU locale to use. Default is en_US\n" 15 "-rules rule Collation rules file (overrides locale)\n" 16 "-french French accent ordering\n" 17 "-norm Normalizing mode on\n" 18 "-shifted Shifted mode\n" 19 "-lower Lower case first\n" 20 "-upper Upper case first\n" 21 "-case Enable separate case level\n" 22 "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n" 23 "-source string Source string\n" 24 "-pattern string Pattern string to look for in source\n" 25 "-overlap Enable searching to be done on overlapping patterns\n" 26 "-canonical Enable searching to be done matching canonical equivalent patterns" 27 "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n" 28 "The format \\uXXXX is supported for the rules and comparison strings\n" 29 ; 30 31 #include <stdio.h> 32 #include <string.h> 33 #include <stdlib.h> 34 35 #include <unicode/utypes.h> 36 #include <unicode/ucol.h> 37 #include <unicode/usearch.h> 38 #include <unicode/ustring.h> 39 40 /** 41 * Command line option variables 42 * These global variables are set according to the options specified 43 * on the command line by the user. 44 */ 45 char * opt_locale = "en_US"; 46 char * opt_rules = 0; 47 UBool opt_help = FALSE; 48 UBool opt_norm = FALSE; 49 UBool opt_french = FALSE; 50 UBool opt_shifted = FALSE; 51 UBool opt_lower = FALSE; 52 UBool opt_upper = FALSE; 53 UBool opt_case = FALSE; 54 UBool opt_overlap = FALSE; 55 UBool opt_canonical = FALSE; 56 int opt_level = 0; 57 char * opt_source = "International Components for Unicode"; 58 char * opt_pattern = "Unicode"; 59 UCollator * collator = 0; 60 UStringSearch * search = 0; 61 UChar rules[100]; 62 UChar source[100]; 63 UChar pattern[100]; 64 65 /** 66 * Definitions for the command line options 67 */ 68 struct OptSpec { 69 const char *name; 70 enum {FLAG, NUM, STRING} type; 71 void *pVar; 72 }; 73 74 OptSpec opts[] = { 75 {"-locale", OptSpec::STRING, &opt_locale}, 76 {"-rules", OptSpec::STRING, &opt_rules}, 77 {"-source", OptSpec::STRING, &opt_source}, 78 {"-pattern", OptSpec::STRING, &opt_pattern}, 79 {"-norm", OptSpec::FLAG, &opt_norm}, 80 {"-french", OptSpec::FLAG, &opt_french}, 81 {"-shifted", OptSpec::FLAG, &opt_shifted}, 82 {"-lower", OptSpec::FLAG, &opt_lower}, 83 {"-upper", OptSpec::FLAG, &opt_upper}, 84 {"-case", OptSpec::FLAG, &opt_case}, 85 {"-level", OptSpec::NUM, &opt_level}, 86 {"-overlap", OptSpec::FLAG, &opt_overlap}, 87 {"-canonical", OptSpec::FLAG, &opt_canonical}, 88 {"-help", OptSpec::FLAG, &opt_help}, 89 {"-?", OptSpec::FLAG, &opt_help}, 90 {0, OptSpec::FLAG, 0} 91 }; 92 93 /** 94 * processOptions() Function to read the command line options. 95 */ 96 UBool processOptions(int argc, const char **argv, OptSpec opts[]) 97 { 98 for (int argNum = 1; argNum < argc; argNum ++) { 99 const char *pArgName = argv[argNum]; 100 OptSpec *pOpt; 101 for (pOpt = opts; pOpt->name != 0; pOpt ++) { 102 if (strcmp(pOpt->name, pArgName) == 0) { 103 switch (pOpt->type) { 104 case OptSpec::FLAG: 105 *(UBool *)(pOpt->pVar) = TRUE; 106 break; 107 case OptSpec::STRING: 108 argNum ++; 109 if (argNum >= argc) { 110 fprintf(stderr, "value expected for \"%s\" option.\n", 111 pOpt->name); 112 return FALSE; 113 } 114 *(const char **)(pOpt->pVar) = argv[argNum]; 115 break; 116 case OptSpec::NUM: 117 argNum ++; 118 if (argNum >= argc) { 119 fprintf(stderr, "value expected for \"%s\" option.\n", 120 pOpt->name); 121 return FALSE; 122 } 123 char *endp; 124 int i = strtol(argv[argNum], &endp, 0); 125 if (endp == argv[argNum]) { 126 fprintf(stderr, 127 "integer value expected for \"%s\" option.\n", 128 pOpt->name); 129 return FALSE; 130 } 131 *(int *)(pOpt->pVar) = i; 132 } 133 break; 134 } 135 } 136 if (pOpt->name == 0) 137 { 138 fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName); 139 return FALSE; 140 } 141 } 142 return TRUE; 143 } 144 145 /** 146 * Creates a collator 147 */ 148 UBool processCollator() 149 { 150 // Set up an ICU collator 151 UErrorCode status = U_ZERO_ERROR; 152 153 if (opt_rules != 0) { 154 u_unescape(opt_rules, rules, 100); 155 collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY, 156 NULL, &status); 157 } 158 else { 159 collator = ucol_open(opt_locale, &status); 160 } 161 if (U_FAILURE(status)) { 162 fprintf(stderr, "Collator creation failed.: %d\n", status); 163 return FALSE; 164 } 165 if (status == U_USING_DEFAULT_WARNING) { 166 fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", 167 opt_locale); 168 } 169 if (status == U_USING_FALLBACK_WARNING) { 170 fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", 171 opt_locale); 172 } 173 if (opt_norm) { 174 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 175 } 176 if (opt_french) { 177 ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status); 178 } 179 if (opt_lower) { 180 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, 181 &status); 182 } 183 if (opt_upper) { 184 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, 185 &status); 186 } 187 if (opt_case) { 188 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status); 189 } 190 if (opt_shifted) { 191 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, 192 &status); 193 } 194 if (opt_level != 0) { 195 switch (opt_level) { 196 case 1: 197 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status); 198 break; 199 case 2: 200 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY, 201 &status); 202 break; 203 case 3: 204 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status); 205 break; 206 case 4: 207 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY, 208 &status); 209 break; 210 case 5: 211 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL, 212 &status); 213 break; 214 default: 215 fprintf(stderr, "-level param must be between 1 and 5\n"); 216 return FALSE; 217 } 218 } 219 if (U_FAILURE(status)) { 220 fprintf(stderr, "Collator attribute setting failed.: %d\n", status); 221 return FALSE; 222 } 223 return TRUE; 224 } 225 226 /** 227 * Creates a string search 228 */ 229 UBool processStringSearch() 230 { 231 u_unescape(opt_source, source, 100); 232 u_unescape(opt_pattern, pattern, 100); 233 UErrorCode status = U_ZERO_ERROR; 234 search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL, 235 &status); 236 if (U_FAILURE(status)) { 237 return FALSE; 238 } 239 if (opt_overlap == TRUE) { 240 usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status); 241 } 242 if (opt_canonical == TRUE) { 243 usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON, 244 &status); 245 } 246 if (U_FAILURE(status)) { 247 fprintf(stderr, "Error setting search attributes\n"); 248 return FALSE; 249 } 250 return TRUE; 251 } 252 253 UBool findPattern() 254 { 255 UErrorCode status = U_ZERO_ERROR; 256 int32_t offset = usearch_next(search, &status); 257 if (offset == USEARCH_DONE) { 258 fprintf(stdout, "Pattern not found in source\n"); 259 } 260 while (offset != USEARCH_DONE) { 261 fprintf(stdout, "Pattern found at offset %d size %d\n", offset, 262 usearch_getMatchedLength(search)); 263 offset = usearch_next(search, &status); 264 } 265 if (U_FAILURE(status)) { 266 fprintf(stderr, "Error in searching for pattern %d\n", status); 267 return FALSE; 268 } 269 fprintf(stdout, "End of search\n"); 270 return TRUE; 271 } 272 273 /** 274 * Main -- process command line, read in and pre-process the test file, 275 * call other functions to do the actual tests. 276 */ 277 int main(int argc, const char** argv) 278 { 279 if (processOptions(argc, argv, opts) != TRUE || opt_help) { 280 printf(gHelpString); 281 return -1; 282 } 283 284 if (processCollator() != TRUE) { 285 fprintf(stderr, "Error creating collator\n"); 286 return -1; 287 } 288 289 if (processStringSearch() != TRUE) { 290 fprintf(stderr, "Error creating string search\n"); 291 return -1; 292 } 293 294 fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern, 295 opt_source); 296 297 findPattern(); 298 ucol_close(collator); 299 usearch_close(search); 300 return 0; 301 } 302