Home | History | Annotate | Download | only in strsrch
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (C) 2002-2006 IBM, Inc.   All Rights Reserved.
      4  *
      5  ********************************************************************/
      6 
      7 /**
 * This program demos string searching driven by an ICU collator
      9  */
     10 
     11 const char gHelpString[] =
     12     "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
     13     "-help            Display this message.\n"
     14     "-locale name     ICU locale to use.  Default is en_US\n"
     15     "-rules rule      Collation rules file (overrides locale)\n"
     16     "-french          French accent ordering\n"
     17     "-norm            Normalizing mode on\n"
     18     "-shifted         Shifted mode\n"
     19     "-lower           Lower case first\n"
     20     "-upper           Upper case first\n"
     21     "-case            Enable separate case level\n"
     22     "-level n         Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n"
     23 	"-source string   Source string\n"
     24 	"-pattern string  Pattern string to look for in source\n"
     25 	"-overlap         Enable searching to be done on overlapping patterns\n"
     26 	"-canonical       Enable searching to be done matching canonical equivalent patterns"
     27     "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
     28 	"The format \\uXXXX is supported for the rules and comparison strings\n"
     29 	;
     30 
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <unicode/utypes.h>
#include <unicode/ucol.h>
#include <unicode/usearch.h>
#include <unicode/ustring.h>
     39 
     40 /**
     41  * Command line option variables
     42  *    These global variables are set according to the options specified
     43  *    on the command line by the user.
     44  */
     45 char * opt_locale      = "en_US";
     46 char * opt_rules       = 0;
     47 UBool  opt_help        = FALSE;
     48 UBool  opt_norm        = FALSE;
     49 UBool  opt_french      = FALSE;
     50 UBool  opt_shifted     = FALSE;
     51 UBool  opt_lower       = FALSE;
     52 UBool  opt_upper       = FALSE;
     53 UBool  opt_case        = FALSE;
     54 UBool  opt_overlap     = FALSE;
     55 UBool  opt_canonical   = FALSE;
     56 int    opt_level       = 0;
     57 char * opt_source      = "International Components for Unicode";
     58 char * opt_pattern     = "Unicode";
     59 UCollator * collator   = 0;
     60 UStringSearch * search = 0;
     61 UChar rules[100];
     62 UChar source[100];
     63 UChar pattern[100];
     64 
     65 /**
     66  * Definitions for the command line options
     67  */
     68 struct OptSpec {
     69     const char *name;
     70     enum {FLAG, NUM, STRING} type;
     71     void *pVar;
     72 };
     73 
     74 OptSpec opts[] = {
     75     {"-locale",      OptSpec::STRING, &opt_locale},
     76     {"-rules",       OptSpec::STRING, &opt_rules},
     77 	{"-source",      OptSpec::STRING, &opt_source},
     78     {"-pattern",     OptSpec::STRING, &opt_pattern},
     79     {"-norm",        OptSpec::FLAG,   &opt_norm},
     80     {"-french",      OptSpec::FLAG,   &opt_french},
     81     {"-shifted",     OptSpec::FLAG,   &opt_shifted},
     82     {"-lower",       OptSpec::FLAG,   &opt_lower},
     83     {"-upper",       OptSpec::FLAG,   &opt_upper},
     84     {"-case",        OptSpec::FLAG,   &opt_case},
     85     {"-level",       OptSpec::NUM,    &opt_level},
     86 	{"-overlap",     OptSpec::FLAG,   &opt_overlap},
     87 	{"-canonical",   OptSpec::FLAG,   &opt_canonical},
     88     {"-help",        OptSpec::FLAG,   &opt_help},
     89     {"-?",           OptSpec::FLAG,   &opt_help},
     90     {0, OptSpec::FLAG, 0}
     91 };
     92 
     93 /**
     94  * processOptions()  Function to read the command line options.
     95  */
     96 UBool processOptions(int argc, const char **argv, OptSpec opts[])
     97 {
     98     for (int argNum = 1; argNum < argc; argNum ++) {
     99         const char *pArgName = argv[argNum];
    100         OptSpec *pOpt;
    101         for (pOpt = opts;  pOpt->name != 0; pOpt ++) {
    102             if (strcmp(pOpt->name, pArgName) == 0) {
    103                 switch (pOpt->type) {
    104                 case OptSpec::FLAG:
    105                     *(UBool *)(pOpt->pVar) = TRUE;
    106                     break;
    107                 case OptSpec::STRING:
    108                     argNum ++;
    109                     if (argNum >= argc) {
    110                         fprintf(stderr, "value expected for \"%s\" option.\n",
    111 							    pOpt->name);
    112                         return FALSE;
    113                     }
    114                     *(const char **)(pOpt->pVar) = argv[argNum];
    115                     break;
    116                 case OptSpec::NUM:
    117                     argNum ++;
    118                     if (argNum >= argc) {
    119                         fprintf(stderr, "value expected for \"%s\" option.\n",
    120 							    pOpt->name);
    121                         return FALSE;
    122                     }
    123                     char *endp;
    124                     int i = strtol(argv[argNum], &endp, 0);
    125                     if (endp == argv[argNum]) {
    126                         fprintf(stderr,
    127 							    "integer value expected for \"%s\" option.\n",
    128 								pOpt->name);
    129                         return FALSE;
    130                     }
    131                     *(int *)(pOpt->pVar) = i;
    132                 }
    133                 break;
    134             }
    135         }
    136         if (pOpt->name == 0)
    137         {
    138             fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
    139             return FALSE;
    140         }
    141     }
    142 	return TRUE;
    143 }
    144 
    145 /**
    146  * Creates a collator
    147  */
    148 UBool processCollator()
    149 {
    150 	// Set up an ICU collator
    151     UErrorCode status = U_ZERO_ERROR;
    152 
    153     if (opt_rules != 0) {
    154 		u_unescape(opt_rules, rules, 100);
    155         collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY,
    156 			                  NULL, &status);
    157     }
    158     else {
    159         collator = ucol_open(opt_locale, &status);
    160     }
    161 	if (U_FAILURE(status)) {
    162         fprintf(stderr, "Collator creation failed.: %d\n", status);
    163         return FALSE;
    164     }
    165     if (status == U_USING_DEFAULT_WARNING) {
    166         fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n",
    167 			    opt_locale);
    168     }
    169     if (status == U_USING_FALLBACK_WARNING) {
    170         fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n",
    171 			    opt_locale);
    172     }
    173     if (opt_norm) {
    174         ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    175     }
    176     if (opt_french) {
    177         ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
    178     }
    179     if (opt_lower) {
    180         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST,
    181 			              &status);
    182     }
    183     if (opt_upper) {
    184         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST,
    185 			              &status);
    186     }
    187     if (opt_case) {
    188         ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status);
    189     }
    190     if (opt_shifted) {
    191         ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
    192 			              &status);
    193     }
    194     if (opt_level != 0) {
    195         switch (opt_level) {
    196         case 1:
    197             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
    198             break;
    199         case 2:
    200             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY,
    201 				              &status);
    202             break;
    203         case 3:
    204             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    205             break;
    206         case 4:
    207             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY,
    208 				              &status);
    209             break;
    210         case 5:
    211             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL,
    212 				              &status);
    213             break;
    214         default:
    215             fprintf(stderr, "-level param must be between 1 and 5\n");
    216             return FALSE;
    217         }
    218     }
    219     if (U_FAILURE(status)) {
    220         fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
    221         return FALSE;
    222     }
    223 	return TRUE;
    224 }
    225 
    226 /**
    227  * Creates a string search
    228  */
    229 UBool processStringSearch()
    230 {
    231 	u_unescape(opt_source, source, 100);
    232 	u_unescape(opt_pattern, pattern, 100);
    233 	UErrorCode status = U_ZERO_ERROR;
    234 	search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL,
    235 		                              &status);
    236 	if (U_FAILURE(status)) {
    237 		return FALSE;
    238 	}
    239 	if (opt_overlap == TRUE) {
    240 		usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status);
    241 	}
    242 	if (opt_canonical == TRUE) {
    243 		usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON,
    244 			                 &status);
    245 	}
    246 	if (U_FAILURE(status)) {
    247 		fprintf(stderr, "Error setting search attributes\n");
    248 		return FALSE;
    249 	}
    250 	return TRUE;
    251 }
    252 
    253 UBool findPattern()
    254 {
    255 	UErrorCode status = U_ZERO_ERROR;
    256 	int32_t offset = usearch_next(search, &status);
    257 	if (offset == USEARCH_DONE) {
    258 		fprintf(stdout, "Pattern not found in source\n");
    259 	}
    260 	while (offset != USEARCH_DONE) {
    261 		fprintf(stdout, "Pattern found at offset %d size %d\n", offset,
    262 				usearch_getMatchedLength(search));
    263 		offset = usearch_next(search, &status);
    264 	}
    265 	if (U_FAILURE(status)) {
    266 		fprintf(stderr, "Error in searching for pattern %d\n", status);
    267 		return FALSE;
    268 	}
    269 	fprintf(stdout, "End of search\n");
    270 	return TRUE;
    271 }
    272 
    273 /**
    274  * Main   --  process command line, read in and pre-process the test file,
    275  *            call other functions to do the actual tests.
    276  */
    277 int main(int argc, const char** argv)
    278 {
    279     if (processOptions(argc, argv, opts) != TRUE || opt_help) {
    280         printf(gHelpString);
    281         return -1;
    282     }
    283 
    284     if (processCollator() != TRUE) {
    285 		fprintf(stderr, "Error creating collator\n");
    286 		return -1;
    287 	}
    288 
    289 	if (processStringSearch() != TRUE) {
    290 		fprintf(stderr, "Error creating string search\n");
    291 		return -1;
    292 	}
    293 
    294 	fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern,
    295 		    opt_source);
    296 
    297 	findPattern();
    298 	ucol_close(collator);
    299 	usearch_close(search);
    300 	return 0;
    301 }
    302