Home | History | Annotate | Download | only in coll
      1 /*************************************************************************
      2  *
      3  *    2016 and later: Unicode, Inc. and others.
      4  *   License & terms of use: http://www.unicode.org/copyright.html#License
      5  *
      6  *************************************************************************
      7  *************************************************************************
      8  * COPYRIGHT:
      9  * Copyright (C) 2002-2006 IBM, Inc.   All Rights Reserved.
     10  *
     11  *************************************************************************/
     12 
     13 /**
     14  * This program demos string collation
     15  */
     16 
     17 const char gHelpString[] =
     18     "usage: coll [options*] -source source_string -target target_string\n"
     19     "-help            Display this message.\n"
     20     "-locale name     ICU locale to use.  Default is en_US\n"
     21     "-rules rule      Collation rules file (overrides locale)\n"
     22     "-french          French accent ordering\n"
     23     "-norm            Normalizing mode on\n"
     24     "-shifted         Shifted mode\n"
     25     "-lower           Lower case first\n"
     26     "-upper           Upper case first\n"
     27     "-case            Enable separate case level\n"
     28     "-level n         Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n"
     29 	"-source string   Source string for comparison\n"
     30 	"-target string   Target string for comparison\n"
     31     "Example coll -rules \\u0026b\\u003ca -source a -target b\n"
     32 	"The format \\uXXXX is supported for the rules and comparison strings\n"
     33 	;
     34 
     35 #include <stdio.h>
     36 #include <string.h>
     37 #include <stdlib.h>
     38 
     39 #include <unicode/utypes.h>
     40 #include <unicode/ucol.h>
     41 #include <unicode/ustring.h>
     42 
     43 /**
     44  * Command line option variables
     45  *    These global variables are set according to the options specified
     46  *    on the command line by the user.
     47  */
     48 char * opt_locale     = "en_US";
     49 char * opt_rules      = 0;
     50 UBool  opt_help       = FALSE;
     51 UBool  opt_norm       = FALSE;
     52 UBool  opt_french     = FALSE;
     53 UBool  opt_shifted    = FALSE;
     54 UBool  opt_lower      = FALSE;
     55 UBool  opt_upper      = FALSE;
     56 UBool  opt_case       = FALSE;
     57 int    opt_level      = 0;
     58 char * opt_source     = "abc";
     59 char * opt_target     = "abd";
     60 UCollator * collator  = 0;
     61 
     62 /**
     63  * Definitions for the command line options
     64  */
     65 struct OptSpec {
     66     const char *name;
     67     enum {FLAG, NUM, STRING} type;
     68     void *pVar;
     69 };
     70 
     71 OptSpec opts[] = {
     72     {"-locale",      OptSpec::STRING, &opt_locale},
     73     {"-rules",       OptSpec::STRING, &opt_rules},
     74 	{"-source",      OptSpec::STRING, &opt_source},
     75     {"-target",      OptSpec::STRING, &opt_target},
     76     {"-norm",        OptSpec::FLAG,   &opt_norm},
     77     {"-french",      OptSpec::FLAG,   &opt_french},
     78     {"-shifted",     OptSpec::FLAG,   &opt_shifted},
     79     {"-lower",       OptSpec::FLAG,   &opt_lower},
     80     {"-upper",       OptSpec::FLAG,   &opt_upper},
     81     {"-case",        OptSpec::FLAG,   &opt_case},
     82     {"-level",       OptSpec::NUM,    &opt_level},
     83     {"-help",        OptSpec::FLAG,   &opt_help},
     84     {"-?",           OptSpec::FLAG,   &opt_help},
     85     {0, OptSpec::FLAG, 0}
     86 };
     87 
     88 /**
     89  * processOptions()  Function to read the command line options.
     90  */
     91 UBool processOptions(int argc, const char **argv, OptSpec opts[])
     92 {
     93     for (int argNum = 1; argNum < argc; argNum ++) {
     94         const char *pArgName = argv[argNum];
     95         OptSpec *pOpt;
     96         for (pOpt = opts;  pOpt->name != 0; pOpt ++) {
     97             if (strcmp(pOpt->name, pArgName) == 0) {
     98                 switch (pOpt->type) {
     99                 case OptSpec::FLAG:
    100                     *(UBool *)(pOpt->pVar) = TRUE;
    101                     break;
    102                 case OptSpec::STRING:
    103                     argNum ++;
    104                     if (argNum >= argc) {
    105                         fprintf(stderr, "value expected for \"%s\" option.\n",
    106 							    pOpt->name);
    107                         return FALSE;
    108                     }
    109                     *(const char **)(pOpt->pVar) = argv[argNum];
    110                     break;
    111                 case OptSpec::NUM:
    112                     argNum ++;
    113                     if (argNum >= argc) {
    114                         fprintf(stderr, "value expected for \"%s\" option.\n",
    115 							    pOpt->name);
    116                         return FALSE;
    117                     }
    118                     char *endp;
    119                     int i = strtol(argv[argNum], &endp, 0);
    120                     if (endp == argv[argNum]) {
    121                         fprintf(stderr,
    122 							    "integer value expected for \"%s\" option.\n",
    123 								pOpt->name);
    124                         return FALSE;
    125                     }
    126                     *(int *)(pOpt->pVar) = i;
    127                 }
    128                 break;
    129             }
    130         }
    131         if (pOpt->name == 0)
    132         {
    133             fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
    134             return FALSE;
    135         }
    136     }
    137 	return TRUE;
    138 }
    139 
    140 /**
    141  * ICU string comparison
    142  */
    143 int strcmp()
    144 {
    145 	UChar source[100];
    146 	UChar target[100];
    147 	u_unescape(opt_source, source, 100);
    148 	u_unescape(opt_target, target, 100);
    149     UCollationResult result = ucol_strcoll(collator, source, -1, target, -1);
    150     if (result == UCOL_LESS) {
    151 		return -1;
    152 	}
    153     else if (result == UCOL_GREATER) {
    154 		return 1;
    155 	}
    156 	return 0;
    157 }
    158 
    159 /**
    160  * Creates a collator
    161  */
    162 UBool processCollator()
    163 {
    164 	// Set up an ICU collator
    165     UErrorCode status = U_ZERO_ERROR;
    166 	UChar rules[100];
    167 
    168     if (opt_rules != 0) {
    169 		u_unescape(opt_rules, rules, 100);
    170         collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY,
    171 			                  NULL, &status);
    172     }
    173     else {
    174         collator = ucol_open(opt_locale, &status);
    175     }
    176 	if (U_FAILURE(status)) {
    177         fprintf(stderr, "Collator creation failed.: %d\n", status);
    178         return FALSE;
    179     }
    180     if (status == U_USING_DEFAULT_WARNING) {
    181         fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n",
    182 			    opt_locale);
    183     }
    184     if (status == U_USING_FALLBACK_WARNING) {
    185         fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n",
    186 			    opt_locale);
    187     }
    188     if (opt_norm) {
    189         ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    190     }
    191     if (opt_french) {
    192         ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
    193     }
    194     if (opt_lower) {
    195         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST,
    196 			              &status);
    197     }
    198     if (opt_upper) {
    199         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST,
    200 			              &status);
    201     }
    202     if (opt_case) {
    203         ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status);
    204     }
    205     if (opt_shifted) {
    206         ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
    207 			              &status);
    208     }
    209     if (opt_level != 0) {
    210         switch (opt_level) {
    211         case 1:
    212             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
    213             break;
    214         case 2:
    215             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY,
    216 				              &status);
    217             break;
    218         case 3:
    219             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    220             break;
    221         case 4:
    222             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY,
    223 				              &status);
    224             break;
    225         case 5:
    226             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL,
    227 				              &status);
    228             break;
    229         default:
    230             fprintf(stderr, "-level param must be between 1 and 5\n");
    231             return FALSE;
    232         }
    233     }
    234     if (U_FAILURE(status)) {
    235         fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
    236         return FALSE;
    237     }
    238 	return TRUE;
    239 }
    240 
    241 /**
    242  * Main   --  process command line, read in and pre-process the test file,
    243  *            call other functions to do the actual tests.
    244  */
    245 int main(int argc, const char** argv)
    246 {
    247     if (processOptions(argc, argv, opts) != TRUE || opt_help) {
    248         printf(gHelpString);
    249         return -1;
    250     }
    251 
    252     if (processCollator() != TRUE) {
    253 		fprintf(stderr, "Error creating collator for comparison\n");
    254 		return -1;
    255 	}
    256 
    257 	fprintf(stdout, "Comparing source=%s and target=%s\n", opt_source,
    258 			opt_target);
    259 	int result = strcmp();
    260 	if (result == 0) {
    261 		fprintf(stdout, "source is equals to target\n");
    262 	}
    263 	else if (result < 0) {
    264 		fprintf(stdout, "source is less than target\n");
    265 	}
    266 	else {
    267 		fprintf(stdout, "source is greater than target\n");
    268 	}
    269 
    270 	ucol_close(collator);
    271 	return 0;
    272 }
    273