Home | History | Annotate | Download | only in cintltst
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /********************************************************************
      4  * COPYRIGHT:
      5  * Copyright (c) 1997-2016, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  ********************************************************************/
      8 /********************************************************************************
      9 *
     10 * File CFRTST.C
     11 *
     12 * Modification History:
     13 *        Name                     Description
     14 *     Madhu Katragadda            Ported for C API
     15 *********************************************************************************/
/**
 * CollationFrenchTest is a third level test class.  This tests the locale
 * specific primary, secondary and tertiary rules, for example the handling of
 * the ignorable character '-' in a string such as "co-op".  Every test in
 * this file opens the fr_CA collator.
 */
     22 
     23 #include <stdlib.h>
     24 
     25 #include "unicode/utypes.h"
     26 
     27 #if !UCONFIG_NO_COLLATION
     28 
     29 #include "unicode/ucol.h"
     30 #include "unicode/uloc.h"
     31 #include "cintltst.h"
     32 #include "ccolltst.h"
     33 #include "callcoll.h"
     34 #include "cfrtst.h"
     35 #include "cmemory.h"
     36 #include "unicode/ustring.h"
     37 #include "string.h"
     38 
     39 static  UCollator *myCollation;
     40 const static UChar testSourceCases[][MAX_TOKEN_LEN] =
     41 {
     42     {0x0061/*'a'*/, 0x0062/*'b'*/, 0x0063/*'c'*/, 0x0000},
     43     {0x0043/*'C'*/, 0x004f/*'O'*/, 0x0054/*'T'*/, 0x0045/*'E'*/, 0x0000},
     44     {0x0063/*'c'*/, 0x006f/*'o'*/, 0x002d/*'-'*/, 0x006f/*'o'*/, 0x0070/*'p'*/, 0x0000},
     45     {0x0070/*'p'*/, 0x00EA, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0000},
     46     {0x0070/*'p'*/, 0x00EA, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0072/*'r'*/, 0x0000},
     47     {0x0070/*'p'*/, 0x00E9, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0072/*'r'*/, 0x0000},
     48     {0x0070/*'p'*/, 0x00E9, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0072/*'r'*/, 0x0000},
     49     {0x0048/*'H'*/, 0x0065/*'e'*/, 0x006c/*'l'*/, 0x006c/*'l'*/, 0x006f/*'o'*/, 0x0000},
     50     {0x01f1, 0x0000},
     51     {0xfb00, 0x0000},
     52     {0x01fa, 0x0000},
     53     {0x0101, 0x0000}
     54 };
     55 
     56 const static UChar testTargetCases[][MAX_TOKEN_LEN] =
     57 {
     58     {0x0041/*'A'*/, 0x0042/*'B'*/, 0x0043/*'C'*/, 0x0000},
     59     {0x0063/*'c'*/, 0x00f4, 0x0074/*'t'*/, 0x0065/*'e'*/, 0x0000},
     60     {0x0043/*'C'*/, 0x004f/*'O'*/, 0x004f/*'O'*/, 0x0050/*'P'*/, 0x0000},
     61     {0x0070/*'p'*/, 0x00E9, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x00E9, 0x0000},
     62     {0x0070/*'p'*/,  0x00E9, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x00E9, 0x0000},
     63     {0x0070/*'p'*/, 0x00EA, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0000},
     64     {0x0070/*'p'*/, 0x00EA, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0072/*'r'*/, 0x0000},
     65     {0x0068/*'h'*/, 0x0065/*'e'*/, 0x006c/*'l'*/, 0x006c/*'l'*/, 0x004f/*'O'*/, 0x0000},
     66     {0x01ee, 0x0000},
     67     {0x25ca, 0x0000},
     68     {0x00e0, 0x0000},
     69     {0x01df, 0x0000}
     70 };
     71 
     72 const static UCollationResult results[] =
     73 {
     74     UCOL_LESS,
     75     UCOL_LESS,
     76     UCOL_LESS, /*UCOL_GREATER,*/
     77     UCOL_LESS,
     78     UCOL_GREATER,
     79     UCOL_GREATER,
     80     UCOL_LESS,
     81     UCOL_GREATER,
     82     UCOL_LESS, /*UCOL_GREATER,*/
     83     UCOL_GREATER,
     84     UCOL_LESS,
     85     UCOL_LESS
     86 };
     87 
     88 /* 0x0300 is grave, 0x0301 is acute*/
     89 /* the order of elements in this array must be different than the order in CollationEnglishTest*/
     90 const static UChar testAcute[][MAX_TOKEN_LEN] =
     91 {
     92 /*00*/    {0x0065/*'e'*/, 0x0065/*'e'*/,  0x0000},
     93 /*01*/    {0x0065/*'e'*/, 0x0301, 0x0065/*'e'*/,  0x0000},
     94 /*02*/    {0x0065/*'e'*/, 0x0300, 0x0301, 0x0065/*'e'*/,  0x0000},
     95 /*03*/    {0x0065/*'e'*/, 0x0300, 0x0065/*'e'*/,  0x0000},
     96 /*04*/    {0x0065/*'e'*/, 0x0301, 0x0300, 0x0065/*'e'*/,  0x0000},
     97 /*05*/    {0x0065/*'e'*/, 0x0065/*'e'*/, 0x0301, 0x0000},
     98 /*06*/    {0x0065/*'e'*/, 0x0301, 0x0065/*'e'*/, 0x0301, 0x0000},
     99 /*07*/    {0x0065/*'e'*/, 0x0300, 0x0301, 0x0065/*'e'*/, 0x0301, 0x0000},
    100 /*08*/    {0x0065/*'e'*/, 0x0300, 0x0065/*'e'*/, 0x0301, 0x0000},
    101 /*09*/    {0x0065/*'e'*/, 0x0301, 0x0300, 0x0065/*'e'*/, 0x0301, 0x0000},
    102 /*0a*/    {0x0065/*'e'*/, 0x0065/*'e'*/, 0x0300, 0x0301, 0x0000},
    103 /*0b*/    {0x0065/*'e'*/, 0x0301, 0x0065/*'e'*/, 0x0300, 0x0301, 0x0000},
    104 /*0c*/    {0x0065/*'e'*/, 0x0300, 0x0301, 0x0065/*'e'*/, 0x0300, 0x0301, 0x0000},
    105 /*0d*/    {0x0065/*'e'*/, 0x0300, 0x0065/*'e'*/, 0x0300, 0x0301, 0x0000},
    106 /*0e*/    {0x0065/*'e'*/, 0x0301, 0x0300, 0x0065/*'e'*/, 0x0300, 0x0301, 0x0000},
    107 /*0f*/    {0x0065/*'e'*/, 0x0065/*'e'*/, 0x0300, 0x0000},
    108 /*10*/    {0x0065/*'e'*/, 0x0301, 0x0065/*'e'*/, 0x0300, 0x0000},
    109 /*11*/    {0x0065/*'e'*/, 0x0300, 0x0301, 0x0065/*'e'*/, 0x0300, 0x0000},
    110 /*12*/    {0x0065/*'e'*/, 0x0300, 0x0065/*'e'*/, 0x0300, 0x0000},
    111 /*13*/    {0x0065/*'e'*/, 0x0301, 0x0300, 0x0065/*'e'*/, 0x0300, 0x0000},
    112 /*14*/    {0x0065/*'e'*/, 0x0065/*'e'*/, 0x0301, 0x0300, 0x0000},
    113 /*15*/    {0x0065/*'e'*/, 0x0301, 0x0065/*'e'*/, 0x0301, 0x0300, 0x0000},
    114 /*16*/    {0x0065/*'e'*/, 0x0300, 0x0301, 0x0065/*'e'*/, 0x0301, 0x0300, 0x0000},
    115 /*17*/    {0x0065/*'e'*/, 0x0300, 0x0065/*'e'*/, 0x0301, 0x0300, 0x0000},
    116 /*18*/    {0x0065/*'e'*/, 0x0301, 0x0300, 0x0065/*'e'*/, 0x0301, 0x0300, 0x0000}
    117 };
    118 
    119 const static UChar testBugs[][MAX_TOKEN_LEN] =
    120 {
    121     {0x0061/*'a'*/, 0x000},
    122     {0x0041/*'A'*/, 0x000},
    123     {0x0065/*'e'*/, 0x000},
    124     {0x0045/*'E'*/, 0x000},
    125     {0x00e9, 0x000},
    126     {0x00e8, 0x000},
    127     {0x00ea, 0x000},
    128     {0x00eb, 0x000},
    129     {0x0065/*'e'*/, 0x0061/*'a'*/, 0x000},
    130     {0x0078/*'x'*/, 0x000}
    131 };
    132 
    133 
    134 static void TestGetSortKey(void);
    135 
    136 
    137 void addFrenchCollTest(TestNode** root)
    138 {
    139     addTest(root, &TestSecondary, "tscoll/cfrtst/TestSecondary");
    140     addTest(root, &TestTertiary, "tscoll/cfrtst/TestTertiary");
    141     addTest(root, &TestExtra, "tscoll/cfrtst/TestExtra");
    142     addTest(root, &TestGetSortKey, "tscoll/cfrtst/TestGetSortKey");
    143 }
    144 
    145 
    146 static void TestTertiary( )
    147 {
    148 
    149     int32_t i;
    150     UErrorCode status = U_ZERO_ERROR;
    151     myCollation = ucol_open("fr_CA", &status);
    152     if(U_FAILURE(status) || !myCollation){
    153         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    154         return;
    155     }
    156 
    157     ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    158     if(U_FAILURE(status)){
    159         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    160         return;
    161     }
    162     log_verbose("Testing fr_CA Collation with Tertiary strength\n");
    163     ucol_setStrength(myCollation, UCOL_QUATERNARY);
    164     for (i = 0; i < 12 ; i++)
    165     {
    166         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    167     }
    168     ucol_close(myCollation);
    169 }
    170 
    171 static void TestSecondary()
    172 {
    173     int32_t i,j, testAcuteSize;
    174     UCollationResult expected=UCOL_EQUAL;
    175     UErrorCode status = U_ZERO_ERROR;
    176     myCollation = ucol_open("fr_CA", &status);
    177     if(U_FAILURE(status)){
    178         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    179         return;
    180     }
    181     ucol_setAttribute(myCollation, UCOL_STRENGTH, UCOL_SECONDARY, &status);
    182     if(U_FAILURE(status)){
    183         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    184         return;
    185     }
    186     log_verbose("Testing fr_CA Collation with Secondary strength\n");
    187     /*test acute and grave ordering (compare to french collation)*/
    188     testAcuteSize = UPRV_LENGTHOF(testAcute);
    189     for (i = 0; i < testAcuteSize; i++)
    190     {
    191         for (j = 0; j < testAcuteSize; j++)
    192         {
    193             if (i <  j) expected = UCOL_LESS;
    194             if (i == j) expected = UCOL_EQUAL;
    195             if (i >  j) expected = UCOL_GREATER;
    196             doTest(myCollation, testAcute[i], testAcute[j], expected );
    197         }
    198     }
    199     ucol_close(myCollation);
    200 }
    201 
    202 static void TestExtra()
    203 {
    204     int32_t i, j;
    205     UErrorCode status = U_ZERO_ERROR;
    206     myCollation = ucol_open("fr_CA", &status);
    207     if(U_FAILURE(status)){
    208         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    209         return;
    210     }
    211     log_verbose("Testing fr_CA Collation extra with secondary strength\n");
    212     ucol_setStrength(myCollation, UCOL_TERTIARY);
    213     for (i = 0; i < 9 ; i++)
    214     {
    215         for (j = i + 1; j < 10; j += 1)
    216         {
    217             doTest(myCollation, testBugs[i], testBugs[j], UCOL_LESS);
    218         }
    219     }
    220     ucol_close(myCollation);
    221 }
    222 
    223 static void TestGetSortKey() {
    224     /* This is meant to test a buffer reallocation crash while using
    225     French secondary sorting with a large buffer.
    226     The fact that Japanese characters are used is irrelevant. */
    227     static const UChar pucUTF16[] = {
    228         0x3049,0x30b9,0x3088,0xfffd,0xfffd,0x308f,0xfffd,0x3042,
    229         0xfffd,0xfffd,0x305e,0xfffd,0x30b6,0x30bb,0x305b,0x30b1,
    230         0x3050,0x30af,0x304e,0x30bd,0xfffd,0x30c6,0xfffd,0xfffd,
    231         0x30e1,0xfffd,0xfffd,0x30d9,0xfffd,0x3092,0x3075,0x304a,
    232         0x3074,0x3070,0x30f5,0x30c4,0x306e,0x30df,0x3053,0xfffd,
    233         0x30a6,0x30b6,0x30e0,0xfffd,0x30bc,0x30ef,0x3087,0x30cc,
    234         0x305f,0x30de,0xfffd,0x3090,0x3063,0x30dc,0x30b6,0x30b9,
    235         0x30d2,0x3072,0x3061,0xfffd,0xfffd,0xfffd,0x307b,0x3092,
    236         0x30a5,0x30a9,0x30b1,0x30e7,0xfffd,0xfffd,0xfffd,0xfffd,
    237         0xfffd,0x305e,0xfffd,0x30c7,0x30ae,0x305b,0x308b,0x30c0,
    238         0x30f5,0xfffd,0xfffd,0xfffd,0x307d,0x304e,0xfffd,0xfffd,
    239         0x30c0,0x30c8,0x306f,0x307a,0x30dd,0x30e4,0x3084,0xfffd,
    240         0x308c,0x30f1,0xfffd,0x30c6,0xfffd,0x307a,0xfffd,0x3052,
    241         0x3056,0x305d,0x30b7,0xfffd,0x305b,0x30b0,0x30b9,0xfffd,
    242         0x30b2,0x306d,0x3044,0xfffd,0x3073,0xfffd,0x30be,0x30cf,
    243         0x3080,0xfffd,0x30a8,0x30f5,0x30a5,0x30c7,0x307c,0xfffd,
    244         0x30d1,0x305f,0x30b2,0xfffd,0x3053,0x30ca,0xfffd,0x30dd,
    245         0x3058,0x30c0,0x305d,0x30e1,0xfffd,0x30bb,0x305f,0x30d1,
    246         0x30f2,0x3058,0x3086,0x30ce,0x30db,0x30cb,0x30e9,0xfffd,
    247         0x308c,0xfffd,0xfffd,0x30af,0x30c4,0x3076,0x304c,0x30f5,
    248         0x30e8,0x308c,0xfffd,0x30e2,0x3073,0x30a3,0x304e,0x30ea,
    249         0xfffd,0x304f,0xfffd,0x306c,0x3044,0xfffd,0xfffd,0x30c9,
    250         0xfffd,0x30f5,0xfffd,0xfffd,0xfffd,0x30eb,0x30a8,0xfffd,
    251         0x306d,0x307d,0x30d8,0x3069,0xfffd,0xfffd,0x3086,0x30a9,
    252         0xfffd,0x3076,0x30e9,0x30cc,0x3074,0x30e0,0xfffd,0xfffd,
    253         0xfffd,0x30f0,0x3086,0x30ac,0x3076,0x3068,0x30c7,0xfffd,
    254         0x30b7,0x30d2,0x3048,0x308e,0x30e8,0x30d9,0x30ce,0x30d0,
    255         0x308b,0x30ee,0x30e6,0x3079,0x30f3,0x30af,0xfffd,0x3079,
    256         0xfffd,0xfffd,0x30ca,0x30bf,0xfffd,0x30b5,0xfffd,0xfffd,
    257         0x3093,0xfffd,0x30ba,0xfffd,0x3076,0x3047,0x304a,0xfffd,
    258         0xfffd,0x3086,0xfffd,0x3081,0xfffd,0x30f6,0x3066,0xfffd,
    259         0xfffd,0x30b6,0x30ef,0x30e2,0x30bf,0xfffd,0x3053,0x304a,
    260         0xfffd,0xfffd,0x304a,0x30e8,0xfffd,0x30e2,0xfffd,0xfffd,
    261         0x305c,0x3081,0x30c6,0xfffd,0x3091,0x3046,0x306a,0x3059,
    262         0xfffd,0xfffd,0x30dd,0x30d1,0x308a,0x30ee,0xfffd,0xfffd,
    263         0x308a,0x3042,0x30da,0xfffd,0x3064,0x30ef,0x305c,0x306b,
    264         0xfffd,0x30ca,0x3085,0x3067,0x30ea,0x30c2,0x30c8,0xfffd,
    265         0x30f5,0xfffd,0xfffd,0xfffd,0x30ca,0xfffd,0x3050,0x30f1,
    266         0x3050,0x3053,0x3072,0xfffd,0xfffd,0xfffd,0x3074,0xfffd,
    267         0x304b,0x30dd,0x306d,0xfffd,0x3049,0x30a1,0x30cc,0x30de,
    268         0x30ae,0x307b,0x308a,0xfffd,0x3065,0xfffd,0xfffd,0x30c0,
    269         0xfffd,0x3048,0x30dc,0x304f,0x3085,0x3059,0x304b,0x30d3,
    270         0x30eb,0x30a4,0x3073,0xfffd,0x30ba,0x308f,0x30a7,0x30c3,
    271         0x3074,0x30cf,0x306c,0x3053,0x30c0,0xfffd,0x3066,0xfffd,
    272         0x308f,0xfffd,0x30b5,0xfffd,0x3092,0x30c4,0xfffd,0x30d6,
    273         0x3056,0x30ad,0x30d2,0x30ba,0xfffd,0x30e6,0x304c,0x3088,
    274         0x30b6,0x3048,0x3077,0x30d1,0xfffd,0x3050,0xfffd,0x3042,
    275         0xfffd,0xfffd,0x308f,0xfffd,0x30c1,0xfffd,0x3074,0x3061,
    276         0x3056,0x30e5,0xfffd,0xfffd,0x3057,0xfffd,0xfffd,0xfffd,
    277         0xfffd,0x30bd,0x30b3,0x30ee,0xfffd,0x30f2,0x3084,0x3050,
    278         0xfffd,0x30e7,0xfffd,0xfffd,0x3060,0x3049,0x30f2,0x30ad,
    279         0x30bf,0x30f1,0x30a2,0xfffd,0x30af,0xfffd,0x3060,0x30a1,
    280         0x30e9,0x30c3,0xfffd,0x3072,0x3093,0x3070,0xfffd,0x308f,
    281         0x3060,0xfffd,0x3067,0x306f,0x3082,0x308b,0x3051,0xfffd,
    282         0x3058,0xfffd,0xfffd,0x30a8,0x3051,0x3054,0x30ad,0x30f0,
    283         0x3053,0xfffd,0x30e1,0x30d7,0x308d,0x307f,0x30be,0x30b0,
    284         0xfffd,0x30db,0xfffd,0x30d1,0xfffd,0x3054,0x30a5,0xfffd,
    285         0x306a,0xfffd,0x305c,0xfffd,0x3052,0x3088,0xfffd,0x306e,
    286         0xfffd,0x30a9,0x30a1,0x30b4,0x3083,0x30bd,0xfffd,0xfffd,
    287         0x306a,0x3070,0x30cd,0xfffd,0x3072,0x30ed,0x30c6,0x30be,
    288         0x30c4,0x305e,0x30b3,0x30e1,0x308a,0xfffd,0x305b,0xfffd,
    289         0x3042,0x3088,0xfffd,0x304c,0xfffd,0x3089,0x3071,0xfffd,
    290         0xfffd,0x30c6,0x3062,0x3079,0xfffd,0x304b,0x304a,0xfffd,
    291         0x30ad,0x3045,0x3045,0x3087,0xfffd,0x306a,0x308b,0x0000,
    292         0x30bd,0x3065,0x30b8,0x3086,0x30d3,0x3076,0xfffd,0xfffd,
    293         0x308f,0x3053,0x307c,0x3053,0x3084,0x30ae,0x30c4,0x3045,
    294         0x30a8,0x30d0,0x30e1,0x308c,0x30e6,0x30b7,0xfffd,0xfffd,
    295         0xfffd,0x3046,0x305f,0xfffd,0x3086,0x30ab,0xfffd,0xfffd,
    296         0x30c8,0xfffd,0x30a1,0x3052,0x3059,0xfffd,0x30a4,0xfffd,
    297         0xfffd,0x308c,0x3085,0x30ab,0x30b5,0x3091,0x30bf,0x30e3,
    298         0xfffd,0xfffd,0x3087,0xfffd,0x30f6,0x3051,0x30bd,0x3092,
    299         0x3063,0xfffd,0x30a9,0x3063,0x306e,0xfffd,0xfffd,0xfffd,
    300         0x306c,0xfffd,0x307e,0x30ad,0x3077,0x30c2,0x30e9,0x30d5,
    301         0xfffd,0xfffd,0x30c6,0x305c,0xfffd,0xfffd,0x3089,0xfffd,
    302         0x3048,0x30cb,0x308c,0xfffd,0xfffd,0x3044,0xfffd,0x3080,
    303         0x3063,0x3079,0xfffd,0x308a,0x30cb,0x3042,0x3057,0xfffd,
    304         0x307c,0x30c1,0x30a8,0x30cf,0xfffd,0x3083,0xfffd,0xfffd,
    305         0x306c,0xfffd,0x305e,0x3092,0xfffd,0x30dc,0x30b0,0x3081,
    306         0x30e3,0x30f0,0x304e,0x30cc,0x308e,0x30c4,0x30ad
    307     };
    308 
    309     UErrorCode status = U_ZERO_ERROR;
    310     UCollator *pCollator;
    311     int32_t lenActualSortKey;
    312     uint8_t pucSortKey[4096];
    313     static const int32_t LENSORTKEY = (int32_t)sizeof(pucSortKey);
    314 
    315     ucol_prepareShortStringOpen("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status);
    316 
    317     pCollator = ucol_openFromShortString("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status);
    318 
    319     if (U_FAILURE(status)) {
    320         log_data_err("error opening collator -> %s. (Are you missing data?)\n", u_errorName(status));
    321         return;
    322     }
    323 
    324     lenActualSortKey = ucol_getSortKey(pCollator,
    325         (const UChar *)pucUTF16,
    326         UPRV_LENGTHOF(pucUTF16),
    327         pucSortKey,
    328         LENSORTKEY);
    329 
    330     if (lenActualSortKey > LENSORTKEY) {
    331         log_err("sort key too big for original buffer. Got: %d Expected: %d\n", lenActualSortKey, LENSORTKEY);
    332         return;
    333     }
    334     /* If the test didn't crash, then the test succeeded. */
    335     ucol_close(pCollator);
    336 }
    337 
    338 #endif /* #if !UCONFIG_NO_COLLATION */
    339