Home | History | Annotate | Download | only in intltest
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2003-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  testidn.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2003-02-06
     14 *   created by: Ram Viswanadha
     15 *
*   This test reads the rfc3491.txt file from the sprep test data directory,
*   parses it, and compares every entry (unassigned and prohibited ranges,
*   mappings) with the data of the RFC 3491 Nameprep profile that is loaded
*   through usprep_openByType().
*/
     20 */
     21 
     22 #include "unicode/utypes.h"
     23 
     24 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
     25 
     26 #define USPREP_TYPE_NAMES_ARRAY
     27 
     28 #include "unicode/uchar.h"
     29 #include "unicode/putil.h"
     30 #include "cmemory.h"
     31 #include "cstring.h"
     32 #include "unicode/udata.h"
     33 #include "unicode/utf16.h"
     34 #include "unewdata.h"
     35 #include "uoptions.h"
     36 #include "uparse.h"
     37 #include "utrie.h"
     38 #include "umutex.h"
     39 #include "sprpimpl.h"
     40 #include "testidna.h"
     41 #include "punyref.h"
     42 #include <stdlib.h>
     43 
     44 UBool beVerbose=FALSE, haveCopyright=TRUE;
     45 
     46 /* prototypes --------------------------------------------------------------- */
     47 
     48 
     49 static void
     50 parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);
     51 
     52 static void
     53 compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
     54                UStringPrepType option);
     55 
     56 static void
     57 compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option);
     58 
     59 static void
     60 testAllCodepoints(TestIDNA& test);
     61 
     62 static TestIDNA* pTestIDNA =NULL;
     63 
     64 static const char* fileNames[] = {
     65                                     "rfc3491.txt"
     66                                  };
     67 static const UTrie *idnTrie              = NULL;
     68 static const int32_t *indexes            = NULL;
     69 static const uint16_t *mappingData       = NULL;
     70 /* -------------------------------------------------------------------------- */
     71 
     72 /* file definitions */
     73 #define DATA_TYPE "icu"
     74 
     75 #define SPREP_DIR "sprep"
     76 
     77 extern int
     78 testData(TestIDNA& test) {
     79     char *basename=NULL;
     80     UErrorCode errorCode=U_ZERO_ERROR;
     81     char *saveBasename =NULL;
     82 
     83     LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode));
     84     if(U_FAILURE(errorCode)){
     85         test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode)));
     86         return errorCode;
     87     }
     88 
     89     char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024);
     90     //TODO get the srcDir dynamically
     91     const char *srcDir=IntlTest::pathToDataDirectory();
     92 
     93     idnTrie     = &profile->sprepTrie;
     94     indexes     = profile->indexes;
     95     mappingData = profile->mappingData;
     96 
     97     //initialize
     98     pTestIDNA = &test;
     99 
    100     /* prepare the filename beginning with the source dir */
    101     if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
    102         filename[0] = 0x2E;
    103         filename[1] = U_FILE_SEP_CHAR;
    104         uprv_strcpy(filename+2,srcDir);
    105     }else{
    106         uprv_strcpy(filename, srcDir);
    107     }
    108     basename=filename+uprv_strlen(filename);
    109     if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
    110         *basename++=U_FILE_SEP_CHAR;
    111     }
    112 
    113     /* process unassigned */
    114     basename=filename+uprv_strlen(filename);
    115     if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
    116         *basename++=U_FILE_SEP_CHAR;
    117     }
    118 
    119     /* first copy misc directory */
    120     saveBasename = basename;
    121     uprv_strcpy(basename,SPREP_DIR);
    122     basename = basename + uprv_strlen(SPREP_DIR);
    123     *basename++=U_FILE_SEP_CHAR;
    124 
    125     /* process unassigned */
    126     uprv_strcpy(basename,fileNames[0]);
    127     parseMappings(filename,TRUE, test,&errorCode);
    128     if(U_FAILURE(errorCode)) {
    129         test.errln( "Could not open file %s for reading \n", filename);
    130         return errorCode;
    131     }
    132 
    133     testAllCodepoints(test);
    134 
    135     pTestIDNA = NULL;
    136     free(filename);
    137     return errorCode;
    138 }
    139 U_CDECL_BEGIN
    140 
    141 static void U_CALLCONV
    142 strprepProfileLineFn(void * /*context*/,
    143               char *fields[][2], int32_t fieldCount,
    144               UErrorCode *pErrorCode) {
    145     uint32_t mapping[40];
    146     char *end, *map;
    147     uint32_t code;
    148     int32_t length;
    149    /*UBool* mapWithNorm = (UBool*) context;*/
    150     const char* typeName;
    151     uint32_t rangeStart=0,rangeEnd =0;
    152     const char *s;
    153 
    154     s = u_skipWhitespace(fields[0][0]);
    155     if (*s == '@') {
    156         /* a special directive introduced in 4.2 */
    157         return;
    158     }
    159 
    160     if(fieldCount != 3){
    161         *pErrorCode = U_INVALID_FORMAT_ERROR;
    162         return;
    163     }
    164 
    165     typeName = fields[2][0];
    166     map = fields[1][0];
    167 
    168     if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
    169 
    170         u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
    171 
    172         /* store the range */
    173         compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED);
    174 
    175     }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
    176 
    177         u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
    178 
    179         /* store the range */
    180         compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED);
    181 
    182     }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
    183         /* get the character code, field 0 */
    184         code=(uint32_t)uprv_strtoul(s, &end, 16);
    185 
    186         /* parse the mapping string */
    187         length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
    188 
    189         /* store the mapping */
    190         compareMapping(code,mapping, length,USPREP_MAP);
    191 
    192     }else{
    193         *pErrorCode = U_INVALID_FORMAT_ERROR;
    194     }
    195 
    196 }
    197 
    198 U_CDECL_END
    199 
    200 static void
    201 parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
    202     char *fields[3][2];
    203 
    204     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    205         return;
    206     }
    207 
    208     u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
    209 
    210     //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
    211 
    212     if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
    213         test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
    214     }
    215 }
    216 
    217 
    218 static inline UStringPrepType
    219 getValues(uint32_t result, int32_t& value, UBool& isIndex){
    220 
    221     UStringPrepType type;
    222 
    223     if(result == 0){
    224         /*
    225          * Initial value stored in the mapping table
    226          * just return USPREP_TYPE_LIMIT .. so that
    227          * the source codepoint is copied to the destination
    228          */
    229         type = USPREP_TYPE_LIMIT;
    230         isIndex =FALSE;
    231         value = 0;
    232     }else if(result >= _SPREP_TYPE_THRESHOLD){
    233         type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
    234         isIndex =FALSE;
    235         value = 0;
    236     }else{
    237         /* get the state */
    238         type = USPREP_MAP;
    239         /* ascertain if the value is index or delta */
    240         if(result & 0x02){
    241             isIndex = TRUE;
    242             value = result  >> 2; //mask off the lower 2 bits and shift
    243 
    244         }else{
    245             isIndex = FALSE;
    246             value = (int16_t)result;
    247             value =  (value >> 2);
    248 
    249         }
    250         if((result>>2) == _SPREP_MAX_INDEX_VALUE){
    251             type = USPREP_DELETE;
    252             isIndex =FALSE;
    253             value = 0;
    254         }
    255     }
    256     return type;
    257 }
    258 
    259 
    260 
    261 static void
    262 testAllCodepoints(TestIDNA& test){
    263     /*
    264     {
    265         UChar str[19] = {
    266                             0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
    267                             0x070F,//prohibited
    268                             0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74
    269                         };
    270         uint32_t in[19] = {0};
    271         UErrorCode status = U_ZERO_ERROR;
    272         int32_t inLength=0, outLength=100;
    273         char output[100] = {0};
    274         punycode_status error;
    275         u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status);
    276 
    277         error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output);
    278         printf(output);
    279 
    280     }
    281     */
    282 
    283     uint32_t i = 0;
    284     int32_t unassigned      = 0;
    285     int32_t prohibited      = 0;
    286     int32_t mappedWithNorm  = 0;
    287     int32_t mapped          = 0;
    288     int32_t noValueInTrie   = 0;
    289 
    290     UStringPrepType type;
    291     int32_t value;
    292     UBool isIndex = FALSE;
    293 
    294     for(i=0;i<=0x10FFFF;i++){
    295         uint32_t result = 0;
    296         UTRIE_GET16(idnTrie,i, result);
    297         type = getValues(result,value, isIndex);
    298         if(type != USPREP_TYPE_LIMIT ){
    299             if(type == USPREP_UNASSIGNED){
    300                 unassigned++;
    301             }
    302             if(type == USPREP_PROHIBITED){
    303                 prohibited++;
    304             }
    305             if(type == USPREP_MAP){
    306                 mapped++;
    307             }
    308         }else{
    309             noValueInTrie++;
    310             if(result > 0){
    311                 test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
    312             }
    313         }
    314     }
    315 
    316     test.logln("Number of Unassinged code points : %i \n",unassigned);
    317     test.logln("Number of Prohibited code points : %i \n",prohibited);
    318     test.logln("Number of Mapped code points : %i \n",mapped);
    319     test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
    320     test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
    321 
    322 
    323 }
    324 
    325 static void
    326 compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
    327                UStringPrepType type){
    328     uint32_t result = 0;
    329     UTRIE_GET16(idnTrie,codepoint, result);
    330 
    331     int32_t length=0;
    332     UBool isIndex;
    333     UStringPrepType retType;
    334     int32_t value, index=0, delta=0;
    335 
    336     retType = getValues(result,value,isIndex);
    337 
    338 
    339     if(type != retType && retType != USPREP_DELETE){
    340 
    341         pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type);
    342 
    343     }
    344 
    345     if(isIndex){
    346         index = value;
    347         if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
    348                  index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
    349             length = 1;
    350         }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
    351                  index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
    352             length = 2;
    353         }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
    354                  index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
    355             length = 3;
    356         }else{
    357             length = mappingData[index++];
    358         }
    359     }else{
    360         delta = value;
    361         length = (retType == USPREP_DELETE)? 0 :  1;
    362     }
    363 
    364     int32_t realLength =0;
    365     /* figure out the real length */
    366     for(int32_t j=0; j<mapLength; j++){
    367         if(mapping[j] > 0xFFFF){
    368             realLength +=2;
    369         }else{
    370             realLength++;
    371         }
    372     }
    373 
    374     if(realLength != length){
    375         pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
    376     }
    377 
    378     if(isIndex){
    379         for(int8_t i =0; i< mapLength; i++){
    380             if(mapping[i] <= 0xFFFF){
    381                 if(mappingData[index+i] != (uint16_t)mapping[i]){
    382                     pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
    383                 }
    384             }else{
    385                 UChar lead  = U16_LEAD(mapping[i]);
    386                 UChar trail = U16_TRAIL(mapping[i]);
    387                 if(mappingData[index+i] != lead ||
    388                     mappingData[index+i+1] != trail){
    389                     pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X  Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]);
    390                 }
    391             }
    392         }
    393     }else{
    394         if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
    395             pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
    396         }
    397     }
    398 
    399 }
    400 
    401 static void
    402 compareFlagsForRange(uint32_t start, uint32_t end,
    403                      UStringPrepType type){
    404 
    405     uint32_t result =0 ;
    406     UStringPrepType retType;
    407     UBool isIndex=FALSE;
    408     int32_t value=0;
    409 /*
    410     // supplementary code point
    411     UChar __lead16=U16_LEAD(0x2323E);
    412     int32_t __offset;
    413 
    414     // get data for lead surrogate
    415     (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
    416     __offset=(&idnTrie)->getFoldingOffset(result);
    417 
    418     // get the real data from the folded lead/trail units
    419     if(__offset>0) {
    420         (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
    421     } else {
    422         (result)=(uint32_t)((&idnTrie)->initialValue);
    423     }
    424 
    425     UTRIE_GET16(&idnTrie,0x2323E, result);
    426 */
    427     while(start < end+1){
    428         UTRIE_GET16(idnTrie,start, result);
    429         retType = getValues(result,value,isIndex);
    430         if(result > _SPREP_TYPE_THRESHOLD){
    431             if(retType != type){
    432                 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
    433             }
    434         }else{
    435             if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
    436                 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
    437             }
    438         }
    439 
    440         start++;
    441     }
    442 
    443 }
    444 
    445 
#endif /* #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION */
    447 
    448 /*
    449  * Hey, Emacs, please set the following:
    450  *
    451  * Local Variables:
    452  * indent-tabs-mode: nil
    453  * End:
    454  *
    455  */
    456