Home | History | Annotate | Download | only in intltest
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2005-2009, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *
      9  *   created on: 2005jun15
     10  *   created by: Raymond Yang
     11  */
     12 
     13 #if !UCONFIG_NO_IDNA
     14 
     15 #include <stdio.h>
     16 #include <stdlib.h>
     17 #include <string.h>
     18 #include "unicode/utypes.h"
     19 #include "unicode/ucnv.h"
     20 #include "unicode/ustring.h"
     21 #include "unicode/uidna.h"
     22 
     23 #include "idnaconf.h"
     24 
     25 static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // =====
     26 static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone
     27 static const UChar C_NAMEBASE[] = {0x6E, 0x61, 0x6D, 0x65, 0x62, 0x61, 0x73, 0x65, 0}; // namebase
     28 static const UChar C_NAMEUTF8[] = {0x6E, 0x61, 0x6D, 0x65, 0x75, 0x74, 0x66, 0x38, 0}; // nameutf8
     29 
     30 static const UChar C_TYPE[] = {0x74, 0x79, 0x70, 0x65, 0}; // type
     31 static const UChar C_TOASCII[]  =  {0x74, 0x6F, 0x61, 0x73, 0x63, 0x69, 0x69, 0};       // toascii
     32 static const UChar C_TOUNICODE[] = {0x74, 0x6F, 0x75, 0x6E, 0x69, 0x63, 0x6F, 0x64, 0x65, 0}; // tounicode
     33 
     34 static const UChar C_PASSFAIL[] = {0x70, 0x61, 0x73, 0x73, 0x66, 0x61, 0x69, 0x6C, 0}; // passfail
     35 static const UChar C_PASS[] = {0x70, 0x61, 0x73, 0x73, 0}; // pass
     36 static const UChar C_FAIL[] = {0x66, 0x61, 0x69, 0x6C, 0}; // fail
     37 
     38 static const UChar C_DESC[] = {0x64, 0x65, 0x73, 0x63, 0}; // desc
     39 static const UChar C_USESTD3ASCIIRULES[] = {0x55, 0x73, 0x65, 0x53, 0x54, 0x44,
     40        0x33, 0x41, 0x53, 0x43, 0x49, 0x49, 0x52, 0x75, 0x6C, 0x65, 0x73, 0}; // UseSTD3ASCIIRules
     41 
     42 IdnaConfTest::IdnaConfTest(){
     43     base = NULL;
     44     len = 0;
     45     curOffset = 0;
     46 
     47     type = option = passfail = -1;
     48     namebase.setToBogus();
     49     namezone.setToBogus();
     50 }
     51 IdnaConfTest::~IdnaConfTest(){
     52     delete [] base;
     53 }
     54 
     55 #if !UCONFIG_NO_IDNA
     56 /* this function is modified from RBBITest::ReadAndConvertFile()
     57  *
     58  */
     59 UBool IdnaConfTest::ReadAndConvertFile(){
     60 
     61     char * source = NULL;
     62     size_t source_len;
     63 
     64     // read the test data file to memory
     65     FILE* f    = NULL;
     66     UErrorCode  status  = U_ZERO_ERROR;
     67 
     68     const char *path = IntlTest::getSourceTestData(status);
     69     if (U_FAILURE(status)) {
     70         errln("%s", u_errorName(status));
     71         return FALSE;
     72     }
     73 
     74     const char* name = "idna_conf.txt";     // test data file
     75     int t = strlen(path) + strlen(name) + 1;
     76     char* absolute_name = new char[t];
     77     strcpy(absolute_name, path);
     78     strcat(absolute_name, name);
     79     f = fopen(absolute_name, "rb");
     80     delete [] absolute_name;
     81 
     82     if (f == NULL){
     83         dataerrln("fopen error on %s", name);
     84         return FALSE;
     85     }
     86 
     87     fseek( f, 0, SEEK_END);
     88     if ((source_len = ftell(f)) <= 0){
     89         errln("Error reading test data file.");
     90         fclose(f);
     91         return FALSE;
     92     }
     93 
     94     source = new char[source_len];
     95     fseek(f, 0, SEEK_SET);
     96     if (fread(source, 1, source_len, f) != source_len) {
     97         errln("Error reading test data file.");
     98         delete [] source;
     99         fclose(f);
    100         return FALSE;
    101     }
    102     fclose(f);
    103 
    104     // convert the UTF-8 encoded stream to UTF-16 stream
    105     UConverter* conv = ucnv_open("utf-8", &status);
    106     int dest_len = ucnv_toUChars(conv,
    107                                 NULL,           //  dest,
    108                                 0,              //  destCapacity,
    109                                 source,
    110                                 source_len,
    111                                 &status);
    112     if (status == U_BUFFER_OVERFLOW_ERROR) {
    113         // Buffer Overflow is expected from the preflight operation.
    114         status = U_ZERO_ERROR;
    115         UChar * dest = NULL;
    116         dest = new UChar[ dest_len + 1];
    117         ucnv_toUChars(conv, dest, dest_len + 1, source, source_len, &status);
    118         // Do not know the "if possible" behavior of ucnv_toUChars()
    119         // Do it by ourself.
    120         dest[dest_len] = 0;
    121         len = dest_len;
    122         base = dest;
    123         delete [] source;
    124         ucnv_close(conv);
    125         return TRUE;    // The buffer will owned by caller.
    126     }
    127     errln("UConverter error: %s", u_errorName(status));
    128     delete [] source;
    129     ucnv_close(conv);
    130     return FALSE;
    131 }
    132 
    133 int IdnaConfTest::isNewlineMark(){
    134     static const UChar LF        = 0x0a;
    135     static const UChar CR        = 0x0d;
    136     UChar c = base[curOffset];
    137     // CR LF
    138     if ( c == CR && curOffset + 1 < len && base[curOffset + 1] == LF){
    139         return 2;
    140     }
    141 
    142     // CR or LF
    143     if ( c == CR || c == LF) {
    144         return 1;
    145     }
    146 
    147     return 0;
    148 }
    149 
    150 /* Read a logical line.
    151  *
    152  * All lines ending in a backslash (\) and immediately followed by a newline
    153  * character are joined with the next line in the source file forming logical
    154  * lines from the physical lines.
    155  *
    156  */
    157 UBool IdnaConfTest::ReadOneLine(UnicodeString& buf){
    158     if ( !(curOffset < len) ) return FALSE; // stream end
    159 
    160     static const UChar BACKSLASH = 0x5c;
    161     buf.remove();
    162     int t = 0;
    163     while (curOffset < len){
    164         if ((t = isNewlineMark())) {  // end of line
    165             curOffset += t;
    166             break;
    167         }
    168         UChar c = base[curOffset];
    169         if (c == BACKSLASH && curOffset < len -1){  // escaped new line mark
    170             if ((t = isNewlineMark())){
    171                 curOffset += 1 + t;  // BACKSLAH and NewlineMark
    172                 continue;
    173             }
    174         };
    175         buf.append(c);
    176         curOffset++;
    177     }
    178     return TRUE;
    179 }
    180 
    181 //
    182 //===============================================================
    183 //
    184 
    185 /* Explain <xxxxx> tag to a native value
    186  *
    187  * Since <xxxxx> is always larger than the native value,
    188  * the operation will replace the tag directly in the buffer,
    189  * and, of course, will shift tail elements.
    190  */
    191 void IdnaConfTest::ExplainCodePointTag(UnicodeString& buf){
    192     buf.append((UChar)0);    // add a terminal NULL
    193     UChar* bufBase = buf.getBuffer(buf.length());
    194     UChar* p = bufBase;
    195     while (*p != 0){
    196         if ( *p != 0x3C){    // <
    197             *bufBase++ = *p++;
    198         } else {
    199             p++;    // skip <
    200             UChar32 cp = 0;
    201             for ( ;*p != 0x3E; p++){   // >
    202                 if (0x30 <= *p && *p <= 0x39){        // 0-9
    203                     cp = (cp * 16) + (*p - 0x30);
    204                 } else if (0x61 <= *p && *p <= 0x66){ // a-f
    205                     cp = (cp * 16) + (*p - 0x61) + 10;
    206                 } else if (0x41 <= *p && *p <= 0x46) {// A-F
    207                     cp = (cp * 16) + (*p - 0x41) + 10;
    208                 }
    209                 // no else. hope everything is good.
    210             }
    211             p++;    // skip >
    212             if (U_IS_BMP(cp)){
    213                 *bufBase++ = cp;
    214             } else {
    215                 *bufBase++ = U16_LEAD(cp);
    216                 *bufBase++ = U16_TRAIL(cp);
    217             }
    218         }
    219     }
    220     *bufBase = 0;  // close our buffer
    221     buf.releaseBuffer();
    222 }
    223 
    224 void IdnaConfTest::Call(){
    225     if (type == -1 || option == -1 || passfail == -1 || namebase.isBogus() || namezone.isBogus()){
    226         errln("Incomplete record");
    227     } else {
    228         UErrorCode status = U_ZERO_ERROR;
    229         UChar result[200] = {0,};   // simple life
    230         const UChar *p = namebase.getTerminatedBuffer();
    231         const int p_len = namebase.length();
    232 
    233         if (type == 0 && option == 0){
    234             uidna_IDNToASCII(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
    235         } else if (type == 0 && option == 1){
    236             uidna_IDNToASCII(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    237         } else if (type == 1 && option == 0){
    238             uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
    239         } else if (type == 1 && option == 1){
    240             uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    241         }
    242         if (passfail == 0){
    243             if (U_FAILURE(status)){
    244                 id.append(" should pass, but failed. - ");
    245                 id.append(u_errorName(status));
    246                 errcheckln(status, id);
    247             } else{
    248                 if (namezone.compare(result, -1) == 0){
    249                     // expected
    250                     logln(UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
    251                 } else {
    252                     id.append(" no error, but result is not as expected.");
    253                     errln(id);
    254                 }
    255             }
    256         } else if (passfail == 1){
    257             if (U_FAILURE(status)){
    258                 // expected
    259                 // TODO: Uncomment this when U_IDNA_ZERO_LENGTH_LABEL_ERROR is added to u_errorName
    260                 //logln("Got the expected error: " + UnicodeString(u_errorName(status)));
    261             } else{
    262                 if (namebase.compare(result, -1) == 0){
    263                     // garbage in -> garbage out
    264                     logln(UnicodeString("ICU will not recognize malformed ACE-Prefixes or incorrect ACE-Prefixes. ") + UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
    265                 } else {
    266                     id.append(" should fail, but not failed. ");
    267                     id.append(u_errorName(status));
    268                     errln(id);
    269                 }
    270             }
    271         }
    272     }
    273     type = option = passfail = -1;
    274     namebase.setToBogus();
    275     namezone.setToBogus();
    276     id.remove();
    277     return;
    278 }
    279 
    280 void IdnaConfTest::Test(void){
    281     if (!ReadAndConvertFile())return;
    282 
    283     UnicodeString s;
    284     UnicodeString key;
    285     UnicodeString value;
    286 
    287     // skip everything before the first "=====" and "=====" itself
    288     do {
    289         if (!ReadOneLine(s)) {
    290             errln("End of file prematurely found");
    291             break;
    292         }
    293     }
    294     while (s.compare(C_TAG, -1) != 0);   //"====="
    295 
    296     while(ReadOneLine(s)){
    297         s.trim();
    298         key.remove();
    299         value.remove();
    300         if (s.compare(C_TAG, -1) == 0){   //"====="
    301             Call();
    302        } else {
    303             // explain      key:value
    304             int p = s.indexOf((UChar)0x3A);    // :
    305             key.setTo(s,0,p).trim();
    306             value.setTo(s,p+1).trim();
    307             if (key.compare(C_TYPE, -1) == 0){
    308                 if (value.compare(C_TOASCII, -1) == 0) {
    309                     type = 0;
    310                 } else if (value.compare(C_TOUNICODE, -1) == 0){
    311                     type = 1;
    312                 }
    313             } else if (key.compare(C_PASSFAIL, -1) == 0){
    314                 if (value.compare(C_PASS, -1) == 0){
    315                     passfail = 0;
    316                 } else if (value.compare(C_FAIL, -1) == 0){
    317                     passfail = 1;
    318                 }
    319             } else if (key.compare(C_DESC, -1) == 0){
    320                 if (value.indexOf(C_USESTD3ASCIIRULES, u_strlen(C_USESTD3ASCIIRULES), 0) == -1){
    321                     option = 1; // not found
    322                 } else {
    323                     option = 0;
    324                 }
    325                 id.setTo(value, 0, value.indexOf((UChar)0x20));    // space
    326             } else if (key.compare(C_NAMEZONE, -1) == 0){
    327                 ExplainCodePointTag(value);
    328                 namezone.setTo(value);
    329             } else if (key.compare(C_NAMEBASE, -1) == 0){
    330                 ExplainCodePointTag(value);
    331                 namebase.setTo(value);
    332             }
    333             // just skip other lines
    334         }
    335     }
    336 
    337     Call(); // for last record
    338 }
    339 #else
    340 void IdnaConfTest::Test(void)
    341 {
    342   // test nothing...
    343 }
    344 #endif
    345 
    346 void IdnaConfTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/){
    347     switch (index) {
    348         TESTCASE(0,Test);
    349         default: name = ""; break;
    350     }
    351 }
    352 
    353 #endif
    354