Home | History | Annotate | Download | only in intltest
      1 /*
      2  *******************************************************************************
      3  *
      4  *   Copyright (C) 2005-2014, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  *******************************************************************************
      8  *
      9  *   created on: 2005jun15
     10  *   created by: Raymond Yang
     11  */
     12 
     13 #if !UCONFIG_NO_IDNA
     14 
     15 #include <stdio.h>
     16 #include <stdlib.h>
     17 #include <string.h>
     18 #include "unicode/utypes.h"
     19 #include "unicode/ucnv.h"
     20 #include "unicode/ustring.h"
     21 #include "unicode/uidna.h"
     22 
     23 #include "idnaconf.h"
     24 
     25 static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // =====
     26 static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone
     27 static const UChar C_NAMEBASE[] = {0x6E, 0x61, 0x6D, 0x65, 0x62, 0x61, 0x73, 0x65, 0}; // namebase
     28 
     29 static const UChar C_TYPE[] = {0x74, 0x79, 0x70, 0x65, 0}; // type
     30 static const UChar C_TOASCII[]  =  {0x74, 0x6F, 0x61, 0x73, 0x63, 0x69, 0x69, 0};       // toascii
     31 static const UChar C_TOUNICODE[] = {0x74, 0x6F, 0x75, 0x6E, 0x69, 0x63, 0x6F, 0x64, 0x65, 0}; // tounicode
     32 
     33 static const UChar C_PASSFAIL[] = {0x70, 0x61, 0x73, 0x73, 0x66, 0x61, 0x69, 0x6C, 0}; // passfail
     34 static const UChar C_PASS[] = {0x70, 0x61, 0x73, 0x73, 0}; // pass
     35 static const UChar C_FAIL[] = {0x66, 0x61, 0x69, 0x6C, 0}; // fail
     36 
     37 static const UChar C_DESC[] = {0x64, 0x65, 0x73, 0x63, 0}; // desc
     38 static const UChar C_USESTD3ASCIIRULES[] = {0x55, 0x73, 0x65, 0x53, 0x54, 0x44,
     39        0x33, 0x41, 0x53, 0x43, 0x49, 0x49, 0x52, 0x75, 0x6C, 0x65, 0x73, 0}; // UseSTD3ASCIIRules
     40 
     41 IdnaConfTest::IdnaConfTest(){
     42     base = NULL;
     43     len = 0;
     44     curOffset = 0;
     45 
     46     type = option = passfail = -1;
     47     namebase.setToBogus();
     48     namezone.setToBogus();
     49 }
     50 IdnaConfTest::~IdnaConfTest(){
     51     delete [] base;
     52 }
     53 
     54 #if !UCONFIG_NO_IDNA
     55 /* this function is modified from RBBITest::ReadAndConvertFile()
     56  *
     57  */
     58 UBool IdnaConfTest::ReadAndConvertFile(){
     59 
     60     char * source = NULL;
     61     size_t source_len;
     62 
     63     // read the test data file to memory
     64     FILE* f    = NULL;
     65     UErrorCode  status  = U_ZERO_ERROR;
     66 
     67     const char *path = IntlTest::getSourceTestData(status);
     68     if (U_FAILURE(status)) {
     69         errln("%s", u_errorName(status));
     70         return FALSE;
     71     }
     72 
     73     const char* name = "idna_conf.txt";     // test data file
     74     int t = strlen(path) + strlen(name) + 1;
     75     char* absolute_name = new char[t];
     76     strcpy(absolute_name, path);
     77     strcat(absolute_name, name);
     78     f = fopen(absolute_name, "rb");
     79     delete [] absolute_name;
     80 
     81     if (f == NULL){
     82         dataerrln("fopen error on %s", name);
     83         return FALSE;
     84     }
     85 
     86     fseek( f, 0, SEEK_END);
     87     if ((source_len = ftell(f)) <= 0){
     88         errln("Error reading test data file.");
     89         fclose(f);
     90         return FALSE;
     91     }
     92 
     93     source = new char[source_len];
     94     fseek(f, 0, SEEK_SET);
     95     if (fread(source, 1, source_len, f) != source_len) {
     96         errln("Error reading test data file.");
     97         delete [] source;
     98         fclose(f);
     99         return FALSE;
    100     }
    101     fclose(f);
    102 
    103     // convert the UTF-8 encoded stream to UTF-16 stream
    104     UConverter* conv = ucnv_open("utf-8", &status);
    105     int dest_len = ucnv_toUChars(conv,
    106                                 NULL,           //  dest,
    107                                 0,              //  destCapacity,
    108                                 source,
    109                                 source_len,
    110                                 &status);
    111     if (status == U_BUFFER_OVERFLOW_ERROR) {
    112         // Buffer Overflow is expected from the preflight operation.
    113         status = U_ZERO_ERROR;
    114         UChar * dest = NULL;
    115         dest = new UChar[ dest_len + 1];
    116         ucnv_toUChars(conv, dest, dest_len + 1, source, source_len, &status);
    117         // Do not know the "if possible" behavior of ucnv_toUChars()
    118         // Do it by ourself.
    119         dest[dest_len] = 0;
    120         len = dest_len;
    121         base = dest;
    122         delete [] source;
    123         ucnv_close(conv);
    124         return TRUE;    // The buffer will owned by caller.
    125     }
    126     errln("UConverter error: %s", u_errorName(status));
    127     delete [] source;
    128     ucnv_close(conv);
    129     return FALSE;
    130 }
    131 
    132 int IdnaConfTest::isNewlineMark(){
    133     static const UChar LF        = 0x0a;
    134     static const UChar CR        = 0x0d;
    135     UChar c = base[curOffset];
    136     // CR LF
    137     if ( c == CR && curOffset + 1 < len && base[curOffset + 1] == LF){
    138         return 2;
    139     }
    140 
    141     // CR or LF
    142     if ( c == CR || c == LF) {
    143         return 1;
    144     }
    145 
    146     return 0;
    147 }
    148 
    149 /* Read a logical line.
    150  *
    151  * All lines ending in a backslash (\) and immediately followed by a newline
    152  * character are joined with the next line in the source file forming logical
    153  * lines from the physical lines.
    154  *
    155  */
    156 UBool IdnaConfTest::ReadOneLine(UnicodeString& buf){
    157     if ( !(curOffset < len) ) return FALSE; // stream end
    158 
    159     static const UChar BACKSLASH = 0x5c;
    160     buf.remove();
    161     int t = 0;
    162     while (curOffset < len){
    163         if ((t = isNewlineMark())) {  // end of line
    164             curOffset += t;
    165             break;
    166         }
    167         UChar c = base[curOffset];
    168         if (c == BACKSLASH && curOffset < len -1){  // escaped new line mark
    169             if ((t = isNewlineMark())){
    170                 curOffset += 1 + t;  // BACKSLAH and NewlineMark
    171                 continue;
    172             }
    173         };
    174         buf.append(c);
    175         curOffset++;
    176     }
    177     return TRUE;
    178 }
    179 
    180 //
    181 //===============================================================
    182 //
    183 
    184 /* Explain <xxxxx> tag to a native value
    185  *
    186  * Since <xxxxx> is always larger than the native value,
    187  * the operation will replace the tag directly in the buffer,
    188  * and, of course, will shift tail elements.
    189  */
    190 void IdnaConfTest::ExplainCodePointTag(UnicodeString& buf){
    191     buf.append((UChar)0);    // add a terminal NULL
    192     UChar* bufBase = buf.getBuffer(buf.length());
    193     UChar* p = bufBase;
    194     while (*p != 0){
    195         if ( *p != 0x3C){    // <
    196             *bufBase++ = *p++;
    197         } else {
    198             p++;    // skip <
    199             UChar32 cp = 0;
    200             for ( ;*p != 0x3E; p++){   // >
    201                 if (0x30 <= *p && *p <= 0x39){        // 0-9
    202                     cp = (cp * 16) + (*p - 0x30);
    203                 } else if (0x61 <= *p && *p <= 0x66){ // a-f
    204                     cp = (cp * 16) + (*p - 0x61) + 10;
    205                 } else if (0x41 <= *p && *p <= 0x46) {// A-F
    206                     cp = (cp * 16) + (*p - 0x41) + 10;
    207                 }
    208                 // no else. hope everything is good.
    209             }
    210             p++;    // skip >
    211             if (U_IS_BMP(cp)){
    212                 *bufBase++ = cp;
    213             } else {
    214                 *bufBase++ = U16_LEAD(cp);
    215                 *bufBase++ = U16_TRAIL(cp);
    216             }
    217         }
    218     }
    219     *bufBase = 0;  // close our buffer
    220     buf.releaseBuffer();
    221 }
    222 
    223 void IdnaConfTest::Call(){
    224     if (type == -1 || option == -1 || passfail == -1 || namebase.isBogus() || namezone.isBogus()){
    225         errln("Incomplete record");
    226     } else {
    227         UErrorCode status = U_ZERO_ERROR;
    228         UChar result[200] = {0,};   // simple life
    229         const UChar *p = namebase.getTerminatedBuffer();
    230         const int p_len = namebase.length();
    231 
    232         if (type == 0 && option == 0){
    233             uidna_IDNToASCII(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
    234         } else if (type == 0 && option == 1){
    235             uidna_IDNToASCII(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    236         } else if (type == 1 && option == 0){
    237             uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
    238         } else if (type == 1 && option == 1){
    239             uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    240         }
    241         if (passfail == 0){
    242             if (U_FAILURE(status)){
    243                 id.append(" should pass, but failed. - ");
    244                 id.append(u_errorName(status));
    245                 errcheckln(status, id);
    246             } else{
    247                 if (namezone.compare(result, -1) == 0){
    248                     // expected
    249                     logln(UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
    250                 } else {
    251                     id.append(" no error, but result is not as expected.");
    252                     errln(id);
    253                 }
    254             }
    255         } else if (passfail == 1){
    256             if (U_FAILURE(status)){
    257                 // expected
    258                 // TODO: Uncomment this when U_IDNA_ZERO_LENGTH_LABEL_ERROR is added to u_errorName
    259                 //logln("Got the expected error: " + UnicodeString(u_errorName(status)));
    260             } else{
    261                 if (namebase.compare(result, -1) == 0){
    262                     // garbage in -> garbage out
    263                     logln(UnicodeString("ICU will not recognize malformed ACE-Prefixes or incorrect ACE-Prefixes. ") + UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
    264                 } else {
    265                     id.append(" should fail, but not failed. ");
    266                     id.append(u_errorName(status));
    267                     errln(id);
    268                 }
    269             }
    270         }
    271     }
    272     type = option = passfail = -1;
    273     namebase.setToBogus();
    274     namezone.setToBogus();
    275     id.remove();
    276     return;
    277 }
    278 
    279 void IdnaConfTest::Test(void){
    280     if (!ReadAndConvertFile())return;
    281 
    282     UnicodeString s;
    283     UnicodeString key;
    284     UnicodeString value;
    285 
    286     // skip everything before the first "=====" and "=====" itself
    287     do {
    288         if (!ReadOneLine(s)) {
    289             errln("End of file prematurely found");
    290             break;
    291         }
    292     }
    293     while (s.compare(C_TAG, -1) != 0);   //"====="
    294 
    295     while(ReadOneLine(s)){
    296         s.trim();
    297         key.remove();
    298         value.remove();
    299         if (s.compare(C_TAG, -1) == 0){   //"====="
    300             Call();
    301        } else {
    302             // explain      key:value
    303             int p = s.indexOf((UChar)0x3A);    // :
    304             key.setTo(s,0,p).trim();
    305             value.setTo(s,p+1).trim();
    306             if (key.compare(C_TYPE, -1) == 0){
    307                 if (value.compare(C_TOASCII, -1) == 0) {
    308                     type = 0;
    309                 } else if (value.compare(C_TOUNICODE, -1) == 0){
    310                     type = 1;
    311                 }
    312             } else if (key.compare(C_PASSFAIL, -1) == 0){
    313                 if (value.compare(C_PASS, -1) == 0){
    314                     passfail = 0;
    315                 } else if (value.compare(C_FAIL, -1) == 0){
    316                     passfail = 1;
    317                 }
    318             } else if (key.compare(C_DESC, -1) == 0){
    319                 if (value.indexOf(C_USESTD3ASCIIRULES, u_strlen(C_USESTD3ASCIIRULES), 0) == -1){
    320                     option = 1; // not found
    321                 } else {
    322                     option = 0;
    323                 }
    324                 id.setTo(value, 0, value.indexOf((UChar)0x20));    // space
    325             } else if (key.compare(C_NAMEZONE, -1) == 0){
    326                 ExplainCodePointTag(value);
    327                 namezone.setTo(value);
    328             } else if (key.compare(C_NAMEBASE, -1) == 0){
    329                 ExplainCodePointTag(value);
    330                 namebase.setTo(value);
    331             }
    332             // just skip other lines
    333         }
    334     }
    335 
    336     Call(); // for last record
    337 }
    338 #else
    339 void IdnaConfTest::Test(void)
    340 {
    341   // test nothing...
    342 }
    343 #endif
    344 
    345 void IdnaConfTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/){
    346     switch (index) {
    347         TESTCASE(0,Test);
    348         default: name = ""; break;
    349     }
    350 }
    351 
    352 #endif
    353