Home | History | Annotate | Download | only in intltest
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4  *******************************************************************************
      5  *
      6  *   Copyright (C) 2005-2014, International Business Machines
      7  *   Corporation and others.  All Rights Reserved.
      8  *
      9  *******************************************************************************
     10  *
     11  *   created on: 2005jun15
     12  *   created by: Raymond Yang
     13  */
     14 
     15 #if !UCONFIG_NO_IDNA
     16 
     17 #include <stdio.h>
     18 #include <stdlib.h>
     19 #include <string.h>
     20 #include "unicode/utypes.h"
     21 #include "unicode/ucnv.h"
     22 #include "unicode/ustring.h"
     23 #include "unicode/uidna.h"
     24 #include "unicode/utf16.h"
     25 #include "idnaconf.h"
     26 
// Keywords of the idna_conf.txt record format, spelled out as NUL-terminated
// UTF-16 code unit arrays so they can be compared against UnicodeString
// contents without any charset conversion.

// Line that separates two records.
static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // =====
// Keys carrying the two names of a record (see IdnaConfTest::Call() for how
// they are used: namebase is the input, namezone the expected output).
static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone
static const UChar C_NAMEBASE[] = {0x6E, 0x61, 0x6D, 0x65, 0x62, 0x61, 0x73, 0x65, 0}; // namebase

// "type" key and its two recognized values.
static const UChar C_TYPE[] = {0x74, 0x79, 0x70, 0x65, 0}; // type
static const UChar C_TOASCII[]  =  {0x74, 0x6F, 0x61, 0x73, 0x63, 0x69, 0x69, 0};       // toascii
static const UChar C_TOUNICODE[] = {0x74, 0x6F, 0x75, 0x6E, 0x69, 0x63, 0x6F, 0x64, 0x65, 0}; // tounicode

// "passfail" key and its two recognized values.
static const UChar C_PASSFAIL[] = {0x70, 0x61, 0x73, 0x73, 0x66, 0x61, 0x69, 0x6C, 0}; // passfail
static const UChar C_PASS[] = {0x70, 0x61, 0x73, 0x73, 0}; // pass
static const UChar C_FAIL[] = {0x66, 0x61, 0x69, 0x6C, 0}; // fail

// "desc" key, and the marker that is searched for inside its value to pick
// the IDNA option for the record.
static const UChar C_DESC[] = {0x64, 0x65, 0x73, 0x63, 0}; // desc
static const UChar C_USESTD3ASCIIRULES[] = {0x55, 0x73, 0x65, 0x53, 0x54, 0x44,
       0x33, 0x41, 0x53, 0x43, 0x49, 0x49, 0x52, 0x75, 0x6C, 0x65, 0x73, 0}; // UseSTD3ASCIIRules
     42 
     43 IdnaConfTest::IdnaConfTest(){
     44     base = NULL;
     45     len = 0;
     46     curOffset = 0;
     47 
     48     type = option = passfail = -1;
     49     namebase.setToBogus();
     50     namezone.setToBogus();
     51 }
     52 IdnaConfTest::~IdnaConfTest(){
     53     delete [] base;
     54 }
     55 
     56 #if !UCONFIG_NO_IDNA
     57 /* this function is modified from RBBITest::ReadAndConvertFile()
     58  *
     59  */
     60 UBool IdnaConfTest::ReadAndConvertFile(){
     61 
     62     char * source = NULL;
     63     size_t source_len;
     64 
     65     // read the test data file to memory
     66     FILE* f    = NULL;
     67     UErrorCode  status  = U_ZERO_ERROR;
     68 
     69     const char *path = IntlTest::getSourceTestData(status);
     70     if (U_FAILURE(status)) {
     71         errln("%s", u_errorName(status));
     72         return FALSE;
     73     }
     74 
     75     const char* name = "idna_conf.txt";     // test data file
     76     int t = strlen(path) + strlen(name) + 1;
     77     char* absolute_name = new char[t];
     78     strcpy(absolute_name, path);
     79     strcat(absolute_name, name);
     80     f = fopen(absolute_name, "rb");
     81     delete [] absolute_name;
     82 
     83     if (f == NULL){
     84         dataerrln("fopen error on %s", name);
     85         return FALSE;
     86     }
     87 
     88     fseek( f, 0, SEEK_END);
     89     if ((source_len = ftell(f)) <= 0){
     90         errln("Error reading test data file.");
     91         fclose(f);
     92         return FALSE;
     93     }
     94 
     95     source = new char[source_len];
     96     fseek(f, 0, SEEK_SET);
     97     if (fread(source, 1, source_len, f) != source_len) {
     98         errln("Error reading test data file.");
     99         delete [] source;
    100         fclose(f);
    101         return FALSE;
    102     }
    103     fclose(f);
    104 
    105     // convert the UTF-8 encoded stream to UTF-16 stream
    106     UConverter* conv = ucnv_open("utf-8", &status);
    107     int dest_len = ucnv_toUChars(conv,
    108                                 NULL,           //  dest,
    109                                 0,              //  destCapacity,
    110                                 source,
    111                                 source_len,
    112                                 &status);
    113     if (status == U_BUFFER_OVERFLOW_ERROR) {
    114         // Buffer Overflow is expected from the preflight operation.
    115         status = U_ZERO_ERROR;
    116         UChar * dest = NULL;
    117         dest = new UChar[ dest_len + 1];
    118         ucnv_toUChars(conv, dest, dest_len + 1, source, source_len, &status);
    119         // Do not know the "if possible" behavior of ucnv_toUChars()
    120         // Do it by ourself.
    121         dest[dest_len] = 0;
    122         len = dest_len;
    123         base = dest;
    124         delete [] source;
    125         ucnv_close(conv);
    126         return TRUE;    // The buffer will owned by caller.
    127     }
    128     errln("UConverter error: %s", u_errorName(status));
    129     delete [] source;
    130     ucnv_close(conv);
    131     return FALSE;
    132 }
    133 
    134 int IdnaConfTest::isNewlineMark(){
    135     static const UChar LF        = 0x0a;
    136     static const UChar CR        = 0x0d;
    137     UChar c = base[curOffset];
    138     // CR LF
    139     if ( c == CR && curOffset + 1 < len && base[curOffset + 1] == LF){
    140         return 2;
    141     }
    142 
    143     // CR or LF
    144     if ( c == CR || c == LF) {
    145         return 1;
    146     }
    147 
    148     return 0;
    149 }
    150 
    151 /* Read a logical line.
    152  *
    153  * All lines ending in a backslash (\) and immediately followed by a newline
    154  * character are joined with the next line in the source file forming logical
    155  * lines from the physical lines.
    156  *
    157  */
    158 UBool IdnaConfTest::ReadOneLine(UnicodeString& buf){
    159     if ( !(curOffset < len) ) return FALSE; // stream end
    160 
    161     static const UChar BACKSLASH = 0x5c;
    162     buf.remove();
    163     int t = 0;
    164     while (curOffset < len){
    165         if ((t = isNewlineMark())) {  // end of line
    166             curOffset += t;
    167             break;
    168         }
    169         UChar c = base[curOffset];
    170         if (c == BACKSLASH && curOffset < len -1){  // escaped new line mark
    171             if ((t = isNewlineMark())){
    172                 curOffset += 1 + t;  // BACKSLAH and NewlineMark
    173                 continue;
    174             }
    175         };
    176         buf.append(c);
    177         curOffset++;
    178     }
    179     return TRUE;
    180 }
    181 
    182 //
    183 //===============================================================
    184 //
    185 
    186 /* Explain <xxxxx> tag to a native value
    187  *
    188  * Since <xxxxx> is always larger than the native value,
    189  * the operation will replace the tag directly in the buffer,
    190  * and, of course, will shift tail elements.
    191  */
    192 void IdnaConfTest::ExplainCodePointTag(UnicodeString& buf){
    193     buf.append((UChar)0);    // add a terminal NULL
    194     UChar* bufBase = buf.getBuffer(buf.length());
    195     UChar* p = bufBase;
    196     while (*p != 0){
    197         if ( *p != 0x3C){    // <
    198             *bufBase++ = *p++;
    199         } else {
    200             p++;    // skip <
    201             UChar32 cp = 0;
    202             for ( ;*p != 0x3E; p++){   // >
    203                 if (0x30 <= *p && *p <= 0x39){        // 0-9
    204                     cp = (cp * 16) + (*p - 0x30);
    205                 } else if (0x61 <= *p && *p <= 0x66){ // a-f
    206                     cp = (cp * 16) + (*p - 0x61) + 10;
    207                 } else if (0x41 <= *p && *p <= 0x46) {// A-F
    208                     cp = (cp * 16) + (*p - 0x41) + 10;
    209                 }
    210                 // no else. hope everything is good.
    211             }
    212             p++;    // skip >
    213             if (U_IS_BMP(cp)){
    214                 *bufBase++ = cp;
    215             } else {
    216                 *bufBase++ = U16_LEAD(cp);
    217                 *bufBase++ = U16_TRAIL(cp);
    218             }
    219         }
    220     }
    221     *bufBase = 0;  // close our buffer
    222     buf.releaseBuffer();
    223 }
    224 
    225 void IdnaConfTest::Call(){
    226     if (type == -1 || option == -1 || passfail == -1 || namebase.isBogus() || namezone.isBogus()){
    227         errln("Incomplete record");
    228     } else {
    229         UErrorCode status = U_ZERO_ERROR;
    230         UChar result[200] = {0,};   // simple life
    231         const UChar *p = namebase.getTerminatedBuffer();
    232         const int p_len = namebase.length();
    233 
    234         if (type == 0 && option == 0){
    235             uidna_IDNToASCII(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
    236         } else if (type == 0 && option == 1){
    237             uidna_IDNToASCII(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    238         } else if (type == 1 && option == 0){
    239             uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
    240         } else if (type == 1 && option == 1){
    241             uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    242         }
    243         if (passfail == 0){
    244             if (U_FAILURE(status)){
    245                 id.append(" should pass, but failed. - ");
    246                 id.append(u_errorName(status));
    247                 errcheckln(status, id);
    248             } else{
    249                 if (namezone.compare(result, -1) == 0){
    250                     // expected
    251                     logln(UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
    252                 } else {
    253                     id.append(" no error, but result is not as expected.");
    254                     errln(id);
    255                 }
    256             }
    257         } else if (passfail == 1){
    258             if (U_FAILURE(status)){
    259                 // expected
    260                 // TODO: Uncomment this when U_IDNA_ZERO_LENGTH_LABEL_ERROR is added to u_errorName
    261                 //logln("Got the expected error: " + UnicodeString(u_errorName(status)));
    262             } else{
    263                 if (namebase.compare(result, -1) == 0){
    264                     // garbage in -> garbage out
    265                     logln(UnicodeString("ICU will not recognize malformed ACE-Prefixes or incorrect ACE-Prefixes. ") + UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
    266                 } else {
    267                     id.append(" should fail, but not failed. ");
    268                     id.append(u_errorName(status));
    269                     errln(id);
    270                 }
    271             }
    272         }
    273     }
    274     type = option = passfail = -1;
    275     namebase.setToBogus();
    276     namezone.setToBogus();
    277     id.remove();
    278     return;
    279 }
    280 
    281 void IdnaConfTest::Test(void){
    282     if (!ReadAndConvertFile())return;
    283 
    284     UnicodeString s;
    285     UnicodeString key;
    286     UnicodeString value;
    287 
    288     // skip everything before the first "=====" and "=====" itself
    289     do {
    290         if (!ReadOneLine(s)) {
    291             errln("End of file prematurely found");
    292             break;
    293         }
    294     }
    295     while (s.compare(C_TAG, -1) != 0);   //"====="
    296 
    297     while(ReadOneLine(s)){
    298         s.trim();
    299         key.remove();
    300         value.remove();
    301         if (s.compare(C_TAG, -1) == 0){   //"====="
    302             Call();
    303        } else {
    304             // explain      key:value
    305             int p = s.indexOf((UChar)0x3A);    // :
    306             key.setTo(s,0,p).trim();
    307             value.setTo(s,p+1).trim();
    308             if (key.compare(C_TYPE, -1) == 0){
    309                 if (value.compare(C_TOASCII, -1) == 0) {
    310                     type = 0;
    311                 } else if (value.compare(C_TOUNICODE, -1) == 0){
    312                     type = 1;
    313                 }
    314             } else if (key.compare(C_PASSFAIL, -1) == 0){
    315                 if (value.compare(C_PASS, -1) == 0){
    316                     passfail = 0;
    317                 } else if (value.compare(C_FAIL, -1) == 0){
    318                     passfail = 1;
    319                 }
    320             } else if (key.compare(C_DESC, -1) == 0){
    321                 if (value.indexOf(C_USESTD3ASCIIRULES, u_strlen(C_USESTD3ASCIIRULES), 0) == -1){
    322                     option = 1; // not found
    323                 } else {
    324                     option = 0;
    325                 }
    326                 id.setTo(value, 0, value.indexOf((UChar)0x20));    // space
    327             } else if (key.compare(C_NAMEZONE, -1) == 0){
    328                 ExplainCodePointTag(value);
    329                 namezone.setTo(value);
    330             } else if (key.compare(C_NAMEBASE, -1) == 0){
    331                 ExplainCodePointTag(value);
    332                 namebase.setTo(value);
    333             }
    334             // just skip other lines
    335         }
    336     }
    337 
    338     Call(); // for last record
    339 }
    340 #else
    341 void IdnaConfTest::Test(void)
    342 {
    343   // test nothing...
    344 }
    345 #endif
    346 
/* Test dispatcher: maps a test index onto a test method of this class.
 *
 * Index 0 is registered for Test; for every other index `name` is set to
 * the empty string. The last parameter is unused.
 *
 * NOTE(review): TESTCASE is a macro defined outside this file; presumably it
 * sets `name` to the method name and runs the method when `exec` is true —
 * confirm against its definition.
 */
void IdnaConfTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/){
    switch (index) {
        TESTCASE(0,Test);
        default: name = ""; break;
    }
}
    353 
    354 #endif
    355