1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2005-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * created on: 2005jun15 10 * created by: Raymond Yang 11 */ 12 13 #if !UCONFIG_NO_IDNA 14 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include "unicode/utypes.h" 19 #include "unicode/ucnv.h" 20 #include "unicode/ustring.h" 21 #include "unicode/uidna.h" 22 23 #include "idnaconf.h" 24 25 static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // ===== 26 static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone 27 static const UChar C_NAMEBASE[] = {0x6E, 0x61, 0x6D, 0x65, 0x62, 0x61, 0x73, 0x65, 0}; // namebase 28 29 static const UChar C_TYPE[] = {0x74, 0x79, 0x70, 0x65, 0}; // type 30 static const UChar C_TOASCII[] = {0x74, 0x6F, 0x61, 0x73, 0x63, 0x69, 0x69, 0}; // toascii 31 static const UChar C_TOUNICODE[] = {0x74, 0x6F, 0x75, 0x6E, 0x69, 0x63, 0x6F, 0x64, 0x65, 0}; // tounicode 32 33 static const UChar C_PASSFAIL[] = {0x70, 0x61, 0x73, 0x73, 0x66, 0x61, 0x69, 0x6C, 0}; // passfail 34 static const UChar C_PASS[] = {0x70, 0x61, 0x73, 0x73, 0}; // pass 35 static const UChar C_FAIL[] = {0x66, 0x61, 0x69, 0x6C, 0}; // fail 36 37 static const UChar C_DESC[] = {0x64, 0x65, 0x73, 0x63, 0}; // desc 38 static const UChar C_USESTD3ASCIIRULES[] = {0x55, 0x73, 0x65, 0x53, 0x54, 0x44, 39 0x33, 0x41, 0x53, 0x43, 0x49, 0x49, 0x52, 0x75, 0x6C, 0x65, 0x73, 0}; // UseSTD3ASCIIRules 40 41 IdnaConfTest::IdnaConfTest(){ 42 base = NULL; 43 len = 0; 44 curOffset = 0; 45 46 type = option = passfail = -1; 47 namebase.setToBogus(); 48 namezone.setToBogus(); 49 } 50 IdnaConfTest::~IdnaConfTest(){ 51 delete [] base; 52 } 53 54 #if !UCONFIG_NO_IDNA 55 /* this function is modified from RBBITest::ReadAndConvertFile() 56 * 57 */ 58 UBool IdnaConfTest::ReadAndConvertFile(){ 59 60 char * source = NULL; 61 size_t source_len; 62 63 // read the test data file to memory 64 FILE* f = NULL; 65 UErrorCode status = U_ZERO_ERROR; 66 67 const char *path = IntlTest::getSourceTestData(status); 68 if (U_FAILURE(status)) { 69 errln("%s", u_errorName(status)); 70 return FALSE; 71 } 72 73 const char* name = "idna_conf.txt"; // test data file 74 int t = strlen(path) + strlen(name) + 1; 75 char* absolute_name = new char[t]; 76 strcpy(absolute_name, path); 77 strcat(absolute_name, name); 78 f = fopen(absolute_name, "rb"); 79 delete [] absolute_name; 80 81 if (f == NULL){ 82 dataerrln("fopen error on %s", name); 83 return FALSE; 84 } 85 86 fseek( f, 0, SEEK_END); 87 if ((source_len = ftell(f)) <= 0){ 88 errln("Error reading test data file."); 89 fclose(f); 90 return FALSE; 91 } 92 93 source = new char[source_len]; 94 fseek(f, 0, SEEK_SET); 95 if (fread(source, 1, source_len, f) != source_len) { 96 errln("Error reading test data file."); 97 delete [] source; 98 fclose(f); 99 return FALSE; 100 } 101 fclose(f); 102 103 // convert the UTF-8 encoded stream to UTF-16 stream 104 UConverter* conv = ucnv_open("utf-8", &status); 105 int dest_len = ucnv_toUChars(conv, 106 NULL, // dest, 107 0, // destCapacity, 108 source, 109 source_len, 110 &status); 111 if (status == U_BUFFER_OVERFLOW_ERROR) { 112 // Buffer Overflow is expected from the preflight operation. 113 status = U_ZERO_ERROR; 114 UChar * dest = NULL; 115 dest = new UChar[ dest_len + 1]; 116 ucnv_toUChars(conv, dest, dest_len + 1, source, source_len, &status); 117 // Do not know the "if possible" behavior of ucnv_toUChars() 118 // Do it by ourself. 119 dest[dest_len] = 0; 120 len = dest_len; 121 base = dest; 122 delete [] source; 123 ucnv_close(conv); 124 return TRUE; // The buffer will owned by caller. 125 } 126 errln("UConverter error: %s", u_errorName(status)); 127 delete [] source; 128 ucnv_close(conv); 129 return FALSE; 130 } 131 132 int IdnaConfTest::isNewlineMark(){ 133 static const UChar LF = 0x0a; 134 static const UChar CR = 0x0d; 135 UChar c = base[curOffset]; 136 // CR LF 137 if ( c == CR && curOffset + 1 < len && base[curOffset + 1] == LF){ 138 return 2; 139 } 140 141 // CR or LF 142 if ( c == CR || c == LF) { 143 return 1; 144 } 145 146 return 0; 147 } 148 149 /* Read a logical line. 150 * 151 * All lines ending in a backslash (\) and immediately followed by a newline 152 * character are joined with the next line in the source file forming logical 153 * lines from the physical lines. 154 * 155 */ 156 UBool IdnaConfTest::ReadOneLine(UnicodeString& buf){ 157 if ( !(curOffset < len) ) return FALSE; // stream end 158 159 static const UChar BACKSLASH = 0x5c; 160 buf.remove(); 161 int t = 0; 162 while (curOffset < len){ 163 if ((t = isNewlineMark())) { // end of line 164 curOffset += t; 165 break; 166 } 167 UChar c = base[curOffset]; 168 if (c == BACKSLASH && curOffset < len -1){ // escaped new line mark 169 if ((t = isNewlineMark())){ 170 curOffset += 1 + t; // BACKSLAH and NewlineMark 171 continue; 172 } 173 }; 174 buf.append(c); 175 curOffset++; 176 } 177 return TRUE; 178 } 179 180 // 181 //=============================================================== 182 // 183 184 /* Explain <xxxxx> tag to a native value 185 * 186 * Since <xxxxx> is always larger than the native value, 187 * the operation will replace the tag directly in the buffer, 188 * and, of course, will shift tail elements. 189 */ 190 void IdnaConfTest::ExplainCodePointTag(UnicodeString& buf){ 191 buf.append((UChar)0); // add a terminal NULL 192 UChar* bufBase = buf.getBuffer(buf.length()); 193 UChar* p = bufBase; 194 while (*p != 0){ 195 if ( *p != 0x3C){ // < 196 *bufBase++ = *p++; 197 } else { 198 p++; // skip < 199 UChar32 cp = 0; 200 for ( ;*p != 0x3E; p++){ // > 201 if (0x30 <= *p && *p <= 0x39){ // 0-9 202 cp = (cp * 16) + (*p - 0x30); 203 } else if (0x61 <= *p && *p <= 0x66){ // a-f 204 cp = (cp * 16) + (*p - 0x61) + 10; 205 } else if (0x41 <= *p && *p <= 0x46) {// A-F 206 cp = (cp * 16) + (*p - 0x41) + 10; 207 } 208 // no else. hope everything is good. 209 } 210 p++; // skip > 211 if (U_IS_BMP(cp)){ 212 *bufBase++ = cp; 213 } else { 214 *bufBase++ = U16_LEAD(cp); 215 *bufBase++ = U16_TRAIL(cp); 216 } 217 } 218 } 219 *bufBase = 0; // close our buffer 220 buf.releaseBuffer(); 221 } 222 223 void IdnaConfTest::Call(){ 224 if (type == -1 || option == -1 || passfail == -1 || namebase.isBogus() || namezone.isBogus()){ 225 errln("Incomplete record"); 226 } else { 227 UErrorCode status = U_ZERO_ERROR; 228 UChar result[200] = {0,}; // simple life 229 const UChar *p = namebase.getTerminatedBuffer(); 230 const int p_len = namebase.length(); 231 232 if (type == 0 && option == 0){ 233 uidna_IDNToASCII(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status); 234 } else if (type == 0 && option == 1){ 235 uidna_IDNToASCII(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status); 236 } else if (type == 1 && option == 0){ 237 uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status); 238 } else if (type == 1 && option == 1){ 239 uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status); 240 } 241 if (passfail == 0){ 242 if (U_FAILURE(status)){ 243 id.append(" should pass, but failed. - "); 244 id.append(u_errorName(status)); 245 errcheckln(status, id); 246 } else{ 247 if (namezone.compare(result, -1) == 0){ 248 // expected 249 logln(UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result)); 250 } else { 251 id.append(" no error, but result is not as expected."); 252 errln(id); 253 } 254 } 255 } else if (passfail == 1){ 256 if (U_FAILURE(status)){ 257 // expected 258 // TODO: Uncomment this when U_IDNA_ZERO_LENGTH_LABEL_ERROR is added to u_errorName 259 //logln("Got the expected error: " + UnicodeString(u_errorName(status))); 260 } else{ 261 if (namebase.compare(result, -1) == 0){ 262 // garbage in -> garbage out 263 logln(UnicodeString("ICU will not recognize malformed ACE-Prefixes or incorrect ACE-Prefixes. ") + UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result)); 264 } else { 265 id.append(" should fail, but not failed. "); 266 id.append(u_errorName(status)); 267 errln(id); 268 } 269 } 270 } 271 } 272 type = option = passfail = -1; 273 namebase.setToBogus(); 274 namezone.setToBogus(); 275 id.remove(); 276 return; 277 } 278 279 void IdnaConfTest::Test(void){ 280 if (!ReadAndConvertFile())return; 281 282 UnicodeString s; 283 UnicodeString key; 284 UnicodeString value; 285 286 // skip everything before the first "=====" and "=====" itself 287 do { 288 if (!ReadOneLine(s)) { 289 errln("End of file prematurely found"); 290 break; 291 } 292 } 293 while (s.compare(C_TAG, -1) != 0); //"=====" 294 295 while(ReadOneLine(s)){ 296 s.trim(); 297 key.remove(); 298 value.remove(); 299 if (s.compare(C_TAG, -1) == 0){ //"=====" 300 Call(); 301 } else { 302 // explain key:value 303 int p = s.indexOf((UChar)0x3A); // : 304 key.setTo(s,0,p).trim(); 305 value.setTo(s,p+1).trim(); 306 if (key.compare(C_TYPE, -1) == 0){ 307 if (value.compare(C_TOASCII, -1) == 0) { 308 type = 0; 309 } else if (value.compare(C_TOUNICODE, -1) == 0){ 310 type = 1; 311 } 312 } else if (key.compare(C_PASSFAIL, -1) == 0){ 313 if (value.compare(C_PASS, -1) == 0){ 314 passfail = 0; 315 } else if (value.compare(C_FAIL, -1) == 0){ 316 passfail = 1; 317 } 318 } else if (key.compare(C_DESC, -1) == 0){ 319 if (value.indexOf(C_USESTD3ASCIIRULES, u_strlen(C_USESTD3ASCIIRULES), 0) == -1){ 320 option = 1; // not found 321 } else { 322 option = 0; 323 } 324 id.setTo(value, 0, value.indexOf((UChar)0x20)); // space 325 } else if (key.compare(C_NAMEZONE, -1) == 0){ 326 ExplainCodePointTag(value); 327 namezone.setTo(value); 328 } else if (key.compare(C_NAMEBASE, -1) == 0){ 329 ExplainCodePointTag(value); 330 namebase.setTo(value); 331 } 332 // just skip other lines 333 } 334 } 335 336 Call(); // for last record 337 } 338 #else 339 void IdnaConfTest::Test(void) 340 { 341 // test nothing... 342 } 343 #endif 344 345 void IdnaConfTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/){ 346 switch (index) { 347 TESTCASE(0,Test); 348 default: name = ""; break; 349 } 350 } 351 352 #endif 353