1 /* 2 ********************************************************************** 3 * Copyright (C) 2002-2007, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * file name: utfperf.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2005Nov17 12 * created by: Raymond Yang 13 * 14 * Ported from utfper.c created by Markus W. Scherer 15 * Performance test program for Unicode converters 16 */ 17 18 #include <stdio.h> 19 #include <stdlib.h> 20 #include "unicode/uperf.h" 21 #include "uoptions.h" 22 23 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 24 25 /* definitions and text buffers */ 26 27 #define INPUT_CAPACITY (1024*1024) 28 #define INTERMEDIATE_CAPACITY 4096 29 #define INTERMEDIATE_SMALL_CAPACITY 20 30 #define PIVOT_CAPACITY 1024 31 #define OUTPUT_CAPACITY INPUT_CAPACITY 32 33 static char utf8[INPUT_CAPACITY]; 34 static UChar pivot[INTERMEDIATE_CAPACITY]; 35 36 static UChar output[OUTPUT_CAPACITY]; 37 static char intermediate[OUTPUT_CAPACITY]; 38 39 static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints; 40 41 static int32_t fromUCallbackCount; 42 43 // Command-line options specific to utfperf. 44 // Options do not have abbreviations: Force readable command lines. 45 // (Using U+0001 for abbreviation characters.) 46 enum { 47 CHARSET, 48 CHUNK_LENGTH, 49 PIVOT_LENGTH, 50 UTFPERF_OPTIONS_COUNT 51 }; 52 53 static UOption options[UTFPERF_OPTIONS_COUNT]={ 54 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG), 55 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG), 56 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG) 57 }; 58 59 static const char *const utfperf_usage = 60 "\t--charset Charset for which to test performance, e.g. windows-1251.\n" 61 "\t Default: UTF-8\n" 62 "\t--chunk Length (in bytes) of charset output chunks. [4096]\n" 63 "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n" 64 "\t [1024]\n"; 65 66 // Test object. 67 class UtfPerformanceTest : public UPerfTest{ 68 public: 69 UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status) 70 : UPerfTest(argc, argv, options, LENGTHOF(options), utfperf_usage, status) { 71 if (U_SUCCESS(status)) { 72 charset = options[CHARSET].value; 73 74 chunkLength = atoi(options[CHUNK_LENGTH].value); 75 if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) { 76 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY); 77 status = U_ILLEGAL_ARGUMENT_ERROR; 78 } 79 80 pivotLength = atoi(options[PIVOT_LENGTH].value); 81 if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) { 82 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY); 83 status = U_ILLEGAL_ARGUMENT_ERROR; 84 } 85 86 int32_t inputLength; 87 UPerfTest::getBuffer(inputLength, status); 88 countInputCodePoints = u_countChar32(buffer, bufferLen); 89 u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status); 90 } 91 } 92 93 virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL); 94 95 const UChar *getBuffer() const { return buffer; } 96 int32_t getBufferLen() const { return bufferLen; } 97 98 const char *charset; 99 int32_t chunkLength, pivotLength; 100 }; 101 102 U_CDECL_BEGIN 103 // Custom callback for counting callback calls. 104 static void U_CALLCONV 105 fromUCallback(const void *context, 106 UConverterFromUnicodeArgs *fromUArgs, 107 const UChar *codeUnits, 108 int32_t length, 109 UChar32 codePoint, 110 UConverterCallbackReason reason, 111 UErrorCode *pErrorCode) { 112 if (reason <= UCNV_IRREGULAR) { 113 ++fromUCallbackCount; 114 } 115 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode); 116 } 117 U_CDECL_END 118 119 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup. 120 class Command : public UPerfFunction { 121 protected: 122 Command(const UtfPerformanceTest &testcase) 123 : testcase(testcase), 124 input(testcase.getBuffer()), inputLength(testcase.getBufferLen()), 125 errorCode(U_ZERO_ERROR) { 126 cnv=ucnv_open(testcase.charset, &errorCode); 127 if (U_FAILURE(errorCode)) { 128 fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode)); 129 } 130 ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode); 131 } 132 public: 133 virtual ~Command(){ 134 if(U_SUCCESS(errorCode)) { 135 ucnv_close(cnv); 136 } 137 } 138 // virtual void call(UErrorCode* pErrorCode) { ... } 139 virtual long getOperationsPerIteration(){ 140 return countInputCodePoints; 141 } 142 143 const UtfPerformanceTest &testcase; 144 const UChar *input; 145 int32_t inputLength; 146 UErrorCode errorCode; 147 UConverter *cnv; 148 }; 149 150 // Test roundtrip UTF-16->encoding->UTF-16. 151 class Roundtrip : public Command { 152 protected: 153 Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {} 154 public: 155 static UPerfFunction* get(const UtfPerformanceTest &testcase) { 156 Roundtrip * t = new Roundtrip(testcase); 157 if (U_SUCCESS(t->errorCode)){ 158 return t; 159 } else { 160 delete t; 161 return NULL; 162 } 163 } 164 virtual void call(UErrorCode* pErrorCode){ 165 const UChar *pIn, *pInLimit; 166 UChar *pOut, *pOutLimit; 167 char *pInter, *pInterLimit; 168 const char *p; 169 UBool flush; 170 171 ucnv_reset(cnv); 172 fromUCallbackCount=0; 173 174 pIn=input; 175 pInLimit=input+inputLength; 176 177 pOut=output; 178 pOutLimit=output+OUTPUT_CAPACITY; 179 180 pInterLimit=intermediate+testcase.chunkLength; 181 182 encodedLength=outputLength=0; 183 flush=FALSE; 184 185 do { 186 /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */ 187 pInter=intermediate; 188 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode); 189 encodedLength+=(int32_t)(pInter-intermediate); 190 191 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 192 /* make sure that we convert once more to really flush */ 193 *pErrorCode=U_ZERO_ERROR; 194 } else if(U_FAILURE(*pErrorCode)) { 195 return; 196 } else if(pIn==pInLimit) { 197 flush=TRUE; 198 } 199 200 /* convert the block [intermediate..pInter[ back to UTF-16 */ 201 p=intermediate; 202 ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode); 203 if(U_FAILURE(*pErrorCode)) { 204 return; 205 } 206 /* intermediate must have been consumed (p==pInter) because of the converter semantics */ 207 } while(!flush); 208 209 outputLength=pOut-output; 210 if(inputLength!=outputLength) { 211 fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength); 212 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; 213 } 214 } 215 }; 216 217 // Test one-way conversion UTF-16->encoding. 218 class FromUnicode : public Command { 219 protected: 220 FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {} 221 public: 222 static UPerfFunction* get(const UtfPerformanceTest &testcase) { 223 FromUnicode * t = new FromUnicode(testcase); 224 if (U_SUCCESS(t->errorCode)){ 225 return t; 226 } else { 227 delete t; 228 return NULL; 229 } 230 } 231 virtual void call(UErrorCode* pErrorCode){ 232 const UChar *pIn, *pInLimit; 233 char *pInter, *pInterLimit; 234 235 ucnv_resetFromUnicode(cnv); 236 fromUCallbackCount=0; 237 238 pIn=input; 239 pInLimit=input+inputLength; 240 241 pInterLimit=intermediate+testcase.chunkLength; 242 243 encodedLength=0; 244 245 for(;;) { 246 pInter=intermediate; 247 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode); 248 encodedLength+=(int32_t)(pInter-intermediate); 249 250 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 251 /* make sure that we convert once more to really flush */ 252 *pErrorCode=U_ZERO_ERROR; 253 } else if(U_FAILURE(*pErrorCode)) { 254 return; 255 } else { 256 break; // all done 257 } 258 } 259 } 260 }; 261 262 // Test one-way conversion UTF-8->encoding. 263 class FromUTF8 : public Command { 264 protected: 265 FromUTF8(const UtfPerformanceTest &testcase) 266 : Command(testcase), 267 utf8Cnv(NULL), 268 input8(utf8), input8Length(utf8Length) { 269 utf8Cnv=ucnv_open("UTF-8", &errorCode); 270 } 271 public: 272 static UPerfFunction* get(const UtfPerformanceTest &testcase) { 273 FromUTF8 * t = new FromUTF8(testcase); 274 if (U_SUCCESS(t->errorCode)){ 275 return t; 276 } else { 277 delete t; 278 return NULL; 279 } 280 } 281 ~FromUTF8() { 282 ucnv_close(utf8Cnv); 283 } 284 virtual void call(UErrorCode* pErrorCode){ 285 const char *pIn, *pInLimit; 286 char *pInter, *pInterLimit; 287 UChar *pivotSource, *pivotTarget, *pivotLimit; 288 289 ucnv_resetToUnicode(utf8Cnv); 290 ucnv_resetFromUnicode(cnv); 291 fromUCallbackCount=0; 292 293 pIn=input8; 294 pInLimit=input8+input8Length; 295 296 pInterLimit=intermediate+testcase.chunkLength; 297 298 pivotSource=pivotTarget=pivot; 299 pivotLimit=pivot+testcase.pivotLength; 300 301 encodedLength=0; 302 303 for(;;) { 304 pInter=intermediate; 305 ucnv_convertEx(cnv, utf8Cnv, 306 &pInter, pInterLimit, 307 &pIn, pInLimit, 308 pivot, &pivotSource, &pivotTarget, pivotLimit, 309 FALSE, TRUE, pErrorCode); 310 encodedLength+=(int32_t)(pInter-intermediate); 311 312 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 313 /* make sure that we convert once more to really flush */ 314 *pErrorCode=U_ZERO_ERROR; 315 } else if(U_FAILURE(*pErrorCode)) { 316 return; 317 } else { 318 break; // all done 319 } 320 } 321 } 322 protected: 323 UConverter *utf8Cnv; 324 const char *input8; 325 int32_t input8Length; 326 }; 327 328 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) { 329 switch (index) { 330 case 0: name = "Roundtrip"; if (exec) return Roundtrip::get(*this); break; 331 case 1: name = "FromUnicode"; if (exec) return FromUnicode::get(*this); break; 332 case 2: name = "FromUTF8"; if (exec) return FromUTF8::get(*this); break; 333 default: name = ""; break; 334 } 335 return NULL; 336 } 337 338 int main(int argc, const char *argv[]) 339 { 340 // Default values for command-line options. 341 options[CHARSET].value = "UTF-8"; 342 options[CHUNK_LENGTH].value = "4096"; 343 options[PIVOT_LENGTH].value = "1024"; 344 345 UErrorCode status = U_ZERO_ERROR; 346 UtfPerformanceTest test(argc, argv, status); 347 348 if (U_FAILURE(status)){ 349 printf("The error is %s\n", u_errorName(status)); 350 test.usage(); 351 return status; 352 } 353 354 if (test.run() == FALSE){ 355 fprintf(stderr, "FAILED: Tests could not be run please check the " 356 "arguments.\n"); 357 return -1; 358 } 359 360 if (fromUCallbackCount > 0) { 361 printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount); 362 } 363 364 return 0; 365 } 366