1 /* 2 ************************************************************************** 3 * 2016 and later: Unicode, Inc. and others. 4 * License & terms of use: http://www.unicode.org/copyright.html#License 5 ************************************************************************* 6 ************************************************************************* 7 * Copyright (C) 2002-2014, International Business Machines 8 * Corporation and others. All Rights Reserved. 9 ************************************************************************* 10 * file name: utfperf.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2005Nov17 16 * created by: Raymond Yang 17 * 18 * Ported from utfper.c created by Markus W. Scherer 19 * Performance test program for Unicode converters 20 */ 21 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include "unicode/uperf.h" 25 #include "cmemory.h" // for UPRV_LENGTHOF 26 #include "uoptions.h" 27 28 /* definitions and text buffers */ 29 30 #define INPUT_CAPACITY (1024*1024) 31 #define INTERMEDIATE_CAPACITY 4096 32 #define INTERMEDIATE_SMALL_CAPACITY 20 33 #define PIVOT_CAPACITY 1024 34 #define OUTPUT_CAPACITY INPUT_CAPACITY 35 36 static char utf8[INPUT_CAPACITY]; 37 static UChar pivot[INTERMEDIATE_CAPACITY]; 38 39 static UChar output[OUTPUT_CAPACITY]; 40 static char intermediate[OUTPUT_CAPACITY]; 41 42 static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints; 43 44 static int32_t fromUCallbackCount; 45 46 // Command-line options specific to utfperf. 47 // Options do not have abbreviations: Force readable command lines. 48 // (Using U+0001 for abbreviation characters.) 49 enum { 50 CHARSET, 51 CHUNK_LENGTH, 52 PIVOT_LENGTH, 53 UTFPERF_OPTIONS_COUNT 54 }; 55 56 static UOption options[UTFPERF_OPTIONS_COUNT]={ 57 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG), 58 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG), 59 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG) 60 }; 61 62 static const char *const utfperf_usage = 63 "\t--charset Charset for which to test performance, e.g. windows-1251.\n" 64 "\t Default: UTF-8\n" 65 "\t--chunk Length (in bytes) of charset output chunks. [4096]\n" 66 "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n" 67 "\t [1024]\n"; 68 69 // Test object. 70 class UtfPerformanceTest : public UPerfTest{ 71 public: 72 UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status) 73 : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usage, status) { 74 if (U_SUCCESS(status)) { 75 charset = options[CHARSET].value; 76 77 chunkLength = atoi(options[CHUNK_LENGTH].value); 78 if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) { 79 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY); 80 status = U_ILLEGAL_ARGUMENT_ERROR; 81 } 82 83 pivotLength = atoi(options[PIVOT_LENGTH].value); 84 if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) { 85 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY); 86 status = U_ILLEGAL_ARGUMENT_ERROR; 87 } 88 89 int32_t inputLength; 90 UPerfTest::getBuffer(inputLength, status); 91 countInputCodePoints = u_countChar32(buffer, bufferLen); 92 u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status); 93 } 94 } 95 96 virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL); 97 98 const UChar *getBuffer() const { return buffer; } 99 int32_t getBufferLen() const { return bufferLen; } 100 101 const char *charset; 102 int32_t chunkLength, pivotLength; 103 }; 104 105 U_CDECL_BEGIN 106 // Custom callback for counting callback calls. 107 static void U_CALLCONV 108 fromUCallback(const void *context, 109 UConverterFromUnicodeArgs *fromUArgs, 110 const UChar *codeUnits, 111 int32_t length, 112 UChar32 codePoint, 113 UConverterCallbackReason reason, 114 UErrorCode *pErrorCode) { 115 if (reason <= UCNV_IRREGULAR) { 116 ++fromUCallbackCount; 117 } 118 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode); 119 } 120 U_CDECL_END 121 122 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup. 123 class Command : public UPerfFunction { 124 protected: 125 Command(const UtfPerformanceTest &testcase) 126 : testcase(testcase), 127 input(testcase.getBuffer()), inputLength(testcase.getBufferLen()), 128 errorCode(U_ZERO_ERROR) { 129 cnv=ucnv_open(testcase.charset, &errorCode); 130 if (U_FAILURE(errorCode)) { 131 fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode)); 132 } 133 ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode); 134 } 135 public: 136 virtual ~Command(){ 137 if(U_SUCCESS(errorCode)) { 138 ucnv_close(cnv); 139 } 140 } 141 // virtual void call(UErrorCode* pErrorCode) { ... } 142 virtual long getOperationsPerIteration(){ 143 return countInputCodePoints; 144 } 145 146 const UtfPerformanceTest &testcase; 147 const UChar *input; 148 int32_t inputLength; 149 UErrorCode errorCode; 150 UConverter *cnv; 151 }; 152 153 // Test roundtrip UTF-16->encoding->UTF-16. 154 class Roundtrip : public Command { 155 protected: 156 Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {} 157 public: 158 static UPerfFunction* get(const UtfPerformanceTest &testcase) { 159 Roundtrip * t = new Roundtrip(testcase); 160 if (U_SUCCESS(t->errorCode)){ 161 return t; 162 } else { 163 delete t; 164 return NULL; 165 } 166 } 167 virtual void call(UErrorCode* pErrorCode){ 168 const UChar *pIn, *pInLimit; 169 UChar *pOut, *pOutLimit; 170 char *pInter, *pInterLimit; 171 const char *p; 172 UBool flush; 173 174 ucnv_reset(cnv); 175 fromUCallbackCount=0; 176 177 pIn=input; 178 pInLimit=input+inputLength; 179 180 pOut=output; 181 pOutLimit=output+OUTPUT_CAPACITY; 182 183 pInterLimit=intermediate+testcase.chunkLength; 184 185 encodedLength=outputLength=0; 186 flush=FALSE; 187 188 do { 189 /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */ 190 pInter=intermediate; 191 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode); 192 encodedLength+=(int32_t)(pInter-intermediate); 193 194 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 195 /* make sure that we convert once more to really flush */ 196 *pErrorCode=U_ZERO_ERROR; 197 } else if(U_FAILURE(*pErrorCode)) { 198 return; 199 } else if(pIn==pInLimit) { 200 flush=TRUE; 201 } 202 203 /* convert the block [intermediate..pInter[ back to UTF-16 */ 204 p=intermediate; 205 ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode); 206 if(U_FAILURE(*pErrorCode)) { 207 return; 208 } 209 /* intermediate must have been consumed (p==pInter) because of the converter semantics */ 210 } while(!flush); 211 212 outputLength=pOut-output; 213 if(inputLength!=outputLength) { 214 fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength); 215 *pErrorCode=U_INTERNAL_PROGRAM_ERROR; 216 } 217 } 218 }; 219 220 // Test one-way conversion UTF-16->encoding. 221 class FromUnicode : public Command { 222 protected: 223 FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {} 224 public: 225 static UPerfFunction* get(const UtfPerformanceTest &testcase) { 226 FromUnicode * t = new FromUnicode(testcase); 227 if (U_SUCCESS(t->errorCode)){ 228 return t; 229 } else { 230 delete t; 231 return NULL; 232 } 233 } 234 virtual void call(UErrorCode* pErrorCode){ 235 const UChar *pIn, *pInLimit; 236 char *pInter, *pInterLimit; 237 238 ucnv_resetFromUnicode(cnv); 239 fromUCallbackCount=0; 240 241 pIn=input; 242 pInLimit=input+inputLength; 243 244 pInterLimit=intermediate+testcase.chunkLength; 245 246 encodedLength=0; 247 248 for(;;) { 249 pInter=intermediate; 250 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode); 251 encodedLength+=(int32_t)(pInter-intermediate); 252 253 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 254 /* make sure that we convert once more to really flush */ 255 *pErrorCode=U_ZERO_ERROR; 256 } else if(U_FAILURE(*pErrorCode)) { 257 return; 258 } else { 259 break; // all done 260 } 261 } 262 } 263 }; 264 265 // Test one-way conversion UTF-8->encoding. 266 class FromUTF8 : public Command { 267 protected: 268 FromUTF8(const UtfPerformanceTest &testcase) 269 : Command(testcase), 270 utf8Cnv(NULL), 271 input8(utf8), input8Length(utf8Length) { 272 utf8Cnv=ucnv_open("UTF-8", &errorCode); 273 } 274 public: 275 static UPerfFunction* get(const UtfPerformanceTest &testcase) { 276 FromUTF8 * t = new FromUTF8(testcase); 277 if (U_SUCCESS(t->errorCode)){ 278 return t; 279 } else { 280 delete t; 281 return NULL; 282 } 283 } 284 ~FromUTF8() { 285 ucnv_close(utf8Cnv); 286 } 287 virtual void call(UErrorCode* pErrorCode){ 288 const char *pIn, *pInLimit; 289 char *pInter, *pInterLimit; 290 UChar *pivotSource, *pivotTarget, *pivotLimit; 291 292 ucnv_resetToUnicode(utf8Cnv); 293 ucnv_resetFromUnicode(cnv); 294 fromUCallbackCount=0; 295 296 pIn=input8; 297 pInLimit=input8+input8Length; 298 299 pInterLimit=intermediate+testcase.chunkLength; 300 301 pivotSource=pivotTarget=pivot; 302 pivotLimit=pivot+testcase.pivotLength; 303 304 encodedLength=0; 305 306 for(;;) { 307 pInter=intermediate; 308 ucnv_convertEx(cnv, utf8Cnv, 309 &pInter, pInterLimit, 310 &pIn, pInLimit, 311 pivot, &pivotSource, &pivotTarget, pivotLimit, 312 FALSE, TRUE, pErrorCode); 313 encodedLength+=(int32_t)(pInter-intermediate); 314 315 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 316 /* make sure that we convert once more to really flush */ 317 *pErrorCode=U_ZERO_ERROR; 318 } else if(U_FAILURE(*pErrorCode)) { 319 return; 320 } else { 321 break; // all done 322 } 323 } 324 } 325 protected: 326 UConverter *utf8Cnv; 327 const char *input8; 328 int32_t input8Length; 329 }; 330 331 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) { 332 switch (index) { 333 case 0: name = "Roundtrip"; if (exec) return Roundtrip::get(*this); break; 334 case 1: name = "FromUnicode"; if (exec) return FromUnicode::get(*this); break; 335 case 2: name = "FromUTF8"; if (exec) return FromUTF8::get(*this); break; 336 default: name = ""; break; 337 } 338 return NULL; 339 } 340 341 int main(int argc, const char *argv[]) 342 { 343 // Default values for command-line options. 344 options[CHARSET].value = "UTF-8"; 345 options[CHUNK_LENGTH].value = "4096"; 346 options[PIVOT_LENGTH].value = "1024"; 347 348 UErrorCode status = U_ZERO_ERROR; 349 UtfPerformanceTest test(argc, argv, status); 350 351 if (U_FAILURE(status)){ 352 printf("The error is %s\n", u_errorName(status)); 353 test.usage(); 354 return status; 355 } 356 357 if (test.run() == FALSE){ 358 fprintf(stderr, "FAILED: Tests could not be run please check the " 359 "arguments.\n"); 360 return -1; 361 } 362 363 if (fromUCallbackCount > 0) { 364 printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount); 365 } 366 367 return 0; 368 } 369