Home | History | Annotate | Download | only in utfperf
      1 /*
      2 **************************************************************************
      3  *    2016 and later: Unicode, Inc. and others.
      4  *   License & terms of use: http://www.unicode.org/copyright.html#License
      5  *************************************************************************
      6  *************************************************************************
      7  *   Copyright (C) 2002-2014, International Business Machines
      8  *   Corporation and others.  All Rights Reserved.
      9  *************************************************************************
     10  *   file name:  utfperf.cpp
     11  *   encoding:   UTF-8
     12  *   tab size:   8 (not used)
     13  *   indentation:4
     14  *
     15  *   created on: 2005Nov17
     16  *   created by: Raymond Yang
     17  *
     18  *   Ported from utfper.c created by Markus W. Scherer
     19  *   Performance test program for Unicode converters
     20  */
     21 
     22 #include <stdio.h>
     23 #include <stdlib.h>
     24 #include "unicode/uperf.h"
     25 #include "cmemory.h" // for UPRV_LENGTHOF
     26 #include "uoptions.h"
     27 
/*
 * Definitions and text buffers.
 *
 * All buffers are file-scope statics that are shared by every test in this
 * file, so the timed call() methods do not allocate.
 */

/* Capacity of the UTF-8 copy of the input text, in bytes. */
#define INPUT_CAPACITY (1024*1024)
/* Number of UChars in the pivot[] array (see the note on pivot[] below). */
#define INTERMEDIATE_CAPACITY 4096
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define INTERMEDIATE_SMALL_CAPACITY 20
/* Largest value accepted for the --pivot option. */
#define PIVOT_CAPACITY 1024
/* Capacity of output[] and intermediate[]; also the largest value accepted for --chunk. */
#define OUTPUT_CAPACITY INPUT_CAPACITY

/* UTF-8 form of the input text; filled once by the UtfPerformanceTest constructor. */
static char utf8[INPUT_CAPACITY];
/*
 * UTF-16 pivot buffer handed to ucnv_convertEx() by the FromUTF8 test.
 * The array has INTERMEDIATE_CAPACITY elements, but the test only uses the
 * first pivotLength of them, and pivotLength is limited to PIVOT_CAPACITY.
 */
static UChar pivot[INTERMEDIATE_CAPACITY];

/* Receives the UTF-16 text converted back from the charset in the Roundtrip test. */
static UChar output[OUTPUT_CAPACITY];
/* Receives charset bytes; each conversion call writes at most chunkLength bytes from its start. */
static char intermediate[OUTPUT_CAPACITY];

/*
 * utf8Length:           number of bytes stored in utf8[].
 * encodedLength:        charset bytes produced so far by the current/last call().
 * outputLength:         UChars produced by the last Roundtrip call().
 * countInputCodePoints: code points in the input text; reported as the
 *                       number of operations per iteration.
 */
static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;

/* Number of counted fromUCallback() invocations; reset at the start of every call(). */
static int32_t fromUCallbackCount;
     45 
// Command-line options specific to utfperf.
// Options do not have abbreviations: Force readable command lines.
// (Using U+0001 for abbreviation characters.)
//
// The enum constants are the indexes of the corresponding entries in
// options[]; UTFPERF_OPTIONS_COUNT is the number of entries.
enum {
    CHARSET,
    CHUNK_LENGTH,
    PIVOT_LENGTH,
    UTFPERF_OPTIONS_COUNT
};

// Option table passed to the UPerfTest constructor. Every option requires
// an argument. The default values are assigned in main() before parsing.
static UOption options[UTFPERF_OPTIONS_COUNT]={
    UOPTION_DEF("charset",  '\x01', UOPT_REQUIRES_ARG),
    UOPTION_DEF("chunk",    '\x01', UOPT_REQUIRES_ARG),
    UOPTION_DEF("pivot",    '\x01', UOPT_REQUIRES_ARG)
};

// Usage text for the options above, passed to the UPerfTest constructor.
// The values in [brackets] are the defaults that main() sets.
static const char *const utfperf_usage =
    "\t--charset   Charset for which to test performance, e.g. windows-1251.\n"
    "\t            Default: UTF-8\n"
    "\t--chunk     Length (in bytes) of charset output chunks. [4096]\n"
    "\t--pivot     Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
    "\t            [1024]\n";
     68 
     69 // Test object.
     70 class  UtfPerformanceTest : public UPerfTest{
     71 public:
     72     UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
     73             : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usage, status) {
     74         if (U_SUCCESS(status)) {
     75             charset = options[CHARSET].value;
     76 
     77             chunkLength = atoi(options[CHUNK_LENGTH].value);
     78             if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
     79                 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
     80                 status = U_ILLEGAL_ARGUMENT_ERROR;
     81             }
     82 
     83             pivotLength = atoi(options[PIVOT_LENGTH].value);
     84             if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
     85                 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
     86                 status = U_ILLEGAL_ARGUMENT_ERROR;
     87             }
     88 
     89             int32_t inputLength;
     90             UPerfTest::getBuffer(inputLength, status);
     91             countInputCodePoints = u_countChar32(buffer, bufferLen);
     92             u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
     93         }
     94     }
     95 
     96     virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
     97 
     98     const UChar *getBuffer() const { return buffer; }
     99     int32_t getBufferLen() const { return bufferLen; }
    100 
    101     const char *charset;
    102     int32_t chunkLength, pivotLength;
    103 };
    104 
    105 U_CDECL_BEGIN
    106 // Custom callback for counting callback calls.
    107 static void U_CALLCONV
    108 fromUCallback(const void *context,
    109               UConverterFromUnicodeArgs *fromUArgs,
    110               const UChar *codeUnits,
    111               int32_t length,
    112               UChar32 codePoint,
    113               UConverterCallbackReason reason,
    114               UErrorCode *pErrorCode) {
    115     if (reason <= UCNV_IRREGULAR) {
    116         ++fromUCallbackCount;
    117     }
    118     UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
    119 }
    120 U_CDECL_END
    121 
    122 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
    123 class Command : public UPerfFunction {
    124 protected:
    125     Command(const UtfPerformanceTest &testcase)
    126             : testcase(testcase),
    127               input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
    128               errorCode(U_ZERO_ERROR) {
    129         cnv=ucnv_open(testcase.charset, &errorCode);
    130         if (U_FAILURE(errorCode)) {
    131             fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
    132         }
    133         ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
    134     }
    135 public:
    136     virtual ~Command(){
    137         if(U_SUCCESS(errorCode)) {
    138             ucnv_close(cnv);
    139         }
    140     }
    141     // virtual void call(UErrorCode* pErrorCode) { ... }
    142     virtual long getOperationsPerIteration(){
    143         return countInputCodePoints;
    144     }
    145 
    146     const UtfPerformanceTest &testcase;
    147     const UChar *input;
    148     int32_t inputLength;
    149     UErrorCode errorCode;
    150     UConverter *cnv;
    151 };
    152 
    153 // Test roundtrip UTF-16->encoding->UTF-16.
    154 class Roundtrip : public Command {
    155 protected:
    156     Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
    157 public:
    158     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
    159         Roundtrip * t = new Roundtrip(testcase);
    160         if (U_SUCCESS(t->errorCode)){
    161             return t;
    162         } else {
    163             delete t;
    164             return NULL;
    165         }
    166     }
    167     virtual void call(UErrorCode* pErrorCode){
    168         const UChar *pIn, *pInLimit;
    169         UChar *pOut, *pOutLimit;
    170         char *pInter, *pInterLimit;
    171         const char *p;
    172         UBool flush;
    173 
    174         ucnv_reset(cnv);
    175         fromUCallbackCount=0;
    176 
    177         pIn=input;
    178         pInLimit=input+inputLength;
    179 
    180         pOut=output;
    181         pOutLimit=output+OUTPUT_CAPACITY;
    182 
    183         pInterLimit=intermediate+testcase.chunkLength;
    184 
    185         encodedLength=outputLength=0;
    186         flush=FALSE;
    187 
    188         do {
    189             /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
    190             pInter=intermediate;
    191             ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
    192             encodedLength+=(int32_t)(pInter-intermediate);
    193 
    194             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
    195                 /* make sure that we convert once more to really flush */
    196                 *pErrorCode=U_ZERO_ERROR;
    197             } else if(U_FAILURE(*pErrorCode)) {
    198                 return;
    199             } else if(pIn==pInLimit) {
    200                 flush=TRUE;
    201             }
    202 
    203             /* convert the block [intermediate..pInter[ back to UTF-16 */
    204             p=intermediate;
    205             ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
    206             if(U_FAILURE(*pErrorCode)) {
    207                 return;
    208             }
    209             /* intermediate must have been consumed (p==pInter) because of the converter semantics */
    210         } while(!flush);
    211 
    212         outputLength=pOut-output;
    213         if(inputLength!=outputLength) {
    214             fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
    215             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    216         }
    217     }
    218 };
    219 
    220 // Test one-way conversion UTF-16->encoding.
    221 class FromUnicode : public Command {
    222 protected:
    223     FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
    224 public:
    225     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
    226         FromUnicode * t = new FromUnicode(testcase);
    227         if (U_SUCCESS(t->errorCode)){
    228             return t;
    229         } else {
    230             delete t;
    231             return NULL;
    232         }
    233     }
    234     virtual void call(UErrorCode* pErrorCode){
    235         const UChar *pIn, *pInLimit;
    236         char *pInter, *pInterLimit;
    237 
    238         ucnv_resetFromUnicode(cnv);
    239         fromUCallbackCount=0;
    240 
    241         pIn=input;
    242         pInLimit=input+inputLength;
    243 
    244         pInterLimit=intermediate+testcase.chunkLength;
    245 
    246         encodedLength=0;
    247 
    248         for(;;) {
    249             pInter=intermediate;
    250             ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
    251             encodedLength+=(int32_t)(pInter-intermediate);
    252 
    253             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
    254                 /* make sure that we convert once more to really flush */
    255                 *pErrorCode=U_ZERO_ERROR;
    256             } else if(U_FAILURE(*pErrorCode)) {
    257                 return;
    258             } else {
    259                 break;  // all done
    260             }
    261         }
    262     }
    263 };
    264 
    265 // Test one-way conversion UTF-8->encoding.
    266 class FromUTF8 : public Command {
    267 protected:
    268     FromUTF8(const UtfPerformanceTest &testcase)
    269             : Command(testcase),
    270               utf8Cnv(NULL),
    271               input8(utf8), input8Length(utf8Length) {
    272         utf8Cnv=ucnv_open("UTF-8", &errorCode);
    273     }
    274 public:
    275     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
    276         FromUTF8 * t = new FromUTF8(testcase);
    277         if (U_SUCCESS(t->errorCode)){
    278             return t;
    279         } else {
    280             delete t;
    281             return NULL;
    282         }
    283     }
    284     ~FromUTF8() {
    285         ucnv_close(utf8Cnv);
    286     }
    287     virtual void call(UErrorCode* pErrorCode){
    288         const char *pIn, *pInLimit;
    289         char *pInter, *pInterLimit;
    290         UChar *pivotSource, *pivotTarget, *pivotLimit;
    291 
    292         ucnv_resetToUnicode(utf8Cnv);
    293         ucnv_resetFromUnicode(cnv);
    294         fromUCallbackCount=0;
    295 
    296         pIn=input8;
    297         pInLimit=input8+input8Length;
    298 
    299         pInterLimit=intermediate+testcase.chunkLength;
    300 
    301         pivotSource=pivotTarget=pivot;
    302         pivotLimit=pivot+testcase.pivotLength;
    303 
    304         encodedLength=0;
    305 
    306         for(;;) {
    307             pInter=intermediate;
    308             ucnv_convertEx(cnv, utf8Cnv,
    309                            &pInter, pInterLimit,
    310                            &pIn, pInLimit,
    311                            pivot, &pivotSource, &pivotTarget, pivotLimit,
    312                            FALSE, TRUE, pErrorCode);
    313             encodedLength+=(int32_t)(pInter-intermediate);
    314 
    315             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
    316                 /* make sure that we convert once more to really flush */
    317                 *pErrorCode=U_ZERO_ERROR;
    318             } else if(U_FAILURE(*pErrorCode)) {
    319                 return;
    320             } else {
    321                 break;  // all done
    322             }
    323         }
    324     }
    325 protected:
    326     UConverter *utf8Cnv;
    327     const char *input8;
    328     int32_t input8Length;
    329 };
    330 
    331 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
    332     switch (index) {
    333         case 0: name = "Roundtrip";     if (exec) return Roundtrip::get(*this); break;
    334         case 1: name = "FromUnicode";   if (exec) return FromUnicode::get(*this); break;
    335         case 2: name = "FromUTF8";      if (exec) return FromUTF8::get(*this); break;
    336         default: name = ""; break;
    337     }
    338     return NULL;
    339 }
    340 
    341 int main(int argc, const char *argv[])
    342 {
    343     // Default values for command-line options.
    344     options[CHARSET].value = "UTF-8";
    345     options[CHUNK_LENGTH].value = "4096";
    346     options[PIVOT_LENGTH].value = "1024";
    347 
    348     UErrorCode status = U_ZERO_ERROR;
    349     UtfPerformanceTest test(argc, argv, status);
    350 
    351 	if (U_FAILURE(status)){
    352         printf("The error is %s\n", u_errorName(status));
    353         test.usage();
    354         return status;
    355     }
    356 
    357     if (test.run() == FALSE){
    358         fprintf(stderr, "FAILED: Tests could not be run please check the "
    359 			            "arguments.\n");
    360         return -1;
    361     }
    362 
    363     if (fromUCallbackCount > 0) {
    364         printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
    365     }
    366 
    367     return 0;
    368 }
    369