Home | History | Annotate | Download | only in utfperf
      1 /*
      2  **********************************************************************
      3  *   Copyright (C) 2002-2007, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  *   file name:  utfperf.cpp
      7  *   encoding:   US-ASCII
      8  *   tab size:   8 (not used)
      9  *   indentation:4
     10  *
     11  *   created on: 2005Nov17
     12  *   created by: Raymond Yang
     13  *
     14  *   Ported from utfper.c created by Markus W. Scherer
     15  *   Performance test program for Unicode converters
     16  */
     17 
     18 #include <stdio.h>
     19 #include <stdlib.h>
     20 #include "unicode/uperf.h"
     21 #include "uoptions.h"
     22 
     23 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     24 
     25 /* definitions and text buffers */
     26 
     27 #define INPUT_CAPACITY (1024*1024)
     28 #define INTERMEDIATE_CAPACITY 4096
     29 #define INTERMEDIATE_SMALL_CAPACITY 20
     30 #define PIVOT_CAPACITY 1024
     31 #define OUTPUT_CAPACITY INPUT_CAPACITY
     32 
     33 static char utf8[INPUT_CAPACITY];
     34 static UChar pivot[INTERMEDIATE_CAPACITY];
     35 
     36 static UChar output[OUTPUT_CAPACITY];
     37 static char intermediate[OUTPUT_CAPACITY];
     38 
     39 static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;
     40 
     41 static int32_t fromUCallbackCount;
     42 
     43 // Command-line options specific to utfperf.
     44 // Options do not have abbreviations: Force readable command lines.
     45 // (Using U+0001 for abbreviation characters.)
     46 enum {
     47     CHARSET,
     48     CHUNK_LENGTH,
     49     PIVOT_LENGTH,
     50     UTFPERF_OPTIONS_COUNT
     51 };
     52 
     53 static UOption options[UTFPERF_OPTIONS_COUNT]={
     54     UOPTION_DEF("charset",  '\x01', UOPT_REQUIRES_ARG),
     55     UOPTION_DEF("chunk",    '\x01', UOPT_REQUIRES_ARG),
     56     UOPTION_DEF("pivot",    '\x01', UOPT_REQUIRES_ARG)
     57 };
     58 
     59 static const char *const utfperf_usage =
     60     "\t--charset   Charset for which to test performance, e.g. windows-1251.\n"
     61     "\t            Default: UTF-8\n"
     62     "\t--chunk     Length (in bytes) of charset output chunks. [4096]\n"
     63     "\t--pivot     Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
     64     "\t            [1024]\n";
     65 
     66 // Test object.
     67 class  UtfPerformanceTest : public UPerfTest{
     68 public:
     69     UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
     70             : UPerfTest(argc, argv, options, LENGTHOF(options), utfperf_usage, status) {
     71         if (U_SUCCESS(status)) {
     72             charset = options[CHARSET].value;
     73 
     74             chunkLength = atoi(options[CHUNK_LENGTH].value);
     75             if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
     76                 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
     77                 status = U_ILLEGAL_ARGUMENT_ERROR;
     78             }
     79 
     80             pivotLength = atoi(options[PIVOT_LENGTH].value);
     81             if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
     82                 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
     83                 status = U_ILLEGAL_ARGUMENT_ERROR;
     84             }
     85 
     86             int32_t inputLength;
     87             UPerfTest::getBuffer(inputLength, status);
     88             countInputCodePoints = u_countChar32(buffer, bufferLen);
     89             u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
     90         }
     91     }
     92 
     93     virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
     94 
     95     const UChar *getBuffer() const { return buffer; }
     96     int32_t getBufferLen() const { return bufferLen; }
     97 
     98     const char *charset;
     99     int32_t chunkLength, pivotLength;
    100 };
    101 
    102 U_CDECL_BEGIN
    103 // Custom callback for counting callback calls.
    104 static void U_CALLCONV
    105 fromUCallback(const void *context,
    106               UConverterFromUnicodeArgs *fromUArgs,
    107               const UChar *codeUnits,
    108               int32_t length,
    109               UChar32 codePoint,
    110               UConverterCallbackReason reason,
    111               UErrorCode *pErrorCode) {
    112     if (reason <= UCNV_IRREGULAR) {
    113         ++fromUCallbackCount;
    114     }
    115     UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
    116 }
    117 U_CDECL_END
    118 
    119 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
    120 class Command : public UPerfFunction {
    121 protected:
    122     Command(const UtfPerformanceTest &testcase)
    123             : testcase(testcase),
    124               input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
    125               errorCode(U_ZERO_ERROR) {
    126         cnv=ucnv_open(testcase.charset, &errorCode);
    127         if (U_FAILURE(errorCode)) {
    128             fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
    129         }
    130         ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
    131     }
    132 public:
    133     virtual ~Command(){
    134         if(U_SUCCESS(errorCode)) {
    135             ucnv_close(cnv);
    136         }
    137     }
    138     // virtual void call(UErrorCode* pErrorCode) { ... }
    139     virtual long getOperationsPerIteration(){
    140         return countInputCodePoints;
    141     }
    142 
    143     const UtfPerformanceTest &testcase;
    144     const UChar *input;
    145     int32_t inputLength;
    146     UErrorCode errorCode;
    147     UConverter *cnv;
    148 };
    149 
    150 // Test roundtrip UTF-16->encoding->UTF-16.
    151 class Roundtrip : public Command {
    152 protected:
    153     Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
    154 public:
    155     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
    156         Roundtrip * t = new Roundtrip(testcase);
    157         if (U_SUCCESS(t->errorCode)){
    158             return t;
    159         } else {
    160             delete t;
    161             return NULL;
    162         }
    163     }
    164     virtual void call(UErrorCode* pErrorCode){
    165         const UChar *pIn, *pInLimit;
    166         UChar *pOut, *pOutLimit;
    167         char *pInter, *pInterLimit;
    168         const char *p;
    169         UBool flush;
    170 
    171         ucnv_reset(cnv);
    172         fromUCallbackCount=0;
    173 
    174         pIn=input;
    175         pInLimit=input+inputLength;
    176 
    177         pOut=output;
    178         pOutLimit=output+OUTPUT_CAPACITY;
    179 
    180         pInterLimit=intermediate+testcase.chunkLength;
    181 
    182         encodedLength=outputLength=0;
    183         flush=FALSE;
    184 
    185         do {
    186             /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
    187             pInter=intermediate;
    188             ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
    189             encodedLength+=(int32_t)(pInter-intermediate);
    190 
    191             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
    192                 /* make sure that we convert once more to really flush */
    193                 *pErrorCode=U_ZERO_ERROR;
    194             } else if(U_FAILURE(*pErrorCode)) {
    195                 return;
    196             } else if(pIn==pInLimit) {
    197                 flush=TRUE;
    198             }
    199 
    200             /* convert the block [intermediate..pInter[ back to UTF-16 */
    201             p=intermediate;
    202             ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
    203             if(U_FAILURE(*pErrorCode)) {
    204                 return;
    205             }
    206             /* intermediate must have been consumed (p==pInter) because of the converter semantics */
    207         } while(!flush);
    208 
    209         outputLength=pOut-output;
    210         if(inputLength!=outputLength) {
    211             fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
    212             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
    213         }
    214     }
    215 };
    216 
    217 // Test one-way conversion UTF-16->encoding.
    218 class FromUnicode : public Command {
    219 protected:
    220     FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
    221 public:
    222     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
    223         FromUnicode * t = new FromUnicode(testcase);
    224         if (U_SUCCESS(t->errorCode)){
    225             return t;
    226         } else {
    227             delete t;
    228             return NULL;
    229         }
    230     }
    231     virtual void call(UErrorCode* pErrorCode){
    232         const UChar *pIn, *pInLimit;
    233         char *pInter, *pInterLimit;
    234 
    235         ucnv_resetFromUnicode(cnv);
    236         fromUCallbackCount=0;
    237 
    238         pIn=input;
    239         pInLimit=input+inputLength;
    240 
    241         pInterLimit=intermediate+testcase.chunkLength;
    242 
    243         encodedLength=0;
    244 
    245         for(;;) {
    246             pInter=intermediate;
    247             ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
    248             encodedLength+=(int32_t)(pInter-intermediate);
    249 
    250             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
    251                 /* make sure that we convert once more to really flush */
    252                 *pErrorCode=U_ZERO_ERROR;
    253             } else if(U_FAILURE(*pErrorCode)) {
    254                 return;
    255             } else {
    256                 break;  // all done
    257             }
    258         }
    259     }
    260 };
    261 
    262 // Test one-way conversion UTF-8->encoding.
    263 class FromUTF8 : public Command {
    264 protected:
    265     FromUTF8(const UtfPerformanceTest &testcase)
    266             : Command(testcase),
    267               utf8Cnv(NULL),
    268               input8(utf8), input8Length(utf8Length) {
    269         utf8Cnv=ucnv_open("UTF-8", &errorCode);
    270     }
    271 public:
    272     static UPerfFunction* get(const UtfPerformanceTest &testcase) {
    273         FromUTF8 * t = new FromUTF8(testcase);
    274         if (U_SUCCESS(t->errorCode)){
    275             return t;
    276         } else {
    277             delete t;
    278             return NULL;
    279         }
    280     }
    281     ~FromUTF8() {
    282         ucnv_close(utf8Cnv);
    283     }
    284     virtual void call(UErrorCode* pErrorCode){
    285         const char *pIn, *pInLimit;
    286         char *pInter, *pInterLimit;
    287         UChar *pivotSource, *pivotTarget, *pivotLimit;
    288 
    289         ucnv_resetToUnicode(utf8Cnv);
    290         ucnv_resetFromUnicode(cnv);
    291         fromUCallbackCount=0;
    292 
    293         pIn=input8;
    294         pInLimit=input8+input8Length;
    295 
    296         pInterLimit=intermediate+testcase.chunkLength;
    297 
    298         pivotSource=pivotTarget=pivot;
    299         pivotLimit=pivot+testcase.pivotLength;
    300 
    301         encodedLength=0;
    302 
    303         for(;;) {
    304             pInter=intermediate;
    305             ucnv_convertEx(cnv, utf8Cnv,
    306                            &pInter, pInterLimit,
    307                            &pIn, pInLimit,
    308                            pivot, &pivotSource, &pivotTarget, pivotLimit,
    309                            FALSE, TRUE, pErrorCode);
    310             encodedLength+=(int32_t)(pInter-intermediate);
    311 
    312             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
    313                 /* make sure that we convert once more to really flush */
    314                 *pErrorCode=U_ZERO_ERROR;
    315             } else if(U_FAILURE(*pErrorCode)) {
    316                 return;
    317             } else {
    318                 break;  // all done
    319             }
    320         }
    321     }
    322 protected:
    323     UConverter *utf8Cnv;
    324     const char *input8;
    325     int32_t input8Length;
    326 };
    327 
    328 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
    329     switch (index) {
    330         case 0: name = "Roundtrip";     if (exec) return Roundtrip::get(*this); break;
    331         case 1: name = "FromUnicode";   if (exec) return FromUnicode::get(*this); break;
    332         case 2: name = "FromUTF8";      if (exec) return FromUTF8::get(*this); break;
    333         default: name = ""; break;
    334     }
    335     return NULL;
    336 }
    337 
    338 int main(int argc, const char *argv[])
    339 {
    340     // Default values for command-line options.
    341     options[CHARSET].value = "UTF-8";
    342     options[CHUNK_LENGTH].value = "4096";
    343     options[PIVOT_LENGTH].value = "1024";
    344 
    345     UErrorCode status = U_ZERO_ERROR;
    346     UtfPerformanceTest test(argc, argv, status);
    347 
    348 	if (U_FAILURE(status)){
    349         printf("The error is %s\n", u_errorName(status));
    350         test.usage();
    351         return status;
    352     }
    353 
    354     if (test.run() == FALSE){
    355         fprintf(stderr, "FAILED: Tests could not be run please check the "
    356 			            "arguments.\n");
    357         return -1;
    358     }
    359 
    360     if (fromUCallbackCount > 0) {
    361         printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
    362     }
    363 
    364     return 0;
    365 }
    366