Home | History | Annotate | Download | only in unisetperf
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2007, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  unisetperf.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2007jan31
     12 *   created by: Markus Scherer
     13 */
     14 
     15 #include <stdio.h>
     16 #include <stdlib.h>
     17 #include <string.h>
     18 #include "unicode/uperf.h"
     19 #include "unicode/uniset.h"
     20 #include "unicode/unistr.h"
     21 #include "uoptions.h"
     22 
     23 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     24 
     25 // Command-line options specific to unisetperf.
     26 // Options do not have abbreviations: Force readable command lines.
     27 // (Using U+0001 for abbreviation characters.)
     28 enum {
     29     SET_PATTERN,
     30     FAST_TYPE,
     31     UNISETPERF_OPTIONS_COUNT
     32 };
     33 
     34 static UOption options[UNISETPERF_OPTIONS_COUNT]={
     35     UOPTION_DEF("pattern", '\x01', UOPT_REQUIRES_ARG),
     36     UOPTION_DEF("type",    '\x01', UOPT_REQUIRES_ARG)
     37 };
     38 
     39 static const char *const unisetperf_usage =
     40     "\t--pattern   UnicodeSet pattern for instantiation.\n"
     41     "\t            Default: [:ID_Continue:]\n"
     42     "\t--type      Type of UnicodeSet: slow fast\n"
     43     "\t            Default: slow\n";
     44 
     45 // Test object with setup data.
     46 class UnicodeSetPerformanceTest : public UPerfTest {
     47 public:
     48     UnicodeSetPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
     49             : UPerfTest(argc, argv, options, LENGTHOF(options), unisetperf_usage, status),
     50               utf8(NULL), utf8Length(0), countInputCodePoints(0), spanCount(0) {
     51         if (U_SUCCESS(status)) {
     52             UnicodeString pattern=UnicodeString(options[SET_PATTERN].value, -1, US_INV).unescape();
     53             set.applyPattern(pattern, status);
     54             prefrozen=set;
     55             if(0==strcmp(options[FAST_TYPE].value, "fast")) {
     56                 set.freeze();
     57             }
     58 
     59             int32_t inputLength;
     60             UPerfTest::getBuffer(inputLength, status);
     61             if(U_SUCCESS(status) && inputLength>0) {
     62                 countInputCodePoints = u_countChar32(buffer, bufferLen);
     63 
     64                 countSpans();
     65 
     66                 // Preflight the UTF-8 length and allocate utf8.
     67                 u_strToUTF8(NULL, 0, &utf8Length, buffer, bufferLen, &status);
     68                 if(status==U_BUFFER_OVERFLOW_ERROR) {
     69                     utf8=(char *)malloc(utf8Length);
     70                     if(utf8!=NULL) {
     71                         status=U_ZERO_ERROR;
     72                         u_strToUTF8(utf8, utf8Length, NULL, buffer, bufferLen, &status);
     73                     } else {
     74                         status=U_MEMORY_ALLOCATION_ERROR;
     75                     }
     76                 }
     77 
     78                 if(verbose) {
     79                     printf("code points:%ld  len16:%ld  len8:%ld  spans:%ld  "
     80                            "cp/span:%.3g  UChar/span:%.3g  B/span:%.3g  B/cp:%.3g\n",
     81                            (long)countInputCodePoints, (long)bufferLen, (long)utf8Length, (long)spanCount,
     82                            (double)countInputCodePoints/spanCount, (double)bufferLen/spanCount, (double)utf8Length/spanCount,
     83                            (double)utf8Length/countInputCodePoints);
     84                 }
     85             }
     86         }
     87     }
     88 
     89     virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
     90 
     91     // Count spans of characters that are in the set,
     92     // and spans of characters that are not in the set.
     93     // If the very first character is in the set, then one additional
     94     // not-span is counted.
     95     void countSpans() {
     96         const UChar *s=getBuffer();
     97         int32_t length=getBufferLen();
     98         int32_t i=0;
     99         UBool tf=FALSE;
    100         while(i<length) {
    101             i=span(s, length, i, tf);
    102             tf=(UBool)(!tf);
    103             ++spanCount;
    104         }
    105     }
    106     int32_t span(const UChar *s, int32_t length, int32_t start, UBool tf) const {
    107         UChar32 c;
    108         int32_t prev;
    109         while((prev=start)<length) {
    110             U16_NEXT(s, start, length, c);
    111             if(tf!=set.contains(c)) {
    112                 break;
    113             }
    114         }
    115         return prev;
    116     }
    117 
    118     const UChar *getBuffer() const { return buffer; }
    119     int32_t getBufferLen() const { return bufferLen; }
    120 
    121     char *utf8;
    122     int32_t utf8Length;
    123 
    124     // Number of code points in the input text.
    125     int32_t countInputCodePoints;
    126     int32_t spanCount;
    127 
    128     UnicodeSet set;
    129     UnicodeSet prefrozen;
    130 };
    131 
    132 // Performance test function object.
    133 class Command : public UPerfFunction {
    134 protected:
    135     Command(const UnicodeSetPerformanceTest &testcase) : testcase(testcase) {}
    136 
    137 public:
    138     virtual ~Command() {}
    139 
    140     // virtual void call(UErrorCode* pErrorCode) { ... }
    141 
    142     virtual long getOperationsPerIteration() {
    143         // Number of code points tested:
    144         // Input code points, plus one for the end of each span except the last span.
    145         return testcase.countInputCodePoints+testcase.spanCount-1;
    146     }
    147 
    148     virtual long getEventsPerIteration() {
    149         return testcase.spanCount;
    150     }
    151 
    152     const UnicodeSetPerformanceTest &testcase;
    153 };
    154 
    155 class Contains : public Command {
    156 protected:
    157     Contains(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
    158         // Verify that the frozen set is equal to the unfrozen one.
    159         UnicodeSet set;
    160         UChar32 c;
    161 
    162         for(c=0; c<=0x10ffff; ++c) {
    163             if(testcase.set.contains(c)) {
    164                 set.add(c);
    165             }
    166         }
    167         if(set!=testcase.set) {
    168             fprintf(stderr, "error: frozen set != original!\n");
    169         }
    170     }
    171 public:
    172     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
    173         return new Contains(testcase);
    174     }
    175     virtual void call(UErrorCode* pErrorCode) {
    176         const UnicodeSet &set=testcase.set;
    177         const UChar *s=testcase.getBuffer();
    178         int32_t length=testcase.getBufferLen();
    179         int32_t count=0;
    180         int32_t i=0;
    181         UBool tf=FALSE;
    182         while(i<length) {
    183             i+=span(set, s+i, length-i, tf);
    184             tf=(UBool)(!tf);
    185             ++count;
    186         }
    187         if(count!=testcase.spanCount) {
    188             fprintf(stderr, "error: Contains() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
    189                     (long)count, (long)testcase.spanCount);
    190         }
    191     }
    192     static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) {
    193         UChar32 c;
    194         int32_t start=0, prev;
    195         while((prev=start)<length) {
    196             U16_NEXT(s, start, length, c);
    197             if(tf!=set.contains(c)) {
    198                 break;
    199             }
    200         }
    201         return prev;
    202     }
    203 };
    204 
    205 class SpanUTF16 : public Command {
    206 protected:
    207     SpanUTF16(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
    208         // Verify that the frozen set is equal to the unfrozen one.
    209         UnicodeSet set;
    210         UChar utf16[2];
    211         UChar32 c, c2;
    212 
    213         for(c=0; c<=0xffff; ++c) {
    214             utf16[0]=(UChar)c;
    215             if(testcase.set.span(utf16, 1, USET_SPAN_CONTAINED)>0) {
    216                 set.add(c);
    217             }
    218         }
    219         for(c=0xd800; c<=0xdbff; ++c) {
    220             utf16[0]=(UChar)c;
    221             for(c2=0xdc00; c2<=0xdfff; ++c2) {
    222                 utf16[1]=(UChar)c2;
    223                 if(testcase.set.span(utf16, 2, USET_SPAN_CONTAINED)>0) {
    224                     set.add(U16_GET_SUPPLEMENTARY(c, c2));
    225                 }
    226             }
    227         }
    228 
    229         if(set!=testcase.set) {
    230             fprintf(stderr, "error: frozen set != original!\n");
    231         }
    232     }
    233 public:
    234     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
    235         return new SpanUTF16(testcase);
    236     }
    237     virtual void call(UErrorCode* pErrorCode) {
    238         const UnicodeSet &set=testcase.set;
    239         const UChar *s=testcase.getBuffer();
    240         int32_t length=testcase.getBufferLen();
    241         int32_t count=0;
    242         int32_t i=0;
    243         UBool tf=FALSE;
    244         while(i<length) {
    245             i+=set.span(s+i, length-i, (USetSpanCondition)tf);
    246             tf=(UBool)(!tf);
    247             ++count;
    248         }
    249         if(count!=testcase.spanCount) {
    250             fprintf(stderr, "error: SpanUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
    251                     (long)count, (long)testcase.spanCount);
    252         }
    253     }
    254 };
    255 
    256 class SpanBackUTF16 : public Command {
    257 protected:
    258     SpanBackUTF16(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
    259         // Verify that the frozen set is equal to the unfrozen one.
    260         UnicodeSet set;
    261         UChar utf16[2];
    262         UChar32 c, c2;
    263 
    264         for(c=0; c<=0xffff; ++c) {
    265             utf16[0]=(UChar)c;
    266             if(testcase.set.spanBack(utf16, 1, USET_SPAN_CONTAINED)==0) {
    267                 set.add(c);
    268             }
    269         }
    270         for(c=0xd800; c<=0xdbff; ++c) {
    271             utf16[0]=(UChar)c;
    272             for(c2=0xdc00; c2<=0xdfff; ++c2) {
    273                 utf16[1]=(UChar)c2;
    274                 if(testcase.set.spanBack(utf16, 2, USET_SPAN_CONTAINED)==0) {
    275                     set.add(U16_GET_SUPPLEMENTARY(c, c2));
    276                 }
    277             }
    278         }
    279 
    280         if(set!=testcase.set) {
    281             fprintf(stderr, "error: frozen set != original!\n");
    282         }
    283     }
    284 public:
    285     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
    286         return new SpanBackUTF16(testcase);
    287     }
    288     virtual void call(UErrorCode* pErrorCode) {
    289         const UnicodeSet &set=testcase.set;
    290         const UChar *s=testcase.getBuffer();
    291         int32_t length=testcase.getBufferLen();
    292         int32_t count=0;
    293         /*
    294          * Get the same spans as with span() where we always start with a not-contained span.
    295          * If testcase.spanCount is an odd number, then the last span() was not-contained.
    296          * The last spanBack() must be not-contained to match the first span().
    297          */
    298         UBool tf=(UBool)((testcase.spanCount&1)==0);
    299         while(length>0 || !tf) {
    300             length=set.spanBack(s, length, (USetSpanCondition)tf);
    301             tf=(UBool)(!tf);
    302             ++count;
    303         }
    304         if(count!=testcase.spanCount) {
    305             fprintf(stderr, "error: SpanBackUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
    306                     (long)count, (long)testcase.spanCount);
    307         }
    308     }
    309 };
    310 
    311 class SpanUTF8 : public Command {
    312 protected:
    313     SpanUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
    314         // Verify that the frozen set is equal to the unfrozen one.
    315         UnicodeSet set;
    316         char utf8[4];
    317         UChar32 c;
    318         int32_t length;
    319 
    320         for(c=0; c<=0x10ffff; ++c) {
    321             if(c==0xd800) {
    322                 c=0xe000;
    323             }
    324             length=0;
    325             U8_APPEND_UNSAFE(utf8, length, c);
    326             if(testcase.set.spanUTF8(utf8, length, USET_SPAN_CONTAINED)>0) {
    327                 set.add(c);
    328             }
    329         }
    330         if(set!=testcase.set) {
    331             fprintf(stderr, "error: frozen set != original!\n");
    332         }
    333     }
    334 public:
    335     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
    336         return new SpanUTF8(testcase);
    337     }
    338     virtual void call(UErrorCode* pErrorCode) {
    339         const UnicodeSet &set=testcase.set;
    340         const char *s=testcase.utf8;
    341         int32_t length=testcase.utf8Length;
    342         int32_t count=0;
    343         int32_t i=0;
    344         UBool tf=FALSE;
    345         while(i<length) {
    346             i+=set.spanUTF8(s+i, length-i, (USetSpanCondition)tf);
    347             tf=(UBool)(!tf);
    348             ++count;
    349         }
    350         if(count!=testcase.spanCount) {
    351             fprintf(stderr, "error: SpanUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
    352                     (long)count, (long)testcase.spanCount);
    353         }
    354     }
    355 };
    356 
    357 class SpanBackUTF8 : public Command {
    358 protected:
    359     SpanBackUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
    360         // Verify that the frozen set is equal to the unfrozen one.
    361         UnicodeSet set;
    362         char utf8[4];
    363         UChar32 c;
    364         int32_t length;
    365 
    366         for(c=0; c<=0x10ffff; ++c) {
    367             if(c==0xd800) {
    368                 c=0xe000;
    369             }
    370             length=0;
    371             U8_APPEND_UNSAFE(utf8, length, c);
    372             if(testcase.set.spanBackUTF8(utf8, length, USET_SPAN_CONTAINED)==0) {
    373                 set.add(c);
    374             }
    375         }
    376         if(set!=testcase.set) {
    377             fprintf(stderr, "error: frozen set != original!\n");
    378         }
    379     }
    380 public:
    381     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
    382         return new SpanBackUTF8(testcase);
    383     }
    384     virtual void call(UErrorCode* pErrorCode) {
    385         const UnicodeSet &set=testcase.set;
    386         const char *s=testcase.utf8;
    387         int32_t length=testcase.utf8Length;
    388         int32_t count=0;
    389         /*
    390          * Get the same spans as with span() where we always start with a not-contained span.
    391          * If testcase.spanCount is an odd number, then the last span() was not-contained.
    392          * The last spanBack() must be not-contained to match the first span().
    393          */
    394         UBool tf=(UBool)((testcase.spanCount&1)==0);
    395         while(length>0 || !tf) {
    396             length=set.spanBackUTF8(s, length, (USetSpanCondition)tf);
    397             tf=(UBool)(!tf);
    398             ++count;
    399         }
    400         if(count!=testcase.spanCount) {
    401             fprintf(stderr, "error: SpanBackUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
    402                     (long)count, (long)testcase.spanCount);
    403         }
    404     }
    405 };
    406 
    407 UPerfFunction* UnicodeSetPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
    408     switch (index) {
    409         case 0: name = "Contains";     if (exec) return Contains::get(*this); break;
    410         case 1: name = "SpanUTF16";    if (exec) return SpanUTF16::get(*this); break;
    411         case 2: name = "SpanBackUTF16";if (exec) return SpanBackUTF16::get(*this); break;
    412         case 3: name = "SpanUTF8";     if (exec) return SpanUTF8::get(*this); break;
    413         case 4: name = "SpanBackUTF8"; if (exec) return SpanBackUTF8::get(*this); break;
    414         default: name = ""; break;
    415     }
    416     return NULL;
    417 }
    418 
    419 int main(int argc, const char *argv[])
    420 {
    421     // Default values for command-line options.
    422     options[SET_PATTERN].value = "[:ID_Continue:]";
    423     options[FAST_TYPE].value = "slow";
    424 
    425     UErrorCode status = U_ZERO_ERROR;
    426     UnicodeSetPerformanceTest test(argc, argv, status);
    427 
    428 	if (U_FAILURE(status)){
    429         printf("The error is %s\n", u_errorName(status));
    430         test.usage();
    431         return status;
    432     }
    433 
    434     if (test.run() == FALSE){
    435         fprintf(stderr, "FAILED: Tests could not be run, please check the "
    436 			            "arguments.\n");
    437         return 1;
    438     }
    439 
    440     return 0;
    441 }
    442