Home | History | Annotate | Download | only in unisetperf
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   file name:  unisetperf.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2007jan31
     12 *   created by: Markus Scherer
     13 */
     14 
     15 #include <stdio.h>
     16 #include <stdlib.h>
     17 #include <string.h>
     18 #include "unicode/uperf.h"
     19 #include "unicode/uniset.h"
     20 #include "unicode/unistr.h"
     21 #include "uoptions.h"
     22 #include "cmemory.h" // for UPRV_LENGTHOF
     23 
     24 // Command-line options specific to unisetperf.
     25 // Options do not have abbreviations: Force readable command lines.
     26 // (Using U+0001 for abbreviation characters.)
     27 enum {
     28     SET_PATTERN,
     29     FAST_TYPE,
     30     UNISETPERF_OPTIONS_COUNT
     31 };
     32 
     33 static UOption options[UNISETPERF_OPTIONS_COUNT]={
     34     UOPTION_DEF("pattern", '\x01', UOPT_REQUIRES_ARG),
     35     UOPTION_DEF("type",    '\x01', UOPT_REQUIRES_ARG)
     36 };
     37 
     38 static const char *const unisetperf_usage =
     39     "\t--pattern   UnicodeSet pattern for instantiation.\n"
     40     "\t            Default: [:ID_Continue:]\n"
     41     "\t--type      Type of UnicodeSet: slow fast\n"
     42     "\t            Default: slow\n";
     43 
     44 // Test object with setup data.
     45 class UnicodeSetPerformanceTest : public UPerfTest {
     46 public:
     47     UnicodeSetPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
     48             : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), unisetperf_usage, status),
     49               utf8(NULL), utf8Length(0), countInputCodePoints(0), spanCount(0) {
     50         if (U_SUCCESS(status)) {
     51             UnicodeString pattern=UnicodeString(options[SET_PATTERN].value, -1, US_INV).unescape();
     52             set.applyPattern(pattern, status);
     53             prefrozen=set;
     54             if(0==strcmp(options[FAST_TYPE].value, "fast")) {
     55                 set.freeze();
     56             }
     57 
     58             int32_t inputLength;
     59             UPerfTest::getBuffer(inputLength, status);
     60             if(U_SUCCESS(status) && inputLength>0) {
     61                 countInputCodePoints = u_countChar32(buffer, bufferLen);
     62 
     63                 countSpans();
     64 
     65                 // Preflight the UTF-8 length and allocate utf8.
     66                 u_strToUTF8(NULL, 0, &utf8Length, buffer, bufferLen, &status);
     67                 if(status==U_BUFFER_OVERFLOW_ERROR) {
     68                     utf8=(char *)malloc(utf8Length);
     69                     if(utf8!=NULL) {
     70                         status=U_ZERO_ERROR;
     71                         u_strToUTF8(utf8, utf8Length, NULL, buffer, bufferLen, &status);
     72                     } else {
     73                         status=U_MEMORY_ALLOCATION_ERROR;
     74                     }
     75                 }
     76 
     77                 if(verbose) {
     78                     printf("code points:%ld  len16:%ld  len8:%ld  spans:%ld  "
     79                            "cp/span:%.3g  UChar/span:%.3g  B/span:%.3g  B/cp:%.3g\n",
     80                            (long)countInputCodePoints, (long)bufferLen, (long)utf8Length, (long)spanCount,
     81                            (double)countInputCodePoints/spanCount, (double)bufferLen/spanCount, (double)utf8Length/spanCount,
     82                            (double)utf8Length/countInputCodePoints);
     83                 }
     84             }
     85         }
     86     }
     87 
     88     virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
     89 
     90     // Count spans of characters that are in the set,
     91     // and spans of characters that are not in the set.
     92     // If the very first character is in the set, then one additional
     93     // not-span is counted.
     94     void countSpans() {
     95         const UChar *s=getBuffer();
     96         int32_t length=getBufferLen();
     97         int32_t i=0;
     98         UBool tf=FALSE;
     99         while(i<length) {
    100             i=span(s, length, i, tf);
    101             tf=(UBool)(!tf);
    102             ++spanCount;
    103         }
    104     }
    105     int32_t span(const UChar *s, int32_t length, int32_t start, UBool tf) const {
    106         UChar32 c;
    107         int32_t prev;
    108         while((prev=start)<length) {
    109             U16_NEXT(s, start, length, c);
    110             if(tf!=set.contains(c)) {
    111                 break;
    112             }
    113         }
    114         return prev;
    115     }
    116 
    117     const UChar *getBuffer() const { return buffer; }
    118     int32_t getBufferLen() const { return bufferLen; }
    119 
    120     char *utf8;
    121     int32_t utf8Length;
    122 
    123     // Number of code points in the input text.
    124     int32_t countInputCodePoints;
    125     int32_t spanCount;
    126 
    127     UnicodeSet set;
    128     UnicodeSet prefrozen;
    129 };
    130 
    131 // Performance test function object.
    132 class Command : public UPerfFunction {
    133 protected:
    134     Command(const UnicodeSetPerformanceTest &testcase) : testcase(testcase) {}
    135 
    136 public:
    137     virtual ~Command() {}
    138 
    139     // virtual void call(UErrorCode* pErrorCode) { ... }
    140 
    141     virtual long getOperationsPerIteration() {
    142         // Number of code points tested:
    143         // Input code points, plus one for the end of each span except the last span.
    144         return testcase.countInputCodePoints+testcase.spanCount-1;
    145     }
    146 
    147     virtual long getEventsPerIteration() {
    148         return testcase.spanCount;
    149     }
    150 
    151     const UnicodeSetPerformanceTest &testcase;
    152 };
    153 
    154 class Contains : public Command {
    155 protected:
    156     Contains(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
    157         // Verify that the frozen set is equal to the unfrozen one.
    158         UnicodeSet set;
    159         UChar32 c;
    160 
    161         for(c=0; c<=0x10ffff; ++c) {
    162             if(testcase.set.contains(c)) {
    163                 set.add(c);
    164             }
    165         }
    166         if(set!=testcase.set) {
    167             fprintf(stderr, "error: frozen set != original!\n");
    168         }
    169     }
    170 public:
    171     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
    172         return new Contains(testcase);
    173     }
    174     virtual void call(UErrorCode* pErrorCode) {
    175         const UnicodeSet &set=testcase.set;
    176         const UChar *s=testcase.getBuffer();
    177         int32_t length=testcase.getBufferLen();
    178         int32_t count=0;
    179         int32_t i=0;
    180         UBool tf=FALSE;
    181         while(i<length) {
    182             i+=span(set, s+i, length-i, tf);
    183             tf=(UBool)(!tf);
    184             ++count;
    185         }
    186         if(count!=testcase.spanCount) {
    187             fprintf(stderr, "error: Contains() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
    188                     (long)count, (long)testcase.spanCount);
    189         }
    190     }
    191     static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) {
    192         UChar32 c;
    193         int32_t start=0, prev;
    194         while((prev=start)<length) {
    195             U16_NEXT(s, start, length, c);
    196             if(tf!=set.contains(c)) {
    197                 break;
    198             }
    199         }
    200         return prev;
    201     }
    202 };
    203 
    204 class SpanUTF16 : public Command {
    205 protected:
    206     SpanUTF16(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
    207         // Verify that the frozen set is equal to the unfrozen one.
    208         UnicodeSet set;
    209         UChar utf16[2];
    210         UChar32 c, c2;
    211 
    212         for(c=0; c<=0xffff; ++c) {
    213             utf16[0]=(UChar)c;
    214             if(testcase.set.span(utf16, 1, USET_SPAN_CONTAINED)>0) {
    215                 set.add(c);
    216             }
    217         }
    218         for(c=0xd800; c<=0xdbff; ++c) {
    219             utf16[0]=(UChar)c;
    220             for(c2=0xdc00; c2<=0xdfff; ++c2) {
    221                 utf16[1]=(UChar)c2;
    222                 if(testcase.set.span(utf16, 2, USET_SPAN_CONTAINED)>0) {
    223                     set.add(U16_GET_SUPPLEMENTARY(c, c2));
    224                 }
    225             }
    226         }
    227 
    228         if(set!=testcase.set) {
    229             fprintf(stderr, "error: frozen set != original!\n");
    230         }
    231     }
    232 public:
    233     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
    234         return new SpanUTF16(testcase);
    235     }
    236     virtual void call(UErrorCode* pErrorCode) {
    237         const UnicodeSet &set=testcase.set;
    238         const UChar *s=testcase.getBuffer();
    239         int32_t length=testcase.getBufferLen();
    240         int32_t count=0;
    241         int32_t i=0;
    242         UBool tf=FALSE;
    243         while(i<length) {
    244             i+=set.span(s+i, length-i, (USetSpanCondition)tf);
    245             tf=(UBool)(!tf);
    246             ++count;
    247         }
    248         if(count!=testcase.spanCount) {
    249             fprintf(stderr, "error: SpanUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
    250                     (long)count, (long)testcase.spanCount);
    251         }
    252     }
    253 };
    254 
    255 class SpanBackUTF16 : public Command {
    256 protected:
    257     SpanBackUTF16(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
    258         // Verify that the frozen set is equal to the unfrozen one.
    259         UnicodeSet set;
    260         UChar utf16[2];
    261         UChar32 c, c2;
    262 
    263         for(c=0; c<=0xffff; ++c) {
    264             utf16[0]=(UChar)c;
    265             if(testcase.set.spanBack(utf16, 1, USET_SPAN_CONTAINED)==0) {
    266                 set.add(c);
    267             }
    268         }
    269         for(c=0xd800; c<=0xdbff; ++c) {
    270             utf16[0]=(UChar)c;
    271             for(c2=0xdc00; c2<=0xdfff; ++c2) {
    272                 utf16[1]=(UChar)c2;
    273                 if(testcase.set.spanBack(utf16, 2, USET_SPAN_CONTAINED)==0) {
    274                     set.add(U16_GET_SUPPLEMENTARY(c, c2));
    275                 }
    276             }
    277         }
    278 
    279         if(set!=testcase.set) {
    280             fprintf(stderr, "error: frozen set != original!\n");
    281         }
    282     }
    283 public:
    284     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
    285         return new SpanBackUTF16(testcase);
    286     }
    287     virtual void call(UErrorCode* pErrorCode) {
    288         const UnicodeSet &set=testcase.set;
    289         const UChar *s=testcase.getBuffer();
    290         int32_t length=testcase.getBufferLen();
    291         int32_t count=0;
    292         /*
    293          * Get the same spans as with span() where we always start with a not-contained span.
    294          * If testcase.spanCount is an odd number, then the last span() was not-contained.
    295          * The last spanBack() must be not-contained to match the first span().
    296          */
    297         UBool tf=(UBool)((testcase.spanCount&1)==0);
    298         while(length>0 || !tf) {
    299             length=set.spanBack(s, length, (USetSpanCondition)tf);
    300             tf=(UBool)(!tf);
    301             ++count;
    302         }
    303         if(count!=testcase.spanCount) {
    304             fprintf(stderr, "error: SpanBackUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
    305                     (long)count, (long)testcase.spanCount);
    306         }
    307     }
    308 };
    309 
    310 class SpanUTF8 : public Command {
    311 protected:
    312     SpanUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
    313         // Verify that the frozen set is equal to the unfrozen one.
    314         UnicodeSet set;
    315         char utf8[4];
    316         UChar32 c;
    317         int32_t length;
    318 
    319         for(c=0; c<=0x10ffff; ++c) {
    320             if(c==0xd800) {
    321                 c=0xe000;
    322             }
    323             length=0;
    324             U8_APPEND_UNSAFE(utf8, length, c);
    325             if(testcase.set.spanUTF8(utf8, length, USET_SPAN_CONTAINED)>0) {
    326                 set.add(c);
    327             }
    328         }
    329         if(set!=testcase.set) {
    330             fprintf(stderr, "error: frozen set != original!\n");
    331         }
    332     }
    333 public:
    334     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
    335         return new SpanUTF8(testcase);
    336     }
    337     virtual void call(UErrorCode* pErrorCode) {
    338         const UnicodeSet &set=testcase.set;
    339         const char *s=testcase.utf8;
    340         int32_t length=testcase.utf8Length;
    341         int32_t count=0;
    342         int32_t i=0;
    343         UBool tf=FALSE;
    344         while(i<length) {
    345             i+=set.spanUTF8(s+i, length-i, (USetSpanCondition)tf);
    346             tf=(UBool)(!tf);
    347             ++count;
    348         }
    349         if(count!=testcase.spanCount) {
    350             fprintf(stderr, "error: SpanUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
    351                     (long)count, (long)testcase.spanCount);
    352         }
    353     }
    354 };
    355 
    356 class SpanBackUTF8 : public Command {
    357 protected:
    358     SpanBackUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
    359         // Verify that the frozen set is equal to the unfrozen one.
    360         UnicodeSet set;
    361         char utf8[4];
    362         UChar32 c;
    363         int32_t length;
    364 
    365         for(c=0; c<=0x10ffff; ++c) {
    366             if(c==0xd800) {
    367                 c=0xe000;
    368             }
    369             length=0;
    370             U8_APPEND_UNSAFE(utf8, length, c);
    371             if(testcase.set.spanBackUTF8(utf8, length, USET_SPAN_CONTAINED)==0) {
    372                 set.add(c);
    373             }
    374         }
    375         if(set!=testcase.set) {
    376             fprintf(stderr, "error: frozen set != original!\n");
    377         }
    378     }
    379 public:
    380     static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) {
    381         return new SpanBackUTF8(testcase);
    382     }
    383     virtual void call(UErrorCode* pErrorCode) {
    384         const UnicodeSet &set=testcase.set;
    385         const char *s=testcase.utf8;
    386         int32_t length=testcase.utf8Length;
    387         int32_t count=0;
    388         /*
    389          * Get the same spans as with span() where we always start with a not-contained span.
    390          * If testcase.spanCount is an odd number, then the last span() was not-contained.
    391          * The last spanBack() must be not-contained to match the first span().
    392          */
    393         UBool tf=(UBool)((testcase.spanCount&1)==0);
    394         while(length>0 || !tf) {
    395             length=set.spanBackUTF8(s, length, (USetSpanCondition)tf);
    396             tf=(UBool)(!tf);
    397             ++count;
    398         }
    399         if(count!=testcase.spanCount) {
    400             fprintf(stderr, "error: SpanBackUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
    401                     (long)count, (long)testcase.spanCount);
    402         }
    403     }
    404 };
    405 
    406 UPerfFunction* UnicodeSetPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
    407     switch (index) {
    408         case 0: name = "Contains";     if (exec) return Contains::get(*this); break;
    409         case 1: name = "SpanUTF16";    if (exec) return SpanUTF16::get(*this); break;
    410         case 2: name = "SpanBackUTF16";if (exec) return SpanBackUTF16::get(*this); break;
    411         case 3: name = "SpanUTF8";     if (exec) return SpanUTF8::get(*this); break;
    412         case 4: name = "SpanBackUTF8"; if (exec) return SpanBackUTF8::get(*this); break;
    413         default: name = ""; break;
    414     }
    415     return NULL;
    416 }
    417 
    418 int main(int argc, const char *argv[])
    419 {
    420     // Default values for command-line options.
    421     options[SET_PATTERN].value = "[:ID_Continue:]";
    422     options[FAST_TYPE].value = "slow";
    423 
    424     UErrorCode status = U_ZERO_ERROR;
    425     UnicodeSetPerformanceTest test(argc, argv, status);
    426 
    427 	if (U_FAILURE(status)){
    428         printf("The error is %s\n", u_errorName(status));
    429         test.usage();
    430         return status;
    431     }
    432 
    433     if (test.run() == FALSE){
    434         fprintf(stderr, "FAILED: Tests could not be run, please check the "
    435 			            "arguments.\n");
    436         return 1;
    437     }
    438 
    439     return 0;
    440 }
    441