1 /* 2 ********************************************************************** 3 * Copyright (C) 2007, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * file name: unisetperf.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2007jan31 12 * created by: Markus Scherer 13 */ 14 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include "unicode/uperf.h" 19 #include "unicode/uniset.h" 20 #include "unicode/unistr.h" 21 #include "uoptions.h" 22 23 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 24 25 // Command-line options specific to unisetperf. 26 // Options do not have abbreviations: Force readable command lines. 27 // (Using U+0001 for abbreviation characters.) 28 enum { 29 SET_PATTERN, 30 FAST_TYPE, 31 UNISETPERF_OPTIONS_COUNT 32 }; 33 34 static UOption options[UNISETPERF_OPTIONS_COUNT]={ 35 UOPTION_DEF("pattern", '\x01', UOPT_REQUIRES_ARG), 36 UOPTION_DEF("type", '\x01', UOPT_REQUIRES_ARG) 37 }; 38 39 static const char *const unisetperf_usage = 40 "\t--pattern UnicodeSet pattern for instantiation.\n" 41 "\t Default: [:ID_Continue:]\n" 42 "\t--type Type of UnicodeSet: slow fast\n" 43 "\t Default: slow\n"; 44 45 // Test object with setup data. 46 class UnicodeSetPerformanceTest : public UPerfTest { 47 public: 48 UnicodeSetPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status) 49 : UPerfTest(argc, argv, options, LENGTHOF(options), unisetperf_usage, status), 50 utf8(NULL), utf8Length(0), countInputCodePoints(0), spanCount(0) { 51 if (U_SUCCESS(status)) { 52 UnicodeString pattern=UnicodeString(options[SET_PATTERN].value, -1, US_INV).unescape(); 53 set.applyPattern(pattern, status); 54 prefrozen=set; 55 if(0==strcmp(options[FAST_TYPE].value, "fast")) { 56 set.freeze(); 57 } 58 59 int32_t inputLength; 60 UPerfTest::getBuffer(inputLength, status); 61 if(U_SUCCESS(status) && inputLength>0) { 62 countInputCodePoints = u_countChar32(buffer, bufferLen); 63 64 countSpans(); 65 66 // Preflight the UTF-8 length and allocate utf8. 67 u_strToUTF8(NULL, 0, &utf8Length, buffer, bufferLen, &status); 68 if(status==U_BUFFER_OVERFLOW_ERROR) { 69 utf8=(char *)malloc(utf8Length); 70 if(utf8!=NULL) { 71 status=U_ZERO_ERROR; 72 u_strToUTF8(utf8, utf8Length, NULL, buffer, bufferLen, &status); 73 } else { 74 status=U_MEMORY_ALLOCATION_ERROR; 75 } 76 } 77 78 if(verbose) { 79 printf("code points:%ld len16:%ld len8:%ld spans:%ld " 80 "cp/span:%.3g UChar/span:%.3g B/span:%.3g B/cp:%.3g\n", 81 (long)countInputCodePoints, (long)bufferLen, (long)utf8Length, (long)spanCount, 82 (double)countInputCodePoints/spanCount, (double)bufferLen/spanCount, (double)utf8Length/spanCount, 83 (double)utf8Length/countInputCodePoints); 84 } 85 } 86 } 87 } 88 89 virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL); 90 91 // Count spans of characters that are in the set, 92 // and spans of characters that are not in the set. 93 // If the very first character is in the set, then one additional 94 // not-span is counted. 95 void countSpans() { 96 const UChar *s=getBuffer(); 97 int32_t length=getBufferLen(); 98 int32_t i=0; 99 UBool tf=FALSE; 100 while(i<length) { 101 i=span(s, length, i, tf); 102 tf=(UBool)(!tf); 103 ++spanCount; 104 } 105 } 106 int32_t span(const UChar *s, int32_t length, int32_t start, UBool tf) const { 107 UChar32 c; 108 int32_t prev; 109 while((prev=start)<length) { 110 U16_NEXT(s, start, length, c); 111 if(tf!=set.contains(c)) { 112 break; 113 } 114 } 115 return prev; 116 } 117 118 const UChar *getBuffer() const { return buffer; } 119 int32_t getBufferLen() const { return bufferLen; } 120 121 char *utf8; 122 int32_t utf8Length; 123 124 // Number of code points in the input text. 125 int32_t countInputCodePoints; 126 int32_t spanCount; 127 128 UnicodeSet set; 129 UnicodeSet prefrozen; 130 }; 131 132 // Performance test function object. 133 class Command : public UPerfFunction { 134 protected: 135 Command(const UnicodeSetPerformanceTest &testcase) : testcase(testcase) {} 136 137 public: 138 virtual ~Command() {} 139 140 // virtual void call(UErrorCode* pErrorCode) { ... } 141 142 virtual long getOperationsPerIteration() { 143 // Number of code points tested: 144 // Input code points, plus one for the end of each span except the last span. 145 return testcase.countInputCodePoints+testcase.spanCount-1; 146 } 147 148 virtual long getEventsPerIteration() { 149 return testcase.spanCount; 150 } 151 152 const UnicodeSetPerformanceTest &testcase; 153 }; 154 155 class Contains : public Command { 156 protected: 157 Contains(const UnicodeSetPerformanceTest &testcase) : Command(testcase) { 158 // Verify that the frozen set is equal to the unfrozen one. 159 UnicodeSet set; 160 UChar32 c; 161 162 for(c=0; c<=0x10ffff; ++c) { 163 if(testcase.set.contains(c)) { 164 set.add(c); 165 } 166 } 167 if(set!=testcase.set) { 168 fprintf(stderr, "error: frozen set != original!\n"); 169 } 170 } 171 public: 172 static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) { 173 return new Contains(testcase); 174 } 175 virtual void call(UErrorCode* pErrorCode) { 176 const UnicodeSet &set=testcase.set; 177 const UChar *s=testcase.getBuffer(); 178 int32_t length=testcase.getBufferLen(); 179 int32_t count=0; 180 int32_t i=0; 181 UBool tf=FALSE; 182 while(i<length) { 183 i+=span(set, s+i, length-i, tf); 184 tf=(UBool)(!tf); 185 ++count; 186 } 187 if(count!=testcase.spanCount) { 188 fprintf(stderr, "error: Contains() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n", 189 (long)count, (long)testcase.spanCount); 190 } 191 } 192 static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) { 193 UChar32 c; 194 int32_t start=0, prev; 195 while((prev=start)<length) { 196 U16_NEXT(s, start, length, c); 197 if(tf!=set.contains(c)) { 198 break; 199 } 200 } 201 return prev; 202 } 203 }; 204 205 class SpanUTF16 : public Command { 206 protected: 207 SpanUTF16(const UnicodeSetPerformanceTest &testcase) : Command(testcase) { 208 // Verify that the frozen set is equal to the unfrozen one. 209 UnicodeSet set; 210 UChar utf16[2]; 211 UChar32 c, c2; 212 213 for(c=0; c<=0xffff; ++c) { 214 utf16[0]=(UChar)c; 215 if(testcase.set.span(utf16, 1, USET_SPAN_CONTAINED)>0) { 216 set.add(c); 217 } 218 } 219 for(c=0xd800; c<=0xdbff; ++c) { 220 utf16[0]=(UChar)c; 221 for(c2=0xdc00; c2<=0xdfff; ++c2) { 222 utf16[1]=(UChar)c2; 223 if(testcase.set.span(utf16, 2, USET_SPAN_CONTAINED)>0) { 224 set.add(U16_GET_SUPPLEMENTARY(c, c2)); 225 } 226 } 227 } 228 229 if(set!=testcase.set) { 230 fprintf(stderr, "error: frozen set != original!\n"); 231 } 232 } 233 public: 234 static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) { 235 return new SpanUTF16(testcase); 236 } 237 virtual void call(UErrorCode* pErrorCode) { 238 const UnicodeSet &set=testcase.set; 239 const UChar *s=testcase.getBuffer(); 240 int32_t length=testcase.getBufferLen(); 241 int32_t count=0; 242 int32_t i=0; 243 UBool tf=FALSE; 244 while(i<length) { 245 i+=set.span(s+i, length-i, (USetSpanCondition)tf); 246 tf=(UBool)(!tf); 247 ++count; 248 } 249 if(count!=testcase.spanCount) { 250 fprintf(stderr, "error: SpanUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n", 251 (long)count, (long)testcase.spanCount); 252 } 253 } 254 }; 255 256 class SpanBackUTF16 : public Command { 257 protected: 258 SpanBackUTF16(const UnicodeSetPerformanceTest &testcase) : Command(testcase) { 259 // Verify that the frozen set is equal to the unfrozen one. 260 UnicodeSet set; 261 UChar utf16[2]; 262 UChar32 c, c2; 263 264 for(c=0; c<=0xffff; ++c) { 265 utf16[0]=(UChar)c; 266 if(testcase.set.spanBack(utf16, 1, USET_SPAN_CONTAINED)==0) { 267 set.add(c); 268 } 269 } 270 for(c=0xd800; c<=0xdbff; ++c) { 271 utf16[0]=(UChar)c; 272 for(c2=0xdc00; c2<=0xdfff; ++c2) { 273 utf16[1]=(UChar)c2; 274 if(testcase.set.spanBack(utf16, 2, USET_SPAN_CONTAINED)==0) { 275 set.add(U16_GET_SUPPLEMENTARY(c, c2)); 276 } 277 } 278 } 279 280 if(set!=testcase.set) { 281 fprintf(stderr, "error: frozen set != original!\n"); 282 } 283 } 284 public: 285 static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) { 286 return new SpanBackUTF16(testcase); 287 } 288 virtual void call(UErrorCode* pErrorCode) { 289 const UnicodeSet &set=testcase.set; 290 const UChar *s=testcase.getBuffer(); 291 int32_t length=testcase.getBufferLen(); 292 int32_t count=0; 293 /* 294 * Get the same spans as with span() where we always start with a not-contained span. 295 * If testcase.spanCount is an odd number, then the last span() was not-contained. 296 * The last spanBack() must be not-contained to match the first span(). 297 */ 298 UBool tf=(UBool)((testcase.spanCount&1)==0); 299 while(length>0 || !tf) { 300 length=set.spanBack(s, length, (USetSpanCondition)tf); 301 tf=(UBool)(!tf); 302 ++count; 303 } 304 if(count!=testcase.spanCount) { 305 fprintf(stderr, "error: SpanBackUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n", 306 (long)count, (long)testcase.spanCount); 307 } 308 } 309 }; 310 311 class SpanUTF8 : public Command { 312 protected: 313 SpanUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) { 314 // Verify that the frozen set is equal to the unfrozen one. 315 UnicodeSet set; 316 char utf8[4]; 317 UChar32 c; 318 int32_t length; 319 320 for(c=0; c<=0x10ffff; ++c) { 321 if(c==0xd800) { 322 c=0xe000; 323 } 324 length=0; 325 U8_APPEND_UNSAFE(utf8, length, c); 326 if(testcase.set.spanUTF8(utf8, length, USET_SPAN_CONTAINED)>0) { 327 set.add(c); 328 } 329 } 330 if(set!=testcase.set) { 331 fprintf(stderr, "error: frozen set != original!\n"); 332 } 333 } 334 public: 335 static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) { 336 return new SpanUTF8(testcase); 337 } 338 virtual void call(UErrorCode* pErrorCode) { 339 const UnicodeSet &set=testcase.set; 340 const char *s=testcase.utf8; 341 int32_t length=testcase.utf8Length; 342 int32_t count=0; 343 int32_t i=0; 344 UBool tf=FALSE; 345 while(i<length) { 346 i+=set.spanUTF8(s+i, length-i, (USetSpanCondition)tf); 347 tf=(UBool)(!tf); 348 ++count; 349 } 350 if(count!=testcase.spanCount) { 351 fprintf(stderr, "error: SpanUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n", 352 (long)count, (long)testcase.spanCount); 353 } 354 } 355 }; 356 357 class SpanBackUTF8 : public Command { 358 protected: 359 SpanBackUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) { 360 // Verify that the frozen set is equal to the unfrozen one. 361 UnicodeSet set; 362 char utf8[4]; 363 UChar32 c; 364 int32_t length; 365 366 for(c=0; c<=0x10ffff; ++c) { 367 if(c==0xd800) { 368 c=0xe000; 369 } 370 length=0; 371 U8_APPEND_UNSAFE(utf8, length, c); 372 if(testcase.set.spanBackUTF8(utf8, length, USET_SPAN_CONTAINED)==0) { 373 set.add(c); 374 } 375 } 376 if(set!=testcase.set) { 377 fprintf(stderr, "error: frozen set != original!\n"); 378 } 379 } 380 public: 381 static UPerfFunction* get(const UnicodeSetPerformanceTest &testcase) { 382 return new SpanBackUTF8(testcase); 383 } 384 virtual void call(UErrorCode* pErrorCode) { 385 const UnicodeSet &set=testcase.set; 386 const char *s=testcase.utf8; 387 int32_t length=testcase.utf8Length; 388 int32_t count=0; 389 /* 390 * Get the same spans as with span() where we always start with a not-contained span. 391 * If testcase.spanCount is an odd number, then the last span() was not-contained. 392 * The last spanBack() must be not-contained to match the first span(). 393 */ 394 UBool tf=(UBool)((testcase.spanCount&1)==0); 395 while(length>0 || !tf) { 396 length=set.spanBackUTF8(s, length, (USetSpanCondition)tf); 397 tf=(UBool)(!tf); 398 ++count; 399 } 400 if(count!=testcase.spanCount) { 401 fprintf(stderr, "error: SpanBackUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n", 402 (long)count, (long)testcase.spanCount); 403 } 404 } 405 }; 406 407 UPerfFunction* UnicodeSetPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) { 408 switch (index) { 409 case 0: name = "Contains"; if (exec) return Contains::get(*this); break; 410 case 1: name = "SpanUTF16"; if (exec) return SpanUTF16::get(*this); break; 411 case 2: name = "SpanBackUTF16";if (exec) return SpanBackUTF16::get(*this); break; 412 case 3: name = "SpanUTF8"; if (exec) return SpanUTF8::get(*this); break; 413 case 4: name = "SpanBackUTF8"; if (exec) return SpanBackUTF8::get(*this); break; 414 default: name = ""; break; 415 } 416 return NULL; 417 } 418 419 int main(int argc, const char *argv[]) 420 { 421 // Default values for command-line options. 422 options[SET_PATTERN].value = "[:ID_Continue:]"; 423 options[FAST_TYPE].value = "slow"; 424 425 UErrorCode status = U_ZERO_ERROR; 426 UnicodeSetPerformanceTest test(argc, argv, status); 427 428 if (U_FAILURE(status)){ 429 printf("The error is %s\n", u_errorName(status)); 430 test.usage(); 431 return status; 432 } 433 434 if (test.run() == FALSE){ 435 fprintf(stderr, "FAILED: Tests could not be run, please check the " 436 "arguments.\n"); 437 return 1; 438 } 439 440 return 0; 441 } 442