Home | History | Annotate | Download | only in gencolusb
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /**
      4  * Copyright (c) 1999-2016, International Business Machines Corporation and
      5  * others. All Rights Reserved.
      6  *
      7  * Generator for source/i18n/collunsafe.h
      8  * see Makefile
      9  */
     10 
     11 #include <stdio.h>
     12 #include "unicode/uversion.h"
     13 #include "unicode/uniset.h"
     14 #include "collationroot.h"
     15 #include "collationtailoring.h"
     16 
     17 /**
     18  * Define the type of generator to use. Choose one.
     19  */
     20 #define SERIALIZE 1   //< Default: use UnicodeSet.serialize() and a new internal c'tor
     21 #define RANGES 0      //< Enumerate ranges (works, not as fast. No support in collationdatareader.cpp)
     22 #define PATTERN 0     //< Generate a UnicodeSet pattern (depends on #11891 AND probably slower. No support in collationdatareader.cpp)
     23 
     24 int main(int argc, const char *argv[]) {
     25     UErrorCode errorCode = U_ZERO_ERROR;
     26 
     27     // Get the unsafeBackwardsSet
     28     const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
     29     if(U_FAILURE(errorCode)) {
     30       fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
     31       return 1;
     32     }
     33     const UVersionInfo &version = rootEntry->tailoring->version;
     34     const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
     35     char verString[20];
     36     u_versionToString(version, verString);
     37     fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
     38     int32_t rangeCount = unsafeBackwardSet->getRangeCount();
     39 
     40 #if SERIALIZE
     41     fprintf(stderr, ".. serializing\n");
     42     // UnicodeSet serialization
     43 
     44     UErrorCode preflightCode = U_ZERO_ERROR;
     45     // preflight
     46     int32_t serializedCount = unsafeBackwardSet->serialize(NULL,0,preflightCode);
     47     if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
     48       fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
     49       return 1;
     50     }
     51     uint16_t *serializedData = new uint16_t[serializedCount];
     52     // serialize
     53     unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
     54     if(U_FAILURE(errorCode)) {
     55       delete [] serializedData;
     56       fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
     57       return 1;
     58     }
     59 #endif
     60 
     61 #if PATTERN
     62     fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
     63     // attempt to use pattern
     64 
     65     UnicodeString pattern;
     66     UnicodeSet set(*unsafeBackwardSet);
     67     set.compact();
     68     set.toPattern(pattern, FALSE);
     69 
     70     if(U_SUCCESS(errorCode)) {
     71       // This fails (bug# ?) - which is why this method was abandoned.
     72 
     73       // UnicodeSet usA(pattern, errorCode);
     74       // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
     75       // return 1;
     76     }
     77 
     78 
     79     const UChar *buf = pattern.getBuffer();
     80     int32_t needed = pattern.length();
     81 
     82     // print
     83     {
     84       char buf2[2048];
     85       int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8");
     86       buf2[len2]=0;
     87       fprintf(stderr,"===\n%s\n===\n", buf2);
     88     }
     89 
     90     const UnicodeString unsafeBackwardPattern(FALSE, buf, needed);
     91   if(U_SUCCESS(errorCode)) {
     92     //UnicodeSet us(unsafeBackwardPattern, errorCode);
     93     //    fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
     94   } else {
     95     fprintf(stderr, "Uset OK - \n");
     96   }
     97 #endif
     98 
     99 
    100   // Generate the output file.
    101 
    102   printf("// collunsafe.h\n");
    103   printf("// %s\n", U_COPYRIGHT_STRING);
    104   printf("\n");
    105   printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
    106   printf("// Machine generated, do not edit.\n");
    107   printf("\n");
    108   printf("#ifndef COLLUNSAFE_H\n"
    109          "#define COLLUNSAFE_H\n"
    110          "\n"
    111          "#include \"unicode/utypes.h\"\n"
    112          "\n"
    113          "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
    114   printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);
    115 
    116 
    117 
    118 #if PATTERN
    119   printf("#define COLLUNSAFE_PATTERN 1\n");
    120   printf("static const int32_t collunsafe_len = %d;\n", needed);
    121   printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n");
    122   for(int i=0;i<needed;i++) {
    123     if( (i>0) && (i%8 == 0) ) {
    124       printf(" // %d\n", i);
    125     }
    126     printf("0x%04X", buf[i]); // TODO check
    127     if(i != (needed-1)) {
    128       printf(", ");
    129     }
    130     }
    131   printf(" //%d\n};\n", (needed-1));
    132 #endif
    133 
    134 #if RANGE
    135     fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
    136     printf("#define COLLUNSAFE_RANGE 1\n");
    137     printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
    138     printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
    139     for(int32_t i=0;i<rangeCount;i++) {
    140       printf(" 0x%04X, 0x%04X, // %d\n",
    141              unsafeBackwardSet->getRangeStart(i),
    142              unsafeBackwardSet->getRangeEnd(i),
    143              i);
    144     }
    145     printf("};\n");
    146 #endif
    147 
    148 #if SERIALIZE
    149     printf("#define COLLUNSAFE_SERIALIZE 1\n");
    150     printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
    151     printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
    152     for(int32_t i=0;i<serializedCount;i++) {
    153       if( (i>0) && (i%8 == 0) ) {
    154         printf(" // %d\n", i);
    155       }
    156       printf("0x%04X", serializedData[i]); // TODO check
    157       if(i != (serializedCount-1)) {
    158         printf(", ");
    159       }
    160     }
    161     printf("};\n");
    162 #endif
    163 
    164     printf("#endif\n");
    165     fflush(stderr);
    166     fflush(stdout);
    167     return(U_SUCCESS(errorCode)?0:1);
    168 }
    169