Home | History | Annotate | Download | only in genpname
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2002-2010, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   Date        Name        Description
      7 *   10/11/02    aliu        Creation.
      8 **********************************************************************
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 #include "unicode/putil.h"
     13 #include "unicode/uclean.h"
     14 #include "cmemory.h"
     15 #include "cstring.h"
     16 #include "filestrm.h"
     17 #include "uarrsort.h"
     18 #include "unewdata.h"
     19 #include "uoptions.h"
     20 #include "uprops.h"
     21 #include "propname.h"
     22 #include "uassert.h"
     23 
     24 #include <stdio.h>
     25 
     26 U_NAMESPACE_USE
     27 
     28 // TODO: Clean up and comment this code.
     29 
     30 //----------------------------------------------------------------------
     31 // BEGIN DATA
     32 //
     33 // This is the raw data to be output.  We define the data structure,
     34 // then include a machine-generated header that contains the actual
     35 // data.
     36 
     37 #include "unicode/uchar.h"
     38 #include "unicode/uscript.h"
     39 #include "unicode/unorm.h"
     40 #include "unicode/unorm2.h"
     41 
     42 class AliasName {
     43 public:
     44     const char* str;
     45     int32_t     index;
     46 
     47     AliasName(const char* str, int32_t index);
     48 
     49     int compare(const AliasName& other) const;
     50 
     51     UBool operator==(const AliasName& other) const {
     52         return compare(other) == 0;
     53     }
     54 
     55     UBool operator!=(const AliasName& other) const {
     56         return compare(other) != 0;
     57     }
     58 };
     59 
     60 AliasName::AliasName(const char* _str,
     61                int32_t _index) :
     62     str(_str),
     63     index(_index)
     64 {
     65 }
     66 
     67 int AliasName::compare(const AliasName& other) const {
     68     return uprv_comparePropertyNames(str, other.str);
     69 }
     70 
     71 class Alias {
     72 public:
     73     int32_t     enumValue;
     74     int32_t     nameGroupIndex;
     75 
     76     Alias(int32_t enumValue,
     77              int32_t nameGroupIndex);
     78 
     79     int32_t getUniqueNames(int32_t* nameGroupIndices) const;
     80 };
     81 
     82 Alias::Alias(int32_t anEnumValue,
     83                    int32_t aNameGroupIndex) :
     84     enumValue(anEnumValue),
     85     nameGroupIndex(aNameGroupIndex)
     86 {
     87 }
     88 
     89 class Property : public Alias {
     90 public:
     91     int32_t         valueCount;
     92     const Alias* valueList;
     93 
     94     Property(int32_t enumValue,
     95                        int32_t nameGroupIndex,
     96                        int32_t valueCount,
     97                        const Alias* valueList);
     98 };
     99 
    100 Property::Property(int32_t _enumValue,
    101                                        int32_t _nameGroupIndex,
    102                                        int32_t _valueCount,
    103                                        const Alias* _valueList) :
    104     Alias(_enumValue, _nameGroupIndex),
    105     valueCount(_valueCount),
    106     valueList(_valueList)
    107 {
    108 }
    109 
    110 // *** Include the data header ***
    111 #include "data.h"
    112 
    113 /* return a list of unique names, not including "", for this property
    114  * @param stringIndices array of at least MAX_NAMES_PER_GROUP
    115  * elements, will be filled with indices into STRING_TABLE
    116  * @return number of indices, >= 1
    117  */
    118 int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
    119     int32_t count = 0;
    120     int32_t i = nameGroupIndex;
    121     UBool done = FALSE;
    122     while (!done) {
    123         int32_t j = NAME_GROUP[i++];
    124         if (j < 0) {
    125             done = TRUE;
    126             j = -j;
    127         }
    128         if (j == 0) continue; // omit "" entries
    129         UBool dupe = FALSE;
    130         for (int32_t k=0; k<count; ++k) {
    131             if (stringIndices[k] == j) {
    132                 dupe = TRUE;
    133                 break;
    134             }
    135             // also do a string check for things like "age|Age"
    136             if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
    137                 //printf("Found dupe %s|%s\n",
    138                 //       STRING_TABLE[stringIndices[k]].str,
    139                 //       STRING_TABLE[j].str);
    140                 dupe = TRUE;
    141                 break;
    142             }
    143         }
    144         if (dupe) continue; // omit duplicates
    145         stringIndices[count++] = j;
    146     }
    147     return count;
    148 }
    149 
    150 // END DATA
    151 //----------------------------------------------------------------------
    152 
    153 #define MALLOC(type, count) \
    154   (type*) uprv_malloc(sizeof(type) * count)
    155 
    156 void die(const char* msg) {
    157     fprintf(stderr, "Error: %s\n", msg);
    158     exit(1);
    159 }
    160 
    161 //----------------------------------------------------------------------
    162 
    163 /**
    164  * A list of Alias objects.
    165  */
    166 class AliasList {
    167 public:
    168     virtual ~AliasList();
    169     virtual const Alias& operator[](int32_t i) const = 0;
    170     virtual int32_t count() const = 0;
    171 };
    172 
    173 AliasList::~AliasList() {}
    174 
    175 /**
    176  * A single array.
    177  */
    178 class AliasArrayList : public AliasList {
    179     const Alias* a;
    180     int32_t n;
    181 public:
    182     AliasArrayList(const Alias* _a, int32_t _n) {
    183         a = _a;
    184         n = _n;
    185     }
    186     virtual const Alias& operator[](int32_t i) const {
    187         return a[i];
    188     }
    189     virtual int32_t count() const {
    190         return n;
    191     }
    192 };
    193 
    194 /**
    195  * A single array.
    196  */
    197 class PropertyArrayList : public AliasList {
    198     const Property* a;
    199     int32_t n;
    200 public:
    201     PropertyArrayList(const Property* _a, int32_t _n) {
    202         a = _a;
    203         n = _n;
    204     }
    205     virtual const Alias& operator[](int32_t i) const {
    206         return a[i];
    207     }
    208     virtual int32_t count() const {
    209         return n;
    210     }
    211 };
    212 
    213 //----------------------------------------------------------------------
    214 
    215 /**
    216  * An element in a name index.  It maps a name (given by index) into
    217  * an enum value.
    218  */
    219 class NameToEnumEntry {
    220 public:
    221     int32_t nameIndex;
    222     int32_t enumValue;
    223     NameToEnumEntry(int32_t a, int32_t b) { nameIndex=a; enumValue=b; }
    224 };
    225 
    226 // Sort function for NameToEnumEntry (sort by name)
    227 U_CFUNC int32_t
    228 compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
    229     return
    230         STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex].
    231             compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]);
    232 }
    233 
    234 //----------------------------------------------------------------------
    235 
    236 /**
    237  * An element in an enum index.  It maps an enum into a name group entry
    238  * (given by index).
    239  */
    240 class EnumToNameGroupEntry {
    241 public:
    242     int32_t enumValue;
    243     int32_t nameGroupIndex;
    244     EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; }
    245 
    246     // are enumValues contiguous for count entries starting with this one?
    247     // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
    248     UBool isContiguous(int32_t count) const {
    249         const EnumToNameGroupEntry* p = this;
    250         for (int32_t i=1; i<count; ++i) {
    251             if (p[i].enumValue != (this->enumValue + i)) {
    252                 return FALSE;
    253             }
    254         }
    255         return TRUE;
    256     }
    257 };
    258 
    259 // Sort function for EnumToNameGroupEntry (sort by name index)
    260 U_CFUNC int32_t
    261 compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
    262     return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue;
    263 }
    264 
    265 //----------------------------------------------------------------------
    266 
    267 /**
    268  * An element in the map from enumerated property enums to value maps.
    269  */
    270 class EnumToValueEntry {
    271 public:
    272     int32_t enumValue;
    273     EnumToNameGroupEntry* enumToName;
    274     int32_t enumToName_count;
    275     NameToEnumEntry* nameToEnum;
    276     int32_t nameToEnum_count;
    277 
    278     // are enumValues contiguous for count entries starting with this one?
    279     // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
    280     UBool isContiguous(int32_t count) const {
    281         const EnumToValueEntry* p = this;
    282         for (int32_t i=1; i<count; ++i) {
    283             if (p[i].enumValue != (this->enumValue + i)) {
    284                 return FALSE;
    285             }
    286         }
    287         return TRUE;
    288     }
    289 };
    290 
    291 // Sort function for EnumToValueEntry (sort by enum)
    292 U_CFUNC int32_t
    293 compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
    294     return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue;
    295 }
    296 
    297 //----------------------------------------------------------------------
    298 // BEGIN Builder
    299 
    300 #define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET))
    301 
    302 class Builder {
    303     // header:
    304     PropertyAliases header;
    305 
    306     // 0:
    307     NonContiguousEnumToOffset* enumToName;
    308     int32_t enumToName_size;
    309     Offset enumToName_offset;
    310 
    311     // 1: (deleted)
    312 
    313     // 2:
    314     NameToEnum* nameToEnum;
    315     int32_t nameToEnum_size;
    316     Offset nameToEnum_offset;
    317 
    318     // 3:
    319     NonContiguousEnumToOffset* enumToValue;
    320     int32_t enumToValue_size;
    321     Offset enumToValue_offset;
    322 
    323     // 4:
    324     ValueMap* valueMap;
    325     int32_t valueMap_size;
    326     int32_t valueMap_count;
    327     Offset valueMap_offset;
    328 
    329     // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
    330     // NULL and one is not.  valueEnumToName_size[i] is the size of
    331     // the non-NULL one.  i=0..valueMapCount-1
    332     // 5a:
    333     EnumToOffset** valueEnumToName;
    334     // 5b:
    335     NonContiguousEnumToOffset** valueNCEnumToName;
    336     int32_t* valueEnumToName_size;
    337     Offset* valueEnumToName_offset;
    338     // 6:
    339     // arrays of valueMap_count pointers, sizes, & offsets
    340     NameToEnum** valueNameToEnum;
    341     int32_t* valueNameToEnum_size;
    342     Offset* valueNameToEnum_offset;
    343 
    344     // 98:
    345     Offset* nameGroupPool;
    346     int32_t nameGroupPool_count;
    347     int32_t nameGroupPool_size;
    348     Offset nameGroupPool_offset;
    349 
    350     // 99:
    351     char* stringPool;
    352     int32_t stringPool_count;
    353     int32_t stringPool_size;
    354     Offset stringPool_offset;
    355     Offset* stringPool_offsetArray; // relative to stringPool
    356 
    357     int32_t total_size; // size of everything
    358 
    359     int32_t debug;
    360 
    361 public:
    362 
    363     Builder(int32_t debugLevel);
    364     ~Builder();
    365 
    366     void buildTopLevelProperties(const NameToEnumEntry* propName,
    367                                  int32_t propNameCount,
    368                                  const EnumToNameGroupEntry* propEnum,
    369                                  int32_t propEnumCount);
    370 
    371     void buildValues(const EnumToValueEntry* e2v,
    372                      int32_t count);
    373 
    374     void buildStringPool(const AliasName* propertyNames,
    375                          int32_t propertyNameCount,
    376                          const int32_t* nameGroupIndices,
    377                          int32_t nameGroupIndicesCount);
    378 
    379     void fixup();
    380 
    381     int8_t* createData(int32_t& length) const;
    382 
    383 private:
    384 
    385     static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
    386                                            int32_t count,
    387                                            int32_t& size);
    388     static NonContiguousEnumToOffset*
    389         buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
    390                                int32_t count,
    391                                int32_t& size);
    392 
    393     static NonContiguousEnumToOffset*
    394         buildNCEnumToValue(const EnumToValueEntry* e2v,
    395                            int32_t count,
    396                            int32_t& size);
    397 
    398     static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
    399                                        int32_t count,
    400                                        int32_t& size);
    401 
    402     Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
    403     void fixupNameToEnum(NameToEnum* n);
    404     void fixupEnumToNameGroup(EnumToOffset* e2ng);
    405     void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);
    406 
    407     void computeOffsets();
    408     void fixupStringPoolOffsets();
    409     void fixupNameGroupPoolOffsets();
    410     void fixupMiscellaneousOffsets();
    411 
    412     static int32_t align(int32_t a);
    413     static void erase(void* p, int32_t size);
    414 };
    415 
    416 Builder::Builder(int32_t debugLevel) {
    417     debug = debugLevel;
    418     enumToName = 0;
    419     nameToEnum = 0;
    420     enumToValue = 0;
    421     valueMap_count = 0;
    422     valueMap = 0;
    423     valueEnumToName = 0;
    424     valueNCEnumToName = 0;
    425     valueEnumToName_size = 0;
    426     valueEnumToName_offset = 0;
    427     valueNameToEnum = 0;
    428     valueNameToEnum_size = 0;
    429     valueNameToEnum_offset = 0;
    430     nameGroupPool = 0;
    431     stringPool = 0;
    432     stringPool_offsetArray = 0;
    433 }
    434 
    435 Builder::~Builder() {
    436     uprv_free(enumToName);
    437     uprv_free(nameToEnum);
    438     uprv_free(enumToValue);
    439     uprv_free(valueMap);
    440     for (int32_t i=0; i<valueMap_count; ++i) {
    441         uprv_free(valueEnumToName[i]);
    442         uprv_free(valueNCEnumToName[i]);
    443         uprv_free(valueNameToEnum[i]);
    444     }
    445     uprv_free(valueEnumToName);
    446     uprv_free(valueNCEnumToName);
    447     uprv_free(valueEnumToName_size);
    448     uprv_free(valueEnumToName_offset);
    449     uprv_free(valueNameToEnum);
    450     uprv_free(valueNameToEnum_size);
    451     uprv_free(valueNameToEnum_offset);
    452     uprv_free(nameGroupPool);
    453     uprv_free(stringPool);
    454     uprv_free(stringPool_offsetArray);
    455 }
    456 
    457 int32_t Builder::align(int32_t a) {
    458     U_ASSERT(a >= 0);
    459     int32_t k = a % sizeof(int32_t);
    460     if (k == 0) {
    461         return a;
    462     }
    463     a += sizeof(int32_t) - k;
    464     return a;
    465 }
    466 
    467 void Builder::erase(void* p, int32_t size) {
    468     U_ASSERT(size >= 0);
    469     int8_t* q = (int8_t*) p;
    470     while (size--) {
    471         *q++ = 0;
    472     }
    473 }
    474 
    475 EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
    476                                          int32_t count,
    477                                          int32_t& size) {
    478     U_ASSERT(e2ng->isContiguous(count));
    479     size = align(EnumToOffset::getSize(count));
    480     EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
    481     erase(result, size);
    482     result->enumStart = e2ng->enumValue;
    483     result->enumLimit = e2ng->enumValue + count;
    484     Offset* p = result->getOffsetArray();
    485     for (int32_t i=0; i<count; ++i) {
    486         // set these to NGI index values
    487         // fix them up to NGI offset values
    488         U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
    489         p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
    490     }
    491     return result;
    492 }
    493 
    494 NonContiguousEnumToOffset*
    495 Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
    496                                 int32_t count,
    497                                 int32_t& size) {
    498     U_ASSERT(!e2ng->isContiguous(count));
    499     size = align(NonContiguousEnumToOffset::getSize(count));
    500     NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
    501     erase(nc, size);
    502     nc->count = count;
    503     EnumValue* e = nc->getEnumArray();
    504     Offset* p = nc->getOffsetArray();
    505     for (int32_t i=0; i<count; ++i) {
    506         // set these to NGI index values
    507         // fix them up to NGI offset values
    508         e[i] = e2ng[i].enumValue;
    509         U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
    510         p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
    511     }
    512     return nc;
    513 }
    514 
    515 NonContiguousEnumToOffset*
    516 Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
    517                             int32_t count,
    518                             int32_t& size) {
    519     U_ASSERT(!e2v->isContiguous(count));
    520     size = align(NonContiguousEnumToOffset::getSize(count));
    521     NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
    522     erase(result, size);
    523     result->count = count;
    524     EnumValue* e = result->getEnumArray();
    525     for (int32_t i=0; i<count; ++i) {
    526         e[i] = e2v[i].enumValue;
    527         // offset must be set later
    528     }
    529     return result;
    530 }
    531 
    532 /**
    533  * Given an index into the string pool, return an offset.  computeOffsets()
    534  * must have been called already.  If allowNegative is true, allow negatives
    535  * and preserve their sign.
    536  */
    537 Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
    538     // Index 0 is ""; we turn this into an Offset of zero
    539     if (index == 0) return 0;
    540     if (index < 0) {
    541         if (allowNegative) {
    542             return -Builder::stringIndexToOffset(-index);
    543         } else {
    544             die("Negative string pool index");
    545         }
    546     } else {
    547         if (index >= stringPool_count) {
    548             die("String pool index too large");
    549         }
    550         Offset result = stringPool_offset + stringPool_offsetArray[index];
    551         U_ASSERT(result >= 0 && result < total_size);
    552         return result;
    553     }
    554     return 0; // never executed; make compiler happy
    555 }
    556 
    557 NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
    558                                      int32_t count,
    559                                      int32_t& size) {
    560     size = align(NameToEnum::getSize(count));
    561     NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
    562     erase(n2e, size);
    563     n2e->count = count;
    564     Offset* p = n2e->getNameArray();
    565     EnumValue* e = n2e->getEnumArray();
    566     for (int32_t i=0; i<count; ++i) {
    567         // set these to SP index values
    568         // fix them up to SP offset values
    569         U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
    570         p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
    571         e[i] = nameToEnum[i].enumValue;
    572     }
    573     return n2e;
    574 }
    575 
    576 
    577 void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
    578                                       int32_t propNameCount,
    579                                       const EnumToNameGroupEntry* propEnum,
    580                                       int32_t propEnumCount) {
    581     enumToName = buildNCEnumToNameGroup(propEnum,
    582                                         propEnumCount,
    583                                         enumToName_size);
    584     nameToEnum = buildNameToEnum(propName,
    585                                  propNameCount,
    586                                  nameToEnum_size);
    587 }
    588 
    589 void Builder::buildValues(const EnumToValueEntry* e2v,
    590                           int32_t count) {
    591     int32_t i;
    592 
    593     U_ASSERT(!e2v->isContiguous(count));
    594 
    595     valueMap_count = count;
    596 
    597     enumToValue = buildNCEnumToValue(e2v, count,
    598                                      enumToValue_size);
    599 
    600     valueMap_size = align(count * sizeof(ValueMap));
    601     valueMap = (ValueMap*) uprv_malloc(valueMap_size);
    602     erase(valueMap, valueMap_size);
    603 
    604     valueEnumToName = MALLOC(EnumToOffset*, count);
    605     valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
    606     valueEnumToName_size = MALLOC(int32_t, count);
    607     valueEnumToName_offset = MALLOC(Offset, count);
    608     valueNameToEnum = MALLOC(NameToEnum*, count);
    609     valueNameToEnum_size = MALLOC(int32_t, count);
    610     valueNameToEnum_offset = MALLOC(Offset, count);
    611 
    612     for (i=0; i<count; ++i) {
    613         UBool isContiguous =
    614             e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
    615         valueEnumToName[i] = 0;
    616         valueNCEnumToName[i] = 0;
    617         if (isContiguous) {
    618             valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
    619                                                    e2v[i].enumToName_count,
    620                                                    valueEnumToName_size[i]);
    621         } else {
    622             valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
    623                                                           e2v[i].enumToName_count,
    624                                                           valueEnumToName_size[i]);
    625         }
    626         valueNameToEnum[i] =
    627             buildNameToEnum(e2v[i].nameToEnum,
    628                             e2v[i].nameToEnum_count,
    629                             valueNameToEnum_size[i]);
    630     }
    631 }
    632 
    633 void Builder::buildStringPool(const AliasName* propertyNames,
    634                               int32_t propertyNameCount,
    635                               const int32_t* nameGroupIndices,
    636                               int32_t nameGroupIndicesCount) {
    637     int32_t i;
    638 
    639     nameGroupPool_count = nameGroupIndicesCount;
    640     nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
    641     nameGroupPool = MALLOC(Offset, nameGroupPool_count);
    642 
    643     for (i=0; i<nameGroupPool_count; ++i) {
    644         // Some indices are negative.
    645         int32_t a = nameGroupIndices[i];
    646         if (a < 0) a = -a;
    647         U_ASSERT(IS_VALID_OFFSET(a));
    648         nameGroupPool[i] = (Offset) nameGroupIndices[i];
    649     }
    650 
    651     stringPool_count = propertyNameCount;
    652     stringPool_size = 0;
    653     // first string must be "" -- we skip it
    654     U_ASSERT(*propertyNames[0].str == 0);
    655     for (i=1 /*sic*/; i<propertyNameCount; ++i) {
    656         stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
    657     }
    658     stringPool = MALLOC(char, stringPool_size);
    659     stringPool_offsetArray = MALLOC(Offset, stringPool_count);
    660     Offset soFar = 0;
    661     char* p = stringPool;
    662     stringPool_offsetArray[0] = -1; // we don't use this entry
    663     for (i=1 /*sic*/; i<propertyNameCount; ++i) {
    664         const char* str = propertyNames[i].str;
    665         int32_t len = (int32_t)uprv_strlen(str);
    666         uprv_strcpy(p, str);
    667         p += len;
    668         *p++ = 0;
    669         stringPool_offsetArray[i] = soFar;
    670         soFar += (Offset)(len+1);
    671     }
    672     U_ASSERT(soFar == stringPool_size);
    673     U_ASSERT(p == (stringPool + stringPool_size));
    674 }
    675 
    676 // Confirm that PropertyAliases is a POD (plain old data; see C++
    677 // std).  The following union will _fail to compile_ if
    678 // PropertyAliases is _not_ a POD.  (Note: We used to use the offsetof
    679 // macro to check this, but that's not quite right, so that test is
    680 // commented out -- see below.)
    681 typedef union {
    682     int32_t i;
    683     PropertyAliases p;
    684 } PropertyAliasesPODTest;
    685 
    686 void Builder::computeOffsets() {
    687     int32_t i;
    688     Offset off = sizeof(header);
    689 
    690     if (debug>0) {
    691         printf("header   \t offset=%4d  size=%5d\n", 0, off);
    692     }
    693 
    694     // PropertyAliases must have no v-table and must be
    695     // padded (if necessary) to the next 32-bit boundary.
    696     //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
    697     U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);
    698 
    699     #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)
    700 
    701     #define COMPUTE_OFFSET2(foo,type) \
    702       if (debug>0)\
    703         printf(#foo "\t offset=%4d  size=%5d\n", off, (int)foo##_size);\
    704       foo##_offset = off;\
    705       U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
    706       U_ASSERT(foo##_offset % sizeof(type) == 0);\
    707       off = (Offset) (off + foo##_size);
    708 
    709     COMPUTE_OFFSET(enumToName);     // 0:
    710     COMPUTE_OFFSET(nameToEnum);     // 2:
    711     COMPUTE_OFFSET(enumToValue);    // 3:
    712     COMPUTE_OFFSET(valueMap);       // 4:
    713 
    714     for (i=0; i<valueMap_count; ++i) {
    715         if (debug>0) {
    716             printf(" enumToName[%d]\t offset=%4d  size=%5d\n",
    717                    (int)i, off, (int)valueEnumToName_size[i]);
    718         }
    719 
    720         valueEnumToName_offset[i] = off;   // 5:
    721         U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
    722         off = (Offset) (off + valueEnumToName_size[i]);
    723 
    724         if (debug>0) {
    725             printf(" nameToEnum[%d]\t offset=%4d  size=%5d\n",
    726                    (int)i, off, (int)valueNameToEnum_size[i]);
    727         }
    728 
    729         valueNameToEnum_offset[i] = off;   // 6:
    730         U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
    731         off = (Offset) (off + valueNameToEnum_size[i]);
    732     }
    733 
    734     // These last two chunks have weaker alignment needs
    735     COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
    736     COMPUTE_OFFSET2(stringPool,char);      // 99:
    737 
    738     total_size = off;
    739     if (debug>0) printf("total                         size=%5d\n\n", (int)total_size);
    740     U_ASSERT(total_size <= (MAX_OFFSET+1));
    741 }
    742 
    743 void Builder::fixupNameToEnum(NameToEnum* n) {
    744     // Fix the string pool offsets in n
    745     Offset* p = n->getNameArray();
    746     for (int32_t i=0; i<n->count; ++i) {
    747         p[i] = stringIndexToOffset(p[i]);
    748     }
    749 }
    750 
    751 void Builder::fixupStringPoolOffsets() {
    752     int32_t i;
    753 
    754     // 2:
    755     fixupNameToEnum(nameToEnum);
    756 
    757     // 6:
    758     for (i=0; i<valueMap_count; ++i) {
    759         fixupNameToEnum(valueNameToEnum[i]);
    760     }
    761 
    762     // 98:
    763     for (i=0; i<nameGroupPool_count; ++i) {
    764         nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
    765     }
    766 }
    767 
    768 void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
    769     EnumValue i;
    770     int32_t j;
    771     Offset* p = e2ng->getOffsetArray();
    772     for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) {
    773         p[j] = nameGroupPool_offset + sizeof(Offset) * p[j];
    774     }
    775 }
    776 
    777 void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
    778     int32_t i;
    779     /*EnumValue* e = e2ng->getEnumArray();*/
    780     Offset* p = e2ng->getOffsetArray();
    781     for (i=0; i<e2ng->count; ++i) {
    782         p[i] = nameGroupPool_offset + sizeof(Offset) * p[i];
    783     }
    784 }
    785 
    786 void Builder::fixupNameGroupPoolOffsets() {
    787     int32_t i;
    788 
    789     // 0:
    790     fixupNCEnumToNameGroup(enumToName);
    791 
    792     // 1: (deleted)
    793 
    794     // 5:
    795     for (i=0; i<valueMap_count; ++i) {
    796         // 5a:
    797         if (valueEnumToName[i] != 0) {
    798             fixupEnumToNameGroup(valueEnumToName[i]);
    799         }
    800         // 5b:
    801         if (valueNCEnumToName[i] != 0) {
    802             fixupNCEnumToNameGroup(valueNCEnumToName[i]);
    803         }
    804     }
    805 }
    806 
    807 void Builder::fixupMiscellaneousOffsets() {
    808     int32_t i;
    809 
    810     // header:
    811     erase(&header, sizeof(header));
    812     header.enumToName_offset = enumToName_offset;
    813     header.nameToEnum_offset = nameToEnum_offset;
    814     header.enumToValue_offset = enumToValue_offset;
    815     // header meta-info used by Java:
    816     U_ASSERT(total_size > 0 && total_size < 0x7FFF);
    817     header.total_size = (int16_t) total_size;
    818     header.valueMap_offset = valueMap_offset;
    819     header.valueMap_count = (int16_t) valueMap_count;
    820     header.nameGroupPool_offset = nameGroupPool_offset;
    821     header.nameGroupPool_count = (int16_t) nameGroupPool_count;
    822     header.stringPool_offset = stringPool_offset;
    823     header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
    824 
    825     U_ASSERT(valueMap_count <= 0x7FFF);
    826     U_ASSERT(nameGroupPool_count <= 0x7FFF);
    827     U_ASSERT(stringPool_count <= 0x7FFF);
    828 
    829     // 3:
    830     Offset* p = enumToValue->getOffsetArray();
    831     /*EnumValue* e = enumToValue->getEnumArray();*/
    832     U_ASSERT(valueMap_count == enumToValue->count);
    833     for (i=0; i<valueMap_count; ++i) {
    834         p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
    835     }
    836 
    837     // 4:
    838     for (i=0; i<valueMap_count; ++i) {
    839         ValueMap& v = valueMap[i];
    840         v.enumToName_offset = v.ncEnumToName_offset = 0;
    841         if (valueEnumToName[i] != 0) {
    842             v.enumToName_offset = valueEnumToName_offset[i];
    843         }
    844         if (valueNCEnumToName[i] != 0) {
    845             v.ncEnumToName_offset = valueEnumToName_offset[i];
    846         }
    847         v.nameToEnum_offset = valueNameToEnum_offset[i];
    848     }
    849 }
    850 
    851 void Builder::fixup() {
    852     computeOffsets();
    853     fixupStringPoolOffsets();
    854     fixupNameGroupPoolOffsets();
    855     fixupMiscellaneousOffsets();
    856 }
    857 
    858 int8_t* Builder::createData(int32_t& length) const {
    859     length = total_size;
    860     int8_t* result = MALLOC(int8_t, length);
    861 
    862     int8_t* p = result;
    863     int8_t* limit = result + length;
    864 
    865     #define APPEND2(x, size)   \
    866       U_ASSERT((p+size)<=limit); \
    867       uprv_memcpy(p, x, size); \
    868       p += size
    869 
    870     #define APPEND(x) APPEND2(x, x##_size)
    871 
    872     APPEND2(&header, sizeof(header));
    873     APPEND(enumToName);
    874     APPEND(nameToEnum);
    875     APPEND(enumToValue);
    876     APPEND(valueMap);
    877 
    878     for (int32_t i=0; i<valueMap_count; ++i) {
    879         U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
    880                (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
    881         if (valueEnumToName[i] != 0) {
    882             APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
    883         }
    884         if (valueNCEnumToName[i] != 0) {
    885             APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
    886         }
    887         APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
    888     }
    889 
    890     APPEND(nameGroupPool);
    891     APPEND(stringPool);
    892 
    893     if (p != limit) {
    894         fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
    895         exit(1);
    896     }
    897     return result;
    898 }
    899 
    900 // END Builder
    901 //----------------------------------------------------------------------
    902 
    903 /* UDataInfo cf. udata.h */
    904 static UDataInfo dataInfo = {
    905     sizeof(UDataInfo),
    906     0,
    907 
    908     U_IS_BIG_ENDIAN,
    909     U_CHARSET_FAMILY,
    910     sizeof(UChar),
    911     0,
    912 
    913     {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3},
    914     {PNAME_FORMAT_VERSION, 0, 0, 0},                 /* formatVersion */
    915     {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
    916 };
    917 
    918 class genpname {
    919 
    920     // command-line options
    921     UBool useCopyright;
    922     UBool verbose;
    923     int32_t debug;
    924 
    925 public:
    926     int      MMain(int argc, char *argv[]);
    927 
    928 private:
    929     NameToEnumEntry* createNameIndex(const AliasList& list,
    930                                      int32_t& nameIndexCount);
    931 
    932     EnumToNameGroupEntry* createEnumIndex(const AliasList& list);
    933 
    934     int32_t  writeDataFile(const char *destdir, const Builder&);
    935 };
    936 
    937 int main(int argc, char *argv[]) {
    938     UErrorCode status = U_ZERO_ERROR;
    939     u_init(&status);
    940     if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
    941         // Note: u_init() will try to open ICU property data.
    942         //       failures here are expected when building ICU from scratch.
    943         //       ignore them.
    944         fprintf(stderr, "genpname: can not initialize ICU.  Status = %s\n",
    945             u_errorName(status));
    946         exit(1);
    947     }
    948 
    949     genpname app;
    950     U_MAIN_INIT_ARGS(argc, argv);
    951     int retVal = app.MMain(argc, argv);
    952     u_cleanup();
    953     return retVal;
    954 }
    955 
    956 static UOption options[]={
    957     UOPTION_HELP_H,
    958     UOPTION_HELP_QUESTION_MARK,
    959     UOPTION_COPYRIGHT,
    960     UOPTION_DESTDIR,
    961     UOPTION_VERBOSE,
    962     UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG),
    963 };
    964 
    965 NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
    966                                            int32_t& nameIndexCount) {
    967 
    968     // Build name => enum map
    969 
    970     // This is an n->1 map.  There are typically multiple names
    971     // mapping to one enum.  The name index is sorted in order of the name,
    972     // as defined by the uprv_compareAliasNames() function.
    973 
    974     int32_t i, j;
    975     int32_t count = list.count();
    976 
    977     // compute upper limit on number of names in the index
    978     int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
    979     NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
    980 
    981     nameIndexCount = 0;
    982     int32_t names[MAX_NAMES_PER_GROUP];
    983     for (i=0; i<count; ++i) {
    984         const Alias& p = list[i];
    985         int32_t n = p.getUniqueNames(names);
    986         for (j=0; j<n; ++j) {
    987             U_ASSERT(nameIndexCount < nameIndexCapacity);
    988             nameIndex[nameIndexCount++] =
    989                 NameToEnumEntry(names[j], p.enumValue);
    990         }
    991     }
    992 
    993     /*
    994      * use a stable sort to ensure consistent results between
    995      * genpname.cpp and the propname.cpp swapping code
    996      */
    997     UErrorCode errorCode = U_ZERO_ERROR;
    998     uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
    999                    compareNameToEnumEntry, NULL, TRUE, &errorCode);
   1000     if (debug>1) {
   1001         printf("Alias names: %d\n", (int)nameIndexCount);
   1002         for (i=0; i<nameIndexCount; ++i) {
   1003             printf("%s => %d\n",
   1004                    STRING_TABLE[nameIndex[i].nameIndex].str,
   1005                    (int)nameIndex[i].enumValue);
   1006         }
   1007         printf("\n");
   1008     }
   1009     // make sure there are no duplicates.  for a sorted list we need
   1010     // only compare adjacent items.  Alias.getUniqueNames() has
   1011     // already eliminated duplicate names for a single property, which
   1012     // does occur, so we're checking for duplicate names between two
   1013     // properties, which should never occur.
   1014     UBool ok = TRUE;
   1015     for (i=1; i<nameIndexCount; ++i) {
   1016         if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
   1017             STRING_TABLE[nameIndex[i].nameIndex]) {
   1018             printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
   1019                    STRING_TABLE[nameIndex[i-1].nameIndex].str,
   1020                    STRING_TABLE[nameIndex[i].nameIndex].str);
   1021             ok = FALSE;
   1022         }
   1023     }
   1024     if (!ok) {
   1025         die("Two or more duplicate names in property list");
   1026     }
   1027 
   1028     return nameIndex;
   1029 }
   1030 
   1031 EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
   1032 
   1033     // Build the enum => name map
   1034 
   1035     // This is a 1->n map.  Each enum maps to 1 or more names.  To
   1036     // accomplish this the index entry points to an element of the
   1037     // NAME_GROUP array.  This is the short name (which may be empty).
   1038     // From there, subsequent elements of NAME_GROUP are alternate
   1039     // names for this enum, up to and including the first one that is
   1040     // negative (negate for actual index).
   1041 
   1042     int32_t i, j, k;
   1043     int32_t count = list.count();
   1044 
   1045     EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
   1046     for (i=0; i<count; ++i) {
   1047         const Alias& p = list[i];
   1048         enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
   1049     }
   1050 
   1051     UErrorCode errorCode = U_ZERO_ERROR;
   1052     uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
   1053                    compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
   1054     if (debug>1) {
   1055         printf("Property enums: %d\n", (int)count);
   1056         for (i=0; i<count; ++i) {
   1057             printf("%d => %d: ",
   1058                    (int)enumIndex[i].enumValue,
   1059                    (int)enumIndex[i].nameGroupIndex);
   1060             UBool done = FALSE;
   1061             for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
   1062                 k = NAME_GROUP[j];
   1063                 if (k < 0) {
   1064                     k = -k;
   1065                     done = TRUE;
   1066                 }
   1067                 printf("\"%s\"", STRING_TABLE[k].str);
   1068                 if (!done) printf(", ");
   1069             }
   1070             printf("\n");
   1071         }
   1072         printf("\n");
   1073     }
   1074     return enumIndex;
   1075 }
   1076 
   1077 int genpname::MMain(int argc, char* argv[])
   1078 {
   1079     int32_t i, j;
   1080     UErrorCode status = U_ZERO_ERROR;
   1081 
   1082     u_init(&status);
   1083     if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
   1084         fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
   1085         status = U_ZERO_ERROR;
   1086     }
   1087 
   1088 
   1089     /* preset then read command line options */
   1090     options[3].value=u_getDataDirectory();
   1091     argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
   1092 
   1093     /* error handling, printing usage message */
   1094     if (argc<0) {
   1095         fprintf(stderr,
   1096             "error in command line argument \"%s\"\n",
   1097             argv[-argc]);
   1098     }
   1099 
   1100     debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
   1101 
   1102     if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
   1103        debug < 0 || debug > 9) {
   1104         fprintf(stderr,
   1105             "usage: %s [-options]\n"
   1106             "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
   1107             "options:\n"
   1108             "\t-h or -? or --help  this usage text\n"
   1109             "\t-v or --verbose     turn on verbose output\n"
   1110             "\t-c or --copyright   include a copyright notice\n"
   1111             "\t-d or --destdir     destination directory, followed by the path\n"
   1112             "\t-D or --debug 0..9  emit debugging messages (if > 0)\n",
   1113             argv[0]);
   1114         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
   1115     }
   1116 
   1117     /* get the options values */
   1118     useCopyright=options[2].doesOccur;
   1119     verbose = options[4].doesOccur;
   1120 
   1121     // ------------------------------------------------------------
   1122     // Do not sort the string table, instead keep it in data.h order.
   1123     // This simplifies data swapping and testing thereof because the string
   1124     // table itself need not be sorted during swapping.
   1125     // The NameToEnum sorter sorts each such map's string offsets instead.
   1126 
   1127     if (debug>1) {
   1128         printf("String pool: %d\n", (int)STRING_COUNT);
   1129         for (i=0; i<STRING_COUNT; ++i) {
   1130             if (i != 0) {
   1131                 printf(", ");
   1132             }
   1133             printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
   1134         }
   1135         printf("\n\n");
   1136     }
   1137 
   1138     // ------------------------------------------------------------
   1139     // Create top-level property indices
   1140 
   1141     PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
   1142     int32_t propNameCount;
   1143     NameToEnumEntry* propName = createNameIndex(props, propNameCount);
   1144     EnumToNameGroupEntry* propEnum = createEnumIndex(props);
   1145 
   1146     // ------------------------------------------------------------
   1147     // Create indices for the value list for each enumerated property
   1148 
   1149     // This will have more entries than we need...
   1150     EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
   1151     int32_t enumToValue_count = 0;
   1152     for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
   1153         if (PROPERTY[i].valueCount == 0) continue;
   1154         AliasArrayList values(PROPERTY[i].valueList,
   1155                               PROPERTY[i].valueCount);
   1156         enumToValue[j].enumValue = PROPERTY[i].enumValue;
   1157         enumToValue[j].enumToName = createEnumIndex(values);
   1158         enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
   1159         enumToValue[j].nameToEnum = createNameIndex(values,
   1160                                                     enumToValue[j].nameToEnum_count);
   1161         ++j;
   1162     }
   1163     enumToValue_count = j;
   1164 
   1165     uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
   1166                    compareEnumToValueEntry, NULL, FALSE, &status);
   1167 
   1168     // ------------------------------------------------------------
   1169     // Build PropertyAliases layout in memory
   1170 
   1171     Builder builder(debug);
   1172 
   1173     builder.buildTopLevelProperties(propName,
   1174                                     propNameCount,
   1175                                     propEnum,
   1176                                     PROPERTY_COUNT);
   1177 
   1178     builder.buildValues(enumToValue,
   1179                         enumToValue_count);
   1180 
   1181     builder.buildStringPool(STRING_TABLE,
   1182                             STRING_COUNT,
   1183                             NAME_GROUP,
   1184                             NAME_GROUP_COUNT);
   1185 
   1186     builder.fixup();
   1187 
   1188     ////////////////////////////////////////////////////////////
   1189     // Write the output file
   1190     ////////////////////////////////////////////////////////////
   1191     int32_t wlen = writeDataFile(options[3].value, builder);
   1192     if (verbose) {
   1193         fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
   1194             U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
   1195     }
   1196 
   1197     return 0; // success
   1198 }
   1199 
   1200 int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
   1201     int32_t length;
   1202     int8_t* data = builder.createData(length);
   1203 
   1204     UNewDataMemory *pdata;
   1205     UErrorCode status = U_ZERO_ERROR;
   1206 
   1207     pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
   1208                          useCopyright ? U_COPYRIGHT_STRING : 0, &status);
   1209     if (U_FAILURE(status)) {
   1210         die("Unable to create data memory");
   1211     }
   1212 
   1213     udata_writeBlock(pdata, data, length);
   1214 
   1215     int32_t dataLength = (int32_t) udata_finish(pdata, &status);
   1216     if (U_FAILURE(status)) {
   1217         die("Error writing output file");
   1218     }
   1219     if (dataLength != length) {
   1220         die("Written file doesn't match expected size");
   1221     }
   1222 
   1223     return dataLength;
   1224 }
   1225 
   1226 //eof
   1227