Home | History | Annotate | Download | only in toolutil
      1 /******************************************************************************
      2  *   Copyright (C) 2009-2013, International Business Machines
      3  *   Corporation and others.  All Rights Reserved.
      4  *******************************************************************************
      5  */
      6 #include "unicode/utypes.h"
      7 
      8 #if U_PLATFORM_HAS_WIN32_API
      9 #   define VC_EXTRALEAN
     10 #   define WIN32_LEAN_AND_MEAN
     11 #   define NOUSER
     12 #   define NOSERVICE
     13 #   define NOIME
     14 #   define NOMCX
     15 #include <windows.h>
     16 #include <time.h>
     17 #   ifdef __GNUC__
     18 #       define WINDOWS_WITH_GNUC
     19 #   endif
     20 #endif
     21 
     22 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
     23 #   define U_ELF
     24 #endif
     25 
     26 #ifdef U_ELF
     27 #   include <elf.h>
     28 #   if defined(ELFCLASS64)
     29 #       define U_ELF64
     30 #   endif
     31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
     32 #   ifndef EM_X86_64
     33 #       define EM_X86_64 62
     34 #   endif
     35 #   define ICU_ENTRY_OFFSET 0
     36 #endif
     37 
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include "unicode/putil.h"
     41 #include "cmemory.h"
     42 #include "cstring.h"
     43 #include "filestrm.h"
     44 #include "toolutil.h"
     45 #include "unicode/uclean.h"
     46 #include "uoptions.h"
     47 #include "pkg_genc.h"
     48 
     49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
     50 
     51 #define HEX_0X 0 /*  0x1234 */
     52 #define HEX_0H 1 /*  01234h */
     53 
     54 /* prototypes --------------------------------------------------------------- */
     55 static void
     56 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
     57 
     58 static uint32_t
     59 write8(FileStream *out, uint8_t byte, uint32_t column);
     60 
     61 static uint32_t
     62 write32(FileStream *out, uint32_t byte, uint32_t column);
     63 
     64 #if U_PLATFORM == U_PF_OS400
     65 static uint32_t
     66 write8str(FileStream *out, uint8_t byte, uint32_t column);
     67 #endif
     68 /* -------------------------------------------------------------------------- */
     69 
     70 /*
     71 Creating Template Files for New Platforms
     72 
     73 Let the cc compiler help you get started.
     74 Compile this program
     75     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
     76 with the -S option to produce assembly output.
     77 
     78 For example, this will generate array.s:
     79 gcc -S array.c
     80 
     81 This will produce a .s file that may look like this:
     82 
     83     .file   "array.c"
     84     .version        "01.01"
     85 gcc2_compiled.:
     86     .globl x
     87     .section        .rodata
     88     .align 4
     89     .type    x,@object
     90     .size    x,20
     91 x:
     92     .long   1
     93     .long   2
     94     .long   -559038737
     95     .long   -1
     96     .long   16
     97     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
     98 
     99 which gives a starting point that will compile, and can be transformed
    100 to become the template, generally with some consulting of as docs and
    101 some experimentation.
    102 
    103 If you want ICU to automatically use this assembly, you should
    104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
    105 where the name is the compiler or platform that you used in this
    106 assemblyHeader data structure.
    107 */
/*
 * Table of assembler output templates, one entry per supported
 * assembler/platform name. checkAssemblyHeaderName() selects an entry by
 * name, and writeAssemblyCode()/write32() use the selected entry to format
 * the generated .s file.
 */
static const struct AssemblyType {
    /* Template name, matched exactly by checkAssemblyHeaderName(). */
    const char *name;
    /*
     * printf-style format for the text emitted before the data. Every %s is
     * filled with the entry point name (writeAssemblyCode() passes the name
     * eight times, so a header may use up to eight %s conversions).
     */
    const char *header;
    /* Text written at the start of every line of data words. */
    const char *beginLine;
    /* printf-style format for the text emitted after the data; %s is the entry point name. */
    const char *footer;
    /* Notation used by write32() for hexadecimal values. */
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    // For gcc assemblers, the meaning of .align changes depending on the
    // hardware, so we use .balign 16 which always means 16 bytes.
    // https://sourceware.org/binutils/docs/as/Pseudo-Ops.html
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "\t.data\n"
        "\t.const\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
// 16 bytes alignment.
// http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   16\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
// 16 bytes alignment for sun-x86.
// http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  16\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
// 1<<4 bit alignment for aix.
// http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
// align 16 bytes
//  http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};

/* Index into assemblyHeader[] chosen by checkAssemblyHeaderName(); -1 while no template is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Hex notation (HEX_0X or HEX_0H) of the selected template; read by write32(). */
static int32_t hexType = HEX_0X;
    224 
    225 U_CAPI UBool U_EXPORT2
    226 checkAssemblyHeaderName(const char* optAssembly) {
    227     int32_t idx;
    228     assemblyHeaderIndex = -1;
    229     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    230         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
    231             assemblyHeaderIndex = idx;
    232             hexType = assemblyHeader[idx].hexType; /* set the hex type */
    233             return TRUE;
    234         }
    235     }
    236 
    237     return FALSE;
    238 }
    239 
    240 
    241 U_CAPI void U_EXPORT2
    242 printAssemblyHeadersToStdErr(void) {
    243     int32_t idx;
    244     fprintf(stderr, "%s", assemblyHeader[0].name);
    245     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    246         fprintf(stderr, ", %s", assemblyHeader[idx].name);
    247     }
    248     fprintf(stderr,
    249         ")\n");
    250 }
    251 
    252 U_CAPI void U_EXPORT2
    253 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
    254     uint32_t column = MAX_COLUMN;
    255     char entry[64];
    256     uint32_t buffer[1024];
    257     char *bufferStr = (char *)buffer;
    258     FileStream *in, *out;
    259     size_t i, length;
    260 
    261     in=T_FileStream_open(filename, "rb");
    262     if(in==NULL) {
    263         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    264         exit(U_FILE_ACCESS_ERROR);
    265     }
    266 
    267     getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
    268     out=T_FileStream_open(bufferStr, "w");
    269     if(out==NULL) {
    270         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
    271         exit(U_FILE_ACCESS_ERROR);
    272     }
    273 
    274     if (outFilePath != NULL) {
    275         uprv_strcpy(outFilePath, bufferStr);
    276     }
    277 
    278 #ifdef WINDOWS_WITH_GNUC
    279     /* Need to fix the file seperator character when using MinGW. */
    280     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
    281 #endif
    282 
    283     if(optEntryPoint != NULL) {
    284         uprv_strcpy(entry, optEntryPoint);
    285         uprv_strcat(entry, "_dat");
    286     }
    287 
    288     /* turn dashes or dots in the entry name into underscores */
    289     length=uprv_strlen(entry);
    290     for(i=0; i<length; ++i) {
    291         if(entry[i]=='-' || entry[i]=='.') {
    292             entry[i]='_';
    293         }
    294     }
    295 
    296     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
    297         entry, entry, entry, entry,
    298         entry, entry, entry, entry);
    299     T_FileStream_writeLine(out, bufferStr);
    300     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
    301 
    302     for(;;) {
    303         length=T_FileStream_read(in, buffer, sizeof(buffer));
    304         if(length==0) {
    305             break;
    306         }
    307         if (length != sizeof(buffer)) {
    308             /* pad with extra 0's when at the end of the file */
    309             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
    310                 buffer[length+i] = 0;
    311             }
    312         }
    313         for(i=0; i<(length/sizeof(buffer[0])); i++) {
    314             column = write32(out, buffer[i], column);
    315         }
    316     }
    317 
    318     T_FileStream_writeLine(out, "\n");
    319 
    320     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
    321         entry, entry, entry, entry,
    322         entry, entry, entry, entry);
    323     T_FileStream_writeLine(out, bufferStr);
    324 
    325     if(T_FileStream_error(in)) {
    326         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    327         exit(U_FILE_ACCESS_ERROR);
    328     }
    329 
    330     if(T_FileStream_error(out)) {
    331         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    332         exit(U_FILE_ACCESS_ERROR);
    333     }
    334 
    335     T_FileStream_close(out);
    336     T_FileStream_close(in);
    337 }
    338 
    339 U_CAPI void U_EXPORT2
    340 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
    341     uint32_t column = MAX_COLUMN;
    342     char buffer[4096], entry[64];
    343     FileStream *in, *out;
    344     size_t i, length;
    345 
    346     in=T_FileStream_open(filename, "rb");
    347     if(in==NULL) {
    348         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    349         exit(U_FILE_ACCESS_ERROR);
    350     }
    351 
    352     if(optName != NULL) { /* prepend  'icudt28_' */
    353       strcpy(entry, optName);
    354       strcat(entry, "_");
    355     } else {
    356       entry[0] = 0;
    357     }
    358 
    359     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
    360     if (outFilePath != NULL) {
    361         uprv_strcpy(outFilePath, buffer);
    362     }
    363     out=T_FileStream_open(buffer, "w");
    364     if(out==NULL) {
    365         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
    366         exit(U_FILE_ACCESS_ERROR);
    367     }
    368 
    369     /* turn dashes or dots in the entry name into underscores */
    370     length=uprv_strlen(entry);
    371     for(i=0; i<length; ++i) {
    372         if(entry[i]=='-' || entry[i]=='.') {
    373             entry[i]='_';
    374         }
    375     }
    376 
    377 #if U_PLATFORM == U_PF_OS400
    378     /*
    379     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
    380 
    381     This is here because this platform can't currently put
    382     const data into the read-only pages of an object or
    383     shared library (service program). Only strings are allowed in read-only
    384     pages, so we use char * strings to store the data.
    385 
    386     In order to prevent the beginning of the data from ever matching the
    387     magic numbers we must still use the initial double.
    388     [grhoten 4/24/2003]
    389     */
    390     sprintf(buffer,
    391         "#ifndef IN_GENERATED_CCODE\n"
    392         "#define IN_GENERATED_CCODE\n"
    393         "#define U_DISABLE_RENAMING 1\n"
    394         "#include \"unicode/umachine.h\"\n"
    395         "#endif\n"
    396         "U_CDECL_BEGIN\n"
    397         "const struct {\n"
    398         "    double bogus;\n"
    399         "    const char *bytes; \n"
    400         "} %s={ 0.0, \n",
    401         entry);
    402     T_FileStream_writeLine(out, buffer);
    403 
    404     for(;;) {
    405         length=T_FileStream_read(in, buffer, sizeof(buffer));
    406         if(length==0) {
    407             break;
    408         }
    409         for(i=0; i<length; ++i) {
    410             column = write8str(out, (uint8_t)buffer[i], column);
    411         }
    412     }
    413 
    414     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
    415 #else
    416     /* Function renaming shouldn't be done in data */
    417     sprintf(buffer,
    418         "#ifndef IN_GENERATED_CCODE\n"
    419         "#define IN_GENERATED_CCODE\n"
    420         "#define U_DISABLE_RENAMING 1\n"
    421         "#include \"unicode/umachine.h\"\n"
    422         "#endif\n"
    423         "U_CDECL_BEGIN\n"
    424         "const struct {\n"
    425         "    double bogus;\n"
    426         "    uint8_t bytes[%ld]; \n"
    427         "} %s={ 0.0, {\n",
    428         (long)T_FileStream_size(in), entry);
    429     T_FileStream_writeLine(out, buffer);
    430 
    431     for(;;) {
    432         length=T_FileStream_read(in, buffer, sizeof(buffer));
    433         if(length==0) {
    434             break;
    435         }
    436         for(i=0; i<length; ++i) {
    437             column = write8(out, (uint8_t)buffer[i], column);
    438         }
    439     }
    440 
    441     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
    442 #endif
    443 
    444     if(T_FileStream_error(in)) {
    445         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    446         exit(U_FILE_ACCESS_ERROR);
    447     }
    448 
    449     if(T_FileStream_error(out)) {
    450         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    451         exit(U_FILE_ACCESS_ERROR);
    452     }
    453 
    454     T_FileStream_close(out);
    455     T_FileStream_close(in);
    456 }
    457 
    458 static uint32_t
    459 write32(FileStream *out, uint32_t bitField, uint32_t column) {
    460     int32_t i;
    461     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
    462     char *s = bitFieldStr;
    463     uint8_t *ptrIdx = (uint8_t *)&bitField;
    464     static const char hexToStr[16] = {
    465         '0','1','2','3',
    466         '4','5','6','7',
    467         '8','9','A','B',
    468         'C','D','E','F'
    469     };
    470 
    471     /* write the value, possibly with comma and newline */
    472     if(column==MAX_COLUMN) {
    473         /* first byte */
    474         column=1;
    475     } else if(column<32) {
    476         *(s++)=',';
    477         ++column;
    478     } else {
    479         *(s++)='\n';
    480         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
    481         s+=uprv_strlen(s);
    482         column=1;
    483     }
    484 
    485     if (bitField < 10) {
    486         /* It's a small number. Don't waste the space for 0x */
    487         *(s++)=hexToStr[bitField];
    488     }
    489     else {
    490         int seenNonZero = 0; /* This is used to remove leading zeros */
    491 
    492         if(hexType==HEX_0X) {
    493          *(s++)='0';
    494          *(s++)='x';
    495         } else if(hexType==HEX_0H) {
    496          *(s++)='0';
    497         }
    498 
    499         /* This creates a 32-bit field */
    500 #if U_IS_BIG_ENDIAN
    501         for (i = 0; i < sizeof(uint32_t); i++)
    502 #else
    503         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
    504 #endif
    505         {
    506             uint8_t value = ptrIdx[i];
    507             if (value || seenNonZero) {
    508                 *(s++)=hexToStr[value>>4];
    509                 *(s++)=hexToStr[value&0xF];
    510                 seenNonZero = 1;
    511             }
    512         }
    513         if(hexType==HEX_0H) {
    514          *(s++)='h';
    515         }
    516     }
    517 
    518     *(s++)=0;
    519     T_FileStream_writeLine(out, bitFieldStr);
    520     return column;
    521 }
    522 
    523 static uint32_t
    524 write8(FileStream *out, uint8_t byte, uint32_t column) {
    525     char s[4];
    526     int i=0;
    527 
    528     /* convert the byte value to a string */
    529     if(byte>=100) {
    530         s[i++]=(char)('0'+byte/100);
    531         byte%=100;
    532     }
    533     if(i>0 || byte>=10) {
    534         s[i++]=(char)('0'+byte/10);
    535         byte%=10;
    536     }
    537     s[i++]=(char)('0'+byte);
    538     s[i]=0;
    539 
    540     /* write the value, possibly with comma and newline */
    541     if(column==MAX_COLUMN) {
    542         /* first byte */
    543         column=1;
    544     } else if(column<16) {
    545         T_FileStream_writeLine(out, ",");
    546         ++column;
    547     } else {
    548         T_FileStream_writeLine(out, ",\n");
    549         column=1;
    550     }
    551     T_FileStream_writeLine(out, s);
    552     return column;
    553 }
    554 
    555 #if U_PLATFORM == U_PF_OS400
    556 static uint32_t
    557 write8str(FileStream *out, uint8_t byte, uint32_t column) {
    558     char s[8];
    559 
    560     if (byte > 7)
    561         sprintf(s, "\\x%X", byte);
    562     else
    563         sprintf(s, "\\%X", byte);
    564 
    565     /* write the value, possibly with comma and newline */
    566     if(column==MAX_COLUMN) {
    567         /* first byte */
    568         column=1;
    569         T_FileStream_writeLine(out, "\"");
    570     } else if(column<24) {
    571         ++column;
    572     } else {
    573         T_FileStream_writeLine(out, "\"\n\"");
    574         column=1;
    575     }
    576     T_FileStream_writeLine(out, s);
    577     return column;
    578 }
    579 #endif
    580 
    581 static void
    582 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
    583     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
    584 
    585     /* copy path */
    586     if(destdir!=NULL && *destdir!=0) {
    587         do {
    588             *outFilename++=*destdir++;
    589         } while(*destdir!=0);
    590         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
    591             *outFilename++=U_FILE_SEP_CHAR;
    592         }
    593         inFilename=basename;
    594     } else {
    595         while(inFilename<basename) {
    596             *outFilename++=*inFilename++;
    597         }
    598     }
    599 
    600     if(suffix==NULL) {
    601         /* the filename does not have a suffix */
    602         uprv_strcpy(entryName, inFilename);
    603         if(optFilename != NULL) {
    604           uprv_strcpy(outFilename, optFilename);
    605         } else {
    606           uprv_strcpy(outFilename, inFilename);
    607         }
    608         uprv_strcat(outFilename, newSuffix);
    609     } else {
    610         char *saveOutFilename = outFilename;
    611         /* copy basename */
    612         while(inFilename<suffix) {
    613             if(*inFilename=='-') {
    614                 /* iSeries cannot have '-' in the .o objects. */
    615                 *outFilename++=*entryName++='_';
    616                 inFilename++;
    617             }
    618             else {
    619                 *outFilename++=*entryName++=*inFilename++;
    620             }
    621         }
    622 
    623         /* replace '.' by '_' */
    624         *outFilename++=*entryName++='_';
    625         ++inFilename;
    626 
    627         /* copy suffix */
    628         while(*inFilename!=0) {
    629             *outFilename++=*entryName++=*inFilename++;
    630         }
    631 
    632         *entryName=0;
    633 
    634         if(optFilename != NULL) {
    635             uprv_strcpy(saveOutFilename, optFilename);
    636             uprv_strcat(saveOutFilename, newSuffix);
    637         } else {
    638             /* add ".c" */
    639             uprv_strcpy(outFilename, newSuffix);
    640         }
    641     }
    642 }
    643 
    644 #ifdef CAN_GENERATE_OBJECTS
    645 static void
    646 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
    647     union {
    648         char        bytes[2048];
    649 #ifdef U_ELF
    650         Elf32_Ehdr  header32;
    651         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
    652 #elif U_PLATFORM_HAS_WIN32_API
    653         IMAGE_FILE_HEADER header;
    654 #endif
    655     } buffer;
    656 
    657     const char *filename;
    658     FileStream *in;
    659     int32_t length;
    660 
    661 #ifdef U_ELF
    662 
    663 #elif U_PLATFORM_HAS_WIN32_API
    664     const IMAGE_FILE_HEADER *pHeader;
    665 #else
    666 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    667 #endif
    668 
    669     if(optMatchArch != NULL) {
    670         filename=optMatchArch;
    671     } else {
    672         /* set defaults */
    673 #ifdef U_ELF
    674         /* set EM_386 because elf.h does not provide better defaults */
    675         *pCPU=EM_386;
    676         *pBits=32;
    677         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
    678 #elif U_PLATFORM_HAS_WIN32_API
    679 /* _M_IA64 should be defined in windows.h */
    680 #   if defined(_M_IA64)
    681         *pCPU=IMAGE_FILE_MACHINE_IA64;
    682 #   elif defined(_M_AMD64)
    683         *pCPU=IMAGE_FILE_MACHINE_AMD64;
    684 #   else
    685         *pCPU=IMAGE_FILE_MACHINE_I386;
    686 #   endif
    687         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    688         *pIsBigEndian=FALSE;
    689 #else
    690 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    691 #endif
    692         return;
    693     }
    694 
    695     in=T_FileStream_open(filename, "rb");
    696     if(in==NULL) {
    697         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
    698         exit(U_FILE_ACCESS_ERROR);
    699     }
    700     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
    701 
    702 #ifdef U_ELF
    703     if(length<sizeof(Elf32_Ehdr)) {
    704         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    705         exit(U_UNSUPPORTED_ERROR);
    706     }
    707     if(
    708         buffer.header32.e_ident[0]!=ELFMAG0 ||
    709         buffer.header32.e_ident[1]!=ELFMAG1 ||
    710         buffer.header32.e_ident[2]!=ELFMAG2 ||
    711         buffer.header32.e_ident[3]!=ELFMAG3 ||
    712         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
    713     ) {
    714         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
    715         exit(U_UNSUPPORTED_ERROR);
    716     }
    717 
    718     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
    719 #ifdef U_ELF64
    720     if(*pBits!=32 && *pBits!=64) {
    721         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
    722         exit(U_UNSUPPORTED_ERROR);
    723     }
    724 #else
    725     if(*pBits!=32) {
    726         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
    727         exit(U_UNSUPPORTED_ERROR);
    728     }
    729 #endif
    730 
    731     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
    732     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
    733         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
    734         exit(U_UNSUPPORTED_ERROR);
    735     }
    736     /* TODO: Support byte swapping */
    737 
    738     *pCPU=buffer.header32.e_machine;
    739 #elif U_PLATFORM_HAS_WIN32_API
    740     if(length<sizeof(IMAGE_FILE_HEADER)) {
    741         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    742         exit(U_UNSUPPORTED_ERROR);
    743     }
    744     /* TODO: Use buffer.header.  Keep aliasing legal.  */
    745     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
    746     *pCPU=pHeader->Machine;
    747     /*
    748      * The number of bits is implicit with the Machine value.
    749      * *pBits is ignored in the calling code, so this need not be precise.
    750      */
    751     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    752     /* Windows always runs on little-endian CPUs. */
    753     *pIsBigEndian=FALSE;
    754 #else
    755 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    756 #endif
    757 
    758     T_FileStream_close(in);
    759 }
    760 
    761 U_CAPI void U_EXPORT2
    762 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
    763     /* common variables */
    764     char buffer[4096], entry[40]={ 0 };
    765     FileStream *in, *out;
    766     const char *newSuffix;
    767     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
    768 
    769     uint16_t cpu, bits;
    770     UBool makeBigEndian;
    771 
    772     /* platform-specific variables and initialization code */
    773 #ifdef U_ELF
    774     /* 32-bit Elf file header */
    775     static Elf32_Ehdr header32={
    776         {
    777             /* e_ident[] */
    778             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    779             ELFCLASS32,
    780             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    781             EV_CURRENT /* EI_VERSION */
    782         },
    783         ET_REL,
    784         EM_386,
    785         EV_CURRENT, /* e_version */
    786         0, /* e_entry */
    787         0, /* e_phoff */
    788         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
    789         0, /* e_flags */
    790         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
    791         0, /* e_phentsize */
    792         0, /* e_phnum */
    793         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
    794         5, /* e_shnum */
    795         2 /* e_shstrndx */
    796     };
    797 
    798     /* 32-bit Elf section header table */
    799     static Elf32_Shdr sectionHeaders32[5]={
    800         { /* SHN_UNDEF */
    801             0
    802         },
    803         { /* .symtab */
    804             1, /* sh_name */
    805             SHT_SYMTAB,
    806             0, /* sh_flags */
    807             0, /* sh_addr */
    808             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
    809             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
    810             3, /* sh_link=sect hdr index of .strtab */
    811             1, /* sh_info=One greater than the symbol table index of the last
    812                 * local symbol (with STB_LOCAL). */
    813             4, /* sh_addralign */
    814             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
    815         },
    816         { /* .shstrtab */
    817             9, /* sh_name */
    818             SHT_STRTAB,
    819             0, /* sh_flags */
    820             0, /* sh_addr */
    821             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
    822             40, /* sh_size */
    823             0, /* sh_link */
    824             0, /* sh_info */
    825             1, /* sh_addralign */
    826             0 /* sh_entsize */
    827         },
    828         { /* .strtab */
    829             19, /* sh_name */
    830             SHT_STRTAB,
    831             0, /* sh_flags */
    832             0, /* sh_addr */
    833             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
    834             (Elf32_Word)sizeof(entry), /* sh_size */
    835             0, /* sh_link */
    836             0, /* sh_info */
    837             1, /* sh_addralign */
    838             0 /* sh_entsize */
    839         },
    840         { /* .rodata */
    841             27, /* sh_name */
    842             SHT_PROGBITS,
    843             SHF_ALLOC, /* sh_flags */
    844             0, /* sh_addr */
    845             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
    846             0, /* sh_size */
    847             0, /* sh_link */
    848             0, /* sh_info */
    849             16, /* sh_addralign */
    850             0 /* sh_entsize */
    851         }
    852     };
    853 
    854     /* symbol table */
    855     static Elf32_Sym symbols32[2]={
    856         { /* STN_UNDEF */
    857             0
    858         },
    859         { /* data entry point */
    860             1, /* st_name */
    861             0, /* st_value */
    862             0, /* st_size */
    863             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    864             0, /* st_other */
    865             4 /* st_shndx=index of related section table entry */
    866         }
    867     };
    868 
    869     /* section header string table, with decimal string offsets */
    870     static const char sectionStrings[40]=
    871         /*  0 */ "\0"
    872         /*  1 */ ".symtab\0"
    873         /*  9 */ ".shstrtab\0"
    874         /* 19 */ ".strtab\0"
    875         /* 27 */ ".rodata\0"
    876         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
    877         /* 40: padded to multiple of 8 bytes */
    878 
    879     /*
    880      * Use entry[] for the string table which will contain only the
    881      * entry point name.
    882      * entry[0] must be 0 (NUL)
    883      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
    884      */
    885 
    886     /* 16-align .rodata in the .o file, just in case */
    887     static const char padding[16]={ 0 };
    888     int32_t paddingSize;
    889 
    890 #ifdef U_ELF64
    891     /* 64-bit Elf file header */
    892     static Elf64_Ehdr header64={
    893         {
    894             /* e_ident[] */
    895             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    896             ELFCLASS64,
    897             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    898             EV_CURRENT /* EI_VERSION */
    899         },
    900         ET_REL,
    901         EM_X86_64,
    902         EV_CURRENT, /* e_version */
    903         0, /* e_entry */
    904         0, /* e_phoff */
    905         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
    906         0, /* e_flags */
    907         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
    908         0, /* e_phentsize */
    909         0, /* e_phnum */
    910         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
    911         5, /* e_shnum */
    912         2 /* e_shstrndx */
    913     };
    914 
    915     /* 64-bit Elf section header table */
    916     static Elf64_Shdr sectionHeaders64[5]={
    917         { /* SHN_UNDEF */
    918             0
    919         },
    920         { /* .symtab */
    921             1, /* sh_name */
    922             SHT_SYMTAB,
    923             0, /* sh_flags */
    924             0, /* sh_addr */
    925             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
    926             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
    927             3, /* sh_link=sect hdr index of .strtab */
    928             1, /* sh_info=One greater than the symbol table index of the last
    929                 * local symbol (with STB_LOCAL). */
    930             4, /* sh_addralign */
    931             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
    932         },
    933         { /* .shstrtab */
    934             9, /* sh_name */
    935             SHT_STRTAB,
    936             0, /* sh_flags */
    937             0, /* sh_addr */
    938             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
    939             40, /* sh_size */
    940             0, /* sh_link */
    941             0, /* sh_info */
    942             1, /* sh_addralign */
    943             0 /* sh_entsize */
    944         },
    945         { /* .strtab */
    946             19, /* sh_name */
    947             SHT_STRTAB,
    948             0, /* sh_flags */
    949             0, /* sh_addr */
    950             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
    951             (Elf64_Xword)sizeof(entry), /* sh_size */
    952             0, /* sh_link */
    953             0, /* sh_info */
    954             1, /* sh_addralign */
    955             0 /* sh_entsize */
    956         },
    957         { /* .rodata */
    958             27, /* sh_name */
    959             SHT_PROGBITS,
    960             SHF_ALLOC, /* sh_flags */
    961             0, /* sh_addr */
    962             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
    963             0, /* sh_size */
    964             0, /* sh_link */
    965             0, /* sh_info */
    966             16, /* sh_addralign */
    967             0 /* sh_entsize */
    968         }
    969     };
    970 
    971     /*
    972      * 64-bit symbol table
    973      * careful: different order of items compared with Elf32_Sym!
    974      */
    975     static Elf64_Sym symbols64[2]={
    976         { /* STN_UNDEF */
    977             0
    978         },
    979         { /* data entry point */
    980             1, /* st_name */
    981             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    982             0, /* st_other */
    983             4, /* st_shndx=index of related section table entry */
    984             0, /* st_value */
    985             0 /* st_size */
    986         }
    987     };
    988 
    989 #endif /* U_ELF64 */
    990 
    991     /* entry[] has a leading NUL */
    992     entryOffset=1;
    993 
    994     /* in the common code, count entryLength from after the NUL */
    995     entryLengthOffset=1;
    996 
    997     newSuffix=".o";
    998 
    999 #elif U_PLATFORM_HAS_WIN32_API
   1000     struct {
   1001         IMAGE_FILE_HEADER fileHeader;
   1002         IMAGE_SECTION_HEADER sections[2];
   1003         char linkerOptions[100];
   1004     } objHeader;
   1005     IMAGE_SYMBOL symbols[1];
   1006     struct {
   1007         DWORD sizeofLongNames;
   1008         char longNames[100];
   1009     } symbolNames;
   1010 
   1011     /*
   1012      * entry sometimes has a leading '_';
   1013      * it is overwritten if entryOffset==0, depending on the target platform
   1014      * see check for cpu below
   1015      */
   1016     entry[0]='_';
   1017 
   1018     newSuffix=".obj";
   1019 #else
   1020 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1021 #endif
   1022 
   1023     /* deal with options, files and the entry point name */
   1024     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
   1025     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
   1026 #if U_PLATFORM_HAS_WIN32_API
   1027     if(cpu==IMAGE_FILE_MACHINE_I386) {
   1028         entryOffset=1;
   1029     }
   1030 #endif
   1031 
   1032     in=T_FileStream_open(filename, "rb");
   1033     if(in==NULL) {
   1034         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
   1035         exit(U_FILE_ACCESS_ERROR);
   1036     }
   1037     size=T_FileStream_size(in);
   1038 
   1039     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
   1040     if (outFilePath != NULL) {
   1041         uprv_strcpy(outFilePath, buffer);
   1042     }
   1043 
   1044     if(optEntryPoint != NULL) {
   1045         uprv_strcpy(entry+entryOffset, optEntryPoint);
   1046         uprv_strcat(entry+entryOffset, "_dat");
   1047     }
   1048     /* turn dashes in the entry name into underscores */
   1049     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
   1050     for(i=0; i<entryLength; ++i) {
   1051         if(entry[entryLengthOffset+i]=='-') {
   1052             entry[entryLengthOffset+i]='_';
   1053         }
   1054     }
   1055 
   1056     /* open the output file */
   1057     out=T_FileStream_open(buffer, "wb");
   1058     if(out==NULL) {
   1059         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
   1060         exit(U_FILE_ACCESS_ERROR);
   1061     }
   1062 
   1063 #ifdef U_ELF
   1064     if(bits==32) {
   1065         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1066         header32.e_machine=cpu;
   1067 
   1068         /* 16-align .rodata in the .o file, just in case */
   1069         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
   1070         if(paddingSize!=0) {
   1071                 paddingSize=0x10-paddingSize;
   1072                 sectionHeaders32[4].sh_offset+=paddingSize;
   1073         }
   1074 
   1075         sectionHeaders32[4].sh_size=(Elf32_Word)size;
   1076 
   1077         symbols32[1].st_size=(Elf32_Word)size;
   1078 
   1079         /* write .o headers */
   1080         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
   1081         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
   1082         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
   1083     } else /* bits==64 */ {
   1084 #ifdef U_ELF64
   1085         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1086         header64.e_machine=cpu;
   1087 
   1088         /* 16-align .rodata in the .o file, just in case */
   1089         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
   1090         if(paddingSize!=0) {
   1091                 paddingSize=0x10-paddingSize;
   1092                 sectionHeaders64[4].sh_offset+=paddingSize;
   1093         }
   1094 
   1095         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
   1096 
   1097         symbols64[1].st_size=(Elf64_Xword)size;
   1098 
   1099         /* write .o headers */
   1100         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
   1101         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
   1102         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
   1103 #endif
   1104     }
   1105 
   1106     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
   1107     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
   1108     if(paddingSize!=0) {
   1109         T_FileStream_write(out, padding, paddingSize);
   1110     }
   1111 #elif U_PLATFORM_HAS_WIN32_API
   1112     /* populate the .obj headers */
   1113     uprv_memset(&objHeader, 0, sizeof(objHeader));
   1114     uprv_memset(&symbols, 0, sizeof(symbols));
   1115     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
   1116 
   1117     /* write the linker export directive */
   1118     uprv_strcpy(objHeader.linkerOptions, "-export:");
   1119     length=8;
   1120     uprv_strcpy(objHeader.linkerOptions+length, entry);
   1121     length+=entryLength;
   1122     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
   1123     length+=6;
   1124 
   1125     /* set the file header */
   1126     objHeader.fileHeader.Machine=cpu;
   1127     objHeader.fileHeader.NumberOfSections=2;
   1128     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
   1129     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
   1130     objHeader.fileHeader.NumberOfSymbols=1;
   1131 
   1132     /* set the section for the linker options */
   1133     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
   1134     objHeader.sections[0].SizeOfRawData=length;
   1135     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
   1136     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
   1137 
   1138     /* set the data section */
   1139     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
   1140     objHeader.sections[1].SizeOfRawData=size;
   1141     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
   1142     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
   1143 
   1144     /* set the symbol table */
   1145     if(entryLength<=8) {
   1146         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
   1147         symbolNames.sizeofLongNames=4;
   1148     } else {
   1149         symbols[0].N.Name.Short=0;
   1150         symbols[0].N.Name.Long=4;
   1151         symbolNames.sizeofLongNames=4+entryLength+1;
   1152         uprv_strcpy(symbolNames.longNames, entry);
   1153     }
   1154     symbols[0].SectionNumber=2;
   1155     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
   1156 
   1157     /* write the file header and the linker options section */
   1158     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
   1159 #else
   1160 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1161 #endif
   1162 
   1163     /* copy the data file into section 2 */
   1164     for(;;) {
   1165         length=T_FileStream_read(in, buffer, sizeof(buffer));
   1166         if(length==0) {
   1167             break;
   1168         }
   1169         T_FileStream_write(out, buffer, (int32_t)length);
   1170     }
   1171 
   1172 #if U_PLATFORM_HAS_WIN32_API
   1173     /* write the symbol table */
   1174     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
   1175     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
   1176 #endif
   1177 
   1178     if(T_FileStream_error(in)) {
   1179         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
   1180         exit(U_FILE_ACCESS_ERROR);
   1181     }
   1182 
   1183     if(T_FileStream_error(out)) {
   1184         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
   1185         exit(U_FILE_ACCESS_ERROR);
   1186     }
   1187 
   1188     T_FileStream_close(out);
   1189     T_FileStream_close(in);
   1190 }
   1191 #endif
   1192