Home | History | Annotate | Download | only in toolutil
      1 /******************************************************************************
      2  *   Copyright (C) 2009-2013, International Business Machines
      3  *   Corporation and others.  All Rights Reserved.
      4  *******************************************************************************
      5  */
      6 #include "unicode/utypes.h"
      7 
      8 #if U_PLATFORM_HAS_WIN32_API
      9 #   define VC_EXTRALEAN
     10 #   define WIN32_LEAN_AND_MEAN
     11 #   define NOUSER
     12 #   define NOSERVICE
     13 #   define NOIME
     14 #   define NOMCX
     15 #include <windows.h>
     16 #include <time.h>
     17 #   ifdef __GNUC__
     18 #       define WINDOWS_WITH_GNUC
     19 #   endif
     20 #endif
     21 
     22 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
     23 #   define U_ELF
     24 #endif
     25 
     26 #ifdef U_ELF
     27 #   include <elf.h>
     28 #   if defined(ELFCLASS64)
     29 #       define U_ELF64
     30 #   endif
     31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
     32 #   ifndef EM_X86_64
     33 #       define EM_X86_64 62
     34 #   endif
     35 #   define ICU_ENTRY_OFFSET 0
     36 #endif
     37 
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include "unicode/putil.h"
     41 #include "cmemory.h"
     42 #include "cstring.h"
     43 #include "filestrm.h"
     44 #include "toolutil.h"
     45 #include "unicode/uclean.h"
     46 #include "uoptions.h"
     47 #include "pkg_genc.h"
     48 
/*
 * Sentinel column value meaning "nothing has been written yet".
 * The write8()/write32()/write8str() helpers compare against it to decide
 * whether a separator must precede the next value.
 */
#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))

/* Hex literal styles an assembler template can select via AssemblyType.hexType. */
#define HEX_0X 0 /*  0x1234 */
#define HEX_0H 1 /*  01234h */
     53 
     54 /* prototypes --------------------------------------------------------------- */
/* Builds the output path in outFilename and the symbol name in entryName. */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal literal; returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/* Writes one 32-bit word as an assembler literal; returns the updated column counter. */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#if U_PLATFORM == U_PF_OS400
/* Writes one byte as an escape inside a C string literal; returns the updated column counter. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
     68 /* -------------------------------------------------------------------------- */
     69 
     70 /*
     71 Creating Template Files for New Platforms
     72 
     73 Let the cc compiler help you get started.
     74 Compile this program
     75     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
     76 with the -S option to produce assembly output.
     77 
     78 For example, this will generate array.s:
     79 gcc -S array.c
     80 
     81 This will produce a .s file that may look like this:
     82 
     83     .file   "array.c"
     84     .version        "01.01"
     85 gcc2_compiled.:
     86     .globl x
     87     .section        .rodata
     88     .align 4
     89     .type    x,@object
     90     .size    x,20
     91 x:
     92     .long   1
     93     .long   2
     94     .long   -559038737
     95     .long   -1
     96     .long   16
     97     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
     98 
     99 which gives a starting point that will compile, and can be transformed
    100 to become the template, generally with some consulting of as docs and
    101 some experimentation.
    102 
    103 If you want ICU to automatically use this assembly, you should
    104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
    105 where the name is the compiler or platform that you used in this
    106 assemblyHeader data structure.
    107 */
/*
 * Per-assembler output templates, selected by name in checkAssemblyHeaderName().
 *
 *   name      - identifier matched against the requested assembly type
 *   header    - printf-style format written once before the data; the
 *               entry-point name is supplied for every %s, so a literal
 *               percent sign must be written as %%
 *   beginLine - directive that starts each line of 32-bit data words
 *   footer    - printf-style format written once after the data; receives
 *               the same arguments as header
 *   hexType   - how hexadecimal literals are spelled (HEX_0X or HEX_0H)
 */
static const struct AssemblyType {
    const char *name;
    const char *header;
    const char *beginLine;
    const char *footer;
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    // For gcc assemblers, the meaning of .align changes depending on the
    // hardware, so we use .balign 16 which always means 16 bytes.
    // https://sourceware.org/binutils/docs/as/Pseudo-Ops.html
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        /* The 3 lines below are added for Chrome. */
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.hidden %s\n"
        "#endif\n"
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        /* The 3 lines below are added for Chrome. */
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.private_extern _%s\n"
        "#endif\n"
        "\t.data\n"
        "\t.const\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
// 16 bytes alignment.
// http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   16\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
// 16 bytes alignment for sun-x86.
// http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  16\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
// 1<<4 bit alignment for aix.
// http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
// align 16 bytes
//  http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
    229 
/* Index into assemblyHeader[] of the selected template; -1 while none is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Hex literal style (HEX_0X or HEX_0H) of the selected template; used by write32(). */
static int32_t hexType = HEX_0X;
    232 
    233 U_CAPI UBool U_EXPORT2
    234 checkAssemblyHeaderName(const char* optAssembly) {
    235     int32_t idx;
    236     assemblyHeaderIndex = -1;
    237     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    238         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
    239             assemblyHeaderIndex = idx;
    240             hexType = assemblyHeader[idx].hexType; /* set the hex type */
    241             return TRUE;
    242         }
    243     }
    244 
    245     return FALSE;
    246 }
    247 
    248 
    249 U_CAPI void U_EXPORT2
    250 printAssemblyHeadersToStdErr(void) {
    251     int32_t idx;
    252     fprintf(stderr, "%s", assemblyHeader[0].name);
    253     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    254         fprintf(stderr, ", %s", assemblyHeader[idx].name);
    255     }
    256     fprintf(stderr,
    257         ")\n");
    258 }
    259 
    260 U_CAPI void U_EXPORT2
    261 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
    262     uint32_t column = MAX_COLUMN;
    263     char entry[64];
    264     uint32_t buffer[1024];
    265     char *bufferStr = (char *)buffer;
    266     FileStream *in, *out;
    267     size_t i, length;
    268 
    269     in=T_FileStream_open(filename, "rb");
    270     if(in==NULL) {
    271         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    272         exit(U_FILE_ACCESS_ERROR);
    273     }
    274 
    275     getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
    276     out=T_FileStream_open(bufferStr, "w");
    277     if(out==NULL) {
    278         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
    279         exit(U_FILE_ACCESS_ERROR);
    280     }
    281 
    282     if (outFilePath != NULL) {
    283         uprv_strcpy(outFilePath, bufferStr);
    284     }
    285 
    286 #ifdef WINDOWS_WITH_GNUC
    287     /* Need to fix the file seperator character when using MinGW. */
    288     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
    289 #endif
    290 
    291     if(optEntryPoint != NULL) {
    292         uprv_strcpy(entry, optEntryPoint);
    293         uprv_strcat(entry, "_dat");
    294     }
    295 
    296     /* turn dashes or dots in the entry name into underscores */
    297     length=uprv_strlen(entry);
    298     for(i=0; i<length; ++i) {
    299         if(entry[i]=='-' || entry[i]=='.') {
    300             entry[i]='_';
    301         }
    302     }
    303 
    304     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
    305         entry, entry, entry, entry,
    306         entry, entry, entry, entry);
    307     T_FileStream_writeLine(out, bufferStr);
    308     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
    309 
    310     for(;;) {
    311         length=T_FileStream_read(in, buffer, sizeof(buffer));
    312         if(length==0) {
    313             break;
    314         }
    315         if (length != sizeof(buffer)) {
    316             /* pad with extra 0's when at the end of the file */
    317             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
    318                 buffer[length+i] = 0;
    319             }
    320         }
    321         for(i=0; i<(length/sizeof(buffer[0])); i++) {
    322             column = write32(out, buffer[i], column);
    323         }
    324     }
    325 
    326     T_FileStream_writeLine(out, "\n");
    327 
    328     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
    329         entry, entry, entry, entry,
    330         entry, entry, entry, entry);
    331     T_FileStream_writeLine(out, bufferStr);
    332 
    333     if(T_FileStream_error(in)) {
    334         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    335         exit(U_FILE_ACCESS_ERROR);
    336     }
    337 
    338     if(T_FileStream_error(out)) {
    339         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    340         exit(U_FILE_ACCESS_ERROR);
    341     }
    342 
    343     T_FileStream_close(out);
    344     T_FileStream_close(in);
    345 }
    346 
    347 U_CAPI void U_EXPORT2
    348 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
    349     uint32_t column = MAX_COLUMN;
    350     char buffer[4096], entry[64];
    351     FileStream *in, *out;
    352     size_t i, length;
    353 
    354     in=T_FileStream_open(filename, "rb");
    355     if(in==NULL) {
    356         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    357         exit(U_FILE_ACCESS_ERROR);
    358     }
    359 
    360     if(optName != NULL) { /* prepend  'icudt28_' */
    361       strcpy(entry, optName);
    362       strcat(entry, "_");
    363     } else {
    364       entry[0] = 0;
    365     }
    366 
    367     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
    368     if (outFilePath != NULL) {
    369         uprv_strcpy(outFilePath, buffer);
    370     }
    371     out=T_FileStream_open(buffer, "w");
    372     if(out==NULL) {
    373         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
    374         exit(U_FILE_ACCESS_ERROR);
    375     }
    376 
    377     /* turn dashes or dots in the entry name into underscores */
    378     length=uprv_strlen(entry);
    379     for(i=0; i<length; ++i) {
    380         if(entry[i]=='-' || entry[i]=='.') {
    381             entry[i]='_';
    382         }
    383     }
    384 
    385 #if U_PLATFORM == U_PF_OS400
    386     /*
    387     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
    388 
    389     This is here because this platform can't currently put
    390     const data into the read-only pages of an object or
    391     shared library (service program). Only strings are allowed in read-only
    392     pages, so we use char * strings to store the data.
    393 
    394     In order to prevent the beginning of the data from ever matching the
    395     magic numbers we must still use the initial double.
    396     [grhoten 4/24/2003]
    397     */
    398     sprintf(buffer,
    399         "#ifndef IN_GENERATED_CCODE\n"
    400         "#define IN_GENERATED_CCODE\n"
    401         "#define U_DISABLE_RENAMING 1\n"
    402         "#include \"unicode/umachine.h\"\n"
    403         "#endif\n"
    404         "U_CDECL_BEGIN\n"
    405         "const struct {\n"
    406         "    double bogus;\n"
    407         "    const char *bytes; \n"
    408         "} %s={ 0.0, \n",
    409         entry);
    410     T_FileStream_writeLine(out, buffer);
    411 
    412     for(;;) {
    413         length=T_FileStream_read(in, buffer, sizeof(buffer));
    414         if(length==0) {
    415             break;
    416         }
    417         for(i=0; i<length; ++i) {
    418             column = write8str(out, (uint8_t)buffer[i], column);
    419         }
    420     }
    421 
    422     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
    423 #else
    424     /* Function renaming shouldn't be done in data */
    425     sprintf(buffer,
    426         "#ifndef IN_GENERATED_CCODE\n"
    427         "#define IN_GENERATED_CCODE\n"
    428         "#define U_DISABLE_RENAMING 1\n"
    429         "#include \"unicode/umachine.h\"\n"
    430         "#endif\n"
    431         "U_CDECL_BEGIN\n"
    432         "const struct {\n"
    433         "    double bogus;\n"
    434         "    uint8_t bytes[%ld]; \n"
    435         "} %s={ 0.0, {\n",
    436         (long)T_FileStream_size(in), entry);
    437     T_FileStream_writeLine(out, buffer);
    438 
    439     for(;;) {
    440         length=T_FileStream_read(in, buffer, sizeof(buffer));
    441         if(length==0) {
    442             break;
    443         }
    444         for(i=0; i<length; ++i) {
    445             column = write8(out, (uint8_t)buffer[i], column);
    446         }
    447     }
    448 
    449     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
    450 #endif
    451 
    452     if(T_FileStream_error(in)) {
    453         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    454         exit(U_FILE_ACCESS_ERROR);
    455     }
    456 
    457     if(T_FileStream_error(out)) {
    458         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    459         exit(U_FILE_ACCESS_ERROR);
    460     }
    461 
    462     T_FileStream_close(out);
    463     T_FileStream_close(in);
    464 }
    465 
    466 static uint32_t
    467 write32(FileStream *out, uint32_t bitField, uint32_t column) {
    468     int32_t i;
    469     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
    470     char *s = bitFieldStr;
    471     uint8_t *ptrIdx = (uint8_t *)&bitField;
    472     static const char hexToStr[16] = {
    473         '0','1','2','3',
    474         '4','5','6','7',
    475         '8','9','A','B',
    476         'C','D','E','F'
    477     };
    478 
    479     /* write the value, possibly with comma and newline */
    480     if(column==MAX_COLUMN) {
    481         /* first byte */
    482         column=1;
    483     } else if(column<32) {
    484         *(s++)=',';
    485         ++column;
    486     } else {
    487         *(s++)='\n';
    488         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
    489         s+=uprv_strlen(s);
    490         column=1;
    491     }
    492 
    493     if (bitField < 10) {
    494         /* It's a small number. Don't waste the space for 0x */
    495         *(s++)=hexToStr[bitField];
    496     }
    497     else {
    498         int seenNonZero = 0; /* This is used to remove leading zeros */
    499 
    500         if(hexType==HEX_0X) {
    501          *(s++)='0';
    502          *(s++)='x';
    503         } else if(hexType==HEX_0H) {
    504          *(s++)='0';
    505         }
    506 
    507         /* This creates a 32-bit field */
    508 #if U_IS_BIG_ENDIAN
    509         for (i = 0; i < sizeof(uint32_t); i++)
    510 #else
    511         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
    512 #endif
    513         {
    514             uint8_t value = ptrIdx[i];
    515             if (value || seenNonZero) {
    516                 *(s++)=hexToStr[value>>4];
    517                 *(s++)=hexToStr[value&0xF];
    518                 seenNonZero = 1;
    519             }
    520         }
    521         if(hexType==HEX_0H) {
    522          *(s++)='h';
    523         }
    524     }
    525 
    526     *(s++)=0;
    527     T_FileStream_writeLine(out, bitFieldStr);
    528     return column;
    529 }
    530 
    531 static uint32_t
    532 write8(FileStream *out, uint8_t byte, uint32_t column) {
    533     char s[4];
    534     int i=0;
    535 
    536     /* convert the byte value to a string */
    537     if(byte>=100) {
    538         s[i++]=(char)('0'+byte/100);
    539         byte%=100;
    540     }
    541     if(i>0 || byte>=10) {
    542         s[i++]=(char)('0'+byte/10);
    543         byte%=10;
    544     }
    545     s[i++]=(char)('0'+byte);
    546     s[i]=0;
    547 
    548     /* write the value, possibly with comma and newline */
    549     if(column==MAX_COLUMN) {
    550         /* first byte */
    551         column=1;
    552     } else if(column<16) {
    553         T_FileStream_writeLine(out, ",");
    554         ++column;
    555     } else {
    556         T_FileStream_writeLine(out, ",\n");
    557         column=1;
    558     }
    559     T_FileStream_writeLine(out, s);
    560     return column;
    561 }
    562 
    563 #if U_PLATFORM == U_PF_OS400
    564 static uint32_t
    565 write8str(FileStream *out, uint8_t byte, uint32_t column) {
    566     char s[8];
    567 
    568     if (byte > 7)
    569         sprintf(s, "\\x%X", byte);
    570     else
    571         sprintf(s, "\\%X", byte);
    572 
    573     /* write the value, possibly with comma and newline */
    574     if(column==MAX_COLUMN) {
    575         /* first byte */
    576         column=1;
    577         T_FileStream_writeLine(out, "\"");
    578     } else if(column<24) {
    579         ++column;
    580     } else {
    581         T_FileStream_writeLine(out, "\"\n\"");
    582         column=1;
    583     }
    584     T_FileStream_writeLine(out, s);
    585     return column;
    586 }
    587 #endif
    588 
    589 static void
    590 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
    591     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
    592 
    593     /* copy path */
    594     if(destdir!=NULL && *destdir!=0) {
    595         do {
    596             *outFilename++=*destdir++;
    597         } while(*destdir!=0);
    598         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
    599             *outFilename++=U_FILE_SEP_CHAR;
    600         }
    601         inFilename=basename;
    602     } else {
    603         while(inFilename<basename) {
    604             *outFilename++=*inFilename++;
    605         }
    606     }
    607 
    608     if(suffix==NULL) {
    609         /* the filename does not have a suffix */
    610         uprv_strcpy(entryName, inFilename);
    611         if(optFilename != NULL) {
    612           uprv_strcpy(outFilename, optFilename);
    613         } else {
    614           uprv_strcpy(outFilename, inFilename);
    615         }
    616         uprv_strcat(outFilename, newSuffix);
    617     } else {
    618         char *saveOutFilename = outFilename;
    619         /* copy basename */
    620         while(inFilename<suffix) {
    621             if(*inFilename=='-') {
    622                 /* iSeries cannot have '-' in the .o objects. */
    623                 *outFilename++=*entryName++='_';
    624                 inFilename++;
    625             }
    626             else {
    627                 *outFilename++=*entryName++=*inFilename++;
    628             }
    629         }
    630 
    631         /* replace '.' by '_' */
    632         *outFilename++=*entryName++='_';
    633         ++inFilename;
    634 
    635         /* copy suffix */
    636         while(*inFilename!=0) {
    637             *outFilename++=*entryName++=*inFilename++;
    638         }
    639 
    640         *entryName=0;
    641 
    642         if(optFilename != NULL) {
    643             uprv_strcpy(saveOutFilename, optFilename);
    644             uprv_strcat(saveOutFilename, newSuffix);
    645         } else {
    646             /* add ".c" */
    647             uprv_strcpy(outFilename, newSuffix);
    648         }
    649     }
    650 }
    651 
    652 #ifdef CAN_GENERATE_OBJECTS
    653 static void
    654 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
    655     union {
    656         char        bytes[2048];
    657 #ifdef U_ELF
    658         Elf32_Ehdr  header32;
    659         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
    660 #elif U_PLATFORM_HAS_WIN32_API
    661         IMAGE_FILE_HEADER header;
    662 #endif
    663     } buffer;
    664 
    665     const char *filename;
    666     FileStream *in;
    667     int32_t length;
    668 
    669 #ifdef U_ELF
    670 
    671 #elif U_PLATFORM_HAS_WIN32_API
    672     const IMAGE_FILE_HEADER *pHeader;
    673 #else
    674 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    675 #endif
    676 
    677     if(optMatchArch != NULL) {
    678         filename=optMatchArch;
    679     } else {
    680         /* set defaults */
    681 #ifdef U_ELF
    682         /* set EM_386 because elf.h does not provide better defaults */
    683         *pCPU=EM_386;
    684         *pBits=32;
    685         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
    686 #elif U_PLATFORM_HAS_WIN32_API
    687 /* _M_IA64 should be defined in windows.h */
    688 #   if defined(_M_IA64)
    689         *pCPU=IMAGE_FILE_MACHINE_IA64;
    690 #   elif defined(_M_AMD64)
    691         *pCPU=IMAGE_FILE_MACHINE_AMD64;
    692 #   else
    693         *pCPU=IMAGE_FILE_MACHINE_I386;
    694 #   endif
    695         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    696         *pIsBigEndian=FALSE;
    697 #else
    698 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    699 #endif
    700         return;
    701     }
    702 
    703     in=T_FileStream_open(filename, "rb");
    704     if(in==NULL) {
    705         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
    706         exit(U_FILE_ACCESS_ERROR);
    707     }
    708     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
    709 
    710 #ifdef U_ELF
    711     if(length<sizeof(Elf32_Ehdr)) {
    712         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    713         exit(U_UNSUPPORTED_ERROR);
    714     }
    715     if(
    716         buffer.header32.e_ident[0]!=ELFMAG0 ||
    717         buffer.header32.e_ident[1]!=ELFMAG1 ||
    718         buffer.header32.e_ident[2]!=ELFMAG2 ||
    719         buffer.header32.e_ident[3]!=ELFMAG3 ||
    720         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
    721     ) {
    722         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
    723         exit(U_UNSUPPORTED_ERROR);
    724     }
    725 
    726     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
    727 #ifdef U_ELF64
    728     if(*pBits!=32 && *pBits!=64) {
    729         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
    730         exit(U_UNSUPPORTED_ERROR);
    731     }
    732 #else
    733     if(*pBits!=32) {
    734         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
    735         exit(U_UNSUPPORTED_ERROR);
    736     }
    737 #endif
    738 
    739     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
    740     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
    741         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
    742         exit(U_UNSUPPORTED_ERROR);
    743     }
    744     /* TODO: Support byte swapping */
    745 
    746     *pCPU=buffer.header32.e_machine;
    747 #elif U_PLATFORM_HAS_WIN32_API
    748     if(length<sizeof(IMAGE_FILE_HEADER)) {
    749         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    750         exit(U_UNSUPPORTED_ERROR);
    751     }
    752     /* TODO: Use buffer.header.  Keep aliasing legal.  */
    753     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
    754     *pCPU=pHeader->Machine;
    755     /*
    756      * The number of bits is implicit with the Machine value.
    757      * *pBits is ignored in the calling code, so this need not be precise.
    758      */
    759     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    760     /* Windows always runs on little-endian CPUs. */
    761     *pIsBigEndian=FALSE;
    762 #else
    763 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    764 #endif
    765 
    766     T_FileStream_close(in);
    767 }
    768 
    769 U_CAPI void U_EXPORT2
    770 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
    771     /* common variables */
    772     char buffer[4096], entry[40]={ 0 };
    773     FileStream *in, *out;
    774     const char *newSuffix;
    775     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
    776 
    777     uint16_t cpu, bits;
    778     UBool makeBigEndian;
    779 
    780     /* platform-specific variables and initialization code */
    781 #ifdef U_ELF
    782     /* 32-bit Elf file header */
    783     static Elf32_Ehdr header32={
    784         {
    785             /* e_ident[] */
    786             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    787             ELFCLASS32,
    788             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    789             EV_CURRENT /* EI_VERSION */
    790         },
    791         ET_REL,
    792         EM_386,
    793         EV_CURRENT, /* e_version */
    794         0, /* e_entry */
    795         0, /* e_phoff */
    796         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
    797         0, /* e_flags */
    798         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
    799         0, /* e_phentsize */
    800         0, /* e_phnum */
    801         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
    802         5, /* e_shnum */
    803         2 /* e_shstrndx */
    804     };
    805 
    806     /* 32-bit Elf section header table */
    807     static Elf32_Shdr sectionHeaders32[5]={
    808         { /* SHN_UNDEF */
    809             0
    810         },
    811         { /* .symtab */
    812             1, /* sh_name */
    813             SHT_SYMTAB,
    814             0, /* sh_flags */
    815             0, /* sh_addr */
    816             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
    817             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
    818             3, /* sh_link=sect hdr index of .strtab */
    819             1, /* sh_info=One greater than the symbol table index of the last
    820                 * local symbol (with STB_LOCAL). */
    821             4, /* sh_addralign */
    822             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
    823         },
    824         { /* .shstrtab */
    825             9, /* sh_name */
    826             SHT_STRTAB,
    827             0, /* sh_flags */
    828             0, /* sh_addr */
    829             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
    830             40, /* sh_size */
    831             0, /* sh_link */
    832             0, /* sh_info */
    833             1, /* sh_addralign */
    834             0 /* sh_entsize */
    835         },
    836         { /* .strtab */
    837             19, /* sh_name */
    838             SHT_STRTAB,
    839             0, /* sh_flags */
    840             0, /* sh_addr */
    841             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
    842             (Elf32_Word)sizeof(entry), /* sh_size */
    843             0, /* sh_link */
    844             0, /* sh_info */
    845             1, /* sh_addralign */
    846             0 /* sh_entsize */
    847         },
    848         { /* .rodata */
    849             27, /* sh_name */
    850             SHT_PROGBITS,
    851             SHF_ALLOC, /* sh_flags */
    852             0, /* sh_addr */
    853             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
    854             0, /* sh_size */
    855             0, /* sh_link */
    856             0, /* sh_info */
    857             16, /* sh_addralign */
    858             0 /* sh_entsize */
    859         }
    860     };
    861 
    862     /* symbol table */
    863     static Elf32_Sym symbols32[2]={
    864         { /* STN_UNDEF */
    865             0
    866         },
    867         { /* data entry point */
    868             1, /* st_name */
    869             0, /* st_value */
    870             0, /* st_size */
    871             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    872             0, /* st_other */
    873             4 /* st_shndx=index of related section table entry */
    874         }
    875     };
    876 
    877     /* section header string table, with decimal string offsets */
    878     static const char sectionStrings[40]=
    879         /*  0 */ "\0"
    880         /*  1 */ ".symtab\0"
    881         /*  9 */ ".shstrtab\0"
    882         /* 19 */ ".strtab\0"
    883         /* 27 */ ".rodata\0"
    884         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
    885         /* 40: padded to multiple of 8 bytes */
    886 
    887     /*
    888      * Use entry[] for the string table which will contain only the
    889      * entry point name.
    890      * entry[0] must be 0 (NUL)
    891      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
    892      */
    893 
    894     /* 16-align .rodata in the .o file, just in case */
    895     static const char padding[16]={ 0 };
    896     int32_t paddingSize;
    897 
    898 #ifdef U_ELF64
    899     /* 64-bit Elf file header */
    900     static Elf64_Ehdr header64={
    901         {
    902             /* e_ident[] */
    903             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    904             ELFCLASS64,
    905             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    906             EV_CURRENT /* EI_VERSION */
    907         },
    908         ET_REL,
    909         EM_X86_64,
    910         EV_CURRENT, /* e_version */
    911         0, /* e_entry */
    912         0, /* e_phoff */
    913         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
    914         0, /* e_flags */
    915         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
    916         0, /* e_phentsize */
    917         0, /* e_phnum */
    918         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
    919         5, /* e_shnum */
    920         2 /* e_shstrndx */
    921     };
    922 
    923     /* 64-bit Elf section header table */
    924     static Elf64_Shdr sectionHeaders64[5]={
    925         { /* SHN_UNDEF */
    926             0
    927         },
    928         { /* .symtab */
    929             1, /* sh_name */
    930             SHT_SYMTAB,
    931             0, /* sh_flags */
    932             0, /* sh_addr */
    933             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
    934             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
    935             3, /* sh_link=sect hdr index of .strtab */
    936             1, /* sh_info=One greater than the symbol table index of the last
    937                 * local symbol (with STB_LOCAL). */
    938             4, /* sh_addralign */
    939             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
    940         },
    941         { /* .shstrtab */
    942             9, /* sh_name */
    943             SHT_STRTAB,
    944             0, /* sh_flags */
    945             0, /* sh_addr */
    946             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
    947             40, /* sh_size */
    948             0, /* sh_link */
    949             0, /* sh_info */
    950             1, /* sh_addralign */
    951             0 /* sh_entsize */
    952         },
    953         { /* .strtab */
    954             19, /* sh_name */
    955             SHT_STRTAB,
    956             0, /* sh_flags */
    957             0, /* sh_addr */
    958             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
    959             (Elf64_Xword)sizeof(entry), /* sh_size */
    960             0, /* sh_link */
    961             0, /* sh_info */
    962             1, /* sh_addralign */
    963             0 /* sh_entsize */
    964         },
    965         { /* .rodata */
    966             27, /* sh_name */
    967             SHT_PROGBITS,
    968             SHF_ALLOC, /* sh_flags */
    969             0, /* sh_addr */
    970             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
    971             0, /* sh_size */
    972             0, /* sh_link */
    973             0, /* sh_info */
    974             16, /* sh_addralign */
    975             0 /* sh_entsize */
    976         }
    977     };
    978 
    979     /*
    980      * 64-bit symbol table
    981      * careful: different order of items compared with Elf32_sym!
    982      */
    983     static Elf64_Sym symbols64[2]={
    984         { /* STN_UNDEF */
    985             0
    986         },
    987         { /* data entry point */
    988             1, /* st_name */
    989             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    990             0, /* st_other */
    991             4, /* st_shndx=index of related section table entry */
    992             0, /* st_value */
    993             0 /* st_size */
    994         }
    995     };
    996 
    997 #endif /* U_ELF64 */
    998 
    999     /* entry[] has a leading NUL */
   1000     entryOffset=1;
   1001 
   1002     /* in the common code, count entryLength from after the NUL */
   1003     entryLengthOffset=1;
   1004 
   1005     newSuffix=".o";
   1006 
   1007 #elif U_PLATFORM_HAS_WIN32_API
   1008     struct {
   1009         IMAGE_FILE_HEADER fileHeader;
   1010         IMAGE_SECTION_HEADER sections[2];
   1011         char linkerOptions[100];
   1012     } objHeader;
   1013     IMAGE_SYMBOL symbols[1];
   1014     struct {
   1015         DWORD sizeofLongNames;
   1016         char longNames[100];
   1017     } symbolNames;
   1018 
   1019     /*
   1020      * entry sometimes has a leading '_'
   1021      * overwritten if entryOffset==0 depending on the target platform
   1022      * see check for cpu below
   1023      */
   1024     entry[0]='_';
   1025 
   1026     newSuffix=".obj";
   1027 #else
   1028 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1029 #endif
   1030 
   1031     /* deal with options, files and the entry point name */
   1032     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
   1033     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
   1034 #if U_PLATFORM_HAS_WIN32_API
   1035     if(cpu==IMAGE_FILE_MACHINE_I386) {
   1036         entryOffset=1;
   1037     }
   1038 #endif
   1039 
   1040     in=T_FileStream_open(filename, "rb");
   1041     if(in==NULL) {
   1042         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
   1043         exit(U_FILE_ACCESS_ERROR);
   1044     }
   1045     size=T_FileStream_size(in);
   1046 
   1047     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
   1048     if (outFilePath != NULL) {
   1049         uprv_strcpy(outFilePath, buffer);
   1050     }
   1051 
   1052     if(optEntryPoint != NULL) {
   1053         uprv_strcpy(entry+entryOffset, optEntryPoint);
   1054         uprv_strcat(entry+entryOffset, "_dat");
   1055     }
   1056     /* turn dashes in the entry name into underscores */
   1057     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
   1058     for(i=0; i<entryLength; ++i) {
   1059         if(entry[entryLengthOffset+i]=='-') {
   1060             entry[entryLengthOffset+i]='_';
   1061         }
   1062     }
   1063 
   1064     /* open the output file */
   1065     out=T_FileStream_open(buffer, "wb");
   1066     if(out==NULL) {
   1067         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
   1068         exit(U_FILE_ACCESS_ERROR);
   1069     }
   1070 
   1071 #ifdef U_ELF
   1072     if(bits==32) {
   1073         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1074         header32.e_machine=cpu;
   1075 
   1076         /* 16-align .rodata in the .o file, just in case */
   1077         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
   1078         if(paddingSize!=0) {
   1079                 paddingSize=0x10-paddingSize;
   1080                 sectionHeaders32[4].sh_offset+=paddingSize;
   1081         }
   1082 
   1083         sectionHeaders32[4].sh_size=(Elf32_Word)size;
   1084 
   1085         symbols32[1].st_size=(Elf32_Word)size;
   1086 
   1087         /* write .o headers */
   1088         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
   1089         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
   1090         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
   1091     } else /* bits==64 */ {
   1092 #ifdef U_ELF64
   1093         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1094         header64.e_machine=cpu;
   1095 
   1096         /* 16-align .rodata in the .o file, just in case */
   1097         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
   1098         if(paddingSize!=0) {
   1099                 paddingSize=0x10-paddingSize;
   1100                 sectionHeaders64[4].sh_offset+=paddingSize;
   1101         }
   1102 
   1103         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
   1104 
   1105         symbols64[1].st_size=(Elf64_Xword)size;
   1106 
   1107         /* write .o headers */
   1108         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
   1109         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
   1110         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
   1111 #endif
   1112     }
   1113 
   1114     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
   1115     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
   1116     if(paddingSize!=0) {
   1117         T_FileStream_write(out, padding, paddingSize);
   1118     }
   1119 #elif U_PLATFORM_HAS_WIN32_API
   1120     /* populate the .obj headers */
   1121     uprv_memset(&objHeader, 0, sizeof(objHeader));
   1122     uprv_memset(&symbols, 0, sizeof(symbols));
   1123     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
   1124 
   1125     /* write the linker export directive */
   1126     uprv_strcpy(objHeader.linkerOptions, "-export:");
   1127     length=8;
   1128     uprv_strcpy(objHeader.linkerOptions+length, entry);
   1129     length+=entryLength;
   1130     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
   1131     length+=6;
   1132 
   1133     /* set the file header */
   1134     objHeader.fileHeader.Machine=cpu;
   1135     objHeader.fileHeader.NumberOfSections=2;
   1136     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
   1137     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
   1138     objHeader.fileHeader.NumberOfSymbols=1;
   1139 
   1140     /* set the section for the linker options */
   1141     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
   1142     objHeader.sections[0].SizeOfRawData=length;
   1143     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
   1144     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
   1145 
   1146     /* set the data section */
   1147     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
   1148     objHeader.sections[1].SizeOfRawData=size;
   1149     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
   1150     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
   1151 
   1152     /* set the symbol table */
   1153     if(entryLength<=8) {
   1154         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
   1155         symbolNames.sizeofLongNames=4;
   1156     } else {
   1157         symbols[0].N.Name.Short=0;
   1158         symbols[0].N.Name.Long=4;
   1159         symbolNames.sizeofLongNames=4+entryLength+1;
   1160         uprv_strcpy(symbolNames.longNames, entry);
   1161     }
   1162     symbols[0].SectionNumber=2;
   1163     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
   1164 
   1165     /* write the file header and the linker options section */
   1166     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
   1167 #else
   1168 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1169 #endif
   1170 
   1171     /* copy the data file into section 2 */
   1172     for(;;) {
   1173         length=T_FileStream_read(in, buffer, sizeof(buffer));
   1174         if(length==0) {
   1175             break;
   1176         }
   1177         T_FileStream_write(out, buffer, (int32_t)length);
   1178     }
   1179 
   1180 #if U_PLATFORM_HAS_WIN32_API
   1181     /* write the symbol table */
   1182     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
   1183     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
   1184 #endif
   1185 
   1186     if(T_FileStream_error(in)) {
   1187         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
   1188         exit(U_FILE_ACCESS_ERROR);
   1189     }
   1190 
   1191     if(T_FileStream_error(out)) {
   1192         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
   1193         exit(U_FILE_ACCESS_ERROR);
   1194     }
   1195 
   1196     T_FileStream_close(out);
   1197     T_FileStream_close(in);
   1198 }
   1199 #endif
   1200