Home | History | Annotate | Download | only in toolutil
      1 /******************************************************************************
      2  *   Copyright (C) 2009-2015, International Business Machines
      3  *   Corporation and others.  All Rights Reserved.
      4  *******************************************************************************
      5  */
      6 #include "unicode/utypes.h"
      7 
      8 #if U_PLATFORM_HAS_WIN32_API
      9 #   define VC_EXTRALEAN
     10 #   define WIN32_LEAN_AND_MEAN
     11 #   define NOUSER
     12 #   define NOSERVICE
     13 #   define NOIME
     14 #   define NOMCX
     15 #include <windows.h>
     16 #include <time.h>
     17 #   ifdef __GNUC__
     18 #       define WINDOWS_WITH_GNUC
     19 #   endif
     20 #endif
     21 
     22 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
     23 #   define U_ELF
     24 #endif
     25 
     26 #ifdef U_ELF
     27 #   include <elf.h>
     28 #   if defined(ELFCLASS64)
     29 #       define U_ELF64
     30 #   endif
     31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
     32 #   ifndef EM_X86_64
     33 #       define EM_X86_64 62
     34 #   endif
     35 #   define ICU_ENTRY_OFFSET 0
     36 #endif
     37 
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include "unicode/putil.h"
     41 #include "cmemory.h"
     42 #include "cstring.h"
     43 #include "filestrm.h"
     44 #include "toolutil.h"
     45 #include "unicode/uclean.h"
     46 #include "uoptions.h"
     47 #include "pkg_genc.h"
     48 
     49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
     50 
     51 #define HEX_0X 0 /*  0x1234 */
     52 #define HEX_0H 1 /*  01234h */
     53 
     54 /* prototypes --------------------------------------------------------------- */
     55 static void
     56 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
     57 
     58 static uint32_t
     59 write8(FileStream *out, uint8_t byte, uint32_t column);
     60 
     61 static uint32_t
     62 write32(FileStream *out, uint32_t byte, uint32_t column);
     63 
     64 #if U_PLATFORM == U_PF_OS400
     65 static uint32_t
     66 write8str(FileStream *out, uint8_t byte, uint32_t column);
     67 #endif
     68 /* -------------------------------------------------------------------------- */
     69 
     70 /*
     71 Creating Template Files for New Platforms
     72 
     73 Let the cc compiler help you get started.
     74 Compile this program
     75     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
     76 with the -S option to produce assembly output.
     77 
     78 For example, this will generate array.s:
     79 gcc -S array.c
     80 
     81 This will produce a .s file that may look like this:
     82 
     83     .file   "array.c"
     84     .version        "01.01"
     85 gcc2_compiled.:
     86     .globl x
     87     .section        .rodata
     88     .align 4
     89     .type    x,@object
     90     .size    x,20
     91 x:
     92     .long   1
     93     .long   2
     94     .long   -559038737
     95     .long   -1
     96     .long   16
     97     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
     98 
     99 which gives a starting point that will compile, and can be transformed
    100 to become the template, generally with some consulting of as docs and
    101 some experimentation.
    102 
    103 If you want ICU to automatically use this assembly, you should
    104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
    105 where the name is the compiler or platform that you used in this
    106 assemblyHeader data structure.
    107 */
/*
 * Table of supported assembler flavors, selected by name through
 * checkAssemblyHeaderName().
 *
 * header and footer are used as sprintf() format strings by
 * writeAssemblyCode(); every %s in them is replaced with the entry point
 * name, and a literal percent sign must therefore be written as %%.
 */
static const struct AssemblyType {
    const char *name;      /* value matched against the assembly option string */
    const char *header;    /* sprintf template emitted once, before the data */
    const char *beginLine; /* data directive that starts every line of values */
    const char *footer;    /* sprintf template emitted once, after the data */
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    /* For gcc assemblers, the meaning of .align changes depending on the */
    /* hardware, so we use .balign 16 which always means 16 bytes. */
    /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.hidden %s\n"
        "#endif\n"
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.private_extern _%s\n"
        "#endif\n"
        "\t.data\n"
        "\t.const\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
/* 16 bytes alignment. */
/* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   16\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
/* 16 bytes alignment for sun-x86. */
/* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  16\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
/* 1<<4 bit alignment for aix. */
/* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
/* align 16 bytes */
/*  http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
    227 
/*
 * Index into assemblyHeader[] of the currently selected assembler flavor.
 * Stays at -1 until checkAssemblyHeaderName() finds a matching name.
 */
static int32_t assemblyHeaderIndex = -1;
/* Hex literal notation (HEX_0X or HEX_0H) that write32() emits; set together with the index. */
static int32_t hexType = HEX_0X;
    230 
    231 U_CAPI UBool U_EXPORT2
    232 checkAssemblyHeaderName(const char* optAssembly) {
    233     int32_t idx;
    234     assemblyHeaderIndex = -1;
    235     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    236         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
    237             assemblyHeaderIndex = idx;
    238             hexType = assemblyHeader[idx].hexType; /* set the hex type */
    239             return TRUE;
    240         }
    241     }
    242 
    243     return FALSE;
    244 }
    245 
    246 
    247 U_CAPI void U_EXPORT2
    248 printAssemblyHeadersToStdErr(void) {
    249     int32_t idx;
    250     fprintf(stderr, "%s", assemblyHeader[0].name);
    251     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    252         fprintf(stderr, ", %s", assemblyHeader[idx].name);
    253     }
    254     fprintf(stderr,
    255         ")\n");
    256 }
    257 
    258 U_CAPI void U_EXPORT2
    259 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
    260     uint32_t column = MAX_COLUMN;
    261     char entry[64];
    262     uint32_t buffer[1024];
    263     char *bufferStr = (char *)buffer;
    264     FileStream *in, *out;
    265     size_t i, length;
    266 
    267     in=T_FileStream_open(filename, "rb");
    268     if(in==NULL) {
    269         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    270         exit(U_FILE_ACCESS_ERROR);
    271     }
    272 
    273     getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
    274     out=T_FileStream_open(bufferStr, "w");
    275     if(out==NULL) {
    276         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
    277         exit(U_FILE_ACCESS_ERROR);
    278     }
    279 
    280     if (outFilePath != NULL) {
    281         uprv_strcpy(outFilePath, bufferStr);
    282     }
    283 
    284 #ifdef WINDOWS_WITH_GNUC
    285     /* Need to fix the file seperator character when using MinGW. */
    286     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
    287 #endif
    288 
    289     if(optEntryPoint != NULL) {
    290         uprv_strcpy(entry, optEntryPoint);
    291         uprv_strcat(entry, "_dat");
    292     }
    293 
    294     /* turn dashes or dots in the entry name into underscores */
    295     length=uprv_strlen(entry);
    296     for(i=0; i<length; ++i) {
    297         if(entry[i]=='-' || entry[i]=='.') {
    298             entry[i]='_';
    299         }
    300     }
    301 
    302     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
    303         entry, entry, entry, entry,
    304         entry, entry, entry, entry);
    305     T_FileStream_writeLine(out, bufferStr);
    306     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
    307 
    308     for(;;) {
    309         length=T_FileStream_read(in, buffer, sizeof(buffer));
    310         if(length==0) {
    311             break;
    312         }
    313         if (length != sizeof(buffer)) {
    314             /* pad with extra 0's when at the end of the file */
    315             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
    316                 buffer[length+i] = 0;
    317             }
    318         }
    319         for(i=0; i<(length/sizeof(buffer[0])); i++) {
    320             column = write32(out, buffer[i], column);
    321         }
    322     }
    323 
    324     T_FileStream_writeLine(out, "\n");
    325 
    326     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
    327         entry, entry, entry, entry,
    328         entry, entry, entry, entry);
    329     T_FileStream_writeLine(out, bufferStr);
    330 
    331     if(T_FileStream_error(in)) {
    332         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    333         exit(U_FILE_ACCESS_ERROR);
    334     }
    335 
    336     if(T_FileStream_error(out)) {
    337         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    338         exit(U_FILE_ACCESS_ERROR);
    339     }
    340 
    341     T_FileStream_close(out);
    342     T_FileStream_close(in);
    343 }
    344 
    345 U_CAPI void U_EXPORT2
    346 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
    347     uint32_t column = MAX_COLUMN;
    348     char buffer[4096], entry[64];
    349     FileStream *in, *out;
    350     size_t i, length;
    351 
    352     in=T_FileStream_open(filename, "rb");
    353     if(in==NULL) {
    354         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    355         exit(U_FILE_ACCESS_ERROR);
    356     }
    357 
    358     if(optName != NULL) { /* prepend  'icudt28_' */
    359       strcpy(entry, optName);
    360       strcat(entry, "_");
    361     } else {
    362       entry[0] = 0;
    363     }
    364 
    365     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
    366     if (outFilePath != NULL) {
    367         uprv_strcpy(outFilePath, buffer);
    368     }
    369     out=T_FileStream_open(buffer, "w");
    370     if(out==NULL) {
    371         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
    372         exit(U_FILE_ACCESS_ERROR);
    373     }
    374 
    375     /* turn dashes or dots in the entry name into underscores */
    376     length=uprv_strlen(entry);
    377     for(i=0; i<length; ++i) {
    378         if(entry[i]=='-' || entry[i]=='.') {
    379             entry[i]='_';
    380         }
    381     }
    382 
    383 #if U_PLATFORM == U_PF_OS400
    384     /*
    385     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
    386 
    387     This is here because this platform can't currently put
    388     const data into the read-only pages of an object or
    389     shared library (service program). Only strings are allowed in read-only
    390     pages, so we use char * strings to store the data.
    391 
    392     In order to prevent the beginning of the data from ever matching the
    393     magic numbers we must still use the initial double.
    394     [grhoten 4/24/2003]
    395     */
    396     sprintf(buffer,
    397         "#ifndef IN_GENERATED_CCODE\n"
    398         "#define IN_GENERATED_CCODE\n"
    399         "#define U_DISABLE_RENAMING 1\n"
    400         "#include \"unicode/umachine.h\"\n"
    401         "#endif\n"
    402         "U_CDECL_BEGIN\n"
    403         "const struct {\n"
    404         "    double bogus;\n"
    405         "    const char *bytes; \n"
    406         "} %s={ 0.0, \n",
    407         entry);
    408     T_FileStream_writeLine(out, buffer);
    409 
    410     for(;;) {
    411         length=T_FileStream_read(in, buffer, sizeof(buffer));
    412         if(length==0) {
    413             break;
    414         }
    415         for(i=0; i<length; ++i) {
    416             column = write8str(out, (uint8_t)buffer[i], column);
    417         }
    418     }
    419 
    420     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
    421 #else
    422     /* Function renaming shouldn't be done in data */
    423     sprintf(buffer,
    424         "#ifndef IN_GENERATED_CCODE\n"
    425         "#define IN_GENERATED_CCODE\n"
    426         "#define U_DISABLE_RENAMING 1\n"
    427         "#include \"unicode/umachine.h\"\n"
    428         "#endif\n"
    429         "U_CDECL_BEGIN\n"
    430         "const struct {\n"
    431         "    double bogus;\n"
    432         "    uint8_t bytes[%ld]; \n"
    433         "} %s={ 0.0, {\n",
    434         (long)T_FileStream_size(in), entry);
    435     T_FileStream_writeLine(out, buffer);
    436 
    437     for(;;) {
    438         length=T_FileStream_read(in, buffer, sizeof(buffer));
    439         if(length==0) {
    440             break;
    441         }
    442         for(i=0; i<length; ++i) {
    443             column = write8(out, (uint8_t)buffer[i], column);
    444         }
    445     }
    446 
    447     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
    448 #endif
    449 
    450     if(T_FileStream_error(in)) {
    451         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    452         exit(U_FILE_ACCESS_ERROR);
    453     }
    454 
    455     if(T_FileStream_error(out)) {
    456         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    457         exit(U_FILE_ACCESS_ERROR);
    458     }
    459 
    460     T_FileStream_close(out);
    461     T_FileStream_close(in);
    462 }
    463 
    464 static uint32_t
    465 write32(FileStream *out, uint32_t bitField, uint32_t column) {
    466     int32_t i;
    467     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
    468     char *s = bitFieldStr;
    469     uint8_t *ptrIdx = (uint8_t *)&bitField;
    470     static const char hexToStr[16] = {
    471         '0','1','2','3',
    472         '4','5','6','7',
    473         '8','9','A','B',
    474         'C','D','E','F'
    475     };
    476 
    477     /* write the value, possibly with comma and newline */
    478     if(column==MAX_COLUMN) {
    479         /* first byte */
    480         column=1;
    481     } else if(column<32) {
    482         *(s++)=',';
    483         ++column;
    484     } else {
    485         *(s++)='\n';
    486         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
    487         s+=uprv_strlen(s);
    488         column=1;
    489     }
    490 
    491     if (bitField < 10) {
    492         /* It's a small number. Don't waste the space for 0x */
    493         *(s++)=hexToStr[bitField];
    494     }
    495     else {
    496         int seenNonZero = 0; /* This is used to remove leading zeros */
    497 
    498         if(hexType==HEX_0X) {
    499          *(s++)='0';
    500          *(s++)='x';
    501         } else if(hexType==HEX_0H) {
    502          *(s++)='0';
    503         }
    504 
    505         /* This creates a 32-bit field */
    506 #if U_IS_BIG_ENDIAN
    507         for (i = 0; i < sizeof(uint32_t); i++)
    508 #else
    509         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
    510 #endif
    511         {
    512             uint8_t value = ptrIdx[i];
    513             if (value || seenNonZero) {
    514                 *(s++)=hexToStr[value>>4];
    515                 *(s++)=hexToStr[value&0xF];
    516                 seenNonZero = 1;
    517             }
    518         }
    519         if(hexType==HEX_0H) {
    520          *(s++)='h';
    521         }
    522     }
    523 
    524     *(s++)=0;
    525     T_FileStream_writeLine(out, bitFieldStr);
    526     return column;
    527 }
    528 
    529 static uint32_t
    530 write8(FileStream *out, uint8_t byte, uint32_t column) {
    531     char s[4];
    532     int i=0;
    533 
    534     /* convert the byte value to a string */
    535     if(byte>=100) {
    536         s[i++]=(char)('0'+byte/100);
    537         byte%=100;
    538     }
    539     if(i>0 || byte>=10) {
    540         s[i++]=(char)('0'+byte/10);
    541         byte%=10;
    542     }
    543     s[i++]=(char)('0'+byte);
    544     s[i]=0;
    545 
    546     /* write the value, possibly with comma and newline */
    547     if(column==MAX_COLUMN) {
    548         /* first byte */
    549         column=1;
    550     } else if(column<16) {
    551         T_FileStream_writeLine(out, ",");
    552         ++column;
    553     } else {
    554         T_FileStream_writeLine(out, ",\n");
    555         column=1;
    556     }
    557     T_FileStream_writeLine(out, s);
    558     return column;
    559 }
    560 
    561 #if U_PLATFORM == U_PF_OS400
    562 static uint32_t
    563 write8str(FileStream *out, uint8_t byte, uint32_t column) {
    564     char s[8];
    565 
    566     if (byte > 7)
    567         sprintf(s, "\\x%X", byte);
    568     else
    569         sprintf(s, "\\%X", byte);
    570 
    571     /* write the value, possibly with comma and newline */
    572     if(column==MAX_COLUMN) {
    573         /* first byte */
    574         column=1;
    575         T_FileStream_writeLine(out, "\"");
    576     } else if(column<24) {
    577         ++column;
    578     } else {
    579         T_FileStream_writeLine(out, "\"\n\"");
    580         column=1;
    581     }
    582     T_FileStream_writeLine(out, s);
    583     return column;
    584 }
    585 #endif
    586 
    587 static void
    588 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
    589     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
    590 
    591     /* copy path */
    592     if(destdir!=NULL && *destdir!=0) {
    593         do {
    594             *outFilename++=*destdir++;
    595         } while(*destdir!=0);
    596         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
    597             *outFilename++=U_FILE_SEP_CHAR;
    598         }
    599         inFilename=basename;
    600     } else {
    601         while(inFilename<basename) {
    602             *outFilename++=*inFilename++;
    603         }
    604     }
    605 
    606     if(suffix==NULL) {
    607         /* the filename does not have a suffix */
    608         uprv_strcpy(entryName, inFilename);
    609         if(optFilename != NULL) {
    610           uprv_strcpy(outFilename, optFilename);
    611         } else {
    612           uprv_strcpy(outFilename, inFilename);
    613         }
    614         uprv_strcat(outFilename, newSuffix);
    615     } else {
    616         char *saveOutFilename = outFilename;
    617         /* copy basename */
    618         while(inFilename<suffix) {
    619             if(*inFilename=='-') {
    620                 /* iSeries cannot have '-' in the .o objects. */
    621                 *outFilename++=*entryName++='_';
    622                 inFilename++;
    623             }
    624             else {
    625                 *outFilename++=*entryName++=*inFilename++;
    626             }
    627         }
    628 
    629         /* replace '.' by '_' */
    630         *outFilename++=*entryName++='_';
    631         ++inFilename;
    632 
    633         /* copy suffix */
    634         while(*inFilename!=0) {
    635             *outFilename++=*entryName++=*inFilename++;
    636         }
    637 
    638         *entryName=0;
    639 
    640         if(optFilename != NULL) {
    641             uprv_strcpy(saveOutFilename, optFilename);
    642             uprv_strcat(saveOutFilename, newSuffix);
    643         } else {
    644             /* add ".c" */
    645             uprv_strcpy(outFilename, newSuffix);
    646         }
    647     }
    648 }
    649 
    650 #ifdef CAN_GENERATE_OBJECTS
    651 static void
    652 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
    653     union {
    654         char        bytes[2048];
    655 #ifdef U_ELF
    656         Elf32_Ehdr  header32;
    657         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
    658 #elif U_PLATFORM_HAS_WIN32_API
    659         IMAGE_FILE_HEADER header;
    660 #endif
    661     } buffer;
    662 
    663     const char *filename;
    664     FileStream *in;
    665     int32_t length;
    666 
    667 #ifdef U_ELF
    668 
    669 #elif U_PLATFORM_HAS_WIN32_API
    670     const IMAGE_FILE_HEADER *pHeader;
    671 #else
    672 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    673 #endif
    674 
    675     if(optMatchArch != NULL) {
    676         filename=optMatchArch;
    677     } else {
    678         /* set defaults */
    679 #ifdef U_ELF
    680         /* set EM_386 because elf.h does not provide better defaults */
    681         *pCPU=EM_386;
    682         *pBits=32;
    683         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
    684 #elif U_PLATFORM_HAS_WIN32_API
    685 /* _M_IA64 should be defined in windows.h */
    686 #   if defined(_M_IA64)
    687         *pCPU=IMAGE_FILE_MACHINE_IA64;
    688 #   elif defined(_M_AMD64)
    689         *pCPU=IMAGE_FILE_MACHINE_AMD64;
    690 #   else
    691         *pCPU=IMAGE_FILE_MACHINE_I386;
    692 #   endif
    693         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    694         *pIsBigEndian=FALSE;
    695 #else
    696 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    697 #endif
    698         return;
    699     }
    700 
    701     in=T_FileStream_open(filename, "rb");
    702     if(in==NULL) {
    703         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
    704         exit(U_FILE_ACCESS_ERROR);
    705     }
    706     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
    707 
    708 #ifdef U_ELF
    709     if(length<sizeof(Elf32_Ehdr)) {
    710         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    711         exit(U_UNSUPPORTED_ERROR);
    712     }
    713     if(
    714         buffer.header32.e_ident[0]!=ELFMAG0 ||
    715         buffer.header32.e_ident[1]!=ELFMAG1 ||
    716         buffer.header32.e_ident[2]!=ELFMAG2 ||
    717         buffer.header32.e_ident[3]!=ELFMAG3 ||
    718         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
    719     ) {
    720         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
    721         exit(U_UNSUPPORTED_ERROR);
    722     }
    723 
    724     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
    725 #ifdef U_ELF64
    726     if(*pBits!=32 && *pBits!=64) {
    727         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
    728         exit(U_UNSUPPORTED_ERROR);
    729     }
    730 #else
    731     if(*pBits!=32) {
    732         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
    733         exit(U_UNSUPPORTED_ERROR);
    734     }
    735 #endif
    736 
    737     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
    738     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
    739         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
    740         exit(U_UNSUPPORTED_ERROR);
    741     }
    742     /* TODO: Support byte swapping */
    743 
    744     *pCPU=buffer.header32.e_machine;
    745 #elif U_PLATFORM_HAS_WIN32_API
    746     if(length<sizeof(IMAGE_FILE_HEADER)) {
    747         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    748         exit(U_UNSUPPORTED_ERROR);
    749     }
    750     /* TODO: Use buffer.header.  Keep aliasing legal.  */
    751     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
    752     *pCPU=pHeader->Machine;
    753     /*
    754      * The number of bits is implicit with the Machine value.
    755      * *pBits is ignored in the calling code, so this need not be precise.
    756      */
    757     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    758     /* Windows always runs on little-endian CPUs. */
    759     *pIsBigEndian=FALSE;
    760 #else
    761 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    762 #endif
    763 
    764     T_FileStream_close(in);
    765 }
    766 
    767 U_CAPI void U_EXPORT2
    768 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
    769     /* common variables */
    770     char buffer[4096], entry[96]={ 0 };
    771     FileStream *in, *out;
    772     const char *newSuffix;
    773     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
    774 
    775     uint16_t cpu, bits;
    776     UBool makeBigEndian;
    777 
    778     /* platform-specific variables and initialization code */
    779 #ifdef U_ELF
    780     /* 32-bit Elf file header */
    781     static Elf32_Ehdr header32={
    782         {
    783             /* e_ident[] */
    784             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    785             ELFCLASS32,
    786             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    787             EV_CURRENT /* EI_VERSION */
    788         },
    789         ET_REL,
    790         EM_386,
    791         EV_CURRENT, /* e_version */
    792         0, /* e_entry */
    793         0, /* e_phoff */
    794         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
    795         0, /* e_flags */
    796         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
    797         0, /* e_phentsize */
    798         0, /* e_phnum */
    799         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
    800         5, /* e_shnum */
    801         2 /* e_shstrndx */
    802     };
    803 
    804     /* 32-bit Elf section header table */
    805     static Elf32_Shdr sectionHeaders32[5]={
    806         { /* SHN_UNDEF */
    807             0
    808         },
    809         { /* .symtab */
    810             1, /* sh_name */
    811             SHT_SYMTAB,
    812             0, /* sh_flags */
    813             0, /* sh_addr */
    814             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
    815             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
    816             3, /* sh_link=sect hdr index of .strtab */
    817             1, /* sh_info=One greater than the symbol table index of the last
    818                 * local symbol (with STB_LOCAL). */
    819             4, /* sh_addralign */
    820             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
    821         },
    822         { /* .shstrtab */
    823             9, /* sh_name */
    824             SHT_STRTAB,
    825             0, /* sh_flags */
    826             0, /* sh_addr */
    827             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
    828             40, /* sh_size */
    829             0, /* sh_link */
    830             0, /* sh_info */
    831             1, /* sh_addralign */
    832             0 /* sh_entsize */
    833         },
    834         { /* .strtab */
    835             19, /* sh_name */
    836             SHT_STRTAB,
    837             0, /* sh_flags */
    838             0, /* sh_addr */
    839             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
    840             (Elf32_Word)sizeof(entry), /* sh_size */
    841             0, /* sh_link */
    842             0, /* sh_info */
    843             1, /* sh_addralign */
    844             0 /* sh_entsize */
    845         },
    846         { /* .rodata */
    847             27, /* sh_name */
    848             SHT_PROGBITS,
    849             SHF_ALLOC, /* sh_flags */
    850             0, /* sh_addr */
    851             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
    852             0, /* sh_size */
    853             0, /* sh_link */
    854             0, /* sh_info */
    855             16, /* sh_addralign */
    856             0 /* sh_entsize */
    857         }
    858     };
    859 
    860     /* symbol table */
    861     static Elf32_Sym symbols32[2]={
    862         { /* STN_UNDEF */
    863             0
    864         },
    865         { /* data entry point */
    866             1, /* st_name */
    867             0, /* st_value */
    868             0, /* st_size */
    869             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    870             0, /* st_other */
    871             4 /* st_shndx=index of related section table entry */
    872         }
    873     };
    874 
    875     /* section header string table, with decimal string offsets */
    876     static const char sectionStrings[40]=
    877         /*  0 */ "\0"
    878         /*  1 */ ".symtab\0"
    879         /*  9 */ ".shstrtab\0"
    880         /* 19 */ ".strtab\0"
    881         /* 27 */ ".rodata\0"
    882         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
    883         /* 40: padded to multiple of 8 bytes */
    884 
    885     /*
    886      * Use entry[] for the string table which will contain only the
    887      * entry point name.
    888      * entry[0] must be 0 (NUL)
    889      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
    890      */
    891 
    892     /* 16-align .rodata in the .o file, just in case */
    893     static const char padding[16]={ 0 };
    894     int32_t paddingSize;
    895 
    896 #ifdef U_ELF64
    897     /* 64-bit Elf file header */
    898     static Elf64_Ehdr header64={
    899         {
    900             /* e_ident[] */
    901             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    902             ELFCLASS64,
    903             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    904             EV_CURRENT /* EI_VERSION */
    905         },
    906         ET_REL,
    907         EM_X86_64,
    908         EV_CURRENT, /* e_version */
    909         0, /* e_entry */
    910         0, /* e_phoff */
    911         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
    912         0, /* e_flags */
    913         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
    914         0, /* e_phentsize */
    915         0, /* e_phnum */
    916         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
    917         5, /* e_shnum */
    918         2 /* e_shstrndx */
    919     };
    920 
    921     /* 64-bit Elf section header table */
    922     static Elf64_Shdr sectionHeaders64[5]={
    923         { /* SHN_UNDEF */
    924             0
    925         },
    926         { /* .symtab */
    927             1, /* sh_name */
    928             SHT_SYMTAB,
    929             0, /* sh_flags */
    930             0, /* sh_addr */
    931             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
    932             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
    933             3, /* sh_link=sect hdr index of .strtab */
    934             1, /* sh_info=One greater than the symbol table index of the last
    935                 * local symbol (with STB_LOCAL). */
    936             4, /* sh_addralign */
    937             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
    938         },
    939         { /* .shstrtab */
    940             9, /* sh_name */
    941             SHT_STRTAB,
    942             0, /* sh_flags */
    943             0, /* sh_addr */
    944             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
    945             40, /* sh_size */
    946             0, /* sh_link */
    947             0, /* sh_info */
    948             1, /* sh_addralign */
    949             0 /* sh_entsize */
    950         },
    951         { /* .strtab */
    952             19, /* sh_name */
    953             SHT_STRTAB,
    954             0, /* sh_flags */
    955             0, /* sh_addr */
    956             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
    957             (Elf64_Xword)sizeof(entry), /* sh_size */
    958             0, /* sh_link */
    959             0, /* sh_info */
    960             1, /* sh_addralign */
    961             0 /* sh_entsize */
    962         },
    963         { /* .rodata */
    964             27, /* sh_name */
    965             SHT_PROGBITS,
    966             SHF_ALLOC, /* sh_flags */
    967             0, /* sh_addr */
    968             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
    969             0, /* sh_size */
    970             0, /* sh_link */
    971             0, /* sh_info */
    972             16, /* sh_addralign */
    973             0 /* sh_entsize */
    974         }
    975     };
    976 
    977     /*
    978      * 64-bit symbol table
    979      * careful: different order of items compared with Elf32_Sym!
    980      */
    981     static Elf64_Sym symbols64[2]={
    982         { /* STN_UNDEF */
    983             0
    984         },
    985         { /* data entry point */
    986             1, /* st_name */
    987             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    988             0, /* st_other */
    989             4, /* st_shndx=index of related section table entry */
    990             0, /* st_value */
    991             0 /* st_size */
    992         }
    993     };
    994 
    995 #endif /* U_ELF64 */
    996 
    997     /* entry[] has a leading NUL */
    998     entryOffset=1;
    999 
   1000     /* in the common code, count entryLength from after the NUL */
   1001     entryLengthOffset=1;
   1002 
   1003     newSuffix=".o";
   1004 
   1005 #elif U_PLATFORM_HAS_WIN32_API
   1006     struct {
   1007         IMAGE_FILE_HEADER fileHeader;
   1008         IMAGE_SECTION_HEADER sections[2];
   1009         char linkerOptions[100];
   1010     } objHeader;
   1011     IMAGE_SYMBOL symbols[1];
   1012     struct {
   1013         DWORD sizeofLongNames;
   1014         char longNames[100];
   1015     } symbolNames;
   1016 
   1017     /*
   1018      * entry sometimes has a leading '_'
   1019      * overwritten if entryOffset==0 depending on the target platform
   1020      * see check for cpu below
   1021      */
   1022     entry[0]='_';
   1023 
   1024     newSuffix=".obj";
   1025 #else
   1026 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1027 #endif
   1028 
   1029     /* deal with options, files and the entry point name */
   1030     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
   1031     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
   1032 #if U_PLATFORM_HAS_WIN32_API
   1033     if(cpu==IMAGE_FILE_MACHINE_I386) {
   1034         entryOffset=1;
   1035     }
   1036 #endif
   1037 
   1038     in=T_FileStream_open(filename, "rb");
   1039     if(in==NULL) {
   1040         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
   1041         exit(U_FILE_ACCESS_ERROR);
   1042     }
   1043     size=T_FileStream_size(in);
   1044 
   1045     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
   1046     if (outFilePath != NULL) {
   1047         uprv_strcpy(outFilePath, buffer);
   1048     }
   1049 
   1050     if(optEntryPoint != NULL) {
   1051         uprv_strcpy(entry+entryOffset, optEntryPoint);
   1052         uprv_strcat(entry+entryOffset, "_dat");
   1053     }
   1054     /* turn dashes in the entry name into underscores */
   1055     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
   1056     for(i=0; i<entryLength; ++i) {
   1057         if(entry[entryLengthOffset+i]=='-') {
   1058             entry[entryLengthOffset+i]='_';
   1059         }
   1060     }
   1061 
   1062     /* open the output file */
   1063     out=T_FileStream_open(buffer, "wb");
   1064     if(out==NULL) {
   1065         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
   1066         exit(U_FILE_ACCESS_ERROR);
   1067     }
   1068 
   1069 #ifdef U_ELF
   1070     if(bits==32) {
   1071         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1072         header32.e_machine=cpu;
   1073 
   1074         /* 16-align .rodata in the .o file, just in case */
   1075         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
   1076         if(paddingSize!=0) {
   1077                 paddingSize=0x10-paddingSize;
   1078                 sectionHeaders32[4].sh_offset+=paddingSize;
   1079         }
   1080 
   1081         sectionHeaders32[4].sh_size=(Elf32_Word)size;
   1082 
   1083         symbols32[1].st_size=(Elf32_Word)size;
   1084 
   1085         /* write .o headers */
   1086         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
   1087         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
   1088         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
   1089     } else /* bits==64 */ {
   1090 #ifdef U_ELF64
   1091         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1092         header64.e_machine=cpu;
   1093 
   1094         /* 16-align .rodata in the .o file, just in case */
   1095         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
   1096         if(paddingSize!=0) {
   1097                 paddingSize=0x10-paddingSize;
   1098                 sectionHeaders64[4].sh_offset+=paddingSize;
   1099         }
   1100 
   1101         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
   1102 
   1103         symbols64[1].st_size=(Elf64_Xword)size;
   1104 
   1105         /* write .o headers */
   1106         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
   1107         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
   1108         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
   1109 #endif
   1110     }
   1111 
   1112     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
   1113     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
   1114     if(paddingSize!=0) {
   1115         T_FileStream_write(out, padding, paddingSize);
   1116     }
   1117 #elif U_PLATFORM_HAS_WIN32_API
   1118     /* populate the .obj headers */
   1119     uprv_memset(&objHeader, 0, sizeof(objHeader));
   1120     uprv_memset(&symbols, 0, sizeof(symbols));
   1121     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
   1122 
   1123     /* write the linker export directive */
   1124     uprv_strcpy(objHeader.linkerOptions, "-export:");
   1125     length=8;
   1126     uprv_strcpy(objHeader.linkerOptions+length, entry);
   1127     length+=entryLength;
   1128     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
   1129     length+=6;
   1130 
   1131     /* set the file header */
   1132     objHeader.fileHeader.Machine=cpu;
   1133     objHeader.fileHeader.NumberOfSections=2;
   1134     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
   1135     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
   1136     objHeader.fileHeader.NumberOfSymbols=1;
   1137 
   1138     /* set the section for the linker options */
   1139     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
   1140     objHeader.sections[0].SizeOfRawData=length;
   1141     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
   1142     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
   1143 
   1144     /* set the data section */
   1145     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
   1146     objHeader.sections[1].SizeOfRawData=size;
   1147     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
   1148     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
   1149 
   1150     /* set the symbol table */
   1151     if(entryLength<=8) {
   1152         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
   1153         symbolNames.sizeofLongNames=4;
   1154     } else {
   1155         symbols[0].N.Name.Short=0;
   1156         symbols[0].N.Name.Long=4;
   1157         symbolNames.sizeofLongNames=4+entryLength+1;
   1158         uprv_strcpy(symbolNames.longNames, entry);
   1159     }
   1160     symbols[0].SectionNumber=2;
   1161     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
   1162 
   1163     /* write the file header and the linker options section */
   1164     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
   1165 #else
   1166 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1167 #endif
   1168 
   1169     /* copy the data file into section 2 */
   1170     for(;;) {
   1171         length=T_FileStream_read(in, buffer, sizeof(buffer));
   1172         if(length==0) {
   1173             break;
   1174         }
   1175         T_FileStream_write(out, buffer, (int32_t)length);
   1176     }
   1177 
   1178 #if U_PLATFORM_HAS_WIN32_API
   1179     /* write the symbol table */
   1180     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
   1181     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
   1182 #endif
   1183 
   1184     if(T_FileStream_error(in)) {
   1185         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
   1186         exit(U_FILE_ACCESS_ERROR);
   1187     }
   1188 
   1189     if(T_FileStream_error(out)) {
   1190         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
   1191         exit(U_FILE_ACCESS_ERROR);
   1192     }
   1193 
   1194     T_FileStream_close(out);
   1195     T_FileStream_close(in);
   1196 }
   1197 #endif
   1198