Home | History | Annotate | Download | only in toolutil
      1 /******************************************************************************
      2  *   Copyright (C) 2009-2012, International Business Machines
      3  *   Corporation and others.  All Rights Reserved.
      4  *******************************************************************************
      5  */
      6 #include "unicode/utypes.h"
      7 
      8 #if U_PLATFORM_HAS_WIN32_API
      9 #   define VC_EXTRALEAN
     10 #   define WIN32_LEAN_AND_MEAN
     11 #   define NOUSER
     12 #   define NOSERVICE
     13 #   define NOIME
     14 #   define NOMCX
     15 #include <windows.h>
     16 #include <time.h>
     17 #   ifdef __GNUC__
     18 #       define WINDOWS_WITH_GNUC
     19 #   endif
     20 #endif
     21 
     22 #if U_PLATFORM_IS_LINUX_BASED
     23 #   define U_ELF
     24 #endif
     25 
     26 #ifdef U_ELF
     27 #   include <elf.h>
     28 #   if defined(ELFCLASS64)
     29 #       define U_ELF64
     30 #   endif
     31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
     32 #   ifndef EM_X86_64
     33 #       define EM_X86_64 62
     34 #   endif
     35 #   define ICU_ENTRY_OFFSET 0
     36 #endif
     37 
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include "unicode/putil.h"
     41 #include "cmemory.h"
     42 #include "cstring.h"
     43 #include "filestrm.h"
     44 #include "toolutil.h"
     45 #include "unicode/uclean.h"
     46 #include "uoptions.h"
     47 #include "pkg_genc.h"
     48 
     49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
     50 
     51 #define HEX_0X 0 /*  0x1234 */
     52 #define HEX_0H 1 /*  01234h */
     53 
     54 /* prototypes --------------------------------------------------------------- */
/*
 * Builds the output file path (into outFilename) and the data entry point
 * name (into entryName) from the input file name, the optional destination
 * directory, the new suffix and the optional output base name.
 */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/*
 * Writes one byte as a decimal number, preceded by a separator when needed.
 * Takes the current column counter and returns the updated one.
 */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/*
 * Writes one 32-bit word for the assembly output, preceded by a separator
 * when needed. Takes the current column counter and returns the updated one.
 */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#if U_PLATFORM == U_PF_OS400
/*
 * Writes one byte as an escape sequence inside a C string literal.
 * Takes the current column counter and returns the updated one.
 */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
     68 /* -------------------------------------------------------------------------- */
     69 
     70 /*
     71 Creating Template Files for New Platforms
     72 
     73 Let the cc compiler help you get started.
     74 Compile this program
     75     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
     76 with the -S option to produce assembly output.
     77 
     78 For example, this will generate array.s:
     79 gcc -S array.c
     80 
     81 This will produce a .s file that may look like this:
     82 
     83     .file   "array.c"
     84     .version        "01.01"
     85 gcc2_compiled.:
     86     .globl x
     87     .section        .rodata
     88     .align 4
     89     .type    x,@object
     90     .size    x,20
     91 x:
     92     .long   1
     93     .long   2
     94     .long   -559038737
     95     .long   -1
     96     .long   16
     97     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
     98 
     99 which gives a starting point that will compile, and can be transformed
    100 to become the template, generally with some consulting of as docs and
    101 some experimentation.
    102 
    103 If you want ICU to automatically use this assembly, you should
    104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
    105 where the name is the compiler or platform that you used in this
    106 assemblyHeader data structure.
    107 */
/*
 * Table of assembly output templates, one per supported assembler.
 * An entry is selected by name through checkAssemblyHeaderName(); the
 * selected entry is then used by writeAssemblyCode() and write32().
 */
static const struct AssemblyType {
    /* Name that the user passes to select this template. */
    const char *name;
    /*
     * printf-style format written once at the top of the output file.
     * Every %s is replaced with the entry point name; a literal '%' must be
     * written as "%%".
     */
    const char *header;
    /* Directive that starts each line of 32-bit data words. */
    const char *beginLine;
    /* printf-style format written once after the data; %s is the entry point name. */
    const char *footer;
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "\t.data\n"
        "\t.const\n"
        "\t.align 4\n"  /* 1<<4 = 16 */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   8\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  8\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
    /* The only entry with a non-empty footer and the 01234h hex notation. */
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
    210 
/*
 * Index into assemblyHeader[] of the currently selected template.
 * Stays -1 until checkAssemblyHeaderName() finds a matching name.
 */
static int32_t assemblyHeaderIndex = -1;
/* Hex notation (HEX_0X or HEX_0H) of the selected template; read by write32(). */
static int32_t hexType = HEX_0X;
    213 
    214 U_CAPI UBool U_EXPORT2
    215 checkAssemblyHeaderName(const char* optAssembly) {
    216     int32_t idx;
    217     assemblyHeaderIndex = -1;
    218     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    219         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
    220             assemblyHeaderIndex = idx;
    221             hexType = assemblyHeader[idx].hexType; /* set the hex type */
    222             return TRUE;
    223         }
    224     }
    225 
    226     return FALSE;
    227 }
    228 
    229 
    230 U_CAPI void U_EXPORT2
    231 printAssemblyHeadersToStdErr(void) {
    232     int32_t idx;
    233     fprintf(stderr, "%s", assemblyHeader[0].name);
    234     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    235         fprintf(stderr, ", %s", assemblyHeader[idx].name);
    236     }
    237     fprintf(stderr,
    238         ")\n");
    239 }
    240 
    241 U_CAPI void U_EXPORT2
    242 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
    243     uint32_t column = MAX_COLUMN;
    244     char entry[64];
    245     uint32_t buffer[1024];
    246     char *bufferStr = (char *)buffer;
    247     FileStream *in, *out;
    248     size_t i, length;
    249 
    250     in=T_FileStream_open(filename, "rb");
    251     if(in==NULL) {
    252         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    253         exit(U_FILE_ACCESS_ERROR);
    254     }
    255 
    256     getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
    257     out=T_FileStream_open(bufferStr, "w");
    258     if(out==NULL) {
    259         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
    260         exit(U_FILE_ACCESS_ERROR);
    261     }
    262 
    263     if (outFilePath != NULL) {
    264         uprv_strcpy(outFilePath, bufferStr);
    265     }
    266 
    267 #ifdef WINDOWS_WITH_GNUC
    268     /* Need to fix the file seperator character when using MinGW. */
    269     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
    270 #endif
    271 
    272     if(optEntryPoint != NULL) {
    273         uprv_strcpy(entry, optEntryPoint);
    274         uprv_strcat(entry, "_dat");
    275     }
    276 
    277     /* turn dashes or dots in the entry name into underscores */
    278     length=uprv_strlen(entry);
    279     for(i=0; i<length; ++i) {
    280         if(entry[i]=='-' || entry[i]=='.') {
    281             entry[i]='_';
    282         }
    283     }
    284 
    285     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
    286         entry, entry, entry, entry,
    287         entry, entry, entry, entry);
    288     T_FileStream_writeLine(out, bufferStr);
    289     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
    290 
    291     for(;;) {
    292         length=T_FileStream_read(in, buffer, sizeof(buffer));
    293         if(length==0) {
    294             break;
    295         }
    296         if (length != sizeof(buffer)) {
    297             /* pad with extra 0's when at the end of the file */
    298             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
    299                 buffer[length+i] = 0;
    300             }
    301         }
    302         for(i=0; i<(length/sizeof(buffer[0])); i++) {
    303             column = write32(out, buffer[i], column);
    304         }
    305     }
    306 
    307     T_FileStream_writeLine(out, "\n");
    308 
    309     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
    310         entry, entry, entry, entry,
    311         entry, entry, entry, entry);
    312     T_FileStream_writeLine(out, bufferStr);
    313 
    314     if(T_FileStream_error(in)) {
    315         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    316         exit(U_FILE_ACCESS_ERROR);
    317     }
    318 
    319     if(T_FileStream_error(out)) {
    320         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    321         exit(U_FILE_ACCESS_ERROR);
    322     }
    323 
    324     T_FileStream_close(out);
    325     T_FileStream_close(in);
    326 }
    327 
    328 U_CAPI void U_EXPORT2
    329 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
    330     uint32_t column = MAX_COLUMN;
    331     char buffer[4096], entry[64];
    332     FileStream *in, *out;
    333     size_t i, length;
    334 
    335     in=T_FileStream_open(filename, "rb");
    336     if(in==NULL) {
    337         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    338         exit(U_FILE_ACCESS_ERROR);
    339     }
    340 
    341     if(optName != NULL) { /* prepend  'icudt28_' */
    342       strcpy(entry, optName);
    343       strcat(entry, "_");
    344     } else {
    345       entry[0] = 0;
    346     }
    347 
    348     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
    349     if (outFilePath != NULL) {
    350         uprv_strcpy(outFilePath, buffer);
    351     }
    352     out=T_FileStream_open(buffer, "w");
    353     if(out==NULL) {
    354         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
    355         exit(U_FILE_ACCESS_ERROR);
    356     }
    357 
    358     /* turn dashes or dots in the entry name into underscores */
    359     length=uprv_strlen(entry);
    360     for(i=0; i<length; ++i) {
    361         if(entry[i]=='-' || entry[i]=='.') {
    362             entry[i]='_';
    363         }
    364     }
    365 
    366 #if U_PLATFORM == U_PF_OS400
    367     /*
    368     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
    369 
    370     This is here because this platform can't currently put
    371     const data into the read-only pages of an object or
    372     shared library (service program). Only strings are allowed in read-only
    373     pages, so we use char * strings to store the data.
    374 
    375     In order to prevent the beginning of the data from ever matching the
    376     magic numbers we must still use the initial double.
    377     [grhoten 4/24/2003]
    378     */
    379     sprintf(buffer,
    380         "#ifndef IN_GENERATED_CCODE\n"
    381         "#define IN_GENERATED_CCODE\n"
    382         "#define U_DISABLE_RENAMING 1\n"
    383         "#include \"unicode/umachine.h\"\n"
    384         "#endif\n"
    385         "U_CDECL_BEGIN\n"
    386         "const struct {\n"
    387         "    double bogus;\n"
    388         "    const char *bytes; \n"
    389         "} %s={ 0.0, \n",
    390         entry);
    391     T_FileStream_writeLine(out, buffer);
    392 
    393     for(;;) {
    394         length=T_FileStream_read(in, buffer, sizeof(buffer));
    395         if(length==0) {
    396             break;
    397         }
    398         for(i=0; i<length; ++i) {
    399             column = write8str(out, (uint8_t)buffer[i], column);
    400         }
    401     }
    402 
    403     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
    404 #else
    405     /* Function renaming shouldn't be done in data */
    406     sprintf(buffer,
    407         "#ifndef IN_GENERATED_CCODE\n"
    408         "#define IN_GENERATED_CCODE\n"
    409         "#define U_DISABLE_RENAMING 1\n"
    410         "#include \"unicode/umachine.h\"\n"
    411         "#endif\n"
    412         "U_CDECL_BEGIN\n"
    413         "const struct {\n"
    414         "    double bogus;\n"
    415         "    uint8_t bytes[%ld]; \n"
    416         "} %s={ 0.0, {\n",
    417         (long)T_FileStream_size(in), entry);
    418     T_FileStream_writeLine(out, buffer);
    419 
    420     for(;;) {
    421         length=T_FileStream_read(in, buffer, sizeof(buffer));
    422         if(length==0) {
    423             break;
    424         }
    425         for(i=0; i<length; ++i) {
    426             column = write8(out, (uint8_t)buffer[i], column);
    427         }
    428     }
    429 
    430     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
    431 #endif
    432 
    433     if(T_FileStream_error(in)) {
    434         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    435         exit(U_FILE_ACCESS_ERROR);
    436     }
    437 
    438     if(T_FileStream_error(out)) {
    439         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    440         exit(U_FILE_ACCESS_ERROR);
    441     }
    442 
    443     T_FileStream_close(out);
    444     T_FileStream_close(in);
    445 }
    446 
    447 static uint32_t
    448 write32(FileStream *out, uint32_t bitField, uint32_t column) {
    449     int32_t i;
    450     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
    451     char *s = bitFieldStr;
    452     uint8_t *ptrIdx = (uint8_t *)&bitField;
    453     static const char hexToStr[16] = {
    454         '0','1','2','3',
    455         '4','5','6','7',
    456         '8','9','A','B',
    457         'C','D','E','F'
    458     };
    459 
    460     /* write the value, possibly with comma and newline */
    461     if(column==MAX_COLUMN) {
    462         /* first byte */
    463         column=1;
    464     } else if(column<32) {
    465         *(s++)=',';
    466         ++column;
    467     } else {
    468         *(s++)='\n';
    469         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
    470         s+=uprv_strlen(s);
    471         column=1;
    472     }
    473 
    474     if (bitField < 10) {
    475         /* It's a small number. Don't waste the space for 0x */
    476         *(s++)=hexToStr[bitField];
    477     }
    478     else {
    479         int seenNonZero = 0; /* This is used to remove leading zeros */
    480 
    481         if(hexType==HEX_0X) {
    482          *(s++)='0';
    483          *(s++)='x';
    484         } else if(hexType==HEX_0H) {
    485          *(s++)='0';
    486         }
    487 
    488         /* This creates a 32-bit field */
    489 #if U_IS_BIG_ENDIAN
    490         for (i = 0; i < sizeof(uint32_t); i++)
    491 #else
    492         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
    493 #endif
    494         {
    495             uint8_t value = ptrIdx[i];
    496             if (value || seenNonZero) {
    497                 *(s++)=hexToStr[value>>4];
    498                 *(s++)=hexToStr[value&0xF];
    499                 seenNonZero = 1;
    500             }
    501         }
    502         if(hexType==HEX_0H) {
    503          *(s++)='h';
    504         }
    505     }
    506 
    507     *(s++)=0;
    508     T_FileStream_writeLine(out, bitFieldStr);
    509     return column;
    510 }
    511 
    512 static uint32_t
    513 write8(FileStream *out, uint8_t byte, uint32_t column) {
    514     char s[4];
    515     int i=0;
    516 
    517     /* convert the byte value to a string */
    518     if(byte>=100) {
    519         s[i++]=(char)('0'+byte/100);
    520         byte%=100;
    521     }
    522     if(i>0 || byte>=10) {
    523         s[i++]=(char)('0'+byte/10);
    524         byte%=10;
    525     }
    526     s[i++]=(char)('0'+byte);
    527     s[i]=0;
    528 
    529     /* write the value, possibly with comma and newline */
    530     if(column==MAX_COLUMN) {
    531         /* first byte */
    532         column=1;
    533     } else if(column<16) {
    534         T_FileStream_writeLine(out, ",");
    535         ++column;
    536     } else {
    537         T_FileStream_writeLine(out, ",\n");
    538         column=1;
    539     }
    540     T_FileStream_writeLine(out, s);
    541     return column;
    542 }
    543 
    544 #if U_PLATFORM == U_PF_OS400
    545 static uint32_t
    546 write8str(FileStream *out, uint8_t byte, uint32_t column) {
    547     char s[8];
    548 
    549     if (byte > 7)
    550         sprintf(s, "\\x%X", byte);
    551     else
    552         sprintf(s, "\\%X", byte);
    553 
    554     /* write the value, possibly with comma and newline */
    555     if(column==MAX_COLUMN) {
    556         /* first byte */
    557         column=1;
    558         T_FileStream_writeLine(out, "\"");
    559     } else if(column<24) {
    560         ++column;
    561     } else {
    562         T_FileStream_writeLine(out, "\"\n\"");
    563         column=1;
    564     }
    565     T_FileStream_writeLine(out, s);
    566     return column;
    567 }
    568 #endif
    569 
    570 static void
    571 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
    572     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
    573 
    574     /* copy path */
    575     if(destdir!=NULL && *destdir!=0) {
    576         do {
    577             *outFilename++=*destdir++;
    578         } while(*destdir!=0);
    579         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
    580             *outFilename++=U_FILE_SEP_CHAR;
    581         }
    582         inFilename=basename;
    583     } else {
    584         while(inFilename<basename) {
    585             *outFilename++=*inFilename++;
    586         }
    587     }
    588 
    589     if(suffix==NULL) {
    590         /* the filename does not have a suffix */
    591         uprv_strcpy(entryName, inFilename);
    592         if(optFilename != NULL) {
    593           uprv_strcpy(outFilename, optFilename);
    594         } else {
    595           uprv_strcpy(outFilename, inFilename);
    596         }
    597         uprv_strcat(outFilename, newSuffix);
    598     } else {
    599         char *saveOutFilename = outFilename;
    600         /* copy basename */
    601         while(inFilename<suffix) {
    602             if(*inFilename=='-') {
    603                 /* iSeries cannot have '-' in the .o objects. */
    604                 *outFilename++=*entryName++='_';
    605                 inFilename++;
    606             }
    607             else {
    608                 *outFilename++=*entryName++=*inFilename++;
    609             }
    610         }
    611 
    612         /* replace '.' by '_' */
    613         *outFilename++=*entryName++='_';
    614         ++inFilename;
    615 
    616         /* copy suffix */
    617         while(*inFilename!=0) {
    618             *outFilename++=*entryName++=*inFilename++;
    619         }
    620 
    621         *entryName=0;
    622 
    623         if(optFilename != NULL) {
    624             uprv_strcpy(saveOutFilename, optFilename);
    625             uprv_strcat(saveOutFilename, newSuffix);
    626         } else {
    627             /* add ".c" */
    628             uprv_strcpy(outFilename, newSuffix);
    629         }
    630     }
    631 }
    632 
    633 #ifdef CAN_GENERATE_OBJECTS
    634 static void
    635 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
    636     union {
    637         char        bytes[2048];
    638 #ifdef U_ELF
    639         Elf32_Ehdr  header32;
    640         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
    641 #elif U_PLATFORM_HAS_WIN32_API
    642         IMAGE_FILE_HEADER header;
    643 #endif
    644     } buffer;
    645 
    646     const char *filename;
    647     FileStream *in;
    648     int32_t length;
    649 
    650 #ifdef U_ELF
    651 
    652 #elif U_PLATFORM_HAS_WIN32_API
    653     const IMAGE_FILE_HEADER *pHeader;
    654 #else
    655 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    656 #endif
    657 
    658     if(optMatchArch != NULL) {
    659         filename=optMatchArch;
    660     } else {
    661         /* set defaults */
    662 #ifdef U_ELF
    663         /* set EM_386 because elf.h does not provide better defaults */
    664         *pCPU=EM_386;
    665         *pBits=32;
    666         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
    667 #elif U_PLATFORM_HAS_WIN32_API
    668 /* _M_IA64 should be defined in windows.h */
    669 #   if defined(_M_IA64)
    670         *pCPU=IMAGE_FILE_MACHINE_IA64;
    671 #   elif defined(_M_AMD64)
    672         *pCPU=IMAGE_FILE_MACHINE_AMD64;
    673 #   else
    674         *pCPU=IMAGE_FILE_MACHINE_I386;
    675 #   endif
    676         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    677         *pIsBigEndian=FALSE;
    678 #else
    679 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    680 #endif
    681         return;
    682     }
    683 
    684     in=T_FileStream_open(filename, "rb");
    685     if(in==NULL) {
    686         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
    687         exit(U_FILE_ACCESS_ERROR);
    688     }
    689     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
    690 
    691 #ifdef U_ELF
    692     if(length<sizeof(Elf32_Ehdr)) {
    693         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    694         exit(U_UNSUPPORTED_ERROR);
    695     }
    696     if(
    697         buffer.header32.e_ident[0]!=ELFMAG0 ||
    698         buffer.header32.e_ident[1]!=ELFMAG1 ||
    699         buffer.header32.e_ident[2]!=ELFMAG2 ||
    700         buffer.header32.e_ident[3]!=ELFMAG3 ||
    701         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
    702     ) {
    703         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
    704         exit(U_UNSUPPORTED_ERROR);
    705     }
    706 
    707     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
    708 #ifdef U_ELF64
    709     if(*pBits!=32 && *pBits!=64) {
    710         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
    711         exit(U_UNSUPPORTED_ERROR);
    712     }
    713 #else
    714     if(*pBits!=32) {
    715         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
    716         exit(U_UNSUPPORTED_ERROR);
    717     }
    718 #endif
    719 
    720     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
    721     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
    722         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
    723         exit(U_UNSUPPORTED_ERROR);
    724     }
    725     /* TODO: Support byte swapping */
    726 
    727     *pCPU=buffer.header32.e_machine;
    728 #elif U_PLATFORM_HAS_WIN32_API
    729     if(length<sizeof(IMAGE_FILE_HEADER)) {
    730         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    731         exit(U_UNSUPPORTED_ERROR);
    732     }
    733     /* TODO: Use buffer.header.  Keep aliasing legal.  */
    734     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
    735     *pCPU=pHeader->Machine;
    736     /*
    737      * The number of bits is implicit with the Machine value.
    738      * *pBits is ignored in the calling code, so this need not be precise.
    739      */
    740     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    741     /* Windows always runs on little-endian CPUs. */
    742     *pIsBigEndian=FALSE;
    743 #else
    744 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    745 #endif
    746 
    747     T_FileStream_close(in);
    748 }
    749 
    750 U_CAPI void U_EXPORT2
    751 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
    752     /* common variables */
    753     char buffer[4096], entry[40]={ 0 };
    754     FileStream *in, *out;
    755     const char *newSuffix;
    756     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
    757 
    758     uint16_t cpu, bits;
    759     UBool makeBigEndian;
    760 
    761     /* platform-specific variables and initialization code */
    762 #ifdef U_ELF
    763     /* 32-bit Elf file header */
    764     static Elf32_Ehdr header32={
    765         {
    766             /* e_ident[] */
    767             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    768             ELFCLASS32,
    769             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    770             EV_CURRENT /* EI_VERSION */
    771         },
    772         ET_REL,
    773         EM_386,
    774         EV_CURRENT, /* e_version */
    775         0, /* e_entry */
    776         0, /* e_phoff */
    777         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
    778         0, /* e_flags */
    779         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
    780         0, /* e_phentsize */
    781         0, /* e_phnum */
    782         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
    783         5, /* e_shnum */
    784         2 /* e_shstrndx */
    785     };
    786 
    787     /* 32-bit Elf section header table */
    788     static Elf32_Shdr sectionHeaders32[5]={
    789         { /* SHN_UNDEF */
    790             0
    791         },
    792         { /* .symtab */
    793             1, /* sh_name */
    794             SHT_SYMTAB,
    795             0, /* sh_flags */
    796             0, /* sh_addr */
    797             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
    798             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
    799             3, /* sh_link=sect hdr index of .strtab */
    800             1, /* sh_info=One greater than the symbol table index of the last
    801                 * local symbol (with STB_LOCAL). */
    802             4, /* sh_addralign */
    803             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
    804         },
    805         { /* .shstrtab */
    806             9, /* sh_name */
    807             SHT_STRTAB,
    808             0, /* sh_flags */
    809             0, /* sh_addr */
    810             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
    811             40, /* sh_size */
    812             0, /* sh_link */
    813             0, /* sh_info */
    814             1, /* sh_addralign */
    815             0 /* sh_entsize */
    816         },
    817         { /* .strtab */
    818             19, /* sh_name */
    819             SHT_STRTAB,
    820             0, /* sh_flags */
    821             0, /* sh_addr */
    822             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
    823             (Elf32_Word)sizeof(entry), /* sh_size */
    824             0, /* sh_link */
    825             0, /* sh_info */
    826             1, /* sh_addralign */
    827             0 /* sh_entsize */
    828         },
    829         { /* .rodata */
    830             27, /* sh_name */
    831             SHT_PROGBITS,
    832             SHF_ALLOC, /* sh_flags */
    833             0, /* sh_addr */
    834             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
    835             0, /* sh_size */
    836             0, /* sh_link */
    837             0, /* sh_info */
    838             16, /* sh_addralign */
    839             0 /* sh_entsize */
    840         }
    841     };
    842 
    843     /* symbol table */
    844     static Elf32_Sym symbols32[2]={
    845         { /* STN_UNDEF */
    846             0
    847         },
    848         { /* data entry point */
    849             1, /* st_name */
    850             0, /* st_value */
    851             0, /* st_size */
    852             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    853             0, /* st_other */
    854             4 /* st_shndx=index of related section table entry */
    855         }
    856     };
    857 
    858     /* section header string table, with decimal string offsets */
    859     static const char sectionStrings[40]=
    860         /*  0 */ "\0"
    861         /*  1 */ ".symtab\0"
    862         /*  9 */ ".shstrtab\0"
    863         /* 19 */ ".strtab\0"
    864         /* 27 */ ".rodata\0"
    865         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
    866         /* 40: padded to multiple of 8 bytes */
    867 
    868     /*
    869      * Use entry[] for the string table which will contain only the
    870      * entry point name.
    871      * entry[0] must be 0 (NUL)
    872      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
    873      */
    874 
    875     /* 16-align .rodata in the .o file, just in case */
    876     static const char padding[16]={ 0 };
    877     int32_t paddingSize;
    878 
    879 #ifdef U_ELF64
    880     /* 64-bit Elf file header */
    881     static Elf64_Ehdr header64={
    882         {
    883             /* e_ident[] */
    884             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    885             ELFCLASS64,
    886             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    887             EV_CURRENT /* EI_VERSION */
    888         },
    889         ET_REL,
    890         EM_X86_64,
    891         EV_CURRENT, /* e_version */
    892         0, /* e_entry */
    893         0, /* e_phoff */
    894         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
    895         0, /* e_flags */
    896         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
    897         0, /* e_phentsize */
    898         0, /* e_phnum */
    899         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
    900         5, /* e_shnum */
    901         2 /* e_shstrndx */
    902     };
    903 
    904     /* 64-bit Elf section header table */
    905     static Elf64_Shdr sectionHeaders64[5]={
    906         { /* SHN_UNDEF */
    907             0
    908         },
    909         { /* .symtab */
    910             1, /* sh_name */
    911             SHT_SYMTAB,
    912             0, /* sh_flags */
    913             0, /* sh_addr */
    914             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
    915             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
    916             3, /* sh_link=sect hdr index of .strtab */
    917             1, /* sh_info=One greater than the symbol table index of the last
    918                 * local symbol (with STB_LOCAL). */
    919             4, /* sh_addralign */
    920             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
    921         },
    922         { /* .shstrtab */
    923             9, /* sh_name */
    924             SHT_STRTAB,
    925             0, /* sh_flags */
    926             0, /* sh_addr */
    927             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
    928             40, /* sh_size */
    929             0, /* sh_link */
    930             0, /* sh_info */
    931             1, /* sh_addralign */
    932             0 /* sh_entsize */
    933         },
    934         { /* .strtab */
    935             19, /* sh_name */
    936             SHT_STRTAB,
    937             0, /* sh_flags */
    938             0, /* sh_addr */
    939             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
    940             (Elf64_Xword)sizeof(entry), /* sh_size */
    941             0, /* sh_link */
    942             0, /* sh_info */
    943             1, /* sh_addralign */
    944             0 /* sh_entsize */
    945         },
    946         { /* .rodata */
    947             27, /* sh_name */
    948             SHT_PROGBITS,
    949             SHF_ALLOC, /* sh_flags */
    950             0, /* sh_addr */
    951             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
    952             0, /* sh_size */
    953             0, /* sh_link */
    954             0, /* sh_info */
    955             16, /* sh_addralign */
    956             0 /* sh_entsize */
    957         }
    958     };
    959 
    960     /*
    961      * 64-bit symbol table
    962      * careful: different order of items compared with Elf32_Sym!
    963      */
    964     static Elf64_Sym symbols64[2]={
    965         { /* STN_UNDEF */
    966             0
    967         },
    968         { /* data entry point */
    969             1, /* st_name */
    970             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    971             0, /* st_other */
    972             4, /* st_shndx=index of related section table entry */
    973             0, /* st_value */
    974             0 /* st_size */
    975         }
    976     };
    977 
    978 #endif /* U_ELF64 */
    979 
    980     /* entry[] has a leading NUL */
    981     entryOffset=1;
    982 
    983     /* in the common code, count entryLength from after the NUL */
    984     entryLengthOffset=1;
    985 
    986     newSuffix=".o";
    987 
    988 #elif U_PLATFORM_HAS_WIN32_API
    989     struct {
    990         IMAGE_FILE_HEADER fileHeader;
    991         IMAGE_SECTION_HEADER sections[2];
    992         char linkerOptions[100];
    993     } objHeader;
    994     IMAGE_SYMBOL symbols[1];
    995     struct {
    996         DWORD sizeofLongNames;
    997         char longNames[100];
    998     } symbolNames;
    999 
   1000     /*
   1001      * entry sometimes has a leading '_'
   1002      * overwritten if entryOffset==0 depending on the target platform
   1003      * see check for cpu below
   1004      */
   1005     entry[0]='_';
   1006 
   1007     newSuffix=".obj";
   1008 #else
   1009 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1010 #endif
   1011 
   1012     /* deal with options, files and the entry point name */
   1013     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
   1014     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
   1015 #if U_PLATFORM_HAS_WIN32_API
   1016     if(cpu==IMAGE_FILE_MACHINE_I386) {
   1017         entryOffset=1;
   1018     }
   1019 #endif
   1020 
   1021     in=T_FileStream_open(filename, "rb");
   1022     if(in==NULL) {
   1023         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
   1024         exit(U_FILE_ACCESS_ERROR);
   1025     }
   1026     size=T_FileStream_size(in);
   1027 
   1028     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
   1029     if (outFilePath != NULL) {
   1030         uprv_strcpy(outFilePath, buffer);
   1031     }
   1032 
   1033     if(optEntryPoint != NULL) {
   1034         uprv_strcpy(entry+entryOffset, optEntryPoint);
   1035         uprv_strcat(entry+entryOffset, "_dat");
   1036     }
   1037     /* turn dashes in the entry name into underscores */
   1038     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
   1039     for(i=0; i<entryLength; ++i) {
   1040         if(entry[entryLengthOffset+i]=='-') {
   1041             entry[entryLengthOffset+i]='_';
   1042         }
   1043     }
   1044 
   1045     /* open the output file */
   1046     out=T_FileStream_open(buffer, "wb");
   1047     if(out==NULL) {
   1048         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
   1049         exit(U_FILE_ACCESS_ERROR);
   1050     }
   1051 
   1052 #ifdef U_ELF
   1053     if(bits==32) {
   1054         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1055         header32.e_machine=cpu;
   1056 
   1057         /* 16-align .rodata in the .o file, just in case */
   1058         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
   1059         if(paddingSize!=0) {
   1060                 paddingSize=0x10-paddingSize;
   1061                 sectionHeaders32[4].sh_offset+=paddingSize;
   1062         }
   1063 
   1064         sectionHeaders32[4].sh_size=(Elf32_Word)size;
   1065 
   1066         symbols32[1].st_size=(Elf32_Word)size;
   1067 
   1068         /* write .o headers */
   1069         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
   1070         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
   1071         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
   1072     } else /* bits==64 */ {
   1073 #ifdef U_ELF64
   1074         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1075         header64.e_machine=cpu;
   1076 
   1077         /* 16-align .rodata in the .o file, just in case */
   1078         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
   1079         if(paddingSize!=0) {
   1080                 paddingSize=0x10-paddingSize;
   1081                 sectionHeaders64[4].sh_offset+=paddingSize;
   1082         }
   1083 
   1084         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
   1085 
   1086         symbols64[1].st_size=(Elf64_Xword)size;
   1087 
   1088         /* write .o headers */
   1089         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
   1090         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
   1091         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
   1092 #endif
   1093     }
   1094 
   1095     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
   1096     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
   1097     if(paddingSize!=0) {
   1098         T_FileStream_write(out, padding, paddingSize);
   1099     }
   1100 #elif U_PLATFORM_HAS_WIN32_API
   1101     /* populate the .obj headers */
   1102     uprv_memset(&objHeader, 0, sizeof(objHeader));
   1103     uprv_memset(&symbols, 0, sizeof(symbols));
   1104     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
   1105 
   1106     /* write the linker export directive */
   1107     uprv_strcpy(objHeader.linkerOptions, "-export:");
   1108     length=8;
   1109     uprv_strcpy(objHeader.linkerOptions+length, entry);
   1110     length+=entryLength;
   1111     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
   1112     length+=6;
   1113 
   1114     /* set the file header */
   1115     objHeader.fileHeader.Machine=cpu;
   1116     objHeader.fileHeader.NumberOfSections=2;
   1117     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
   1118     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
   1119     objHeader.fileHeader.NumberOfSymbols=1;
   1120 
   1121     /* set the section for the linker options */
   1122     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
   1123     objHeader.sections[0].SizeOfRawData=length;
   1124     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
   1125     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
   1126 
   1127     /* set the data section */
   1128     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
   1129     objHeader.sections[1].SizeOfRawData=size;
   1130     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
   1131     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
   1132 
   1133     /* set the symbol table */
   1134     if(entryLength<=8) {
   1135         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
   1136         symbolNames.sizeofLongNames=4;
   1137     } else {
   1138         symbols[0].N.Name.Short=0;
   1139         symbols[0].N.Name.Long=4;
   1140         symbolNames.sizeofLongNames=4+entryLength+1;
   1141         uprv_strcpy(symbolNames.longNames, entry);
   1142     }
   1143     symbols[0].SectionNumber=2;
   1144     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
   1145 
   1146     /* write the file header and the linker options section */
   1147     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
   1148 #else
   1149 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1150 #endif
   1151 
   1152     /* copy the data file into section 2 */
   1153     for(;;) {
   1154         length=T_FileStream_read(in, buffer, sizeof(buffer));
   1155         if(length==0) {
   1156             break;
   1157         }
   1158         T_FileStream_write(out, buffer, (int32_t)length);
   1159     }
   1160 
   1161 #if U_PLATFORM_HAS_WIN32_API
   1162     /* write the symbol table */
   1163     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
   1164     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
   1165 #endif
   1166 
   1167     if(T_FileStream_error(in)) {
   1168         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
   1169         exit(U_FILE_ACCESS_ERROR);
   1170     }
   1171 
   1172     if(T_FileStream_error(out)) {
   1173         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
   1174         exit(U_FILE_ACCESS_ERROR);
   1175     }
   1176 
   1177     T_FileStream_close(out);
   1178     T_FileStream_close(in);
   1179 }
   1180 #endif
   1181