Home | History | Annotate | Download | only in toolutil
      1 /******************************************************************************
      2  *   Copyright (C) 2009-2010, International Business Machines
      3  *   Corporation and others.  All Rights Reserved.
      4  *******************************************************************************
      5  */
      6 #include "unicode/utypes.h"
      7 
      8 #ifdef U_WINDOWS
      9 #   define VC_EXTRALEAN
     10 #   define WIN32_LEAN_AND_MEAN
     11 #   define NOUSER
     12 #   define NOSERVICE
     13 #   define NOIME
     14 #   define NOMCX
     15 #include <windows.h>
     16 #include <time.h>
     17 #   ifdef __GNUC__
     18 #       define WINDOWS_WITH_GNUC
     19 #   endif
     20 #endif
     21 
     22 #ifdef U_LINUX
     23 #   define U_ELF
     24 #endif
     25 
     26 #ifdef U_ELF
     27 #   include <elf.h>
     28 #   if defined(ELFCLASS64)
     29 #       define U_ELF64
     30 #   endif
     31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
     32 #   ifndef EM_X86_64
     33 #       define EM_X86_64 62
     34 #   endif
     35 #   define ICU_ENTRY_OFFSET 0
     36 #endif
     37 
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include "unicode/putil.h"
     41 #include "cmemory.h"
     42 #include "cstring.h"
     43 #include "filestrm.h"
     44 #include "toolutil.h"
     45 #include "unicode/uclean.h"
     46 #include "uoptions.h"
     47 #include "pkg_genc.h"
     48 
     49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
     50 
     51 #define HEX_0X 0 /*  0x1234 */
     52 #define HEX_0H 1 /*  01234h */
     53 
     54 #if defined(U_WINDOWS) || defined(U_ELF)
     55 #define CAN_GENERATE_OBJECTS
     56 #endif
     57 
     58 /* prototypes --------------------------------------------------------------- */
     59 static void
     60 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
     61 
     62 static uint32_t
     63 write8(FileStream *out, uint8_t byte, uint32_t column);
     64 
     65 static uint32_t
     66 write32(FileStream *out, uint32_t byte, uint32_t column);
     67 
     68 #ifdef OS400
     69 static uint32_t
     70 write8str(FileStream *out, uint8_t byte, uint32_t column);
     71 #endif
     72 /* -------------------------------------------------------------------------- */
     73 
     74 /*
     75 Creating Template Files for New Platforms
     76 
     77 Let the cc compiler help you get started.
     78 Compile this program
     79     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
     80 with the -S option to produce assembly output.
     81 
     82 For example, this will generate array.s:
     83 gcc -S array.c
     84 
     85 This will produce a .s file that may look like this:
     86 
     87     .file   "array.c"
     88     .version        "01.01"
     89 gcc2_compiled.:
     90     .globl x
     91     .section        .rodata
     92     .align 4
     93     .type    x,@object
     94     .size    x,20
     95 x:
     96     .long   1
     97     .long   2
     98     .long   -559038737
     99     .long   -1
    100     .long   16
    101     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
    102 
    103 which gives a starting point that will compile, and can be transformed
    104 to become the template, generally with some consulting of as docs and
    105 some experimentation.
    106 
    107 If you want ICU to automatically use this assembly, you should
    108 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
    109 where the name is the compiler or platform that you used in this
    110 assemblyHeader data structure.
    111 */
    112 static const struct AssemblyType {
    113     const char *name;
    114     const char *header;
    115     const char *beginLine;
    116     const char *footer;
    117     int8_t      hexType; /* HEX_0X or HEX_0h */
    118 } assemblyHeader[] = {
    119     {"gcc",
    120         ".globl %s\n"
    121         "\t.section .note.GNU-stack,\"\",%%progbits\n"
    122         "\t.section .rodata\n"
    123         "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
    124         "\t.type %s,%%object\n"
    125         "%s:\n\n",
    126 
    127         ".long ","",HEX_0X
    128     },
    129     {"gcc-darwin",
    130         /*"\t.section __TEXT,__text,regular,pure_instructions\n"
    131         "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
    132         ".globl _%s\n"
    133         "\t.data\n"
    134         "\t.const\n"
    135         "\t.align 4\n"  /* 1<<4 = 16 */
    136         "_%s:\n\n",
    137 
    138         ".long ","",HEX_0X
    139     },
    140     {"gcc-cygwin",
    141         ".globl _%s\n"
    142         "\t.section .rodata\n"
    143         "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
    144         "_%s:\n\n",
    145 
    146         ".long ","",HEX_0X
    147     },
    148     {"sun",
    149         "\t.section \".rodata\"\n"
    150         "\t.align   8\n"
    151         ".globl     %s\n"
    152         "%s:\n",
    153 
    154         ".word ","",HEX_0X
    155     },
    156     {"sun-x86",
    157         "Drodata.rodata:\n"
    158         "\t.type   Drodata.rodata,@object\n"
    159         "\t.size   Drodata.rodata,0\n"
    160         "\t.globl  %s\n"
    161         "\t.align  8\n"
    162         "%s:\n",
    163 
    164         ".4byte ","",HEX_0X
    165     },
    166     {"xlc",
    167         ".globl %s{RO}\n"
    168         "\t.toc\n"
    169         "%s:\n"
    170         "\t.csect %s{RO}, 4\n",
    171 
    172         ".long ","",HEX_0X
    173     },
    174     {"aCC-ia64",
    175         "\t.file   \"%s.s\"\n"
    176         "\t.type   %s,@object\n"
    177         "\t.global %s\n"
    178         "\t.secalias .abe$0.rodata, \".rodata\"\n"
    179         "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
    180         "\t.align  16\n"
    181         "%s::\t",
    182 
    183         "data4 ","",HEX_0X
    184     },
    185     {"aCC-parisc",
    186         "\t.SPACE  $TEXT$\n"
    187         "\t.SUBSPA $LIT$\n"
    188         "%s\n"
    189         "\t.EXPORT %s\n"
    190         "\t.ALIGN  16\n",
    191 
    192         ".WORD ","",HEX_0X
    193     },
    194     { "masm",
    195       "\tTITLE %s\n"
    196       "; generated by genccode\n"
    197       ".386\n"
    198       ".model flat\n"
    199       "\tPUBLIC _%s\n"
    200       "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
    201       "\tALIGN 16\n"
    202       "_%s\tLABEL DWORD\n",
    203       "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    204     }
    205 };
    206 
    207 static int32_t assemblyHeaderIndex = -1;
    208 static int32_t hexType = HEX_0X;
    209 
    210 U_CAPI UBool U_EXPORT2
    211 checkAssemblyHeaderName(const char* optAssembly) {
    212     int32_t idx;
    213     assemblyHeaderIndex = -1;
    214     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    215         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
    216             assemblyHeaderIndex = idx;
    217             hexType = assemblyHeader[idx].hexType; /* set the hex type */
    218             return TRUE;
    219         }
    220     }
    221 
    222     return FALSE;
    223 }
    224 
    225 
    226 U_CAPI void U_EXPORT2
    227 printAssemblyHeadersToStdErr(void) {
    228     int32_t idx;
    229     fprintf(stderr, "%s", assemblyHeader[0].name);
    230     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    231         fprintf(stderr, ", %s", assemblyHeader[idx].name);
    232     }
    233     fprintf(stderr,
    234         ")\n");
    235 }
    236 
    237 U_CAPI void U_EXPORT2
    238 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
    239     uint32_t column = MAX_COLUMN;
    240     char entry[64];
    241     uint32_t buffer[1024];
    242     char *bufferStr = (char *)buffer;
    243     FileStream *in, *out;
    244     size_t i, length;
    245 
    246     in=T_FileStream_open(filename, "rb");
    247     if(in==NULL) {
    248         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    249         exit(U_FILE_ACCESS_ERROR);
    250     }
    251 
    252     getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
    253     out=T_FileStream_open(bufferStr, "w");
    254     if(out==NULL) {
    255         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
    256         exit(U_FILE_ACCESS_ERROR);
    257     }
    258 
    259     if (outFilePath != NULL) {
    260         uprv_strcpy(outFilePath, bufferStr);
    261     }
    262 
    263 #ifdef WINDOWS_WITH_GNUC
    264     /* Need to fix the file seperator character when using MinGW. */
    265     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
    266 #endif
    267 
    268     if(optEntryPoint != NULL) {
    269         uprv_strcpy(entry, optEntryPoint);
    270         uprv_strcat(entry, "_dat");
    271     }
    272 
    273     /* turn dashes or dots in the entry name into underscores */
    274     length=uprv_strlen(entry);
    275     for(i=0; i<length; ++i) {
    276         if(entry[i]=='-' || entry[i]=='.') {
    277             entry[i]='_';
    278         }
    279     }
    280 
    281     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
    282         entry, entry, entry, entry,
    283         entry, entry, entry, entry);
    284     T_FileStream_writeLine(out, bufferStr);
    285     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
    286 
    287     for(;;) {
    288         length=T_FileStream_read(in, buffer, sizeof(buffer));
    289         if(length==0) {
    290             break;
    291         }
    292         if (length != sizeof(buffer)) {
    293             /* pad with extra 0's when at the end of the file */
    294             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
    295                 buffer[length+i] = 0;
    296             }
    297         }
    298         for(i=0; i<(length/sizeof(buffer[0])); i++) {
    299             column = write32(out, buffer[i], column);
    300         }
    301     }
    302 
    303     T_FileStream_writeLine(out, "\n");
    304 
    305     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
    306         entry, entry, entry, entry,
    307         entry, entry, entry, entry);
    308     T_FileStream_writeLine(out, bufferStr);
    309 
    310     if(T_FileStream_error(in)) {
    311         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    312         exit(U_FILE_ACCESS_ERROR);
    313     }
    314 
    315     if(T_FileStream_error(out)) {
    316         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    317         exit(U_FILE_ACCESS_ERROR);
    318     }
    319 
    320     T_FileStream_close(out);
    321     T_FileStream_close(in);
    322 }
    323 
    324 U_CAPI void U_EXPORT2
    325 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
    326     uint32_t column = MAX_COLUMN;
    327     char buffer[4096], entry[64];
    328     FileStream *in, *out;
    329     size_t i, length;
    330 
    331     in=T_FileStream_open(filename, "rb");
    332     if(in==NULL) {
    333         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    334         exit(U_FILE_ACCESS_ERROR);
    335     }
    336 
    337     if(optName != NULL) { /* prepend  'icudt28_' */
    338       strcpy(entry, optName);
    339       strcat(entry, "_");
    340     } else {
    341       entry[0] = 0;
    342     }
    343 
    344     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
    345     if (outFilePath != NULL) {
    346         uprv_strcpy(outFilePath, buffer);
    347     }
    348     out=T_FileStream_open(buffer, "w");
    349     if(out==NULL) {
    350         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
    351         exit(U_FILE_ACCESS_ERROR);
    352     }
    353 
    354     /* turn dashes or dots in the entry name into underscores */
    355     length=uprv_strlen(entry);
    356     for(i=0; i<length; ++i) {
    357         if(entry[i]=='-' || entry[i]=='.') {
    358             entry[i]='_';
    359         }
    360     }
    361 
    362 #ifdef OS400
    363     /*
    364     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
    365 
    366     This is here because this platform can't currently put
    367     const data into the read-only pages of an object or
    368     shared library (service program). Only strings are allowed in read-only
    369     pages, so we use char * strings to store the data.
    370 
    371     In order to prevent the beginning of the data from ever matching the
    372     magic numbers we must still use the initial double.
    373     [grhoten 4/24/2003]
    374     */
    375     sprintf(buffer,
    376         "#define U_DISABLE_RENAMING 1\n"
    377         "#include \"unicode/umachine.h\"\n"
    378         "U_CDECL_BEGIN\n"
    379         "const struct {\n"
    380         "    double bogus;\n"
    381         "    const char *bytes; \n"
    382         "} %s={ 0.0, \n",
    383         entry);
    384     T_FileStream_writeLine(out, buffer);
    385 
    386     for(;;) {
    387         length=T_FileStream_read(in, buffer, sizeof(buffer));
    388         if(length==0) {
    389             break;
    390         }
    391         for(i=0; i<length; ++i) {
    392             column = write8str(out, (uint8_t)buffer[i], column);
    393         }
    394     }
    395 
    396     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
    397 #else
    398     /* Function renaming shouldn't be done in data */
    399     sprintf(buffer,
    400         "#define U_DISABLE_RENAMING 1\n"
    401         "#include \"unicode/umachine.h\"\n"
    402         "U_CDECL_BEGIN\n"
    403         "const struct {\n"
    404         "    double bogus;\n"
    405         "    uint8_t bytes[%ld]; \n"
    406         "} %s={ 0.0, {\n",
    407         (long)T_FileStream_size(in), entry);
    408     T_FileStream_writeLine(out, buffer);
    409 
    410     for(;;) {
    411         length=T_FileStream_read(in, buffer, sizeof(buffer));
    412         if(length==0) {
    413             break;
    414         }
    415         for(i=0; i<length; ++i) {
    416             column = write8(out, (uint8_t)buffer[i], column);
    417         }
    418     }
    419 
    420     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
    421 #endif
    422 
    423     if(T_FileStream_error(in)) {
    424         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    425         exit(U_FILE_ACCESS_ERROR);
    426     }
    427 
    428     if(T_FileStream_error(out)) {
    429         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    430         exit(U_FILE_ACCESS_ERROR);
    431     }
    432 
    433     T_FileStream_close(out);
    434     T_FileStream_close(in);
    435 }
    436 
    437 static uint32_t
    438 write32(FileStream *out, uint32_t bitField, uint32_t column) {
    439     int32_t i;
    440     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
    441     char *s = bitFieldStr;
    442     uint8_t *ptrIdx = (uint8_t *)&bitField;
    443     static const char hexToStr[16] = {
    444         '0','1','2','3',
    445         '4','5','6','7',
    446         '8','9','A','B',
    447         'C','D','E','F'
    448     };
    449 
    450     /* write the value, possibly with comma and newline */
    451     if(column==MAX_COLUMN) {
    452         /* first byte */
    453         column=1;
    454     } else if(column<32) {
    455         *(s++)=',';
    456         ++column;
    457     } else {
    458         *(s++)='\n';
    459         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
    460         s+=uprv_strlen(s);
    461         column=1;
    462     }
    463 
    464     if (bitField < 10) {
    465         /* It's a small number. Don't waste the space for 0x */
    466         *(s++)=hexToStr[bitField];
    467     }
    468     else {
    469         int seenNonZero = 0; /* This is used to remove leading zeros */
    470 
    471         if(hexType==HEX_0X) {
    472          *(s++)='0';
    473          *(s++)='x';
    474         } else if(hexType==HEX_0H) {
    475          *(s++)='0';
    476         }
    477 
    478         /* This creates a 32-bit field */
    479 #if U_IS_BIG_ENDIAN
    480         for (i = 0; i < sizeof(uint32_t); i++)
    481 #else
    482         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
    483 #endif
    484         {
    485             uint8_t value = ptrIdx[i];
    486             if (value || seenNonZero) {
    487                 *(s++)=hexToStr[value>>4];
    488                 *(s++)=hexToStr[value&0xF];
    489                 seenNonZero = 1;
    490             }
    491         }
    492         if(hexType==HEX_0H) {
    493          *(s++)='h';
    494         }
    495     }
    496 
    497     *(s++)=0;
    498     T_FileStream_writeLine(out, bitFieldStr);
    499     return column;
    500 }
    501 
    502 static uint32_t
    503 write8(FileStream *out, uint8_t byte, uint32_t column) {
    504     char s[4];
    505     int i=0;
    506 
    507     /* convert the byte value to a string */
    508     if(byte>=100) {
    509         s[i++]=(char)('0'+byte/100);
    510         byte%=100;
    511     }
    512     if(i>0 || byte>=10) {
    513         s[i++]=(char)('0'+byte/10);
    514         byte%=10;
    515     }
    516     s[i++]=(char)('0'+byte);
    517     s[i]=0;
    518 
    519     /* write the value, possibly with comma and newline */
    520     if(column==MAX_COLUMN) {
    521         /* first byte */
    522         column=1;
    523     } else if(column<16) {
    524         T_FileStream_writeLine(out, ",");
    525         ++column;
    526     } else {
    527         T_FileStream_writeLine(out, ",\n");
    528         column=1;
    529     }
    530     T_FileStream_writeLine(out, s);
    531     return column;
    532 }
    533 
    534 #ifdef OS400
    535 static uint32_t
    536 write8str(FileStream *out, uint8_t byte, uint32_t column) {
    537     char s[8];
    538 
    539     if (byte > 7)
    540         sprintf(s, "\\x%X", byte);
    541     else
    542         sprintf(s, "\\%X", byte);
    543 
    544     /* write the value, possibly with comma and newline */
    545     if(column==MAX_COLUMN) {
    546         /* first byte */
    547         column=1;
    548         T_FileStream_writeLine(out, "\"");
    549     } else if(column<24) {
    550         ++column;
    551     } else {
    552         T_FileStream_writeLine(out, "\"\n\"");
    553         column=1;
    554     }
    555     T_FileStream_writeLine(out, s);
    556     return column;
    557 }
    558 #endif
    559 
    560 static void
    561 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
    562     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
    563 
    564     /* copy path */
    565     if(destdir!=NULL && *destdir!=0) {
    566         do {
    567             *outFilename++=*destdir++;
    568         } while(*destdir!=0);
    569         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
    570             *outFilename++=U_FILE_SEP_CHAR;
    571         }
    572         inFilename=basename;
    573     } else {
    574         while(inFilename<basename) {
    575             *outFilename++=*inFilename++;
    576         }
    577     }
    578 
    579     if(suffix==NULL) {
    580         /* the filename does not have a suffix */
    581         uprv_strcpy(entryName, inFilename);
    582         if(optFilename != NULL) {
    583           uprv_strcpy(outFilename, optFilename);
    584         } else {
    585           uprv_strcpy(outFilename, inFilename);
    586         }
    587         uprv_strcat(outFilename, newSuffix);
    588     } else {
    589         char *saveOutFilename = outFilename;
    590         /* copy basename */
    591         while(inFilename<suffix) {
    592             if(*inFilename=='-') {
    593                 /* iSeries cannot have '-' in the .o objects. */
    594                 *outFilename++=*entryName++='_';
    595                 inFilename++;
    596             }
    597             else {
    598                 *outFilename++=*entryName++=*inFilename++;
    599             }
    600         }
    601 
    602         /* replace '.' by '_' */
    603         *outFilename++=*entryName++='_';
    604         ++inFilename;
    605 
    606         /* copy suffix */
    607         while(*inFilename!=0) {
    608             *outFilename++=*entryName++=*inFilename++;
    609         }
    610 
    611         *entryName=0;
    612 
    613         if(optFilename != NULL) {
    614             uprv_strcpy(saveOutFilename, optFilename);
    615             uprv_strcat(saveOutFilename, newSuffix);
    616         } else {
    617             /* add ".c" */
    618             uprv_strcpy(outFilename, newSuffix);
    619         }
    620     }
    621 }
    622 
    623 #ifdef CAN_GENERATE_OBJECTS
    624 static void
    625 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
    626     union {
    627         char        bytes[2048];
    628 #ifdef U_ELF
    629         Elf32_Ehdr  header32;
    630         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
    631 #elif defined(U_WINDOWS)
    632         IMAGE_FILE_HEADER header;
    633 #endif
    634     } buffer;
    635 
    636     const char *filename;
    637     FileStream *in;
    638     int32_t length;
    639 
    640 #ifdef U_ELF
    641 
    642 #elif defined(U_WINDOWS)
    643     const IMAGE_FILE_HEADER *pHeader;
    644 #else
    645 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    646 #endif
    647 
    648     if(optMatchArch != NULL) {
    649         filename=optMatchArch;
    650     } else {
    651         /* set defaults */
    652 #ifdef U_ELF
    653         /* set EM_386 because elf.h does not provide better defaults */
    654         *pCPU=EM_386;
    655         *pBits=32;
    656         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
    657 #elif defined(U_WINDOWS)
    658 /* _M_IA64 should be defined in windows.h */
    659 #   if defined(_M_IA64)
    660         *pCPU=IMAGE_FILE_MACHINE_IA64;
    661 #   elif defined(_M_AMD64)
    662         *pCPU=IMAGE_FILE_MACHINE_AMD64;
    663 #   else
    664         *pCPU=IMAGE_FILE_MACHINE_I386;
    665 #   endif
    666         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    667         *pIsBigEndian=FALSE;
    668 #else
    669 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    670 #endif
    671         return;
    672     }
    673 
    674     in=T_FileStream_open(filename, "rb");
    675     if(in==NULL) {
    676         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
    677         exit(U_FILE_ACCESS_ERROR);
    678     }
    679     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
    680 
    681 #ifdef U_ELF
    682     if(length<sizeof(Elf32_Ehdr)) {
    683         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    684         exit(U_UNSUPPORTED_ERROR);
    685     }
    686     if(
    687         buffer.header32.e_ident[0]!=ELFMAG0 ||
    688         buffer.header32.e_ident[1]!=ELFMAG1 ||
    689         buffer.header32.e_ident[2]!=ELFMAG2 ||
    690         buffer.header32.e_ident[3]!=ELFMAG3 ||
    691         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
    692     ) {
    693         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
    694         exit(U_UNSUPPORTED_ERROR);
    695     }
    696 
    697     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
    698 #ifdef U_ELF64
    699     if(*pBits!=32 && *pBits!=64) {
    700         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
    701         exit(U_UNSUPPORTED_ERROR);
    702     }
    703 #else
    704     if(*pBits!=32) {
    705         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
    706         exit(U_UNSUPPORTED_ERROR);
    707     }
    708 #endif
    709 
    710     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
    711     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
    712         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
    713         exit(U_UNSUPPORTED_ERROR);
    714     }
    715     /* TODO: Support byte swapping */
    716 
    717     *pCPU=buffer.header32.e_machine;
    718 #elif defined(U_WINDOWS)
    719     if(length<sizeof(IMAGE_FILE_HEADER)) {
    720         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    721         exit(U_UNSUPPORTED_ERROR);
    722     }
    723     /* TODO: Use buffer.header.  Keep aliasing legal.  */
    724     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
    725     *pCPU=pHeader->Machine;
    726     /*
    727      * The number of bits is implicit with the Machine value.
    728      * *pBits is ignored in the calling code, so this need not be precise.
    729      */
    730     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    731     /* Windows always runs on little-endian CPUs. */
    732     *pIsBigEndian=FALSE;
    733 #else
    734 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    735 #endif
    736 
    737     T_FileStream_close(in);
    738 }
    739 
    740 U_CAPI void U_EXPORT2
    741 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
    742     /* common variables */
    743     char buffer[4096], entry[40]={ 0 };
    744     FileStream *in, *out;
    745     const char *newSuffix;
    746     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
    747 
    748     uint16_t cpu, bits;
    749     UBool makeBigEndian;
    750 
    751     /* platform-specific variables and initialization code */
    752 #ifdef U_ELF
    753     /* 32-bit Elf file header */
    754     static Elf32_Ehdr header32={
    755         {
    756             /* e_ident[] */
    757             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    758             ELFCLASS32,
    759             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    760             EV_CURRENT /* EI_VERSION */
    761         },
    762         ET_REL,
    763         EM_386,
    764         EV_CURRENT, /* e_version */
    765         0, /* e_entry */
    766         0, /* e_phoff */
    767         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
    768         0, /* e_flags */
    769         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
    770         0, /* e_phentsize */
    771         0, /* e_phnum */
    772         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
    773         5, /* e_shnum */
    774         2 /* e_shstrndx */
    775     };
    776 
    777     /* 32-bit Elf section header table */
    778     static Elf32_Shdr sectionHeaders32[5]={
    779         { /* SHN_UNDEF */
    780             0
    781         },
    782         { /* .symtab */
    783             1, /* sh_name */
    784             SHT_SYMTAB,
    785             0, /* sh_flags */
    786             0, /* sh_addr */
    787             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
    788             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
    789             3, /* sh_link=sect hdr index of .strtab */
    790             1, /* sh_info=One greater than the symbol table index of the last
    791                 * local symbol (with STB_LOCAL). */
    792             4, /* sh_addralign */
    793             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
    794         },
    795         { /* .shstrtab */
    796             9, /* sh_name */
    797             SHT_STRTAB,
    798             0, /* sh_flags */
    799             0, /* sh_addr */
    800             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
    801             40, /* sh_size */
    802             0, /* sh_link */
    803             0, /* sh_info */
    804             1, /* sh_addralign */
    805             0 /* sh_entsize */
    806         },
    807         { /* .strtab */
    808             19, /* sh_name */
    809             SHT_STRTAB,
    810             0, /* sh_flags */
    811             0, /* sh_addr */
    812             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
    813             (Elf32_Word)sizeof(entry), /* sh_size */
    814             0, /* sh_link */
    815             0, /* sh_info */
    816             1, /* sh_addralign */
    817             0 /* sh_entsize */
    818         },
    819         { /* .rodata */
    820             27, /* sh_name */
    821             SHT_PROGBITS,
    822             SHF_ALLOC, /* sh_flags */
    823             0, /* sh_addr */
    824             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
    825             0, /* sh_size */
    826             0, /* sh_link */
    827             0, /* sh_info */
    828             16, /* sh_addralign */
    829             0 /* sh_entsize */
    830         }
    831     };
    832 
    833     /* symbol table */
    834     static Elf32_Sym symbols32[2]={
    835         { /* STN_UNDEF */
    836             0
    837         },
    838         { /* data entry point */
    839             1, /* st_name */
    840             0, /* st_value */
    841             0, /* st_size */
    842             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    843             0, /* st_other */
    844             4 /* st_shndx=index of related section table entry */
    845         }
    846     };
    847 
    848     /* section header string table, with decimal string offsets */
    849     static const char sectionStrings[40]=
    850         /*  0 */ "\0"
    851         /*  1 */ ".symtab\0"
    852         /*  9 */ ".shstrtab\0"
    853         /* 19 */ ".strtab\0"
    854         /* 27 */ ".rodata\0"
    855         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
    856         /* 40: padded to multiple of 8 bytes */
    857 
    858     /*
    859      * Use entry[] for the string table which will contain only the
    860      * entry point name.
    861      * entry[0] must be 0 (NUL)
    862      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
    863      */
    864 
    865     /* 16-align .rodata in the .o file, just in case */
    866     static const char padding[16]={ 0 };
    867     int32_t paddingSize;
    868 
    869 #ifdef U_ELF64
    870     /* 64-bit Elf file header */
    871     static Elf64_Ehdr header64={
    872         {
    873             /* e_ident[] */
    874             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    875             ELFCLASS64,
    876             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    877             EV_CURRENT /* EI_VERSION */
    878         },
    879         ET_REL,
    880         EM_X86_64,
    881         EV_CURRENT, /* e_version */
    882         0, /* e_entry */
    883         0, /* e_phoff */
    884         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
    885         0, /* e_flags */
    886         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
    887         0, /* e_phentsize */
    888         0, /* e_phnum */
    889         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
    890         5, /* e_shnum */
    891         2 /* e_shstrndx */
    892     };
    893 
    894     /* 64-bit Elf section header table */
    895     static Elf64_Shdr sectionHeaders64[5]={
    896         { /* SHN_UNDEF */
    897             0
    898         },
    899         { /* .symtab */
    900             1, /* sh_name */
    901             SHT_SYMTAB,
    902             0, /* sh_flags */
    903             0, /* sh_addr */
    904             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
    905             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
    906             3, /* sh_link=sect hdr index of .strtab */
    907             1, /* sh_info=One greater than the symbol table index of the last
    908                 * local symbol (with STB_LOCAL). */
    909             4, /* sh_addralign */
    910             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
    911         },
    912         { /* .shstrtab */
    913             9, /* sh_name */
    914             SHT_STRTAB,
    915             0, /* sh_flags */
    916             0, /* sh_addr */
    917             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
    918             40, /* sh_size */
    919             0, /* sh_link */
    920             0, /* sh_info */
    921             1, /* sh_addralign */
    922             0 /* sh_entsize */
    923         },
    924         { /* .strtab */
    925             19, /* sh_name */
    926             SHT_STRTAB,
    927             0, /* sh_flags */
    928             0, /* sh_addr */
    929             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
    930             (Elf64_Xword)sizeof(entry), /* sh_size */
    931             0, /* sh_link */
    932             0, /* sh_info */
    933             1, /* sh_addralign */
    934             0 /* sh_entsize */
    935         },
    936         { /* .rodata */
    937             27, /* sh_name */
    938             SHT_PROGBITS,
    939             SHF_ALLOC, /* sh_flags */
    940             0, /* sh_addr */
    941             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
    942             0, /* sh_size */
    943             0, /* sh_link */
    944             0, /* sh_info */
    945             16, /* sh_addralign */
    946             0 /* sh_entsize */
    947         }
    948     };
    949 
    950     /*
    951      * 64-bit symbol table
    952      * careful: different order of items compared with Elf32_sym!
    953      */
    954     static Elf64_Sym symbols64[2]={
    955         { /* STN_UNDEF */
    956             0
    957         },
    958         { /* data entry point */
    959             1, /* st_name */
    960             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    961             0, /* st_other */
    962             4, /* st_shndx=index of related section table entry */
    963             0, /* st_value */
    964             0 /* st_size */
    965         }
    966     };
    967 
    968 #endif /* U_ELF64 */
    969 
    970     /* entry[] has a leading NUL */
    971     entryOffset=1;
    972 
    973     /* in the common code, count entryLength from after the NUL */
    974     entryLengthOffset=1;
    975 
    976     newSuffix=".o";
    977 
    978 #elif defined(U_WINDOWS)
    979     struct {
    980         IMAGE_FILE_HEADER fileHeader;
    981         IMAGE_SECTION_HEADER sections[2];
    982         char linkerOptions[100];
    983     } objHeader;
    984     IMAGE_SYMBOL symbols[1];
    985     struct {
    986         DWORD sizeofLongNames;
    987         char longNames[100];
    988     } symbolNames;
    989 
    990     /*
    991      * entry sometimes has a leading '_'
    992      * overwritten if entryOffset==0 depending on the target platform
    993      * see check for cpu below
    994      */
    995     entry[0]='_';
    996 
    997     newSuffix=".obj";
    998 #else
    999 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1000 #endif
   1001 
   1002     /* deal with options, files and the entry point name */
   1003     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
   1004     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%hu\n", cpu, bits, makeBigEndian);
   1005 #ifdef U_WINDOWS
   1006     if(cpu==IMAGE_FILE_MACHINE_I386) {
   1007         entryOffset=1;
   1008     }
   1009 #endif
   1010 
   1011     in=T_FileStream_open(filename, "rb");
   1012     if(in==NULL) {
   1013         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
   1014         exit(U_FILE_ACCESS_ERROR);
   1015     }
   1016     size=T_FileStream_size(in);
   1017 
   1018     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
   1019     if (outFilePath != NULL) {
   1020         uprv_strcpy(outFilePath, buffer);
   1021     }
   1022 
   1023     if(optEntryPoint != NULL) {
   1024         uprv_strcpy(entry+entryOffset, optEntryPoint);
   1025         uprv_strcat(entry+entryOffset, "_dat");
   1026     }
   1027     /* turn dashes in the entry name into underscores */
   1028     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
   1029     for(i=0; i<entryLength; ++i) {
   1030         if(entry[entryLengthOffset+i]=='-') {
   1031             entry[entryLengthOffset+i]='_';
   1032         }
   1033     }
   1034 
   1035     /* open the output file */
   1036     out=T_FileStream_open(buffer, "wb");
   1037     if(out==NULL) {
   1038         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
   1039         exit(U_FILE_ACCESS_ERROR);
   1040     }
   1041 
   1042 #ifdef U_ELF
   1043     if(bits==32) {
   1044         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1045         header32.e_machine=cpu;
   1046 
   1047         /* 16-align .rodata in the .o file, just in case */
   1048         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
   1049         if(paddingSize!=0) {
   1050                 paddingSize=0x10-paddingSize;
   1051                 sectionHeaders32[4].sh_offset+=paddingSize;
   1052         }
   1053 
   1054         sectionHeaders32[4].sh_size=(Elf32_Word)size;
   1055 
   1056         symbols32[1].st_size=(Elf32_Word)size;
   1057 
   1058         /* write .o headers */
   1059         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
   1060         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
   1061         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
   1062     } else /* bits==64 */ {
   1063 #ifdef U_ELF64
   1064         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1065         header64.e_machine=cpu;
   1066 
   1067         /* 16-align .rodata in the .o file, just in case */
   1068         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
   1069         if(paddingSize!=0) {
   1070                 paddingSize=0x10-paddingSize;
   1071                 sectionHeaders64[4].sh_offset+=paddingSize;
   1072         }
   1073 
   1074         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
   1075 
   1076         symbols64[1].st_size=(Elf64_Xword)size;
   1077 
   1078         /* write .o headers */
   1079         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
   1080         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
   1081         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
   1082 #endif
   1083     }
   1084 
   1085     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
   1086     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
   1087     if(paddingSize!=0) {
   1088         T_FileStream_write(out, padding, paddingSize);
   1089     }
   1090 #elif defined(U_WINDOWS)
   1091     /* populate the .obj headers */
   1092     uprv_memset(&objHeader, 0, sizeof(objHeader));
   1093     uprv_memset(&symbols, 0, sizeof(symbols));
   1094     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
   1095 
   1096     /* write the linker export directive */
   1097     uprv_strcpy(objHeader.linkerOptions, "-export:");
   1098     length=8;
   1099     uprv_strcpy(objHeader.linkerOptions+length, entry);
   1100     length+=entryLength;
   1101     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
   1102     length+=6;
   1103 
   1104     /* set the file header */
   1105     objHeader.fileHeader.Machine=cpu;
   1106     objHeader.fileHeader.NumberOfSections=2;
   1107     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
   1108     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
   1109     objHeader.fileHeader.NumberOfSymbols=1;
   1110 
   1111     /* set the section for the linker options */
   1112     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
   1113     objHeader.sections[0].SizeOfRawData=length;
   1114     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
   1115     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
   1116 
   1117     /* set the data section */
   1118     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
   1119     objHeader.sections[1].SizeOfRawData=size;
   1120     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
   1121     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
   1122 
   1123     /* set the symbol table */
   1124     if(entryLength<=8) {
   1125         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
   1126         symbolNames.sizeofLongNames=4;
   1127     } else {
   1128         symbols[0].N.Name.Short=0;
   1129         symbols[0].N.Name.Long=4;
   1130         symbolNames.sizeofLongNames=4+entryLength+1;
   1131         uprv_strcpy(symbolNames.longNames, entry);
   1132     }
   1133     symbols[0].SectionNumber=2;
   1134     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
   1135 
   1136     /* write the file header and the linker options section */
   1137     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
   1138 #else
   1139 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1140 #endif
   1141 
   1142     /* copy the data file into section 2 */
   1143     for(;;) {
   1144         length=T_FileStream_read(in, buffer, sizeof(buffer));
   1145         if(length==0) {
   1146             break;
   1147         }
   1148         T_FileStream_write(out, buffer, (int32_t)length);
   1149     }
   1150 
   1151 #ifdef U_WINDOWS
   1152     /* write the symbol table */
   1153     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
   1154     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
   1155 #endif
   1156 
   1157     if(T_FileStream_error(in)) {
   1158         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
   1159         exit(U_FILE_ACCESS_ERROR);
   1160     }
   1161 
   1162     if(T_FileStream_error(out)) {
   1163         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
   1164         exit(U_FILE_ACCESS_ERROR);
   1165     }
   1166 
   1167     T_FileStream_close(out);
   1168     T_FileStream_close(in);
   1169 }
   1170 #endif
   1171