Home | History | Annotate | Download | only in toolutil
      1 /******************************************************************************
      2  *   Copyright (C) 2009-2012, International Business Machines
      3  *   Corporation and others.  All Rights Reserved.
      4  *******************************************************************************
      5  */
      6 #include "unicode/utypes.h"
      7 
      8 #if U_PLATFORM_HAS_WIN32_API
      9 #   define VC_EXTRALEAN
     10 #   define WIN32_LEAN_AND_MEAN
     11 #   define NOUSER
     12 #   define NOSERVICE
     13 #   define NOIME
     14 #   define NOMCX
     15 #include <windows.h>
     16 #include <time.h>
     17 #   ifdef __GNUC__
     18 #       define WINDOWS_WITH_GNUC
     19 #   endif
     20 #endif
     21 
     22 #if U_PLATFORM_IS_LINUX_BASED
     23 #   define U_ELF
     24 #endif
     25 
     26 #ifdef U_ELF
     27 #   include <elf.h>
     28 #   if defined(ELFCLASS64)
     29 #       define U_ELF64
     30 #   endif
     31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
     32 #   ifndef EM_X86_64
     33 #       define EM_X86_64 62
     34 #   endif
     35 #   define ICU_ENTRY_OFFSET 0
     36 #endif
     37 
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include "unicode/putil.h"
     41 #include "cmemory.h"
     42 #include "cstring.h"
     43 #include "filestrm.h"
     44 #include "toolutil.h"
     45 #include "unicode/uclean.h"
     46 #include "uoptions.h"
     47 #include "pkg_genc.h"
     48 
     49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
     50 
     51 #define HEX_0X 0 /*  0x1234 */
     52 #define HEX_0H 1 /*  01234h */
     53 
     54 /*
     55  * The following is needed by MinGW64
     56  */
     57 #ifndef __USER_LABEL_PREFIX__
     58 #define __USER_LABEL_PREFIX__ _
     59 #endif
     60 #define GCC_LABEL_PREFIX_INTERNAL(a) #a
     61 #define GCC_LABEL_PREFIX(a) GCC_LABEL_PREFIX_INTERNAL(a)
     62 
     63 /* prototypes --------------------------------------------------------------- */
     64 static void
     65 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
     66 
     67 static uint32_t
     68 write8(FileStream *out, uint8_t byte, uint32_t column);
     69 
     70 static uint32_t
     71 write32(FileStream *out, uint32_t byte, uint32_t column);
     72 
     73 #if U_PLATFORM == U_PF_OS400
     74 static uint32_t
     75 write8str(FileStream *out, uint8_t byte, uint32_t column);
     76 #endif
     77 /* -------------------------------------------------------------------------- */
     78 
     79 /*
     80 Creating Template Files for New Platforms
     81 
     82 Let the cc compiler help you get started.
     83 Compile this program
     84     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
     85 with the -S option to produce assembly output.
     86 
     87 For example, this will generate array.s:
     88 gcc -S array.c
     89 
     90 This will produce a .s file that may look like this:
     91 
     92     .file   "array.c"
     93     .version        "01.01"
     94 gcc2_compiled.:
     95     .globl x
     96     .section        .rodata
     97     .align 4
     98     .type    x,@object
     99     .size    x,20
    100 x:
    101     .long   1
    102     .long   2
    103     .long   -559038737
    104     .long   -1
    105     .long   16
    106     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
    107 
    108 which gives a starting point that will compile, and can be transformed
    109 to become the template, generally with some consulting of as docs and
    110 some experimentation.
    111 
    112 If you want ICU to automatically use this assembly, you should
    113 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
    114 where the name is the compiler or platform that you used in this
    115 assemblyHeader data structure.
    116 */
    117 static const struct AssemblyType {
    118     const char *name;
    119     const char *header;
    120     const char *beginLine;
    121     const char *footer;
    122     int8_t      hexType; /* HEX_0X or HEX_0h */
    123 } assemblyHeader[] = {
    124     {"gcc",
    125         ".globl %s\n"
    126         "\t.section .note.GNU-stack,\"\",%%progbits\n"
    127         "\t.section .rodata\n"
    128         "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
    129         "\t.type %s,%%object\n"
    130         "%s:\n\n",
    131 
    132         ".long ","",HEX_0X
    133     },
    134     {"gcc-darwin",
    135         /*"\t.section __TEXT,__text,regular,pure_instructions\n"
    136         "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
    137         ".globl _%s\n"
    138         "\t.data\n"
    139         "\t.const\n"
    140         "\t.align 4\n"  /* 1<<4 = 16 */
    141         "_%s:\n\n",
    142 
    143         ".long ","",HEX_0X
    144     },
    145     {"gcc-cygwin",
    146         ".globl "GCC_LABEL_PREFIX(__USER_LABEL_PREFIX__) "%s\n"
    147         "\t.section .rodata\n"
    148         "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
    149         GCC_LABEL_PREFIX(__USER_LABEL_PREFIX__) "%s:\n\n",
    150 
    151         ".long ","",HEX_0X
    152     },
    153     {"sun",
    154         "\t.section \".rodata\"\n"
    155         "\t.align   8\n"
    156         ".globl     %s\n"
    157         "%s:\n",
    158 
    159         ".word ","",HEX_0X
    160     },
    161     {"sun-x86",
    162         "Drodata.rodata:\n"
    163         "\t.type   Drodata.rodata,@object\n"
    164         "\t.size   Drodata.rodata,0\n"
    165         "\t.globl  %s\n"
    166         "\t.align  8\n"
    167         "%s:\n",
    168 
    169         ".4byte ","",HEX_0X
    170     },
    171     {"xlc",
    172         ".globl %s{RO}\n"
    173         "\t.toc\n"
    174         "%s:\n"
    175         "\t.csect %s{RO}, 4\n",
    176 
    177         ".long ","",HEX_0X
    178     },
    179     {"aCC-ia64",
    180         "\t.file   \"%s.s\"\n"
    181         "\t.type   %s,@object\n"
    182         "\t.global %s\n"
    183         "\t.secalias .abe$0.rodata, \".rodata\"\n"
    184         "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
    185         "\t.align  16\n"
    186         "%s::\t",
    187 
    188         "data4 ","",HEX_0X
    189     },
    190     {"aCC-parisc",
    191         "\t.SPACE  $TEXT$\n"
    192         "\t.SUBSPA $LIT$\n"
    193         "%s\n"
    194         "\t.EXPORT %s\n"
    195         "\t.ALIGN  16\n",
    196 
    197         ".WORD ","",HEX_0X
    198     },
    199     { "masm",
    200       "\tTITLE %s\n"
    201       "; generated by genccode\n"
    202       ".386\n"
    203       ".model flat\n"
    204       "\tPUBLIC _%s\n"
    205       "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
    206       "\tALIGN 16\n"
    207       "_%s\tLABEL DWORD\n",
    208       "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    209     }
    210 };
    211 
    212 static int32_t assemblyHeaderIndex = -1;
    213 static int32_t hexType = HEX_0X;
    214 
    215 U_CAPI UBool U_EXPORT2
    216 checkAssemblyHeaderName(const char* optAssembly) {
    217     int32_t idx;
    218     assemblyHeaderIndex = -1;
    219     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    220         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
    221             assemblyHeaderIndex = idx;
    222             hexType = assemblyHeader[idx].hexType; /* set the hex type */
    223             return TRUE;
    224         }
    225     }
    226 
    227     return FALSE;
    228 }
    229 
    230 
    231 U_CAPI void U_EXPORT2
    232 printAssemblyHeadersToStdErr(void) {
    233     int32_t idx;
    234     fprintf(stderr, "%s", assemblyHeader[0].name);
    235     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    236         fprintf(stderr, ", %s", assemblyHeader[idx].name);
    237     }
    238     fprintf(stderr,
    239         ")\n");
    240 }
    241 
    242 U_CAPI void U_EXPORT2
    243 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
    244     uint32_t column = MAX_COLUMN;
    245     char entry[64];
    246     uint32_t buffer[1024];
    247     char *bufferStr = (char *)buffer;
    248     FileStream *in, *out;
    249     size_t i, length;
    250 
    251     in=T_FileStream_open(filename, "rb");
    252     if(in==NULL) {
    253         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    254         exit(U_FILE_ACCESS_ERROR);
    255     }
    256 
    257     getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
    258     out=T_FileStream_open(bufferStr, "w");
    259     if(out==NULL) {
    260         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
    261         exit(U_FILE_ACCESS_ERROR);
    262     }
    263 
    264     if (outFilePath != NULL) {
    265         uprv_strcpy(outFilePath, bufferStr);
    266     }
    267 
    268 #ifdef WINDOWS_WITH_GNUC
    269     /* Need to fix the file seperator character when using MinGW. */
    270     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
    271 #endif
    272 
    273     if(optEntryPoint != NULL) {
    274         uprv_strcpy(entry, optEntryPoint);
    275         uprv_strcat(entry, "_dat");
    276     }
    277 
    278     /* turn dashes or dots in the entry name into underscores */
    279     length=uprv_strlen(entry);
    280     for(i=0; i<length; ++i) {
    281         if(entry[i]=='-' || entry[i]=='.') {
    282             entry[i]='_';
    283         }
    284     }
    285 
    286     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
    287         entry, entry, entry, entry,
    288         entry, entry, entry, entry);
    289     T_FileStream_writeLine(out, bufferStr);
    290     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
    291 
    292     for(;;) {
    293         length=T_FileStream_read(in, buffer, sizeof(buffer));
    294         if(length==0) {
    295             break;
    296         }
    297         if (length != sizeof(buffer)) {
    298             /* pad with extra 0's when at the end of the file */
    299             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
    300                 buffer[length+i] = 0;
    301             }
    302         }
    303         for(i=0; i<(length/sizeof(buffer[0])); i++) {
    304             column = write32(out, buffer[i], column);
    305         }
    306     }
    307 
    308     T_FileStream_writeLine(out, "\n");
    309 
    310     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
    311         entry, entry, entry, entry,
    312         entry, entry, entry, entry);
    313     T_FileStream_writeLine(out, bufferStr);
    314 
    315     if(T_FileStream_error(in)) {
    316         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    317         exit(U_FILE_ACCESS_ERROR);
    318     }
    319 
    320     if(T_FileStream_error(out)) {
    321         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    322         exit(U_FILE_ACCESS_ERROR);
    323     }
    324 
    325     T_FileStream_close(out);
    326     T_FileStream_close(in);
    327 }
    328 
    329 U_CAPI void U_EXPORT2
    330 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
    331     uint32_t column = MAX_COLUMN;
    332     char buffer[4096], entry[64];
    333     FileStream *in, *out;
    334     size_t i, length;
    335 
    336     in=T_FileStream_open(filename, "rb");
    337     if(in==NULL) {
    338         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    339         exit(U_FILE_ACCESS_ERROR);
    340     }
    341 
    342     if(optName != NULL) { /* prepend  'icudt28_' */
    343       strcpy(entry, optName);
    344       strcat(entry, "_");
    345     } else {
    346       entry[0] = 0;
    347     }
    348 
    349     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
    350     if (outFilePath != NULL) {
    351         uprv_strcpy(outFilePath, buffer);
    352     }
    353     out=T_FileStream_open(buffer, "w");
    354     if(out==NULL) {
    355         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
    356         exit(U_FILE_ACCESS_ERROR);
    357     }
    358 
    359     /* turn dashes or dots in the entry name into underscores */
    360     length=uprv_strlen(entry);
    361     for(i=0; i<length; ++i) {
    362         if(entry[i]=='-' || entry[i]=='.') {
    363             entry[i]='_';
    364         }
    365     }
    366 
    367 #if U_PLATFORM == U_PF_OS400
    368     /*
    369     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
    370 
    371     This is here because this platform can't currently put
    372     const data into the read-only pages of an object or
    373     shared library (service program). Only strings are allowed in read-only
    374     pages, so we use char * strings to store the data.
    375 
    376     In order to prevent the beginning of the data from ever matching the
    377     magic numbers we must still use the initial double.
    378     [grhoten 4/24/2003]
    379     */
    380     sprintf(buffer,
    381         "#ifndef IN_GENERATED_CCODE\n"
    382         "#define IN_GENERATED_CCODE\n"
    383         "#define U_DISABLE_RENAMING 1\n"
    384         "#include \"unicode/umachine.h\"\n"
    385         "#endif\n"
    386         "U_CDECL_BEGIN\n"
    387         "const struct {\n"
    388         "    double bogus;\n"
    389         "    const char *bytes; \n"
    390         "} %s={ 0.0, \n",
    391         entry);
    392     T_FileStream_writeLine(out, buffer);
    393 
    394     for(;;) {
    395         length=T_FileStream_read(in, buffer, sizeof(buffer));
    396         if(length==0) {
    397             break;
    398         }
    399         for(i=0; i<length; ++i) {
    400             column = write8str(out, (uint8_t)buffer[i], column);
    401         }
    402     }
    403 
    404     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
    405 #else
    406     /* Function renaming shouldn't be done in data */
    407     sprintf(buffer,
    408         "#ifndef IN_GENERATED_CCODE\n"
    409         "#define IN_GENERATED_CCODE\n"
    410         "#define U_DISABLE_RENAMING 1\n"
    411         "#include \"unicode/umachine.h\"\n"
    412         "#endif\n"
    413         "U_CDECL_BEGIN\n"
    414         "const struct {\n"
    415         "    double bogus;\n"
    416         "    uint8_t bytes[%ld]; \n"
    417         "} %s={ 0.0, {\n",
    418         (long)T_FileStream_size(in), entry);
    419     T_FileStream_writeLine(out, buffer);
    420 
    421     for(;;) {
    422         length=T_FileStream_read(in, buffer, sizeof(buffer));
    423         if(length==0) {
    424             break;
    425         }
    426         for(i=0; i<length; ++i) {
    427             column = write8(out, (uint8_t)buffer[i], column);
    428         }
    429     }
    430 
    431     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
    432 #endif
    433 
    434     if(T_FileStream_error(in)) {
    435         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    436         exit(U_FILE_ACCESS_ERROR);
    437     }
    438 
    439     if(T_FileStream_error(out)) {
    440         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    441         exit(U_FILE_ACCESS_ERROR);
    442     }
    443 
    444     T_FileStream_close(out);
    445     T_FileStream_close(in);
    446 }
    447 
    448 static uint32_t
    449 write32(FileStream *out, uint32_t bitField, uint32_t column) {
    450     int32_t i;
    451     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
    452     char *s = bitFieldStr;
    453     uint8_t *ptrIdx = (uint8_t *)&bitField;
    454     static const char hexToStr[16] = {
    455         '0','1','2','3',
    456         '4','5','6','7',
    457         '8','9','A','B',
    458         'C','D','E','F'
    459     };
    460 
    461     /* write the value, possibly with comma and newline */
    462     if(column==MAX_COLUMN) {
    463         /* first byte */
    464         column=1;
    465     } else if(column<32) {
    466         *(s++)=',';
    467         ++column;
    468     } else {
    469         *(s++)='\n';
    470         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
    471         s+=uprv_strlen(s);
    472         column=1;
    473     }
    474 
    475     if (bitField < 10) {
    476         /* It's a small number. Don't waste the space for 0x */
    477         *(s++)=hexToStr[bitField];
    478     }
    479     else {
    480         int seenNonZero = 0; /* This is used to remove leading zeros */
    481 
    482         if(hexType==HEX_0X) {
    483          *(s++)='0';
    484          *(s++)='x';
    485         } else if(hexType==HEX_0H) {
    486          *(s++)='0';
    487         }
    488 
    489         /* This creates a 32-bit field */
    490 #if U_IS_BIG_ENDIAN
    491         for (i = 0; i < sizeof(uint32_t); i++)
    492 #else
    493         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
    494 #endif
    495         {
    496             uint8_t value = ptrIdx[i];
    497             if (value || seenNonZero) {
    498                 *(s++)=hexToStr[value>>4];
    499                 *(s++)=hexToStr[value&0xF];
    500                 seenNonZero = 1;
    501             }
    502         }
    503         if(hexType==HEX_0H) {
    504          *(s++)='h';
    505         }
    506     }
    507 
    508     *(s++)=0;
    509     T_FileStream_writeLine(out, bitFieldStr);
    510     return column;
    511 }
    512 
    513 static uint32_t
    514 write8(FileStream *out, uint8_t byte, uint32_t column) {
    515     char s[4];
    516     int i=0;
    517 
    518     /* convert the byte value to a string */
    519     if(byte>=100) {
    520         s[i++]=(char)('0'+byte/100);
    521         byte%=100;
    522     }
    523     if(i>0 || byte>=10) {
    524         s[i++]=(char)('0'+byte/10);
    525         byte%=10;
    526     }
    527     s[i++]=(char)('0'+byte);
    528     s[i]=0;
    529 
    530     /* write the value, possibly with comma and newline */
    531     if(column==MAX_COLUMN) {
    532         /* first byte */
    533         column=1;
    534     } else if(column<16) {
    535         T_FileStream_writeLine(out, ",");
    536         ++column;
    537     } else {
    538         T_FileStream_writeLine(out, ",\n");
    539         column=1;
    540     }
    541     T_FileStream_writeLine(out, s);
    542     return column;
    543 }
    544 
    545 #if U_PLATFORM == U_PF_OS400
    546 static uint32_t
    547 write8str(FileStream *out, uint8_t byte, uint32_t column) {
    548     char s[8];
    549 
    550     if (byte > 7)
    551         sprintf(s, "\\x%X", byte);
    552     else
    553         sprintf(s, "\\%X", byte);
    554 
    555     /* write the value, possibly with comma and newline */
    556     if(column==MAX_COLUMN) {
    557         /* first byte */
    558         column=1;
    559         T_FileStream_writeLine(out, "\"");
    560     } else if(column<24) {
    561         ++column;
    562     } else {
    563         T_FileStream_writeLine(out, "\"\n\"");
    564         column=1;
    565     }
    566     T_FileStream_writeLine(out, s);
    567     return column;
    568 }
    569 #endif
    570 
    571 static void
    572 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
    573     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
    574 
    575     /* copy path */
    576     if(destdir!=NULL && *destdir!=0) {
    577         do {
    578             *outFilename++=*destdir++;
    579         } while(*destdir!=0);
    580         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
    581             *outFilename++=U_FILE_SEP_CHAR;
    582         }
    583         inFilename=basename;
    584     } else {
    585         while(inFilename<basename) {
    586             *outFilename++=*inFilename++;
    587         }
    588     }
    589 
    590     if(suffix==NULL) {
    591         /* the filename does not have a suffix */
    592         uprv_strcpy(entryName, inFilename);
    593         if(optFilename != NULL) {
    594           uprv_strcpy(outFilename, optFilename);
    595         } else {
    596           uprv_strcpy(outFilename, inFilename);
    597         }
    598         uprv_strcat(outFilename, newSuffix);
    599     } else {
    600         char *saveOutFilename = outFilename;
    601         /* copy basename */
    602         while(inFilename<suffix) {
    603             if(*inFilename=='-') {
    604                 /* iSeries cannot have '-' in the .o objects. */
    605                 *outFilename++=*entryName++='_';
    606                 inFilename++;
    607             }
    608             else {
    609                 *outFilename++=*entryName++=*inFilename++;
    610             }
    611         }
    612 
    613         /* replace '.' by '_' */
    614         *outFilename++=*entryName++='_';
    615         ++inFilename;
    616 
    617         /* copy suffix */
    618         while(*inFilename!=0) {
    619             *outFilename++=*entryName++=*inFilename++;
    620         }
    621 
    622         *entryName=0;
    623 
    624         if(optFilename != NULL) {
    625             uprv_strcpy(saveOutFilename, optFilename);
    626             uprv_strcat(saveOutFilename, newSuffix);
    627         } else {
    628             /* add ".c" */
    629             uprv_strcpy(outFilename, newSuffix);
    630         }
    631     }
    632 }
    633 
    634 #ifdef CAN_GENERATE_OBJECTS
    635 static void
    636 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
    637     union {
    638         char        bytes[2048];
    639 #ifdef U_ELF
    640         Elf32_Ehdr  header32;
    641         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
    642 #elif U_PLATFORM_HAS_WIN32_API
    643         IMAGE_FILE_HEADER header;
    644 #endif
    645     } buffer;
    646 
    647     const char *filename;
    648     FileStream *in;
    649     int32_t length;
    650 
    651 #ifdef U_ELF
    652 
    653 #elif U_PLATFORM_HAS_WIN32_API
    654     const IMAGE_FILE_HEADER *pHeader;
    655 #else
    656 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    657 #endif
    658 
    659     if(optMatchArch != NULL) {
    660         filename=optMatchArch;
    661     } else {
    662         /* set defaults */
    663 #ifdef U_ELF
    664         /* set EM_386 because elf.h does not provide better defaults */
    665         *pCPU=EM_386;
    666         *pBits=32;
    667         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
    668 #elif U_PLATFORM_HAS_WIN32_API
    669 /* _M_IA64 should be defined in windows.h */
    670 #   if defined(_M_IA64)
    671         *pCPU=IMAGE_FILE_MACHINE_IA64;
    672 #   elif defined(_M_AMD64)
    673         *pCPU=IMAGE_FILE_MACHINE_AMD64;
    674 #   else
    675         *pCPU=IMAGE_FILE_MACHINE_I386;
    676 #   endif
    677         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    678         *pIsBigEndian=FALSE;
    679 #else
    680 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    681 #endif
    682         return;
    683     }
    684 
    685     in=T_FileStream_open(filename, "rb");
    686     if(in==NULL) {
    687         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
    688         exit(U_FILE_ACCESS_ERROR);
    689     }
    690     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
    691 
    692 #ifdef U_ELF
    693     if(length<sizeof(Elf32_Ehdr)) {
    694         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    695         exit(U_UNSUPPORTED_ERROR);
    696     }
    697     if(
    698         buffer.header32.e_ident[0]!=ELFMAG0 ||
    699         buffer.header32.e_ident[1]!=ELFMAG1 ||
    700         buffer.header32.e_ident[2]!=ELFMAG2 ||
    701         buffer.header32.e_ident[3]!=ELFMAG3 ||
    702         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
    703     ) {
    704         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
    705         exit(U_UNSUPPORTED_ERROR);
    706     }
    707 
    708     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
    709 #ifdef U_ELF64
    710     if(*pBits!=32 && *pBits!=64) {
    711         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
    712         exit(U_UNSUPPORTED_ERROR);
    713     }
    714 #else
    715     if(*pBits!=32) {
    716         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
    717         exit(U_UNSUPPORTED_ERROR);
    718     }
    719 #endif
    720 
    721     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
    722     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
    723         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
    724         exit(U_UNSUPPORTED_ERROR);
    725     }
    726     /* TODO: Support byte swapping */
    727 
    728     *pCPU=buffer.header32.e_machine;
    729 #elif U_PLATFORM_HAS_WIN32_API
    730     if(length<sizeof(IMAGE_FILE_HEADER)) {
    731         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    732         exit(U_UNSUPPORTED_ERROR);
    733     }
    734     /* TODO: Use buffer.header.  Keep aliasing legal.  */
    735     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
    736     *pCPU=pHeader->Machine;
    737     /*
    738      * The number of bits is implicit with the Machine value.
    739      * *pBits is ignored in the calling code, so this need not be precise.
    740      */
    741     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    742     /* Windows always runs on little-endian CPUs. */
    743     *pIsBigEndian=FALSE;
    744 #else
    745 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    746 #endif
    747 
    748     T_FileStream_close(in);
    749 }
    750 
    751 U_CAPI void U_EXPORT2
    752 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
    753     /* common variables */
    754     char buffer[4096], entry[40]={ 0 };
    755     FileStream *in, *out;
    756     const char *newSuffix;
    757     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
    758 
    759     uint16_t cpu, bits;
    760     UBool makeBigEndian;
    761 
    762     /* platform-specific variables and initialization code */
    763 #ifdef U_ELF
    764     /* 32-bit Elf file header */
    765     static Elf32_Ehdr header32={
    766         {
    767             /* e_ident[] */
    768             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    769             ELFCLASS32,
    770             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    771             EV_CURRENT /* EI_VERSION */
    772         },
    773         ET_REL,
    774         EM_386,
    775         EV_CURRENT, /* e_version */
    776         0, /* e_entry */
    777         0, /* e_phoff */
    778         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
    779         0, /* e_flags */
    780         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
    781         0, /* e_phentsize */
    782         0, /* e_phnum */
    783         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
    784         5, /* e_shnum */
    785         2 /* e_shstrndx */
    786     };
    787 
    788     /* 32-bit Elf section header table */
    789     static Elf32_Shdr sectionHeaders32[5]={
    790         { /* SHN_UNDEF */
    791             0
    792         },
    793         { /* .symtab */
    794             1, /* sh_name */
    795             SHT_SYMTAB,
    796             0, /* sh_flags */
    797             0, /* sh_addr */
    798             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
    799             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
    800             3, /* sh_link=sect hdr index of .strtab */
    801             1, /* sh_info=One greater than the symbol table index of the last
    802                 * local symbol (with STB_LOCAL). */
    803             4, /* sh_addralign */
    804             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
    805         },
    806         { /* .shstrtab */
    807             9, /* sh_name */
    808             SHT_STRTAB,
    809             0, /* sh_flags */
    810             0, /* sh_addr */
    811             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
    812             40, /* sh_size */
    813             0, /* sh_link */
    814             0, /* sh_info */
    815             1, /* sh_addralign */
    816             0 /* sh_entsize */
    817         },
    818         { /* .strtab */
    819             19, /* sh_name */
    820             SHT_STRTAB,
    821             0, /* sh_flags */
    822             0, /* sh_addr */
    823             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
    824             (Elf32_Word)sizeof(entry), /* sh_size */
    825             0, /* sh_link */
    826             0, /* sh_info */
    827             1, /* sh_addralign */
    828             0 /* sh_entsize */
    829         },
    830         { /* .rodata */
    831             27, /* sh_name */
    832             SHT_PROGBITS,
    833             SHF_ALLOC, /* sh_flags */
    834             0, /* sh_addr */
    835             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
    836             0, /* sh_size */
    837             0, /* sh_link */
    838             0, /* sh_info */
    839             16, /* sh_addralign */
    840             0 /* sh_entsize */
    841         }
    842     };
    843 
    844     /* symbol table */
    845     static Elf32_Sym symbols32[2]={
    846         { /* STN_UNDEF */
    847             0
    848         },
    849         { /* data entry point */
    850             1, /* st_name */
    851             0, /* st_value */
    852             0, /* st_size */
    853             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    854             0, /* st_other */
    855             4 /* st_shndx=index of related section table entry */
    856         }
    857     };
    858 
    859     /* section header string table, with decimal string offsets */
    860     static const char sectionStrings[40]=
    861         /*  0 */ "\0"
    862         /*  1 */ ".symtab\0"
    863         /*  9 */ ".shstrtab\0"
    864         /* 19 */ ".strtab\0"
    865         /* 27 */ ".rodata\0"
    866         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
    867         /* 40: padded to multiple of 8 bytes */
    868 
    869     /*
    870      * Use entry[] for the string table which will contain only the
    871      * entry point name.
    872      * entry[0] must be 0 (NUL)
    873      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
    874      */
    875 
    876     /* 16-align .rodata in the .o file, just in case */
    877     static const char padding[16]={ 0 };
    878     int32_t paddingSize;
    879 
    880 #ifdef U_ELF64
    881     /* 64-bit Elf file header */
    882     static Elf64_Ehdr header64={
    883         {
    884             /* e_ident[] */
    885             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    886             ELFCLASS64,
    887             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    888             EV_CURRENT /* EI_VERSION */
    889         },
    890         ET_REL,
    891         EM_X86_64,
    892         EV_CURRENT, /* e_version */
    893         0, /* e_entry */
    894         0, /* e_phoff */
    895         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
    896         0, /* e_flags */
    897         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
    898         0, /* e_phentsize */
    899         0, /* e_phnum */
    900         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
    901         5, /* e_shnum */
    902         2 /* e_shstrndx */
    903     };
    904 
    905     /* 64-bit Elf section header table */
    906     static Elf64_Shdr sectionHeaders64[5]={
    907         { /* SHN_UNDEF */
    908             0
    909         },
    910         { /* .symtab */
    911             1, /* sh_name */
    912             SHT_SYMTAB,
    913             0, /* sh_flags */
    914             0, /* sh_addr */
    915             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
    916             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
    917             3, /* sh_link=sect hdr index of .strtab */
    918             1, /* sh_info=One greater than the symbol table index of the last
    919                 * local symbol (with STB_LOCAL). */
    920             4, /* sh_addralign */
    921             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
    922         },
    923         { /* .shstrtab */
    924             9, /* sh_name */
    925             SHT_STRTAB,
    926             0, /* sh_flags */
    927             0, /* sh_addr */
    928             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
    929             40, /* sh_size */
    930             0, /* sh_link */
    931             0, /* sh_info */
    932             1, /* sh_addralign */
    933             0 /* sh_entsize */
    934         },
    935         { /* .strtab */
    936             19, /* sh_name */
    937             SHT_STRTAB,
    938             0, /* sh_flags */
    939             0, /* sh_addr */
    940             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
    941             (Elf64_Xword)sizeof(entry), /* sh_size */
    942             0, /* sh_link */
    943             0, /* sh_info */
    944             1, /* sh_addralign */
    945             0 /* sh_entsize */
    946         },
    947         { /* .rodata */
    948             27, /* sh_name */
    949             SHT_PROGBITS,
    950             SHF_ALLOC, /* sh_flags */
    951             0, /* sh_addr */
    952             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
    953             0, /* sh_size */
    954             0, /* sh_link */
    955             0, /* sh_info */
    956             16, /* sh_addralign */
    957             0 /* sh_entsize */
    958         }
    959     };
    960 
    961     /*
    962      * 64-bit symbol table
    963      * careful: different order of items compared with Elf32_Sym!
    964      */
    965     static Elf64_Sym symbols64[2]={
    966         { /* STN_UNDEF */
    967             0
    968         },
    969         { /* data entry point */
    970             1, /* st_name */
    971             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    972             0, /* st_other */
    973             4, /* st_shndx=index of related section table entry */
    974             0, /* st_value */
    975             0 /* st_size */
    976         }
    977     };
    978 
    979 #endif /* U_ELF64 */
    980 
    981     /* entry[] has a leading NUL */
    982     entryOffset=1;
    983 
    984     /* in the common code, count entryLength from after the NUL */
    985     entryLengthOffset=1;
    986 
    987     newSuffix=".o";
    988 
    989 #elif U_PLATFORM_HAS_WIN32_API
    990     struct {
    991         IMAGE_FILE_HEADER fileHeader;
    992         IMAGE_SECTION_HEADER sections[2];
    993         char linkerOptions[100];
    994     } objHeader;
    995     IMAGE_SYMBOL symbols[1];
    996     struct {
    997         DWORD sizeofLongNames;
    998         char longNames[100];
    999     } symbolNames;
   1000 
   1001     /*
   1002      * entry sometimes has a leading '_'
   1003      * overwritten if entryOffset==0 depending on the target platform
   1004      * see check for cpu below
   1005      */
   1006     entry[0]='_';
   1007 
   1008     newSuffix=".obj";
   1009 #else
   1010 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1011 #endif
   1012 
   1013     /* deal with options, files and the entry point name */
   1014     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
   1015     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
   1016 #if U_PLATFORM_HAS_WIN32_API
   1017     if(cpu==IMAGE_FILE_MACHINE_I386) {
   1018         entryOffset=1;
   1019     }
   1020 #endif
   1021 
   1022     in=T_FileStream_open(filename, "rb");
   1023     if(in==NULL) {
   1024         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
   1025         exit(U_FILE_ACCESS_ERROR);
   1026     }
   1027     size=T_FileStream_size(in);
   1028 
   1029     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
   1030     if (outFilePath != NULL) {
   1031         uprv_strcpy(outFilePath, buffer);
   1032     }
   1033 
   1034     if(optEntryPoint != NULL) {
   1035         uprv_strcpy(entry+entryOffset, optEntryPoint);
   1036         uprv_strcat(entry+entryOffset, "_dat");
   1037     }
   1038     /* turn dashes in the entry name into underscores */
   1039     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
   1040     for(i=0; i<entryLength; ++i) {
   1041         if(entry[entryLengthOffset+i]=='-') {
   1042             entry[entryLengthOffset+i]='_';
   1043         }
   1044     }
   1045 
   1046     /* open the output file */
   1047     out=T_FileStream_open(buffer, "wb");
   1048     if(out==NULL) {
   1049         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
   1050         exit(U_FILE_ACCESS_ERROR);
   1051     }
   1052 
   1053 #ifdef U_ELF
   1054     if(bits==32) {
   1055         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1056         header32.e_machine=cpu;
   1057 
   1058         /* 16-align .rodata in the .o file, just in case */
   1059         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
   1060         if(paddingSize!=0) {
   1061                 paddingSize=0x10-paddingSize;
   1062                 sectionHeaders32[4].sh_offset+=paddingSize;
   1063         }
   1064 
   1065         sectionHeaders32[4].sh_size=(Elf32_Word)size;
   1066 
   1067         symbols32[1].st_size=(Elf32_Word)size;
   1068 
   1069         /* write .o headers */
   1070         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
   1071         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
   1072         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
   1073     } else /* bits==64 */ {
   1074 #ifdef U_ELF64
   1075         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1076         header64.e_machine=cpu;
   1077 
   1078         /* 16-align .rodata in the .o file, just in case */
   1079         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
   1080         if(paddingSize!=0) {
   1081                 paddingSize=0x10-paddingSize;
   1082                 sectionHeaders64[4].sh_offset+=paddingSize;
   1083         }
   1084 
   1085         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
   1086 
   1087         symbols64[1].st_size=(Elf64_Xword)size;
   1088 
   1089         /* write .o headers */
   1090         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
   1091         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
   1092         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
   1093 #endif
   1094     }
   1095 
   1096     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
   1097     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
   1098     if(paddingSize!=0) {
   1099         T_FileStream_write(out, padding, paddingSize);
   1100     }
   1101 #elif U_PLATFORM_HAS_WIN32_API
   1102     /* populate the .obj headers */
   1103     uprv_memset(&objHeader, 0, sizeof(objHeader));
   1104     uprv_memset(&symbols, 0, sizeof(symbols));
   1105     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
   1106 
   1107     /* write the linker export directive */
   1108     uprv_strcpy(objHeader.linkerOptions, "-export:");
   1109     length=8;
   1110     uprv_strcpy(objHeader.linkerOptions+length, entry);
   1111     length+=entryLength;
   1112     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
   1113     length+=6;
   1114 
   1115     /* set the file header */
   1116     objHeader.fileHeader.Machine=cpu;
   1117     objHeader.fileHeader.NumberOfSections=2;
   1118     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
   1119     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
   1120     objHeader.fileHeader.NumberOfSymbols=1;
   1121 
   1122     /* set the section for the linker options */
   1123     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
   1124     objHeader.sections[0].SizeOfRawData=length;
   1125     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
   1126     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
   1127 
   1128     /* set the data section */
   1129     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
   1130     objHeader.sections[1].SizeOfRawData=size;
   1131     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
   1132     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
   1133 
   1134     /* set the symbol table */
   1135     if(entryLength<=8) {
   1136         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
   1137         symbolNames.sizeofLongNames=4;
   1138     } else {
   1139         symbols[0].N.Name.Short=0;
   1140         symbols[0].N.Name.Long=4;
   1141         symbolNames.sizeofLongNames=4+entryLength+1;
   1142         uprv_strcpy(symbolNames.longNames, entry);
   1143     }
   1144     symbols[0].SectionNumber=2;
   1145     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
   1146 
   1147     /* write the file header and the linker options section */
   1148     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
   1149 #else
   1150 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1151 #endif
   1152 
   1153     /* copy the data file into section 2 */
   1154     for(;;) {
   1155         length=T_FileStream_read(in, buffer, sizeof(buffer));
   1156         if(length==0) {
   1157             break;
   1158         }
   1159         T_FileStream_write(out, buffer, (int32_t)length);
   1160     }
   1161 
   1162 #if U_PLATFORM_HAS_WIN32_API
   1163     /* write the symbol table */
   1164     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
   1165     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
   1166 #endif
   1167 
   1168     if(T_FileStream_error(in)) {
   1169         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
   1170         exit(U_FILE_ACCESS_ERROR);
   1171     }
   1172 
   1173     if(T_FileStream_error(out)) {
   1174         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
   1175         exit(U_FILE_ACCESS_ERROR);
   1176     }
   1177 
   1178     T_FileStream_close(out);
   1179     T_FileStream_close(in);
   1180 }
   1181 #endif
   1182