Home | History | Annotate | Download | only in toolutil
      1 /******************************************************************************
      2  *   Copyright (C) 2009, International Business Machines
      3  *   Corporation and others.  All Rights Reserved.
      4  *******************************************************************************
      5  */
      6 #include "unicode/utypes.h"
      7 
      8 #ifdef U_WINDOWS
      9 #   define VC_EXTRALEAN
     10 #   define WIN32_LEAN_AND_MEAN
     11 #   define NOUSER
     12 #   define NOSERVICE
     13 #   define NOIME
     14 #   define NOMCX
     15 #include <windows.h>
     16 #include <time.h>
     17 #   ifdef __GNUC__
     18 #       define WINDOWS_WITH_GNUC
     19 #   endif
     20 #endif
     21 
     22 #ifdef U_LINUX
     23 #   define U_ELF
     24 #endif
     25 
     26 #ifdef U_ELF
     27 #   include <elf.h>
     28 #   if defined(ELFCLASS64)
     29 #       define U_ELF64
     30 #   endif
     31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
     32 #   ifndef EM_X86_64
     33 #       define EM_X86_64 62
     34 #   endif
     35 #   define ICU_ENTRY_OFFSET 0
     36 #endif
     37 
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include "unicode/putil.h"
     41 #include "cmemory.h"
     42 #include "cstring.h"
     43 #include "filestrm.h"
     44 #include "toolutil.h"
     45 #include "unicode/uclean.h"
     46 #include "uoptions.h"
     47 #include "pkg_genc.h"
     48 
     49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
     50 
     51 #define HEX_0X 0 /*  0x1234 */
     52 #define HEX_0H 1 /*  01234h */
     53 
     54 #if defined(U_WINDOWS) || defined(U_ELF)
     55 #define CAN_GENERATE_OBJECTS
     56 #endif
     57 
     58 /* prototypes --------------------------------------------------------------- */
/*
 * Derives the output file path (written to outFilename) and the entry point
 * name (written to entryName) from the input file name, the optional
 * destination directory, the new suffix and the optional output base name.
 */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal number; returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/*
 * Writes one 32-bit value (despite the parameter name, a whole word) in the
 * notation of the selected assembly type; returns the updated column counter.
 */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#ifdef OS400
/* Writes one byte as an escape sequence inside a C string literal; returns the updated column counter. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
     72 /* -------------------------------------------------------------------------- */
     73 
     74 /*
     75 Creating Template Files for New Platforms
     76 
     77 Let the cc compiler help you get started.
     78 Compile this program
     79     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
     80 with the -S option to produce assembly output.
     81 
     82 For example, this will generate array.s:
     83 gcc -S array.c
     84 
     85 This will produce a .s file that may look like this:
     86 
     87     .file   "array.c"
     88     .version        "01.01"
     89 gcc2_compiled.:
     90     .globl x
     91     .section        .rodata
     92     .align 4
     93     .type    x,@object
     94     .size    x,20
     95 x:
     96     .long   1
     97     .long   2
     98     .long   -559038737
     99     .long   -1
    100     .long   16
    101     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
    102 
    103 which gives a starting point that will compile, and can be transformed
    104 to become the template, generally with some consulting of as docs and
    105 some experimentation.
    106 
    107 If you want ICU to automatically use this assembly, you should
    108 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
    109 where the name is the compiler or platform that you used in this
    110 assemblyHeader data structure.
    111 */
/*
 * Table of assembly output templates, one per supported assembler/platform.
 * checkAssemblyHeaderName() selects an entry by name and writeAssemblyCode()
 * uses the selected entry to frame the generated data words.
 *
 * header and footer are printf-style formats; writeAssemblyCode() passes the
 * entry point name for every %s that appears in them.
 */
static const struct AssemblyType {
    const char *name;       /* value matched against the requested assembly type */
    const char *header;     /* format written once before the data */
    const char *beginLine;  /* data directive written at the start of each line of values */
    const char *footer;     /* format written once after the data (may be empty) */
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",@progbits\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "\t.type %s,@object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "\t.data\n"
        "\t.const\n"
        "\t.align 4\n"  /* 1<<4 = 16 */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   8\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  8\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
    /* The only entry with a non-empty footer and with the 01234h number notation. */
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
    206 
/* Index into assemblyHeader[] chosen by checkAssemblyHeaderName(); -1 while nothing is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Number notation (HEX_0X or HEX_0H) used by write32(); copied from the selected assemblyHeader[] entry. */
static int32_t hexType = HEX_0X;
    209 
    210 U_CAPI UBool U_EXPORT2
    211 checkAssemblyHeaderName(const char* optAssembly) {
    212     int32_t idx;
    213     assemblyHeaderIndex = -1;
    214     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    215         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
    216             assemblyHeaderIndex = idx;
    217             hexType = assemblyHeader[idx].hexType; /* set the hex type */
    218             return TRUE;
    219         }
    220     }
    221 
    222     return FALSE;
    223 }
    224 
    225 
    226 U_CAPI void U_EXPORT2
    227 printAssemblyHeadersToStdErr(void) {
    228     int32_t idx;
    229     fprintf(stderr, "%s", assemblyHeader[0].name);
    230     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
    231         fprintf(stderr, ", %s", assemblyHeader[idx].name);
    232     }
    233     fprintf(stderr,
    234         ")\n");
    235 }
    236 
    237 U_CAPI void U_EXPORT2
    238 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
    239     uint32_t column = MAX_COLUMN;
    240     char entry[64];
    241     uint32_t buffer[1024];
    242     char *bufferStr = (char *)buffer;
    243     FileStream *in, *out;
    244     size_t i, length;
    245 
    246     in=T_FileStream_open(filename, "rb");
    247     if(in==NULL) {
    248         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    249         exit(U_FILE_ACCESS_ERROR);
    250     }
    251 
    252     getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
    253     out=T_FileStream_open(bufferStr, "w");
    254     if(out==NULL) {
    255         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
    256         exit(U_FILE_ACCESS_ERROR);
    257     }
    258 
    259     if (outFilePath != NULL) {
    260         uprv_strcpy(outFilePath, bufferStr);
    261     }
    262 
    263 #ifdef WINDOWS_WITH_GNUC
    264     /* Need to fix the file seperator character when using MinGW. */
    265     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
    266 #endif
    267 
    268     if(optEntryPoint != NULL) {
    269         uprv_strcpy(entry, optEntryPoint);
    270         uprv_strcat(entry, "_dat");
    271     }
    272 
    273     /* turn dashes or dots in the entry name into underscores */
    274     length=uprv_strlen(entry);
    275     for(i=0; i<length; ++i) {
    276         if(entry[i]=='-' || entry[i]=='.') {
    277             entry[i]='_';
    278         }
    279     }
    280 
    281     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
    282         entry, entry, entry, entry,
    283         entry, entry, entry, entry);
    284     T_FileStream_writeLine(out, bufferStr);
    285     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
    286 
    287     for(;;) {
    288         length=T_FileStream_read(in, buffer, sizeof(buffer));
    289         if(length==0) {
    290             break;
    291         }
    292         if (length != sizeof(buffer)) {
    293             /* pad with extra 0's when at the end of the file */
    294             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
    295                 buffer[length+i] = 0;
    296             }
    297         }
    298         for(i=0; i<(length/sizeof(buffer[0])); i++) {
    299             column = write32(out, buffer[i], column);
    300         }
    301     }
    302 
    303     T_FileStream_writeLine(out, "\n");
    304 
    305     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
    306         entry, entry, entry, entry,
    307         entry, entry, entry, entry);
    308     T_FileStream_writeLine(out, bufferStr);
    309 
    310     if(T_FileStream_error(in)) {
    311         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    312         exit(U_FILE_ACCESS_ERROR);
    313     }
    314 
    315     if(T_FileStream_error(out)) {
    316         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    317         exit(U_FILE_ACCESS_ERROR);
    318     }
    319 
    320     T_FileStream_close(out);
    321     T_FileStream_close(in);
    322 }
    323 
    324 U_CAPI void U_EXPORT2
    325 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
    326     uint32_t column = MAX_COLUMN;
    327     char buffer[4096], entry[64];
    328     FileStream *in, *out;
    329     size_t i, length;
    330 
    331     in=T_FileStream_open(filename, "rb");
    332     if(in==NULL) {
    333         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
    334         exit(U_FILE_ACCESS_ERROR);
    335     }
    336 
    337     if(optName != NULL) { /* prepend  'icudt28_' */
    338       strcpy(entry, optName);
    339       strcat(entry, "_");
    340     } else {
    341       entry[0] = 0;
    342     }
    343 
    344     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
    345     if (outFilePath != NULL) {
    346         uprv_strcpy(outFilePath, buffer);
    347     }
    348     out=T_FileStream_open(buffer, "w");
    349     if(out==NULL) {
    350         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
    351         exit(U_FILE_ACCESS_ERROR);
    352     }
    353 
    354     /* turn dashes or dots in the entry name into underscores */
    355     length=uprv_strlen(entry);
    356     for(i=0; i<length; ++i) {
    357         if(entry[i]=='-' || entry[i]=='.') {
    358             entry[i]='_';
    359         }
    360     }
    361 
    362 #ifdef OS400
    363     /*
    364     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
    365 
    366     This is here because this platform can't currently put
    367     const data into the read-only pages of an object or
    368     shared library (service program). Only strings are allowed in read-only
    369     pages, so we use char * strings to store the data.
    370 
    371     In order to prevent the beginning of the data from ever matching the
    372     magic numbers we must still use the initial double.
    373     [grhoten 4/24/2003]
    374     */
    375     sprintf(buffer,
    376         "#define U_DISABLE_RENAMING 1\n"
    377         "#include \"unicode/umachine.h\"\n"
    378         "U_CDECL_BEGIN\n"
    379         "const struct {\n"
    380         "    double bogus;\n"
    381         "    const char *bytes; \n"
    382         "} %s={ 0.0, \n",
    383         entry);
    384     T_FileStream_writeLine(out, buffer);
    385 
    386     for(;;) {
    387         length=T_FileStream_read(in, buffer, sizeof(buffer));
    388         if(length==0) {
    389             break;
    390         }
    391         for(i=0; i<length; ++i) {
    392             column = write8str(out, (uint8_t)buffer[i], column);
    393         }
    394     }
    395 
    396     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
    397 #else
    398     /* Function renaming shouldn't be done in data */
    399     sprintf(buffer,
    400         "#define U_DISABLE_RENAMING 1\n"
    401         "#include \"unicode/umachine.h\"\n"
    402         "U_CDECL_BEGIN\n"
    403         "const struct {\n"
    404         "    double bogus;\n"
    405         "    uint8_t bytes[%ld]; \n"
    406         "} %s={ 0.0, {\n",
    407         (long)T_FileStream_size(in), entry);
    408     T_FileStream_writeLine(out, buffer);
    409 
    410     for(;;) {
    411         length=T_FileStream_read(in, buffer, sizeof(buffer));
    412         if(length==0) {
    413             break;
    414         }
    415         for(i=0; i<length; ++i) {
    416             column = write8(out, (uint8_t)buffer[i], column);
    417         }
    418     }
    419 
    420     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
    421 #endif
    422 
    423     if(T_FileStream_error(in)) {
    424         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
    425         exit(U_FILE_ACCESS_ERROR);
    426     }
    427 
    428     if(T_FileStream_error(out)) {
    429         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
    430         exit(U_FILE_ACCESS_ERROR);
    431     }
    432 
    433     T_FileStream_close(out);
    434     T_FileStream_close(in);
    435 }
    436 
    437 static uint32_t
    438 write32(FileStream *out, uint32_t bitField, uint32_t column) {
    439     int32_t i;
    440     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
    441     char *s = bitFieldStr;
    442     uint8_t *ptrIdx = (uint8_t *)&bitField;
    443     static const char hexToStr[16] = {
    444         '0','1','2','3',
    445         '4','5','6','7',
    446         '8','9','A','B',
    447         'C','D','E','F'
    448     };
    449 
    450     /* write the value, possibly with comma and newline */
    451     if(column==MAX_COLUMN) {
    452         /* first byte */
    453         column=1;
    454     } else if(column<32) {
    455         *(s++)=',';
    456         ++column;
    457     } else {
    458         *(s++)='\n';
    459         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
    460         s+=uprv_strlen(s);
    461         column=1;
    462     }
    463 
    464     if (bitField < 10) {
    465         /* It's a small number. Don't waste the space for 0x */
    466         *(s++)=hexToStr[bitField];
    467     }
    468     else {
    469         int seenNonZero = 0; /* This is used to remove leading zeros */
    470 
    471         if(hexType==HEX_0X) {
    472          *(s++)='0';
    473          *(s++)='x';
    474         } else if(hexType==HEX_0H) {
    475          *(s++)='0';
    476         }
    477 
    478         /* This creates a 32-bit field */
    479 #if U_IS_BIG_ENDIAN
    480         for (i = 0; i < sizeof(uint32_t); i++)
    481 #else
    482         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
    483 #endif
    484         {
    485             uint8_t value = ptrIdx[i];
    486             if (value || seenNonZero) {
    487                 *(s++)=hexToStr[value>>4];
    488                 *(s++)=hexToStr[value&0xF];
    489                 seenNonZero = 1;
    490             }
    491         }
    492         if(hexType==HEX_0H) {
    493          *(s++)='h';
    494         }
    495     }
    496 
    497     *(s++)=0;
    498     T_FileStream_writeLine(out, bitFieldStr);
    499     return column;
    500 }
    501 
    502 static uint32_t
    503 write8(FileStream *out, uint8_t byte, uint32_t column) {
    504     char s[4];
    505     int i=0;
    506 
    507     /* convert the byte value to a string */
    508     if(byte>=100) {
    509         s[i++]=(char)('0'+byte/100);
    510         byte%=100;
    511     }
    512     if(i>0 || byte>=10) {
    513         s[i++]=(char)('0'+byte/10);
    514         byte%=10;
    515     }
    516     s[i++]=(char)('0'+byte);
    517     s[i]=0;
    518 
    519     /* write the value, possibly with comma and newline */
    520     if(column==MAX_COLUMN) {
    521         /* first byte */
    522         column=1;
    523     } else if(column<16) {
    524         T_FileStream_writeLine(out, ",");
    525         ++column;
    526     } else {
    527         T_FileStream_writeLine(out, ",\n");
    528         column=1;
    529     }
    530     T_FileStream_writeLine(out, s);
    531     return column;
    532 }
    533 
    534 #ifdef OS400
    535 static uint32_t
    536 write8str(FileStream *out, uint8_t byte, uint32_t column) {
    537     char s[8];
    538 
    539     if (byte > 7)
    540         sprintf(s, "\\x%X", byte);
    541     else
    542         sprintf(s, "\\%X", byte);
    543 
    544     /* write the value, possibly with comma and newline */
    545     if(column==MAX_COLUMN) {
    546         /* first byte */
    547         column=1;
    548         T_FileStream_writeLine(out, "\"");
    549     } else if(column<24) {
    550         ++column;
    551     } else {
    552         T_FileStream_writeLine(out, "\"\n\"");
    553         column=1;
    554     }
    555     T_FileStream_writeLine(out, s);
    556     return column;
    557 }
    558 #endif
    559 
    560 static void
    561 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
    562     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
    563 
    564     /* copy path */
    565     if(destdir!=NULL && *destdir!=0) {
    566         do {
    567             *outFilename++=*destdir++;
    568         } while(*destdir!=0);
    569         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
    570             *outFilename++=U_FILE_SEP_CHAR;
    571         }
    572         inFilename=basename;
    573     } else {
    574         while(inFilename<basename) {
    575             *outFilename++=*inFilename++;
    576         }
    577     }
    578 
    579     if(suffix==NULL) {
    580         /* the filename does not have a suffix */
    581         uprv_strcpy(entryName, inFilename);
    582         if(optFilename != NULL) {
    583           uprv_strcpy(outFilename, optFilename);
    584         } else {
    585           uprv_strcpy(outFilename, inFilename);
    586         }
    587         uprv_strcat(outFilename, newSuffix);
    588     } else {
    589         char *saveOutFilename = outFilename;
    590         /* copy basename */
    591         while(inFilename<suffix) {
    592             if(*inFilename=='-') {
    593                 /* iSeries cannot have '-' in the .o objects. */
    594                 *outFilename++=*entryName++='_';
    595                 inFilename++;
    596             }
    597             else {
    598                 *outFilename++=*entryName++=*inFilename++;
    599             }
    600         }
    601 
    602         /* replace '.' by '_' */
    603         *outFilename++=*entryName++='_';
    604         ++inFilename;
    605 
    606         /* copy suffix */
    607         while(*inFilename!=0) {
    608             *outFilename++=*entryName++=*inFilename++;
    609         }
    610 
    611         *entryName=0;
    612 
    613         if(optFilename != NULL) {
    614             uprv_strcpy(saveOutFilename, optFilename);
    615             uprv_strcat(saveOutFilename, newSuffix);
    616         } else {
    617             /* add ".c" */
    618             uprv_strcpy(outFilename, newSuffix);
    619         }
    620     }
    621 }
    622 
    623 #ifdef CAN_GENERATE_OBJECTS
    624 static void
    625 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
    626     int64_t buffer[256];
    627     const char *filename;
    628     FileStream *in;
    629     int32_t length;
    630 
    631 #ifdef U_ELF
    632     /* Pointer to ELF header. Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
    633     const Elf32_Ehdr *pHeader32;
    634 #elif defined(U_WINDOWS)
    635     const IMAGE_FILE_HEADER *pHeader;
    636 #else
    637 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    638 #endif
    639 
    640     if(optMatchArch != NULL) {
    641         filename=optMatchArch;
    642     } else {
    643         /* set defaults */
    644 #ifdef U_ELF
    645         /* set EM_386 because elf.h does not provide better defaults */
    646         *pCPU=EM_386;
    647         *pBits=32;
    648         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
    649 #elif defined(U_WINDOWS)
    650 /* _M_IA64 should be defined in windows.h */
    651 #   if defined(_M_IA64)
    652         *pCPU=IMAGE_FILE_MACHINE_IA64;
    653 #   elif defined(_M_AMD64)
    654         *pCPU=IMAGE_FILE_MACHINE_AMD64;
    655 #   else
    656         *pCPU=IMAGE_FILE_MACHINE_I386;
    657 #   endif
    658         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    659         *pIsBigEndian=FALSE;
    660 #else
    661 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    662 #endif
    663         return;
    664     }
    665 
    666     in=T_FileStream_open(filename, "rb");
    667     if(in==NULL) {
    668         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
    669         exit(U_FILE_ACCESS_ERROR);
    670     }
    671     length=T_FileStream_read(in, buffer, sizeof(buffer));
    672 
    673 #ifdef U_ELF
    674     if(length<sizeof(Elf32_Ehdr)) {
    675         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    676         exit(U_UNSUPPORTED_ERROR);
    677     }
    678     pHeader32=(const Elf32_Ehdr *)buffer;
    679     if(
    680         pHeader32->e_ident[0]!=ELFMAG0 ||
    681         pHeader32->e_ident[1]!=ELFMAG1 ||
    682         pHeader32->e_ident[2]!=ELFMAG2 ||
    683         pHeader32->e_ident[3]!=ELFMAG3 ||
    684         pHeader32->e_ident[EI_CLASS]<ELFCLASS32 || pHeader32->e_ident[EI_CLASS]>ELFCLASS64
    685     ) {
    686         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
    687         exit(U_UNSUPPORTED_ERROR);
    688     }
    689 
    690     *pBits= pHeader32->e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
    691 #ifdef U_ELF64
    692     if(*pBits!=32 && *pBits!=64) {
    693         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
    694         exit(U_UNSUPPORTED_ERROR);
    695     }
    696 #else
    697     if(*pBits!=32) {
    698         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
    699         exit(U_UNSUPPORTED_ERROR);
    700     }
    701 #endif
    702 
    703     *pIsBigEndian=(UBool)(pHeader32->e_ident[EI_DATA]==ELFDATA2MSB);
    704     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
    705         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
    706         exit(U_UNSUPPORTED_ERROR);
    707     }
    708     /* TODO: Support byte swapping */
    709 
    710     *pCPU=pHeader32->e_machine;
    711 #elif defined(U_WINDOWS)
    712     if(length<sizeof(IMAGE_FILE_HEADER)) {
    713         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
    714         exit(U_UNSUPPORTED_ERROR);
    715     }
    716     pHeader=(const IMAGE_FILE_HEADER *)buffer;
    717     *pCPU=pHeader->Machine;
    718     /*
    719      * The number of bits is implicit with the Machine value.
    720      * *pBits is ignored in the calling code, so this need not be precise.
    721      */
    722     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
    723     /* Windows always runs on little-endian CPUs. */
    724     *pIsBigEndian=FALSE;
    725 #else
    726 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    727 #endif
    728 
    729     T_FileStream_close(in);
    730 }
    731 
    732 U_CAPI void U_EXPORT2
    733 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
    734     /* common variables */
    735     char buffer[4096], entry[40]={ 0 };
    736     FileStream *in, *out;
    737     const char *newSuffix;
    738     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
    739 
    740     uint16_t cpu, bits;
    741     UBool makeBigEndian;
    742 
    743     /* platform-specific variables and initialization code */
    744 #ifdef U_ELF
    745     /* 32-bit Elf file header */
    746     static Elf32_Ehdr header32={
    747         {
    748             /* e_ident[] */
    749             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    750             ELFCLASS32,
    751             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    752             EV_CURRENT /* EI_VERSION */
    753         },
    754         ET_REL,
    755         EM_386,
    756         EV_CURRENT, /* e_version */
    757         0, /* e_entry */
    758         0, /* e_phoff */
    759         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
    760         0, /* e_flags */
    761         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
    762         0, /* e_phentsize */
    763         0, /* e_phnum */
    764         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
    765         5, /* e_shnum */
    766         2 /* e_shstrndx */
    767     };
    768 
    769     /* 32-bit Elf section header table */
    770     static Elf32_Shdr sectionHeaders32[5]={
    771         { /* SHN_UNDEF */
    772             0
    773         },
    774         { /* .symtab */
    775             1, /* sh_name */
    776             SHT_SYMTAB,
    777             0, /* sh_flags */
    778             0, /* sh_addr */
    779             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
    780             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
    781             3, /* sh_link=sect hdr index of .strtab */
    782             1, /* sh_info=One greater than the symbol table index of the last
    783                 * local symbol (with STB_LOCAL). */
    784             4, /* sh_addralign */
    785             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
    786         },
    787         { /* .shstrtab */
    788             9, /* sh_name */
    789             SHT_STRTAB,
    790             0, /* sh_flags */
    791             0, /* sh_addr */
    792             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
    793             40, /* sh_size */
    794             0, /* sh_link */
    795             0, /* sh_info */
    796             1, /* sh_addralign */
    797             0 /* sh_entsize */
    798         },
    799         { /* .strtab */
    800             19, /* sh_name */
    801             SHT_STRTAB,
    802             0, /* sh_flags */
    803             0, /* sh_addr */
    804             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
    805             (Elf32_Word)sizeof(entry), /* sh_size */
    806             0, /* sh_link */
    807             0, /* sh_info */
    808             1, /* sh_addralign */
    809             0 /* sh_entsize */
    810         },
    811         { /* .rodata */
    812             27, /* sh_name */
    813             SHT_PROGBITS,
    814             SHF_ALLOC, /* sh_flags */
    815             0, /* sh_addr */
    816             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
    817             0, /* sh_size */
    818             0, /* sh_link */
    819             0, /* sh_info */
    820             16, /* sh_addralign */
    821             0 /* sh_entsize */
    822         }
    823     };
    824 
    825     /* symbol table */
    826     static Elf32_Sym symbols32[2]={
    827         { /* STN_UNDEF */
    828             0
    829         },
    830         { /* data entry point */
    831             1, /* st_name */
    832             0, /* st_value */
    833             0, /* st_size */
    834             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    835             0, /* st_other */
    836             4 /* st_shndx=index of related section table entry */
    837         }
    838     };
    839 
    840     /* section header string table, with decimal string offsets */
    841     static const char sectionStrings[40]=
    842         /*  0 */ "\0"
    843         /*  1 */ ".symtab\0"
    844         /*  9 */ ".shstrtab\0"
    845         /* 19 */ ".strtab\0"
    846         /* 27 */ ".rodata\0"
    847         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
    848         /* 40: padded to multiple of 8 bytes */
    849 
    850     /*
    851      * Use entry[] for the string table which will contain only the
    852      * entry point name.
    853      * entry[0] must be 0 (NUL)
    854      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
    855      */
    856 
    857     /* 16-align .rodata in the .o file, just in case */
    858     static const char padding[16]={ 0 };
    859     int32_t paddingSize;
    860 
    861 #ifdef U_ELF64
    862     /* 64-bit Elf file header */
    863     static Elf64_Ehdr header64={
    864         {
    865             /* e_ident[] */
    866             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
    867             ELFCLASS64,
    868             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
    869             EV_CURRENT /* EI_VERSION */
    870         },
    871         ET_REL,
    872         EM_X86_64,
    873         EV_CURRENT, /* e_version */
    874         0, /* e_entry */
    875         0, /* e_phoff */
    876         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
    877         0, /* e_flags */
    878         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
    879         0, /* e_phentsize */
    880         0, /* e_phnum */
    881         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
    882         5, /* e_shnum */
    883         2 /* e_shstrndx */
    884     };
    885 
    886     /* 64-bit Elf section header table */
    887     static Elf64_Shdr sectionHeaders64[5]={
    888         { /* SHN_UNDEF */
    889             0
    890         },
    891         { /* .symtab */
    892             1, /* sh_name */
    893             SHT_SYMTAB,
    894             0, /* sh_flags */
    895             0, /* sh_addr */
    896             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
    897             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
    898             3, /* sh_link=sect hdr index of .strtab */
    899             1, /* sh_info=One greater than the symbol table index of the last
    900                 * local symbol (with STB_LOCAL). */
    901             4, /* sh_addralign */
    902             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
    903         },
    904         { /* .shstrtab */
    905             9, /* sh_name */
    906             SHT_STRTAB,
    907             0, /* sh_flags */
    908             0, /* sh_addr */
    909             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
    910             40, /* sh_size */
    911             0, /* sh_link */
    912             0, /* sh_info */
    913             1, /* sh_addralign */
    914             0 /* sh_entsize */
    915         },
    916         { /* .strtab */
    917             19, /* sh_name */
    918             SHT_STRTAB,
    919             0, /* sh_flags */
    920             0, /* sh_addr */
    921             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
    922             (Elf64_Xword)sizeof(entry), /* sh_size */
    923             0, /* sh_link */
    924             0, /* sh_info */
    925             1, /* sh_addralign */
    926             0 /* sh_entsize */
    927         },
    928         { /* .rodata */
    929             27, /* sh_name */
    930             SHT_PROGBITS,
    931             SHF_ALLOC, /* sh_flags */
    932             0, /* sh_addr */
    933             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
    934             0, /* sh_size */
    935             0, /* sh_link */
    936             0, /* sh_info */
    937             16, /* sh_addralign */
    938             0 /* sh_entsize */
    939         }
    940     };
    941 
    942     /*
    943      * 64-bit symbol table
    944      * careful: different order of items compared with Elf32_Sym!
    945      */
    946     static Elf64_Sym symbols64[2]={
    947         { /* STN_UNDEF */
    948             0
    949         },
    950         { /* data entry point */
    951             1, /* st_name */
    952             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
    953             0, /* st_other */
    954             4, /* st_shndx=index of related section table entry */
    955             0, /* st_value */
    956             0 /* st_size */
    957         }
    958     };
    959 
    960 #endif /* U_ELF64 */
    961 
    962     /* entry[] has a leading NUL */
    963     entryOffset=1;
    964 
    965     /* in the common code, count entryLength from after the NUL */
    966     entryLengthOffset=1;
    967 
    968     newSuffix=".o";
    969 
    970 #elif defined(U_WINDOWS)
    971     struct {
    972         IMAGE_FILE_HEADER fileHeader;
    973         IMAGE_SECTION_HEADER sections[2];
    974         char linkerOptions[100];
    975     } objHeader;
    976     IMAGE_SYMBOL symbols[1];
    977     struct {
    978         DWORD sizeofLongNames;
    979         char longNames[100];
    980     } symbolNames;
    981 
    982     /*
    983      * entry sometimes has a leading '_'
    984      * overwritten if entryOffset==0 depending on the target platform
    985      * see check for cpu below
    986      */
    987     entry[0]='_';
    988 
    989     newSuffix=".obj";
    990 #else
    991 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
    992 #endif
    993 
    994     /* deal with options, files and the entry point name */
    995     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
    996     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%hu\n", cpu, bits, makeBigEndian);
    997 #ifdef U_WINDOWS
    998     if(cpu==IMAGE_FILE_MACHINE_I386) {
    999         entryOffset=1;
   1000     }
   1001 #endif
   1002 
   1003     in=T_FileStream_open(filename, "rb");
   1004     if(in==NULL) {
   1005         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
   1006         exit(U_FILE_ACCESS_ERROR);
   1007     }
   1008     size=T_FileStream_size(in);
   1009 
   1010     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
   1011     if (outFilePath != NULL) {
   1012         uprv_strcpy(outFilePath, buffer);
   1013     }
   1014 
   1015     if(optEntryPoint != NULL) {
   1016         uprv_strcpy(entry+entryOffset, optEntryPoint);
   1017         uprv_strcat(entry+entryOffset, "_dat");
   1018     }
   1019     /* turn dashes in the entry name into underscores */
   1020     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
   1021     for(i=0; i<entryLength; ++i) {
   1022         if(entry[entryLengthOffset+i]=='-') {
   1023             entry[entryLengthOffset+i]='_';
   1024         }
   1025     }
   1026 
   1027     /* open the output file */
   1028     out=T_FileStream_open(buffer, "wb");
   1029     if(out==NULL) {
   1030         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
   1031         exit(U_FILE_ACCESS_ERROR);
   1032     }
   1033 
   1034 #ifdef U_ELF
   1035     if(bits==32) {
   1036         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1037         header32.e_machine=cpu;
   1038 
   1039         /* 16-align .rodata in the .o file, just in case */
   1040         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
   1041         if(paddingSize!=0) {
   1042                 paddingSize=0x10-paddingSize;
   1043                 sectionHeaders32[4].sh_offset+=paddingSize;
   1044         }
   1045 
   1046         sectionHeaders32[4].sh_size=(Elf32_Word)size;
   1047 
   1048         symbols32[1].st_size=(Elf32_Word)size;
   1049 
   1050         /* write .o headers */
   1051         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
   1052         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
   1053         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
   1054     } else /* bits==64 */ {
   1055 #ifdef U_ELF64
   1056         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
   1057         header64.e_machine=cpu;
   1058 
   1059         /* 16-align .rodata in the .o file, just in case */
   1060         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
   1061         if(paddingSize!=0) {
   1062                 paddingSize=0x10-paddingSize;
   1063                 sectionHeaders64[4].sh_offset+=paddingSize;
   1064         }
   1065 
   1066         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
   1067 
   1068         symbols64[1].st_size=(Elf64_Xword)size;
   1069 
   1070         /* write .o headers */
   1071         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
   1072         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
   1073         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
   1074 #endif
   1075     }
   1076 
   1077     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
   1078     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
   1079     if(paddingSize!=0) {
   1080         T_FileStream_write(out, padding, paddingSize);
   1081     }
   1082 #elif defined(U_WINDOWS)
   1083     /* populate the .obj headers */
   1084     uprv_memset(&objHeader, 0, sizeof(objHeader));
   1085     uprv_memset(&symbols, 0, sizeof(symbols));
   1086     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
   1087 
   1088     /* write the linker export directive */
   1089     uprv_strcpy(objHeader.linkerOptions, "-export:");
   1090     length=8;
   1091     uprv_strcpy(objHeader.linkerOptions+length, entry);
   1092     length+=entryLength;
   1093     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
   1094     length+=6;
   1095 
   1096     /* set the file header */
   1097     objHeader.fileHeader.Machine=cpu;
   1098     objHeader.fileHeader.NumberOfSections=2;
   1099     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
   1100     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
   1101     objHeader.fileHeader.NumberOfSymbols=1;
   1102 
   1103     /* set the section for the linker options */
   1104     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
   1105     objHeader.sections[0].SizeOfRawData=length;
   1106     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
   1107     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
   1108 
   1109     /* set the data section */
   1110     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
   1111     objHeader.sections[1].SizeOfRawData=size;
   1112     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
   1113     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
   1114 
   1115     /* set the symbol table */
   1116     if(entryLength<=8) {
   1117         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
   1118         symbolNames.sizeofLongNames=4;
   1119     } else {
   1120         symbols[0].N.Name.Short=0;
   1121         symbols[0].N.Name.Long=4;
   1122         symbolNames.sizeofLongNames=4+entryLength+1;
   1123         uprv_strcpy(symbolNames.longNames, entry);
   1124     }
   1125     symbols[0].SectionNumber=2;
   1126     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
   1127 
   1128     /* write the file header and the linker options section */
   1129     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
   1130 #else
   1131 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
   1132 #endif
   1133 
   1134     /* copy the data file into section 2 */
   1135     for(;;) {
   1136         length=T_FileStream_read(in, buffer, sizeof(buffer));
   1137         if(length==0) {
   1138             break;
   1139         }
   1140         T_FileStream_write(out, buffer, (int32_t)length);
   1141     }
   1142 
   1143 #ifdef U_WINDOWS
   1144     /* write the symbol table */
   1145     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
   1146     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
   1147 #endif
   1148 
   1149     if(T_FileStream_error(in)) {
   1150         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
   1151         exit(U_FILE_ACCESS_ERROR);
   1152     }
   1153 
   1154     if(T_FileStream_error(out)) {
   1155         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
   1156         exit(U_FILE_ACCESS_ERROR);
   1157     }
   1158 
   1159     T_FileStream_close(out);
   1160     T_FileStream_close(in);
   1161 }
   1162 #endif
   1163