1 /****************************************************************************** 2 * Copyright (C) 2009-2013, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************************* 5 */ 6 #include "unicode/utypes.h" 7 8 #if U_PLATFORM_HAS_WIN32_API 9 # define VC_EXTRALEAN 10 # define WIN32_LEAN_AND_MEAN 11 # define NOUSER 12 # define NOSERVICE 13 # define NOIME 14 # define NOMCX 15 #include <windows.h> 16 #include <time.h> 17 # ifdef __GNUC__ 18 # define WINDOWS_WITH_GNUC 19 # endif 20 #endif 21 22 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H 23 # define U_ELF 24 #endif 25 26 #ifdef U_ELF 27 # include <elf.h> 28 # if defined(ELFCLASS64) 29 # define U_ELF64 30 # endif 31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */ 32 # ifndef EM_X86_64 33 # define EM_X86_64 62 34 # endif 35 # define ICU_ENTRY_OFFSET 0 36 #endif 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include "unicode/putil.h" 41 #include "cmemory.h" 42 #include "cstring.h" 43 #include "filestrm.h" 44 #include "toolutil.h" 45 #include "unicode/uclean.h" 46 #include "uoptions.h" 47 #include "pkg_genc.h" 48 49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) 50 51 #define HEX_0X 0 /* 0x1234 */ 52 #define HEX_0H 1 /* 01234h */ 53 54 /* prototypes --------------------------------------------------------------- */ 55 static void 56 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename); 57 58 static uint32_t 59 write8(FileStream *out, uint8_t byte, uint32_t column); 60 61 static uint32_t 62 write32(FileStream *out, uint32_t byte, uint32_t column); 63 64 #if U_PLATFORM == U_PF_OS400 65 static uint32_t 66 write8str(FileStream *out, uint8_t byte, uint32_t column); 67 #endif 68 /* -------------------------------------------------------------------------- */ 69 70 /* 71 Creating Template Files for New Platforms 72 73 Let the cc compiler help you get started. 74 Compile this program 75 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16}; 76 with the -S option to produce assembly output. 77 78 For example, this will generate array.s: 79 gcc -S array.c 80 81 This will produce a .s file that may look like this: 82 83 .file "array.c" 84 .version "01.01" 85 gcc2_compiled.: 86 .globl x 87 .section .rodata 88 .align 4 89 .type x,@object 90 .size x,20 91 x: 92 .long 1 93 .long 2 94 .long -559038737 95 .long -1 96 .long 16 97 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)" 98 99 which gives a starting point that will compile, and can be transformed 100 to become the template, generally with some consulting of as docs and 101 some experimentation. 102 103 If you want ICU to automatically use this assembly, you should 104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file, 105 where the name is the compiler or platform that you used in this 106 assemblyHeader data structure. 107 */ 108 static const struct AssemblyType { 109 const char *name; 110 const char *header; 111 const char *beginLine; 112 const char *footer; 113 int8_t hexType; /* HEX_0X or HEX_0h */ 114 } assemblyHeader[] = { 115 // For gcc assemblers, the meaning of .align changes depending on the 116 // hardware, so we use .balign 16 which always means 16 bytes. 117 // https://sourceware.org/binutils/docs/as/Pseudo-Ops.html 118 {"gcc", 119 ".globl %s\n" 120 "\t.section .note.GNU-stack,\"\",%%progbits\n" 121 "\t.section .rodata\n" 122 "\t.balign 16\n" 123 "\t.type %s,%%object\n" 124 "%s:\n\n", 125 126 ".long ","",HEX_0X 127 }, 128 {"gcc-darwin", 129 /*"\t.section __TEXT,__text,regular,pure_instructions\n" 130 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/ 131 ".globl _%s\n" 132 "\t.data\n" 133 "\t.const\n" 134 "\t.balign 16\n" 135 "_%s:\n\n", 136 137 ".long ","",HEX_0X 138 }, 139 {"gcc-cygwin", 140 ".globl _%s\n" 141 "\t.section .rodata\n" 142 "\t.balign 16\n" 143 "_%s:\n\n", 144 145 ".long ","",HEX_0X 146 }, 147 {"gcc-mingw64", 148 ".globl %s\n" 149 "\t.section .rodata\n" 150 "\t.balign 16\n" 151 "%s:\n\n", 152 153 ".long ","",HEX_0X 154 }, 155 // 16 bytes alignment. 156 // http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf 157 {"sun", 158 "\t.section \".rodata\"\n" 159 "\t.align 16\n" 160 ".globl %s\n" 161 "%s:\n", 162 163 ".word ","",HEX_0X 164 }, 165 // 16 bytes alignment for sun-x86. 166 // http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html 167 {"sun-x86", 168 "Drodata.rodata:\n" 169 "\t.type Drodata.rodata,@object\n" 170 "\t.size Drodata.rodata,0\n" 171 "\t.globl %s\n" 172 "\t.align 16\n" 173 "%s:\n", 174 175 ".4byte ","",HEX_0X 176 }, 177 // 1<<4 bit alignment for aix. 178 // http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm 179 {"xlc", 180 ".globl %s{RO}\n" 181 "\t.toc\n" 182 "%s:\n" 183 "\t.csect %s{RO}, 4\n", 184 185 ".long ","",HEX_0X 186 }, 187 {"aCC-ia64", 188 "\t.file \"%s.s\"\n" 189 "\t.type %s,@object\n" 190 "\t.global %s\n" 191 "\t.secalias .abe$0.rodata, \".rodata\"\n" 192 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n" 193 "\t.align 16\n" 194 "%s::\t", 195 196 "data4 ","",HEX_0X 197 }, 198 {"aCC-parisc", 199 "\t.SPACE $TEXT$\n" 200 "\t.SUBSPA $LIT$\n" 201 "%s\n" 202 "\t.EXPORT %s\n" 203 "\t.ALIGN 16\n", 204 205 ".WORD ","",HEX_0X 206 }, 207 // align 16 bytes 208 // http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx 209 { "masm", 210 "\tTITLE %s\n" 211 "; generated by genccode\n" 212 ".386\n" 213 ".model flat\n" 214 "\tPUBLIC _%s\n" 215 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n" 216 "\tALIGN 16\n" 217 "_%s\tLABEL DWORD\n", 218 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H 219 } 220 }; 221 222 static int32_t assemblyHeaderIndex = -1; 223 static int32_t hexType = HEX_0X; 224 225 U_CAPI UBool U_EXPORT2 226 checkAssemblyHeaderName(const char* optAssembly) { 227 int32_t idx; 228 assemblyHeaderIndex = -1; 229 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 230 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) { 231 assemblyHeaderIndex = idx; 232 hexType = assemblyHeader[idx].hexType; /* set the hex type */ 233 return TRUE; 234 } 235 } 236 237 return FALSE; 238 } 239 240 241 U_CAPI void U_EXPORT2 242 printAssemblyHeadersToStdErr(void) { 243 int32_t idx; 244 fprintf(stderr, "%s", assemblyHeader[0].name); 245 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 246 fprintf(stderr, ", %s", assemblyHeader[idx].name); 247 } 248 fprintf(stderr, 249 ")\n"); 250 } 251 252 U_CAPI void U_EXPORT2 253 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) { 254 uint32_t column = MAX_COLUMN; 255 char entry[64]; 256 uint32_t buffer[1024]; 257 char *bufferStr = (char *)buffer; 258 FileStream *in, *out; 259 size_t i, length; 260 261 in=T_FileStream_open(filename, "rb"); 262 if(in==NULL) { 263 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 264 exit(U_FILE_ACCESS_ERROR); 265 } 266 267 getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename); 268 out=T_FileStream_open(bufferStr, "w"); 269 if(out==NULL) { 270 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr); 271 exit(U_FILE_ACCESS_ERROR); 272 } 273 274 if (outFilePath != NULL) { 275 uprv_strcpy(outFilePath, bufferStr); 276 } 277 278 #ifdef WINDOWS_WITH_GNUC 279 /* Need to fix the file seperator character when using MinGW. */ 280 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/'); 281 #endif 282 283 if(optEntryPoint != NULL) { 284 uprv_strcpy(entry, optEntryPoint); 285 uprv_strcat(entry, "_dat"); 286 } 287 288 /* turn dashes or dots in the entry name into underscores */ 289 length=uprv_strlen(entry); 290 for(i=0; i<length; ++i) { 291 if(entry[i]=='-' || entry[i]=='.') { 292 entry[i]='_'; 293 } 294 } 295 296 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header, 297 entry, entry, entry, entry, 298 entry, entry, entry, entry); 299 T_FileStream_writeLine(out, bufferStr); 300 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine); 301 302 for(;;) { 303 length=T_FileStream_read(in, buffer, sizeof(buffer)); 304 if(length==0) { 305 break; 306 } 307 if (length != sizeof(buffer)) { 308 /* pad with extra 0's when at the end of the file */ 309 for(i=0; i < (length % sizeof(uint32_t)); ++i) { 310 buffer[length+i] = 0; 311 } 312 } 313 for(i=0; i<(length/sizeof(buffer[0])); i++) { 314 column = write32(out, buffer[i], column); 315 } 316 } 317 318 T_FileStream_writeLine(out, "\n"); 319 320 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer, 321 entry, entry, entry, entry, 322 entry, entry, entry, entry); 323 T_FileStream_writeLine(out, bufferStr); 324 325 if(T_FileStream_error(in)) { 326 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 327 exit(U_FILE_ACCESS_ERROR); 328 } 329 330 if(T_FileStream_error(out)) { 331 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 332 exit(U_FILE_ACCESS_ERROR); 333 } 334 335 T_FileStream_close(out); 336 T_FileStream_close(in); 337 } 338 339 U_CAPI void U_EXPORT2 340 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) { 341 uint32_t column = MAX_COLUMN; 342 char buffer[4096], entry[64]; 343 FileStream *in, *out; 344 size_t i, length; 345 346 in=T_FileStream_open(filename, "rb"); 347 if(in==NULL) { 348 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 349 exit(U_FILE_ACCESS_ERROR); 350 } 351 352 if(optName != NULL) { /* prepend 'icudt28_' */ 353 strcpy(entry, optName); 354 strcat(entry, "_"); 355 } else { 356 entry[0] = 0; 357 } 358 359 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename); 360 if (outFilePath != NULL) { 361 uprv_strcpy(outFilePath, buffer); 362 } 363 out=T_FileStream_open(buffer, "w"); 364 if(out==NULL) { 365 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 366 exit(U_FILE_ACCESS_ERROR); 367 } 368 369 /* turn dashes or dots in the entry name into underscores */ 370 length=uprv_strlen(entry); 371 for(i=0; i<length; ++i) { 372 if(entry[i]=='-' || entry[i]=='.') { 373 entry[i]='_'; 374 } 375 } 376 377 #if U_PLATFORM == U_PF_OS400 378 /* 379 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c 380 381 This is here because this platform can't currently put 382 const data into the read-only pages of an object or 383 shared library (service program). Only strings are allowed in read-only 384 pages, so we use char * strings to store the data. 385 386 In order to prevent the beginning of the data from ever matching the 387 magic numbers we must still use the initial double. 388 [grhoten 4/24/2003] 389 */ 390 sprintf(buffer, 391 "#ifndef IN_GENERATED_CCODE\n" 392 "#define IN_GENERATED_CCODE\n" 393 "#define U_DISABLE_RENAMING 1\n" 394 "#include \"unicode/umachine.h\"\n" 395 "#endif\n" 396 "U_CDECL_BEGIN\n" 397 "const struct {\n" 398 " double bogus;\n" 399 " const char *bytes; \n" 400 "} %s={ 0.0, \n", 401 entry); 402 T_FileStream_writeLine(out, buffer); 403 404 for(;;) { 405 length=T_FileStream_read(in, buffer, sizeof(buffer)); 406 if(length==0) { 407 break; 408 } 409 for(i=0; i<length; ++i) { 410 column = write8str(out, (uint8_t)buffer[i], column); 411 } 412 } 413 414 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n"); 415 #else 416 /* Function renaming shouldn't be done in data */ 417 sprintf(buffer, 418 "#ifndef IN_GENERATED_CCODE\n" 419 "#define IN_GENERATED_CCODE\n" 420 "#define U_DISABLE_RENAMING 1\n" 421 "#include \"unicode/umachine.h\"\n" 422 "#endif\n" 423 "U_CDECL_BEGIN\n" 424 "const struct {\n" 425 " double bogus;\n" 426 " uint8_t bytes[%ld]; \n" 427 "} %s={ 0.0, {\n", 428 (long)T_FileStream_size(in), entry); 429 T_FileStream_writeLine(out, buffer); 430 431 for(;;) { 432 length=T_FileStream_read(in, buffer, sizeof(buffer)); 433 if(length==0) { 434 break; 435 } 436 for(i=0; i<length; ++i) { 437 column = write8(out, (uint8_t)buffer[i], column); 438 } 439 } 440 441 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n"); 442 #endif 443 444 if(T_FileStream_error(in)) { 445 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 446 exit(U_FILE_ACCESS_ERROR); 447 } 448 449 if(T_FileStream_error(out)) { 450 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 451 exit(U_FILE_ACCESS_ERROR); 452 } 453 454 T_FileStream_close(out); 455 T_FileStream_close(in); 456 } 457 458 static uint32_t 459 write32(FileStream *out, uint32_t bitField, uint32_t column) { 460 int32_t i; 461 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */ 462 char *s = bitFieldStr; 463 uint8_t *ptrIdx = (uint8_t *)&bitField; 464 static const char hexToStr[16] = { 465 '0','1','2','3', 466 '4','5','6','7', 467 '8','9','A','B', 468 'C','D','E','F' 469 }; 470 471 /* write the value, possibly with comma and newline */ 472 if(column==MAX_COLUMN) { 473 /* first byte */ 474 column=1; 475 } else if(column<32) { 476 *(s++)=','; 477 ++column; 478 } else { 479 *(s++)='\n'; 480 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine); 481 s+=uprv_strlen(s); 482 column=1; 483 } 484 485 if (bitField < 10) { 486 /* It's a small number. Don't waste the space for 0x */ 487 *(s++)=hexToStr[bitField]; 488 } 489 else { 490 int seenNonZero = 0; /* This is used to remove leading zeros */ 491 492 if(hexType==HEX_0X) { 493 *(s++)='0'; 494 *(s++)='x'; 495 } else if(hexType==HEX_0H) { 496 *(s++)='0'; 497 } 498 499 /* This creates a 32-bit field */ 500 #if U_IS_BIG_ENDIAN 501 for (i = 0; i < sizeof(uint32_t); i++) 502 #else 503 for (i = sizeof(uint32_t)-1; i >= 0 ; i--) 504 #endif 505 { 506 uint8_t value = ptrIdx[i]; 507 if (value || seenNonZero) { 508 *(s++)=hexToStr[value>>4]; 509 *(s++)=hexToStr[value&0xF]; 510 seenNonZero = 1; 511 } 512 } 513 if(hexType==HEX_0H) { 514 *(s++)='h'; 515 } 516 } 517 518 *(s++)=0; 519 T_FileStream_writeLine(out, bitFieldStr); 520 return column; 521 } 522 523 static uint32_t 524 write8(FileStream *out, uint8_t byte, uint32_t column) { 525 char s[4]; 526 int i=0; 527 528 /* convert the byte value to a string */ 529 if(byte>=100) { 530 s[i++]=(char)('0'+byte/100); 531 byte%=100; 532 } 533 if(i>0 || byte>=10) { 534 s[i++]=(char)('0'+byte/10); 535 byte%=10; 536 } 537 s[i++]=(char)('0'+byte); 538 s[i]=0; 539 540 /* write the value, possibly with comma and newline */ 541 if(column==MAX_COLUMN) { 542 /* first byte */ 543 column=1; 544 } else if(column<16) { 545 T_FileStream_writeLine(out, ","); 546 ++column; 547 } else { 548 T_FileStream_writeLine(out, ",\n"); 549 column=1; 550 } 551 T_FileStream_writeLine(out, s); 552 return column; 553 } 554 555 #if U_PLATFORM == U_PF_OS400 556 static uint32_t 557 write8str(FileStream *out, uint8_t byte, uint32_t column) { 558 char s[8]; 559 560 if (byte > 7) 561 sprintf(s, "\\x%X", byte); 562 else 563 sprintf(s, "\\%X", byte); 564 565 /* write the value, possibly with comma and newline */ 566 if(column==MAX_COLUMN) { 567 /* first byte */ 568 column=1; 569 T_FileStream_writeLine(out, "\""); 570 } else if(column<24) { 571 ++column; 572 } else { 573 T_FileStream_writeLine(out, "\"\n\""); 574 column=1; 575 } 576 T_FileStream_writeLine(out, s); 577 return column; 578 } 579 #endif 580 581 static void 582 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) { 583 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.'); 584 585 /* copy path */ 586 if(destdir!=NULL && *destdir!=0) { 587 do { 588 *outFilename++=*destdir++; 589 } while(*destdir!=0); 590 if(*(outFilename-1)!=U_FILE_SEP_CHAR) { 591 *outFilename++=U_FILE_SEP_CHAR; 592 } 593 inFilename=basename; 594 } else { 595 while(inFilename<basename) { 596 *outFilename++=*inFilename++; 597 } 598 } 599 600 if(suffix==NULL) { 601 /* the filename does not have a suffix */ 602 uprv_strcpy(entryName, inFilename); 603 if(optFilename != NULL) { 604 uprv_strcpy(outFilename, optFilename); 605 } else { 606 uprv_strcpy(outFilename, inFilename); 607 } 608 uprv_strcat(outFilename, newSuffix); 609 } else { 610 char *saveOutFilename = outFilename; 611 /* copy basename */ 612 while(inFilename<suffix) { 613 if(*inFilename=='-') { 614 /* iSeries cannot have '-' in the .o objects. */ 615 *outFilename++=*entryName++='_'; 616 inFilename++; 617 } 618 else { 619 *outFilename++=*entryName++=*inFilename++; 620 } 621 } 622 623 /* replace '.' by '_' */ 624 *outFilename++=*entryName++='_'; 625 ++inFilename; 626 627 /* copy suffix */ 628 while(*inFilename!=0) { 629 *outFilename++=*entryName++=*inFilename++; 630 } 631 632 *entryName=0; 633 634 if(optFilename != NULL) { 635 uprv_strcpy(saveOutFilename, optFilename); 636 uprv_strcat(saveOutFilename, newSuffix); 637 } else { 638 /* add ".c" */ 639 uprv_strcpy(outFilename, newSuffix); 640 } 641 } 642 } 643 644 #ifdef CAN_GENERATE_OBJECTS 645 static void 646 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) { 647 union { 648 char bytes[2048]; 649 #ifdef U_ELF 650 Elf32_Ehdr header32; 651 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */ 652 #elif U_PLATFORM_HAS_WIN32_API 653 IMAGE_FILE_HEADER header; 654 #endif 655 } buffer; 656 657 const char *filename; 658 FileStream *in; 659 int32_t length; 660 661 #ifdef U_ELF 662 663 #elif U_PLATFORM_HAS_WIN32_API 664 const IMAGE_FILE_HEADER *pHeader; 665 #else 666 # error "Unknown platform for CAN_GENERATE_OBJECTS." 667 #endif 668 669 if(optMatchArch != NULL) { 670 filename=optMatchArch; 671 } else { 672 /* set defaults */ 673 #ifdef U_ELF 674 /* set EM_386 because elf.h does not provide better defaults */ 675 *pCPU=EM_386; 676 *pBits=32; 677 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB); 678 #elif U_PLATFORM_HAS_WIN32_API 679 /* _M_IA64 should be defined in windows.h */ 680 # if defined(_M_IA64) 681 *pCPU=IMAGE_FILE_MACHINE_IA64; 682 # elif defined(_M_AMD64) 683 *pCPU=IMAGE_FILE_MACHINE_AMD64; 684 # else 685 *pCPU=IMAGE_FILE_MACHINE_I386; 686 # endif 687 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 688 *pIsBigEndian=FALSE; 689 #else 690 # error "Unknown platform for CAN_GENERATE_OBJECTS." 691 #endif 692 return; 693 } 694 695 in=T_FileStream_open(filename, "rb"); 696 if(in==NULL) { 697 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename); 698 exit(U_FILE_ACCESS_ERROR); 699 } 700 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes)); 701 702 #ifdef U_ELF 703 if(length<sizeof(Elf32_Ehdr)) { 704 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 705 exit(U_UNSUPPORTED_ERROR); 706 } 707 if( 708 buffer.header32.e_ident[0]!=ELFMAG0 || 709 buffer.header32.e_ident[1]!=ELFMAG1 || 710 buffer.header32.e_ident[2]!=ELFMAG2 || 711 buffer.header32.e_ident[3]!=ELFMAG3 || 712 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64 713 ) { 714 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename); 715 exit(U_UNSUPPORTED_ERROR); 716 } 717 718 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */ 719 #ifdef U_ELF64 720 if(*pBits!=32 && *pBits!=64) { 721 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n"); 722 exit(U_UNSUPPORTED_ERROR); 723 } 724 #else 725 if(*pBits!=32) { 726 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n"); 727 exit(U_UNSUPPORTED_ERROR); 728 } 729 #endif 730 731 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB); 732 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) { 733 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n"); 734 exit(U_UNSUPPORTED_ERROR); 735 } 736 /* TODO: Support byte swapping */ 737 738 *pCPU=buffer.header32.e_machine; 739 #elif U_PLATFORM_HAS_WIN32_API 740 if(length<sizeof(IMAGE_FILE_HEADER)) { 741 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 742 exit(U_UNSUPPORTED_ERROR); 743 } 744 /* TODO: Use buffer.header. Keep aliasing legal. */ 745 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes; 746 *pCPU=pHeader->Machine; 747 /* 748 * The number of bits is implicit with the Machine value. 749 * *pBits is ignored in the calling code, so this need not be precise. 750 */ 751 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 752 /* Windows always runs on little-endian CPUs. */ 753 *pIsBigEndian=FALSE; 754 #else 755 # error "Unknown platform for CAN_GENERATE_OBJECTS." 756 #endif 757 758 T_FileStream_close(in); 759 } 760 761 U_CAPI void U_EXPORT2 762 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) { 763 /* common variables */ 764 char buffer[4096], entry[40]={ 0 }; 765 FileStream *in, *out; 766 const char *newSuffix; 767 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0; 768 769 uint16_t cpu, bits; 770 UBool makeBigEndian; 771 772 /* platform-specific variables and initialization code */ 773 #ifdef U_ELF 774 /* 32-bit Elf file header */ 775 static Elf32_Ehdr header32={ 776 { 777 /* e_ident[] */ 778 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 779 ELFCLASS32, 780 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 781 EV_CURRENT /* EI_VERSION */ 782 }, 783 ET_REL, 784 EM_386, 785 EV_CURRENT, /* e_version */ 786 0, /* e_entry */ 787 0, /* e_phoff */ 788 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */ 789 0, /* e_flags */ 790 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */ 791 0, /* e_phentsize */ 792 0, /* e_phnum */ 793 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */ 794 5, /* e_shnum */ 795 2 /* e_shstrndx */ 796 }; 797 798 /* 32-bit Elf section header table */ 799 static Elf32_Shdr sectionHeaders32[5]={ 800 { /* SHN_UNDEF */ 801 0 802 }, 803 { /* .symtab */ 804 1, /* sh_name */ 805 SHT_SYMTAB, 806 0, /* sh_flags */ 807 0, /* sh_addr */ 808 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */ 809 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */ 810 3, /* sh_link=sect hdr index of .strtab */ 811 1, /* sh_info=One greater than the symbol table index of the last 812 * local symbol (with STB_LOCAL). */ 813 4, /* sh_addralign */ 814 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */ 815 }, 816 { /* .shstrtab */ 817 9, /* sh_name */ 818 SHT_STRTAB, 819 0, /* sh_flags */ 820 0, /* sh_addr */ 821 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */ 822 40, /* sh_size */ 823 0, /* sh_link */ 824 0, /* sh_info */ 825 1, /* sh_addralign */ 826 0 /* sh_entsize */ 827 }, 828 { /* .strtab */ 829 19, /* sh_name */ 830 SHT_STRTAB, 831 0, /* sh_flags */ 832 0, /* sh_addr */ 833 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */ 834 (Elf32_Word)sizeof(entry), /* sh_size */ 835 0, /* sh_link */ 836 0, /* sh_info */ 837 1, /* sh_addralign */ 838 0 /* sh_entsize */ 839 }, 840 { /* .rodata */ 841 27, /* sh_name */ 842 SHT_PROGBITS, 843 SHF_ALLOC, /* sh_flags */ 844 0, /* sh_addr */ 845 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */ 846 0, /* sh_size */ 847 0, /* sh_link */ 848 0, /* sh_info */ 849 16, /* sh_addralign */ 850 0 /* sh_entsize */ 851 } 852 }; 853 854 /* symbol table */ 855 static Elf32_Sym symbols32[2]={ 856 { /* STN_UNDEF */ 857 0 858 }, 859 { /* data entry point */ 860 1, /* st_name */ 861 0, /* st_value */ 862 0, /* st_size */ 863 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 864 0, /* st_other */ 865 4 /* st_shndx=index of related section table entry */ 866 } 867 }; 868 869 /* section header string table, with decimal string offsets */ 870 static const char sectionStrings[40]= 871 /* 0 */ "\0" 872 /* 1 */ ".symtab\0" 873 /* 9 */ ".shstrtab\0" 874 /* 19 */ ".strtab\0" 875 /* 27 */ ".rodata\0" 876 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */ 877 /* 40: padded to multiple of 8 bytes */ 878 879 /* 880 * Use entry[] for the string table which will contain only the 881 * entry point name. 882 * entry[0] must be 0 (NUL) 883 * The entry point name can be up to 38 characters long (sizeof(entry)-2). 884 */ 885 886 /* 16-align .rodata in the .o file, just in case */ 887 static const char padding[16]={ 0 }; 888 int32_t paddingSize; 889 890 #ifdef U_ELF64 891 /* 64-bit Elf file header */ 892 static Elf64_Ehdr header64={ 893 { 894 /* e_ident[] */ 895 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 896 ELFCLASS64, 897 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 898 EV_CURRENT /* EI_VERSION */ 899 }, 900 ET_REL, 901 EM_X86_64, 902 EV_CURRENT, /* e_version */ 903 0, /* e_entry */ 904 0, /* e_phoff */ 905 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */ 906 0, /* e_flags */ 907 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */ 908 0, /* e_phentsize */ 909 0, /* e_phnum */ 910 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */ 911 5, /* e_shnum */ 912 2 /* e_shstrndx */ 913 }; 914 915 /* 64-bit Elf section header table */ 916 static Elf64_Shdr sectionHeaders64[5]={ 917 { /* SHN_UNDEF */ 918 0 919 }, 920 { /* .symtab */ 921 1, /* sh_name */ 922 SHT_SYMTAB, 923 0, /* sh_flags */ 924 0, /* sh_addr */ 925 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */ 926 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */ 927 3, /* sh_link=sect hdr index of .strtab */ 928 1, /* sh_info=One greater than the symbol table index of the last 929 * local symbol (with STB_LOCAL). */ 930 4, /* sh_addralign */ 931 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */ 932 }, 933 { /* .shstrtab */ 934 9, /* sh_name */ 935 SHT_STRTAB, 936 0, /* sh_flags */ 937 0, /* sh_addr */ 938 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */ 939 40, /* sh_size */ 940 0, /* sh_link */ 941 0, /* sh_info */ 942 1, /* sh_addralign */ 943 0 /* sh_entsize */ 944 }, 945 { /* .strtab */ 946 19, /* sh_name */ 947 SHT_STRTAB, 948 0, /* sh_flags */ 949 0, /* sh_addr */ 950 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */ 951 (Elf64_Xword)sizeof(entry), /* sh_size */ 952 0, /* sh_link */ 953 0, /* sh_info */ 954 1, /* sh_addralign */ 955 0 /* sh_entsize */ 956 }, 957 { /* .rodata */ 958 27, /* sh_name */ 959 SHT_PROGBITS, 960 SHF_ALLOC, /* sh_flags */ 961 0, /* sh_addr */ 962 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */ 963 0, /* sh_size */ 964 0, /* sh_link */ 965 0, /* sh_info */ 966 16, /* sh_addralign */ 967 0 /* sh_entsize */ 968 } 969 }; 970 971 /* 972 * 64-bit symbol table 973 * careful: different order of items compared with Elf32_sym! 974 */ 975 static Elf64_Sym symbols64[2]={ 976 { /* STN_UNDEF */ 977 0 978 }, 979 { /* data entry point */ 980 1, /* st_name */ 981 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 982 0, /* st_other */ 983 4, /* st_shndx=index of related section table entry */ 984 0, /* st_value */ 985 0 /* st_size */ 986 } 987 }; 988 989 #endif /* U_ELF64 */ 990 991 /* entry[] have a leading NUL */ 992 entryOffset=1; 993 994 /* in the common code, count entryLength from after the NUL */ 995 entryLengthOffset=1; 996 997 newSuffix=".o"; 998 999 #elif U_PLATFORM_HAS_WIN32_API 1000 struct { 1001 IMAGE_FILE_HEADER fileHeader; 1002 IMAGE_SECTION_HEADER sections[2]; 1003 char linkerOptions[100]; 1004 } objHeader; 1005 IMAGE_SYMBOL symbols[1]; 1006 struct { 1007 DWORD sizeofLongNames; 1008 char longNames[100]; 1009 } symbolNames; 1010 1011 /* 1012 * entry sometimes have a leading '_' 1013 * overwritten if entryOffset==0 depending on the target platform 1014 * see check for cpu below 1015 */ 1016 entry[0]='_'; 1017 1018 newSuffix=".obj"; 1019 #else 1020 # error "Unknown platform for CAN_GENERATE_OBJECTS." 1021 #endif 1022 1023 /* deal with options, files and the entry point name */ 1024 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch); 1025 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian); 1026 #if U_PLATFORM_HAS_WIN32_API 1027 if(cpu==IMAGE_FILE_MACHINE_I386) { 1028 entryOffset=1; 1029 } 1030 #endif 1031 1032 in=T_FileStream_open(filename, "rb"); 1033 if(in==NULL) { 1034 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 1035 exit(U_FILE_ACCESS_ERROR); 1036 } 1037 size=T_FileStream_size(in); 1038 1039 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename); 1040 if (outFilePath != NULL) { 1041 uprv_strcpy(outFilePath, buffer); 1042 } 1043 1044 if(optEntryPoint != NULL) { 1045 uprv_strcpy(entry+entryOffset, optEntryPoint); 1046 uprv_strcat(entry+entryOffset, "_dat"); 1047 } 1048 /* turn dashes in the entry name into underscores */ 1049 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset); 1050 for(i=0; i<entryLength; ++i) { 1051 if(entry[entryLengthOffset+i]=='-') { 1052 entry[entryLengthOffset+i]='_'; 1053 } 1054 } 1055 1056 /* open the output file */ 1057 out=T_FileStream_open(buffer, "wb"); 1058 if(out==NULL) { 1059 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 1060 exit(U_FILE_ACCESS_ERROR); 1061 } 1062 1063 #ifdef U_ELF 1064 if(bits==32) { 1065 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1066 header32.e_machine=cpu; 1067 1068 /* 16-align .rodata in the .o file, just in case */ 1069 paddingSize=sectionHeaders32[4].sh_offset & 0xf; 1070 if(paddingSize!=0) { 1071 paddingSize=0x10-paddingSize; 1072 sectionHeaders32[4].sh_offset+=paddingSize; 1073 } 1074 1075 sectionHeaders32[4].sh_size=(Elf32_Word)size; 1076 1077 symbols32[1].st_size=(Elf32_Word)size; 1078 1079 /* write .o headers */ 1080 T_FileStream_write(out, &header32, (int32_t)sizeof(header32)); 1081 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32)); 1082 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32)); 1083 } else /* bits==64 */ { 1084 #ifdef U_ELF64 1085 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1086 header64.e_machine=cpu; 1087 1088 /* 16-align .rodata in the .o file, just in case */ 1089 paddingSize=sectionHeaders64[4].sh_offset & 0xf; 1090 if(paddingSize!=0) { 1091 paddingSize=0x10-paddingSize; 1092 sectionHeaders64[4].sh_offset+=paddingSize; 1093 } 1094 1095 sectionHeaders64[4].sh_size=(Elf64_Xword)size; 1096 1097 symbols64[1].st_size=(Elf64_Xword)size; 1098 1099 /* write .o headers */ 1100 T_FileStream_write(out, &header64, (int32_t)sizeof(header64)); 1101 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64)); 1102 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64)); 1103 #endif 1104 } 1105 1106 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings)); 1107 T_FileStream_write(out, entry, (int32_t)sizeof(entry)); 1108 if(paddingSize!=0) { 1109 T_FileStream_write(out, padding, paddingSize); 1110 } 1111 #elif U_PLATFORM_HAS_WIN32_API 1112 /* populate the .obj headers */ 1113 uprv_memset(&objHeader, 0, sizeof(objHeader)); 1114 uprv_memset(&symbols, 0, sizeof(symbols)); 1115 uprv_memset(&symbolNames, 0, sizeof(symbolNames)); 1116 1117 /* write the linker export directive */ 1118 uprv_strcpy(objHeader.linkerOptions, "-export:"); 1119 length=8; 1120 uprv_strcpy(objHeader.linkerOptions+length, entry); 1121 length+=entryLength; 1122 uprv_strcpy(objHeader.linkerOptions+length, ",data "); 1123 length+=6; 1124 1125 /* set the file header */ 1126 objHeader.fileHeader.Machine=cpu; 1127 objHeader.fileHeader.NumberOfSections=2; 1128 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL); 1129 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */ 1130 objHeader.fileHeader.NumberOfSymbols=1; 1131 1132 /* set the section for the linker options */ 1133 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8); 1134 objHeader.sections[0].SizeOfRawData=length; 1135 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER; 1136 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES; 1137 1138 /* set the data section */ 1139 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6); 1140 objHeader.sections[1].SizeOfRawData=size; 1141 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length; 1142 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ; 1143 1144 /* set the symbol table */ 1145 if(entryLength<=8) { 1146 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength); 1147 symbolNames.sizeofLongNames=4; 1148 } else { 1149 symbols[0].N.Name.Short=0; 1150 symbols[0].N.Name.Long=4; 1151 symbolNames.sizeofLongNames=4+entryLength+1; 1152 uprv_strcpy(symbolNames.longNames, entry); 1153 } 1154 symbols[0].SectionNumber=2; 1155 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL; 1156 1157 /* write the file header and the linker options section */ 1158 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData); 1159 #else 1160 # error "Unknown platform for CAN_GENERATE_OBJECTS." 1161 #endif 1162 1163 /* copy the data file into section 2 */ 1164 for(;;) { 1165 length=T_FileStream_read(in, buffer, sizeof(buffer)); 1166 if(length==0) { 1167 break; 1168 } 1169 T_FileStream_write(out, buffer, (int32_t)length); 1170 } 1171 1172 #if U_PLATFORM_HAS_WIN32_API 1173 /* write the symbol table */ 1174 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL); 1175 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames); 1176 #endif 1177 1178 if(T_FileStream_error(in)) { 1179 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 1180 exit(U_FILE_ACCESS_ERROR); 1181 } 1182 1183 if(T_FileStream_error(out)) { 1184 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 1185 exit(U_FILE_ACCESS_ERROR); 1186 } 1187 1188 T_FileStream_close(out); 1189 T_FileStream_close(in); 1190 } 1191 #endif 1192