1 /****************************************************************************** 2 * Copyright (C) 2009-2012, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************************* 5 */ 6 #include "unicode/utypes.h" 7 8 #if U_PLATFORM_HAS_WIN32_API 9 # define VC_EXTRALEAN 10 # define WIN32_LEAN_AND_MEAN 11 # define NOUSER 12 # define NOSERVICE 13 # define NOIME 14 # define NOMCX 15 #include <windows.h> 16 #include <time.h> 17 # ifdef __GNUC__ 18 # define WINDOWS_WITH_GNUC 19 # endif 20 #endif 21 22 #if U_PLATFORM_IS_LINUX_BASED 23 # define U_ELF 24 #endif 25 26 #ifdef U_ELF 27 # include <elf.h> 28 # if defined(ELFCLASS64) 29 # define U_ELF64 30 # endif 31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */ 32 # ifndef EM_X86_64 33 # define EM_X86_64 62 34 # endif 35 # define ICU_ENTRY_OFFSET 0 36 #endif 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include "unicode/putil.h" 41 #include "cmemory.h" 42 #include "cstring.h" 43 #include "filestrm.h" 44 #include "toolutil.h" 45 #include "unicode/uclean.h" 46 #include "uoptions.h" 47 #include "pkg_genc.h" 48 49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) 50 51 #define HEX_0X 0 /* 0x1234 */ 52 #define HEX_0H 1 /* 01234h */ 53 54 /* 55 * The following is needed by MinGW64 56 */ 57 #ifndef __USER_LABEL_PREFIX__ 58 #define __USER_LABEL_PREFIX__ _ 59 #endif 60 #define GCC_LABEL_PREFIX_INTERNAL(a) #a 61 #define GCC_LABEL_PREFIX(a) GCC_LABEL_PREFIX_INTERNAL(a) 62 63 /* prototypes --------------------------------------------------------------- */ 64 static void 65 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename); 66 67 static uint32_t 68 write8(FileStream *out, uint8_t byte, uint32_t column); 69 70 static uint32_t 71 write32(FileStream *out, uint32_t byte, uint32_t column); 72 73 #if U_PLATFORM == U_PF_OS400 74 static uint32_t 75 write8str(FileStream *out, uint8_t byte, uint32_t column); 76 #endif 77 /* -------------------------------------------------------------------------- */ 78 79 /* 80 Creating Template Files for New Platforms 81 82 Let the cc compiler help you get started. 83 Compile this program 84 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16}; 85 with the -S option to produce assembly output. 86 87 For example, this will generate array.s: 88 gcc -S array.c 89 90 This will produce a .s file that may look like this: 91 92 .file "array.c" 93 .version "01.01" 94 gcc2_compiled.: 95 .globl x 96 .section .rodata 97 .align 4 98 .type x,@object 99 .size x,20 100 x: 101 .long 1 102 .long 2 103 .long -559038737 104 .long -1 105 .long 16 106 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)" 107 108 which gives a starting point that will compile, and can be transformed 109 to become the template, generally with some consulting of as docs and 110 some experimentation. 111 112 If you want ICU to automatically use this assembly, you should 113 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file, 114 where the name is the compiler or platform that you used in this 115 assemblyHeader data structure. 116 */ 117 static const struct AssemblyType { 118 const char *name; 119 const char *header; 120 const char *beginLine; 121 const char *footer; 122 int8_t hexType; /* HEX_0X or HEX_0h */ 123 } assemblyHeader[] = { 124 {"gcc", 125 ".globl %s\n" 126 "\t.section .note.GNU-stack,\"\",%%progbits\n" 127 "\t.section .rodata\n" 128 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */ 129 "\t.type %s,%%object\n" 130 "%s:\n\n", 131 132 ".long ","",HEX_0X 133 }, 134 {"gcc-darwin", 135 /*"\t.section __TEXT,__text,regular,pure_instructions\n" 136 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/ 137 ".globl _%s\n" 138 "\t.data\n" 139 "\t.const\n" 140 "\t.align 4\n" /* 1<<4 = 16 */ 141 "_%s:\n\n", 142 143 ".long ","",HEX_0X 144 }, 145 {"gcc-cygwin", 146 ".globl "GCC_LABEL_PREFIX(__USER_LABEL_PREFIX__) "%s\n" 147 "\t.section .rodata\n" 148 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */ 149 GCC_LABEL_PREFIX(__USER_LABEL_PREFIX__) "%s:\n\n", 150 151 ".long ","",HEX_0X 152 }, 153 {"sun", 154 "\t.section \".rodata\"\n" 155 "\t.align 8\n" 156 ".globl %s\n" 157 "%s:\n", 158 159 ".word ","",HEX_0X 160 }, 161 {"sun-x86", 162 "Drodata.rodata:\n" 163 "\t.type Drodata.rodata,@object\n" 164 "\t.size Drodata.rodata,0\n" 165 "\t.globl %s\n" 166 "\t.align 8\n" 167 "%s:\n", 168 169 ".4byte ","",HEX_0X 170 }, 171 {"xlc", 172 ".globl %s{RO}\n" 173 "\t.toc\n" 174 "%s:\n" 175 "\t.csect %s{RO}, 4\n", 176 177 ".long ","",HEX_0X 178 }, 179 {"aCC-ia64", 180 "\t.file \"%s.s\"\n" 181 "\t.type %s,@object\n" 182 "\t.global %s\n" 183 "\t.secalias .abe$0.rodata, \".rodata\"\n" 184 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n" 185 "\t.align 16\n" 186 "%s::\t", 187 188 "data4 ","",HEX_0X 189 }, 190 {"aCC-parisc", 191 "\t.SPACE $TEXT$\n" 192 "\t.SUBSPA $LIT$\n" 193 "%s\n" 194 "\t.EXPORT %s\n" 195 "\t.ALIGN 16\n", 196 197 ".WORD ","",HEX_0X 198 }, 199 { "masm", 200 "\tTITLE %s\n" 201 "; generated by genccode\n" 202 ".386\n" 203 ".model flat\n" 204 "\tPUBLIC _%s\n" 205 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n" 206 "\tALIGN 16\n" 207 "_%s\tLABEL DWORD\n", 208 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H 209 } 210 }; 211 212 static int32_t assemblyHeaderIndex = -1; 213 static int32_t hexType = HEX_0X; 214 215 U_CAPI UBool U_EXPORT2 216 checkAssemblyHeaderName(const char* optAssembly) { 217 int32_t idx; 218 assemblyHeaderIndex = -1; 219 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 220 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) { 221 assemblyHeaderIndex = idx; 222 hexType = assemblyHeader[idx].hexType; /* set the hex type */ 223 return TRUE; 224 } 225 } 226 227 return FALSE; 228 } 229 230 231 U_CAPI void U_EXPORT2 232 printAssemblyHeadersToStdErr(void) { 233 int32_t idx; 234 fprintf(stderr, "%s", assemblyHeader[0].name); 235 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 236 fprintf(stderr, ", %s", assemblyHeader[idx].name); 237 } 238 fprintf(stderr, 239 ")\n"); 240 } 241 242 U_CAPI void U_EXPORT2 243 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) { 244 uint32_t column = MAX_COLUMN; 245 char entry[64]; 246 uint32_t buffer[1024]; 247 char *bufferStr = (char *)buffer; 248 FileStream *in, *out; 249 size_t i, length; 250 251 in=T_FileStream_open(filename, "rb"); 252 if(in==NULL) { 253 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 254 exit(U_FILE_ACCESS_ERROR); 255 } 256 257 getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename); 258 out=T_FileStream_open(bufferStr, "w"); 259 if(out==NULL) { 260 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr); 261 exit(U_FILE_ACCESS_ERROR); 262 } 263 264 if (outFilePath != NULL) { 265 uprv_strcpy(outFilePath, bufferStr); 266 } 267 268 #ifdef WINDOWS_WITH_GNUC 269 /* Need to fix the file seperator character when using MinGW. */ 270 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/'); 271 #endif 272 273 if(optEntryPoint != NULL) { 274 uprv_strcpy(entry, optEntryPoint); 275 uprv_strcat(entry, "_dat"); 276 } 277 278 /* turn dashes or dots in the entry name into underscores */ 279 length=uprv_strlen(entry); 280 for(i=0; i<length; ++i) { 281 if(entry[i]=='-' || entry[i]=='.') { 282 entry[i]='_'; 283 } 284 } 285 286 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header, 287 entry, entry, entry, entry, 288 entry, entry, entry, entry); 289 T_FileStream_writeLine(out, bufferStr); 290 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine); 291 292 for(;;) { 293 length=T_FileStream_read(in, buffer, sizeof(buffer)); 294 if(length==0) { 295 break; 296 } 297 if (length != sizeof(buffer)) { 298 /* pad with extra 0's when at the end of the file */ 299 for(i=0; i < (length % sizeof(uint32_t)); ++i) { 300 buffer[length+i] = 0; 301 } 302 } 303 for(i=0; i<(length/sizeof(buffer[0])); i++) { 304 column = write32(out, buffer[i], column); 305 } 306 } 307 308 T_FileStream_writeLine(out, "\n"); 309 310 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer, 311 entry, entry, entry, entry, 312 entry, entry, entry, entry); 313 T_FileStream_writeLine(out, bufferStr); 314 315 if(T_FileStream_error(in)) { 316 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 317 exit(U_FILE_ACCESS_ERROR); 318 } 319 320 if(T_FileStream_error(out)) { 321 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 322 exit(U_FILE_ACCESS_ERROR); 323 } 324 325 T_FileStream_close(out); 326 T_FileStream_close(in); 327 } 328 329 U_CAPI void U_EXPORT2 330 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) { 331 uint32_t column = MAX_COLUMN; 332 char buffer[4096], entry[64]; 333 FileStream *in, *out; 334 size_t i, length; 335 336 in=T_FileStream_open(filename, "rb"); 337 if(in==NULL) { 338 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 339 exit(U_FILE_ACCESS_ERROR); 340 } 341 342 if(optName != NULL) { /* prepend 'icudt28_' */ 343 strcpy(entry, optName); 344 strcat(entry, "_"); 345 } else { 346 entry[0] = 0; 347 } 348 349 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename); 350 if (outFilePath != NULL) { 351 uprv_strcpy(outFilePath, buffer); 352 } 353 out=T_FileStream_open(buffer, "w"); 354 if(out==NULL) { 355 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 356 exit(U_FILE_ACCESS_ERROR); 357 } 358 359 /* turn dashes or dots in the entry name into underscores */ 360 length=uprv_strlen(entry); 361 for(i=0; i<length; ++i) { 362 if(entry[i]=='-' || entry[i]=='.') { 363 entry[i]='_'; 364 } 365 } 366 367 #if U_PLATFORM == U_PF_OS400 368 /* 369 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c 370 371 This is here because this platform can't currently put 372 const data into the read-only pages of an object or 373 shared library (service program). Only strings are allowed in read-only 374 pages, so we use char * strings to store the data. 375 376 In order to prevent the beginning of the data from ever matching the 377 magic numbers we must still use the initial double. 378 [grhoten 4/24/2003] 379 */ 380 sprintf(buffer, 381 "#ifndef IN_GENERATED_CCODE\n" 382 "#define IN_GENERATED_CCODE\n" 383 "#define U_DISABLE_RENAMING 1\n" 384 "#include \"unicode/umachine.h\"\n" 385 "#endif\n" 386 "U_CDECL_BEGIN\n" 387 "const struct {\n" 388 " double bogus;\n" 389 " const char *bytes; \n" 390 "} %s={ 0.0, \n", 391 entry); 392 T_FileStream_writeLine(out, buffer); 393 394 for(;;) { 395 length=T_FileStream_read(in, buffer, sizeof(buffer)); 396 if(length==0) { 397 break; 398 } 399 for(i=0; i<length; ++i) { 400 column = write8str(out, (uint8_t)buffer[i], column); 401 } 402 } 403 404 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n"); 405 #else 406 /* Function renaming shouldn't be done in data */ 407 sprintf(buffer, 408 "#ifndef IN_GENERATED_CCODE\n" 409 "#define IN_GENERATED_CCODE\n" 410 "#define U_DISABLE_RENAMING 1\n" 411 "#include \"unicode/umachine.h\"\n" 412 "#endif\n" 413 "U_CDECL_BEGIN\n" 414 "const struct {\n" 415 " double bogus;\n" 416 " uint8_t bytes[%ld]; \n" 417 "} %s={ 0.0, {\n", 418 (long)T_FileStream_size(in), entry); 419 T_FileStream_writeLine(out, buffer); 420 421 for(;;) { 422 length=T_FileStream_read(in, buffer, sizeof(buffer)); 423 if(length==0) { 424 break; 425 } 426 for(i=0; i<length; ++i) { 427 column = write8(out, (uint8_t)buffer[i], column); 428 } 429 } 430 431 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n"); 432 #endif 433 434 if(T_FileStream_error(in)) { 435 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 436 exit(U_FILE_ACCESS_ERROR); 437 } 438 439 if(T_FileStream_error(out)) { 440 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 441 exit(U_FILE_ACCESS_ERROR); 442 } 443 444 T_FileStream_close(out); 445 T_FileStream_close(in); 446 } 447 448 static uint32_t 449 write32(FileStream *out, uint32_t bitField, uint32_t column) { 450 int32_t i; 451 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */ 452 char *s = bitFieldStr; 453 uint8_t *ptrIdx = (uint8_t *)&bitField; 454 static const char hexToStr[16] = { 455 '0','1','2','3', 456 '4','5','6','7', 457 '8','9','A','B', 458 'C','D','E','F' 459 }; 460 461 /* write the value, possibly with comma and newline */ 462 if(column==MAX_COLUMN) { 463 /* first byte */ 464 column=1; 465 } else if(column<32) { 466 *(s++)=','; 467 ++column; 468 } else { 469 *(s++)='\n'; 470 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine); 471 s+=uprv_strlen(s); 472 column=1; 473 } 474 475 if (bitField < 10) { 476 /* It's a small number. Don't waste the space for 0x */ 477 *(s++)=hexToStr[bitField]; 478 } 479 else { 480 int seenNonZero = 0; /* This is used to remove leading zeros */ 481 482 if(hexType==HEX_0X) { 483 *(s++)='0'; 484 *(s++)='x'; 485 } else if(hexType==HEX_0H) { 486 *(s++)='0'; 487 } 488 489 /* This creates a 32-bit field */ 490 #if U_IS_BIG_ENDIAN 491 for (i = 0; i < sizeof(uint32_t); i++) 492 #else 493 for (i = sizeof(uint32_t)-1; i >= 0 ; i--) 494 #endif 495 { 496 uint8_t value = ptrIdx[i]; 497 if (value || seenNonZero) { 498 *(s++)=hexToStr[value>>4]; 499 *(s++)=hexToStr[value&0xF]; 500 seenNonZero = 1; 501 } 502 } 503 if(hexType==HEX_0H) { 504 *(s++)='h'; 505 } 506 } 507 508 *(s++)=0; 509 T_FileStream_writeLine(out, bitFieldStr); 510 return column; 511 } 512 513 static uint32_t 514 write8(FileStream *out, uint8_t byte, uint32_t column) { 515 char s[4]; 516 int i=0; 517 518 /* convert the byte value to a string */ 519 if(byte>=100) { 520 s[i++]=(char)('0'+byte/100); 521 byte%=100; 522 } 523 if(i>0 || byte>=10) { 524 s[i++]=(char)('0'+byte/10); 525 byte%=10; 526 } 527 s[i++]=(char)('0'+byte); 528 s[i]=0; 529 530 /* write the value, possibly with comma and newline */ 531 if(column==MAX_COLUMN) { 532 /* first byte */ 533 column=1; 534 } else if(column<16) { 535 T_FileStream_writeLine(out, ","); 536 ++column; 537 } else { 538 T_FileStream_writeLine(out, ",\n"); 539 column=1; 540 } 541 T_FileStream_writeLine(out, s); 542 return column; 543 } 544 545 #if U_PLATFORM == U_PF_OS400 546 static uint32_t 547 write8str(FileStream *out, uint8_t byte, uint32_t column) { 548 char s[8]; 549 550 if (byte > 7) 551 sprintf(s, "\\x%X", byte); 552 else 553 sprintf(s, "\\%X", byte); 554 555 /* write the value, possibly with comma and newline */ 556 if(column==MAX_COLUMN) { 557 /* first byte */ 558 column=1; 559 T_FileStream_writeLine(out, "\""); 560 } else if(column<24) { 561 ++column; 562 } else { 563 T_FileStream_writeLine(out, "\"\n\""); 564 column=1; 565 } 566 T_FileStream_writeLine(out, s); 567 return column; 568 } 569 #endif 570 571 static void 572 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) { 573 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.'); 574 575 /* copy path */ 576 if(destdir!=NULL && *destdir!=0) { 577 do { 578 *outFilename++=*destdir++; 579 } while(*destdir!=0); 580 if(*(outFilename-1)!=U_FILE_SEP_CHAR) { 581 *outFilename++=U_FILE_SEP_CHAR; 582 } 583 inFilename=basename; 584 } else { 585 while(inFilename<basename) { 586 *outFilename++=*inFilename++; 587 } 588 } 589 590 if(suffix==NULL) { 591 /* the filename does not have a suffix */ 592 uprv_strcpy(entryName, inFilename); 593 if(optFilename != NULL) { 594 uprv_strcpy(outFilename, optFilename); 595 } else { 596 uprv_strcpy(outFilename, inFilename); 597 } 598 uprv_strcat(outFilename, newSuffix); 599 } else { 600 char *saveOutFilename = outFilename; 601 /* copy basename */ 602 while(inFilename<suffix) { 603 if(*inFilename=='-') { 604 /* iSeries cannot have '-' in the .o objects. */ 605 *outFilename++=*entryName++='_'; 606 inFilename++; 607 } 608 else { 609 *outFilename++=*entryName++=*inFilename++; 610 } 611 } 612 613 /* replace '.' by '_' */ 614 *outFilename++=*entryName++='_'; 615 ++inFilename; 616 617 /* copy suffix */ 618 while(*inFilename!=0) { 619 *outFilename++=*entryName++=*inFilename++; 620 } 621 622 *entryName=0; 623 624 if(optFilename != NULL) { 625 uprv_strcpy(saveOutFilename, optFilename); 626 uprv_strcat(saveOutFilename, newSuffix); 627 } else { 628 /* add ".c" */ 629 uprv_strcpy(outFilename, newSuffix); 630 } 631 } 632 } 633 634 #ifdef CAN_GENERATE_OBJECTS 635 static void 636 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) { 637 union { 638 char bytes[2048]; 639 #ifdef U_ELF 640 Elf32_Ehdr header32; 641 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */ 642 #elif U_PLATFORM_HAS_WIN32_API 643 IMAGE_FILE_HEADER header; 644 #endif 645 } buffer; 646 647 const char *filename; 648 FileStream *in; 649 int32_t length; 650 651 #ifdef U_ELF 652 653 #elif U_PLATFORM_HAS_WIN32_API 654 const IMAGE_FILE_HEADER *pHeader; 655 #else 656 # error "Unknown platform for CAN_GENERATE_OBJECTS." 657 #endif 658 659 if(optMatchArch != NULL) { 660 filename=optMatchArch; 661 } else { 662 /* set defaults */ 663 #ifdef U_ELF 664 /* set EM_386 because elf.h does not provide better defaults */ 665 *pCPU=EM_386; 666 *pBits=32; 667 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB); 668 #elif U_PLATFORM_HAS_WIN32_API 669 /* _M_IA64 should be defined in windows.h */ 670 # if defined(_M_IA64) 671 *pCPU=IMAGE_FILE_MACHINE_IA64; 672 # elif defined(_M_AMD64) 673 *pCPU=IMAGE_FILE_MACHINE_AMD64; 674 # else 675 *pCPU=IMAGE_FILE_MACHINE_I386; 676 # endif 677 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 678 *pIsBigEndian=FALSE; 679 #else 680 # error "Unknown platform for CAN_GENERATE_OBJECTS." 681 #endif 682 return; 683 } 684 685 in=T_FileStream_open(filename, "rb"); 686 if(in==NULL) { 687 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename); 688 exit(U_FILE_ACCESS_ERROR); 689 } 690 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes)); 691 692 #ifdef U_ELF 693 if(length<sizeof(Elf32_Ehdr)) { 694 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 695 exit(U_UNSUPPORTED_ERROR); 696 } 697 if( 698 buffer.header32.e_ident[0]!=ELFMAG0 || 699 buffer.header32.e_ident[1]!=ELFMAG1 || 700 buffer.header32.e_ident[2]!=ELFMAG2 || 701 buffer.header32.e_ident[3]!=ELFMAG3 || 702 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64 703 ) { 704 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename); 705 exit(U_UNSUPPORTED_ERROR); 706 } 707 708 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */ 709 #ifdef U_ELF64 710 if(*pBits!=32 && *pBits!=64) { 711 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n"); 712 exit(U_UNSUPPORTED_ERROR); 713 } 714 #else 715 if(*pBits!=32) { 716 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n"); 717 exit(U_UNSUPPORTED_ERROR); 718 } 719 #endif 720 721 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB); 722 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) { 723 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n"); 724 exit(U_UNSUPPORTED_ERROR); 725 } 726 /* TODO: Support byte swapping */ 727 728 *pCPU=buffer.header32.e_machine; 729 #elif U_PLATFORM_HAS_WIN32_API 730 if(length<sizeof(IMAGE_FILE_HEADER)) { 731 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 732 exit(U_UNSUPPORTED_ERROR); 733 } 734 /* TODO: Use buffer.header. Keep aliasing legal. */ 735 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes; 736 *pCPU=pHeader->Machine; 737 /* 738 * The number of bits is implicit with the Machine value. 739 * *pBits is ignored in the calling code, so this need not be precise. 740 */ 741 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 742 /* Windows always runs on little-endian CPUs. */ 743 *pIsBigEndian=FALSE; 744 #else 745 # error "Unknown platform for CAN_GENERATE_OBJECTS." 746 #endif 747 748 T_FileStream_close(in); 749 } 750 751 U_CAPI void U_EXPORT2 752 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) { 753 /* common variables */ 754 char buffer[4096], entry[40]={ 0 }; 755 FileStream *in, *out; 756 const char *newSuffix; 757 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0; 758 759 uint16_t cpu, bits; 760 UBool makeBigEndian; 761 762 /* platform-specific variables and initialization code */ 763 #ifdef U_ELF 764 /* 32-bit Elf file header */ 765 static Elf32_Ehdr header32={ 766 { 767 /* e_ident[] */ 768 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 769 ELFCLASS32, 770 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 771 EV_CURRENT /* EI_VERSION */ 772 }, 773 ET_REL, 774 EM_386, 775 EV_CURRENT, /* e_version */ 776 0, /* e_entry */ 777 0, /* e_phoff */ 778 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */ 779 0, /* e_flags */ 780 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */ 781 0, /* e_phentsize */ 782 0, /* e_phnum */ 783 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */ 784 5, /* e_shnum */ 785 2 /* e_shstrndx */ 786 }; 787 788 /* 32-bit Elf section header table */ 789 static Elf32_Shdr sectionHeaders32[5]={ 790 { /* SHN_UNDEF */ 791 0 792 }, 793 { /* .symtab */ 794 1, /* sh_name */ 795 SHT_SYMTAB, 796 0, /* sh_flags */ 797 0, /* sh_addr */ 798 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */ 799 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */ 800 3, /* sh_link=sect hdr index of .strtab */ 801 1, /* sh_info=One greater than the symbol table index of the last 802 * local symbol (with STB_LOCAL). */ 803 4, /* sh_addralign */ 804 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */ 805 }, 806 { /* .shstrtab */ 807 9, /* sh_name */ 808 SHT_STRTAB, 809 0, /* sh_flags */ 810 0, /* sh_addr */ 811 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */ 812 40, /* sh_size */ 813 0, /* sh_link */ 814 0, /* sh_info */ 815 1, /* sh_addralign */ 816 0 /* sh_entsize */ 817 }, 818 { /* .strtab */ 819 19, /* sh_name */ 820 SHT_STRTAB, 821 0, /* sh_flags */ 822 0, /* sh_addr */ 823 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */ 824 (Elf32_Word)sizeof(entry), /* sh_size */ 825 0, /* sh_link */ 826 0, /* sh_info */ 827 1, /* sh_addralign */ 828 0 /* sh_entsize */ 829 }, 830 { /* .rodata */ 831 27, /* sh_name */ 832 SHT_PROGBITS, 833 SHF_ALLOC, /* sh_flags */ 834 0, /* sh_addr */ 835 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */ 836 0, /* sh_size */ 837 0, /* sh_link */ 838 0, /* sh_info */ 839 16, /* sh_addralign */ 840 0 /* sh_entsize */ 841 } 842 }; 843 844 /* symbol table */ 845 static Elf32_Sym symbols32[2]={ 846 { /* STN_UNDEF */ 847 0 848 }, 849 { /* data entry point */ 850 1, /* st_name */ 851 0, /* st_value */ 852 0, /* st_size */ 853 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 854 0, /* st_other */ 855 4 /* st_shndx=index of related section table entry */ 856 } 857 }; 858 859 /* section header string table, with decimal string offsets */ 860 static const char sectionStrings[40]= 861 /* 0 */ "\0" 862 /* 1 */ ".symtab\0" 863 /* 9 */ ".shstrtab\0" 864 /* 19 */ ".strtab\0" 865 /* 27 */ ".rodata\0" 866 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */ 867 /* 40: padded to multiple of 8 bytes */ 868 869 /* 870 * Use entry[] for the string table which will contain only the 871 * entry point name. 872 * entry[0] must be 0 (NUL) 873 * The entry point name can be up to 38 characters long (sizeof(entry)-2). 874 */ 875 876 /* 16-align .rodata in the .o file, just in case */ 877 static const char padding[16]={ 0 }; 878 int32_t paddingSize; 879 880 #ifdef U_ELF64 881 /* 64-bit Elf file header */ 882 static Elf64_Ehdr header64={ 883 { 884 /* e_ident[] */ 885 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 886 ELFCLASS64, 887 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 888 EV_CURRENT /* EI_VERSION */ 889 }, 890 ET_REL, 891 EM_X86_64, 892 EV_CURRENT, /* e_version */ 893 0, /* e_entry */ 894 0, /* e_phoff */ 895 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */ 896 0, /* e_flags */ 897 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */ 898 0, /* e_phentsize */ 899 0, /* e_phnum */ 900 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */ 901 5, /* e_shnum */ 902 2 /* e_shstrndx */ 903 }; 904 905 /* 64-bit Elf section header table */ 906 static Elf64_Shdr sectionHeaders64[5]={ 907 { /* SHN_UNDEF */ 908 0 909 }, 910 { /* .symtab */ 911 1, /* sh_name */ 912 SHT_SYMTAB, 913 0, /* sh_flags */ 914 0, /* sh_addr */ 915 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */ 916 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */ 917 3, /* sh_link=sect hdr index of .strtab */ 918 1, /* sh_info=One greater than the symbol table index of the last 919 * local symbol (with STB_LOCAL). */ 920 4, /* sh_addralign */ 921 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */ 922 }, 923 { /* .shstrtab */ 924 9, /* sh_name */ 925 SHT_STRTAB, 926 0, /* sh_flags */ 927 0, /* sh_addr */ 928 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */ 929 40, /* sh_size */ 930 0, /* sh_link */ 931 0, /* sh_info */ 932 1, /* sh_addralign */ 933 0 /* sh_entsize */ 934 }, 935 { /* .strtab */ 936 19, /* sh_name */ 937 SHT_STRTAB, 938 0, /* sh_flags */ 939 0, /* sh_addr */ 940 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */ 941 (Elf64_Xword)sizeof(entry), /* sh_size */ 942 0, /* sh_link */ 943 0, /* sh_info */ 944 1, /* sh_addralign */ 945 0 /* sh_entsize */ 946 }, 947 { /* .rodata */ 948 27, /* sh_name */ 949 SHT_PROGBITS, 950 SHF_ALLOC, /* sh_flags */ 951 0, /* sh_addr */ 952 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */ 953 0, /* sh_size */ 954 0, /* sh_link */ 955 0, /* sh_info */ 956 16, /* sh_addralign */ 957 0 /* sh_entsize */ 958 } 959 }; 960 961 /* 962 * 64-bit symbol table 963 * careful: different order of items compared with Elf32_sym! 964 */ 965 static Elf64_Sym symbols64[2]={ 966 { /* STN_UNDEF */ 967 0 968 }, 969 { /* data entry point */ 970 1, /* st_name */ 971 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 972 0, /* st_other */ 973 4, /* st_shndx=index of related section table entry */ 974 0, /* st_value */ 975 0 /* st_size */ 976 } 977 }; 978 979 #endif /* U_ELF64 */ 980 981 /* entry[] have a leading NUL */ 982 entryOffset=1; 983 984 /* in the common code, count entryLength from after the NUL */ 985 entryLengthOffset=1; 986 987 newSuffix=".o"; 988 989 #elif U_PLATFORM_HAS_WIN32_API 990 struct { 991 IMAGE_FILE_HEADER fileHeader; 992 IMAGE_SECTION_HEADER sections[2]; 993 char linkerOptions[100]; 994 } objHeader; 995 IMAGE_SYMBOL symbols[1]; 996 struct { 997 DWORD sizeofLongNames; 998 char longNames[100]; 999 } symbolNames; 1000 1001 /* 1002 * entry sometimes have a leading '_' 1003 * overwritten if entryOffset==0 depending on the target platform 1004 * see check for cpu below 1005 */ 1006 entry[0]='_'; 1007 1008 newSuffix=".obj"; 1009 #else 1010 # error "Unknown platform for CAN_GENERATE_OBJECTS." 1011 #endif 1012 1013 /* deal with options, files and the entry point name */ 1014 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch); 1015 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian); 1016 #if U_PLATFORM_HAS_WIN32_API 1017 if(cpu==IMAGE_FILE_MACHINE_I386) { 1018 entryOffset=1; 1019 } 1020 #endif 1021 1022 in=T_FileStream_open(filename, "rb"); 1023 if(in==NULL) { 1024 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 1025 exit(U_FILE_ACCESS_ERROR); 1026 } 1027 size=T_FileStream_size(in); 1028 1029 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename); 1030 if (outFilePath != NULL) { 1031 uprv_strcpy(outFilePath, buffer); 1032 } 1033 1034 if(optEntryPoint != NULL) { 1035 uprv_strcpy(entry+entryOffset, optEntryPoint); 1036 uprv_strcat(entry+entryOffset, "_dat"); 1037 } 1038 /* turn dashes in the entry name into underscores */ 1039 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset); 1040 for(i=0; i<entryLength; ++i) { 1041 if(entry[entryLengthOffset+i]=='-') { 1042 entry[entryLengthOffset+i]='_'; 1043 } 1044 } 1045 1046 /* open the output file */ 1047 out=T_FileStream_open(buffer, "wb"); 1048 if(out==NULL) { 1049 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 1050 exit(U_FILE_ACCESS_ERROR); 1051 } 1052 1053 #ifdef U_ELF 1054 if(bits==32) { 1055 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1056 header32.e_machine=cpu; 1057 1058 /* 16-align .rodata in the .o file, just in case */ 1059 paddingSize=sectionHeaders32[4].sh_offset & 0xf; 1060 if(paddingSize!=0) { 1061 paddingSize=0x10-paddingSize; 1062 sectionHeaders32[4].sh_offset+=paddingSize; 1063 } 1064 1065 sectionHeaders32[4].sh_size=(Elf32_Word)size; 1066 1067 symbols32[1].st_size=(Elf32_Word)size; 1068 1069 /* write .o headers */ 1070 T_FileStream_write(out, &header32, (int32_t)sizeof(header32)); 1071 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32)); 1072 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32)); 1073 } else /* bits==64 */ { 1074 #ifdef U_ELF64 1075 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1076 header64.e_machine=cpu; 1077 1078 /* 16-align .rodata in the .o file, just in case */ 1079 paddingSize=sectionHeaders64[4].sh_offset & 0xf; 1080 if(paddingSize!=0) { 1081 paddingSize=0x10-paddingSize; 1082 sectionHeaders64[4].sh_offset+=paddingSize; 1083 } 1084 1085 sectionHeaders64[4].sh_size=(Elf64_Xword)size; 1086 1087 symbols64[1].st_size=(Elf64_Xword)size; 1088 1089 /* write .o headers */ 1090 T_FileStream_write(out, &header64, (int32_t)sizeof(header64)); 1091 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64)); 1092 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64)); 1093 #endif 1094 } 1095 1096 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings)); 1097 T_FileStream_write(out, entry, (int32_t)sizeof(entry)); 1098 if(paddingSize!=0) { 1099 T_FileStream_write(out, padding, paddingSize); 1100 } 1101 #elif U_PLATFORM_HAS_WIN32_API 1102 /* populate the .obj headers */ 1103 uprv_memset(&objHeader, 0, sizeof(objHeader)); 1104 uprv_memset(&symbols, 0, sizeof(symbols)); 1105 uprv_memset(&symbolNames, 0, sizeof(symbolNames)); 1106 1107 /* write the linker export directive */ 1108 uprv_strcpy(objHeader.linkerOptions, "-export:"); 1109 length=8; 1110 uprv_strcpy(objHeader.linkerOptions+length, entry); 1111 length+=entryLength; 1112 uprv_strcpy(objHeader.linkerOptions+length, ",data "); 1113 length+=6; 1114 1115 /* set the file header */ 1116 objHeader.fileHeader.Machine=cpu; 1117 objHeader.fileHeader.NumberOfSections=2; 1118 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL); 1119 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */ 1120 objHeader.fileHeader.NumberOfSymbols=1; 1121 1122 /* set the section for the linker options */ 1123 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8); 1124 objHeader.sections[0].SizeOfRawData=length; 1125 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER; 1126 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES; 1127 1128 /* set the data section */ 1129 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6); 1130 objHeader.sections[1].SizeOfRawData=size; 1131 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length; 1132 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ; 1133 1134 /* set the symbol table */ 1135 if(entryLength<=8) { 1136 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength); 1137 symbolNames.sizeofLongNames=4; 1138 } else { 1139 symbols[0].N.Name.Short=0; 1140 symbols[0].N.Name.Long=4; 1141 symbolNames.sizeofLongNames=4+entryLength+1; 1142 uprv_strcpy(symbolNames.longNames, entry); 1143 } 1144 symbols[0].SectionNumber=2; 1145 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL; 1146 1147 /* write the file header and the linker options section */ 1148 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData); 1149 #else 1150 # error "Unknown platform for CAN_GENERATE_OBJECTS." 1151 #endif 1152 1153 /* copy the data file into section 2 */ 1154 for(;;) { 1155 length=T_FileStream_read(in, buffer, sizeof(buffer)); 1156 if(length==0) { 1157 break; 1158 } 1159 T_FileStream_write(out, buffer, (int32_t)length); 1160 } 1161 1162 #if U_PLATFORM_HAS_WIN32_API 1163 /* write the symbol table */ 1164 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL); 1165 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames); 1166 #endif 1167 1168 if(T_FileStream_error(in)) { 1169 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 1170 exit(U_FILE_ACCESS_ERROR); 1171 } 1172 1173 if(T_FileStream_error(out)) { 1174 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 1175 exit(U_FILE_ACCESS_ERROR); 1176 } 1177 1178 T_FileStream_close(out); 1179 T_FileStream_close(in); 1180 } 1181 #endif 1182