1 /****************************************************************************** 2 * Copyright (C) 2009-2012, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************************* 5 */ 6 #include "unicode/utypes.h" 7 8 #if U_PLATFORM_HAS_WIN32_API 9 # define VC_EXTRALEAN 10 # define WIN32_LEAN_AND_MEAN 11 # define NOUSER 12 # define NOSERVICE 13 # define NOIME 14 # define NOMCX 15 #include <windows.h> 16 #include <time.h> 17 # ifdef __GNUC__ 18 # define WINDOWS_WITH_GNUC 19 # endif 20 #endif 21 22 #if U_PLATFORM_IS_LINUX_BASED 23 # define U_ELF 24 #endif 25 26 #ifdef U_ELF 27 # include <elf.h> 28 # if defined(ELFCLASS64) 29 # define U_ELF64 30 # endif 31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */ 32 # ifndef EM_X86_64 33 # define EM_X86_64 62 34 # endif 35 # define ICU_ENTRY_OFFSET 0 36 #endif 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include "unicode/putil.h" 41 #include "cmemory.h" 42 #include "cstring.h" 43 #include "filestrm.h" 44 #include "toolutil.h" 45 #include "unicode/uclean.h" 46 #include "uoptions.h" 47 #include "pkg_genc.h" 48 49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) 50 51 #define HEX_0X 0 /* 0x1234 */ 52 #define HEX_0H 1 /* 01234h */ 53 54 /* prototypes --------------------------------------------------------------- */ 55 static void 56 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename); 57 58 static uint32_t 59 write8(FileStream *out, uint8_t byte, uint32_t column); 60 61 static uint32_t 62 write32(FileStream *out, uint32_t byte, uint32_t column); 63 64 #if U_PLATFORM == U_PF_OS400 65 static uint32_t 66 write8str(FileStream *out, uint8_t byte, uint32_t column); 67 #endif 68 /* -------------------------------------------------------------------------- */ 69 70 /* 71 Creating Template Files for New Platforms 72 73 Let the cc compiler help you get started. 74 Compile this program 75 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16}; 76 with the -S option to produce assembly output. 77 78 For example, this will generate array.s: 79 gcc -S array.c 80 81 This will produce a .s file that may look like this: 82 83 .file "array.c" 84 .version "01.01" 85 gcc2_compiled.: 86 .globl x 87 .section .rodata 88 .align 4 89 .type x,@object 90 .size x,20 91 x: 92 .long 1 93 .long 2 94 .long -559038737 95 .long -1 96 .long 16 97 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)" 98 99 which gives a starting point that will compile, and can be transformed 100 to become the template, generally with some consulting of as docs and 101 some experimentation. 102 103 If you want ICU to automatically use this assembly, you should 104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file, 105 where the name is the compiler or platform that you used in this 106 assemblyHeader data structure. 107 */ 108 static const struct AssemblyType { 109 const char *name; 110 const char *header; 111 const char *beginLine; 112 const char *footer; 113 int8_t hexType; /* HEX_0X or HEX_0h */ 114 } assemblyHeader[] = { 115 {"gcc", 116 ".globl %s\n" 117 "\t.section .note.GNU-stack,\"\",%%progbits\n" 118 "\t.section .rodata\n" 119 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */ 120 "\t.type %s,%%object\n" 121 "%s:\n\n", 122 123 ".long ","",HEX_0X 124 }, 125 {"gcc-darwin", 126 /*"\t.section __TEXT,__text,regular,pure_instructions\n" 127 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/ 128 ".globl _%s\n" 129 "\t.data\n" 130 "\t.const\n" 131 "\t.align 4\n" /* 1<<4 = 16 */ 132 "_%s:\n\n", 133 134 ".long ","",HEX_0X 135 }, 136 {"gcc-cygwin", 137 ".globl _%s\n" 138 "\t.section .rodata\n" 139 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */ 140 "_%s:\n\n", 141 142 ".long ","",HEX_0X 143 }, 144 {"gcc-mingw64", 145 ".globl %s\n" 146 "\t.section .rodata\n" 147 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */ 148 "%s:\n\n", 149 150 ".long ","",HEX_0X 151 }, 152 {"sun", 153 "\t.section \".rodata\"\n" 154 "\t.align 8\n" 155 ".globl %s\n" 156 "%s:\n", 157 158 ".word ","",HEX_0X 159 }, 160 {"sun-x86", 161 "Drodata.rodata:\n" 162 "\t.type Drodata.rodata,@object\n" 163 "\t.size Drodata.rodata,0\n" 164 "\t.globl %s\n" 165 "\t.align 8\n" 166 "%s:\n", 167 168 ".4byte ","",HEX_0X 169 }, 170 {"xlc", 171 ".globl %s{RO}\n" 172 "\t.toc\n" 173 "%s:\n" 174 "\t.csect %s{RO}, 4\n", 175 176 ".long ","",HEX_0X 177 }, 178 {"aCC-ia64", 179 "\t.file \"%s.s\"\n" 180 "\t.type %s,@object\n" 181 "\t.global %s\n" 182 "\t.secalias .abe$0.rodata, \".rodata\"\n" 183 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n" 184 "\t.align 16\n" 185 "%s::\t", 186 187 "data4 ","",HEX_0X 188 }, 189 {"aCC-parisc", 190 "\t.SPACE $TEXT$\n" 191 "\t.SUBSPA $LIT$\n" 192 "%s\n" 193 "\t.EXPORT %s\n" 194 "\t.ALIGN 16\n", 195 196 ".WORD ","",HEX_0X 197 }, 198 { "masm", 199 "\tTITLE %s\n" 200 "; generated by genccode\n" 201 ".386\n" 202 ".model flat\n" 203 "\tPUBLIC _%s\n" 204 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n" 205 "\tALIGN 16\n" 206 "_%s\tLABEL DWORD\n", 207 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H 208 } 209 }; 210 211 static int32_t assemblyHeaderIndex = -1; 212 static int32_t hexType = HEX_0X; 213 214 U_CAPI UBool U_EXPORT2 215 checkAssemblyHeaderName(const char* optAssembly) { 216 int32_t idx; 217 assemblyHeaderIndex = -1; 218 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 219 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) { 220 assemblyHeaderIndex = idx; 221 hexType = assemblyHeader[idx].hexType; /* set the hex type */ 222 return TRUE; 223 } 224 } 225 226 return FALSE; 227 } 228 229 230 U_CAPI void U_EXPORT2 231 printAssemblyHeadersToStdErr(void) { 232 int32_t idx; 233 fprintf(stderr, "%s", assemblyHeader[0].name); 234 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 235 fprintf(stderr, ", %s", assemblyHeader[idx].name); 236 } 237 fprintf(stderr, 238 ")\n"); 239 } 240 241 U_CAPI void U_EXPORT2 242 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) { 243 uint32_t column = MAX_COLUMN; 244 char entry[64]; 245 uint32_t buffer[1024]; 246 char *bufferStr = (char *)buffer; 247 FileStream *in, *out; 248 size_t i, length; 249 250 in=T_FileStream_open(filename, "rb"); 251 if(in==NULL) { 252 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 253 exit(U_FILE_ACCESS_ERROR); 254 } 255 256 getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename); 257 out=T_FileStream_open(bufferStr, "w"); 258 if(out==NULL) { 259 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr); 260 exit(U_FILE_ACCESS_ERROR); 261 } 262 263 if (outFilePath != NULL) { 264 uprv_strcpy(outFilePath, bufferStr); 265 } 266 267 #ifdef WINDOWS_WITH_GNUC 268 /* Need to fix the file seperator character when using MinGW. */ 269 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/'); 270 #endif 271 272 if(optEntryPoint != NULL) { 273 uprv_strcpy(entry, optEntryPoint); 274 uprv_strcat(entry, "_dat"); 275 } 276 277 /* turn dashes or dots in the entry name into underscores */ 278 length=uprv_strlen(entry); 279 for(i=0; i<length; ++i) { 280 if(entry[i]=='-' || entry[i]=='.') { 281 entry[i]='_'; 282 } 283 } 284 285 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header, 286 entry, entry, entry, entry, 287 entry, entry, entry, entry); 288 T_FileStream_writeLine(out, bufferStr); 289 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine); 290 291 for(;;) { 292 length=T_FileStream_read(in, buffer, sizeof(buffer)); 293 if(length==0) { 294 break; 295 } 296 if (length != sizeof(buffer)) { 297 /* pad with extra 0's when at the end of the file */ 298 for(i=0; i < (length % sizeof(uint32_t)); ++i) { 299 buffer[length+i] = 0; 300 } 301 } 302 for(i=0; i<(length/sizeof(buffer[0])); i++) { 303 column = write32(out, buffer[i], column); 304 } 305 } 306 307 T_FileStream_writeLine(out, "\n"); 308 309 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer, 310 entry, entry, entry, entry, 311 entry, entry, entry, entry); 312 T_FileStream_writeLine(out, bufferStr); 313 314 if(T_FileStream_error(in)) { 315 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 316 exit(U_FILE_ACCESS_ERROR); 317 } 318 319 if(T_FileStream_error(out)) { 320 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 321 exit(U_FILE_ACCESS_ERROR); 322 } 323 324 T_FileStream_close(out); 325 T_FileStream_close(in); 326 } 327 328 U_CAPI void U_EXPORT2 329 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) { 330 uint32_t column = MAX_COLUMN; 331 char buffer[4096], entry[64]; 332 FileStream *in, *out; 333 size_t i, length; 334 335 in=T_FileStream_open(filename, "rb"); 336 if(in==NULL) { 337 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 338 exit(U_FILE_ACCESS_ERROR); 339 } 340 341 if(optName != NULL) { /* prepend 'icudt28_' */ 342 strcpy(entry, optName); 343 strcat(entry, "_"); 344 } else { 345 entry[0] = 0; 346 } 347 348 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename); 349 if (outFilePath != NULL) { 350 uprv_strcpy(outFilePath, buffer); 351 } 352 out=T_FileStream_open(buffer, "w"); 353 if(out==NULL) { 354 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 355 exit(U_FILE_ACCESS_ERROR); 356 } 357 358 /* turn dashes or dots in the entry name into underscores */ 359 length=uprv_strlen(entry); 360 for(i=0; i<length; ++i) { 361 if(entry[i]=='-' || entry[i]=='.') { 362 entry[i]='_'; 363 } 364 } 365 366 #if U_PLATFORM == U_PF_OS400 367 /* 368 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c 369 370 This is here because this platform can't currently put 371 const data into the read-only pages of an object or 372 shared library (service program). Only strings are allowed in read-only 373 pages, so we use char * strings to store the data. 374 375 In order to prevent the beginning of the data from ever matching the 376 magic numbers we must still use the initial double. 377 [grhoten 4/24/2003] 378 */ 379 sprintf(buffer, 380 "#ifndef IN_GENERATED_CCODE\n" 381 "#define IN_GENERATED_CCODE\n" 382 "#define U_DISABLE_RENAMING 1\n" 383 "#include \"unicode/umachine.h\"\n" 384 "#endif\n" 385 "U_CDECL_BEGIN\n" 386 "const struct {\n" 387 " double bogus;\n" 388 " const char *bytes; \n" 389 "} %s={ 0.0, \n", 390 entry); 391 T_FileStream_writeLine(out, buffer); 392 393 for(;;) { 394 length=T_FileStream_read(in, buffer, sizeof(buffer)); 395 if(length==0) { 396 break; 397 } 398 for(i=0; i<length; ++i) { 399 column = write8str(out, (uint8_t)buffer[i], column); 400 } 401 } 402 403 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n"); 404 #else 405 /* Function renaming shouldn't be done in data */ 406 sprintf(buffer, 407 "#ifndef IN_GENERATED_CCODE\n" 408 "#define IN_GENERATED_CCODE\n" 409 "#define U_DISABLE_RENAMING 1\n" 410 "#include \"unicode/umachine.h\"\n" 411 "#endif\n" 412 "U_CDECL_BEGIN\n" 413 "const struct {\n" 414 " double bogus;\n" 415 " uint8_t bytes[%ld]; \n" 416 "} %s={ 0.0, {\n", 417 (long)T_FileStream_size(in), entry); 418 T_FileStream_writeLine(out, buffer); 419 420 for(;;) { 421 length=T_FileStream_read(in, buffer, sizeof(buffer)); 422 if(length==0) { 423 break; 424 } 425 for(i=0; i<length; ++i) { 426 column = write8(out, (uint8_t)buffer[i], column); 427 } 428 } 429 430 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n"); 431 #endif 432 433 if(T_FileStream_error(in)) { 434 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 435 exit(U_FILE_ACCESS_ERROR); 436 } 437 438 if(T_FileStream_error(out)) { 439 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 440 exit(U_FILE_ACCESS_ERROR); 441 } 442 443 T_FileStream_close(out); 444 T_FileStream_close(in); 445 } 446 447 static uint32_t 448 write32(FileStream *out, uint32_t bitField, uint32_t column) { 449 int32_t i; 450 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */ 451 char *s = bitFieldStr; 452 uint8_t *ptrIdx = (uint8_t *)&bitField; 453 static const char hexToStr[16] = { 454 '0','1','2','3', 455 '4','5','6','7', 456 '8','9','A','B', 457 'C','D','E','F' 458 }; 459 460 /* write the value, possibly with comma and newline */ 461 if(column==MAX_COLUMN) { 462 /* first byte */ 463 column=1; 464 } else if(column<32) { 465 *(s++)=','; 466 ++column; 467 } else { 468 *(s++)='\n'; 469 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine); 470 s+=uprv_strlen(s); 471 column=1; 472 } 473 474 if (bitField < 10) { 475 /* It's a small number. Don't waste the space for 0x */ 476 *(s++)=hexToStr[bitField]; 477 } 478 else { 479 int seenNonZero = 0; /* This is used to remove leading zeros */ 480 481 if(hexType==HEX_0X) { 482 *(s++)='0'; 483 *(s++)='x'; 484 } else if(hexType==HEX_0H) { 485 *(s++)='0'; 486 } 487 488 /* This creates a 32-bit field */ 489 #if U_IS_BIG_ENDIAN 490 for (i = 0; i < sizeof(uint32_t); i++) 491 #else 492 for (i = sizeof(uint32_t)-1; i >= 0 ; i--) 493 #endif 494 { 495 uint8_t value = ptrIdx[i]; 496 if (value || seenNonZero) { 497 *(s++)=hexToStr[value>>4]; 498 *(s++)=hexToStr[value&0xF]; 499 seenNonZero = 1; 500 } 501 } 502 if(hexType==HEX_0H) { 503 *(s++)='h'; 504 } 505 } 506 507 *(s++)=0; 508 T_FileStream_writeLine(out, bitFieldStr); 509 return column; 510 } 511 512 static uint32_t 513 write8(FileStream *out, uint8_t byte, uint32_t column) { 514 char s[4]; 515 int i=0; 516 517 /* convert the byte value to a string */ 518 if(byte>=100) { 519 s[i++]=(char)('0'+byte/100); 520 byte%=100; 521 } 522 if(i>0 || byte>=10) { 523 s[i++]=(char)('0'+byte/10); 524 byte%=10; 525 } 526 s[i++]=(char)('0'+byte); 527 s[i]=0; 528 529 /* write the value, possibly with comma and newline */ 530 if(column==MAX_COLUMN) { 531 /* first byte */ 532 column=1; 533 } else if(column<16) { 534 T_FileStream_writeLine(out, ","); 535 ++column; 536 } else { 537 T_FileStream_writeLine(out, ",\n"); 538 column=1; 539 } 540 T_FileStream_writeLine(out, s); 541 return column; 542 } 543 544 #if U_PLATFORM == U_PF_OS400 545 static uint32_t 546 write8str(FileStream *out, uint8_t byte, uint32_t column) { 547 char s[8]; 548 549 if (byte > 7) 550 sprintf(s, "\\x%X", byte); 551 else 552 sprintf(s, "\\%X", byte); 553 554 /* write the value, possibly with comma and newline */ 555 if(column==MAX_COLUMN) { 556 /* first byte */ 557 column=1; 558 T_FileStream_writeLine(out, "\""); 559 } else if(column<24) { 560 ++column; 561 } else { 562 T_FileStream_writeLine(out, "\"\n\""); 563 column=1; 564 } 565 T_FileStream_writeLine(out, s); 566 return column; 567 } 568 #endif 569 570 static void 571 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) { 572 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.'); 573 574 /* copy path */ 575 if(destdir!=NULL && *destdir!=0) { 576 do { 577 *outFilename++=*destdir++; 578 } while(*destdir!=0); 579 if(*(outFilename-1)!=U_FILE_SEP_CHAR) { 580 *outFilename++=U_FILE_SEP_CHAR; 581 } 582 inFilename=basename; 583 } else { 584 while(inFilename<basename) { 585 *outFilename++=*inFilename++; 586 } 587 } 588 589 if(suffix==NULL) { 590 /* the filename does not have a suffix */ 591 uprv_strcpy(entryName, inFilename); 592 if(optFilename != NULL) { 593 uprv_strcpy(outFilename, optFilename); 594 } else { 595 uprv_strcpy(outFilename, inFilename); 596 } 597 uprv_strcat(outFilename, newSuffix); 598 } else { 599 char *saveOutFilename = outFilename; 600 /* copy basename */ 601 while(inFilename<suffix) { 602 if(*inFilename=='-') { 603 /* iSeries cannot have '-' in the .o objects. */ 604 *outFilename++=*entryName++='_'; 605 inFilename++; 606 } 607 else { 608 *outFilename++=*entryName++=*inFilename++; 609 } 610 } 611 612 /* replace '.' by '_' */ 613 *outFilename++=*entryName++='_'; 614 ++inFilename; 615 616 /* copy suffix */ 617 while(*inFilename!=0) { 618 *outFilename++=*entryName++=*inFilename++; 619 } 620 621 *entryName=0; 622 623 if(optFilename != NULL) { 624 uprv_strcpy(saveOutFilename, optFilename); 625 uprv_strcat(saveOutFilename, newSuffix); 626 } else { 627 /* add ".c" */ 628 uprv_strcpy(outFilename, newSuffix); 629 } 630 } 631 } 632 633 #ifdef CAN_GENERATE_OBJECTS 634 static void 635 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) { 636 union { 637 char bytes[2048]; 638 #ifdef U_ELF 639 Elf32_Ehdr header32; 640 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */ 641 #elif U_PLATFORM_HAS_WIN32_API 642 IMAGE_FILE_HEADER header; 643 #endif 644 } buffer; 645 646 const char *filename; 647 FileStream *in; 648 int32_t length; 649 650 #ifdef U_ELF 651 652 #elif U_PLATFORM_HAS_WIN32_API 653 const IMAGE_FILE_HEADER *pHeader; 654 #else 655 # error "Unknown platform for CAN_GENERATE_OBJECTS." 656 #endif 657 658 if(optMatchArch != NULL) { 659 filename=optMatchArch; 660 } else { 661 /* set defaults */ 662 #ifdef U_ELF 663 /* set EM_386 because elf.h does not provide better defaults */ 664 *pCPU=EM_386; 665 *pBits=32; 666 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB); 667 #elif U_PLATFORM_HAS_WIN32_API 668 /* _M_IA64 should be defined in windows.h */ 669 # if defined(_M_IA64) 670 *pCPU=IMAGE_FILE_MACHINE_IA64; 671 # elif defined(_M_AMD64) 672 *pCPU=IMAGE_FILE_MACHINE_AMD64; 673 # else 674 *pCPU=IMAGE_FILE_MACHINE_I386; 675 # endif 676 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 677 *pIsBigEndian=FALSE; 678 #else 679 # error "Unknown platform for CAN_GENERATE_OBJECTS." 680 #endif 681 return; 682 } 683 684 in=T_FileStream_open(filename, "rb"); 685 if(in==NULL) { 686 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename); 687 exit(U_FILE_ACCESS_ERROR); 688 } 689 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes)); 690 691 #ifdef U_ELF 692 if(length<sizeof(Elf32_Ehdr)) { 693 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 694 exit(U_UNSUPPORTED_ERROR); 695 } 696 if( 697 buffer.header32.e_ident[0]!=ELFMAG0 || 698 buffer.header32.e_ident[1]!=ELFMAG1 || 699 buffer.header32.e_ident[2]!=ELFMAG2 || 700 buffer.header32.e_ident[3]!=ELFMAG3 || 701 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64 702 ) { 703 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename); 704 exit(U_UNSUPPORTED_ERROR); 705 } 706 707 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */ 708 #ifdef U_ELF64 709 if(*pBits!=32 && *pBits!=64) { 710 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n"); 711 exit(U_UNSUPPORTED_ERROR); 712 } 713 #else 714 if(*pBits!=32) { 715 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n"); 716 exit(U_UNSUPPORTED_ERROR); 717 } 718 #endif 719 720 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB); 721 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) { 722 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n"); 723 exit(U_UNSUPPORTED_ERROR); 724 } 725 /* TODO: Support byte swapping */ 726 727 *pCPU=buffer.header32.e_machine; 728 #elif U_PLATFORM_HAS_WIN32_API 729 if(length<sizeof(IMAGE_FILE_HEADER)) { 730 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 731 exit(U_UNSUPPORTED_ERROR); 732 } 733 /* TODO: Use buffer.header. Keep aliasing legal. */ 734 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes; 735 *pCPU=pHeader->Machine; 736 /* 737 * The number of bits is implicit with the Machine value. 738 * *pBits is ignored in the calling code, so this need not be precise. 739 */ 740 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 741 /* Windows always runs on little-endian CPUs. */ 742 *pIsBigEndian=FALSE; 743 #else 744 # error "Unknown platform for CAN_GENERATE_OBJECTS." 745 #endif 746 747 T_FileStream_close(in); 748 } 749 750 U_CAPI void U_EXPORT2 751 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) { 752 /* common variables */ 753 char buffer[4096], entry[40]={ 0 }; 754 FileStream *in, *out; 755 const char *newSuffix; 756 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0; 757 758 uint16_t cpu, bits; 759 UBool makeBigEndian; 760 761 /* platform-specific variables and initialization code */ 762 #ifdef U_ELF 763 /* 32-bit Elf file header */ 764 static Elf32_Ehdr header32={ 765 { 766 /* e_ident[] */ 767 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 768 ELFCLASS32, 769 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 770 EV_CURRENT /* EI_VERSION */ 771 }, 772 ET_REL, 773 EM_386, 774 EV_CURRENT, /* e_version */ 775 0, /* e_entry */ 776 0, /* e_phoff */ 777 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */ 778 0, /* e_flags */ 779 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */ 780 0, /* e_phentsize */ 781 0, /* e_phnum */ 782 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */ 783 5, /* e_shnum */ 784 2 /* e_shstrndx */ 785 }; 786 787 /* 32-bit Elf section header table */ 788 static Elf32_Shdr sectionHeaders32[5]={ 789 { /* SHN_UNDEF */ 790 0 791 }, 792 { /* .symtab */ 793 1, /* sh_name */ 794 SHT_SYMTAB, 795 0, /* sh_flags */ 796 0, /* sh_addr */ 797 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */ 798 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */ 799 3, /* sh_link=sect hdr index of .strtab */ 800 1, /* sh_info=One greater than the symbol table index of the last 801 * local symbol (with STB_LOCAL). */ 802 4, /* sh_addralign */ 803 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */ 804 }, 805 { /* .shstrtab */ 806 9, /* sh_name */ 807 SHT_STRTAB, 808 0, /* sh_flags */ 809 0, /* sh_addr */ 810 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */ 811 40, /* sh_size */ 812 0, /* sh_link */ 813 0, /* sh_info */ 814 1, /* sh_addralign */ 815 0 /* sh_entsize */ 816 }, 817 { /* .strtab */ 818 19, /* sh_name */ 819 SHT_STRTAB, 820 0, /* sh_flags */ 821 0, /* sh_addr */ 822 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */ 823 (Elf32_Word)sizeof(entry), /* sh_size */ 824 0, /* sh_link */ 825 0, /* sh_info */ 826 1, /* sh_addralign */ 827 0 /* sh_entsize */ 828 }, 829 { /* .rodata */ 830 27, /* sh_name */ 831 SHT_PROGBITS, 832 SHF_ALLOC, /* sh_flags */ 833 0, /* sh_addr */ 834 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */ 835 0, /* sh_size */ 836 0, /* sh_link */ 837 0, /* sh_info */ 838 16, /* sh_addralign */ 839 0 /* sh_entsize */ 840 } 841 }; 842 843 /* symbol table */ 844 static Elf32_Sym symbols32[2]={ 845 { /* STN_UNDEF */ 846 0 847 }, 848 { /* data entry point */ 849 1, /* st_name */ 850 0, /* st_value */ 851 0, /* st_size */ 852 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 853 0, /* st_other */ 854 4 /* st_shndx=index of related section table entry */ 855 } 856 }; 857 858 /* section header string table, with decimal string offsets */ 859 static const char sectionStrings[40]= 860 /* 0 */ "\0" 861 /* 1 */ ".symtab\0" 862 /* 9 */ ".shstrtab\0" 863 /* 19 */ ".strtab\0" 864 /* 27 */ ".rodata\0" 865 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */ 866 /* 40: padded to multiple of 8 bytes */ 867 868 /* 869 * Use entry[] for the string table which will contain only the 870 * entry point name. 871 * entry[0] must be 0 (NUL) 872 * The entry point name can be up to 38 characters long (sizeof(entry)-2). 873 */ 874 875 /* 16-align .rodata in the .o file, just in case */ 876 static const char padding[16]={ 0 }; 877 int32_t paddingSize; 878 879 #ifdef U_ELF64 880 /* 64-bit Elf file header */ 881 static Elf64_Ehdr header64={ 882 { 883 /* e_ident[] */ 884 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 885 ELFCLASS64, 886 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 887 EV_CURRENT /* EI_VERSION */ 888 }, 889 ET_REL, 890 EM_X86_64, 891 EV_CURRENT, /* e_version */ 892 0, /* e_entry */ 893 0, /* e_phoff */ 894 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */ 895 0, /* e_flags */ 896 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */ 897 0, /* e_phentsize */ 898 0, /* e_phnum */ 899 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */ 900 5, /* e_shnum */ 901 2 /* e_shstrndx */ 902 }; 903 904 /* 64-bit Elf section header table */ 905 static Elf64_Shdr sectionHeaders64[5]={ 906 { /* SHN_UNDEF */ 907 0 908 }, 909 { /* .symtab */ 910 1, /* sh_name */ 911 SHT_SYMTAB, 912 0, /* sh_flags */ 913 0, /* sh_addr */ 914 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */ 915 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */ 916 3, /* sh_link=sect hdr index of .strtab */ 917 1, /* sh_info=One greater than the symbol table index of the last 918 * local symbol (with STB_LOCAL). */ 919 4, /* sh_addralign */ 920 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */ 921 }, 922 { /* .shstrtab */ 923 9, /* sh_name */ 924 SHT_STRTAB, 925 0, /* sh_flags */ 926 0, /* sh_addr */ 927 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */ 928 40, /* sh_size */ 929 0, /* sh_link */ 930 0, /* sh_info */ 931 1, /* sh_addralign */ 932 0 /* sh_entsize */ 933 }, 934 { /* .strtab */ 935 19, /* sh_name */ 936 SHT_STRTAB, 937 0, /* sh_flags */ 938 0, /* sh_addr */ 939 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */ 940 (Elf64_Xword)sizeof(entry), /* sh_size */ 941 0, /* sh_link */ 942 0, /* sh_info */ 943 1, /* sh_addralign */ 944 0 /* sh_entsize */ 945 }, 946 { /* .rodata */ 947 27, /* sh_name */ 948 SHT_PROGBITS, 949 SHF_ALLOC, /* sh_flags */ 950 0, /* sh_addr */ 951 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */ 952 0, /* sh_size */ 953 0, /* sh_link */ 954 0, /* sh_info */ 955 16, /* sh_addralign */ 956 0 /* sh_entsize */ 957 } 958 }; 959 960 /* 961 * 64-bit symbol table 962 * careful: different order of items compared with Elf32_sym! 963 */ 964 static Elf64_Sym symbols64[2]={ 965 { /* STN_UNDEF */ 966 0 967 }, 968 { /* data entry point */ 969 1, /* st_name */ 970 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 971 0, /* st_other */ 972 4, /* st_shndx=index of related section table entry */ 973 0, /* st_value */ 974 0 /* st_size */ 975 } 976 }; 977 978 #endif /* U_ELF64 */ 979 980 /* entry[] have a leading NUL */ 981 entryOffset=1; 982 983 /* in the common code, count entryLength from after the NUL */ 984 entryLengthOffset=1; 985 986 newSuffix=".o"; 987 988 #elif U_PLATFORM_HAS_WIN32_API 989 struct { 990 IMAGE_FILE_HEADER fileHeader; 991 IMAGE_SECTION_HEADER sections[2]; 992 char linkerOptions[100]; 993 } objHeader; 994 IMAGE_SYMBOL symbols[1]; 995 struct { 996 DWORD sizeofLongNames; 997 char longNames[100]; 998 } symbolNames; 999 1000 /* 1001 * entry sometimes have a leading '_' 1002 * overwritten if entryOffset==0 depending on the target platform 1003 * see check for cpu below 1004 */ 1005 entry[0]='_'; 1006 1007 newSuffix=".obj"; 1008 #else 1009 # error "Unknown platform for CAN_GENERATE_OBJECTS." 1010 #endif 1011 1012 /* deal with options, files and the entry point name */ 1013 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch); 1014 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian); 1015 #if U_PLATFORM_HAS_WIN32_API 1016 if(cpu==IMAGE_FILE_MACHINE_I386) { 1017 entryOffset=1; 1018 } 1019 #endif 1020 1021 in=T_FileStream_open(filename, "rb"); 1022 if(in==NULL) { 1023 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 1024 exit(U_FILE_ACCESS_ERROR); 1025 } 1026 size=T_FileStream_size(in); 1027 1028 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename); 1029 if (outFilePath != NULL) { 1030 uprv_strcpy(outFilePath, buffer); 1031 } 1032 1033 if(optEntryPoint != NULL) { 1034 uprv_strcpy(entry+entryOffset, optEntryPoint); 1035 uprv_strcat(entry+entryOffset, "_dat"); 1036 } 1037 /* turn dashes in the entry name into underscores */ 1038 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset); 1039 for(i=0; i<entryLength; ++i) { 1040 if(entry[entryLengthOffset+i]=='-') { 1041 entry[entryLengthOffset+i]='_'; 1042 } 1043 } 1044 1045 /* open the output file */ 1046 out=T_FileStream_open(buffer, "wb"); 1047 if(out==NULL) { 1048 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 1049 exit(U_FILE_ACCESS_ERROR); 1050 } 1051 1052 #ifdef U_ELF 1053 if(bits==32) { 1054 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1055 header32.e_machine=cpu; 1056 1057 /* 16-align .rodata in the .o file, just in case */ 1058 paddingSize=sectionHeaders32[4].sh_offset & 0xf; 1059 if(paddingSize!=0) { 1060 paddingSize=0x10-paddingSize; 1061 sectionHeaders32[4].sh_offset+=paddingSize; 1062 } 1063 1064 sectionHeaders32[4].sh_size=(Elf32_Word)size; 1065 1066 symbols32[1].st_size=(Elf32_Word)size; 1067 1068 /* write .o headers */ 1069 T_FileStream_write(out, &header32, (int32_t)sizeof(header32)); 1070 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32)); 1071 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32)); 1072 } else /* bits==64 */ { 1073 #ifdef U_ELF64 1074 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1075 header64.e_machine=cpu; 1076 1077 /* 16-align .rodata in the .o file, just in case */ 1078 paddingSize=sectionHeaders64[4].sh_offset & 0xf; 1079 if(paddingSize!=0) { 1080 paddingSize=0x10-paddingSize; 1081 sectionHeaders64[4].sh_offset+=paddingSize; 1082 } 1083 1084 sectionHeaders64[4].sh_size=(Elf64_Xword)size; 1085 1086 symbols64[1].st_size=(Elf64_Xword)size; 1087 1088 /* write .o headers */ 1089 T_FileStream_write(out, &header64, (int32_t)sizeof(header64)); 1090 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64)); 1091 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64)); 1092 #endif 1093 } 1094 1095 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings)); 1096 T_FileStream_write(out, entry, (int32_t)sizeof(entry)); 1097 if(paddingSize!=0) { 1098 T_FileStream_write(out, padding, paddingSize); 1099 } 1100 #elif U_PLATFORM_HAS_WIN32_API 1101 /* populate the .obj headers */ 1102 uprv_memset(&objHeader, 0, sizeof(objHeader)); 1103 uprv_memset(&symbols, 0, sizeof(symbols)); 1104 uprv_memset(&symbolNames, 0, sizeof(symbolNames)); 1105 1106 /* write the linker export directive */ 1107 uprv_strcpy(objHeader.linkerOptions, "-export:"); 1108 length=8; 1109 uprv_strcpy(objHeader.linkerOptions+length, entry); 1110 length+=entryLength; 1111 uprv_strcpy(objHeader.linkerOptions+length, ",data "); 1112 length+=6; 1113 1114 /* set the file header */ 1115 objHeader.fileHeader.Machine=cpu; 1116 objHeader.fileHeader.NumberOfSections=2; 1117 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL); 1118 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */ 1119 objHeader.fileHeader.NumberOfSymbols=1; 1120 1121 /* set the section for the linker options */ 1122 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8); 1123 objHeader.sections[0].SizeOfRawData=length; 1124 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER; 1125 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES; 1126 1127 /* set the data section */ 1128 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6); 1129 objHeader.sections[1].SizeOfRawData=size; 1130 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length; 1131 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ; 1132 1133 /* set the symbol table */ 1134 if(entryLength<=8) { 1135 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength); 1136 symbolNames.sizeofLongNames=4; 1137 } else { 1138 symbols[0].N.Name.Short=0; 1139 symbols[0].N.Name.Long=4; 1140 symbolNames.sizeofLongNames=4+entryLength+1; 1141 uprv_strcpy(symbolNames.longNames, entry); 1142 } 1143 symbols[0].SectionNumber=2; 1144 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL; 1145 1146 /* write the file header and the linker options section */ 1147 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData); 1148 #else 1149 # error "Unknown platform for CAN_GENERATE_OBJECTS." 1150 #endif 1151 1152 /* copy the data file into section 2 */ 1153 for(;;) { 1154 length=T_FileStream_read(in, buffer, sizeof(buffer)); 1155 if(length==0) { 1156 break; 1157 } 1158 T_FileStream_write(out, buffer, (int32_t)length); 1159 } 1160 1161 #if U_PLATFORM_HAS_WIN32_API 1162 /* write the symbol table */ 1163 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL); 1164 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames); 1165 #endif 1166 1167 if(T_FileStream_error(in)) { 1168 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 1169 exit(U_FILE_ACCESS_ERROR); 1170 } 1171 1172 if(T_FileStream_error(out)) { 1173 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 1174 exit(U_FILE_ACCESS_ERROR); 1175 } 1176 1177 T_FileStream_close(out); 1178 T_FileStream_close(in); 1179 } 1180 #endif 1181