1 /****************************************************************************** 2 * Copyright (C) 2009-2015, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************************* 5 */ 6 #include "unicode/utypes.h" 7 8 #if U_PLATFORM_HAS_WIN32_API 9 # define VC_EXTRALEAN 10 # define WIN32_LEAN_AND_MEAN 11 # define NOUSER 12 # define NOSERVICE 13 # define NOIME 14 # define NOMCX 15 #include <windows.h> 16 #include <time.h> 17 # ifdef __GNUC__ 18 # define WINDOWS_WITH_GNUC 19 # endif 20 #endif 21 22 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H 23 # define U_ELF 24 #endif 25 26 #ifdef U_ELF 27 # include <elf.h> 28 # if defined(ELFCLASS64) 29 # define U_ELF64 30 # endif 31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */ 32 # ifndef EM_X86_64 33 # define EM_X86_64 62 34 # endif 35 # define ICU_ENTRY_OFFSET 0 36 #endif 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include "unicode/putil.h" 41 #include "cmemory.h" 42 #include "cstring.h" 43 #include "filestrm.h" 44 #include "toolutil.h" 45 #include "unicode/uclean.h" 46 #include "uoptions.h" 47 #include "pkg_genc.h" 48 49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) 50 51 #define HEX_0X 0 /* 0x1234 */ 52 #define HEX_0H 1 /* 01234h */ 53 54 /* prototypes --------------------------------------------------------------- */ 55 static void 56 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename); 57 58 static uint32_t 59 write8(FileStream *out, uint8_t byte, uint32_t column); 60 61 static uint32_t 62 write32(FileStream *out, uint32_t byte, uint32_t column); 63 64 #if U_PLATFORM == U_PF_OS400 65 static uint32_t 66 write8str(FileStream *out, uint8_t byte, uint32_t column); 67 #endif 68 /* -------------------------------------------------------------------------- */ 69 70 /* 71 Creating Template Files for New Platforms 72 73 Let the cc compiler help you get started. 74 Compile this program 75 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16}; 76 with the -S option to produce assembly output. 77 78 For example, this will generate array.s: 79 gcc -S array.c 80 81 This will produce a .s file that may look like this: 82 83 .file "array.c" 84 .version "01.01" 85 gcc2_compiled.: 86 .globl x 87 .section .rodata 88 .align 4 89 .type x,@object 90 .size x,20 91 x: 92 .long 1 93 .long 2 94 .long -559038737 95 .long -1 96 .long 16 97 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)" 98 99 which gives a starting point that will compile, and can be transformed 100 to become the template, generally with some consulting of as docs and 101 some experimentation. 102 103 If you want ICU to automatically use this assembly, you should 104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file, 105 where the name is the compiler or platform that you used in this 106 assemblyHeader data structure. 107 */ 108 static const struct AssemblyType { 109 const char *name; 110 const char *header; 111 const char *beginLine; 112 const char *footer; 113 int8_t hexType; /* HEX_0X or HEX_0h */ 114 } assemblyHeader[] = { 115 /* For gcc assemblers, the meaning of .align changes depending on the */ 116 /* hardware, so we use .balign 16 which always means 16 bytes. */ 117 /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */ 118 {"gcc", 119 ".globl %s\n" 120 "\t.section .note.GNU-stack,\"\",%%progbits\n" 121 "\t.section .rodata\n" 122 "\t.balign 16\n" 123 "#ifdef U_HIDE_DATA_SYMBOL\n" 124 "\t.hidden %s\n" 125 "#endif\n" 126 "\t.type %s,%%object\n" 127 "%s:\n\n", 128 129 ".long ","",HEX_0X 130 }, 131 {"gcc-darwin", 132 /*"\t.section __TEXT,__text,regular,pure_instructions\n" 133 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/ 134 ".globl _%s\n" 135 "#ifdef U_HIDE_DATA_SYMBOL\n" 136 "\t.private_extern _%s\n" 137 "#endif\n" 138 "\t.data\n" 139 "\t.const\n" 140 "\t.balign 16\n" 141 "_%s:\n\n", 142 143 ".long ","",HEX_0X 144 }, 145 {"gcc-cygwin", 146 ".globl _%s\n" 147 "\t.section .rodata\n" 148 "\t.balign 16\n" 149 "_%s:\n\n", 150 151 ".long ","",HEX_0X 152 }, 153 {"gcc-mingw64", 154 ".globl %s\n" 155 "\t.section .rodata\n" 156 "\t.balign 16\n" 157 "%s:\n\n", 158 159 ".long ","",HEX_0X 160 }, 161 /* 16 bytes alignment. */ 162 /* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */ 163 {"sun", 164 "\t.section \".rodata\"\n" 165 "\t.align 16\n" 166 ".globl %s\n" 167 "%s:\n", 168 169 ".word ","",HEX_0X 170 }, 171 /* 16 bytes alignment for sun-x86. */ 172 /* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */ 173 {"sun-x86", 174 "Drodata.rodata:\n" 175 "\t.type Drodata.rodata,@object\n" 176 "\t.size Drodata.rodata,0\n" 177 "\t.globl %s\n" 178 "\t.align 16\n" 179 "%s:\n", 180 181 ".4byte ","",HEX_0X 182 }, 183 /* 1<<4 bit alignment for aix. */ 184 /* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */ 185 {"xlc", 186 ".globl %s{RO}\n" 187 "\t.toc\n" 188 "%s:\n" 189 "\t.csect %s{RO}, 4\n", 190 191 ".long ","",HEX_0X 192 }, 193 {"aCC-ia64", 194 "\t.file \"%s.s\"\n" 195 "\t.type %s,@object\n" 196 "\t.global %s\n" 197 "\t.secalias .abe$0.rodata, \".rodata\"\n" 198 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n" 199 "\t.align 16\n" 200 "%s::\t", 201 202 "data4 ","",HEX_0X 203 }, 204 {"aCC-parisc", 205 "\t.SPACE $TEXT$\n" 206 "\t.SUBSPA $LIT$\n" 207 "%s\n" 208 "\t.EXPORT %s\n" 209 "\t.ALIGN 16\n", 210 211 ".WORD ","",HEX_0X 212 }, 213 /* align 16 bytes */ 214 /* http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */ 215 { "masm", 216 "\tTITLE %s\n" 217 "; generated by genccode\n" 218 ".386\n" 219 ".model flat\n" 220 "\tPUBLIC _%s\n" 221 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n" 222 "\tALIGN 16\n" 223 "_%s\tLABEL DWORD\n", 224 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H 225 } 226 }; 227 228 static int32_t assemblyHeaderIndex = -1; 229 static int32_t hexType = HEX_0X; 230 231 U_CAPI UBool U_EXPORT2 232 checkAssemblyHeaderName(const char* optAssembly) { 233 int32_t idx; 234 assemblyHeaderIndex = -1; 235 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 236 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) { 237 assemblyHeaderIndex = idx; 238 hexType = assemblyHeader[idx].hexType; /* set the hex type */ 239 return TRUE; 240 } 241 } 242 243 return FALSE; 244 } 245 246 247 U_CAPI void U_EXPORT2 248 printAssemblyHeadersToStdErr(void) { 249 int32_t idx; 250 fprintf(stderr, "%s", assemblyHeader[0].name); 251 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 252 fprintf(stderr, ", %s", assemblyHeader[idx].name); 253 } 254 fprintf(stderr, 255 ")\n"); 256 } 257 258 U_CAPI void U_EXPORT2 259 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) { 260 uint32_t column = MAX_COLUMN; 261 char entry[64]; 262 uint32_t buffer[1024]; 263 char *bufferStr = (char *)buffer; 264 FileStream *in, *out; 265 size_t i, length; 266 267 in=T_FileStream_open(filename, "rb"); 268 if(in==NULL) { 269 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 270 exit(U_FILE_ACCESS_ERROR); 271 } 272 273 getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename); 274 out=T_FileStream_open(bufferStr, "w"); 275 if(out==NULL) { 276 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr); 277 exit(U_FILE_ACCESS_ERROR); 278 } 279 280 if (outFilePath != NULL) { 281 uprv_strcpy(outFilePath, bufferStr); 282 } 283 284 #ifdef WINDOWS_WITH_GNUC 285 /* Need to fix the file seperator character when using MinGW. */ 286 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/'); 287 #endif 288 289 if(optEntryPoint != NULL) { 290 uprv_strcpy(entry, optEntryPoint); 291 uprv_strcat(entry, "_dat"); 292 } 293 294 /* turn dashes or dots in the entry name into underscores */ 295 length=uprv_strlen(entry); 296 for(i=0; i<length; ++i) { 297 if(entry[i]=='-' || entry[i]=='.') { 298 entry[i]='_'; 299 } 300 } 301 302 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header, 303 entry, entry, entry, entry, 304 entry, entry, entry, entry); 305 T_FileStream_writeLine(out, bufferStr); 306 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine); 307 308 for(;;) { 309 length=T_FileStream_read(in, buffer, sizeof(buffer)); 310 if(length==0) { 311 break; 312 } 313 if (length != sizeof(buffer)) { 314 /* pad with extra 0's when at the end of the file */ 315 for(i=0; i < (length % sizeof(uint32_t)); ++i) { 316 buffer[length+i] = 0; 317 } 318 } 319 for(i=0; i<(length/sizeof(buffer[0])); i++) { 320 column = write32(out, buffer[i], column); 321 } 322 } 323 324 T_FileStream_writeLine(out, "\n"); 325 326 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer, 327 entry, entry, entry, entry, 328 entry, entry, entry, entry); 329 T_FileStream_writeLine(out, bufferStr); 330 331 if(T_FileStream_error(in)) { 332 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 333 exit(U_FILE_ACCESS_ERROR); 334 } 335 336 if(T_FileStream_error(out)) { 337 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 338 exit(U_FILE_ACCESS_ERROR); 339 } 340 341 T_FileStream_close(out); 342 T_FileStream_close(in); 343 } 344 345 U_CAPI void U_EXPORT2 346 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) { 347 uint32_t column = MAX_COLUMN; 348 char buffer[4096], entry[64]; 349 FileStream *in, *out; 350 size_t i, length; 351 352 in=T_FileStream_open(filename, "rb"); 353 if(in==NULL) { 354 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 355 exit(U_FILE_ACCESS_ERROR); 356 } 357 358 if(optName != NULL) { /* prepend 'icudt28_' */ 359 strcpy(entry, optName); 360 strcat(entry, "_"); 361 } else { 362 entry[0] = 0; 363 } 364 365 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename); 366 if (outFilePath != NULL) { 367 uprv_strcpy(outFilePath, buffer); 368 } 369 out=T_FileStream_open(buffer, "w"); 370 if(out==NULL) { 371 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 372 exit(U_FILE_ACCESS_ERROR); 373 } 374 375 /* turn dashes or dots in the entry name into underscores */ 376 length=uprv_strlen(entry); 377 for(i=0; i<length; ++i) { 378 if(entry[i]=='-' || entry[i]=='.') { 379 entry[i]='_'; 380 } 381 } 382 383 #if U_PLATFORM == U_PF_OS400 384 /* 385 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c 386 387 This is here because this platform can't currently put 388 const data into the read-only pages of an object or 389 shared library (service program). Only strings are allowed in read-only 390 pages, so we use char * strings to store the data. 391 392 In order to prevent the beginning of the data from ever matching the 393 magic numbers we must still use the initial double. 394 [grhoten 4/24/2003] 395 */ 396 sprintf(buffer, 397 "#ifndef IN_GENERATED_CCODE\n" 398 "#define IN_GENERATED_CCODE\n" 399 "#define U_DISABLE_RENAMING 1\n" 400 "#include \"unicode/umachine.h\"\n" 401 "#endif\n" 402 "U_CDECL_BEGIN\n" 403 "const struct {\n" 404 " double bogus;\n" 405 " const char *bytes; \n" 406 "} %s={ 0.0, \n", 407 entry); 408 T_FileStream_writeLine(out, buffer); 409 410 for(;;) { 411 length=T_FileStream_read(in, buffer, sizeof(buffer)); 412 if(length==0) { 413 break; 414 } 415 for(i=0; i<length; ++i) { 416 column = write8str(out, (uint8_t)buffer[i], column); 417 } 418 } 419 420 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n"); 421 #else 422 /* Function renaming shouldn't be done in data */ 423 sprintf(buffer, 424 "#ifndef IN_GENERATED_CCODE\n" 425 "#define IN_GENERATED_CCODE\n" 426 "#define U_DISABLE_RENAMING 1\n" 427 "#include \"unicode/umachine.h\"\n" 428 "#endif\n" 429 "U_CDECL_BEGIN\n" 430 "const struct {\n" 431 " double bogus;\n" 432 " uint8_t bytes[%ld]; \n" 433 "} %s={ 0.0, {\n", 434 (long)T_FileStream_size(in), entry); 435 T_FileStream_writeLine(out, buffer); 436 437 for(;;) { 438 length=T_FileStream_read(in, buffer, sizeof(buffer)); 439 if(length==0) { 440 break; 441 } 442 for(i=0; i<length; ++i) { 443 column = write8(out, (uint8_t)buffer[i], column); 444 } 445 } 446 447 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n"); 448 #endif 449 450 if(T_FileStream_error(in)) { 451 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 452 exit(U_FILE_ACCESS_ERROR); 453 } 454 455 if(T_FileStream_error(out)) { 456 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 457 exit(U_FILE_ACCESS_ERROR); 458 } 459 460 T_FileStream_close(out); 461 T_FileStream_close(in); 462 } 463 464 static uint32_t 465 write32(FileStream *out, uint32_t bitField, uint32_t column) { 466 int32_t i; 467 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */ 468 char *s = bitFieldStr; 469 uint8_t *ptrIdx = (uint8_t *)&bitField; 470 static const char hexToStr[16] = { 471 '0','1','2','3', 472 '4','5','6','7', 473 '8','9','A','B', 474 'C','D','E','F' 475 }; 476 477 /* write the value, possibly with comma and newline */ 478 if(column==MAX_COLUMN) { 479 /* first byte */ 480 column=1; 481 } else if(column<32) { 482 *(s++)=','; 483 ++column; 484 } else { 485 *(s++)='\n'; 486 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine); 487 s+=uprv_strlen(s); 488 column=1; 489 } 490 491 if (bitField < 10) { 492 /* It's a small number. Don't waste the space for 0x */ 493 *(s++)=hexToStr[bitField]; 494 } 495 else { 496 int seenNonZero = 0; /* This is used to remove leading zeros */ 497 498 if(hexType==HEX_0X) { 499 *(s++)='0'; 500 *(s++)='x'; 501 } else if(hexType==HEX_0H) { 502 *(s++)='0'; 503 } 504 505 /* This creates a 32-bit field */ 506 #if U_IS_BIG_ENDIAN 507 for (i = 0; i < sizeof(uint32_t); i++) 508 #else 509 for (i = sizeof(uint32_t)-1; i >= 0 ; i--) 510 #endif 511 { 512 uint8_t value = ptrIdx[i]; 513 if (value || seenNonZero) { 514 *(s++)=hexToStr[value>>4]; 515 *(s++)=hexToStr[value&0xF]; 516 seenNonZero = 1; 517 } 518 } 519 if(hexType==HEX_0H) { 520 *(s++)='h'; 521 } 522 } 523 524 *(s++)=0; 525 T_FileStream_writeLine(out, bitFieldStr); 526 return column; 527 } 528 529 static uint32_t 530 write8(FileStream *out, uint8_t byte, uint32_t column) { 531 char s[4]; 532 int i=0; 533 534 /* convert the byte value to a string */ 535 if(byte>=100) { 536 s[i++]=(char)('0'+byte/100); 537 byte%=100; 538 } 539 if(i>0 || byte>=10) { 540 s[i++]=(char)('0'+byte/10); 541 byte%=10; 542 } 543 s[i++]=(char)('0'+byte); 544 s[i]=0; 545 546 /* write the value, possibly with comma and newline */ 547 if(column==MAX_COLUMN) { 548 /* first byte */ 549 column=1; 550 } else if(column<16) { 551 T_FileStream_writeLine(out, ","); 552 ++column; 553 } else { 554 T_FileStream_writeLine(out, ",\n"); 555 column=1; 556 } 557 T_FileStream_writeLine(out, s); 558 return column; 559 } 560 561 #if U_PLATFORM == U_PF_OS400 562 static uint32_t 563 write8str(FileStream *out, uint8_t byte, uint32_t column) { 564 char s[8]; 565 566 if (byte > 7) 567 sprintf(s, "\\x%X", byte); 568 else 569 sprintf(s, "\\%X", byte); 570 571 /* write the value, possibly with comma and newline */ 572 if(column==MAX_COLUMN) { 573 /* first byte */ 574 column=1; 575 T_FileStream_writeLine(out, "\""); 576 } else if(column<24) { 577 ++column; 578 } else { 579 T_FileStream_writeLine(out, "\"\n\""); 580 column=1; 581 } 582 T_FileStream_writeLine(out, s); 583 return column; 584 } 585 #endif 586 587 static void 588 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) { 589 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.'); 590 591 /* copy path */ 592 if(destdir!=NULL && *destdir!=0) { 593 do { 594 *outFilename++=*destdir++; 595 } while(*destdir!=0); 596 if(*(outFilename-1)!=U_FILE_SEP_CHAR) { 597 *outFilename++=U_FILE_SEP_CHAR; 598 } 599 inFilename=basename; 600 } else { 601 while(inFilename<basename) { 602 *outFilename++=*inFilename++; 603 } 604 } 605 606 if(suffix==NULL) { 607 /* the filename does not have a suffix */ 608 uprv_strcpy(entryName, inFilename); 609 if(optFilename != NULL) { 610 uprv_strcpy(outFilename, optFilename); 611 } else { 612 uprv_strcpy(outFilename, inFilename); 613 } 614 uprv_strcat(outFilename, newSuffix); 615 } else { 616 char *saveOutFilename = outFilename; 617 /* copy basename */ 618 while(inFilename<suffix) { 619 if(*inFilename=='-') { 620 /* iSeries cannot have '-' in the .o objects. */ 621 *outFilename++=*entryName++='_'; 622 inFilename++; 623 } 624 else { 625 *outFilename++=*entryName++=*inFilename++; 626 } 627 } 628 629 /* replace '.' by '_' */ 630 *outFilename++=*entryName++='_'; 631 ++inFilename; 632 633 /* copy suffix */ 634 while(*inFilename!=0) { 635 *outFilename++=*entryName++=*inFilename++; 636 } 637 638 *entryName=0; 639 640 if(optFilename != NULL) { 641 uprv_strcpy(saveOutFilename, optFilename); 642 uprv_strcat(saveOutFilename, newSuffix); 643 } else { 644 /* add ".c" */ 645 uprv_strcpy(outFilename, newSuffix); 646 } 647 } 648 } 649 650 #ifdef CAN_GENERATE_OBJECTS 651 static void 652 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) { 653 union { 654 char bytes[2048]; 655 #ifdef U_ELF 656 Elf32_Ehdr header32; 657 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */ 658 #elif U_PLATFORM_HAS_WIN32_API 659 IMAGE_FILE_HEADER header; 660 #endif 661 } buffer; 662 663 const char *filename; 664 FileStream *in; 665 int32_t length; 666 667 #ifdef U_ELF 668 669 #elif U_PLATFORM_HAS_WIN32_API 670 const IMAGE_FILE_HEADER *pHeader; 671 #else 672 # error "Unknown platform for CAN_GENERATE_OBJECTS." 673 #endif 674 675 if(optMatchArch != NULL) { 676 filename=optMatchArch; 677 } else { 678 /* set defaults */ 679 #ifdef U_ELF 680 /* set EM_386 because elf.h does not provide better defaults */ 681 *pCPU=EM_386; 682 *pBits=32; 683 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB); 684 #elif U_PLATFORM_HAS_WIN32_API 685 /* _M_IA64 should be defined in windows.h */ 686 # if defined(_M_IA64) 687 *pCPU=IMAGE_FILE_MACHINE_IA64; 688 # elif defined(_M_AMD64) 689 *pCPU=IMAGE_FILE_MACHINE_AMD64; 690 # else 691 *pCPU=IMAGE_FILE_MACHINE_I386; 692 # endif 693 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 694 *pIsBigEndian=FALSE; 695 #else 696 # error "Unknown platform for CAN_GENERATE_OBJECTS." 697 #endif 698 return; 699 } 700 701 in=T_FileStream_open(filename, "rb"); 702 if(in==NULL) { 703 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename); 704 exit(U_FILE_ACCESS_ERROR); 705 } 706 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes)); 707 708 #ifdef U_ELF 709 if(length<sizeof(Elf32_Ehdr)) { 710 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 711 exit(U_UNSUPPORTED_ERROR); 712 } 713 if( 714 buffer.header32.e_ident[0]!=ELFMAG0 || 715 buffer.header32.e_ident[1]!=ELFMAG1 || 716 buffer.header32.e_ident[2]!=ELFMAG2 || 717 buffer.header32.e_ident[3]!=ELFMAG3 || 718 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64 719 ) { 720 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename); 721 exit(U_UNSUPPORTED_ERROR); 722 } 723 724 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */ 725 #ifdef U_ELF64 726 if(*pBits!=32 && *pBits!=64) { 727 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n"); 728 exit(U_UNSUPPORTED_ERROR); 729 } 730 #else 731 if(*pBits!=32) { 732 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n"); 733 exit(U_UNSUPPORTED_ERROR); 734 } 735 #endif 736 737 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB); 738 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) { 739 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n"); 740 exit(U_UNSUPPORTED_ERROR); 741 } 742 /* TODO: Support byte swapping */ 743 744 *pCPU=buffer.header32.e_machine; 745 #elif U_PLATFORM_HAS_WIN32_API 746 if(length<sizeof(IMAGE_FILE_HEADER)) { 747 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 748 exit(U_UNSUPPORTED_ERROR); 749 } 750 /* TODO: Use buffer.header. Keep aliasing legal. */ 751 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes; 752 *pCPU=pHeader->Machine; 753 /* 754 * The number of bits is implicit with the Machine value. 755 * *pBits is ignored in the calling code, so this need not be precise. 756 */ 757 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 758 /* Windows always runs on little-endian CPUs. */ 759 *pIsBigEndian=FALSE; 760 #else 761 # error "Unknown platform for CAN_GENERATE_OBJECTS." 762 #endif 763 764 T_FileStream_close(in); 765 } 766 767 U_CAPI void U_EXPORT2 768 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) { 769 /* common variables */ 770 char buffer[4096], entry[96]={ 0 }; 771 FileStream *in, *out; 772 const char *newSuffix; 773 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0; 774 775 uint16_t cpu, bits; 776 UBool makeBigEndian; 777 778 /* platform-specific variables and initialization code */ 779 #ifdef U_ELF 780 /* 32-bit Elf file header */ 781 static Elf32_Ehdr header32={ 782 { 783 /* e_ident[] */ 784 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 785 ELFCLASS32, 786 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 787 EV_CURRENT /* EI_VERSION */ 788 }, 789 ET_REL, 790 EM_386, 791 EV_CURRENT, /* e_version */ 792 0, /* e_entry */ 793 0, /* e_phoff */ 794 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */ 795 0, /* e_flags */ 796 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */ 797 0, /* e_phentsize */ 798 0, /* e_phnum */ 799 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */ 800 5, /* e_shnum */ 801 2 /* e_shstrndx */ 802 }; 803 804 /* 32-bit Elf section header table */ 805 static Elf32_Shdr sectionHeaders32[5]={ 806 { /* SHN_UNDEF */ 807 0 808 }, 809 { /* .symtab */ 810 1, /* sh_name */ 811 SHT_SYMTAB, 812 0, /* sh_flags */ 813 0, /* sh_addr */ 814 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */ 815 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */ 816 3, /* sh_link=sect hdr index of .strtab */ 817 1, /* sh_info=One greater than the symbol table index of the last 818 * local symbol (with STB_LOCAL). */ 819 4, /* sh_addralign */ 820 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */ 821 }, 822 { /* .shstrtab */ 823 9, /* sh_name */ 824 SHT_STRTAB, 825 0, /* sh_flags */ 826 0, /* sh_addr */ 827 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */ 828 40, /* sh_size */ 829 0, /* sh_link */ 830 0, /* sh_info */ 831 1, /* sh_addralign */ 832 0 /* sh_entsize */ 833 }, 834 { /* .strtab */ 835 19, /* sh_name */ 836 SHT_STRTAB, 837 0, /* sh_flags */ 838 0, /* sh_addr */ 839 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */ 840 (Elf32_Word)sizeof(entry), /* sh_size */ 841 0, /* sh_link */ 842 0, /* sh_info */ 843 1, /* sh_addralign */ 844 0 /* sh_entsize */ 845 }, 846 { /* .rodata */ 847 27, /* sh_name */ 848 SHT_PROGBITS, 849 SHF_ALLOC, /* sh_flags */ 850 0, /* sh_addr */ 851 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */ 852 0, /* sh_size */ 853 0, /* sh_link */ 854 0, /* sh_info */ 855 16, /* sh_addralign */ 856 0 /* sh_entsize */ 857 } 858 }; 859 860 /* symbol table */ 861 static Elf32_Sym symbols32[2]={ 862 { /* STN_UNDEF */ 863 0 864 }, 865 { /* data entry point */ 866 1, /* st_name */ 867 0, /* st_value */ 868 0, /* st_size */ 869 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 870 0, /* st_other */ 871 4 /* st_shndx=index of related section table entry */ 872 } 873 }; 874 875 /* section header string table, with decimal string offsets */ 876 static const char sectionStrings[40]= 877 /* 0 */ "\0" 878 /* 1 */ ".symtab\0" 879 /* 9 */ ".shstrtab\0" 880 /* 19 */ ".strtab\0" 881 /* 27 */ ".rodata\0" 882 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */ 883 /* 40: padded to multiple of 8 bytes */ 884 885 /* 886 * Use entry[] for the string table which will contain only the 887 * entry point name. 888 * entry[0] must be 0 (NUL) 889 * The entry point name can be up to 38 characters long (sizeof(entry)-2). 890 */ 891 892 /* 16-align .rodata in the .o file, just in case */ 893 static const char padding[16]={ 0 }; 894 int32_t paddingSize; 895 896 #ifdef U_ELF64 897 /* 64-bit Elf file header */ 898 static Elf64_Ehdr header64={ 899 { 900 /* e_ident[] */ 901 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 902 ELFCLASS64, 903 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 904 EV_CURRENT /* EI_VERSION */ 905 }, 906 ET_REL, 907 EM_X86_64, 908 EV_CURRENT, /* e_version */ 909 0, /* e_entry */ 910 0, /* e_phoff */ 911 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */ 912 0, /* e_flags */ 913 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */ 914 0, /* e_phentsize */ 915 0, /* e_phnum */ 916 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */ 917 5, /* e_shnum */ 918 2 /* e_shstrndx */ 919 }; 920 921 /* 64-bit Elf section header table */ 922 static Elf64_Shdr sectionHeaders64[5]={ 923 { /* SHN_UNDEF */ 924 0 925 }, 926 { /* .symtab */ 927 1, /* sh_name */ 928 SHT_SYMTAB, 929 0, /* sh_flags */ 930 0, /* sh_addr */ 931 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */ 932 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */ 933 3, /* sh_link=sect hdr index of .strtab */ 934 1, /* sh_info=One greater than the symbol table index of the last 935 * local symbol (with STB_LOCAL). */ 936 4, /* sh_addralign */ 937 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */ 938 }, 939 { /* .shstrtab */ 940 9, /* sh_name */ 941 SHT_STRTAB, 942 0, /* sh_flags */ 943 0, /* sh_addr */ 944 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */ 945 40, /* sh_size */ 946 0, /* sh_link */ 947 0, /* sh_info */ 948 1, /* sh_addralign */ 949 0 /* sh_entsize */ 950 }, 951 { /* .strtab */ 952 19, /* sh_name */ 953 SHT_STRTAB, 954 0, /* sh_flags */ 955 0, /* sh_addr */ 956 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */ 957 (Elf64_Xword)sizeof(entry), /* sh_size */ 958 0, /* sh_link */ 959 0, /* sh_info */ 960 1, /* sh_addralign */ 961 0 /* sh_entsize */ 962 }, 963 { /* .rodata */ 964 27, /* sh_name */ 965 SHT_PROGBITS, 966 SHF_ALLOC, /* sh_flags */ 967 0, /* sh_addr */ 968 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */ 969 0, /* sh_size */ 970 0, /* sh_link */ 971 0, /* sh_info */ 972 16, /* sh_addralign */ 973 0 /* sh_entsize */ 974 } 975 }; 976 977 /* 978 * 64-bit symbol table 979 * careful: different order of items compared with Elf32_sym! 980 */ 981 static Elf64_Sym symbols64[2]={ 982 { /* STN_UNDEF */ 983 0 984 }, 985 { /* data entry point */ 986 1, /* st_name */ 987 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 988 0, /* st_other */ 989 4, /* st_shndx=index of related section table entry */ 990 0, /* st_value */ 991 0 /* st_size */ 992 } 993 }; 994 995 #endif /* U_ELF64 */ 996 997 /* entry[] have a leading NUL */ 998 entryOffset=1; 999 1000 /* in the common code, count entryLength from after the NUL */ 1001 entryLengthOffset=1; 1002 1003 newSuffix=".o"; 1004 1005 #elif U_PLATFORM_HAS_WIN32_API 1006 struct { 1007 IMAGE_FILE_HEADER fileHeader; 1008 IMAGE_SECTION_HEADER sections[2]; 1009 char linkerOptions[100]; 1010 } objHeader; 1011 IMAGE_SYMBOL symbols[1]; 1012 struct { 1013 DWORD sizeofLongNames; 1014 char longNames[100]; 1015 } symbolNames; 1016 1017 /* 1018 * entry sometimes have a leading '_' 1019 * overwritten if entryOffset==0 depending on the target platform 1020 * see check for cpu below 1021 */ 1022 entry[0]='_'; 1023 1024 newSuffix=".obj"; 1025 #else 1026 # error "Unknown platform for CAN_GENERATE_OBJECTS." 1027 #endif 1028 1029 /* deal with options, files and the entry point name */ 1030 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch); 1031 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian); 1032 #if U_PLATFORM_HAS_WIN32_API 1033 if(cpu==IMAGE_FILE_MACHINE_I386) { 1034 entryOffset=1; 1035 } 1036 #endif 1037 1038 in=T_FileStream_open(filename, "rb"); 1039 if(in==NULL) { 1040 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 1041 exit(U_FILE_ACCESS_ERROR); 1042 } 1043 size=T_FileStream_size(in); 1044 1045 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename); 1046 if (outFilePath != NULL) { 1047 uprv_strcpy(outFilePath, buffer); 1048 } 1049 1050 if(optEntryPoint != NULL) { 1051 uprv_strcpy(entry+entryOffset, optEntryPoint); 1052 uprv_strcat(entry+entryOffset, "_dat"); 1053 } 1054 /* turn dashes in the entry name into underscores */ 1055 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset); 1056 for(i=0; i<entryLength; ++i) { 1057 if(entry[entryLengthOffset+i]=='-') { 1058 entry[entryLengthOffset+i]='_'; 1059 } 1060 } 1061 1062 /* open the output file */ 1063 out=T_FileStream_open(buffer, "wb"); 1064 if(out==NULL) { 1065 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 1066 exit(U_FILE_ACCESS_ERROR); 1067 } 1068 1069 #ifdef U_ELF 1070 if(bits==32) { 1071 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1072 header32.e_machine=cpu; 1073 1074 /* 16-align .rodata in the .o file, just in case */ 1075 paddingSize=sectionHeaders32[4].sh_offset & 0xf; 1076 if(paddingSize!=0) { 1077 paddingSize=0x10-paddingSize; 1078 sectionHeaders32[4].sh_offset+=paddingSize; 1079 } 1080 1081 sectionHeaders32[4].sh_size=(Elf32_Word)size; 1082 1083 symbols32[1].st_size=(Elf32_Word)size; 1084 1085 /* write .o headers */ 1086 T_FileStream_write(out, &header32, (int32_t)sizeof(header32)); 1087 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32)); 1088 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32)); 1089 } else /* bits==64 */ { 1090 #ifdef U_ELF64 1091 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1092 header64.e_machine=cpu; 1093 1094 /* 16-align .rodata in the .o file, just in case */ 1095 paddingSize=sectionHeaders64[4].sh_offset & 0xf; 1096 if(paddingSize!=0) { 1097 paddingSize=0x10-paddingSize; 1098 sectionHeaders64[4].sh_offset+=paddingSize; 1099 } 1100 1101 sectionHeaders64[4].sh_size=(Elf64_Xword)size; 1102 1103 symbols64[1].st_size=(Elf64_Xword)size; 1104 1105 /* write .o headers */ 1106 T_FileStream_write(out, &header64, (int32_t)sizeof(header64)); 1107 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64)); 1108 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64)); 1109 #endif 1110 } 1111 1112 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings)); 1113 T_FileStream_write(out, entry, (int32_t)sizeof(entry)); 1114 if(paddingSize!=0) { 1115 T_FileStream_write(out, padding, paddingSize); 1116 } 1117 #elif U_PLATFORM_HAS_WIN32_API 1118 /* populate the .obj headers */ 1119 uprv_memset(&objHeader, 0, sizeof(objHeader)); 1120 uprv_memset(&symbols, 0, sizeof(symbols)); 1121 uprv_memset(&symbolNames, 0, sizeof(symbolNames)); 1122 1123 /* write the linker export directive */ 1124 uprv_strcpy(objHeader.linkerOptions, "-export:"); 1125 length=8; 1126 uprv_strcpy(objHeader.linkerOptions+length, entry); 1127 length+=entryLength; 1128 uprv_strcpy(objHeader.linkerOptions+length, ",data "); 1129 length+=6; 1130 1131 /* set the file header */ 1132 objHeader.fileHeader.Machine=cpu; 1133 objHeader.fileHeader.NumberOfSections=2; 1134 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL); 1135 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */ 1136 objHeader.fileHeader.NumberOfSymbols=1; 1137 1138 /* set the section for the linker options */ 1139 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8); 1140 objHeader.sections[0].SizeOfRawData=length; 1141 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER; 1142 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES; 1143 1144 /* set the data section */ 1145 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6); 1146 objHeader.sections[1].SizeOfRawData=size; 1147 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length; 1148 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ; 1149 1150 /* set the symbol table */ 1151 if(entryLength<=8) { 1152 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength); 1153 symbolNames.sizeofLongNames=4; 1154 } else { 1155 symbols[0].N.Name.Short=0; 1156 symbols[0].N.Name.Long=4; 1157 symbolNames.sizeofLongNames=4+entryLength+1; 1158 uprv_strcpy(symbolNames.longNames, entry); 1159 } 1160 symbols[0].SectionNumber=2; 1161 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL; 1162 1163 /* write the file header and the linker options section */ 1164 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData); 1165 #else 1166 # error "Unknown platform for CAN_GENERATE_OBJECTS." 1167 #endif 1168 1169 /* copy the data file into section 2 */ 1170 for(;;) { 1171 length=T_FileStream_read(in, buffer, sizeof(buffer)); 1172 if(length==0) { 1173 break; 1174 } 1175 T_FileStream_write(out, buffer, (int32_t)length); 1176 } 1177 1178 #if U_PLATFORM_HAS_WIN32_API 1179 /* write the symbol table */ 1180 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL); 1181 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames); 1182 #endif 1183 1184 if(T_FileStream_error(in)) { 1185 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 1186 exit(U_FILE_ACCESS_ERROR); 1187 } 1188 1189 if(T_FileStream_error(out)) { 1190 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 1191 exit(U_FILE_ACCESS_ERROR); 1192 } 1193 1194 T_FileStream_close(out); 1195 T_FileStream_close(in); 1196 } 1197 #endif 1198