1 /****************************************************************************** 2 * Copyright (C) 2009-2010, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************************* 5 */ 6 #include "unicode/utypes.h" 7 8 #ifdef U_WINDOWS 9 # define VC_EXTRALEAN 10 # define WIN32_LEAN_AND_MEAN 11 # define NOUSER 12 # define NOSERVICE 13 # define NOIME 14 # define NOMCX 15 #include <windows.h> 16 #include <time.h> 17 # ifdef __GNUC__ 18 # define WINDOWS_WITH_GNUC 19 # endif 20 #endif 21 22 #ifdef U_LINUX 23 # define U_ELF 24 #endif 25 26 #ifdef U_ELF 27 # include <elf.h> 28 # if defined(ELFCLASS64) 29 # define U_ELF64 30 # endif 31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */ 32 # ifndef EM_X86_64 33 # define EM_X86_64 62 34 # endif 35 # define ICU_ENTRY_OFFSET 0 36 #endif 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include "unicode/putil.h" 41 #include "cmemory.h" 42 #include "cstring.h" 43 #include "filestrm.h" 44 #include "toolutil.h" 45 #include "unicode/uclean.h" 46 #include "uoptions.h" 47 #include "pkg_genc.h" 48 49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) 50 51 #define HEX_0X 0 /* 0x1234 */ 52 #define HEX_0H 1 /* 01234h */ 53 54 #if defined(U_WINDOWS) || defined(U_ELF) 55 #define CAN_GENERATE_OBJECTS 56 #endif 57 58 /* prototypes --------------------------------------------------------------- */ 59 static void 60 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename); 61 62 static uint32_t 63 write8(FileStream *out, uint8_t byte, uint32_t column); 64 65 static uint32_t 66 write32(FileStream *out, uint32_t byte, uint32_t column); 67 68 #ifdef OS400 69 static uint32_t 70 write8str(FileStream *out, uint8_t byte, uint32_t column); 71 #endif 72 /* -------------------------------------------------------------------------- */ 73 74 /* 75 Creating Template Files for New Platforms 76 77 Let the cc compiler help you get started. 78 Compile this program 79 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16}; 80 with the -S option to produce assembly output. 81 82 For example, this will generate array.s: 83 gcc -S array.c 84 85 This will produce a .s file that may look like this: 86 87 .file "array.c" 88 .version "01.01" 89 gcc2_compiled.: 90 .globl x 91 .section .rodata 92 .align 4 93 .type x,@object 94 .size x,20 95 x: 96 .long 1 97 .long 2 98 .long -559038737 99 .long -1 100 .long 16 101 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)" 102 103 which gives a starting point that will compile, and can be transformed 104 to become the template, generally with some consulting of as docs and 105 some experimentation. 106 107 If you want ICU to automatically use this assembly, you should 108 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file, 109 where the name is the compiler or platform that you used in this 110 assemblyHeader data structure. 111 */ 112 static const struct AssemblyType { 113 const char *name; 114 const char *header; 115 const char *beginLine; 116 const char *footer; 117 int8_t hexType; /* HEX_0X or HEX_0h */ 118 } assemblyHeader[] = { 119 {"gcc", 120 ".globl %s\n" 121 "\t.section .note.GNU-stack,\"\",%%progbits\n" 122 "\t.section .rodata\n" 123 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */ 124 "\t.type %s,%%object\n" 125 "%s:\n\n", 126 127 ".long ","",HEX_0X 128 }, 129 {"gcc-darwin", 130 /*"\t.section __TEXT,__text,regular,pure_instructions\n" 131 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/ 132 ".globl _%s\n" 133 "\t.data\n" 134 "\t.const\n" 135 "\t.align 4\n" /* 1<<4 = 16 */ 136 "_%s:\n\n", 137 138 ".long ","",HEX_0X 139 }, 140 {"gcc-cygwin", 141 ".globl _%s\n" 142 "\t.section .rodata\n" 143 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */ 144 "_%s:\n\n", 145 146 ".long ","",HEX_0X 147 }, 148 {"sun", 149 "\t.section \".rodata\"\n" 150 "\t.align 8\n" 151 ".globl %s\n" 152 "%s:\n", 153 154 ".word ","",HEX_0X 155 }, 156 {"sun-x86", 157 "Drodata.rodata:\n" 158 "\t.type Drodata.rodata,@object\n" 159 "\t.size Drodata.rodata,0\n" 160 "\t.globl %s\n" 161 "\t.align 8\n" 162 "%s:\n", 163 164 ".4byte ","",HEX_0X 165 }, 166 {"xlc", 167 ".globl %s{RO}\n" 168 "\t.toc\n" 169 "%s:\n" 170 "\t.csect %s{RO}, 4\n", 171 172 ".long ","",HEX_0X 173 }, 174 {"aCC-ia64", 175 "\t.file \"%s.s\"\n" 176 "\t.type %s,@object\n" 177 "\t.global %s\n" 178 "\t.secalias .abe$0.rodata, \".rodata\"\n" 179 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n" 180 "\t.align 16\n" 181 "%s::\t", 182 183 "data4 ","",HEX_0X 184 }, 185 {"aCC-parisc", 186 "\t.SPACE $TEXT$\n" 187 "\t.SUBSPA $LIT$\n" 188 "%s\n" 189 "\t.EXPORT %s\n" 190 "\t.ALIGN 16\n", 191 192 ".WORD ","",HEX_0X 193 }, 194 { "masm", 195 "\tTITLE %s\n" 196 "; generated by genccode\n" 197 ".386\n" 198 ".model flat\n" 199 "\tPUBLIC _%s\n" 200 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n" 201 "\tALIGN 16\n" 202 "_%s\tLABEL DWORD\n", 203 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H 204 } 205 }; 206 207 static int32_t assemblyHeaderIndex = -1; 208 static int32_t hexType = HEX_0X; 209 210 U_CAPI UBool U_EXPORT2 211 checkAssemblyHeaderName(const char* optAssembly) { 212 int32_t idx; 213 assemblyHeaderIndex = -1; 214 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 215 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) { 216 assemblyHeaderIndex = idx; 217 hexType = assemblyHeader[idx].hexType; /* set the hex type */ 218 return TRUE; 219 } 220 } 221 222 return FALSE; 223 } 224 225 226 U_CAPI void U_EXPORT2 227 printAssemblyHeadersToStdErr(void) { 228 int32_t idx; 229 fprintf(stderr, "%s", assemblyHeader[0].name); 230 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 231 fprintf(stderr, ", %s", assemblyHeader[idx].name); 232 } 233 fprintf(stderr, 234 ")\n"); 235 } 236 237 U_CAPI void U_EXPORT2 238 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) { 239 uint32_t column = MAX_COLUMN; 240 char entry[64]; 241 uint32_t buffer[1024]; 242 char *bufferStr = (char *)buffer; 243 FileStream *in, *out; 244 size_t i, length; 245 246 in=T_FileStream_open(filename, "rb"); 247 if(in==NULL) { 248 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 249 exit(U_FILE_ACCESS_ERROR); 250 } 251 252 getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename); 253 out=T_FileStream_open(bufferStr, "w"); 254 if(out==NULL) { 255 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr); 256 exit(U_FILE_ACCESS_ERROR); 257 } 258 259 if (outFilePath != NULL) { 260 uprv_strcpy(outFilePath, bufferStr); 261 } 262 263 #ifdef WINDOWS_WITH_GNUC 264 /* Need to fix the file seperator character when using MinGW. */ 265 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/'); 266 #endif 267 268 if(optEntryPoint != NULL) { 269 uprv_strcpy(entry, optEntryPoint); 270 uprv_strcat(entry, "_dat"); 271 } 272 273 /* turn dashes or dots in the entry name into underscores */ 274 length=uprv_strlen(entry); 275 for(i=0; i<length; ++i) { 276 if(entry[i]=='-' || entry[i]=='.') { 277 entry[i]='_'; 278 } 279 } 280 281 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header, 282 entry, entry, entry, entry, 283 entry, entry, entry, entry); 284 T_FileStream_writeLine(out, bufferStr); 285 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine); 286 287 for(;;) { 288 length=T_FileStream_read(in, buffer, sizeof(buffer)); 289 if(length==0) { 290 break; 291 } 292 if (length != sizeof(buffer)) { 293 /* pad with extra 0's when at the end of the file */ 294 for(i=0; i < (length % sizeof(uint32_t)); ++i) { 295 buffer[length+i] = 0; 296 } 297 } 298 for(i=0; i<(length/sizeof(buffer[0])); i++) { 299 column = write32(out, buffer[i], column); 300 } 301 } 302 303 T_FileStream_writeLine(out, "\n"); 304 305 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer, 306 entry, entry, entry, entry, 307 entry, entry, entry, entry); 308 T_FileStream_writeLine(out, bufferStr); 309 310 if(T_FileStream_error(in)) { 311 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 312 exit(U_FILE_ACCESS_ERROR); 313 } 314 315 if(T_FileStream_error(out)) { 316 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 317 exit(U_FILE_ACCESS_ERROR); 318 } 319 320 T_FileStream_close(out); 321 T_FileStream_close(in); 322 } 323 324 U_CAPI void U_EXPORT2 325 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) { 326 uint32_t column = MAX_COLUMN; 327 char buffer[4096], entry[64]; 328 FileStream *in, *out; 329 size_t i, length; 330 331 in=T_FileStream_open(filename, "rb"); 332 if(in==NULL) { 333 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 334 exit(U_FILE_ACCESS_ERROR); 335 } 336 337 if(optName != NULL) { /* prepend 'icudt28_' */ 338 strcpy(entry, optName); 339 strcat(entry, "_"); 340 } else { 341 entry[0] = 0; 342 } 343 344 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename); 345 if (outFilePath != NULL) { 346 uprv_strcpy(outFilePath, buffer); 347 } 348 out=T_FileStream_open(buffer, "w"); 349 if(out==NULL) { 350 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 351 exit(U_FILE_ACCESS_ERROR); 352 } 353 354 /* turn dashes or dots in the entry name into underscores */ 355 length=uprv_strlen(entry); 356 for(i=0; i<length; ++i) { 357 if(entry[i]=='-' || entry[i]=='.') { 358 entry[i]='_'; 359 } 360 } 361 362 #ifdef OS400 363 /* 364 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c 365 366 This is here because this platform can't currently put 367 const data into the read-only pages of an object or 368 shared library (service program). Only strings are allowed in read-only 369 pages, so we use char * strings to store the data. 370 371 In order to prevent the beginning of the data from ever matching the 372 magic numbers we must still use the initial double. 373 [grhoten 4/24/2003] 374 */ 375 sprintf(buffer, 376 "#define U_DISABLE_RENAMING 1\n" 377 "#include \"unicode/umachine.h\"\n" 378 "U_CDECL_BEGIN\n" 379 "const struct {\n" 380 " double bogus;\n" 381 " const char *bytes; \n" 382 "} %s={ 0.0, \n", 383 entry); 384 T_FileStream_writeLine(out, buffer); 385 386 for(;;) { 387 length=T_FileStream_read(in, buffer, sizeof(buffer)); 388 if(length==0) { 389 break; 390 } 391 for(i=0; i<length; ++i) { 392 column = write8str(out, (uint8_t)buffer[i], column); 393 } 394 } 395 396 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n"); 397 #else 398 /* Function renaming shouldn't be done in data */ 399 sprintf(buffer, 400 "#define U_DISABLE_RENAMING 1\n" 401 "#include \"unicode/umachine.h\"\n" 402 "U_CDECL_BEGIN\n" 403 "const struct {\n" 404 " double bogus;\n" 405 " uint8_t bytes[%ld]; \n" 406 "} %s={ 0.0, {\n", 407 (long)T_FileStream_size(in), entry); 408 T_FileStream_writeLine(out, buffer); 409 410 for(;;) { 411 length=T_FileStream_read(in, buffer, sizeof(buffer)); 412 if(length==0) { 413 break; 414 } 415 for(i=0; i<length; ++i) { 416 column = write8(out, (uint8_t)buffer[i], column); 417 } 418 } 419 420 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n"); 421 #endif 422 423 if(T_FileStream_error(in)) { 424 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 425 exit(U_FILE_ACCESS_ERROR); 426 } 427 428 if(T_FileStream_error(out)) { 429 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 430 exit(U_FILE_ACCESS_ERROR); 431 } 432 433 T_FileStream_close(out); 434 T_FileStream_close(in); 435 } 436 437 static uint32_t 438 write32(FileStream *out, uint32_t bitField, uint32_t column) { 439 int32_t i; 440 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */ 441 char *s = bitFieldStr; 442 uint8_t *ptrIdx = (uint8_t *)&bitField; 443 static const char hexToStr[16] = { 444 '0','1','2','3', 445 '4','5','6','7', 446 '8','9','A','B', 447 'C','D','E','F' 448 }; 449 450 /* write the value, possibly with comma and newline */ 451 if(column==MAX_COLUMN) { 452 /* first byte */ 453 column=1; 454 } else if(column<32) { 455 *(s++)=','; 456 ++column; 457 } else { 458 *(s++)='\n'; 459 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine); 460 s+=uprv_strlen(s); 461 column=1; 462 } 463 464 if (bitField < 10) { 465 /* It's a small number. Don't waste the space for 0x */ 466 *(s++)=hexToStr[bitField]; 467 } 468 else { 469 int seenNonZero = 0; /* This is used to remove leading zeros */ 470 471 if(hexType==HEX_0X) { 472 *(s++)='0'; 473 *(s++)='x'; 474 } else if(hexType==HEX_0H) { 475 *(s++)='0'; 476 } 477 478 /* This creates a 32-bit field */ 479 #if U_IS_BIG_ENDIAN 480 for (i = 0; i < sizeof(uint32_t); i++) 481 #else 482 for (i = sizeof(uint32_t)-1; i >= 0 ; i--) 483 #endif 484 { 485 uint8_t value = ptrIdx[i]; 486 if (value || seenNonZero) { 487 *(s++)=hexToStr[value>>4]; 488 *(s++)=hexToStr[value&0xF]; 489 seenNonZero = 1; 490 } 491 } 492 if(hexType==HEX_0H) { 493 *(s++)='h'; 494 } 495 } 496 497 *(s++)=0; 498 T_FileStream_writeLine(out, bitFieldStr); 499 return column; 500 } 501 502 static uint32_t 503 write8(FileStream *out, uint8_t byte, uint32_t column) { 504 char s[4]; 505 int i=0; 506 507 /* convert the byte value to a string */ 508 if(byte>=100) { 509 s[i++]=(char)('0'+byte/100); 510 byte%=100; 511 } 512 if(i>0 || byte>=10) { 513 s[i++]=(char)('0'+byte/10); 514 byte%=10; 515 } 516 s[i++]=(char)('0'+byte); 517 s[i]=0; 518 519 /* write the value, possibly with comma and newline */ 520 if(column==MAX_COLUMN) { 521 /* first byte */ 522 column=1; 523 } else if(column<16) { 524 T_FileStream_writeLine(out, ","); 525 ++column; 526 } else { 527 T_FileStream_writeLine(out, ",\n"); 528 column=1; 529 } 530 T_FileStream_writeLine(out, s); 531 return column; 532 } 533 534 #ifdef OS400 535 static uint32_t 536 write8str(FileStream *out, uint8_t byte, uint32_t column) { 537 char s[8]; 538 539 if (byte > 7) 540 sprintf(s, "\\x%X", byte); 541 else 542 sprintf(s, "\\%X", byte); 543 544 /* write the value, possibly with comma and newline */ 545 if(column==MAX_COLUMN) { 546 /* first byte */ 547 column=1; 548 T_FileStream_writeLine(out, "\""); 549 } else if(column<24) { 550 ++column; 551 } else { 552 T_FileStream_writeLine(out, "\"\n\""); 553 column=1; 554 } 555 T_FileStream_writeLine(out, s); 556 return column; 557 } 558 #endif 559 560 static void 561 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) { 562 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.'); 563 564 /* copy path */ 565 if(destdir!=NULL && *destdir!=0) { 566 do { 567 *outFilename++=*destdir++; 568 } while(*destdir!=0); 569 if(*(outFilename-1)!=U_FILE_SEP_CHAR) { 570 *outFilename++=U_FILE_SEP_CHAR; 571 } 572 inFilename=basename; 573 } else { 574 while(inFilename<basename) { 575 *outFilename++=*inFilename++; 576 } 577 } 578 579 if(suffix==NULL) { 580 /* the filename does not have a suffix */ 581 uprv_strcpy(entryName, inFilename); 582 if(optFilename != NULL) { 583 uprv_strcpy(outFilename, optFilename); 584 } else { 585 uprv_strcpy(outFilename, inFilename); 586 } 587 uprv_strcat(outFilename, newSuffix); 588 } else { 589 char *saveOutFilename = outFilename; 590 /* copy basename */ 591 while(inFilename<suffix) { 592 if(*inFilename=='-') { 593 /* iSeries cannot have '-' in the .o objects. */ 594 *outFilename++=*entryName++='_'; 595 inFilename++; 596 } 597 else { 598 *outFilename++=*entryName++=*inFilename++; 599 } 600 } 601 602 /* replace '.' by '_' */ 603 *outFilename++=*entryName++='_'; 604 ++inFilename; 605 606 /* copy suffix */ 607 while(*inFilename!=0) { 608 *outFilename++=*entryName++=*inFilename++; 609 } 610 611 *entryName=0; 612 613 if(optFilename != NULL) { 614 uprv_strcpy(saveOutFilename, optFilename); 615 uprv_strcat(saveOutFilename, newSuffix); 616 } else { 617 /* add ".c" */ 618 uprv_strcpy(outFilename, newSuffix); 619 } 620 } 621 } 622 623 #ifdef CAN_GENERATE_OBJECTS 624 static void 625 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) { 626 union { 627 char bytes[2048]; 628 #ifdef U_ELF 629 Elf32_Ehdr header32; 630 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */ 631 #elif defined(U_WINDOWS) 632 IMAGE_FILE_HEADER header; 633 #endif 634 } buffer; 635 636 const char *filename; 637 FileStream *in; 638 int32_t length; 639 640 #ifdef U_ELF 641 642 #elif defined(U_WINDOWS) 643 const IMAGE_FILE_HEADER *pHeader; 644 #else 645 # error "Unknown platform for CAN_GENERATE_OBJECTS." 646 #endif 647 648 if(optMatchArch != NULL) { 649 filename=optMatchArch; 650 } else { 651 /* set defaults */ 652 #ifdef U_ELF 653 /* set EM_386 because elf.h does not provide better defaults */ 654 *pCPU=EM_386; 655 *pBits=32; 656 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB); 657 #elif defined(U_WINDOWS) 658 /* _M_IA64 should be defined in windows.h */ 659 # if defined(_M_IA64) 660 *pCPU=IMAGE_FILE_MACHINE_IA64; 661 # elif defined(_M_AMD64) 662 *pCPU=IMAGE_FILE_MACHINE_AMD64; 663 # else 664 *pCPU=IMAGE_FILE_MACHINE_I386; 665 # endif 666 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 667 *pIsBigEndian=FALSE; 668 #else 669 # error "Unknown platform for CAN_GENERATE_OBJECTS." 670 #endif 671 return; 672 } 673 674 in=T_FileStream_open(filename, "rb"); 675 if(in==NULL) { 676 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename); 677 exit(U_FILE_ACCESS_ERROR); 678 } 679 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes)); 680 681 #ifdef U_ELF 682 if(length<sizeof(Elf32_Ehdr)) { 683 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 684 exit(U_UNSUPPORTED_ERROR); 685 } 686 if( 687 buffer.header32.e_ident[0]!=ELFMAG0 || 688 buffer.header32.e_ident[1]!=ELFMAG1 || 689 buffer.header32.e_ident[2]!=ELFMAG2 || 690 buffer.header32.e_ident[3]!=ELFMAG3 || 691 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64 692 ) { 693 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename); 694 exit(U_UNSUPPORTED_ERROR); 695 } 696 697 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */ 698 #ifdef U_ELF64 699 if(*pBits!=32 && *pBits!=64) { 700 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n"); 701 exit(U_UNSUPPORTED_ERROR); 702 } 703 #else 704 if(*pBits!=32) { 705 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n"); 706 exit(U_UNSUPPORTED_ERROR); 707 } 708 #endif 709 710 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB); 711 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) { 712 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n"); 713 exit(U_UNSUPPORTED_ERROR); 714 } 715 /* TODO: Support byte swapping */ 716 717 *pCPU=buffer.header32.e_machine; 718 #elif defined(U_WINDOWS) 719 if(length<sizeof(IMAGE_FILE_HEADER)) { 720 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 721 exit(U_UNSUPPORTED_ERROR); 722 } 723 /* TODO: Use buffer.header. Keep aliasing legal. */ 724 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes; 725 *pCPU=pHeader->Machine; 726 /* 727 * The number of bits is implicit with the Machine value. 728 * *pBits is ignored in the calling code, so this need not be precise. 729 */ 730 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 731 /* Windows always runs on little-endian CPUs. */ 732 *pIsBigEndian=FALSE; 733 #else 734 # error "Unknown platform for CAN_GENERATE_OBJECTS." 735 #endif 736 737 T_FileStream_close(in); 738 } 739 740 U_CAPI void U_EXPORT2 741 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) { 742 /* common variables */ 743 char buffer[4096], entry[40]={ 0 }; 744 FileStream *in, *out; 745 const char *newSuffix; 746 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0; 747 748 uint16_t cpu, bits; 749 UBool makeBigEndian; 750 751 /* platform-specific variables and initialization code */ 752 #ifdef U_ELF 753 /* 32-bit Elf file header */ 754 static Elf32_Ehdr header32={ 755 { 756 /* e_ident[] */ 757 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 758 ELFCLASS32, 759 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 760 EV_CURRENT /* EI_VERSION */ 761 }, 762 ET_REL, 763 EM_386, 764 EV_CURRENT, /* e_version */ 765 0, /* e_entry */ 766 0, /* e_phoff */ 767 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */ 768 0, /* e_flags */ 769 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */ 770 0, /* e_phentsize */ 771 0, /* e_phnum */ 772 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */ 773 5, /* e_shnum */ 774 2 /* e_shstrndx */ 775 }; 776 777 /* 32-bit Elf section header table */ 778 static Elf32_Shdr sectionHeaders32[5]={ 779 { /* SHN_UNDEF */ 780 0 781 }, 782 { /* .symtab */ 783 1, /* sh_name */ 784 SHT_SYMTAB, 785 0, /* sh_flags */ 786 0, /* sh_addr */ 787 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */ 788 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */ 789 3, /* sh_link=sect hdr index of .strtab */ 790 1, /* sh_info=One greater than the symbol table index of the last 791 * local symbol (with STB_LOCAL). */ 792 4, /* sh_addralign */ 793 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */ 794 }, 795 { /* .shstrtab */ 796 9, /* sh_name */ 797 SHT_STRTAB, 798 0, /* sh_flags */ 799 0, /* sh_addr */ 800 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */ 801 40, /* sh_size */ 802 0, /* sh_link */ 803 0, /* sh_info */ 804 1, /* sh_addralign */ 805 0 /* sh_entsize */ 806 }, 807 { /* .strtab */ 808 19, /* sh_name */ 809 SHT_STRTAB, 810 0, /* sh_flags */ 811 0, /* sh_addr */ 812 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */ 813 (Elf32_Word)sizeof(entry), /* sh_size */ 814 0, /* sh_link */ 815 0, /* sh_info */ 816 1, /* sh_addralign */ 817 0 /* sh_entsize */ 818 }, 819 { /* .rodata */ 820 27, /* sh_name */ 821 SHT_PROGBITS, 822 SHF_ALLOC, /* sh_flags */ 823 0, /* sh_addr */ 824 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */ 825 0, /* sh_size */ 826 0, /* sh_link */ 827 0, /* sh_info */ 828 16, /* sh_addralign */ 829 0 /* sh_entsize */ 830 } 831 }; 832 833 /* symbol table */ 834 static Elf32_Sym symbols32[2]={ 835 { /* STN_UNDEF */ 836 0 837 }, 838 { /* data entry point */ 839 1, /* st_name */ 840 0, /* st_value */ 841 0, /* st_size */ 842 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 843 0, /* st_other */ 844 4 /* st_shndx=index of related section table entry */ 845 } 846 }; 847 848 /* section header string table, with decimal string offsets */ 849 static const char sectionStrings[40]= 850 /* 0 */ "\0" 851 /* 1 */ ".symtab\0" 852 /* 9 */ ".shstrtab\0" 853 /* 19 */ ".strtab\0" 854 /* 27 */ ".rodata\0" 855 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */ 856 /* 40: padded to multiple of 8 bytes */ 857 858 /* 859 * Use entry[] for the string table which will contain only the 860 * entry point name. 861 * entry[0] must be 0 (NUL) 862 * The entry point name can be up to 38 characters long (sizeof(entry)-2). 863 */ 864 865 /* 16-align .rodata in the .o file, just in case */ 866 static const char padding[16]={ 0 }; 867 int32_t paddingSize; 868 869 #ifdef U_ELF64 870 /* 64-bit Elf file header */ 871 static Elf64_Ehdr header64={ 872 { 873 /* e_ident[] */ 874 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 875 ELFCLASS64, 876 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 877 EV_CURRENT /* EI_VERSION */ 878 }, 879 ET_REL, 880 EM_X86_64, 881 EV_CURRENT, /* e_version */ 882 0, /* e_entry */ 883 0, /* e_phoff */ 884 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */ 885 0, /* e_flags */ 886 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */ 887 0, /* e_phentsize */ 888 0, /* e_phnum */ 889 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */ 890 5, /* e_shnum */ 891 2 /* e_shstrndx */ 892 }; 893 894 /* 64-bit Elf section header table */ 895 static Elf64_Shdr sectionHeaders64[5]={ 896 { /* SHN_UNDEF */ 897 0 898 }, 899 { /* .symtab */ 900 1, /* sh_name */ 901 SHT_SYMTAB, 902 0, /* sh_flags */ 903 0, /* sh_addr */ 904 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */ 905 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */ 906 3, /* sh_link=sect hdr index of .strtab */ 907 1, /* sh_info=One greater than the symbol table index of the last 908 * local symbol (with STB_LOCAL). */ 909 4, /* sh_addralign */ 910 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */ 911 }, 912 { /* .shstrtab */ 913 9, /* sh_name */ 914 SHT_STRTAB, 915 0, /* sh_flags */ 916 0, /* sh_addr */ 917 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */ 918 40, /* sh_size */ 919 0, /* sh_link */ 920 0, /* sh_info */ 921 1, /* sh_addralign */ 922 0 /* sh_entsize */ 923 }, 924 { /* .strtab */ 925 19, /* sh_name */ 926 SHT_STRTAB, 927 0, /* sh_flags */ 928 0, /* sh_addr */ 929 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */ 930 (Elf64_Xword)sizeof(entry), /* sh_size */ 931 0, /* sh_link */ 932 0, /* sh_info */ 933 1, /* sh_addralign */ 934 0 /* sh_entsize */ 935 }, 936 { /* .rodata */ 937 27, /* sh_name */ 938 SHT_PROGBITS, 939 SHF_ALLOC, /* sh_flags */ 940 0, /* sh_addr */ 941 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */ 942 0, /* sh_size */ 943 0, /* sh_link */ 944 0, /* sh_info */ 945 16, /* sh_addralign */ 946 0 /* sh_entsize */ 947 } 948 }; 949 950 /* 951 * 64-bit symbol table 952 * careful: different order of items compared with Elf32_sym! 953 */ 954 static Elf64_Sym symbols64[2]={ 955 { /* STN_UNDEF */ 956 0 957 }, 958 { /* data entry point */ 959 1, /* st_name */ 960 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 961 0, /* st_other */ 962 4, /* st_shndx=index of related section table entry */ 963 0, /* st_value */ 964 0 /* st_size */ 965 } 966 }; 967 968 #endif /* U_ELF64 */ 969 970 /* entry[] have a leading NUL */ 971 entryOffset=1; 972 973 /* in the common code, count entryLength from after the NUL */ 974 entryLengthOffset=1; 975 976 newSuffix=".o"; 977 978 #elif defined(U_WINDOWS) 979 struct { 980 IMAGE_FILE_HEADER fileHeader; 981 IMAGE_SECTION_HEADER sections[2]; 982 char linkerOptions[100]; 983 } objHeader; 984 IMAGE_SYMBOL symbols[1]; 985 struct { 986 DWORD sizeofLongNames; 987 char longNames[100]; 988 } symbolNames; 989 990 /* 991 * entry sometimes have a leading '_' 992 * overwritten if entryOffset==0 depending on the target platform 993 * see check for cpu below 994 */ 995 entry[0]='_'; 996 997 newSuffix=".obj"; 998 #else 999 # error "Unknown platform for CAN_GENERATE_OBJECTS." 1000 #endif 1001 1002 /* deal with options, files and the entry point name */ 1003 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch); 1004 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%hu\n", cpu, bits, makeBigEndian); 1005 #ifdef U_WINDOWS 1006 if(cpu==IMAGE_FILE_MACHINE_I386) { 1007 entryOffset=1; 1008 } 1009 #endif 1010 1011 in=T_FileStream_open(filename, "rb"); 1012 if(in==NULL) { 1013 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 1014 exit(U_FILE_ACCESS_ERROR); 1015 } 1016 size=T_FileStream_size(in); 1017 1018 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename); 1019 if (outFilePath != NULL) { 1020 uprv_strcpy(outFilePath, buffer); 1021 } 1022 1023 if(optEntryPoint != NULL) { 1024 uprv_strcpy(entry+entryOffset, optEntryPoint); 1025 uprv_strcat(entry+entryOffset, "_dat"); 1026 } 1027 /* turn dashes in the entry name into underscores */ 1028 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset); 1029 for(i=0; i<entryLength; ++i) { 1030 if(entry[entryLengthOffset+i]=='-') { 1031 entry[entryLengthOffset+i]='_'; 1032 } 1033 } 1034 1035 /* open the output file */ 1036 out=T_FileStream_open(buffer, "wb"); 1037 if(out==NULL) { 1038 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 1039 exit(U_FILE_ACCESS_ERROR); 1040 } 1041 1042 #ifdef U_ELF 1043 if(bits==32) { 1044 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1045 header32.e_machine=cpu; 1046 1047 /* 16-align .rodata in the .o file, just in case */ 1048 paddingSize=sectionHeaders32[4].sh_offset & 0xf; 1049 if(paddingSize!=0) { 1050 paddingSize=0x10-paddingSize; 1051 sectionHeaders32[4].sh_offset+=paddingSize; 1052 } 1053 1054 sectionHeaders32[4].sh_size=(Elf32_Word)size; 1055 1056 symbols32[1].st_size=(Elf32_Word)size; 1057 1058 /* write .o headers */ 1059 T_FileStream_write(out, &header32, (int32_t)sizeof(header32)); 1060 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32)); 1061 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32)); 1062 } else /* bits==64 */ { 1063 #ifdef U_ELF64 1064 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1065 header64.e_machine=cpu; 1066 1067 /* 16-align .rodata in the .o file, just in case */ 1068 paddingSize=sectionHeaders64[4].sh_offset & 0xf; 1069 if(paddingSize!=0) { 1070 paddingSize=0x10-paddingSize; 1071 sectionHeaders64[4].sh_offset+=paddingSize; 1072 } 1073 1074 sectionHeaders64[4].sh_size=(Elf64_Xword)size; 1075 1076 symbols64[1].st_size=(Elf64_Xword)size; 1077 1078 /* write .o headers */ 1079 T_FileStream_write(out, &header64, (int32_t)sizeof(header64)); 1080 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64)); 1081 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64)); 1082 #endif 1083 } 1084 1085 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings)); 1086 T_FileStream_write(out, entry, (int32_t)sizeof(entry)); 1087 if(paddingSize!=0) { 1088 T_FileStream_write(out, padding, paddingSize); 1089 } 1090 #elif defined(U_WINDOWS) 1091 /* populate the .obj headers */ 1092 uprv_memset(&objHeader, 0, sizeof(objHeader)); 1093 uprv_memset(&symbols, 0, sizeof(symbols)); 1094 uprv_memset(&symbolNames, 0, sizeof(symbolNames)); 1095 1096 /* write the linker export directive */ 1097 uprv_strcpy(objHeader.linkerOptions, "-export:"); 1098 length=8; 1099 uprv_strcpy(objHeader.linkerOptions+length, entry); 1100 length+=entryLength; 1101 uprv_strcpy(objHeader.linkerOptions+length, ",data "); 1102 length+=6; 1103 1104 /* set the file header */ 1105 objHeader.fileHeader.Machine=cpu; 1106 objHeader.fileHeader.NumberOfSections=2; 1107 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL); 1108 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */ 1109 objHeader.fileHeader.NumberOfSymbols=1; 1110 1111 /* set the section for the linker options */ 1112 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8); 1113 objHeader.sections[0].SizeOfRawData=length; 1114 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER; 1115 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES; 1116 1117 /* set the data section */ 1118 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6); 1119 objHeader.sections[1].SizeOfRawData=size; 1120 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length; 1121 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ; 1122 1123 /* set the symbol table */ 1124 if(entryLength<=8) { 1125 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength); 1126 symbolNames.sizeofLongNames=4; 1127 } else { 1128 symbols[0].N.Name.Short=0; 1129 symbols[0].N.Name.Long=4; 1130 symbolNames.sizeofLongNames=4+entryLength+1; 1131 uprv_strcpy(symbolNames.longNames, entry); 1132 } 1133 symbols[0].SectionNumber=2; 1134 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL; 1135 1136 /* write the file header and the linker options section */ 1137 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData); 1138 #else 1139 # error "Unknown platform for CAN_GENERATE_OBJECTS." 1140 #endif 1141 1142 /* copy the data file into section 2 */ 1143 for(;;) { 1144 length=T_FileStream_read(in, buffer, sizeof(buffer)); 1145 if(length==0) { 1146 break; 1147 } 1148 T_FileStream_write(out, buffer, (int32_t)length); 1149 } 1150 1151 #ifdef U_WINDOWS 1152 /* write the symbol table */ 1153 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL); 1154 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames); 1155 #endif 1156 1157 if(T_FileStream_error(in)) { 1158 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 1159 exit(U_FILE_ACCESS_ERROR); 1160 } 1161 1162 if(T_FileStream_error(out)) { 1163 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 1164 exit(U_FILE_ACCESS_ERROR); 1165 } 1166 1167 T_FileStream_close(out); 1168 T_FileStream_close(in); 1169 } 1170 #endif 1171