1 /****************************************************************************** 2 * Copyright (C) 2009, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************************* 5 */ 6 #include "unicode/utypes.h" 7 8 #ifdef U_WINDOWS 9 # define VC_EXTRALEAN 10 # define WIN32_LEAN_AND_MEAN 11 # define NOUSER 12 # define NOSERVICE 13 # define NOIME 14 # define NOMCX 15 #include <windows.h> 16 #include <time.h> 17 # ifdef __GNUC__ 18 # define WINDOWS_WITH_GNUC 19 # endif 20 #endif 21 22 #ifdef U_LINUX 23 # define U_ELF 24 #endif 25 26 #ifdef U_ELF 27 # include <elf.h> 28 # if defined(ELFCLASS64) 29 # define U_ELF64 30 # endif 31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */ 32 # ifndef EM_X86_64 33 # define EM_X86_64 62 34 # endif 35 # define ICU_ENTRY_OFFSET 0 36 #endif 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include "unicode/putil.h" 41 #include "cmemory.h" 42 #include "cstring.h" 43 #include "filestrm.h" 44 #include "toolutil.h" 45 #include "unicode/uclean.h" 46 #include "uoptions.h" 47 #include "pkg_genc.h" 48 49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) 50 51 #define HEX_0X 0 /* 0x1234 */ 52 #define HEX_0H 1 /* 01234h */ 53 54 #if defined(U_WINDOWS) || defined(U_ELF) 55 #define CAN_GENERATE_OBJECTS 56 #endif 57 58 /* prototypes --------------------------------------------------------------- */ 59 static void 60 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename); 61 62 static uint32_t 63 write8(FileStream *out, uint8_t byte, uint32_t column); 64 65 static uint32_t 66 write32(FileStream *out, uint32_t byte, uint32_t column); 67 68 #ifdef OS400 69 static uint32_t 70 write8str(FileStream *out, uint8_t byte, uint32_t column); 71 #endif 72 /* -------------------------------------------------------------------------- */ 73 74 /* 75 Creating Template Files for New Platforms 76 77 Let the cc compiler help you get started. 78 Compile this program 79 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16}; 80 with the -S option to produce assembly output. 81 82 For example, this will generate array.s: 83 gcc -S array.c 84 85 This will produce a .s file that may look like this: 86 87 .file "array.c" 88 .version "01.01" 89 gcc2_compiled.: 90 .globl x 91 .section .rodata 92 .align 4 93 .type x,@object 94 .size x,20 95 x: 96 .long 1 97 .long 2 98 .long -559038737 99 .long -1 100 .long 16 101 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)" 102 103 which gives a starting point that will compile, and can be transformed 104 to become the template, generally with some consulting of as docs and 105 some experimentation. 106 107 If you want ICU to automatically use this assembly, you should 108 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file, 109 where the name is the compiler or platform that you used in this 110 assemblyHeader data structure. 111 */ 112 static const struct AssemblyType { 113 const char *name; 114 const char *header; 115 const char *beginLine; 116 const char *footer; 117 int8_t hexType; /* HEX_0X or HEX_0h */ 118 } assemblyHeader[] = { 119 {"gcc", 120 ".globl %s\n" 121 "\t.section .note.GNU-stack,\"\",@progbits\n" 122 "\t.section .rodata\n" 123 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */ 124 "\t.type %s,@object\n" 125 "%s:\n\n", 126 127 ".long ","",HEX_0X 128 }, 129 {"gcc-darwin", 130 /*"\t.section __TEXT,__text,regular,pure_instructions\n" 131 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/ 132 ".globl _%s\n" 133 "\t.data\n" 134 "\t.const\n" 135 "\t.align 4\n" /* 1<<4 = 16 */ 136 "_%s:\n\n", 137 138 ".long ","",HEX_0X 139 }, 140 {"gcc-cygwin", 141 ".globl _%s\n" 142 "\t.section .rodata\n" 143 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */ 144 "_%s:\n\n", 145 146 ".long ","",HEX_0X 147 }, 148 {"sun", 149 "\t.section \".rodata\"\n" 150 "\t.align 8\n" 151 ".globl %s\n" 152 "%s:\n", 153 154 ".word ","",HEX_0X 155 }, 156 {"sun-x86", 157 "Drodata.rodata:\n" 158 "\t.type Drodata.rodata,@object\n" 159 "\t.size Drodata.rodata,0\n" 160 "\t.globl %s\n" 161 "\t.align 8\n" 162 "%s:\n", 163 164 ".4byte ","",HEX_0X 165 }, 166 {"xlc", 167 ".globl %s{RO}\n" 168 "\t.toc\n" 169 "%s:\n" 170 "\t.csect %s{RO}, 4\n", 171 172 ".long ","",HEX_0X 173 }, 174 {"aCC-ia64", 175 "\t.file \"%s.s\"\n" 176 "\t.type %s,@object\n" 177 "\t.global %s\n" 178 "\t.secalias .abe$0.rodata, \".rodata\"\n" 179 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n" 180 "\t.align 16\n" 181 "%s::\t", 182 183 "data4 ","",HEX_0X 184 }, 185 {"aCC-parisc", 186 "\t.SPACE $TEXT$\n" 187 "\t.SUBSPA $LIT$\n" 188 "%s\n" 189 "\t.EXPORT %s\n" 190 "\t.ALIGN 16\n", 191 192 ".WORD ","",HEX_0X 193 }, 194 { "masm", 195 "\tTITLE %s\n" 196 "; generated by genccode\n" 197 ".386\n" 198 ".model flat\n" 199 "\tPUBLIC _%s\n" 200 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n" 201 "\tALIGN 16\n" 202 "_%s\tLABEL DWORD\n", 203 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H 204 } 205 }; 206 207 static int32_t assemblyHeaderIndex = -1; 208 static int32_t hexType = HEX_0X; 209 210 U_CAPI UBool U_EXPORT2 211 checkAssemblyHeaderName(const char* optAssembly) { 212 int32_t idx; 213 assemblyHeaderIndex = -1; 214 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 215 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) { 216 assemblyHeaderIndex = idx; 217 hexType = assemblyHeader[idx].hexType; /* set the hex type */ 218 return TRUE; 219 } 220 } 221 222 return FALSE; 223 } 224 225 226 U_CAPI void U_EXPORT2 227 printAssemblyHeadersToStdErr(void) { 228 int32_t idx; 229 fprintf(stderr, "%s", assemblyHeader[0].name); 230 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 231 fprintf(stderr, ", %s", assemblyHeader[idx].name); 232 } 233 fprintf(stderr, 234 ")\n"); 235 } 236 237 U_CAPI void U_EXPORT2 238 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) { 239 uint32_t column = MAX_COLUMN; 240 char entry[64]; 241 uint32_t buffer[1024]; 242 char *bufferStr = (char *)buffer; 243 FileStream *in, *out; 244 size_t i, length; 245 246 in=T_FileStream_open(filename, "rb"); 247 if(in==NULL) { 248 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 249 exit(U_FILE_ACCESS_ERROR); 250 } 251 252 getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename); 253 out=T_FileStream_open(bufferStr, "w"); 254 if(out==NULL) { 255 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr); 256 exit(U_FILE_ACCESS_ERROR); 257 } 258 259 if (outFilePath != NULL) { 260 uprv_strcpy(outFilePath, bufferStr); 261 } 262 263 #ifdef WINDOWS_WITH_GNUC 264 /* Need to fix the file seperator character when using MinGW. */ 265 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/'); 266 #endif 267 268 if(optEntryPoint != NULL) { 269 uprv_strcpy(entry, optEntryPoint); 270 uprv_strcat(entry, "_dat"); 271 } 272 273 /* turn dashes or dots in the entry name into underscores */ 274 length=uprv_strlen(entry); 275 for(i=0; i<length; ++i) { 276 if(entry[i]=='-' || entry[i]=='.') { 277 entry[i]='_'; 278 } 279 } 280 281 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header, 282 entry, entry, entry, entry, 283 entry, entry, entry, entry); 284 T_FileStream_writeLine(out, bufferStr); 285 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine); 286 287 for(;;) { 288 length=T_FileStream_read(in, buffer, sizeof(buffer)); 289 if(length==0) { 290 break; 291 } 292 if (length != sizeof(buffer)) { 293 /* pad with extra 0's when at the end of the file */ 294 for(i=0; i < (length % sizeof(uint32_t)); ++i) { 295 buffer[length+i] = 0; 296 } 297 } 298 for(i=0; i<(length/sizeof(buffer[0])); i++) { 299 column = write32(out, buffer[i], column); 300 } 301 } 302 303 T_FileStream_writeLine(out, "\n"); 304 305 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer, 306 entry, entry, entry, entry, 307 entry, entry, entry, entry); 308 T_FileStream_writeLine(out, bufferStr); 309 310 if(T_FileStream_error(in)) { 311 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 312 exit(U_FILE_ACCESS_ERROR); 313 } 314 315 if(T_FileStream_error(out)) { 316 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 317 exit(U_FILE_ACCESS_ERROR); 318 } 319 320 T_FileStream_close(out); 321 T_FileStream_close(in); 322 } 323 324 U_CAPI void U_EXPORT2 325 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) { 326 uint32_t column = MAX_COLUMN; 327 char buffer[4096], entry[64]; 328 FileStream *in, *out; 329 size_t i, length; 330 331 in=T_FileStream_open(filename, "rb"); 332 if(in==NULL) { 333 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 334 exit(U_FILE_ACCESS_ERROR); 335 } 336 337 if(optName != NULL) { /* prepend 'icudt28_' */ 338 strcpy(entry, optName); 339 strcat(entry, "_"); 340 } else { 341 entry[0] = 0; 342 } 343 344 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename); 345 if (outFilePath != NULL) { 346 uprv_strcpy(outFilePath, buffer); 347 } 348 out=T_FileStream_open(buffer, "w"); 349 if(out==NULL) { 350 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 351 exit(U_FILE_ACCESS_ERROR); 352 } 353 354 /* turn dashes or dots in the entry name into underscores */ 355 length=uprv_strlen(entry); 356 for(i=0; i<length; ++i) { 357 if(entry[i]=='-' || entry[i]=='.') { 358 entry[i]='_'; 359 } 360 } 361 362 #ifdef OS400 363 /* 364 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c 365 366 This is here because this platform can't currently put 367 const data into the read-only pages of an object or 368 shared library (service program). Only strings are allowed in read-only 369 pages, so we use char * strings to store the data. 370 371 In order to prevent the beginning of the data from ever matching the 372 magic numbers we must still use the initial double. 373 [grhoten 4/24/2003] 374 */ 375 sprintf(buffer, 376 "#define U_DISABLE_RENAMING 1\n" 377 "#include \"unicode/umachine.h\"\n" 378 "U_CDECL_BEGIN\n" 379 "const struct {\n" 380 " double bogus;\n" 381 " const char *bytes; \n" 382 "} %s={ 0.0, \n", 383 entry); 384 T_FileStream_writeLine(out, buffer); 385 386 for(;;) { 387 length=T_FileStream_read(in, buffer, sizeof(buffer)); 388 if(length==0) { 389 break; 390 } 391 for(i=0; i<length; ++i) { 392 column = write8str(out, (uint8_t)buffer[i], column); 393 } 394 } 395 396 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n"); 397 #else 398 /* Function renaming shouldn't be done in data */ 399 sprintf(buffer, 400 "#define U_DISABLE_RENAMING 1\n" 401 "#include \"unicode/umachine.h\"\n" 402 "U_CDECL_BEGIN\n" 403 "const struct {\n" 404 " double bogus;\n" 405 " uint8_t bytes[%ld]; \n" 406 "} %s={ 0.0, {\n", 407 (long)T_FileStream_size(in), entry); 408 T_FileStream_writeLine(out, buffer); 409 410 for(;;) { 411 length=T_FileStream_read(in, buffer, sizeof(buffer)); 412 if(length==0) { 413 break; 414 } 415 for(i=0; i<length; ++i) { 416 column = write8(out, (uint8_t)buffer[i], column); 417 } 418 } 419 420 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n"); 421 #endif 422 423 if(T_FileStream_error(in)) { 424 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 425 exit(U_FILE_ACCESS_ERROR); 426 } 427 428 if(T_FileStream_error(out)) { 429 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 430 exit(U_FILE_ACCESS_ERROR); 431 } 432 433 T_FileStream_close(out); 434 T_FileStream_close(in); 435 } 436 437 static uint32_t 438 write32(FileStream *out, uint32_t bitField, uint32_t column) { 439 int32_t i; 440 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */ 441 char *s = bitFieldStr; 442 uint8_t *ptrIdx = (uint8_t *)&bitField; 443 static const char hexToStr[16] = { 444 '0','1','2','3', 445 '4','5','6','7', 446 '8','9','A','B', 447 'C','D','E','F' 448 }; 449 450 /* write the value, possibly with comma and newline */ 451 if(column==MAX_COLUMN) { 452 /* first byte */ 453 column=1; 454 } else if(column<32) { 455 *(s++)=','; 456 ++column; 457 } else { 458 *(s++)='\n'; 459 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine); 460 s+=uprv_strlen(s); 461 column=1; 462 } 463 464 if (bitField < 10) { 465 /* It's a small number. Don't waste the space for 0x */ 466 *(s++)=hexToStr[bitField]; 467 } 468 else { 469 int seenNonZero = 0; /* This is used to remove leading zeros */ 470 471 if(hexType==HEX_0X) { 472 *(s++)='0'; 473 *(s++)='x'; 474 } else if(hexType==HEX_0H) { 475 *(s++)='0'; 476 } 477 478 /* This creates a 32-bit field */ 479 #if U_IS_BIG_ENDIAN 480 for (i = 0; i < sizeof(uint32_t); i++) 481 #else 482 for (i = sizeof(uint32_t)-1; i >= 0 ; i--) 483 #endif 484 { 485 uint8_t value = ptrIdx[i]; 486 if (value || seenNonZero) { 487 *(s++)=hexToStr[value>>4]; 488 *(s++)=hexToStr[value&0xF]; 489 seenNonZero = 1; 490 } 491 } 492 if(hexType==HEX_0H) { 493 *(s++)='h'; 494 } 495 } 496 497 *(s++)=0; 498 T_FileStream_writeLine(out, bitFieldStr); 499 return column; 500 } 501 502 static uint32_t 503 write8(FileStream *out, uint8_t byte, uint32_t column) { 504 char s[4]; 505 int i=0; 506 507 /* convert the byte value to a string */ 508 if(byte>=100) { 509 s[i++]=(char)('0'+byte/100); 510 byte%=100; 511 } 512 if(i>0 || byte>=10) { 513 s[i++]=(char)('0'+byte/10); 514 byte%=10; 515 } 516 s[i++]=(char)('0'+byte); 517 s[i]=0; 518 519 /* write the value, possibly with comma and newline */ 520 if(column==MAX_COLUMN) { 521 /* first byte */ 522 column=1; 523 } else if(column<16) { 524 T_FileStream_writeLine(out, ","); 525 ++column; 526 } else { 527 T_FileStream_writeLine(out, ",\n"); 528 column=1; 529 } 530 T_FileStream_writeLine(out, s); 531 return column; 532 } 533 534 #ifdef OS400 535 static uint32_t 536 write8str(FileStream *out, uint8_t byte, uint32_t column) { 537 char s[8]; 538 539 if (byte > 7) 540 sprintf(s, "\\x%X", byte); 541 else 542 sprintf(s, "\\%X", byte); 543 544 /* write the value, possibly with comma and newline */ 545 if(column==MAX_COLUMN) { 546 /* first byte */ 547 column=1; 548 T_FileStream_writeLine(out, "\""); 549 } else if(column<24) { 550 ++column; 551 } else { 552 T_FileStream_writeLine(out, "\"\n\""); 553 column=1; 554 } 555 T_FileStream_writeLine(out, s); 556 return column; 557 } 558 #endif 559 560 static void 561 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) { 562 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.'); 563 564 /* copy path */ 565 if(destdir!=NULL && *destdir!=0) { 566 do { 567 *outFilename++=*destdir++; 568 } while(*destdir!=0); 569 if(*(outFilename-1)!=U_FILE_SEP_CHAR) { 570 *outFilename++=U_FILE_SEP_CHAR; 571 } 572 inFilename=basename; 573 } else { 574 while(inFilename<basename) { 575 *outFilename++=*inFilename++; 576 } 577 } 578 579 if(suffix==NULL) { 580 /* the filename does not have a suffix */ 581 uprv_strcpy(entryName, inFilename); 582 if(optFilename != NULL) { 583 uprv_strcpy(outFilename, optFilename); 584 } else { 585 uprv_strcpy(outFilename, inFilename); 586 } 587 uprv_strcat(outFilename, newSuffix); 588 } else { 589 char *saveOutFilename = outFilename; 590 /* copy basename */ 591 while(inFilename<suffix) { 592 if(*inFilename=='-') { 593 /* iSeries cannot have '-' in the .o objects. */ 594 *outFilename++=*entryName++='_'; 595 inFilename++; 596 } 597 else { 598 *outFilename++=*entryName++=*inFilename++; 599 } 600 } 601 602 /* replace '.' by '_' */ 603 *outFilename++=*entryName++='_'; 604 ++inFilename; 605 606 /* copy suffix */ 607 while(*inFilename!=0) { 608 *outFilename++=*entryName++=*inFilename++; 609 } 610 611 *entryName=0; 612 613 if(optFilename != NULL) { 614 uprv_strcpy(saveOutFilename, optFilename); 615 uprv_strcat(saveOutFilename, newSuffix); 616 } else { 617 /* add ".c" */ 618 uprv_strcpy(outFilename, newSuffix); 619 } 620 } 621 } 622 623 #ifdef CAN_GENERATE_OBJECTS 624 static void 625 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) { 626 int64_t buffer[256]; 627 const char *filename; 628 FileStream *in; 629 int32_t length; 630 631 #ifdef U_ELF 632 /* Pointer to ELF header. Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */ 633 const Elf32_Ehdr *pHeader32; 634 #elif defined(U_WINDOWS) 635 const IMAGE_FILE_HEADER *pHeader; 636 #else 637 # error "Unknown platform for CAN_GENERATE_OBJECTS." 638 #endif 639 640 if(optMatchArch != NULL) { 641 filename=optMatchArch; 642 } else { 643 /* set defaults */ 644 #ifdef U_ELF 645 /* set EM_386 because elf.h does not provide better defaults */ 646 *pCPU=EM_386; 647 *pBits=32; 648 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB); 649 #elif defined(U_WINDOWS) 650 /* _M_IA64 should be defined in windows.h */ 651 # if defined(_M_IA64) 652 *pCPU=IMAGE_FILE_MACHINE_IA64; 653 # elif defined(_M_AMD64) 654 *pCPU=IMAGE_FILE_MACHINE_AMD64; 655 # else 656 *pCPU=IMAGE_FILE_MACHINE_I386; 657 # endif 658 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 659 *pIsBigEndian=FALSE; 660 #else 661 # error "Unknown platform for CAN_GENERATE_OBJECTS." 662 #endif 663 return; 664 } 665 666 in=T_FileStream_open(filename, "rb"); 667 if(in==NULL) { 668 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename); 669 exit(U_FILE_ACCESS_ERROR); 670 } 671 length=T_FileStream_read(in, buffer, sizeof(buffer)); 672 673 #ifdef U_ELF 674 if(length<sizeof(Elf32_Ehdr)) { 675 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 676 exit(U_UNSUPPORTED_ERROR); 677 } 678 pHeader32=(const Elf32_Ehdr *)buffer; 679 if( 680 pHeader32->e_ident[0]!=ELFMAG0 || 681 pHeader32->e_ident[1]!=ELFMAG1 || 682 pHeader32->e_ident[2]!=ELFMAG2 || 683 pHeader32->e_ident[3]!=ELFMAG3 || 684 pHeader32->e_ident[EI_CLASS]<ELFCLASS32 || pHeader32->e_ident[EI_CLASS]>ELFCLASS64 685 ) { 686 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename); 687 exit(U_UNSUPPORTED_ERROR); 688 } 689 690 *pBits= pHeader32->e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */ 691 #ifdef U_ELF64 692 if(*pBits!=32 && *pBits!=64) { 693 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n"); 694 exit(U_UNSUPPORTED_ERROR); 695 } 696 #else 697 if(*pBits!=32) { 698 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n"); 699 exit(U_UNSUPPORTED_ERROR); 700 } 701 #endif 702 703 *pIsBigEndian=(UBool)(pHeader32->e_ident[EI_DATA]==ELFDATA2MSB); 704 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) { 705 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n"); 706 exit(U_UNSUPPORTED_ERROR); 707 } 708 /* TODO: Support byte swapping */ 709 710 *pCPU=pHeader32->e_machine; 711 #elif defined(U_WINDOWS) 712 if(length<sizeof(IMAGE_FILE_HEADER)) { 713 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 714 exit(U_UNSUPPORTED_ERROR); 715 } 716 pHeader=(const IMAGE_FILE_HEADER *)buffer; 717 *pCPU=pHeader->Machine; 718 /* 719 * The number of bits is implicit with the Machine value. 720 * *pBits is ignored in the calling code, so this need not be precise. 721 */ 722 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 723 /* Windows always runs on little-endian CPUs. */ 724 *pIsBigEndian=FALSE; 725 #else 726 # error "Unknown platform for CAN_GENERATE_OBJECTS." 727 #endif 728 729 T_FileStream_close(in); 730 } 731 732 U_CAPI void U_EXPORT2 733 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) { 734 /* common variables */ 735 char buffer[4096], entry[40]={ 0 }; 736 FileStream *in, *out; 737 const char *newSuffix; 738 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0; 739 740 uint16_t cpu, bits; 741 UBool makeBigEndian; 742 743 /* platform-specific variables and initialization code */ 744 #ifdef U_ELF 745 /* 32-bit Elf file header */ 746 static Elf32_Ehdr header32={ 747 { 748 /* e_ident[] */ 749 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 750 ELFCLASS32, 751 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 752 EV_CURRENT /* EI_VERSION */ 753 }, 754 ET_REL, 755 EM_386, 756 EV_CURRENT, /* e_version */ 757 0, /* e_entry */ 758 0, /* e_phoff */ 759 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */ 760 0, /* e_flags */ 761 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */ 762 0, /* e_phentsize */ 763 0, /* e_phnum */ 764 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */ 765 5, /* e_shnum */ 766 2 /* e_shstrndx */ 767 }; 768 769 /* 32-bit Elf section header table */ 770 static Elf32_Shdr sectionHeaders32[5]={ 771 { /* SHN_UNDEF */ 772 0 773 }, 774 { /* .symtab */ 775 1, /* sh_name */ 776 SHT_SYMTAB, 777 0, /* sh_flags */ 778 0, /* sh_addr */ 779 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */ 780 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */ 781 3, /* sh_link=sect hdr index of .strtab */ 782 1, /* sh_info=One greater than the symbol table index of the last 783 * local symbol (with STB_LOCAL). */ 784 4, /* sh_addralign */ 785 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */ 786 }, 787 { /* .shstrtab */ 788 9, /* sh_name */ 789 SHT_STRTAB, 790 0, /* sh_flags */ 791 0, /* sh_addr */ 792 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */ 793 40, /* sh_size */ 794 0, /* sh_link */ 795 0, /* sh_info */ 796 1, /* sh_addralign */ 797 0 /* sh_entsize */ 798 }, 799 { /* .strtab */ 800 19, /* sh_name */ 801 SHT_STRTAB, 802 0, /* sh_flags */ 803 0, /* sh_addr */ 804 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */ 805 (Elf32_Word)sizeof(entry), /* sh_size */ 806 0, /* sh_link */ 807 0, /* sh_info */ 808 1, /* sh_addralign */ 809 0 /* sh_entsize */ 810 }, 811 { /* .rodata */ 812 27, /* sh_name */ 813 SHT_PROGBITS, 814 SHF_ALLOC, /* sh_flags */ 815 0, /* sh_addr */ 816 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */ 817 0, /* sh_size */ 818 0, /* sh_link */ 819 0, /* sh_info */ 820 16, /* sh_addralign */ 821 0 /* sh_entsize */ 822 } 823 }; 824 825 /* symbol table */ 826 static Elf32_Sym symbols32[2]={ 827 { /* STN_UNDEF */ 828 0 829 }, 830 { /* data entry point */ 831 1, /* st_name */ 832 0, /* st_value */ 833 0, /* st_size */ 834 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 835 0, /* st_other */ 836 4 /* st_shndx=index of related section table entry */ 837 } 838 }; 839 840 /* section header string table, with decimal string offsets */ 841 static const char sectionStrings[40]= 842 /* 0 */ "\0" 843 /* 1 */ ".symtab\0" 844 /* 9 */ ".shstrtab\0" 845 /* 19 */ ".strtab\0" 846 /* 27 */ ".rodata\0" 847 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */ 848 /* 40: padded to multiple of 8 bytes */ 849 850 /* 851 * Use entry[] for the string table which will contain only the 852 * entry point name. 853 * entry[0] must be 0 (NUL) 854 * The entry point name can be up to 38 characters long (sizeof(entry)-2). 855 */ 856 857 /* 16-align .rodata in the .o file, just in case */ 858 static const char padding[16]={ 0 }; 859 int32_t paddingSize; 860 861 #ifdef U_ELF64 862 /* 64-bit Elf file header */ 863 static Elf64_Ehdr header64={ 864 { 865 /* e_ident[] */ 866 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 867 ELFCLASS64, 868 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 869 EV_CURRENT /* EI_VERSION */ 870 }, 871 ET_REL, 872 EM_X86_64, 873 EV_CURRENT, /* e_version */ 874 0, /* e_entry */ 875 0, /* e_phoff */ 876 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */ 877 0, /* e_flags */ 878 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */ 879 0, /* e_phentsize */ 880 0, /* e_phnum */ 881 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */ 882 5, /* e_shnum */ 883 2 /* e_shstrndx */ 884 }; 885 886 /* 64-bit Elf section header table */ 887 static Elf64_Shdr sectionHeaders64[5]={ 888 { /* SHN_UNDEF */ 889 0 890 }, 891 { /* .symtab */ 892 1, /* sh_name */ 893 SHT_SYMTAB, 894 0, /* sh_flags */ 895 0, /* sh_addr */ 896 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */ 897 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */ 898 3, /* sh_link=sect hdr index of .strtab */ 899 1, /* sh_info=One greater than the symbol table index of the last 900 * local symbol (with STB_LOCAL). */ 901 4, /* sh_addralign */ 902 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */ 903 }, 904 { /* .shstrtab */ 905 9, /* sh_name */ 906 SHT_STRTAB, 907 0, /* sh_flags */ 908 0, /* sh_addr */ 909 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */ 910 40, /* sh_size */ 911 0, /* sh_link */ 912 0, /* sh_info */ 913 1, /* sh_addralign */ 914 0 /* sh_entsize */ 915 }, 916 { /* .strtab */ 917 19, /* sh_name */ 918 SHT_STRTAB, 919 0, /* sh_flags */ 920 0, /* sh_addr */ 921 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */ 922 (Elf64_Xword)sizeof(entry), /* sh_size */ 923 0, /* sh_link */ 924 0, /* sh_info */ 925 1, /* sh_addralign */ 926 0 /* sh_entsize */ 927 }, 928 { /* .rodata */ 929 27, /* sh_name */ 930 SHT_PROGBITS, 931 SHF_ALLOC, /* sh_flags */ 932 0, /* sh_addr */ 933 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */ 934 0, /* sh_size */ 935 0, /* sh_link */ 936 0, /* sh_info */ 937 16, /* sh_addralign */ 938 0 /* sh_entsize */ 939 } 940 }; 941 942 /* 943 * 64-bit symbol table 944 * careful: different order of items compared with Elf32_sym! 945 */ 946 static Elf64_Sym symbols64[2]={ 947 { /* STN_UNDEF */ 948 0 949 }, 950 { /* data entry point */ 951 1, /* st_name */ 952 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 953 0, /* st_other */ 954 4, /* st_shndx=index of related section table entry */ 955 0, /* st_value */ 956 0 /* st_size */ 957 } 958 }; 959 960 #endif /* U_ELF64 */ 961 962 /* entry[] have a leading NUL */ 963 entryOffset=1; 964 965 /* in the common code, count entryLength from after the NUL */ 966 entryLengthOffset=1; 967 968 newSuffix=".o"; 969 970 #elif defined(U_WINDOWS) 971 struct { 972 IMAGE_FILE_HEADER fileHeader; 973 IMAGE_SECTION_HEADER sections[2]; 974 char linkerOptions[100]; 975 } objHeader; 976 IMAGE_SYMBOL symbols[1]; 977 struct { 978 DWORD sizeofLongNames; 979 char longNames[100]; 980 } symbolNames; 981 982 /* 983 * entry sometimes have a leading '_' 984 * overwritten if entryOffset==0 depending on the target platform 985 * see check for cpu below 986 */ 987 entry[0]='_'; 988 989 newSuffix=".obj"; 990 #else 991 # error "Unknown platform for CAN_GENERATE_OBJECTS." 992 #endif 993 994 /* deal with options, files and the entry point name */ 995 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch); 996 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%hu\n", cpu, bits, makeBigEndian); 997 #ifdef U_WINDOWS 998 if(cpu==IMAGE_FILE_MACHINE_I386) { 999 entryOffset=1; 1000 } 1001 #endif 1002 1003 in=T_FileStream_open(filename, "rb"); 1004 if(in==NULL) { 1005 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 1006 exit(U_FILE_ACCESS_ERROR); 1007 } 1008 size=T_FileStream_size(in); 1009 1010 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename); 1011 if (outFilePath != NULL) { 1012 uprv_strcpy(outFilePath, buffer); 1013 } 1014 1015 if(optEntryPoint != NULL) { 1016 uprv_strcpy(entry+entryOffset, optEntryPoint); 1017 uprv_strcat(entry+entryOffset, "_dat"); 1018 } 1019 /* turn dashes in the entry name into underscores */ 1020 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset); 1021 for(i=0; i<entryLength; ++i) { 1022 if(entry[entryLengthOffset+i]=='-') { 1023 entry[entryLengthOffset+i]='_'; 1024 } 1025 } 1026 1027 /* open the output file */ 1028 out=T_FileStream_open(buffer, "wb"); 1029 if(out==NULL) { 1030 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 1031 exit(U_FILE_ACCESS_ERROR); 1032 } 1033 1034 #ifdef U_ELF 1035 if(bits==32) { 1036 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1037 header32.e_machine=cpu; 1038 1039 /* 16-align .rodata in the .o file, just in case */ 1040 paddingSize=sectionHeaders32[4].sh_offset & 0xf; 1041 if(paddingSize!=0) { 1042 paddingSize=0x10-paddingSize; 1043 sectionHeaders32[4].sh_offset+=paddingSize; 1044 } 1045 1046 sectionHeaders32[4].sh_size=(Elf32_Word)size; 1047 1048 symbols32[1].st_size=(Elf32_Word)size; 1049 1050 /* write .o headers */ 1051 T_FileStream_write(out, &header32, (int32_t)sizeof(header32)); 1052 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32)); 1053 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32)); 1054 } else /* bits==64 */ { 1055 #ifdef U_ELF64 1056 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1057 header64.e_machine=cpu; 1058 1059 /* 16-align .rodata in the .o file, just in case */ 1060 paddingSize=sectionHeaders64[4].sh_offset & 0xf; 1061 if(paddingSize!=0) { 1062 paddingSize=0x10-paddingSize; 1063 sectionHeaders64[4].sh_offset+=paddingSize; 1064 } 1065 1066 sectionHeaders64[4].sh_size=(Elf64_Xword)size; 1067 1068 symbols64[1].st_size=(Elf64_Xword)size; 1069 1070 /* write .o headers */ 1071 T_FileStream_write(out, &header64, (int32_t)sizeof(header64)); 1072 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64)); 1073 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64)); 1074 #endif 1075 } 1076 1077 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings)); 1078 T_FileStream_write(out, entry, (int32_t)sizeof(entry)); 1079 if(paddingSize!=0) { 1080 T_FileStream_write(out, padding, paddingSize); 1081 } 1082 #elif defined(U_WINDOWS) 1083 /* populate the .obj headers */ 1084 uprv_memset(&objHeader, 0, sizeof(objHeader)); 1085 uprv_memset(&symbols, 0, sizeof(symbols)); 1086 uprv_memset(&symbolNames, 0, sizeof(symbolNames)); 1087 1088 /* write the linker export directive */ 1089 uprv_strcpy(objHeader.linkerOptions, "-export:"); 1090 length=8; 1091 uprv_strcpy(objHeader.linkerOptions+length, entry); 1092 length+=entryLength; 1093 uprv_strcpy(objHeader.linkerOptions+length, ",data "); 1094 length+=6; 1095 1096 /* set the file header */ 1097 objHeader.fileHeader.Machine=cpu; 1098 objHeader.fileHeader.NumberOfSections=2; 1099 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL); 1100 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */ 1101 objHeader.fileHeader.NumberOfSymbols=1; 1102 1103 /* set the section for the linker options */ 1104 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8); 1105 objHeader.sections[0].SizeOfRawData=length; 1106 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER; 1107 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES; 1108 1109 /* set the data section */ 1110 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6); 1111 objHeader.sections[1].SizeOfRawData=size; 1112 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length; 1113 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ; 1114 1115 /* set the symbol table */ 1116 if(entryLength<=8) { 1117 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength); 1118 symbolNames.sizeofLongNames=4; 1119 } else { 1120 symbols[0].N.Name.Short=0; 1121 symbols[0].N.Name.Long=4; 1122 symbolNames.sizeofLongNames=4+entryLength+1; 1123 uprv_strcpy(symbolNames.longNames, entry); 1124 } 1125 symbols[0].SectionNumber=2; 1126 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL; 1127 1128 /* write the file header and the linker options section */ 1129 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData); 1130 #else 1131 # error "Unknown platform for CAN_GENERATE_OBJECTS." 1132 #endif 1133 1134 /* copy the data file into section 2 */ 1135 for(;;) { 1136 length=T_FileStream_read(in, buffer, sizeof(buffer)); 1137 if(length==0) { 1138 break; 1139 } 1140 T_FileStream_write(out, buffer, (int32_t)length); 1141 } 1142 1143 #ifdef U_WINDOWS 1144 /* write the symbol table */ 1145 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL); 1146 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames); 1147 #endif 1148 1149 if(T_FileStream_error(in)) { 1150 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 1151 exit(U_FILE_ACCESS_ERROR); 1152 } 1153 1154 if(T_FileStream_error(out)) { 1155 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 1156 exit(U_FILE_ACCESS_ERROR); 1157 } 1158 1159 T_FileStream_close(out); 1160 T_FileStream_close(in); 1161 } 1162 #endif 1163