1 2 /* Derived from Valgrind sources, coregrind/m_debuginfo/readmacho.c. 3 GPL 2+ therefore. 4 5 Can be compiled as either a 32- or 64-bit program (doesn't matter). 6 */ 7 8 /* What does this program do? In short it postprocesses tool 9 executables on MacOSX, after linking using /usr/bin/ld. This is so 10 as to work around a bug in the linker on Xcode 4.0.0 and Xcode 11 4.0.1. Xcode versions prior to 4.0.0 are unaffected. 12 13 The tracking bug is https://bugs.kde.org/show_bug.cgi?id=267997 14 15 The bug causes 64-bit tool executables to segfault at startup, 16 because: 17 18 Comparing the MachO load commands vs a (working) tool executable 19 that was created by Xcode 3.2.x, it appears that the new linker has 20 partially ignored the build system's request to place the tool 21 executable's stack at a non standard location. The build system 22 tells the linker "-stack_addr 0x134000000 -stack_size 0x800000". 23 24 With the Xcode 3.2 linker those flags produce two results: 25 26 (1) A load command to allocate the stack at the said location: 27 Load command 3 28 cmd LC_SEGMENT_64 29 cmdsize 72 30 segname __UNIXSTACK 31 vmaddr 0x0000000133800000 32 vmsize 0x0000000000800000 33 fileoff 2285568 34 filesize 0 35 maxprot 0x00000007 36 initprot 0x00000003 37 nsects 0 38 flags 0x0 39 40 (2) A request (in LC_UNIXTHREAD) to set %rsp to the correct value 41 at process startup, 0x134000000. 42 43 With Xcode 4.0.1, (1) is missing but (2) is still present. The 44 tool executable therefore starts up with %rsp pointing to unmapped 45 memory and faults almost instantly. 46 47 The workaround implemented by this program is documented in comment 48 8 of bug 267997, viz: 49 50 One really sick workaround is to observe that the executables 51 contain a redundant MachO load command: 52 53 Load command 2 54 cmd LC_SEGMENT_64 55 cmdsize 72 56 segname __LINKEDIT 57 vmaddr 0x0000000138dea000 58 vmsize 0x00000000000ad000 59 fileoff 2658304 60 filesize 705632 61 maxprot 0x00000007 62 initprot 0x00000001 63 nsects 0 64 flags 0x0 65 66 The described section presumably contains information intended for 67 the dynamic linker, but is irrelevant because this is a statically 68 linked executable. Hence it might be possible to postprocess the 69 executables after linking, to overwrite this entry with the 70 information that would have been in the missing __UNIXSTACK entry. 71 I tried this by hand (with a binary editor) earlier and got 72 something that worked. 73 */ 74 75 #define DEBUGPRINTING 0 76 77 #include <assert.h> 78 #include <stdlib.h> 79 #include <stdio.h> 80 #include <string.h> 81 #include <sys/mman.h> 82 #include <sys/stat.h> 83 #include <unistd.h> 84 #include <fcntl.h> 85 86 87 #undef PLAT_x86_darwin 88 #undef PLAT_amd64_darwin 89 90 #if defined(__APPLE__) && defined(__i386__) 91 # define PLAT_x86_darwin 1 92 #elif defined(__APPLE__) && defined(__x86_64__) 93 # define PLAT_amd64_darwin 1 94 #else 95 # error "Can't be compiled on this platform" 96 #endif 97 98 #include <mach-o/loader.h> 99 #include <mach-o/nlist.h> 100 #include <mach-o/fat.h> 101 #include <mach/i386/thread_status.h> 102 103 104 typedef unsigned char UChar; 105 typedef signed char Char; 106 typedef char HChar; /* signfulness depends on host */ 107 108 typedef unsigned int UInt; 109 typedef signed int Int; 110 111 typedef unsigned char Bool; 112 #define True ((Bool)1) 113 #define False ((Bool)0) 114 115 typedef unsigned long UWord; 116 117 typedef UWord SizeT; 118 typedef UWord Addr; 119 120 typedef unsigned long long int ULong; 121 typedef signed long long int Long; 122 123 124 125 __attribute__((noreturn)) 126 void fail ( HChar* msg ) 127 { 128 fprintf(stderr, "fixup_macho_loadcmds: fail: %s\n", msg); 129 exit(1); 130 } 131 132 133 /*------------------------------------------------------------*/ 134 /*--- ---*/ 135 /*--- Mach-O file mapping/unmapping helpers ---*/ 136 /*--- ---*/ 137 /*------------------------------------------------------------*/ 138 139 typedef 140 struct { 141 /* These two describe the entire mapped-in ("primary") image, 142 fat headers, kitchen sink, whatnot: the entire file. The 143 image is mapped into img[0 .. img_szB-1]. */ 144 UChar* img; 145 SizeT img_szB; 146 /* These two describe the Mach-O object of interest, which is 147 presumably somewhere inside the primary image. 148 map_image_aboard() below, which generates this info, will 149 carefully check that the macho_ fields denote a section of 150 memory that falls entirely inside img[0 .. img_szB-1]. */ 151 UChar* macho_img; 152 SizeT macho_img_szB; 153 } 154 ImageInfo; 155 156 157 Bool is_macho_object_file( const void* buf, SizeT szB ) 158 { 159 /* (JRS: the Mach-O headers might not be in this mapped data, 160 because we only mapped a page for this initial check, 161 or at least not very much, and what's at the start of the file 162 is in general a so-called fat header. The Mach-O object we're 163 interested in could be arbitrarily far along the image, and so 164 we can't assume its header will fall within this page.) */ 165 166 /* But we can say that either it's a fat object, in which case it 167 begins with a fat header, or it's unadorned Mach-O, in which 168 case it starts with a normal header. At least do what checks we 169 can to establish whether or not we're looking at something 170 sane. */ 171 172 const struct fat_header* fh_be = buf; 173 const struct mach_header_64* mh = buf; 174 175 assert(buf); 176 if (szB < sizeof(struct fat_header)) 177 return False; 178 if (ntohl(fh_be->magic) == FAT_MAGIC) 179 return True; 180 181 if (szB < sizeof(struct mach_header_64)) 182 return False; 183 if (mh->magic == MH_MAGIC_64) 184 return True; 185 186 return False; 187 } 188 189 190 /* Unmap an image mapped in by map_image_aboard. */ 191 static void unmap_image ( /*MOD*/ImageInfo* ii ) 192 { 193 Int r; 194 assert(ii->img); 195 assert(ii->img_szB > 0); 196 r = munmap( ii->img, ii->img_szB ); 197 /* Do we care if this fails? I suppose so; it would indicate 198 some fairly serious snafu with the mapping of the file. */ 199 assert( !r ); 200 memset(ii, 0, sizeof(*ii)); 201 } 202 203 204 /* Map a given fat or thin object aboard, find the thin part if 205 necessary, do some checks, and write details of both the fat and 206 thin parts into *ii. Returns 32 (and leaves the file unmapped) if 207 the thin part is a 32 bit file. Returns 64 if it's a 64 bit file. 208 Does not return on failure. Guarantees to return pointers to a 209 valid(ish) Mach-O image if it succeeds. */ 210 static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename ) 211 { 212 memset(ii, 0, sizeof(*ii)); 213 214 /* First off, try to map the thing in. */ 215 { SizeT size; 216 Int r, fd; 217 struct stat stat_buf; 218 219 r = stat(filename, &stat_buf); 220 if (r) 221 fail("Can't stat image (to determine its size)?!"); 222 size = stat_buf.st_size; 223 224 fd = open(filename, O_RDWR, 0); 225 if (fd == -1) 226 fail("Can't open image for possible modification!"); 227 if (DEBUGPRINTING) 228 printf("size %lu fd %d\n", size, fd); 229 void* v = mmap ( NULL, size, PROT_READ|PROT_WRITE, 230 MAP_FILE|MAP_SHARED, fd, 0 ); 231 if (v == MAP_FAILED) { 232 perror("mmap failed"); 233 fail("Can't mmap image for possible modification!"); 234 } 235 236 close(fd); 237 238 ii->img = (UChar*)v; 239 ii->img_szB = size; 240 } 241 242 /* Now it's mapped in and we have .img and .img_szB set. Look for 243 the embedded Mach-O object. If not findable, unmap and fail. */ 244 { struct fat_header* fh_be; 245 struct fat_header fh; 246 struct mach_header_64* mh; 247 248 // Assume initially that we have a thin image, and update 249 // these if it turns out to be fat. 250 ii->macho_img = ii->img; 251 ii->macho_img_szB = ii->img_szB; 252 253 // Check for fat header. 254 if (ii->img_szB < sizeof(struct fat_header)) 255 fail("Invalid Mach-O file (0 too small)."); 256 257 // Fat header is always BIG-ENDIAN 258 fh_be = (struct fat_header *)ii->img; 259 fh.magic = ntohl(fh_be->magic); 260 fh.nfat_arch = ntohl(fh_be->nfat_arch); 261 if (fh.magic == FAT_MAGIC) { 262 // Look for a good architecture. 263 struct fat_arch *arch_be; 264 struct fat_arch arch; 265 Int f; 266 if (ii->img_szB < sizeof(struct fat_header) 267 + fh.nfat_arch * sizeof(struct fat_arch)) 268 fail("Invalid Mach-O file (1 too small)."); 269 270 for (f = 0, arch_be = (struct fat_arch *)(fh_be+1); 271 f < fh.nfat_arch; 272 f++, arch_be++) { 273 Int cputype; 274 # if defined(PLAT_x86_darwin) 275 cputype = CPU_TYPE_X86; 276 # elif defined(PLAT_amd64_darwin) 277 cputype = CPU_TYPE_X86_64; 278 # else 279 # error "unknown architecture" 280 # endif 281 arch.cputype = ntohl(arch_be->cputype); 282 arch.cpusubtype = ntohl(arch_be->cpusubtype); 283 arch.offset = ntohl(arch_be->offset); 284 arch.size = ntohl(arch_be->size); 285 if (arch.cputype == cputype) { 286 if (ii->img_szB < arch.offset + arch.size) 287 fail("Invalid Mach-O file (2 too small)."); 288 ii->macho_img = ii->img + arch.offset; 289 ii->macho_img_szB = arch.size; 290 break; 291 } 292 } 293 if (f == fh.nfat_arch) 294 fail("No acceptable architecture found in fat file."); 295 } 296 297 /* Sanity check what we found. */ 298 299 /* assured by logic above */ 300 assert(ii->img_szB >= sizeof(struct fat_header)); 301 302 if (ii->macho_img_szB < sizeof(struct mach_header_64)) 303 fail("Invalid Mach-O file (3 too small)."); 304 305 if (ii->macho_img_szB > ii->img_szB) 306 fail("Invalid Mach-O file (thin bigger than fat)."); 307 308 if (ii->macho_img >= ii->img 309 && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) { 310 /* thin entirely within fat, as expected */ 311 } else { 312 fail("Invalid Mach-O file (thin not inside fat)."); 313 } 314 315 mh = (struct mach_header_64 *)ii->macho_img; 316 if (mh->magic == MH_MAGIC) { 317 assert(ii->img); 318 assert(ii->macho_img); 319 assert(ii->img_szB > 0); 320 assert(ii->macho_img_szB > 0); 321 assert(ii->macho_img >= ii->img); 322 assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB); 323 return 32; 324 } 325 if (mh->magic != MH_MAGIC_64) 326 fail("Invalid Mach-O file (bad magic)."); 327 328 if (ii->macho_img_szB < sizeof(struct mach_header_64) + mh->sizeofcmds) 329 fail("Invalid Mach-O file (4 too small)."); 330 } 331 332 assert(ii->img); 333 assert(ii->macho_img); 334 assert(ii->img_szB > 0); 335 assert(ii->macho_img_szB > 0); 336 assert(ii->macho_img >= ii->img); 337 assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB); 338 return 64; 339 } 340 341 342 /*------------------------------------------------------------*/ 343 /*--- ---*/ 344 /*--- Mach-O top-level processing ---*/ 345 /*--- ---*/ 346 /*------------------------------------------------------------*/ 347 348 void modify_macho_loadcmds ( HChar* filename, 349 ULong expected_stack_start, 350 ULong expected_stack_size ) 351 { 352 ImageInfo ii; 353 memset(&ii, 0, sizeof(ii)); 354 355 Int size = map_image_aboard( &ii, filename ); 356 if (size == 32) { 357 fprintf(stderr, "fixup_macho_loadcmds: Is 32-bit MachO file;" 358 " no modifications needed.\n"); 359 goto out; 360 } 361 362 assert(size == 64); 363 364 assert(ii.macho_img != NULL && ii.macho_img_szB > 0); 365 366 /* Poke around in the Mach-O header, to find some important 367 stuff. 368 * the location of the __UNIXSTACK load command, if any 369 * the location of the __LINKEDIT load command, if any 370 * the initial RSP value as stated in the LC_UNIXTHREAD 371 */ 372 373 /* The collected data */ 374 ULong init_rsp = 0; 375 Bool have_rsp = False; 376 struct segment_command_64* seg__unixstack = NULL; 377 struct segment_command_64* seg__linkedit = NULL; 378 379 /* Loop over the load commands and fill in the above 4 variables. */ 380 381 { struct mach_header_64 *mh = (struct mach_header_64 *)ii.macho_img; 382 struct load_command *cmd; 383 Int c; 384 385 for (c = 0, cmd = (struct load_command *)(mh+1); 386 c < mh->ncmds; 387 c++, cmd = (struct load_command *)(cmd->cmdsize 388 + (unsigned long)cmd)) { 389 if (DEBUGPRINTING) 390 printf("load cmd: offset %4lu size %3d kind %2d = ", 391 (unsigned long)((UChar*)cmd - (UChar*)ii.macho_img), 392 cmd->cmdsize, cmd->cmd); 393 394 switch (cmd->cmd) { 395 case LC_SEGMENT_64: 396 if (DEBUGPRINTING) 397 printf("LC_SEGMENT_64"); 398 break; 399 case LC_SYMTAB: 400 if (DEBUGPRINTING) 401 printf("LC_SYMTAB"); 402 break; 403 case LC_DYSYMTAB: 404 if (DEBUGPRINTING) 405 printf("LC_DYSYMTAB"); 406 break; 407 case LC_UUID: 408 if (DEBUGPRINTING) 409 printf("LC_UUID"); 410 break; 411 case LC_UNIXTHREAD: 412 if (DEBUGPRINTING) 413 printf("LC_UNIXTHREAD"); 414 break; 415 default: 416 printf("???"); 417 fail("unexpected load command in Mach header"); 418 break; 419 } 420 if (DEBUGPRINTING) 421 printf("\n"); 422 423 /* Note what the stated initial RSP value is, so we can 424 check it is as expected. */ 425 if (cmd->cmd == LC_UNIXTHREAD) { 426 struct thread_command* tcmd = (struct thread_command*)cmd; 427 UInt* w32s = (UInt*)( (UChar*)tcmd + sizeof(*tcmd) ); 428 if (DEBUGPRINTING) 429 printf("UnixThread: flavor %u = ", w32s[0]); 430 if (w32s[0] == x86_THREAD_STATE64 && !have_rsp) { 431 if (DEBUGPRINTING) 432 printf("x86_THREAD_STATE64\n"); 433 x86_thread_state64_t* state64 434 = (x86_thread_state64_t*)(&w32s[2]); 435 have_rsp = True; 436 init_rsp = state64->__rsp; 437 if (DEBUGPRINTING) 438 printf("rsp = 0x%llx\n", init_rsp); 439 } else { 440 if (DEBUGPRINTING) 441 printf("???"); 442 } 443 if (DEBUGPRINTING) 444 printf("\n"); 445 } 446 447 if (cmd->cmd == LC_SEGMENT_64) { 448 struct segment_command_64 *seg = (struct segment_command_64 *)cmd; 449 if (0 == strcmp(seg->segname, "__LINKEDIT")) 450 seg__linkedit = seg; 451 if (0 == strcmp(seg->segname, "__UNIXSTACK")) 452 seg__unixstack = seg; 453 } 454 455 } 456 } 457 458 /* 459 Actions are then as follows: 460 461 * (always) check the RSP value is as expected, and abort if not 462 463 * if there's a UNIXSTACK load command, check it is as expected. 464 If not abort, if yes, do nothing more. 465 466 * (so there's no UNIXSTACK load command). if there's a LINKEDIT 467 load command, check if it is minimally usable (has 0 for 468 nsects and flags). If yes, convert it to a UNIXSTACK load 469 command. If there is none, or is unusable, then we're out of 470 options and have to abort. 471 */ 472 if (!have_rsp) 473 fail("Can't find / check initial RSP setting"); 474 if (init_rsp != expected_stack_start + expected_stack_size) 475 fail("Initial RSP value not as expected"); 476 477 fprintf(stderr, "fixup_macho_loadcmds: " 478 "initial RSP is as expected (0x%llx)\n", 479 expected_stack_start + expected_stack_size ); 480 481 if (seg__unixstack) { 482 struct segment_command_64 *seg = seg__unixstack; 483 if (seg->vmaddr != expected_stack_start) 484 fail("has __UNIXSTACK, but wrong ::vmaddr"); 485 if (seg->vmsize != expected_stack_size) 486 fail("has __UNIXSTACK, but wrong ::vmsize"); 487 if (seg->maxprot != 7) 488 fail("has __UNIXSTACK, but wrong ::maxprot (should be 7)"); 489 if (seg->initprot != 3) 490 fail("has __UNIXSTACK, but wrong ::initprot (should be 3)"); 491 if (seg->nsects != 0) 492 fail("has __UNIXSTACK, but wrong ::nsects (should be 0)"); 493 if (seg->flags != 0) 494 fail("has __UNIXSTACK, but wrong ::flags (should be 0)"); 495 /* looks ok */ 496 fprintf(stderr, "fixup_macho_loadcmds: " 497 "acceptable __UNIXSTACK present; no modifications.\n" ); 498 goto out; 499 } 500 501 if (seg__linkedit) { 502 struct segment_command_64 *seg = seg__linkedit; 503 if (seg->nsects != 0) 504 fail("has __LINKEDIT, but wrong ::nsects (should be 0)"); 505 if (seg->flags != 0) 506 fail("has __LINKEDIT, but wrong ::flags (should be 0)"); 507 fprintf(stderr, "fixup_macho_loadcmds: " 508 "no __UNIXSTACK present.\n" ); 509 fprintf(stderr, "fixup_macho_loadcmds: " 510 "converting __LINKEDIT to __UNIXSTACK.\n" ); 511 strcpy(seg->segname, "__UNIXSTACK"); 512 seg->vmaddr = expected_stack_start; 513 seg->vmsize = expected_stack_size; 514 seg->fileoff = 0; 515 seg->filesize = 0; 516 seg->maxprot = 7; 517 seg->initprot = 3; 518 /* success */ 519 goto out; 520 } 521 522 /* out of options */ 523 fail("no __UNIXSTACK found and no usable __LINKEDIT found; " 524 "out of options."); 525 /* NOTREACHED */ 526 527 out: 528 if (ii.img) 529 unmap_image(&ii); 530 } 531 532 533 static Bool is_plausible_tool_exe_name ( HChar* nm ) 534 { 535 HChar* p; 536 if (!nm) 537 return False; 538 539 // Does it end with this string? 540 p = strstr(nm, "-x86-darwin"); 541 if (p && 0 == strcmp(p, "-x86-darwin")) 542 return True; 543 544 p = strstr(nm, "-amd64-darwin"); 545 if (p && 0 == strcmp(p, "-amd64-darwin")) 546 return True; 547 548 return False; 549 } 550 551 552 int main ( int argc, char** argv ) 553 { 554 Int r; 555 ULong req_stack_addr = 0; 556 ULong req_stack_size = 0; 557 558 if (argc != 4) 559 fail("args: -stack_addr-arg -stack_size-arg " 560 "name-of-tool-executable-to-modify"); 561 562 r= sscanf(argv[1], "0x%llx", &req_stack_addr); 563 if (r != 1) fail("invalid stack_addr arg"); 564 565 r= sscanf(argv[2], "0x%llx", &req_stack_size); 566 if (r != 1) fail("invalid stack_size arg"); 567 568 fprintf(stderr, "fixup_macho_loadcmds: " 569 "requested stack_addr (top) 0x%llx, " 570 "stack_size 0x%llx\n", req_stack_addr, req_stack_size ); 571 572 if (!is_plausible_tool_exe_name(argv[3])) 573 fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin"); 574 575 fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n", 576 argv[3] ); 577 modify_macho_loadcmds( argv[3], req_stack_addr - req_stack_size, 578 req_stack_size ); 579 580 return 0; 581 } 582 583 /* 584 cmd LC_SEGMENT_64 585 cmdsize 72 586 segname __LINKEDIT 587 vmaddr 0x0000000138dea000 588 vmsize 0x00000000000ad000 589 fileoff 2658304 590 filesize 705632 591 maxprot 0x00000007 592 initprot 0x00000001 593 nsects 0 594 flags 0x0 595 */ 596 597 /* 598 cmd LC_SEGMENT_64 599 cmdsize 72 600 segname __UNIXSTACK 601 vmaddr 0x0000000133800000 602 vmsize 0x0000000000800000 603 fileoff 2498560 604 filesize 0 605 maxprot 0x00000007 606 initprot 0x00000003 607 nsects 0 608 flags 0x0 609 */ 610