Home | History | Annotate | Download | only in coregrind
      1 
      2 /* Derived from Valgrind sources, coregrind/m_debuginfo/readmacho.c.
      3    GPL 2+ therefore.
      4 
      5    Can be compiled as either a 32- or 64-bit program (doesn't matter).
      6 */
      7 
      8 /* What does this program do?  In short it postprocesses tool
      9    executables on MacOSX, after linking using /usr/bin/ld.  This is so
     10    as to work around a bug in the linker on Xcode 4.0.0 and Xcode
     11    4.0.1.  Xcode versions prior to 4.0.0 are unaffected.
     12 
     13    The tracking bug is https://bugs.kde.org/show_bug.cgi?id=267997
     14 
     15    The bug causes 64-bit tool executables to segfault at startup,
     16    because:
     17 
     18    Comparing the MachO load commands vs a (working) tool executable
     19    that was created by Xcode 3.2.x, it appears that the new linker has
     20    partially ignored the build system's request to place the tool
     21    executable's stack at a non standard location.  The build system
     22    tells the linker "-stack_addr 0x134000000 -stack_size 0x800000".
     23 
     24    With the Xcode 3.2 linker those flags produce two results:
     25 
     26    (1) A load command to allocate the stack at the said location:
     27           Load command 3
     28                 cmd LC_SEGMENT_64
     29             cmdsize 72
     30             segname __UNIXSTACK
     31              vmaddr 0x0000000133800000
     32              vmsize 0x0000000000800000
     33             fileoff 2285568
     34            filesize 0
     35             maxprot 0x00000007
     36            initprot 0x00000003
     37              nsects 0
     38               flags 0x0
     39 
     40    (2) A request (in LC_UNIXTHREAD) to set %rsp to the correct value
     41        at process startup, 0x134000000.
     42 
     43    With Xcode 4.0.1, (1) is missing but (2) is still present.  The
     44    tool executable therefore starts up with %rsp pointing to unmapped
     45    memory and faults almost instantly.
     46 
     47    The workaround implemented by this program is documented in comment
     48    8 of bug 267997, viz:
     49 
     50    One really sick workaround is to observe that the executables
     51    contain a redundant MachO load command:
     52 
     53       Load command 2
     54             cmd LC_SEGMENT_64
     55         cmdsize 72
     56         segname __LINKEDIT
     57          vmaddr 0x0000000138dea000
     58          vmsize 0x00000000000ad000
     59         fileoff 2658304
     60        filesize 705632
     61         maxprot 0x00000007
     62        initprot 0x00000001
     63          nsects 0
     64           flags 0x0
     65 
     66    The described section presumably contains information intended for
     67    the dynamic linker, but is irrelevant because this is a statically
     68    linked executable.  Hence it might be possible to postprocess the
     69    executables after linking, to overwrite this entry with the
     70    information that would have been in the missing __UNIXSTACK entry.
     71    I tried this by hand (with a binary editor) earlier and got
     72    something that worked.
     73 */
     74 
     75 #define DEBUGPRINTING 0
     76 
     77 #include <assert.h>
     78 #include <stdlib.h>
     79 #include <stdio.h>
     80 #include <string.h>
     81 #include <sys/mman.h>
     82 #include <sys/stat.h>
     83 #include <unistd.h>
     84 #include <fcntl.h>
     85 
     86 
     87 #undef PLAT_x86_darwin
     88 #undef PLAT_amd64_darwin
     89 
     90 #if defined(__APPLE__) && defined(__i386__)
     91 #  define PLAT_x86_darwin 1
     92 #elif defined(__APPLE__) && defined(__x86_64__)
     93 #  define PLAT_amd64_darwin 1
     94 #else
     95 #  error "Can't be compiled on this platform"
     96 #endif
     97 
     98 #include <mach-o/loader.h>
     99 #include <mach-o/nlist.h>
    100 #include <mach-o/fat.h>
    101 #include <mach/i386/thread_status.h>
    102 
    103 
    104 typedef  unsigned char   UChar;
    105 typedef    signed char   Char;
    106 typedef           char   HChar; /* signfulness depends on host */
    107 
    108 typedef  unsigned int    UInt;
    109 typedef    signed int    Int;
    110 
    111 typedef  unsigned char   Bool;
    112 #define  True   ((Bool)1)
    113 #define  False  ((Bool)0)
    114 
    115 typedef  unsigned long   UWord;
    116 
    117 typedef  UWord           SizeT;
    118 typedef  UWord           Addr;
    119 
    120 typedef  unsigned long long int   ULong;
    121 typedef    signed long long int   Long;
    122 
    123 
    124 
    125 __attribute__((noreturn))
    126 void fail ( HChar* msg )
    127 {
    128    fprintf(stderr, "fixup_macho_loadcmds: fail: %s\n", msg);
    129    exit(1);
    130 }
    131 
    132 
    133 /*------------------------------------------------------------*/
    134 /*---                                                      ---*/
    135 /*--- Mach-O file mapping/unmapping helpers                ---*/
    136 /*---                                                      ---*/
    137 /*------------------------------------------------------------*/
    138 
    139 typedef
    140    struct {
    141       /* These two describe the entire mapped-in ("primary") image,
    142          fat headers, kitchen sink, whatnot: the entire file.  The
    143          image is mapped into img[0 .. img_szB-1]. */
    144       UChar* img;
    145       SizeT  img_szB;
    146       /* These two describe the Mach-O object of interest, which is
    147          presumably somewhere inside the primary image.
    148          map_image_aboard() below, which generates this info, will
    149          carefully check that the macho_ fields denote a section of
    150          memory that falls entirely inside img[0 .. img_szB-1]. */
    151       UChar* macho_img;
    152       SizeT  macho_img_szB;
    153    }
    154    ImageInfo;
    155 
    156 
    157 Bool is_macho_object_file( const void* buf, SizeT szB )
    158 {
    159    /* (JRS: the Mach-O headers might not be in this mapped data,
    160       because we only mapped a page for this initial check,
    161       or at least not very much, and what's at the start of the file
    162       is in general a so-called fat header.  The Mach-O object we're
    163       interested in could be arbitrarily far along the image, and so
    164       we can't assume its header will fall within this page.) */
    165 
    166    /* But we can say that either it's a fat object, in which case it
    167       begins with a fat header, or it's unadorned Mach-O, in which
    168       case it starts with a normal header.  At least do what checks we
    169       can to establish whether or not we're looking at something
    170       sane. */
    171 
    172    const struct fat_header*  fh_be = buf;
    173    const struct mach_header_64* mh    = buf;
    174 
    175    assert(buf);
    176    if (szB < sizeof(struct fat_header))
    177       return False;
    178    if (ntohl(fh_be->magic) == FAT_MAGIC)
    179       return True;
    180 
    181    if (szB < sizeof(struct mach_header_64))
    182       return False;
    183    if (mh->magic == MH_MAGIC_64)
    184       return True;
    185 
    186    return False;
    187 }
    188 
    189 
    190 /* Unmap an image mapped in by map_image_aboard. */
    191 static void unmap_image ( /*MOD*/ImageInfo* ii )
    192 {
    193    Int r;
    194    assert(ii->img);
    195    assert(ii->img_szB > 0);
    196    r = munmap( ii->img, ii->img_szB );
    197    /* Do we care if this fails?  I suppose so; it would indicate
    198       some fairly serious snafu with the mapping of the file. */
    199    assert( !r );
    200    memset(ii, 0, sizeof(*ii));
    201 }
    202 
    203 
    204 /* Map a given fat or thin object aboard, find the thin part if
    205    necessary, do some checks, and write details of both the fat and
    206    thin parts into *ii.  Returns 32 (and leaves the file unmapped) if
    207    the thin part is a 32 bit file.  Returns 64 if it's a 64 bit file.
    208    Does not return on failure.  Guarantees to return pointers to a
    209    valid(ish) Mach-O image if it succeeds. */
    210 static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename )
    211 {
    212    memset(ii, 0, sizeof(*ii));
    213 
    214    /* First off, try to map the thing in. */
    215    { SizeT  size;
    216      Int r, fd;
    217      struct stat stat_buf;
    218 
    219      r = stat(filename, &stat_buf);
    220      if (r)
    221         fail("Can't stat image (to determine its size)?!");
    222      size = stat_buf.st_size;
    223 
    224      fd = open(filename, O_RDWR, 0);
    225      if (fd == -1)
    226         fail("Can't open image for possible modification!");
    227      if (DEBUGPRINTING)
    228         printf("size %lu fd %d\n", size, fd);
    229      void* v = mmap ( NULL, size, PROT_READ|PROT_WRITE,
    230                                   MAP_FILE|MAP_SHARED, fd, 0 );
    231      if (v == MAP_FAILED) {
    232         perror("mmap failed");
    233         fail("Can't mmap image for possible modification!");
    234      }
    235 
    236      close(fd);
    237 
    238      ii->img     = (UChar*)v;
    239      ii->img_szB = size;
    240    }
    241 
    242    /* Now it's mapped in and we have .img and .img_szB set.  Look for
    243       the embedded Mach-O object.  If not findable, unmap and fail. */
    244    { struct fat_header*  fh_be;
    245      struct fat_header   fh;
    246      struct mach_header_64* mh;
    247 
    248      // Assume initially that we have a thin image, and update
    249      // these if it turns out to be fat.
    250      ii->macho_img     = ii->img;
    251      ii->macho_img_szB = ii->img_szB;
    252 
    253      // Check for fat header.
    254      if (ii->img_szB < sizeof(struct fat_header))
    255         fail("Invalid Mach-O file (0 too small).");
    256 
    257      // Fat header is always BIG-ENDIAN
    258      fh_be = (struct fat_header *)ii->img;
    259      fh.magic = ntohl(fh_be->magic);
    260      fh.nfat_arch = ntohl(fh_be->nfat_arch);
    261      if (fh.magic == FAT_MAGIC) {
    262         // Look for a good architecture.
    263         struct fat_arch *arch_be;
    264         struct fat_arch arch;
    265         Int f;
    266         if (ii->img_szB < sizeof(struct fat_header)
    267                           + fh.nfat_arch * sizeof(struct fat_arch))
    268            fail("Invalid Mach-O file (1 too small).");
    269 
    270         for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
    271              f < fh.nfat_arch;
    272              f++, arch_be++) {
    273            Int cputype;
    274 #          if defined(PLAT_x86_darwin)
    275            cputype = CPU_TYPE_X86;
    276 #          elif defined(PLAT_amd64_darwin)
    277            cputype = CPU_TYPE_X86_64;
    278 #          else
    279 #            error "unknown architecture"
    280 #          endif
    281            arch.cputype    = ntohl(arch_be->cputype);
    282            arch.cpusubtype = ntohl(arch_be->cpusubtype);
    283            arch.offset     = ntohl(arch_be->offset);
    284            arch.size       = ntohl(arch_be->size);
    285            if (arch.cputype == cputype) {
    286               if (ii->img_szB < arch.offset + arch.size)
    287                  fail("Invalid Mach-O file (2 too small).");
    288               ii->macho_img     = ii->img + arch.offset;
    289               ii->macho_img_szB = arch.size;
    290               break;
    291            }
    292         }
    293         if (f == fh.nfat_arch)
    294            fail("No acceptable architecture found in fat file.");
    295      }
    296 
    297      /* Sanity check what we found. */
    298 
    299      /* assured by logic above */
    300      assert(ii->img_szB >= sizeof(struct fat_header));
    301 
    302      if (ii->macho_img_szB < sizeof(struct mach_header_64))
    303         fail("Invalid Mach-O file (3 too small).");
    304 
    305      if (ii->macho_img_szB > ii->img_szB)
    306         fail("Invalid Mach-O file (thin bigger than fat).");
    307 
    308      if (ii->macho_img >= ii->img
    309          && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
    310         /* thin entirely within fat, as expected */
    311      } else {
    312         fail("Invalid Mach-O file (thin not inside fat).");
    313      }
    314 
    315      mh = (struct mach_header_64 *)ii->macho_img;
    316      if (mh->magic == MH_MAGIC) {
    317         assert(ii->img);
    318         assert(ii->macho_img);
    319         assert(ii->img_szB > 0);
    320         assert(ii->macho_img_szB > 0);
    321         assert(ii->macho_img >= ii->img);
    322         assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
    323         return 32;
    324      }
    325      if (mh->magic != MH_MAGIC_64)
    326         fail("Invalid Mach-O file (bad magic).");
    327 
    328      if (ii->macho_img_szB < sizeof(struct mach_header_64) + mh->sizeofcmds)
    329         fail("Invalid Mach-O file (4 too small).");
    330    }
    331 
    332    assert(ii->img);
    333    assert(ii->macho_img);
    334    assert(ii->img_szB > 0);
    335    assert(ii->macho_img_szB > 0);
    336    assert(ii->macho_img >= ii->img);
    337    assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
    338    return 64;
    339 }
    340 
    341 
    342 /*------------------------------------------------------------*/
    343 /*---                                                      ---*/
    344 /*--- Mach-O top-level processing                          ---*/
    345 /*---                                                      ---*/
    346 /*------------------------------------------------------------*/
    347 
    348 void modify_macho_loadcmds ( HChar* filename,
    349                              ULong  expected_stack_start,
    350                              ULong  expected_stack_size )
    351 {
    352    ImageInfo ii;
    353    memset(&ii, 0, sizeof(ii));
    354 
    355    Int size = map_image_aboard( &ii, filename );
    356    if (size == 32) {
    357       fprintf(stderr, "fixup_macho_loadcmds:   Is 32-bit MachO file;"
    358               " no modifications needed.\n");
    359       goto out;
    360    }
    361 
    362    assert(size == 64);
    363 
    364    assert(ii.macho_img != NULL && ii.macho_img_szB > 0);
    365 
    366    /* Poke around in the Mach-O header, to find some important
    367       stuff.
    368       * the location of the __UNIXSTACK load command, if any
    369       * the location of the __LINKEDIT load command, if any
    370       * the initial RSP value as stated in the LC_UNIXTHREAD
    371    */
    372 
    373    /* The collected data */
    374    ULong init_rsp = 0;
    375    Bool  have_rsp = False;
    376    struct segment_command_64* seg__unixstack = NULL;
    377    struct segment_command_64* seg__linkedit  = NULL;
    378 
    379    /* Loop over the load commands and fill in the above 4 variables. */
    380 
    381    { struct mach_header_64 *mh = (struct mach_header_64 *)ii.macho_img;
    382       struct load_command *cmd;
    383       Int c;
    384 
    385       for (c = 0, cmd = (struct load_command *)(mh+1);
    386            c < mh->ncmds;
    387            c++, cmd = (struct load_command *)(cmd->cmdsize
    388                                               + (unsigned long)cmd)) {
    389          if (DEBUGPRINTING)
    390             printf("load cmd: offset %4lu   size %3d   kind %2d = ",
    391                    (unsigned long)((UChar*)cmd - (UChar*)ii.macho_img),
    392                    cmd->cmdsize, cmd->cmd);
    393 
    394          switch (cmd->cmd) {
    395             case LC_SEGMENT_64:
    396                if (DEBUGPRINTING)
    397                   printf("LC_SEGMENT_64");
    398                break;
    399             case LC_SYMTAB:
    400                if (DEBUGPRINTING)
    401                   printf("LC_SYMTAB");
    402                break;
    403             case LC_DYSYMTAB:
    404                if (DEBUGPRINTING)
    405                   printf("LC_DYSYMTAB");
    406                break;
    407             case LC_UUID:
    408                if (DEBUGPRINTING)
    409                   printf("LC_UUID");
    410                break;
    411             case LC_UNIXTHREAD:
    412                if (DEBUGPRINTING)
    413                   printf("LC_UNIXTHREAD");
    414                break;
    415             default:
    416                   printf("???");
    417                fail("unexpected load command in Mach header");
    418             break;
    419          }
    420          if (DEBUGPRINTING)
    421             printf("\n");
    422 
    423          /* Note what the stated initial RSP value is, so we can
    424             check it is as expected. */
    425          if (cmd->cmd == LC_UNIXTHREAD) {
    426             struct thread_command* tcmd = (struct thread_command*)cmd;
    427             UInt* w32s = (UInt*)( (UChar*)tcmd + sizeof(*tcmd) );
    428             if (DEBUGPRINTING)
    429                printf("UnixThread: flavor %u = ", w32s[0]);
    430             if (w32s[0] == x86_THREAD_STATE64 && !have_rsp) {
    431                if (DEBUGPRINTING)
    432                   printf("x86_THREAD_STATE64\n");
    433                x86_thread_state64_t* state64
    434                   = (x86_thread_state64_t*)(&w32s[2]);
    435                have_rsp = True;
    436                init_rsp = state64->__rsp;
    437                if (DEBUGPRINTING)
    438                   printf("rsp = 0x%llx\n", init_rsp);
    439             } else {
    440                if (DEBUGPRINTING)
    441                   printf("???");
    442             }
    443             if (DEBUGPRINTING)
    444                printf("\n");
    445          }
    446 
    447          if (cmd->cmd == LC_SEGMENT_64) {
    448             struct segment_command_64 *seg = (struct segment_command_64 *)cmd;
    449             if (0 == strcmp(seg->segname, "__LINKEDIT"))
    450                seg__linkedit = seg;
    451             if (0 == strcmp(seg->segname, "__UNIXSTACK"))
    452                seg__unixstack = seg;
    453          }
    454 
    455       }
    456    }
    457 
    458    /*
    459       Actions are then as follows:
    460 
    461       * (always) check the RSP value is as expected, and abort if not
    462 
    463       * if there's a UNIXSTACK load command, check it is as expected.
    464         If not abort, if yes, do nothing more.
    465 
    466       * (so there's no UNIXSTACK load command).  if there's a LINKEDIT
    467         load command, check if it is minimally usable (has 0 for
    468         nsects and flags).  If yes, convert it to a UNIXSTACK load
    469         command.  If there is none, or is unusable, then we're out of
    470         options and have to abort.
    471    */
    472    if (!have_rsp)
    473       fail("Can't find / check initial RSP setting");
    474    if (init_rsp != expected_stack_start + expected_stack_size)
    475       fail("Initial RSP value not as expected");
    476 
    477    fprintf(stderr, "fixup_macho_loadcmds:   "
    478                    "initial RSP is as expected (0x%llx)\n",
    479                    expected_stack_start + expected_stack_size );
    480 
    481    if (seg__unixstack) {
    482       struct segment_command_64 *seg = seg__unixstack;
    483       if (seg->vmaddr != expected_stack_start)
    484          fail("has __UNIXSTACK, but wrong ::vmaddr");
    485       if (seg->vmsize != expected_stack_size)
    486          fail("has __UNIXSTACK, but wrong ::vmsize");
    487       if (seg->maxprot != 7)
    488          fail("has __UNIXSTACK, but wrong ::maxprot (should be 7)");
    489       if (seg->initprot != 3)
    490          fail("has __UNIXSTACK, but wrong ::initprot (should be 3)");
    491       if (seg->nsects != 0)
    492          fail("has __UNIXSTACK, but wrong ::nsects (should be 0)");
    493       if (seg->flags != 0)
    494          fail("has __UNIXSTACK, but wrong ::flags (should be 0)");
    495       /* looks ok */
    496       fprintf(stderr, "fixup_macho_loadcmds:   "
    497               "acceptable __UNIXSTACK present; no modifications.\n" );
    498       goto out;
    499    }
    500 
    501    if (seg__linkedit) {
    502       struct segment_command_64 *seg = seg__linkedit;
    503       if (seg->nsects != 0)
    504          fail("has __LINKEDIT, but wrong ::nsects (should be 0)");
    505       if (seg->flags != 0)
    506          fail("has __LINKEDIT, but wrong ::flags (should be 0)");
    507       fprintf(stderr, "fixup_macho_loadcmds:   "
    508               "no __UNIXSTACK present.\n" );
    509       fprintf(stderr, "fixup_macho_loadcmds:   "
    510               "converting __LINKEDIT to __UNIXSTACK.\n" );
    511       strcpy(seg->segname, "__UNIXSTACK");
    512       seg->vmaddr   = expected_stack_start;
    513       seg->vmsize   = expected_stack_size;
    514       seg->fileoff  = 0;
    515       seg->filesize = 0;
    516       seg->maxprot  = 7;
    517       seg->initprot = 3;
    518       /* success */
    519       goto out;
    520    }
    521 
    522    /* out of options */
    523    fail("no __UNIXSTACK found and no usable __LINKEDIT found; "
    524         "out of options.");
    525    /* NOTREACHED */
    526 
    527   out:
    528    if (ii.img)
    529       unmap_image(&ii);
    530 }
    531 
    532 
    533 static Bool is_plausible_tool_exe_name ( HChar* nm )
    534 {
    535    HChar* p;
    536    if (!nm)
    537       return False;
    538 
    539    // Does it end with this string?
    540    p = strstr(nm, "-x86-darwin");
    541    if (p && 0 == strcmp(p, "-x86-darwin"))
    542       return True;
    543 
    544    p = strstr(nm, "-amd64-darwin");
    545    if (p && 0 == strcmp(p, "-amd64-darwin"))
    546       return True;
    547 
    548    return False;
    549 }
    550 
    551 
    552 int main ( int argc, char** argv )
    553 {
    554    Int   r;
    555    ULong req_stack_addr = 0;
    556    ULong req_stack_size = 0;
    557 
    558    if (argc != 4)
    559       fail("args: -stack_addr-arg -stack_size-arg "
    560            "name-of-tool-executable-to-modify");
    561 
    562    r= sscanf(argv[1], "0x%llx", &req_stack_addr);
    563    if (r != 1) fail("invalid stack_addr arg");
    564 
    565    r= sscanf(argv[2], "0x%llx", &req_stack_size);
    566    if (r != 1) fail("invalid stack_size arg");
    567 
    568    fprintf(stderr, "fixup_macho_loadcmds: "
    569            "requested stack_addr (top) 0x%llx, "
    570            "stack_size 0x%llx\n", req_stack_addr, req_stack_size );
    571 
    572    if (!is_plausible_tool_exe_name(argv[3]))
    573       fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin");
    574 
    575    fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n",
    576            argv[3] );
    577    modify_macho_loadcmds( argv[3], req_stack_addr - req_stack_size,
    578                           req_stack_size );
    579 
    580    return 0;
    581 }
    582 
    583 /*
    584       cmd LC_SEGMENT_64
    585   cmdsize 72
    586   segname __LINKEDIT
    587    vmaddr 0x0000000138dea000
    588    vmsize 0x00000000000ad000
    589   fileoff 2658304
    590  filesize 705632
    591   maxprot 0x00000007
    592  initprot 0x00000001
    593    nsects 0
    594     flags 0x0
    595 */
    596 
    597 /*
    598       cmd LC_SEGMENT_64
    599   cmdsize 72
    600   segname __UNIXSTACK
    601    vmaddr 0x0000000133800000
    602    vmsize 0x0000000000800000
    603   fileoff 2498560
    604  filesize 0
    605   maxprot 0x00000007
    606  initprot 0x00000003
    607    nsects 0
    608     flags 0x0
    609 */
    610