Home | History | Annotate | Download | only in mach_override
      1 /*******************************************************************************
      2 	mach_override.c
      3 		Copyright (c) 2003-2009 Jonathan 'Wolf' Rentzsch: <http://rentzsch.com>
      4 		Some rights reserved: <http://opensource.org/licenses/mit-license.php>
      5 
      6 	***************************************************************************/
      7 #ifdef __APPLE__
      8 
      9 #include "mach_override.h"
     10 
     11 #include <mach-o/dyld.h>
     12 #include <mach/mach_host.h>
     13 #include <mach/mach_init.h>
     14 #include <mach/vm_map.h>
     15 #include <sys/mman.h>
     16 
     17 #include <CoreServices/CoreServices.h>
     18 
     19 //#define DEBUG_DISASM 1
     20 #undef DEBUG_DISASM
     21 
     22 /**************************
     23 *
     24 *	Constants
     25 *
     26 **************************/
     27 #pragma mark	-
     28 #pragma mark	(Constants)
     29 
     30 #if defined(__ppc__) || defined(__POWERPC__)
     31 
     32 long kIslandTemplate[] = {
     33 	0x9001FFFC,	//	stw		r0,-4(SP)
     34 	0x3C00DEAD,	//	lis		r0,0xDEAD
     35 	0x6000BEEF,	//	ori		r0,r0,0xBEEF
     36 	0x7C0903A6,	//	mtctr	r0
     37 	0x8001FFFC,	//	lwz		r0,-4(SP)
     38 	0x60000000,	//	nop		; optionally replaced
     39 	0x4E800420 	//	bctr
     40 };
     41 
     42 #define kAddressHi			3
     43 #define kAddressLo			5
     44 #define kInstructionHi		10
     45 #define kInstructionLo		11
     46 
     47 #elif defined(__i386__)
     48 
     49 #define kOriginalInstructionsSize 16
     50 
     51 char kIslandTemplate[] = {
     52 	// kOriginalInstructionsSize nop instructions so that we
     53 	// should have enough space to host original instructions
     54 	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
     55 	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
     56 	// Now the real jump instruction
     57 	0xE9, 0xEF, 0xBE, 0xAD, 0xDE
     58 };
     59 
     60 #define kInstructions	0
     61 #define kJumpAddress    kInstructions + kOriginalInstructionsSize + 1
     62 #elif defined(__x86_64__)
     63 
     64 #define kOriginalInstructionsSize 32
     65 
     66 #define kJumpAddress    kOriginalInstructionsSize + 6
     67 
     68 char kIslandTemplate[] = {
     69 	// kOriginalInstructionsSize nop instructions so that we
     70 	// should have enough space to host original instructions
     71 	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
     72 	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
     73 	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
     74 	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
     75 	// Now the real jump instruction
     76 	0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,
     77         0x00, 0x00, 0x00, 0x00,
     78         0x00, 0x00, 0x00, 0x00
     79 };
     80 
     81 #endif
     82 
     83 #define	kAllocateHigh		1
     84 #define	kAllocateNormal		0
     85 
     86 /**************************
     87 *
     88 *	Data Types
     89 *
     90 **************************/
     91 #pragma mark	-
     92 #pragma mark	(Data Types)
     93 
     94 typedef	struct	{
     95 	char	instructions[sizeof(kIslandTemplate)];
     96 	int		allocatedHigh;
     97 }	BranchIsland;
     98 
     99 /**************************
    100 *
    101 *	Funky Protos
    102 *
    103 **************************/
    104 #pragma mark	-
    105 #pragma mark	(Funky Protos)
    106 
    107 	mach_error_t
    108 allocateBranchIsland(
    109 		BranchIsland	**island,
    110 		int				allocateHigh,
    111 		void *originalFunctionAddress) __attribute__((visibility("hidden")));
    112 
    113 	mach_error_t
    114 freeBranchIsland(
    115 		BranchIsland	*island ) __attribute__((visibility("hidden")));
    116 
    117 	mach_error_t
    118 defaultIslandMalloc(
    119 	  void **ptr, size_t unused_size, void *hint) __attribute__((visibility("hidden")));
    120 
    121 	mach_error_t
    122 defaultIslandFree(
    123    	void *ptr) __attribute__((visibility("hidden")));
    124 
    125 #if defined(__ppc__) || defined(__POWERPC__)
    126 	mach_error_t
    127 setBranchIslandTarget(
    128 		BranchIsland	*island,
    129 		const void		*branchTo,
    130 		long			instruction ) __attribute__((visibility("hidden")));
    131 #endif
    132 
    133 #if defined(__i386__) || defined(__x86_64__)
    134 mach_error_t
    135 setBranchIslandTarget_i386(
    136 						   BranchIsland	*island,
    137 						   const void		*branchTo,
    138 						   char*			instructions ) __attribute__((visibility("hidden")));
    139 void
    140 atomic_mov64(
    141 		uint64_t *targetAddress,
    142 		uint64_t value ) __attribute__((visibility("hidden")));
    143 
    144 	static Boolean
    145 eatKnownInstructions(
    146 	unsigned char	*code,
    147 	uint64_t		*newInstruction,
    148 	int				*howManyEaten,
    149 	char			*originalInstructions,
    150 	int				*originalInstructionCount,
    151 	uint8_t			*originalInstructionSizes ) __attribute__((visibility("hidden")));
    152 
    153 	static void
    154 fixupInstructions(
    155     void		*originalFunction,
    156     void		*escapeIsland,
    157     void		*instructionsToFix,
    158 	int			instructionCount,
    159 	uint8_t		*instructionSizes ) __attribute__((visibility("hidden")));
    160 
    161 #ifdef DEBUG_DISASM
    162 	static void
    163 dump16Bytes(
    164 	void	*ptr);
    165 #endif  // DEBUG_DISASM
    166 #endif
    167 
    168 /*******************************************************************************
    169 *
    170 *	Interface
    171 *
    172 *******************************************************************************/
    173 #pragma mark	-
    174 #pragma mark	(Interface)
    175 
    176 #if defined(__i386__) || defined(__x86_64__)
    177 mach_error_t makeIslandExecutable(void *address) {
    178 	mach_error_t err = err_none;
    179     vm_size_t pageSize;
    180     host_page_size( mach_host_self(), &pageSize );
    181     uintptr_t page = (uintptr_t)address & ~(uintptr_t)(pageSize-1);
    182     int e = err_none;
    183     e |= mprotect((void *)page, pageSize, PROT_EXEC | PROT_READ | PROT_WRITE);
    184     e |= msync((void *)page, pageSize, MS_INVALIDATE );
    185     if (e) {
    186         err = err_cannot_override;
    187     }
    188     return err;
    189 }
    190 #endif
    191 
    192 		mach_error_t
    193 defaultIslandMalloc(
    194 	void **ptr, size_t unused_size, void *hint) {
    195   return allocateBranchIsland( (BranchIsland**)ptr, kAllocateHigh, hint );
    196 }
    197 		mach_error_t
    198 defaultIslandFree(
    199 	void *ptr) {
    200 	return freeBranchIsland(ptr);
    201 }
    202 
    203     mach_error_t
    204 __asan_mach_override_ptr(
    205 	void *originalFunctionAddress,
    206     const void *overrideFunctionAddress,
    207     void **originalFunctionReentryIsland )
    208 {
    209   return __asan_mach_override_ptr_custom(originalFunctionAddress,
    210 		overrideFunctionAddress,
    211 		originalFunctionReentryIsland,
    212 		defaultIslandMalloc,
    213 		defaultIslandFree);
    214 }
    215 
    216     mach_error_t
    217 __asan_mach_override_ptr_custom(
    218 	void *originalFunctionAddress,
    219     const void *overrideFunctionAddress,
    220     void **originalFunctionReentryIsland,
    221 		island_malloc *alloc,
    222 		island_free *dealloc)
    223 {
    224 	assert( originalFunctionAddress );
    225 	assert( overrideFunctionAddress );
    226 
    227 	// this addresses overriding such functions as AudioOutputUnitStart()
    228 	// test with modified DefaultOutputUnit project
    229 #if defined(__x86_64__)
    230     for(;;){
    231         if(*(uint16_t*)originalFunctionAddress==0x25FF)    // jmp qword near [rip+0x????????]
    232             originalFunctionAddress=*(void**)((char*)originalFunctionAddress+6+*(int32_t *)((uint16_t*)originalFunctionAddress+1));
    233         else break;
    234     }
    235 #elif defined(__i386__)
    236     for(;;){
    237         if(*(uint16_t*)originalFunctionAddress==0x25FF)    // jmp *0x????????
    238             originalFunctionAddress=**(void***)((uint16_t*)originalFunctionAddress+1);
    239         else break;
    240     }
    241 #endif
    242 #ifdef DEBUG_DISASM
    243   {
    244     fprintf(stderr, "Replacing function at %p\n", originalFunctionAddress);
    245     fprintf(stderr, "First 16 bytes of the function: ");
    246     unsigned char *orig = (unsigned char *)originalFunctionAddress;
    247     int i;
    248     for (i = 0; i < 16; i++) {
    249        fprintf(stderr, "%x ", (unsigned int) orig[i]);
    250     }
    251     fprintf(stderr, "\n");
    252     fprintf(stderr,
    253             "To disassemble, save the following function as disas.c"
    254             " and run:\n  gcc -c disas.c && gobjdump -d disas.o\n"
    255             "The first 16 bytes of the original function will start"
    256             " after four nop instructions.\n");
    257     fprintf(stderr, "\nvoid foo() {\n  asm volatile(\"nop;nop;nop;nop;\");\n");
    258     int j = 0;
    259     for (j = 0; j < 2; j++) {
    260       fprintf(stderr, "  asm volatile(\".byte ");
    261       for (i = 8 * j; i < 8 * (j+1) - 1; i++) {
    262         fprintf(stderr, "0x%x, ", (unsigned int) orig[i]);
    263       }
    264       fprintf(stderr, "0x%x;\");\n", (unsigned int) orig[8 * (j+1) - 1]);
    265     }
    266     fprintf(stderr, "}\n\n");
    267   }
    268 #endif
    269 
    270 	long	*originalFunctionPtr = (long*) originalFunctionAddress;
    271 	mach_error_t	err = err_none;
    272 
    273 #if defined(__ppc__) || defined(__POWERPC__)
    274 	//	Ensure first instruction isn't 'mfctr'.
    275 	#define	kMFCTRMask			0xfc1fffff
    276 	#define	kMFCTRInstruction	0x7c0903a6
    277 
    278 	long	originalInstruction = *originalFunctionPtr;
    279 	if( !err && ((originalInstruction & kMFCTRMask) == kMFCTRInstruction) )
    280 		err = err_cannot_override;
    281 #elif defined(__i386__) || defined(__x86_64__)
    282 	int eatenCount = 0;
    283 	int originalInstructionCount = 0;
    284 	char originalInstructions[kOriginalInstructionsSize];
    285 	uint8_t originalInstructionSizes[kOriginalInstructionsSize];
    286 	uint64_t jumpRelativeInstruction = 0; // JMP
    287 
    288 	Boolean overridePossible = eatKnownInstructions ((unsigned char *)originalFunctionPtr,
    289 										&jumpRelativeInstruction, &eatenCount,
    290 										originalInstructions, &originalInstructionCount,
    291 										originalInstructionSizes );
    292 #ifdef DEBUG_DISASM
    293   if (!overridePossible) fprintf(stderr, "overridePossible = false @%d\n", __LINE__);
    294 #endif
    295 	if (eatenCount > kOriginalInstructionsSize) {
    296 #ifdef DEBUG_DISASM
    297 		fprintf(stderr, "Too many instructions eaten\n");
    298 #endif
    299 		overridePossible = false;
    300 	}
    301 	if (!overridePossible) err = err_cannot_override;
    302 	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
    303 #endif
    304 
    305 	//	Make the original function implementation writable.
    306 	if( !err ) {
    307 		err = vm_protect( mach_task_self(),
    308 				(vm_address_t) originalFunctionPtr, 8, false,
    309 				(VM_PROT_ALL | VM_PROT_COPY) );
    310 		if( err )
    311 			err = vm_protect( mach_task_self(),
    312 					(vm_address_t) originalFunctionPtr, 8, false,
    313 					(VM_PROT_DEFAULT | VM_PROT_COPY) );
    314 	}
    315 	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
    316 
    317 	//	Allocate and target the escape island to the overriding function.
    318 	BranchIsland	*escapeIsland = NULL;
    319 	if( !err )
    320 		err = alloc( (void**)&escapeIsland, sizeof(BranchIsland), originalFunctionAddress );
    321 	if ( err ) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
    322 
    323 #if defined(__ppc__) || defined(__POWERPC__)
    324 	if( !err )
    325 		err = setBranchIslandTarget( escapeIsland, overrideFunctionAddress, 0 );
    326 
    327 	//	Build the branch absolute instruction to the escape island.
    328 	long	branchAbsoluteInstruction = 0; // Set to 0 just to silence warning.
    329 	if( !err ) {
    330 		long escapeIslandAddress = ((long) escapeIsland) & 0x3FFFFFF;
    331 		branchAbsoluteInstruction = 0x48000002 | escapeIslandAddress;
    332 	}
    333 #elif defined(__i386__) || defined(__x86_64__)
    334         if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
    335 
    336 	if( !err )
    337 		err = setBranchIslandTarget_i386( escapeIsland, overrideFunctionAddress, 0 );
    338 
    339 	if (err) fprintf(stderr, "err = %x %s:%d\n", err, __FILE__, __LINE__);
    340 	// Build the jump relative instruction to the escape island
    341 #endif
    342 
    343 
    344 #if defined(__i386__) || defined(__x86_64__)
    345 	if (!err) {
    346 		uint32_t addressOffset = ((char*)escapeIsland - (char*)originalFunctionPtr - 5);
    347 		addressOffset = OSSwapInt32(addressOffset);
    348 
    349 		jumpRelativeInstruction |= 0xE900000000000000LL;
    350 		jumpRelativeInstruction |= ((uint64_t)addressOffset & 0xffffffff) << 24;
    351 		jumpRelativeInstruction = OSSwapInt64(jumpRelativeInstruction);
    352 	}
    353 #endif
    354 
    355 	//	Optionally allocate & return the reentry island. This may contain relocated
    356 	//  jmp instructions and so has all the same addressing reachability requirements
    357 	//  the escape island has to the original function, except the escape island is
    358 	//  technically our original function.
    359 	BranchIsland	*reentryIsland = NULL;
    360 	if( !err && originalFunctionReentryIsland ) {
    361 		err = alloc( (void**)&reentryIsland, sizeof(BranchIsland), escapeIsland);
    362 		if( !err )
    363 			*originalFunctionReentryIsland = reentryIsland;
    364 	}
    365 
    366 #if defined(__ppc__) || defined(__POWERPC__)
    367 	//	Atomically:
    368 	//	o If the reentry island was allocated:
    369 	//		o Insert the original instruction into the reentry island.
    370 	//		o Target the reentry island at the 2nd instruction of the
    371 	//		  original function.
    372 	//	o Replace the original instruction with the branch absolute.
    373 	if( !err ) {
    374 		int escapeIslandEngaged = false;
    375 		do {
    376 			if( reentryIsland )
    377 				err = setBranchIslandTarget( reentryIsland,
    378 						(void*) (originalFunctionPtr+1), originalInstruction );
    379 			if( !err ) {
    380 				escapeIslandEngaged = CompareAndSwap( originalInstruction,
    381 										branchAbsoluteInstruction,
    382 										(UInt32*)originalFunctionPtr );
    383 				if( !escapeIslandEngaged ) {
    384 					//	Someone replaced the instruction out from under us,
    385 					//	re-read the instruction, make sure it's still not
    386 					//	'mfctr' and try again.
    387 					originalInstruction = *originalFunctionPtr;
    388 					if( (originalInstruction & kMFCTRMask) == kMFCTRInstruction)
    389 						err = err_cannot_override;
    390 				}
    391 			}
    392 		} while( !err && !escapeIslandEngaged );
    393 	}
    394 #elif defined(__i386__) || defined(__x86_64__)
    395 	// Atomically:
    396 	//	o If the reentry island was allocated:
    397 	//		o Insert the original instructions into the reentry island.
    398 	//		o Target the reentry island at the first non-replaced
    399 	//        instruction of the original function.
    400 	//	o Replace the original first instructions with the jump relative.
    401 	//
    402 	// Note that on i386, we do not support someone else changing the code under our feet
    403 	if ( !err ) {
    404 		fixupInstructions(originalFunctionPtr, reentryIsland, originalInstructions,
    405 					originalInstructionCount, originalInstructionSizes );
    406 
    407 		if( reentryIsland )
    408 			err = setBranchIslandTarget_i386( reentryIsland,
    409 										 (void*) ((char *)originalFunctionPtr+eatenCount), originalInstructions );
    410 		// try making islands executable before planting the jmp
    411 #if defined(__x86_64__) || defined(__i386__)
    412         if( !err )
    413             err = makeIslandExecutable(escapeIsland);
    414         if( !err && reentryIsland )
    415             err = makeIslandExecutable(reentryIsland);
    416 #endif
    417 		if ( !err )
    418 			atomic_mov64((uint64_t *)originalFunctionPtr, jumpRelativeInstruction);
    419 	}
    420 #endif
    421 
    422 	//	Clean up on error.
    423 	if( err ) {
    424 		if( reentryIsland )
    425 			dealloc( reentryIsland );
    426 		if( escapeIsland )
    427 			dealloc( escapeIsland );
    428 	}
    429 
    430 #ifdef DEBUG_DISASM
    431   {
    432     fprintf(stderr, "First 16 bytes of the function after slicing: ");
    433     unsigned char *orig = (unsigned char *)originalFunctionAddress;
    434     int i;
    435     for (i = 0; i < 16; i++) {
    436        fprintf(stderr, "%x ", (unsigned int) orig[i]);
    437     }
    438     fprintf(stderr, "\n");
    439   }
    440 #endif
    441 	return err;
    442 }
    443 
    444 /*******************************************************************************
    445 *
    446 *	Implementation
    447 *
    448 *******************************************************************************/
    449 #pragma mark	-
    450 #pragma mark	(Implementation)
    451 
    452 /***************************************************************************//**
    453 	Implementation: Allocates memory for a branch island.
    454 
    455 	@param	island			<-	The allocated island.
    456 	@param	allocateHigh	->	Whether to allocate the island at the end of the
    457 								address space (for use with the branch absolute
    458 								instruction).
    459 	@result					<-	mach_error_t
    460 
    461 	***************************************************************************/
    462 
    463 	mach_error_t
    464 allocateBranchIsland(
    465 		BranchIsland	**island,
    466 		int				allocateHigh,
    467 		void *originalFunctionAddress)
    468 {
    469 	assert( island );
    470 
    471 	mach_error_t	err = err_none;
    472 
    473 	if( allocateHigh ) {
    474 		vm_size_t pageSize;
    475 		err = host_page_size( mach_host_self(), &pageSize );
    476 		if( !err ) {
    477 			assert( sizeof( BranchIsland ) <= pageSize );
    478 #if defined(__ppc__) || defined(__POWERPC__)
    479 			vm_address_t first = 0xfeffffff;
    480 			vm_address_t last = 0xfe000000 + pageSize;
    481 #elif defined(__x86_64__)
    482 			vm_address_t first = ((uint64_t)originalFunctionAddress & ~(uint64_t)(((uint64_t)1 << 31) - 1)) | ((uint64_t)1 << 31); // start in the middle of the page?
    483 			vm_address_t last = 0x0;
    484 #else
    485 			vm_address_t first = 0xffc00000;
    486 			vm_address_t last = 0xfffe0000;
    487 #endif
    488 
    489 			vm_address_t page = first;
    490 			int allocated = 0;
    491 			vm_map_t task_self = mach_task_self();
    492 
    493 			while( !err && !allocated && page != last ) {
    494 
    495 				err = vm_allocate( task_self, &page, pageSize, 0 );
    496 				if( err == err_none )
    497 					allocated = 1;
    498 				else if( err == KERN_NO_SPACE ) {
    499 #if defined(__x86_64__)
    500 					page -= pageSize;
    501 #else
    502 					page += pageSize;
    503 #endif
    504 					err = err_none;
    505 				}
    506 			}
    507 			if( allocated )
    508 				*island = (BranchIsland*) page;
    509 			else if( !allocated && !err )
    510 				err = KERN_NO_SPACE;
    511 		}
    512 	} else {
    513 		void *block = malloc( sizeof( BranchIsland ) );
    514 		if( block )
    515 			*island = block;
    516 		else
    517 			err = KERN_NO_SPACE;
    518 	}
    519 	if( !err )
    520 		(**island).allocatedHigh = allocateHigh;
    521 
    522 	return err;
    523 }
    524 
    525 /***************************************************************************//**
    526 	Implementation: Deallocates memory for a branch island.
    527 
    528 	@param	island	->	The island to deallocate.
    529 	@result			<-	mach_error_t
    530 
    531 	***************************************************************************/
    532 
    533 	mach_error_t
    534 freeBranchIsland(
    535 		BranchIsland	*island )
    536 {
    537 	assert( island );
    538 	assert( (*(long*)&island->instructions[0]) == kIslandTemplate[0] );
    539 	assert( island->allocatedHigh );
    540 
    541 	mach_error_t	err = err_none;
    542 
    543 	if( island->allocatedHigh ) {
    544 		vm_size_t pageSize;
    545 		err = host_page_size( mach_host_self(), &pageSize );
    546 		if( !err ) {
    547 			assert( sizeof( BranchIsland ) <= pageSize );
    548 			err = vm_deallocate(
    549 					mach_task_self(),
    550 					(vm_address_t) island, pageSize );
    551 		}
    552 	} else {
    553 		free( island );
    554 	}
    555 
    556 	return err;
    557 }
    558 
    559 /***************************************************************************//**
    560 	Implementation: Sets the branch island's target, with an optional
    561 	instruction.
    562 
    563 	@param	island		->	The branch island to insert target into.
    564 	@param	branchTo	->	The address of the target.
    565 	@param	instruction	->	Optional instruction to execute prior to branch. Set
    566 							to zero for nop.
    567 	@result				<-	mach_error_t
    568 
    569 	***************************************************************************/
    570 #if defined(__ppc__) || defined(__POWERPC__)
    571 	mach_error_t
    572 setBranchIslandTarget(
    573 		BranchIsland	*island,
    574 		const void		*branchTo,
    575 		long			instruction )
    576 {
    577 	//	Copy over the template code.
    578     bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );
    579 
    580     //	Fill in the address.
    581     ((short*)island->instructions)[kAddressLo] = ((long) branchTo) & 0x0000FFFF;
    582     ((short*)island->instructions)[kAddressHi]
    583     	= (((long) branchTo) >> 16) & 0x0000FFFF;
    584 
    585     //	Fill in the (optional) instuction.
    586     if( instruction != 0 ) {
    587         ((short*)island->instructions)[kInstructionLo]
    588         	= instruction & 0x0000FFFF;
    589         ((short*)island->instructions)[kInstructionHi]
    590         	= (instruction >> 16) & 0x0000FFFF;
    591     }
    592 
    593     //MakeDataExecutable( island->instructions, sizeof( kIslandTemplate ) );
    594 	msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );
    595 
    596     return err_none;
    597 }
    598 #endif
    599 
    600 #if defined(__i386__)
    601 	mach_error_t
    602 setBranchIslandTarget_i386(
    603 	BranchIsland	*island,
    604 	const void		*branchTo,
    605 	char*			instructions )
    606 {
    607 
    608 	//	Copy over the template code.
    609     bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );
    610 
    611 	// copy original instructions
    612 	if (instructions) {
    613 		bcopy (instructions, island->instructions + kInstructions, kOriginalInstructionsSize);
    614 	}
    615 
    616     // Fill in the address.
    617     int32_t addressOffset = (char *)branchTo - (island->instructions + kJumpAddress + 4);
    618     *((int32_t *)(island->instructions + kJumpAddress)) = addressOffset;
    619 
    620     msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );
    621     return err_none;
    622 }
    623 
    624 #elif defined(__x86_64__)
    625 mach_error_t
    626 setBranchIslandTarget_i386(
    627         BranchIsland	*island,
    628         const void		*branchTo,
    629         char*			instructions )
    630 {
    631     // Copy over the template code.
    632     bcopy( kIslandTemplate, island->instructions, sizeof( kIslandTemplate ) );
    633 
    634     // Copy original instructions.
    635     if (instructions) {
    636         bcopy (instructions, island->instructions, kOriginalInstructionsSize);
    637     }
    638 
    639     //	Fill in the address.
    640     *((uint64_t *)(island->instructions + kJumpAddress)) = (uint64_t)branchTo;
    641     msync( island->instructions, sizeof( kIslandTemplate ), MS_INVALIDATE );
    642 
    643     return err_none;
    644 }
    645 #endif
    646 
    647 
    648 #if defined(__i386__) || defined(__x86_64__)
    649 // simplistic instruction matching
    650 typedef struct {
    651 	unsigned int length; // max 15
    652 	unsigned char mask[15]; // sequence of bytes in memory order
    653 	unsigned char constraint[15]; // sequence of bytes in memory order
    654 }	AsmInstructionMatch;
    655 
    656 #if defined(__i386__)
    657 static AsmInstructionMatch possibleInstructions[] = {
    658 	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE9, 0x00, 0x00, 0x00, 0x00} },	// jmp 0x????????
    659 	{ 0x5, {0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, {0x55, 0x89, 0xe5, 0xc9, 0xc3} },	// push %esp; mov %esp,%ebp; leave; ret
    660 	{ 0x1, {0xFF}, {0x90} },							// nop
    661 	{ 0x1, {0xF8}, {0x50} },							// push %reg
    662 	{ 0x2, {0xFF, 0xFF}, {0x89, 0xE5} },				                // mov %esp,%ebp
    663 	{ 0x3, {0xFF, 0xFF, 0xFF}, {0x89, 0x1C, 0x24} },				                // mov %ebx,(%esp)
    664 	{ 0x3, {0xFF, 0xFF, 0x00}, {0x83, 0xEC, 0x00} },	                        // sub 0x??, %esp
    665 	{ 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00}, {0x81, 0xEC, 0x00, 0x00, 0x00, 0x00} },	// sub 0x??, %esp with 32bit immediate
    666 	{ 0x2, {0xFF, 0xFF}, {0x31, 0xC0} },						// xor %eax, %eax
    667 	{ 0x3, {0xFF, 0x4F, 0x00}, {0x8B, 0x45, 0x00} },  // mov $imm(%ebp), %reg
    668 	{ 0x3, {0xFF, 0x4C, 0x00}, {0x8B, 0x40, 0x00} },  // mov $imm(%eax-%edx), %reg
    669 	{ 0x3, {0xFF, 0xCF, 0x00}, {0x8B, 0x4D, 0x00} },  // mov $imm(%rpb), %reg
    670 	{ 0x3, {0xFF, 0x4F, 0x00}, {0x8A, 0x4D, 0x00} },  // mov $imm(%ebp), %cl
    671 	{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x8B, 0x4C, 0x24, 0x00} },  			// mov $imm(%esp), %ecx
    672 	{ 0x4, {0xFF, 0x00, 0x00, 0x00}, {0x8B, 0x00, 0x00, 0x00} },  			// mov r16,r/m16 or r32,r/m32
    673 	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xB9, 0x00, 0x00, 0x00, 0x00} }, 	// mov $imm, %ecx
    674 	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xB8, 0x00, 0x00, 0x00, 0x00} }, 	// mov $imm, %eax
    675 	{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x66, 0x0F, 0xEF, 0x00} },             	// pxor xmm2/128, xmm1
    676 	{ 0x2, {0xFF, 0xFF}, {0xDB, 0xE3} }, 						// fninit
    677 	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE8, 0x00, 0x00, 0x00, 0x00} },	// call $imm
    678 	{ 0x0 }
    679 };
    680 #elif defined(__x86_64__)
    681 // TODO(glider): disassembling the "0x48, 0x89" sequences is trickier than it's done below.
    682 // If it stops working, refer to http://ref.x86asm.net/geek.html#modrm_byte_32_64 to do it
    683 // more accurately.
    684 // Note: 0x48 is in fact the REX.W prefix, but it might be wrong to treat it as a separate
    685 // instruction.
    686 static AsmInstructionMatch possibleInstructions[] = {
    687 	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0xE9, 0x00, 0x00, 0x00, 0x00} },	// jmp 0x????????
    688 	{ 0x1, {0xFF}, {0x90} },							// nop
    689 	{ 0x1, {0xF8}, {0x50} },							// push %rX
    690 	{ 0x1, {0xFF}, {0x65} },							// GS prefix
    691 	{ 0x3, {0xFF, 0xFF, 0xFF}, {0x48, 0x89, 0xE5} },				// mov %rsp,%rbp
    692 	{ 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x48, 0x83, 0xEC, 0x00} },	                // sub 0x??, %rsp
    693 	{ 0x4, {0xFB, 0xFF, 0x07, 0x00}, {0x48, 0x89, 0x05, 0x00} },	                // move onto rbp
    694 	{ 0x3, {0xFB, 0xFF, 0x00}, {0x48, 0x89, 0x00} },	                            // mov %reg, %reg
    695 	{ 0x3, {0xFB, 0xFF, 0x00}, {0x49, 0x89, 0x00} },	                            // mov %reg, %reg (REX.WB)
    696 	{ 0x2, {0xFF, 0x00}, {0x41, 0x00} },						// push %rXX
    697 	{ 0x2, {0xFF, 0x00}, {0x85, 0x00} },						// test %rX,%rX
    698 	{ 0x2, {0xFF, 0x00}, {0x77, 0x00} },						// ja $i8
    699 	{ 0x2, {0xFF, 0x00}, {0x74, 0x00} },						// je $i8
    700 	{ 0x5, {0xF8, 0x00, 0x00, 0x00, 0x00}, {0xB8, 0x00, 0x00, 0x00, 0x00} },	// mov $imm, %reg
    701 	{ 0x3, {0xFF, 0xFF, 0x00}, {0xFF, 0x77, 0x00} },				// pushq $imm(%rdi)
    702 	{ 0x2, {0xFF, 0xFF}, {0x31, 0xC0} },						// xor %eax, %eax
    703 	{ 0x5, {0xFF, 0x00, 0x00, 0x00, 0x00}, {0x25, 0x00, 0x00, 0x00, 0x00} },	// and $imm, %eax
    704 	{ 0x3, {0xFF, 0xFF, 0xFF}, {0x80, 0x3F, 0x00} },				// cmpb $imm, (%rdi)
    705 
    706   { 0x8, {0xFF, 0xFF, 0xCF, 0xFF, 0x00, 0x00, 0x00, 0x00},
    707          {0x48, 0x8B, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00}, },                     // mov $imm, %{rax,rdx,rsp,rsi}
    708   { 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x48, 0x83, 0xFA, 0x00}, },   // cmp $i8, %rdx
    709 	{ 0x4, {0xFF, 0xFF, 0x00, 0x00}, {0x83, 0x7f, 0x00, 0x00}, },			// cmpl $imm, $imm(%rdi)
    710 	{ 0xa, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
    711                {0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} },    // mov $imm, %rax
    712         { 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00},
    713                {0x81, 0xE6, 0x00, 0x00, 0x00, 0x00} },                            // and $imm, %esi
    714         { 0x6, {0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00},
    715                {0xFF, 0x25, 0x00, 0x00, 0x00, 0x00} },                            // jmpq *(%rip)
    716         { 0x4, {0xFF, 0xFF, 0xFF, 0x00}, {0x66, 0x0F, 0xEF, 0x00} },              // pxor xmm2/128, xmm1
    717         { 0x2, {0xFF, 0x00}, {0x89, 0x00} },                               // mov r/m32,r32 or r/m16,r16
    718         { 0x3, {0xFF, 0xFF, 0xFF}, {0x49, 0x89, 0xF8} },                   // mov %rdi,%r8
    719         { 0x3, {0xFF, 0xFF, 0x00}, {0xFF, 0x77, 0x00} },  // pushq $imm(%rdi)
    720         { 0x2, {0xFF, 0xFF}, {0xDB, 0xE3} }, // fninit
    721 	{ 0x0 }
    722 };
    723 #endif
    724 
    725 static Boolean codeMatchesInstruction(unsigned char *code, AsmInstructionMatch* instruction)
    726 {
    727 	Boolean match = true;
    728 
    729 	size_t i;
    730   assert(instruction);
    731 #ifdef DEBUG_DISASM
    732 	fprintf(stderr, "Matching: ");
    733 #endif
    734 	for (i=0; i<instruction->length; i++) {
    735 		unsigned char mask = instruction->mask[i];
    736 		unsigned char constraint = instruction->constraint[i];
    737 		unsigned char codeValue = code[i];
    738 #ifdef DEBUG_DISASM
    739 		fprintf(stderr, "%x ", (unsigned)codeValue);
    740 #endif
    741 		match = ((codeValue & mask) == constraint);
    742 		if (!match) break;
    743 	}
    744 #ifdef DEBUG_DISASM
    745 	if (match) {
    746 		fprintf(stderr, " OK\n");
    747 	} else {
    748 		fprintf(stderr, " FAIL\n");
    749 	}
    750 #endif
    751 	return match;
    752 }
    753 
    754 #if defined(__i386__) || defined(__x86_64__)
    755 	static Boolean
    756 eatKnownInstructions(
    757 	unsigned char	*code,
    758 	uint64_t		*newInstruction,
    759 	int				*howManyEaten,
    760 	char			*originalInstructions,
    761 	int				*originalInstructionCount,
    762 	uint8_t			*originalInstructionSizes )
    763 {
    764 	Boolean allInstructionsKnown = true;
    765 	int totalEaten = 0;
    766 	unsigned char* ptr = code;
    767 	int remainsToEat = 5; // a JMP instruction takes 5 bytes
    768 	int instructionIndex = 0;
    769 
    770 	if (howManyEaten) *howManyEaten = 0;
    771 	if (originalInstructionCount) *originalInstructionCount = 0;
    772 	while (remainsToEat > 0) {
    773 		Boolean curInstructionKnown = false;
    774 
    775 		// See if instruction matches one  we know
    776 		AsmInstructionMatch* curInstr = possibleInstructions;
    777 		do {
    778 			if ((curInstructionKnown = codeMatchesInstruction(ptr, curInstr))) break;
    779 			curInstr++;
    780 		} while (curInstr->length > 0);
    781 
    782 		// if all instruction matches failed, we don't know current instruction then, stop here
    783 		if (!curInstructionKnown) {
    784 			allInstructionsKnown = false;
    785 			fprintf(stderr, "mach_override: some instructions unknown! Need to update mach_override.c\n");
    786 			break;
    787 		}
    788 
    789 		// At this point, we've matched curInstr
    790 		int eaten = curInstr->length;
    791 		ptr += eaten;
    792 		remainsToEat -= eaten;
    793 		totalEaten += eaten;
    794 
    795 		if (originalInstructionSizes) originalInstructionSizes[instructionIndex] = eaten;
    796 		instructionIndex += 1;
    797 		if (originalInstructionCount) *originalInstructionCount = instructionIndex;
    798 	}
    799 
    800 
    801 	if (howManyEaten) *howManyEaten = totalEaten;
    802 
    803 	if (originalInstructions) {
    804 		Boolean enoughSpaceForOriginalInstructions = (totalEaten < kOriginalInstructionsSize);
    805 
    806 		if (enoughSpaceForOriginalInstructions) {
    807 			memset(originalInstructions, 0x90 /* NOP */, kOriginalInstructionsSize); // fill instructions with NOP
    808 			bcopy(code, originalInstructions, totalEaten);
    809 		} else {
    810 #ifdef DEBUG_DISASM
    811 			fprintf(stderr, "Not enough space in island to store original instructions. Adapt the island definition and kOriginalInstructionsSize\n");
    812 #endif
    813 			return false;
    814 		}
    815 	}
    816 
    817 	if (allInstructionsKnown) {
    818 		// save last 3 bytes of first 64bits of codre we'll replace
    819 		uint64_t currentFirst64BitsOfCode = *((uint64_t *)code);
    820 		currentFirst64BitsOfCode = OSSwapInt64(currentFirst64BitsOfCode); // back to memory representation
    821 		currentFirst64BitsOfCode &= 0x0000000000FFFFFFLL;
    822 
    823 		// keep only last 3 instructions bytes, first 5 will be replaced by JMP instr
    824 		*newInstruction &= 0xFFFFFFFFFF000000LL; // clear last 3 bytes
    825 		*newInstruction |= (currentFirst64BitsOfCode & 0x0000000000FFFFFFLL); // set last 3 bytes
    826 	}
    827 
    828 	return allInstructionsKnown;
    829 }
    830 
    831 	static void
    832 fixupInstructions(
    833     void		*originalFunction,
    834     void		*escapeIsland,
    835     void		*instructionsToFix,
    836 	int			instructionCount,
    837 	uint8_t		*instructionSizes )
    838 {
    839 	void *initialOriginalFunction = originalFunction;
    840 	int	index, fixed_size, code_size = 0;
    841 	for (index = 0;index < instructionCount;index += 1)
    842 		code_size += instructionSizes[index];
    843 
    844 #ifdef DEBUG_DISASM
    845 	void *initialInstructionsToFix = instructionsToFix;
    846 	fprintf(stderr, "BEFORE FIXING:\n");
    847 	dump16Bytes(initialOriginalFunction);
    848 	dump16Bytes(initialInstructionsToFix);
    849 #endif  // DEBUG_DISASM
    850 
    851 	for (index = 0;index < instructionCount;index += 1)
    852 	{
    853                 fixed_size = instructionSizes[index];
    854 		if ((*(uint8_t*)instructionsToFix == 0xE9) || // 32-bit jump relative
    855 		    (*(uint8_t*)instructionsToFix == 0xE8))   // 32-bit call relative
    856 		{
    857 			uint32_t offset = (uintptr_t)originalFunction - (uintptr_t)escapeIsland;
    858 			uint32_t *jumpOffsetPtr = (uint32_t*)((uintptr_t)instructionsToFix + 1);
    859 			*jumpOffsetPtr += offset;
    860 		}
    861 		if ((*(uint8_t*)instructionsToFix == 0x74) ||  // Near jump if equal (je), 2 bytes.
    862 		    (*(uint8_t*)instructionsToFix == 0x77))    // Near jump if above (ja), 2 bytes.
    863 		{
    864 			// We replace a near je/ja instruction, "7P JJ", with a 32-bit je/ja, "0F 8P WW XX YY ZZ".
    865 			// This is critical, otherwise a near jump will likely fall outside the original function.
    866 			uint32_t offset = (uintptr_t)initialOriginalFunction - (uintptr_t)escapeIsland;
    867 			uint32_t jumpOffset = *(uint8_t*)((uintptr_t)instructionsToFix + 1);
    868 			*(uint8_t*)(instructionsToFix + 1) = *(uint8_t*)instructionsToFix + 0x10;
    869 			*(uint8_t*)instructionsToFix = 0x0F;
    870 			uint32_t *jumpOffsetPtr = (uint32_t*)((uintptr_t)instructionsToFix + 2 );
    871 			*jumpOffsetPtr = offset + jumpOffset;
    872 			fixed_size = 6;
    873                 }
    874 
    875 		originalFunction = (void*)((uintptr_t)originalFunction + instructionSizes[index]);
    876 		escapeIsland = (void*)((uintptr_t)escapeIsland + instructionSizes[index]);
    877 		instructionsToFix = (void*)((uintptr_t)instructionsToFix + fixed_size);
    878 
    879 		// Expanding short instructions into longer ones may overwrite the next instructions,
    880 		// so we must restore them.
    881 		code_size -= fixed_size;
    882 		if ((code_size > 0) && (fixed_size != instructionSizes[index])) {
    883 			bcopy(originalFunction, instructionsToFix, code_size);
    884 		}
    885 	}
    886 #ifdef DEBUG_DISASM
    887 	fprintf(stderr, "AFTER_FIXING:\n");
    888 	dump16Bytes(initialOriginalFunction);
    889 	dump16Bytes(initialInstructionsToFix);
    890 #endif  // DEBUG_DISASM
    891 }
    892 
    893 #ifdef DEBUG_DISASM
    894 #define HEX_DIGIT(x) ((((x) % 16) < 10) ? ('0' + ((x) % 16)) : ('A' + ((x) % 16 - 10)))
    895 
    896 	static void
    897 dump16Bytes(
    898 	void 	*ptr) {
    899 	int i;
    900 	char buf[3];
    901 	uint8_t *bytes = (uint8_t*)ptr;
    902 	for (i = 0; i < 16; i++) {
    903 		buf[0] = HEX_DIGIT(bytes[i] / 16);
    904 		buf[1] = HEX_DIGIT(bytes[i] % 16);
    905 		buf[2] = ' ';
    906 		write(2, buf, 3);
    907 	}
    908 	write(2, "\n", 1);
    909 }
    910 #endif  // DEBUG_DISASM
    911 #endif
    912 
    913 #if defined(__i386__)
    914 __asm(
    915 			".text;"
    916 			".align 2, 0x90;"
    917 			"_atomic_mov64:;"
    918 			"	pushl %ebp;"
    919 			"	movl %esp, %ebp;"
    920 			"	pushl %esi;"
    921 			"	pushl %ebx;"
    922 			"	pushl %ecx;"
    923 			"	pushl %eax;"
    924 			"	pushl %edx;"
    925 
    926 			// atomic push of value to an address
    927 			// we use cmpxchg8b, which compares content of an address with
    928 			// edx:eax. If they are equal, it atomically puts 64bit value
    929 			// ecx:ebx in address.
    930 			// We thus put contents of address in edx:eax to force ecx:ebx
    931 			// in address
    932 			"	mov		8(%ebp), %esi;"  // esi contains target address
    933 			"	mov		12(%ebp), %ebx;"
    934 			"	mov		16(%ebp), %ecx;" // ecx:ebx now contains value to put in target address
    935 			"	mov		(%esi), %eax;"
    936 			"	mov		4(%esi), %edx;"  // edx:eax now contains value currently contained in target address
    937 			"	lock; cmpxchg8b	(%esi);" // atomic move.
    938 
    939 			// restore registers
    940 			"	popl %edx;"
    941 			"	popl %eax;"
    942 			"	popl %ecx;"
    943 			"	popl %ebx;"
    944 			"	popl %esi;"
    945 			"	popl %ebp;"
    946 			"	ret"
    947 );
    948 #elif defined(__x86_64__)
    949 void atomic_mov64(
    950 		uint64_t *targetAddress,
    951 		uint64_t value )
    952 {
    953     *targetAddress = value;
    954 }
    955 #endif
    956 #endif
    957 #endif  // __APPLE__
    958