Home | History | Annotate | Download | only in traceline
      1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // TODO
      6 //  - Make capturing system call arguments optional and the number configurable.
      7 //  - Lots of places depend on the ABI so that we can modify EAX or EDX, this
      8 //    is safe, but these could be moved to be saved and restored anyway.
      9 //  - Understand the loader better, and make some more meaningful hooks with
     10 //    proper data collection and durations.  Right now it's just noise.
     11 //  - Get the returned pointer from AllocateHeap.
     12 
     13 #include <windows.h>
     14 
     15 #include <stdio.h>
     16 
     17 #include <map>
     18 #include <string>
     19 
     20 #include "assembler.h"
     21 #include "logging.h"
     22 #include "rdtsc.h"
     23 #include "sym_resolver.h"
     24 #include "syscall_map.h"
     25 
     26 #include "sidestep/mini_disassembler.h"
     27 
     28 namespace {
     29 
     30 std::string JSONString(const std::string& str) {
     31   static const char hextable[] = "0123456789abcdef";
     32   std::string out;
     33   out.push_back('"');
     34   for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
     35     unsigned char c = static_cast<unsigned char>(*it);
     36     switch (c) {
     37       case '\\':
     38       case '"':
     39       case '\'':
     40         out.push_back('\\'); out.push_back(c);
     41         break;
     42       default:
     43         if (c < 20 || c >= 127) {
     44           out.push_back('\\'); out.push_back('x');
     45           out.push_back(hextable[c >> 4]); out.push_back(hextable[c & 0xf]);
     46         } else {
     47           // Unescaped.
     48           out.push_back(c);
     49         }
     50         break;
     51     }
     52   }
     53   out.push_back('"');
     54   return out;
     55 }
     56 
     57 }  // namespace
     58 
     59 class Playground {
     60  public:
     61   static const int kPlaygroundSize = 64 * 1024 * 1024;
     62 
     63   // Encapsulate the configuration options to the playground.
     64   class Options {
     65    public:
     66     Options()
     67         : stack_unwind_depth_(0),
     68           log_heap_(false),
     69           log_lock_(false),
     70           vista_(false) { }
     71 
     72 
     73     // The maximum amount of frames we should unwind from the call stack.
     74     int stack_unwind_depth() { return stack_unwind_depth_; }
     75     void set_stack_unwind_depth(int depth) { stack_unwind_depth_ = depth; }
     76 
     77     // Whether we should log heap operations (alloc / free).
     78     bool log_heap() { return log_heap_; }
     79     void set_log_heap(bool x) { log_heap_ = x; }
     80 
     81     // Whether we should log lock (critical section) operations.
     82     bool log_lock() { return log_lock_; }
     83     void set_log_lock(bool x) { log_lock_ = x; }
     84 
     85     // Whether we are running on Vista.
     86     bool vista() { return vista_; }
     87     void set_vista(bool x) { vista_ = x; }
     88 
     89    private:
     90     int stack_unwind_depth_;
     91     bool log_heap_;
     92     bool log_lock_;
     93     bool vista_;
     94   };
     95 
     96   Playground(HANDLE proc, const Options& options)
     97       : proc_(proc),
     98         remote_addr_(NULL),
     99         resolver_("ntdll.dll"),
    100         options_(options) {
    101     // We copy the entire playground into the remote process, and we have
    102     // fields that we expect to be zero.  TODO this could be a lot better.
    103     memset(buf_, 0, sizeof(buf_));
    104   }
    105 
    106   void AllocateInRemote() {
    107     // Try to get something out of the way and easy to debug.
    108     static void* kPlaygroundAddr = reinterpret_cast<void*>(0x66660000);
    109     // Allocate our playground memory in the target process.  This is a big
    110     // slab of read/write/execute memory that we use for our code
    111     // instrumentation, and the memory for writing out our logging events.
    112     remote_addr_ = reinterpret_cast<char*>(
    113         VirtualAllocEx(proc_,
    114                        kPlaygroundAddr,
    115                        kPlaygroundSize,
    116                        MEM_COMMIT | MEM_RESERVE,
    117                        PAGE_EXECUTE_READWRITE));
    118     if (remote_addr_ == NULL || remote_addr_ != kPlaygroundAddr) {
    119       NOTREACHED("Falied to allocate playground: 0x%08x", remote_addr_);
    120     }
    121   }
    122 
    123   void CopyToRemote() {
    124     WriteProcessMemory(proc_,
    125                        remote_addr_,
    126                        buf_,
    127                        sizeof(buf_),
    128                        NULL);
    129   }
    130 
    131   void CopyFromRemote() {
    132     SIZE_T size = 0;
    133     ReadProcessMemory(proc_,
    134                       remote_addr_,
    135                       buf_,
    136                       sizeof(buf_),
    137                       &size);
    138   }
    139 
    140   enum EventRecordType {
    141     EVENT_TYPE_LDR              = 0,
    142     EVENT_TYPE_THREADBEGIN      = 1,
    143     EVENT_TYPE_THREADNAME       = 2,
    144     EVENT_TYPE_EXCEPTION        = 3,
    145     EVENT_TYPE_PROCESSEXIT      = 4,
    146     EVENT_TYPE_CREATETHREAD     = 5,
    147     EVENT_TYPE_THREADEXIT       = 6,
    148     EVENT_TYPE_ALLOCHEAP        = 7,
    149     EVENT_TYPE_FREEHEAP         = 8,
    150     EVENT_TYPE_SYSCALL          = 9,
    151     EVENT_TYPE_ENTER_CS         = 10,
    152     EVENT_TYPE_TRYENTER_CS      = 11,
    153     EVENT_TYPE_LEAVE_CS         = 12,
    154     EVENT_TYPE_APC              = 13
    155   };
    156 
    157   static const int kThreadNameBufSize = 64;
    158   static const int kLdrBufSize = 512;  // Looks like internal buffer is 512.
    159 
    160   static const int kCodeBlockSize               = 256;
    161 
    162   static const int kOffLdrCode                  = 0 * kCodeBlockSize;
    163   static const int kOffCreateThreadCode         = 1 * kCodeBlockSize;
    164   static const int kOffThreadCode               = 2 * kCodeBlockSize;
    165   static const int kOffExpCode                  = 3 * kCodeBlockSize;
    166   static const int kOffExitCode                 = 4 * kCodeBlockSize;
    167   static const int kOffThreadExitCode           = 5 * kCodeBlockSize;
    168   static const int kOffAllocHeapCode            = 6 * kCodeBlockSize;
    169   static const int kOffFreeHeapCode             = 7 * kCodeBlockSize;
    170   static const int kOffSyscallCode              = 8 * kCodeBlockSize;
    171   static const int kOffEnterCritSecCode         = 9 * kCodeBlockSize;
    172   static const int kOffTryEnterCritSecCode      = 10 * kCodeBlockSize;
    173   static const int kOffLeaveCritSecCode         = 11 * kCodeBlockSize;
    174   static const int kOffApcDispCode              = 12 * kCodeBlockSize;
    175 
    176   static const int kOffLogAreaPtr               = 4096;
    177   static const int kOffLogAreaData              = 4096 + 4;
    178 
    179   static const int kRecordHeaderSize = 8 + 4 + 4 + 4;
    180 
    181   // Given the address to the start of a function, patch the function to jump
    182   // to a given offset into the playground.  This function will try to take
    183   // advantage of hotpatch code, if the function is prefixed with 5 0x90 bytes.
    184   // Returns a std::string of any assembly instructions that must be relocated,
    185   // as they were overwritten during patching.
    186   std::string PatchPreamble(int func_addr, int playground_off) {
    187     sidestep::MiniDisassembler disas;
    188     int stub_addr = reinterpret_cast<int>(remote_addr_ + playground_off);
    189 
    190     std::string instrs;
    191 
    192     char buf[15];
    193     if (ReadProcessMemory(proc_,
    194                           reinterpret_cast<void*>(func_addr - 5),
    195                           buf,
    196                           sizeof(buf),
    197                           NULL) == 0) {
    198       NOTREACHED("ReadProcessMemory(0x%08x) failed: %d",
    199                  func_addr - 5, GetLastError());
    200     }
    201 
    202     // TODO(deanm): It seems in more recent updates the compiler is generating
    203     // complicated sequences for padding / alignment.  For example:
    204     // 00000000  8DA42400000000    lea esp,[esp+0x0]
    205     // 00000007  8D4900            lea ecx,[ecx+0x0]
    206     // is used for a 16 byte alignment.  We need a better way of handling this.
    207     if (memcmp(buf, "\x90\x90\x90\x90\x90", 5) == 0 ||
    208         memcmp(buf, "\x00\x8D\x64\x24\x00", 5) == 0 ||
    209         memcmp(buf, "\x00\x00\x8D\x49\x00", 5) == 0) {
    210       unsigned int instr_bytes = 0;
    211 
    212       // We might have a hotpatch no-op of mov edi, edi "\x8b\xff".  It is a
    213       // bit of a waste to relocate it, but it makes everything simpler.
    214 
    215       while (instr_bytes < 2) {
    216         if (disas.Disassemble(
    217             reinterpret_cast<unsigned char*>(buf + 5 + instr_bytes),
    218             &instr_bytes) != sidestep::IT_GENERIC) {
    219           NOTREACHED("Could not disassemble or relocate instruction.");
    220         }
    221         // We only read 10 bytes worth of instructions.
    222         CHECK(instr_bytes < 10);
    223       }
    224 
    225       instrs.assign(buf + 5, instr_bytes);
    226 
    227       // We have a hotpatch prefix of 5 nop bytes.  We can use this for our
    228       // long jump, and then overwrite the first 2 bytes to jump back to there.
    229       CodeBuffer patch(buf);
    230       int off = stub_addr - func_addr;
    231       patch.jmp_rel(off);
    232       patch.jmp_rel_short(-2 - 5);
    233     } else {
    234       // We need a full 5 bytes for the jump.
    235       unsigned int instr_bytes = 0;
    236       while (instr_bytes < 5) {
    237         if (disas.Disassemble(
    238             reinterpret_cast<unsigned char*>(buf + 5 + instr_bytes),
    239             &instr_bytes) != sidestep::IT_GENERIC) {
    240           NOTREACHED("Could not disassemble or relocate instruction.");
    241         }
    242         // We only read 10 bytes worth of instructions.
    243         CHECK(instr_bytes < 10);
    244       }
    245 
    246       instrs.assign(buf + 5, instr_bytes);
    247 
    248       // Overwrite the first 5 bytes with a relative jump to our stub.
    249       CodeBuffer patch(buf + 5);
    250       int off = stub_addr - (func_addr + 5);
    251       patch.jmp_rel(off);
    252     }
    253 
    254     // Write back the bytes, we are really probably writing more back than we
    255     // need to, but it shouldn't really matter.
    256     if (WriteProcessMemory(proc_,
    257                            reinterpret_cast<void*>(func_addr - 5),
    258                            buf,
    259                            sizeof(buf),
    260                            NULL) == 0) {
    261       NOTREACHED("WriteProcessMemory(0x%08x) failed: %d",
    262                  func_addr - 5, GetLastError());
    263     }
    264 
    265     return instrs;
    266   }
    267 
    268   std::string PatchPreamble(const char* func_name, int playground_off) {
    269     return PatchPreamble(
    270         reinterpret_cast<int>(resolver_.Resolve(func_name)), playground_off);
    271   }
    272 
    273   // Restore any instructions that needed to be moved to make space for our
    274   // patch and jump back to the original code.
    275   void ResumeOriginalFunction(const char* func_name,
    276                               const std::string& moved_instructions,
    277                               int stub_offset,
    278                               CodeBuffer* cb) {
    279     cb->emit_bytes(moved_instructions);
    280     int off = resolver_.Resolve(func_name) +
    281               moved_instructions.size() -
    282               (remote_addr_ + stub_offset + cb->size() + 5);
    283     cb->jmp_rel(off);
    284   }
    285 
    286   // Makes a call to NtQueryPerformanceCounter, writing the timestamp to the
    287   // buffer pointed to by EDI.  EDI it not incremented.  EAX is not preserved.
    288   void AssembleQueryPerformanceCounter(CodeBuffer* cb) {
    289     // Make a call to NtQueryPerformanceCounter and write the result into
    290     // the log area.  The buffer we write to should be aligned, but we should
    291     // garantee that anyway for the logging area for performance.
    292     cb->push_imm(0);       // PerformanceFrequency
    293     cb->push(EDI);         // PerformanceCounter
    294     cb->mov_imm(EAX, reinterpret_cast<int>(
    295         resolver_.Resolve("ntdll!NtQueryPerformanceCounter")));
    296     cb->call(EAX);
    297   }
    298 
    299   // This is the common log setup routine.  It will allocate a new log entry,
    300   // and write out the common log header to the event entry.  The header is:
    301   // is [ 64bit QPC ] [ 32bit cpu id ] [ 32bit thread id ] [ 32bit rec id ]
    302   // EDI will be left pointing to the log entry, with |space| bytes left for
    303   // type specific data.  All other registers should not be clobbered.
    304   void AssembleHeaderCode(CodeBuffer* cb, EventRecordType rt, int space) {
    305     cb->push(EAX);
    306     cb->push(EDX);
    307     cb->push(ECX);
    308     cb->push(ESI);
    309 
    310     int unwind_depth = options_.stack_unwind_depth();
    311 
    312     // Load EDI with the number of bytes we want for our log entry, this will
    313     // be used in the atomic increment to allocate the log entry.
    314     cb->mov_imm(EDI, kRecordHeaderSize + (unwind_depth * 4) + space);
    315     // Do the increment and have EDI point to our log entry buffer space.
    316     cb->mov_imm(EDX, reinterpret_cast<int>(remote_addr_ + kOffLogAreaPtr));
    317     cb->inc_atomic(EDX, EDI);
    318     // EDI is the buffer offset, make it a pointer to the record entry.
    319     cb->add_imm(EDI, reinterpret_cast<int>(remote_addr_ + kOffLogAreaData));
    320 
    321     AssembleQueryPerformanceCounter(cb);
    322     cb->add_imm(EDI, 8);
    323 
    324     cb->which_cpu();
    325     cb->stosd();
    326 
    327     cb->which_thread();
    328     cb->stosd();
    329 
    330     // Stack unwinding, follow EBP to the maximum number of frames, and make
    331     // sure that it stays on the stack (between ESP and TEB.StackBase).
    332     if (unwind_depth > 0) {
    333       cb->mov_imm(ECX, unwind_depth);
    334       cb->fs(); cb->mov(EDX, Operand(0x04));  // get TEB.StackBase
    335 
    336       // Start at EBP.
    337       cb->mov(EAX, EBP);
    338 
    339       Label unwind_loop, bail;
    340       cb->bind(&unwind_loop);
    341 
    342       // Bail if (EAX < ESP) (below the stack)
    343       cb->cmp(EAX, ESP);
    344       cb->jcc(below, &bail);
    345       // Bail if (EAX >= EDX) (above the stack)
    346       cb->cmp(EAX, EDX);
    347       cb->jcc(above_equal, &bail);
    348 
    349       // We have a valid stack pointer, it should point to something like:
    350       //   [ saved frame pointer ] [ return address ] [ arguments ... ]
    351       cb->mov(ESI, EAX);
    352       cb->lodsd();  // Get the new stack pointer to follow in EAX
    353       cb->movsd();  // Copy the return address to the log area.
    354 
    355       cb->loop(&unwind_loop);
    356 
    357       cb->bind(&bail);
    358       // If we did managed to unwind to the max, fill the rest with 0 (really
    359       // we just want to inc EDI to the end, and this is an easy way).
    360       cb->mov_imm(EAX, 0);  // TODO use an xor
    361       cb->rep(); cb->stosd();
    362     }
    363 
    364     // Store the type for this record entry.
    365     cb->mov_imm(EAX, rt);
    366     cb->stosd();
    367 
    368     cb->pop(ESI);
    369     cb->pop(ECX);
    370     cb->pop(EDX);
    371     cb->pop(EAX);
    372   }
    373 
    374   void PatchLoader() {
    375     static const EventRecordType kRecordType =  EVENT_TYPE_LDR;
    376     static const char* kFuncName = "ntdll!DebugPrint";
    377     static const int kStubOffset = kOffLdrCode;
    378 
    379     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    380 
    381     // Set ShowSnaps to one to get the print routines to be called.
    382     char enabled = 1;
    383     WriteProcessMemory(
    384         proc_, resolver_.Resolve("ntdll!ShowSnaps"), &enabled, 1, NULL);
    385 
    386     CodeBuffer cb(buf_ + kStubOffset);
    387 
    388     cb.pop(EDX);  // return address
    389     cb.pop(EAX);  // First param in eax
    390     cb.push(ESI);
    391     cb.push(EDI);
    392     cb.push(EDX);
    393 
    394     cb.mov(ESI, EAX);  // ESI points at the string structure.
    395 
    396     // We used to do variable length based on the length supplied in the str
    397     // structure, but it's easier (and sloppier) to just copy a fixed amount.
    398     AssembleHeaderCode(&cb, kRecordType, kLdrBufSize);
    399 
    400     cb.lodsd();        // Load the character count
    401     cb.lodsd();        // Load the char*
    402     cb.mov(ESI, EAX);
    403     cb.mov_imm(ECX, kLdrBufSize / 4);  // load the char count as the rep count
    404     cb.rep(); cb.movsb();  // Copy the string to the logging buffer
    405 
    406     // Return
    407     cb.pop(EDX);
    408     cb.pop(EDI);
    409     cb.pop(ESI);
    410     cb.pop(ECX);  // don't care
    411     cb.pop(ECX);  // don't care
    412     cb.jmp(EDX);
    413   }
    414 
    415   void PatchCreateThread() {
    416     static const EventRecordType kRecordType =  EVENT_TYPE_CREATETHREAD;
    417     static const char* kFuncName =
    418       options_.vista() ? "ntdll!NtCreateThreadEx" : "ntdll!NtCreateThread";
    419     static const int kStubOffset = kOffCreateThreadCode;
    420 
    421     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    422 
    423     CodeBuffer cb(buf_ + kStubOffset);
    424 
    425     cb.push(EDI);
    426     cb.push(ESI);
    427 
    428     AssembleHeaderCode(&cb, kRecordType, 8);
    429 
    430     cb.mov(EAX, Operand(ESP, 0x18 + 8));
    431 
    432     // Super ugly hack.  To coorrelate between creating a thread and the new
    433     // thread running, we stash something to identify the creating event when
    434     // we log the created event.  We just use a pointer to the event log data
    435     // since this will be unique and can tie the two events together.  We pass
    436     // it by writing into the context structure, so it will be passed in ESI.
    437     cb.add_imm(EAX, 0xa0);
    438     cb.push(EDI);
    439     cb.mov(EDI, EAX);
    440     cb.pop(EAX);
    441     cb.push(EAX);
    442     cb.stosd();
    443 
    444     // Get and save CONTEXT.Eip
    445     cb.mov(ESI, EDI);
    446     cb.add_imm(ESI, 20);
    447     cb.pop(EDI);
    448     cb.mov(EAX, EDI);
    449     cb.stosd();  // Record the event identifier to tie together the events.
    450     cb.movsd();  // write Eip to the log event
    451 
    452     cb.pop(ESI);
    453     cb.pop(EDI);
    454 
    455     ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    456   }
    457 
    458   void PatchThreadBegin() {
    459     static const EventRecordType kRecordType =  EVENT_TYPE_THREADBEGIN;
    460     static const char* kFuncName = "ntdll!CsrNewThread";
    461     static const int kStubOffset = kOffThreadCode;
    462 
    463     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    464 
    465     CodeBuffer cb(buf_ + kStubOffset);
    466 
    467     cb.push(EDI);
    468 
    469     AssembleHeaderCode(&cb, kRecordType, 8);
    470 
    471     cb.mov(EAX, ESI);  // We stashed the creator's eventid in the context ESI.
    472     cb.stosd();
    473 
    474     // TODO(deanm): The pointer is going to point into the CRT or something,
    475     // should we dig deeper to get more information about the real entry?
    476     cb.mov(EAX, Operand(EBP, 0x8));
    477     cb.stosd();
    478     cb.pop(EDI);
    479 
    480     ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    481   }
    482 
    483   void PatchThreadBeginVista() {
    484     static const EventRecordType kRecordType =  EVENT_TYPE_THREADBEGIN;
    485     static const char* kFuncName = "ntdll!_RtlUserThreadStart";
    486     static const int kStubOffset = kOffThreadCode;
    487 
    488     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    489 
    490     CodeBuffer cb(buf_ + kStubOffset);
    491 
    492     cb.push(EDI);
    493 
    494     AssembleHeaderCode(&cb, kRecordType, 8);
    495 
    496     cb.mov(EAX, ESI);  // We stashed the creator's eventid in the context ESI.
    497     cb.stosd();
    498 
    499     // TODO(deanm): The pointer is going to point into the CRT or something,
    500     // should we dig deeper to get more information about the real entry?
    501     //cb.mov(EAX, Operand(EBP, 0x8));
    502     cb.mov_imm(EAX, 0);
    503     cb.stosd();
    504     cb.pop(EDI);
    505 
    506     ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    507   }
    508 
    509   // Intercept exception dispatching so we can catch when threads set a thread
    510   // name (which is an exception with a special code).  TODO it could be
    511   // useful to log all exceptions.
    512   void PatchSetThreadName() {
    513     static const EventRecordType kRecordType =  EVENT_TYPE_THREADNAME;
    514     static const char* kFuncName = "ntdll!RtlDispatchException";
    515     static const int kStubOffset = kOffExpCode;
    516 
    517     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    518 
    519     CodeBuffer cb(buf_ + kStubOffset);
    520 
    521     cb.pop(EDX);  // return address
    522     cb.pop(EAX);  // ExceptionRecord
    523     cb.push(EAX);
    524     cb.push(EDX);
    525 
    526     cb.push(ESI);
    527 
    528     cb.mov(ESI, EAX);
    529     cb.lodsd();
    530 
    531     Label bail;
    532     // exception code
    533     cb.cmp_imm(EAX, 0x406D1388);
    534     cb.jcc(not_equal, &bail);
    535 
    536     cb.push(EDI);
    537 
    538     AssembleHeaderCode(&cb, kRecordType, kThreadNameBufSize);
    539 
    540     // Fetch the second parameter.
    541     for (int i = 0; i < 6; ++i) {
    542       cb.lodsd();
    543     }
    544 
    545     // TODO This is sloppy and we could run into unmapped memory...
    546     cb.mov(ESI, EAX);
    547     cb.mov_imm(ECX, kThreadNameBufSize / 4);
    548     cb.rep(); cb.movsd();
    549 
    550     cb.pop(EDI);
    551 
    552     cb.bind(&bail);
    553     cb.pop(ESI);
    554 
    555     ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    556   }
    557 
    558 
    559   void PatchThreadExit() {
    560     static const EventRecordType kRecordType =  EVENT_TYPE_THREADEXIT;
    561     static const char* kFuncName = "ntdll!LdrShutdownThread";
    562     static const int kStubOffset = kOffThreadExitCode;
    563 
    564     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    565     CodeBuffer cb(buf_ + kStubOffset);
    566 
    567     cb.push(EDI);
    568     AssembleHeaderCode(&cb, kRecordType, 0);
    569     cb.pop(EDI);
    570 
    571     ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    572   }
    573 
    574   void PatchAllocateHeap() {
    575     static const EventRecordType kRecordType =  EVENT_TYPE_ALLOCHEAP;
    576     static const char* kFuncName = "ntdll!RtlAllocateHeap";
    577     static const int kStubOffset = kOffAllocHeapCode;
    578 
    579     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    580     CodeBuffer cb(buf_ + kStubOffset);
    581 
    582     cb.push(EDI);
    583     cb.push(ESI);
    584 
    585     AssembleHeaderCode(&cb, kRecordType, 12);
    586 
    587     cb.mov(ESI, ESP);
    588     cb.add_imm(ESI, 12);  // Skip over our saved and the return address
    589     cb.movsd(); cb.movsd(); cb.movsd();  // Copy the 3 parameters
    590 
    591     cb.pop(ESI);
    592     cb.pop(EDI);
    593 
    594     ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    595   }
    596 
    597   void PatchFreeHeap() {
    598     static const EventRecordType kRecordType =  EVENT_TYPE_FREEHEAP;
    599     static const char* kFuncName = "ntdll!RtlFreeHeap";
    600     static const int kStubOffset = kOffFreeHeapCode;
    601 
    602     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    603     CodeBuffer cb(buf_ + kStubOffset);
    604 
    605     cb.push(EDI);
    606     cb.push(ESI);
    607 
    608     AssembleHeaderCode(&cb, kRecordType, 12);
    609 
    610     cb.mov(ESI, ESP);
    611     cb.add_imm(ESI, 12);  // Skip over our saved and the return address
    612     cb.movsd(); cb.movsd(); cb.movsd();  // Copy the 3 parameters
    613 
    614     cb.pop(ESI);
    615     cb.pop(EDI);
    616 
    617     ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    618   }
    619 
    620   // Don't even bother going back to the original code, just implement our
    621   // own KiFastSystemCall.  The original looks like:
    622   //   .text:7C90EB8B                 mov     edx, esp
    623   //   .text:7C90EB8D                 sysenter
    624   //   .text:7C90EB8F                 nop
    625   //   .text:7C90EB90                 nop
    626   //   .text:7C90EB91                 nop
    627   //   .text:7C90EB92                 nop
    628   //   .text:7C90EB93                 nop
    629   void PatchSyscall() {
    630     static const EventRecordType kRecordType =  EVENT_TYPE_SYSCALL;
    631     static const char* kFuncName = "ntdll!KiFastSystemCall";
    632     static const int kStubOffset = kOffSyscallCode;
    633 
    634     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    635 
    636     {
    637       CodeBuffer cb(buf_ + kStubOffset);
    638 
    639       Label skip;
    640 
    641       // Skip 0xa5 which is QueryPerformanceCounter, to make sure we don't log
    642       // our own logging's QPC.  Disabled for now, using ret addr check...
    643       // cb.cmp_imm(EAX, 0xa5);
    644       // cb.jcc(equal, &skip);
    645 
    646       // Check if the return address is from 0x6666 (our code region).
    647       // 66817C24066666    cmp word [esp+0x6],0x6666
    648       cb.emit(0x66); cb.emit(0x81); cb.emit(0x7C);
    649       cb.emit(0x24); cb.emit(0x06); cb.emit(0x66); cb.emit(0x66);
    650       cb.jcc(equal, &skip);
    651 
    652       // This is all a bit shit.  Originally I thought I could store some state
    653       // on the stack above ESP, however, it seems that when APCs, etc are
    654       // queued, they will use the stack above ESP.  Well, not above ESP, above
    655       // what was passed in as EDX into the systemcall, not matter if ESP was
    656       // different than this :(.  So we need to store our state in the event
    657       // log record, and then we stick a pointer to that over a ret addr...
    658 
    659       // Our stack starts like:
    660       //  [ ret addr ] [ ret addr 2 ] [ arguments ]
    661       // We will update it to look like
    662       //  [ ret stub addr ] [ event entry ptr ] [ arguments ]
    663 
    664       cb.push(EDI);  // save EDI since we're using it
    665       AssembleHeaderCode(&cb, kRecordType, 16 + 16 + 8);
    666       cb.mov(EDX, EAX);  // Save EAX...
    667       cb.stosd();  // eax is the syscall number
    668       cb.pop(EAX);
    669       cb.stosd();  // store the saved EDI
    670       cb.pop(EAX);
    671       cb.stosd();  // store the real return address
    672       cb.pop(EAX);
    673       cb.stosd();  // store the real (secondary) return address;
    674 
    675       cb.push(ESI);
    676       cb.mov(ESI, ESP);
    677       cb.lodsd();
    678       cb.movsd();  // argument 1
    679       cb.movsd();  // argument 2
    680       cb.movsd();  // argument 3
    681       cb.pop(ESI);
    682 
    683       cb.push(EDI);  // store our event ptr over the secondary ret addr.
    684       cb.push_imm(reinterpret_cast<int>(remote_addr_ + kOffSyscallCode + 200));
    685       cb.mov(EAX, EDX);  // restore EAX
    686 
    687       cb.bind(&skip);
    688       cb.mov(EDX, ESP);
    689       cb.sysenter();
    690 
    691       if (cb.size() > 200) {
    692         NOTREACHED("code too big: %d", cb.size());
    693       }
    694     }
    695 
    696     {
    697       CodeBuffer cb(buf_ + kStubOffset + 200);
    698 
    699       // TODO share the QPC code, this is a copy and paste...
    700 
    701       cb.pop(EDI);  // get the log area
    702 
    703       cb.stosd();   // Log the system call return value.
    704 
    705       // QPC will clobber EAX, and it's very important to save it since it
    706       // is the return value from the system call.  TODO validate if there is
    707       // anything else we need to save...
    708       cb.push(EAX);
    709       AssembleQueryPerformanceCounter(&cb);
    710       cb.pop(EAX);
    711 
    712       // We need to:
    713       //  - Restore the original "seconary" return address
    714       //  - Restore the original value of the EDI register
    715       //  - Jump control flow to the original return address
    716       // All 3 of these values are stored in the log record...
    717       // [ syscall num ] [ saved edi ] [ real rets ] [ args ] [ retval ] [ ts ]
    718       //                   currently edi points here     ----^
    719 
    720       cb.push(Operand(EDI, -4 - 16));   // push the real 2nd ret
    721       cb.push(Operand(EDI, -8 - 16));   // push the real ret
    722       cb.push(Operand(EDI, -12 - 16));  // push the saved EDI
    723 
    724       cb.pop(EDI);  // restore EDI that was saved in the record
    725       cb.ret();     // jmp back to the real ret ...
    726 
    727       if (cb.size() > 56) {
    728         NOTREACHED("ug");
    729       }
    730     }
    731   }
    732 
    733   // Patch lock (criticial section) holding.
    734   void PatchEnterCriticalSection() {
    735     static const EventRecordType kRecordType =  EVENT_TYPE_ENTER_CS;
    736     static const char* kFuncName = "ntdll!RtlEnterCriticalSection";
    737     static const int kStubOffset = kOffEnterCritSecCode;
    738 
    739     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    740 
    741     // We just want to capture the return address and original argument, so
    742     // we know when EnterCriticalSection returned, we don't want to know when
    743     // it entered because it could sit waiting.  We want to know when the lock
    744     // actually started being held.  The compiler will sometimes generated code
    745     // that overwrites arguments, so we'll keep a copy of the argument just in
    746     // case code like this is ever generated in the future.  TODO is it enough
    747     // to just assume a LPCRITICAL_SECTION uniquely identifies the lock, or
    748     // can the same lock have multiple different copies, I would assume not.
    749     {
    750       CodeBuffer cb(buf_ + kStubOffset);
    751 
    752       // Set up an additional frame so that we capture the return.
    753       // TODO use memory instructions instead of using registers.
    754       cb.pop(EAX);  // return address
    755       cb.pop(EDX);  // first argument (critical section pointer)
    756 
    757       cb.push(EDX);
    758       cb.push(EAX);
    759       cb.push(EDX);
    760       cb.push_imm(
    761           reinterpret_cast<int>(remote_addr_ + kStubOffset + 40));
    762 
    763       ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    764       CHECK(cb.size() < 40);
    765     }
    766 
    767     {
    768       CodeBuffer cb(buf_ + kStubOffset + 40);
    769 
    770       cb.push(ESI);
    771       cb.mov(ESI, ESP);
    772       cb.push(EAX);
    773       cb.push(EDI);
    774 
    775       AssembleHeaderCode(&cb, kRecordType, 4);
    776 
    777       cb.lodsd();  // Skip over our saved ESI
    778       cb.lodsd();  // Skip over the return address
    779       cb.movsd();  // Write the CRITICAL_SECTION* to the event record.
    780 
    781       cb.pop(EDI);
    782       cb.pop(EAX);
    783       cb.pop(ESI);
    784 
    785       cb.ret(0x04);
    786     }
    787   }
    788 
    789   void PatchTryEnterCriticalSection() {
    790     static const EventRecordType kRecordType =  EVENT_TYPE_TRYENTER_CS;
    791     static const char* kFuncName = "ntdll!RtlTryEnterCriticalSection";
    792     static const int kStubOffset = kOffTryEnterCritSecCode;
    793 
    794     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    795 
    796     {
    797       CodeBuffer cb(buf_ + kStubOffset);
    798 
    799       // Set up an additional frame so that we capture the return.
    800       // TODO use memory instructions instead of using registers.
    801       cb.pop(EAX);  // return address
    802       cb.pop(EDX);  // first argument (critical section pointer)
    803 
    804       cb.push(EDX);
    805       cb.push(EAX);
    806       cb.push(EDX);
    807       cb.push_imm(reinterpret_cast<int>(remote_addr_ + kStubOffset + 40));
    808 
    809       ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    810       CHECK(cb.size() < 40);
    811     }
    812 
    813     {
    814       CodeBuffer cb(buf_ + kStubOffset + 40);
    815 
    816       cb.push(ESI);
    817       cb.mov(ESI, ESP);
    818       cb.push(EDI);
    819 
    820       cb.push(EAX);
    821 
    822       AssembleHeaderCode(&cb, kRecordType, 8);
    823 
    824       cb.lodsd();  // Skip over our saved ESI
    825       cb.lodsd();  // Skip over the return address
    826       cb.movsd();  // Write the CRITICAL_SECTION* to the event record.
    827 
    828       cb.pop(EAX);
    829       cb.stosd();  // Write the return value to the event record.
    830 
    831       cb.pop(EDI);
    832       cb.pop(ESI);
    833 
    834       cb.ret(0x04);
    835     }
    836   }
    837 
    838   void PatchLeaveCriticalSection() {
    839     static const EventRecordType kRecordType =  EVENT_TYPE_LEAVE_CS;
    840     static const char* kFuncName = "ntdll!RtlLeaveCriticalSection";
    841     static const int kStubOffset = kOffLeaveCritSecCode;
    842 
    843     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    844     CodeBuffer cb(buf_ + kStubOffset);
    845 
    846     // TODO use memory instructions instead of using registers.
    847     cb.pop(EDX);  // return address
    848     cb.pop(EAX);  // first argument (critical section pointer)
    849     cb.push(EAX);
    850     cb.push(EDX);
    851 
    852     cb.push(EDI);
    853     AssembleHeaderCode(&cb, kRecordType, 4);
    854     cb.stosd();  // Write the CRITICAL_SECTION* to the event record.
    855     cb.pop(EDI);
    856 
    857     ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    858   }
    859 
    860   // Patch APC dispatching.  This is a bit hacky, since the return to kernel
    861   // mode is done with NtContinue, we have to shim in a stub return address to
    862   // catch when the callback is finished.  It is probably a bit fragile.
    863   void PatchApcDispatcher() {
    864     static const EventRecordType kRecordType =  EVENT_TYPE_APC;
    865     static const char* kFuncName = "ntdll!KiUserApcDispatcher";
    866     static const int kStubOffset = kOffApcDispCode;
    867 
    868     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    869 
    870     {
    871       CodeBuffer cb(buf_ + kStubOffset);
    872 
    873       // We don't really need to preserve these since we're the first thing
    874       // executing from the kernel dispatch, but yeah, it is good practice.
    875       cb.push(EDI);
    876       cb.push(EAX);
    877 
    878       AssembleHeaderCode(&cb, kRecordType, 4 + 4 + 8);
    879 
    880       cb.mov_imm(EAX, reinterpret_cast<int>(remote_addr_ + kStubOffset + 140));
    881       cb.xchg(EAX, Operand(ESP, 8));  // Swap the callback address with ours.
    882       cb.stosd();  // Store the original callback function address.
    883 
    884       // TODO for now we're lazy and depend that ESI will be preserved, and we
    885       // use it to store the pointer into our log record.  EDI isn't preserved.
    886       cb.mov(ESI, EDI);
    887 
    888       cb.pop(EAX);
    889       cb.pop(EDI);
    890 
    891       ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    892 
    893       CHECK(cb.size() < 140);
    894     }
    895     {
    896       CodeBuffer cb(buf_ + kStubOffset + 140);
    897 
    898       // This is our shim, we need to call the original callback function, then
    899       // we can catch the return and log when it was completed.
    900       cb.pop(EAX);  // The real return address, safe to use EAX w/ the ABI?
    901       cb.push(EDI);
    902 
    903       cb.mov(EDI, ESI);
    904       cb.stosd();  // Store the real return address, we'll need it.
    905 
    906       cb.add_imm(ESI, -4);
    907       cb.lodsd();   // Load the real callback address.
    908 
    909       cb.mov(ESI, EDI);
    910       cb.pop(EDI);
    911 
    912       cb.call(EAX);  // Call the original callback address.
    913 
    914       cb.push(EAX);
    915       cb.push(EDI);
    916 
    917       cb.mov(EDI, ESI);
    918       AssembleQueryPerformanceCounter(&cb);
    919 
    920       cb.pop(EDI);
    921       cb.pop(EAX);
    922 
    923       cb.push(Operand(ESI, -4));  // Push the real return address.
    924       cb.ret();  // Return back to the APC Dispatcher.
    925 
    926       CHECK(cb.size() < 50);
    927     }
    928   }
    929 
    930   // We need to hook into process shutdown for two reasons.  Most importantly,
    931   // we need to copy the playground back from the process before the address
    932   // space goes away.  We could avoid this with shared memory, however, there
    933   // is a reason two.  In order to capture symbols for all of the libraries
    934   // loaded into arbitrary applications, on shutdown we do an instrusive load
    935   // of symbols into the traced process.
    936   //
    937   // ntdll!LdrShutdownProcess
    938   //  - NtSetEvent(event, 0);
    939   //  - NtWaitForSingleObject(event, FALSE, NULL);
    940   //  - jmp back
    941   void PatchExit(HANDLE exiting, HANDLE exited) {
    942     static const EventRecordType kRecordType =  EVENT_TYPE_PROCESSEXIT;
    943     static const char* kFuncName = "ntdll!LdrShutdownProcess";
    944     static const int kStubOffset = kOffExitCode;
    945 
    946     HANDLE rexiting, rexited;
    947     if (!DuplicateHandle(::GetCurrentProcess(),
    948                          exiting,
    949                          proc_,
    950                          &rexiting,
    951                          0,
    952                          FALSE,
    953                          DUPLICATE_SAME_ACCESS)) {
    954       NOTREACHED("");
    955     }
    956     if (!DuplicateHandle(::GetCurrentProcess(),
    957                          exited,
    958                          proc_,
    959                          &rexited,
    960                          0,
    961                          FALSE,
    962                          DUPLICATE_SAME_ACCESS)) {
    963       NOTREACHED("");
    964     }
    965 
    966     std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
    967     CodeBuffer cb(buf_ + kStubOffset);
    968 
    969     cb.push(EDI);
    970     AssembleHeaderCode(&cb, kRecordType, 0);
    971     cb.pop(EDI);
    972 
    973     // NtSetEvent(exiting, 0);
    974     cb.push_imm(0);
    975     cb.push_imm(reinterpret_cast<int>(rexiting));
    976     cb.mov_imm(EAX, reinterpret_cast<int>(
    977         resolver_.Resolve("ntdll!NtSetEvent")));
    978     cb.call(EAX);
    979 
    980     // NtWaitForSingleObject(exited, FALSE, INFINITE);
    981     cb.push_imm(0);
    982     cb.push_imm(0);
    983     cb.push_imm(reinterpret_cast<int>(rexited));
    984     cb.mov_imm(EAX, reinterpret_cast<int>(
    985         resolver_.Resolve("ntdll!NtWaitForSingleObject")));
    986     cb.call(EAX);
    987 
    988     ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
    989   }
    990 
    991 
    992   void Patch() {
    993     if (options_.vista()) {
    994       // TODO(deanm): Make PatchCreateThread work on Vista.
    995       PatchThreadBeginVista();
    996     } else {
    997       PatchCreateThread();
    998       PatchThreadBegin();
    999     }
   1000 
   1001     PatchThreadExit();
   1002     PatchSetThreadName();
   1003     PatchSyscall();
   1004 
   1005     PatchApcDispatcher();
   1006 
   1007     // The loader logging needs to be improved a bit to really be useful.
   1008     //PatchLoader();
   1009 
   1010     // These are interesting, but will collect a ton of data:
   1011     if (options_.log_heap()) {
   1012       PatchAllocateHeap();
   1013       PatchFreeHeap();
   1014     }
   1015     if (options_.log_lock()) {
   1016       PatchEnterCriticalSection();
   1017       PatchTryEnterCriticalSection();
   1018       PatchLeaveCriticalSection();
   1019     }
   1020   }
   1021 
   1022   // Dump the event records from the playground to stdout in a JSON format.
   1023   // TODO: Drop RDTSCNormalizer, it was from old code that tried to use the
   1024   // rdtsc counters from the CPU, and this required a bunch of normalization
   1025   // to account for non-syncronized timestamps across different cores, etc.
   1026   void DumpJSON(RDTSCNormalizer* rdn, SymResolver* res) {
   1027     int pos = kOffLogAreaPtr;
   1028     int i = IntAt(pos);
   1029     pos += 4;
   1030 
   1031     std::map<int, const char*> syscalls = CreateSyscallMap();
   1032 
   1033     printf("parseEvents([\n");
   1034     for (int end = pos + i; pos < end; ) {
   1035       printf("{\n");
   1036       __int64 ts = Int64At(pos);
   1037       pos += 8;
   1038       void* cpuid = reinterpret_cast<void*>(IntAt(pos));
   1039       pos += 4;
   1040       printf("'ms': %f,\n", rdn->MsFromStart(cpuid, ts));
   1041 
   1042       printf("'cpu': 0x%x,\n'thread': 0x%x,\n", cpuid, IntAt(pos));
   1043       pos += 4;
   1044 
   1045       if (options_.stack_unwind_depth() > 0) {
   1046         printf("'stacktrace': [\n");
   1047         for (int i = 0; i < options_.stack_unwind_depth(); ++i) {
   1048           int retaddr = IntAt(pos + (i * 4));
   1049           if (!retaddr)
   1050             break;
   1051           printf("  [ 0x%x, %s ],\n",
   1052                  retaddr,
   1053                  res ? JSONString(res->Unresolve(retaddr)).c_str() : "\"\"");
   1054         }
   1055         printf("],\n");
   1056         pos += (options_.stack_unwind_depth() * 4);
   1057       }
   1058 
   1059 
   1060       EventRecordType rt = static_cast<EventRecordType>(IntAt(pos));
   1061       pos += 4;
   1062 
   1063       switch (rt) {
   1064         case EVENT_TYPE_LDR:
   1065         {
   1066           printf("'eventtype': 'EVENT_TYPE_LDR',\n");
   1067           std::string str(&buf_[pos], kLdrBufSize);
   1068           str = str.substr(0, str.find('\0'));
   1069           printf("'ldrinfo': %s,\n", JSONString(str).c_str());
   1070           pos += kLdrBufSize;
   1071           break;
   1072         }
   1073         case EVENT_TYPE_CREATETHREAD:
   1074         {
   1075           printf("'eventtype': 'EVENT_TYPE_CREATETHREAD',\n"
   1076                  "'eventid': 0x%x,\n"
   1077                  "'startaddr': 0x%x,\n",
   1078                  IntAt(pos), IntAt(pos+4));
   1079           pos += 8;
   1080           break;
   1081         }
   1082         case EVENT_TYPE_THREADBEGIN:
   1083         {
   1084           printf("'eventtype': 'EVENT_TYPE_THREADBEGIN',\n"
   1085                  "'parenteventid': 0x%x,\n"
   1086                  "'startaddr': 0x%x,\n",
   1087                  IntAt(pos), IntAt(pos+4));
   1088           pos += 8;
   1089           break;
   1090         }
   1091         case EVENT_TYPE_THREADNAME:
   1092         {
   1093           std::string str(&buf_[pos], kThreadNameBufSize);
   1094           str = str.substr(0, str.find('\0'));
   1095           printf("'eventtype': 'EVENT_TYPE_THREADNAME',\n"
   1096                  "'threadname': %s,\n",
   1097                  JSONString(str).c_str());
   1098           pos += kThreadNameBufSize;
   1099           break;
   1100         }
   1101         case EVENT_TYPE_PROCESSEXIT:
   1102         {
   1103           printf("'eventtype': 'EVENT_TYPE_PROCESSEXIT',\n");
   1104           break;
   1105         }
   1106         case EVENT_TYPE_THREADEXIT:
   1107         {
   1108           printf("'eventtype': 'EVENT_TYPE_THREADEXIT',\n");
   1109           break;
   1110         }
   1111         case EVENT_TYPE_ALLOCHEAP:
   1112         {
   1113           printf("'eventtype': 'EVENT_TYPE_ALLOCHEAP',\n"
   1114                  "'heaphandle': 0x%x,\n"
   1115                  "'heapflags': 0x%x,\n"
   1116                  "'heapsize': %d,\n",
   1117                  IntAt(pos), IntAt(pos+4), IntAt(pos+8));
   1118           pos += 12;
   1119           break;
   1120         }
   1121         case EVENT_TYPE_FREEHEAP:
   1122         {
   1123           printf("'eventtype': 'EVENT_TYPE_FREEHEAP',\n"
   1124                  "'heaphandle': 0x%x,\n"
   1125                  "'heapflags': 0x%x,\n"
   1126                  "'heapptr': %d,\n",
   1127                  IntAt(pos), IntAt(pos+4), IntAt(pos+8));
   1128           pos += 12;
   1129           break;
   1130         }
   1131         case EVENT_TYPE_SYSCALL:
   1132         {
   1133           int syscall = IntAt(pos);
   1134           printf("'eventtype': 'EVENT_TYPE_SYSCALL',\n"
   1135                  "'syscall': 0x%x,\n", syscall);
   1136           pos += 16;
   1137 
   1138           printf("'syscallargs': [\n");
   1139           for (int i = 0; i < 3; ++i) {
   1140             printf("  0x%x,\n", IntAt(pos));
   1141             pos += 4;
   1142           }
   1143           printf("],\n");
   1144 
   1145           printf("'retval': 0x%x,\n"
   1146                  "'done': %f,\n",
   1147                  IntAt(pos), rdn->MsFromStart(0, Int64At(pos+4)));
   1148           pos += 12;
   1149 
   1150           if (syscalls.count(syscall) == 1) {
   1151             std::string sname = syscalls[syscall];
   1152             printf("'syscallname': %s,\n",
   1153                    JSONString(sname).c_str());
   1154             // Mark system calls that we should consider "waiting" system
   1155             // calls, where we are not actually active.
   1156             if (sname.find("WaitFor") != std::string::npos ||
   1157                 sname.find("RemoveIoCompletion") != std::string::npos) {
   1158               printf("'waiting': 1,\n");
   1159             }
   1160           }
   1161           break;
   1162         }
   1163         case EVENT_TYPE_ENTER_CS:
   1164         {
   1165           printf("'eventtype': 'EVENT_TYPE_ENTER_CS',\n"
   1166                  "'critical_section': 0x%x,\n", IntAt(pos));
   1167           pos += 4;
   1168           break;
   1169         }
   1170         case EVENT_TYPE_TRYENTER_CS:
   1171         {
   1172           printf("'eventtype': 'EVENT_TYPE_TRYENTER_CS',\n"
   1173                  "'critical_section': 0x%x,\n"
   1174                  "'retval': 0x%x,\n",
   1175                  IntAt(pos), IntAt(pos+4));
   1176           pos += 8;
   1177           break;
   1178         }
   1179         case EVENT_TYPE_LEAVE_CS:
   1180         {
   1181           printf("'eventtype': 'EVENT_TYPE_LEAVE_CS',\n"
   1182                  "'critical_section': 0x%x,\n", IntAt(pos));
   1183           pos += 4;
   1184           break;
   1185         }
   1186         case EVENT_TYPE_APC:
   1187         {
   1188           int func_addr = IntAt(pos);
   1189           printf("'eventtype': 'EVENT_TYPE_APC',\n"
   1190                  "'func_addr': 0x%x,\n"
   1191                  "'func_addr_name': %s,\n"
   1192                  "'ret_addr': 0x%x,\n"
   1193                  "'done': %f,\n",
   1194                  func_addr,
   1195                  res ? JSONString(res->Unresolve(func_addr)).c_str() : "\"\"",
   1196                  IntAt(pos+4), rdn->MsFromStart(0, Int64At(pos+8)));
   1197           pos += 16;
   1198           break;
   1199         }
   1200         default:
   1201           NOTREACHED("Unknown event type: %d", rt);
   1202           break;
   1203       }
   1204       printf("},\n");
   1205     }
   1206     printf("]);");
   1207   }
   1208 
   1209   int IntAt(int pos) { return *reinterpret_cast<int*>(&buf_[pos]); }
   1210   __int64 Int64At(int pos) { return *reinterpret_cast<__int64*>(&buf_[pos]); }
   1211 
   1212 
   1213  private:
   1214   // Handle the process we install into or read back from.
   1215   HANDLE proc_;
   1216   // The address where we will keep our playground in the remote process.
   1217   char* remote_addr_;
   1218   // Lookup addresses from symbol names for ntdll.dll.
   1219   SymResolver resolver_;
   1220   Options options_;
   1221   // A local copy of the playground data, we copy it into the remote process.
   1222   char buf_[kPlaygroundSize];
   1223 };
   1224 
   1225 
   1226 int main(int argc, char** argv) {
   1227   std::string command_line;
   1228   bool use_symbols = false;
   1229   bool attaching = false;
   1230   bool launched = false;
   1231   bool manual_quit = false;
   1232 
   1233   Playground::Options options;
   1234 
   1235   PROCESS_INFORMATION info = {0};
   1236 
   1237   argc--; argv++;
   1238 
   1239   while (argc > 0) {
   1240     if (std::string("--symbols") == argv[0]) {
   1241       use_symbols = true;
   1242     } else if (std::string("--vista") == argv[0]) {
   1243       options.set_vista(true);
   1244     } else if (std::string("--log-heap") == argv[0]) {
   1245       options.set_log_heap(true);
   1246     } else if (std::string("--log-lock") == argv[0]) {
   1247       options.set_log_lock(true);
   1248     } else if (std::string("--manual-quit") == argv[0]) {
   1249       manual_quit = true;
   1250     } else if (argc >= 2 && std::string("--unwind") == argv[0]) {
   1251       options.set_stack_unwind_depth(atoi(argv[1]));
   1252       argc--; argv++;
   1253     } else if (argc >= 2 && !launched && std::string("--attach") == argv[0]) {
   1254       attaching = true;
   1255       info.hProcess = OpenProcess(PROCESS_ALL_ACCESS, FALSE, atoi(argv[1]));
   1256       launched = true;
   1257       argc--; argv++;
   1258     } else if (!launched) {
   1259       STARTUPINFOA start_info = {0};
   1260       start_info.cb = sizeof(start_info);
   1261 
   1262       if (!CreateProcessA(NULL,
   1263                           argv[0],
   1264                           NULL,
   1265                           NULL,
   1266                           FALSE,
   1267                           CREATE_SUSPENDED,
   1268                           NULL,
   1269                           NULL,
   1270                           &start_info,
   1271                           &info)) {
   1272         NOTREACHED("Failed to launch \"%s\": %d\n", argv[0], GetLastError());
   1273         return 1;
   1274       }
   1275       launched = true;
   1276     } else {
   1277       NOTREACHED("error parsing command line.");
   1278     }
   1279     argc--; argv++;
   1280   }
   1281 
   1282   if (!launched) {
   1283     printf("usage: traceline.exe \"app.exe my arguments\"\n"
   1284            "  --attach 123: buggy support for attaching to a process\n"
   1285            "  --unwind 16: unwind the stack to the specified max depth\n"
   1286            "  --symbols: use symbols for stacktraces\n"
   1287            "  --log-heap: log heap operations (alloc / free).\n"
   1288            "  --log-lock: log lock (critical section) operations.\n");
   1289     return 1;
   1290   }
   1291 
   1292 
   1293   HANDLE exiting = CreateEvent(NULL, FALSE, FALSE, NULL);
   1294   HANDLE exited = CreateEvent(NULL, FALSE, FALSE, NULL);
   1295 
   1296   // The playground object is big (32MB), dynamically alloc.
   1297   Playground* pg = new Playground(info.hProcess, options);
   1298 
   1299   pg->AllocateInRemote();
   1300   pg->Patch();
   1301   pg->PatchExit(exiting, exited);
   1302   pg->CopyToRemote();
   1303 
   1304   RDTSCNormalizer rdn;
   1305   rdn.Start();
   1306 
   1307   if (!attaching)
   1308     ResumeThread(info.hThread);
   1309 
   1310   // Wait until we have been notified that it's exiting.
   1311   if (manual_quit) {
   1312     fprintf(stderr, "Press enter when you want stop tracing and collect.\n");
   1313     fflush(stderr);
   1314     getchar();
   1315   } else {
   1316     HANDLE whs[] = {exiting, info.hProcess};
   1317     if (WaitForMultipleObjects(2, whs, FALSE, INFINITE) != WAIT_OBJECT_0) {
   1318       NOTREACHED("Failed to correctly capture process shutdown.");
   1319     }
   1320   }
   1321 
   1322   pg->CopyFromRemote();
   1323 
   1324   if (use_symbols) {
   1325     // Break in and get the symbols...
   1326     SymResolver res(NULL, info.hProcess);
   1327     pg->DumpJSON(&rdn, &res);
   1328   } else {
   1329     pg->DumpJSON(&rdn, NULL);
   1330   }
   1331 
   1332   // Notify that it can exit now, we are done.
   1333   SetEvent(exited);
   1334 
   1335   CloseHandle(info.hProcess);
   1336   CloseHandle(info.hThread);
   1337 
   1338   delete pg;
   1339 }
   1340