Home | History | Annotate | Download | only in windows
      1 // Copyright (c) 2007, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 //
     30 // ---
     31 // Author: Craig Silverstein
     32 //
     33 // The main purpose of this file is to patch the libc allocation
     34 // routines (malloc and friends, but also _msize and other
     35 // windows-specific libc-style routines).  However, we also patch
     36 // windows routines to do accounting.  We do better at the former than
     37 // the latter.  Here are some comments from Paul Pluzhnikov about what
     38 // it might take to do a really good job patching windows routines to
     39 // keep track of memory usage:
     40 //
     41 // "You should intercept at least the following:
     42 //     HeapCreate HeapDestroy HeapAlloc HeapReAlloc HeapFree
     43 //     RtlCreateHeap RtlDestroyHeap RtlAllocateHeap RtlFreeHeap
     44 //     malloc calloc realloc free
     45 //     malloc_dbg calloc_dbg realloc_dbg free_dbg
     46 // Some of these call the other ones (but not always), sometimes
     47 // recursively (i.e. HeapCreate may call HeapAlloc on a different
     48 // heap, IIRC)."
     49 //
     50 // Since Paul didn't mention VirtualAllocEx, he may not have even been
     51 // considering all the mmap-like functions that windows has (or he may
     52 // just be ignoring it because he's seen we already patch it).  Of the
     53 // above, we do not patch the *_dbg functions, and of the windows
     54 // functions, we only patch HeapAlloc and HeapFree.
     55 //
     56 // The *_dbg functions come into play with /MDd, /MTd, and /MLd,
     57 // probably.  It may be ok to just turn off tcmalloc in those cases --
     58 // if the user wants the windows debug malloc, they probably don't
     59 // want tcmalloc!  We should also test with all of /MD, /MT, and /ML,
     60 // which we're not currently doing.
     61 
     62 // TODO(csilvers): try to do better here?  Paul does conclude:
     63 //                 "Keeping track of all of this was a nightmare."
     64 
     65 #ifndef _WIN32
     66 # error You should only be including windows/patch_functions.cc in a windows environment!
     67 #endif
     68 
     69 #include <config.h>
     70 
     71 #ifdef WIN32_OVERRIDE_ALLOCATORS
     72 #error This file is intended for patching allocators - use override_functions.cc instead.
     73 #endif
     74 
     75 // We use psapi.  Non-MSVC systems will have to link this in themselves.
     76 #ifdef _MSC_VER
     77 #pragma comment(lib, "Psapi.lib")
     78 #endif
     79 
     80 // Make sure we always use the 'old' names of the psapi functions.
     81 #ifndef PSAPI_VERSION
     82 #define PSAPI_VERSION 1
     83 #endif
     84 
     85 #include <windows.h>
     86 #include <stdio.h>
     87 #include <malloc.h>       // for _msize and _expand
     88 #include <Psapi.h>        // for EnumProcessModules, GetModuleInformation, etc.
     89 #include <set>
     90 #include <map>
     91 #include <vector>
     92 #include <base/logging.h>
     93 #include "base/spinlock.h"
     94 #include "gperftools/malloc_hook.h"
     95 #include "malloc_hook-inl.h"
     96 #include "preamble_patcher.h"
     97 
     98 // The maximum number of modules we allow to be in one executable
     99 const int kMaxModules = 8182;
    100 
    101 // These are hard-coded, unfortunately. :-( They are also probably
    102 // compiler specific.  See get_mangled_names.cc, in this directory,
    103 // for instructions on how to update these names for your compiler.
        // Each string is the decorated export name of one operator
        // new/delete variant.  The first four are referenced by
        // LibcInfo::function_name_; the nothrow names are defined here but
        // their function_name_ slots are currently NULL (patching disabled).
        // NOTE(review): these look like 32-bit MSVC decorations (size_t
        // spelled 'I', pointers 'PAX') -- confirm before relying on them
        // for a 64-bit build.
    104 const char kMangledNew[] = "??2@YAPAXI@Z";
    105 const char kMangledNewArray[] = "??_U@YAPAXI@Z";
    106 const char kMangledDelete[] = "??3@YAXPAX@Z";
    107 const char kMangledDeleteArray[] = "??_V@YAXPAX@Z";
    108 const char kMangledNewNothrow[] = "??2@YAPAXIABUnothrow_t@std@@@Z";
    109 const char kMangledNewArrayNothrow[] = "??_U@YAPAXIABUnothrow_t@std@@@Z";
    110 const char kMangledDeleteNothrow[] = "??3@YAXPAXABUnothrow_t@std@@@Z";
    111 const char kMangledDeleteArrayNothrow[] = "??_V@YAXPAXABUnothrow_t@std@@@Z";
    112 
    113 // This is an unused but exported symbol that we can use to tell the
    114 // MSVC linker to bring in libtcmalloc, via the /INCLUDE linker flag.
    115 // Without this, the linker will likely decide that libtcmalloc.dll
    116 // doesn't add anything to the executable (since it does all its work
    117 // through patching, which the linker can't see), and ignore it
    118 // entirely.  (The name 'tcmalloc' is already reserved for a
    119 // namespace.  I'd rather export a variable named "_tcmalloc", but I
    120 // couldn't figure out how to get that to work.  This function exports
    121 // the symbol "__tcmalloc".)
    122 extern "C" PERFTOOLS_DLL_DECL void _tcmalloc();
        // Deliberately empty: only the exported symbol matters, nothing
        // is expected to call it.
    123 void _tcmalloc() { }
    124 
    125 // This is the version needed for windows x64, which has a different
    126 // decoration scheme which doesn't auto-add a leading underscore.
    127 extern "C" PERFTOOLS_DLL_DECL void __tcmalloc();
        // Deliberately empty, for the same reason as _tcmalloc() above.
    128 void __tcmalloc() { }
    129 
    130 namespace {    // most everything here is in an unnamed namespace
    131 
    132 typedef void (*GenericFnPtr)();
    133 
    134 using sidestep::PreamblePatcher;
    135 
    136 struct ModuleEntryCopy;   // defined below
    137 
    138 // These functions are how we override the memory allocation
    139 // functions, just like tcmalloc.cc and malloc_hook.cc do.
    140 
    141 // This is information about the routines we're patching, for a given
    142 // module that implements libc memory routines.  A single executable
    143 // can have several libc implementations running about (in different
    144 // .dll's), and we need to patch/unpatch them all.  This defines
    145 // everything except the new functions we're patching in, which
    146 // are defined in LibcFunctions, below.
    147 class LibcInfo {
    148  public:
    149   LibcInfo() {
    150     memset(this, 0, sizeof(*this));  // easiest way to initialize the array
    151   }
    152 
    153   bool patched() const { return is_valid(); }
    154   void set_is_valid(bool b) { is_valid_ = b; }
    155   // According to http://msdn.microsoft.com/en-us/library/ms684229(VS.85).aspx:
    156   // "The load address of a module (lpBaseOfDll) is the same as the HMODULE
    157   // value."
    158   HMODULE hmodule() const {
    159     return reinterpret_cast<HMODULE>(const_cast<void*>(module_base_address_));
    160   }
    161 
    162   // Populates all the windows_fn_[] vars based on our module info.
    163   // Returns false if windows_fn_ is all NULL's, because there's
    164   // nothing to patch.  Also populates the rest of the module_entry
    165   // info, such as the module's name.
    166   bool PopulateWindowsFn(const ModuleEntryCopy& module_entry);
    167 
    168  protected:
    169   void CopyFrom(const LibcInfo& that) {
    170     if (this == &that)
    171       return;
    172     this->is_valid_ = that.is_valid_;
    173     memcpy(this->windows_fn_, that.windows_fn_, sizeof(windows_fn_));
    174     this->module_base_address_ = that.module_base_address_;
    175     this->module_base_size_ = that.module_base_size_;
    176   }
    177 
    178   enum {
    179     kMalloc, kFree, kRealloc, kCalloc,
    180     kNew, kNewArray, kDelete, kDeleteArray,
    181     kNewNothrow, kNewArrayNothrow, kDeleteNothrow, kDeleteArrayNothrow,
    182     // These are windows-only functions from malloc.h
    183     k_Msize, k_Expand,
    184     // A MS CRT "internal" function, implemented using _calloc_impl
    185     k_CallocCrt,
    186     kNumFunctions
    187   };
    188 
    189   // I'd like to put these together in a struct (perhaps in the
    190   // subclass, so we can put in perftools_fn_ as well), but vc8 seems
    191   // to have a bug where it doesn't initialize the struct properly if
    192   // we try to take the address of a function that's not yet loaded
    193   // from a dll, as is the common case for static_fn_.  So we need
    194   // each to be in its own array. :-(
    195   static const char* const function_name_[kNumFunctions];
    196 
    197   // This function is only used when statically linking the binary.
    198   // In that case, loading malloc/etc from the dll (via
    199   // PatchOneModule) won't work, since there are no dlls.  Instead,
    200   // you just want to be taking the address of malloc/etc directly.
    201   // In the common, non-static-link case, these pointers will all be
    202   // NULL, since this initializer runs before msvcrt.dll is loaded.
    203   static const GenericFnPtr static_fn_[kNumFunctions];
    204 
    205   // This is the address of the function we are going to patch
    206   // (malloc, etc).  Other info about the function is in the
    207   // patch-specific subclasses, below.
    208   GenericFnPtr windows_fn_[kNumFunctions];
    209 
    210   // This is set to true when this structure is initialized (because
    211   // we're patching a new library) and set to false when it's
    212   // uninitialized (because we've freed that library).
    213   bool is_valid_;
    214 
    215   const void *module_base_address_;
    216   size_t module_base_size_;
    217 
    218  public:
    219   // These shouldn't have to be public, since only subclasses of
    220   // LibcInfo need it, but they do.  Maybe something to do with
    221   // templates.  Shrug.  I hide them down here so users won't see
    222   // them. :-)  (OK, I also need to define ctrgProcAddress late.)
    223   bool is_valid() const { return is_valid_; }
    224   GenericFnPtr windows_fn(int ifunction) const {
    225     return windows_fn_[ifunction];
    226   }
    227   // These three are needed by ModuleEntryCopy.
    228   static const int ctrgProcAddress = kNumFunctions;
    229   static GenericFnPtr static_fn(int ifunction) {
    230     return static_fn_[ifunction];
    231   }
    232   static const char* const function_name(int ifunction) {
    233     return function_name_[ifunction];
    234   }
    235 };
    236 
    237 // Template trickiness: logically, a LibcInfo would include
    238 // Windows_malloc_, origstub_malloc_, and Perftools_malloc_: for a
    239 // given module, these three go together.  And in fact,
    240 // Perftools_malloc_ may need to call origstub_malloc_, which means we
    241 // either need to change Perftools_malloc_ to take origstub_malloc_ as
    242 // an argument -- unfortunately impossible since it needs to keep the
    243 // same API as normal malloc -- or we need to write a different
    244 // version of Perftools_malloc_ for each LibcInfo instance we create.
    245 // We choose the second route, and use templates to implement it (we
    246 // could have also used macros).  So to get multiple versions
    247 // of the struct, we say "struct<1> var1; struct<2> var2;".  The price
    248 // we pay is some code duplication, and more annoying, each instance
    249 // of this var is a separate type.
    250 template<int> class LibcInfoWithPatchFunctions : public LibcInfo {
    251  public:
    252   // me_info should have had PopulateWindowsFn() called on it, so the
    253   // module_* vars and windows_fn_ are set up.
          // Copies me_info into this object, redirects every non-NULL
          // windows_fn_ entry to the matching perftools_fn_ entry, and marks
          // this object valid.
    254   bool Patch(const LibcInfo& me_info);
          // Restores the routines redirected by Patch() and marks this object
          // as no longer valid.
    255   void Unpatch();
    256 
    257  private:
    258   // This holds the original function contents after we patch the function.
    259   // This has to be defined static in the subclass, because the perftools_fns
    260   // reference origstub_fn_.
    261   static GenericFnPtr origstub_fn_[kNumFunctions];
    262 
    263   // This is the function we want to patch in
    264   static const GenericFnPtr perftools_fn_[kNumFunctions];
    265 
          // Replacement routines referenced from perftools_fn_.  There is no
          // dedicated replacement for _calloc_crt: perftools_fn_ reuses
          // Perftools_calloc for that slot.
    266   static void* Perftools_malloc(size_t size) __THROW;
    267   static void Perftools_free(void* ptr) __THROW;
    268   static void* Perftools_realloc(void* ptr, size_t size) __THROW;
    269   static void* Perftools_calloc(size_t nmemb, size_t size) __THROW;
    270   static void* Perftools_new(size_t size);
    271   static void* Perftools_newarray(size_t size);
    272   static void Perftools_delete(void *ptr);
    273   static void Perftools_deletearray(void *ptr);
    274   static void* Perftools_new_nothrow(size_t size,
    275                                      const std::nothrow_t&) __THROW;
    276   static void* Perftools_newarray_nothrow(size_t size,
    277                                           const std::nothrow_t&) __THROW;
    278   static void Perftools_delete_nothrow(void *ptr,
    279                                        const std::nothrow_t&) __THROW;
    280   static void Perftools_deletearray_nothrow(void *ptr,
    281                                             const std::nothrow_t&) __THROW;
    282   static size_t Perftools__msize(void *ptr) __THROW;
    283   static void* Perftools__expand(void *ptr, size_t size) __THROW;
    284   // malloc.h also defines these functions:
    285   //   _aligned_malloc, _aligned_free,
    286   //   _recalloc, _aligned_offset_malloc, _aligned_realloc, _aligned_recalloc
    287   //   _aligned_offset_realloc, _aligned_offset_recalloc, _malloca, _freea
    288   // But they seem pretty obscure, and I'm fine not overriding them for now.
    289   // It may be they all call into malloc/free anyway.
    290 };
    291 
    292 // This is a subset of MODULEENTRY32, that we need for patching.
    293 struct ModuleEntryCopy {
    294   LPVOID  modBaseAddr;     // the same as hmodule
    295   DWORD   modBaseSize;
    296   // This is not part of MODDULEENTRY32, but is needed to avoid making
    297   // windows syscalls while we're holding patch_all_modules_lock (see
    298   // lock-inversion comments at patch_all_modules_lock definition, below).
    299   GenericFnPtr rgProcAddresses[LibcInfo::ctrgProcAddress];
    300 
    301   ModuleEntryCopy() {
    302     modBaseAddr = NULL;
    303     modBaseSize = 0;
    304     for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
    305       rgProcAddresses[i] = LibcInfo::static_fn(i);
    306   }
    307   ModuleEntryCopy(const MODULEINFO& mi) {
    308     this->modBaseAddr = mi.lpBaseOfDll;
    309     this->modBaseSize = mi.SizeOfImage;
    310     LPVOID modEndAddr = (char*)mi.lpBaseOfDll + mi.SizeOfImage;
    311     for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) {
    312       FARPROC target = ::GetProcAddress(
    313           reinterpret_cast<const HMODULE>(mi.lpBaseOfDll),
    314           LibcInfo::function_name(i));
    315       // Sometimes a DLL forwards a function to a function in another
    316       // DLL.  We don't want to patch those forwarded functions --
    317       // they'll get patched when the other DLL is processed.
    318       if (target >= modBaseAddr && target < modEndAddr)
    319         rgProcAddresses[i] = (GenericFnPtr)target;
    320       else
    321         rgProcAddresses[i] = (GenericFnPtr)NULL;
    322     }
    323   }
    324 };
    325 
    326 // This class is easier because there's only one of them.
    327 class WindowsInfo {
    328  public:
          // Looks up every routine listed in function_info_ in kernel32 and
          // redirects it to its perftools_fn replacement.
    329   void Patch();
          // Restores every routine listed in function_info_.
    330   void Unpatch();
    331 
    332  private:
    333   // TODO(csilvers): should we be patching GlobalAlloc/LocalAlloc instead,
    334   //                 for pre-XP systems?
          // Index of each routine in function_info_.
    335   enum {
    336     kHeapAlloc, kHeapFree, kVirtualAllocEx, kVirtualFreeEx,
    337     kMapViewOfFileEx, kUnmapViewOfFile, kLoadLibraryExW, kFreeLibrary,
    338     kNumFunctions
    339   };
    340 
    341   struct FunctionInfo {
    342     const char* const name;          // name of fn in a module (eg "malloc")
    343     GenericFnPtr windows_fn;         // the fn whose name we call (&malloc)
    344     GenericFnPtr origstub_fn;        // original fn contents after we patch
    345     const GenericFnPtr perftools_fn; // fn we want to patch in
    346   };
    347 
    348   static FunctionInfo function_info_[kNumFunctions];
    349 
    350   // A Windows-API equivalent of malloc and free
    351   static LPVOID WINAPI Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags,
    352                                            DWORD_PTR dwBytes);
    353   static BOOL WINAPI Perftools_HeapFree(HANDLE hHeap, DWORD dwFlags,
    354                                         LPVOID lpMem);
    355   // A Windows-API equivalent of mmap and munmap, for "anonymous regions"
    356   static LPVOID WINAPI Perftools_VirtualAllocEx(HANDLE process, LPVOID address,
    357                                                 SIZE_T size, DWORD type,
    358                                                 DWORD protect);
    359   static BOOL WINAPI Perftools_VirtualFreeEx(HANDLE process, LPVOID address,
    360                                              SIZE_T size, DWORD type);
    361   // A Windows-API equivalent of mmap and munmap, for actual files
    362   static LPVOID WINAPI Perftools_MapViewOfFileEx(HANDLE hFileMappingObject,
    363                                                  DWORD dwDesiredAccess,
    364                                                  DWORD dwFileOffsetHigh,
    365                                                  DWORD dwFileOffsetLow,
    366                                                  SIZE_T dwNumberOfBytesToMap,
    367                                                  LPVOID lpBaseAddress);
    368   static BOOL WINAPI Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress);
    369   // We don't need the other 3 variants because they all call this one.
    370   static HMODULE WINAPI Perftools_LoadLibraryExW(LPCWSTR lpFileName,
    371                                                  HANDLE hFile,
    372                                                  DWORD dwFlags);
    373   static BOOL WINAPI Perftools_FreeLibrary(HMODULE hLibModule);
    374 };
    375 
    376 // If you run out, just add a few more to the array.  You'll also need
    377 // to update the switch statement in PatchOneModule(), and the list in
    378 // UnpatchWindowsFunctions().
    379 // main_executable and main_executable_windows are two windows into
    380 // the same executable.  One is responsible for patching the libc
    381 // routines that live in the main executable (if any) to use tcmalloc;
    382 // the other is responsible for patching the windows routines like
    383 // HeapAlloc/etc to use tcmalloc.
    384 static LibcInfoWithPatchFunctions<0> main_executable;
    385 static LibcInfoWithPatchFunctions<1> libc1;
    386 static LibcInfoWithPatchFunctions<2> libc2;
    387 static LibcInfoWithPatchFunctions<3> libc3;
    388 static LibcInfoWithPatchFunctions<4> libc4;
    389 static LibcInfoWithPatchFunctions<5> libc5;
    390 static LibcInfoWithPatchFunctions<6> libc6;
    391 static LibcInfoWithPatchFunctions<7> libc7;
    392 static LibcInfoWithPatchFunctions<8> libc8;
        // Base-class view of the per-module slots, so they can be scanned in
        // a loop even though each libcN has its own type.  main_executable is
        // intentionally not part of this list.
    393 static LibcInfo* g_module_libcs[] = {
    394   &libc1, &libc2, &libc3, &libc4, &libc5, &libc6, &libc7, &libc8
    395 };
    396 static WindowsInfo main_executable_windows;
    397 
        // Export name of each routine, in the order of the LibcInfo enum
        // (kMalloc ... k_CallocCrt); all the per-routine tables are indexed
        // by those enum values, so the order here must not drift.
    398 const char* const LibcInfo::function_name_[] = {
    399   "malloc", "free", "realloc", "calloc",
    400   kMangledNew, kMangledNewArray, kMangledDelete, kMangledDeleteArray,
    401   // Ideally we should patch the nothrow versions of new/delete, but
    402   // at least in msvcrt, nothrow-new machine-code is of a type we
    403   // can't patch.  Since these are relatively rare, I'm hoping it's ok
    404   // not to patch them.  (NULL name turns off patching.)
    405   NULL,  // kMangledNewNothrow,
    406   NULL,  // kMangledNewArrayNothrow,
    407   NULL,  // kMangledDeleteNothrow,
    408   NULL,  // kMangledDeleteArrayNothrow,
    409   "_msize", "_expand", "_calloc_crt",
    410 };
    411 
    412 // For mingw, I can't patch the new/delete here, because the
    413 // instructions are too small to patch.  Luckily, they're so small
    414 // because all they do is call into malloc/free, so they still end up
    415 // calling tcmalloc routines, and we don't actually lose anything
    416 // (except maybe some stacktrace goodness) by not patching.
        // Addresses of the routines as linked into this binary, in the order
        // of the LibcInfo enum.  The explicit function-pointer casts on
        // operator new/delete select one specific overload each.
    417 const GenericFnPtr LibcInfo::static_fn_[] = {
    418   (GenericFnPtr)&::malloc,
    419   (GenericFnPtr)&::free,
    420   (GenericFnPtr)&::realloc,
    421   (GenericFnPtr)&::calloc,
    422 #ifdef __MINGW32__
    423   NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    424 #else
    425   (GenericFnPtr)(void*(*)(size_t))&::operator new,
    426   (GenericFnPtr)(void*(*)(size_t))&::operator new[],
    427   (GenericFnPtr)(void(*)(void*))&::operator delete,
    428   (GenericFnPtr)(void(*)(void*))&::operator delete[],
    429   (GenericFnPtr)
    430   (void*(*)(size_t, struct std::nothrow_t const &))&::operator new,
    431   (GenericFnPtr)
    432   (void*(*)(size_t, struct std::nothrow_t const &))&::operator new[],
    433   (GenericFnPtr)
    434   (void(*)(void*, struct std::nothrow_t const &))&::operator delete,
    435   (GenericFnPtr)
    436   (void(*)(void*, struct std::nothrow_t const &))&::operator delete[],
    437 #endif
    438   (GenericFnPtr)&::_msize,
    439   (GenericFnPtr)&::_expand,
          // k_CallocCrt slot: plain calloc is used as the static address here.
    440   (GenericFnPtr)&::calloc,
    441 };
    442 
        // One stub table per template argument T, so every patch unit keeps
        // its own record of the routines it replaced.
    443 template<int T> GenericFnPtr LibcInfoWithPatchFunctions<T>::origstub_fn_[] = {
    444   // This will get filled in at run-time, as patching is done.
    445 };
    446 
        // Replacement routine for each slot, in the order of the LibcInfo
        // enum.  Each instantiation of the template refers to its own set of
        // Perftools_* functions.
    447 template<int T>
    448 const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = {
    449   (GenericFnPtr)&Perftools_malloc,
    450   (GenericFnPtr)&Perftools_free,
    451   (GenericFnPtr)&Perftools_realloc,
    452   (GenericFnPtr)&Perftools_calloc,
    453   (GenericFnPtr)&Perftools_new,
    454   (GenericFnPtr)&Perftools_newarray,
    455   (GenericFnPtr)&Perftools_delete,
    456   (GenericFnPtr)&Perftools_deletearray,
    457   (GenericFnPtr)&Perftools_new_nothrow,
    458   (GenericFnPtr)&Perftools_newarray_nothrow,
    459   (GenericFnPtr)&Perftools_delete_nothrow,
    460   (GenericFnPtr)&Perftools_deletearray_nothrow,
    461   (GenericFnPtr)&Perftools__msize,
    462   (GenericFnPtr)&Perftools__expand,
          // k_CallocCrt slot: _calloc_crt is redirected to Perftools_calloc.
    463   (GenericFnPtr)&Perftools_calloc,
    464 };
    465 
        // One row per routine, in the order of the WindowsInfo enum.  Columns
        // follow FunctionInfo: name, windows_fn, origstub_fn, perftools_fn.
        // windows_fn and origstub_fn start out NULL and are filled in by
        // WindowsInfo::Patch().
    466 /*static*/ WindowsInfo::FunctionInfo WindowsInfo::function_info_[] = {
    467   { "HeapAlloc", NULL, NULL, (GenericFnPtr)&Perftools_HeapAlloc },
    468   { "HeapFree", NULL, NULL, (GenericFnPtr)&Perftools_HeapFree },
    469   { "VirtualAllocEx", NULL, NULL, (GenericFnPtr)&Perftools_VirtualAllocEx },
    470   { "VirtualFreeEx", NULL, NULL, (GenericFnPtr)&Perftools_VirtualFreeEx },
    471   { "MapViewOfFileEx", NULL, NULL, (GenericFnPtr)&Perftools_MapViewOfFileEx },
    472   { "UnmapViewOfFile", NULL, NULL, (GenericFnPtr)&Perftools_UnmapViewOfFile },
    473   { "LoadLibraryExW", NULL, NULL, (GenericFnPtr)&Perftools_LoadLibraryExW },
    474   { "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary },
    475 };
    476 
    477 bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) {
    478   // First, store the location of the function to patch before
    479   // patching it.  If none of these functions are found in the module,
    480   // then this module has no libc in it, and we just return false.
    481   for (int i = 0; i < kNumFunctions; i++) {
    482     if (!function_name_[i])     // we can turn off patching by unsetting name
    483       continue;
    484     // The ::GetProcAddress calls were done in the ModuleEntryCopy
    485     // constructor, so we don't have to make any windows calls here.
    486     const GenericFnPtr fn = module_entry.rgProcAddresses[i];
    487     if (fn) {
    488       windows_fn_[i] = PreamblePatcher::ResolveTarget(fn);
    489     }
    490   }
    491 
    492   // Some modules use the same function pointer for new and new[].  If
    493   // we find that, set one of the pointers to NULL so we don't double-
    494   // patch.  Same may happen with new and nothrow-new, or even new[]
    495   // and nothrow-new.  It's easiest just to check each fn-ptr against
    496   // every other.
    497   for (int i = 0; i < kNumFunctions; i++) {
    498     for (int j = i+1; j < kNumFunctions; j++) {
    499       if (windows_fn_[i] == windows_fn_[j]) {
    500         // We NULL the later one (j), so as to minimize the chances we
    501         // NULL kFree and kRealloc.  See comments below.  This is fragile!
    502         windows_fn_[j] = NULL;
    503       }
    504     }
    505   }
    506 
    507   // There's always a chance that our module uses the same function
    508   // as another module that we've already loaded.  In that case, we
    509   // need to set our windows_fn to NULL, to avoid double-patching.
    510   for (int ifn = 0; ifn < kNumFunctions; ifn++) {
    511     for (int imod = 0;
    512          imod < sizeof(g_module_libcs)/sizeof(*g_module_libcs);  imod++) {
    513       if (g_module_libcs[imod]->is_valid() &&
    514           this->windows_fn(ifn) == g_module_libcs[imod]->windows_fn(ifn)) {
    515         windows_fn_[ifn] = NULL;
    516       }
    517     }
    518   }
    519 
    520   bool found_non_null = false;
    521   for (int i = 0; i < kNumFunctions; i++) {
    522     if (windows_fn_[i])
    523       found_non_null = true;
    524   }
    525   if (!found_non_null)
    526     return false;
    527 
    528   // It's important we didn't NULL out windows_fn_[kFree] or [kRealloc].
    529   // The reason is, if those are NULL-ed out, we'll never patch them
    530   // and thus never get an origstub_fn_ value for them, and when we
    531   // try to call origstub_fn_[kFree/kRealloc] in Perftools_free and
    532   // Perftools_realloc, below, it will fail.  We could work around
    533   // that by adding a pointer from one patch-unit to the other, but we
    534   // haven't needed to yet.
    535   CHECK(windows_fn_[kFree]);
    536   CHECK(windows_fn_[kRealloc]);
    537 
    538   // OK, we successfully populated.  Let's store our member information.
    539   module_base_address_ = module_entry.modBaseAddr;
    540   module_base_size_ = module_entry.modBaseSize;
    541   return true;
    542 }
    543 
    544 template<int T>
    545 bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) {
    546   CopyFrom(me_info);   // copies the module_entry and the windows_fn_ array
    547   for (int i = 0; i < kNumFunctions; i++) {
    548     if (windows_fn_[i] && windows_fn_[i] != perftools_fn_[i]) {
    549       // if origstub_fn_ is not NULL, it's left around from a previous
    550       // patch.  We need to set it to NULL for the new Patch call.
    551       // Since we've patched Unpatch() not to delete origstub_fn_ (it
    552       // causes problems in some contexts, though obviously not this
    553       // one), we should delete it now, before setting it to NULL.
    554       // NOTE: casting from a function to a pointer is contra the C++
    555       //       spec.  It's not safe on IA64, but is on i386.  We use
    556       //       a C-style cast here to emphasize this is not legal C++.
    557       delete[] (char*)(origstub_fn_[i]);
    558       origstub_fn_[i] = NULL;   // Patch() will fill this in
    559       CHECK_EQ(sidestep::SIDESTEP_SUCCESS,
    560                PreamblePatcher::Patch(windows_fn_[i], perftools_fn_[i],
    561                                       &origstub_fn_[i]));
    562     }
    563   }
    564   set_is_valid(true);
    565   return true;
    566 }
    567 
    568 template<int T>
    569 void LibcInfoWithPatchFunctions<T>::Unpatch() {
    570   // We have to cast our GenericFnPtrs to void* for unpatch.  This is
    571   // contra the C++ spec; we use C-style casts to empahsize that.
    572   for (int i = 0; i < kNumFunctions; i++) {
    573     if (windows_fn_[i])
    574       CHECK_EQ(sidestep::SIDESTEP_SUCCESS,
    575                PreamblePatcher::Unpatch((void*)windows_fn_[i],
    576                                         (void*)perftools_fn_[i],
    577                                         (void*)origstub_fn_[i]));
    578   }
    579   set_is_valid(false);
    580 }
    581 
    582 void WindowsInfo::Patch() {
    583   HMODULE hkernel32 = ::GetModuleHandleA("kernel32");
    584   CHECK_NE(hkernel32, NULL);
    585 
    586   // Unlike for libc, we know these exist in our module, so we can get
    587   // and patch at the same time.
    588   for (int i = 0; i < kNumFunctions; i++) {
    589     function_info_[i].windows_fn = (GenericFnPtr)
    590         ::GetProcAddress(hkernel32, function_info_[i].name);
    591     // If origstub_fn is not NULL, it's left around from a previous
    592     // patch.  We need to set it to NULL for the new Patch call.
    593     // Since we've patched Unpatch() not to delete origstub_fn_ (it
    594     // causes problems in some contexts, though obviously not this
    595     // one), we should delete it now, before setting it to NULL.
    596     // NOTE: casting from a function to a pointer is contra the C++
    597     //       spec.  It's not safe on IA64, but is on i386.  We use
    598     //       a C-style cast here to emphasize this is not legal C++.
    599     delete[] (char*)(function_info_[i].origstub_fn);
    600     function_info_[i].origstub_fn = NULL;  // Patch() will fill this in
    601     CHECK_EQ(sidestep::SIDESTEP_SUCCESS,
    602              PreamblePatcher::Patch(function_info_[i].windows_fn,
    603                                     function_info_[i].perftools_fn,
    604                                     &function_info_[i].origstub_fn));
    605   }
    606 }
    607 
    608 void WindowsInfo::Unpatch() {
    609   // We have to cast our GenericFnPtrs to void* for unpatch.  This is
    610   // contra the C++ spec; we use C-style casts to empahsize that.
    611   for (int i = 0; i < kNumFunctions; i++) {
    612     CHECK_EQ(sidestep::SIDESTEP_SUCCESS,
    613              PreamblePatcher::Unpatch((void*)function_info_[i].windows_fn,
    614                                       (void*)function_info_[i].perftools_fn,
    615                                       (void*)function_info_[i].origstub_fn));
    616   }
    617 }
    618 
    619 // You should hold the patch_all_modules_lock when calling this.
    620 void PatchOneModuleLocked(const LibcInfo& me_info) {
    621   // If we don't already have info on this module, let's add it.  This
    622   // is where we're sad that each libcX has a different type, so we
    623   // can't use an array; instead, we have to use a switch statement.
    624   // Patch() returns false if there were no libc functions in the module.
    625   for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) {
    626     if (!g_module_libcs[i]->is_valid()) {   // found an empty spot to add!
    627       switch (i) {
    628         case 0: libc1.Patch(me_info); return;
    629         case 1: libc2.Patch(me_info); return;
    630         case 2: libc3.Patch(me_info); return;
    631         case 3: libc4.Patch(me_info); return;
    632         case 4: libc5.Patch(me_info); return;
    633         case 5: libc6.Patch(me_info); return;
    634         case 6: libc7.Patch(me_info); return;
    635         case 7: libc8.Patch(me_info); return;
    636       }
    637     }
    638   }
    639   printf("PERFTOOLS ERROR: Too many modules containing libc in this executable\n");
    640 }
    641 
    642 void PatchMainExecutableLocked() {
    643   if (main_executable.patched())
    644     return;    // main executable has already been patched
    645   ModuleEntryCopy fake_module_entry;   // make a fake one to pass into Patch()
    646   // No need to call PopulateModuleEntryProcAddresses on the main executable.
    647   main_executable.PopulateWindowsFn(fake_module_entry);
    648   main_executable.Patch(main_executable);
    649 }
    650 
    651 // This lock is subject to a subtle and annoying lock inversion
    652 // problem: it may interact badly with unknown internal windows locks.
    653 // In particular, windows may be holding a lock when it calls
    654 // LoadLibraryExW and FreeLibrary, which we've patched.  We have those
    655 // routines call PatchAllModules, which acquires this lock.  If we
    656 // make windows system calls while holding this lock, those system
    657 // calls may need the internal windows locks that are being held in
    658 // the call to LoadLibraryExW, resulting in deadlock.  The solution is
    659 // to be very careful not to call *any* windows routines while holding
    660 // patch_all_modules_lock, inside PatchAllModules().
    661 static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED);
    662 
    663 // last_loaded: The set of modules that were loaded the last time
    664 // PatchAllModules was called.  This is an optimization for only
    665 // looking at modules that were added or removed from the last call.
    666 static std::set<HMODULE> *g_last_loaded;
    667 
    668 // Iterates over all the modules currently loaded by the executable,
    669 // according to windows, and makes sure they're all patched.  Most
    670 // modules will already be in loaded_modules, meaning we have already
    671 // loaded and either patched them or determined they did not need to
    672 // be patched.  Others will not, which means we need to patch them
    673 // (if necessary).  Finally, we have to go through the existing
    674 // g_module_libcs and see if any of those are *not* in the modules
    675 // currently loaded by the executable.  If so, we need to invalidate
    676 // them.  Returns true if we did any work (patching or invalidating),
    677 // false if we were a noop.  May update loaded_modules as well.
    678 // NOTE: you must hold the patch_all_modules_lock to access loaded_modules.
    679 bool PatchAllModules() {
    680   std::vector<ModuleEntryCopy> modules;
    681   bool made_changes = false;
    682 
    683   const HANDLE hCurrentProcess = GetCurrentProcess();
    684   DWORD num_modules = 0;
    685   HMODULE hModules[kMaxModules];  // max # of modules we support in one process
    686   if (!::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules),
    687                             &num_modules)) {
    688     num_modules = 0;
    689   }
    690   // EnumProcessModules actually set the bytes written into hModules,
    691   // so we need to divide to make num_modules actually be a module-count.
    692   num_modules /= sizeof(*hModules);
    693   if (num_modules >= kMaxModules) {
    694     printf("PERFTOOLS ERROR: Too many modules in this executable to try"
    695            " to patch them all (if you need to, raise kMaxModules in"
    696            " patch_functions.cc).\n");
    697     num_modules = kMaxModules;
    698   }
    699 
    700   // Now we handle the unpatching of modules we have in g_module_libcs
    701   // but that were not found in EnumProcessModules.  We need to
    702   // invalidate them.  To speed that up, we store the EnumProcessModules
    703   // output in a set.
    704   // At the same time, we prepare for the adding of new modules, by
    705   // removing from hModules all the modules we know we've already
    706   // patched (or decided don't need to be patched).  At the end,
    707   // hModules will hold only the modules that we need to consider patching.
    708   std::set<HMODULE> currently_loaded_modules;
    709   {
    710     SpinLockHolder h(&patch_all_modules_lock);
    711     if (!g_last_loaded)  g_last_loaded = new std::set<HMODULE>;
    712     // At the end of this loop, currently_loaded_modules contains the
    713     // full list of EnumProcessModules, and hModules just the ones we
    714     // haven't handled yet.
    715     for (int i = 0; i < num_modules; ) {
    716       currently_loaded_modules.insert(hModules[i]);
    717       if (g_last_loaded->count(hModules[i]) > 0) {
    718         hModules[i] = hModules[--num_modules];  // replace element i with tail
    719       } else {
    720         i++;                                    // keep element i
    721       }
    722     }
    723     // Now we do the unpatching/invalidation.
    724     for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) {
    725       if (g_module_libcs[i]->patched() &&
    726           currently_loaded_modules.count(g_module_libcs[i]->hmodule()) == 0) {
    727         // Means g_module_libcs[i] is no longer loaded (no me32 matched).
    728         // We could call Unpatch() here, but why bother?  The module
    729         // has gone away, so nobody is going to call into it anyway.
    730         g_module_libcs[i]->set_is_valid(false);
    731         made_changes = true;
    732       }
    733     }
    734     // Update the loaded module cache.
    735     g_last_loaded->swap(currently_loaded_modules);
    736   }
    737 
    738   // Now that we know what modules are new, let's get the info we'll
    739   // need to patch them.  Note this *cannot* be done while holding the
    740   // lock, since it needs to make windows calls (see the lock-inversion
    741   // comments before the definition of patch_all_modules_lock).
    742   MODULEINFO mi;
    743   for (int i = 0; i < num_modules; i++) {
    744     if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi)))
    745       modules.push_back(ModuleEntryCopy(mi));
    746   }
    747 
    748   // Now we can do the patching of new modules.
    749   {
    750     SpinLockHolder h(&patch_all_modules_lock);
    751     for (std::vector<ModuleEntryCopy>::iterator it = modules.begin();
    752          it != modules.end(); ++it) {
    753       LibcInfo libc_info;
    754       if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines
    755         PatchOneModuleLocked(libc_info);
    756         made_changes = true;
    757       }
    758     }
    759 
    760     // Now that we've dealt with the modules (dlls), update the main
    761     // executable.  We do this last because PatchMainExecutableLocked
    762     // wants to look at how other modules were patched.
    763     if (!main_executable.patched()) {
    764       PatchMainExecutableLocked();
    765       made_changes = true;
    766     }
    767   }
    768   // TODO(csilvers): for this to be reliable, we need to also take
    769   // into account if we *would* have patched any modules had they not
    770   // already been loaded.  (That is, made_changes should ignore
    771   // g_last_loaded.)
    772   return made_changes;
    773 }
    774 
    775 
    776 }  // end unnamed namespace
    777 
    778 // ---------------------------------------------------------------------
    779 // Now that we've done all the patching machinery, let's actually
    780 // define the functions we're patching in.  Mostly these are
    781 // simple wrappers around the do_* routines in tcmalloc.cc.
    782 //
    783 // In fact, we #include tcmalloc.cc to get at the tcmalloc internal
    784 // do_* functions, the better to write our own hook functions.
    785 // U-G-L-Y, I know.  But the alternatives are, perhaps, worse.  This
    786 // also lets us define _msize(), _expand(), and other windows-specific
    787 // functions here, using tcmalloc internals, without polluting
    788 // tcmalloc.cc.
    789 // -------------------------------------------------------------------
    790 
    791 // TODO(csilvers): refactor tcmalloc.cc into two files, so I can link
    792 // against the file with do_malloc, and ignore the one with malloc.
    793 #include "tcmalloc.cc"
    794 
    795 template<int T>
    796 void* LibcInfoWithPatchFunctions<T>::Perftools_malloc(size_t size) __THROW {
    797   void* result = do_malloc_or_cpp_alloc(size);
    798   MallocHook::InvokeNewHook(result, size);
    799   return result;
    800 }
    801 
    802 template<int T>
    803 void LibcInfoWithPatchFunctions<T>::Perftools_free(void* ptr) __THROW {
    804   MallocHook::InvokeDeleteHook(ptr);
    805   // This calls the windows free if do_free decides ptr was not
    806   // allocated by tcmalloc.  Note it calls the origstub_free from
    807   // *this* templatized instance of LibcInfo.  See "template
    808   // trickiness" above.
    809   do_free_with_callback(ptr, (void (*)(void*))origstub_fn_[kFree]);
    810 }
    811 
    812 template<int T>
    813 void* LibcInfoWithPatchFunctions<T>::Perftools_realloc(
    814     void* old_ptr, size_t new_size) __THROW {
    815   if (old_ptr == NULL) {
    816     void* result = do_malloc_or_cpp_alloc(new_size);
    817     MallocHook::InvokeNewHook(result, new_size);
    818     return result;
    819   }
    820   if (new_size == 0) {
    821     MallocHook::InvokeDeleteHook(old_ptr);
    822     do_free_with_callback(old_ptr,
    823                           (void (*)(void*))origstub_fn_[kFree]);
    824     return NULL;
    825   }
    826   return do_realloc_with_callback(
    827       old_ptr, new_size,
    828       (void (*)(void*))origstub_fn_[kFree],
    829       (size_t (*)(const void*))origstub_fn_[k_Msize]);
    830 }
    831 
    832 template<int T>
    833 void* LibcInfoWithPatchFunctions<T>::Perftools_calloc(
    834     size_t n, size_t elem_size) __THROW {
    835   void* result = do_calloc(n, elem_size);
    836   MallocHook::InvokeNewHook(result, n * elem_size);
    837   return result;
    838 }
    839 
    840 template<int T>
    841 void* LibcInfoWithPatchFunctions<T>::Perftools_new(size_t size) {
    842   void* p = cpp_alloc(size, false);
    843   MallocHook::InvokeNewHook(p, size);
    844   return p;
    845 }
    846 
    847 template<int T>
    848 void* LibcInfoWithPatchFunctions<T>::Perftools_newarray(size_t size) {
    849   void* p = cpp_alloc(size, false);
    850   MallocHook::InvokeNewHook(p, size);
    851   return p;
    852 }
    853 
    854 template<int T>
    855 void LibcInfoWithPatchFunctions<T>::Perftools_delete(void *p) {
    856   MallocHook::InvokeDeleteHook(p);
    857   do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree]);
    858 }
    859 
    860 template<int T>
    861 void LibcInfoWithPatchFunctions<T>::Perftools_deletearray(void *p) {
    862   MallocHook::InvokeDeleteHook(p);
    863   do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree]);
    864 }
    865 
    866 template<int T>
    867 void* LibcInfoWithPatchFunctions<T>::Perftools_new_nothrow(
    868     size_t size, const std::nothrow_t&) __THROW {
    869   void* p = cpp_alloc(size, true);
    870   MallocHook::InvokeNewHook(p, size);
    871   return p;
    872 }
    873 
    874 template<int T>
    875 void* LibcInfoWithPatchFunctions<T>::Perftools_newarray_nothrow(
    876     size_t size, const std::nothrow_t&) __THROW {
    877   void* p = cpp_alloc(size, true);
    878   MallocHook::InvokeNewHook(p, size);
    879   return p;
    880 }
    881 
    882 template<int T>
    883 void LibcInfoWithPatchFunctions<T>::Perftools_delete_nothrow(
    884     void *p, const std::nothrow_t&) __THROW {
    885   MallocHook::InvokeDeleteHook(p);
    886   do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree]);
    887 }
    888 
    889 template<int T>
    890 void LibcInfoWithPatchFunctions<T>::Perftools_deletearray_nothrow(
    891     void *p, const std::nothrow_t&) __THROW {
    892   MallocHook::InvokeDeleteHook(p);
    893   do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree]);
    894 }
    895 
    896 
    897 // _msize() lets you figure out how much space is reserved for a
    898 // pointer, in Windows.  Even if applications don't call it, any DLL
    899 // with global constructors will call (transitively) something called
    900 // __dllonexit_lk in order to make sure the destructors get called
    901 // when the dll unloads.  And that will call msize -- horrible things
    902 // can ensue if this is not hooked.  Other parts of libc may also call
    903 // this internally.
    904 
    905 template<int T>
    906 size_t LibcInfoWithPatchFunctions<T>::Perftools__msize(void* ptr) __THROW {
    907   return GetSizeWithCallback(ptr, (size_t (*)(const void*))origstub_fn_[k_Msize]);
    908 }
    909 
    910 // We need to define this because internal windows functions like to
    911 // call into it(?).  _expand() is like realloc but doesn't move the
    912 // pointer.  We punt, which will cause callers to fall back on realloc.
    913 template<int T>
    914 void* LibcInfoWithPatchFunctions<T>::Perftools__expand(void *ptr,
    915                                                        size_t size) __THROW {
    916   return NULL;
    917 }
    918 
    919 LPVOID WINAPI WindowsInfo::Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags,
    920                                                DWORD_PTR dwBytes) {
    921   LPVOID result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD_PTR))
    922                    function_info_[kHeapAlloc].origstub_fn)(
    923                        hHeap, dwFlags, dwBytes);
    924   MallocHook::InvokeNewHook(result, dwBytes);
    925   return result;
    926 }
    927 
    928 BOOL WINAPI WindowsInfo::Perftools_HeapFree(HANDLE hHeap, DWORD dwFlags,
    929                                             LPVOID lpMem) {
    930   MallocHook::InvokeDeleteHook(lpMem);
    931   return ((BOOL (WINAPI *)(HANDLE, DWORD, LPVOID))
    932           function_info_[kHeapFree].origstub_fn)(
    933               hHeap, dwFlags, lpMem);
    934 }
    935 
    936 LPVOID WINAPI WindowsInfo::Perftools_VirtualAllocEx(HANDLE process,
    937                                                     LPVOID address,
    938                                                     SIZE_T size, DWORD type,
    939                                                     DWORD protect) {
    940   LPVOID result = ((LPVOID (WINAPI *)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD))
    941                    function_info_[kVirtualAllocEx].origstub_fn)(
    942                        process, address, size, type, protect);
    943   // VirtualAllocEx() seems to be the Windows equivalent of mmap()
    944   MallocHook::InvokeMmapHook(result, address, size, protect, type, -1, 0);
    945   return result;
    946 }
    947 
    948 BOOL WINAPI WindowsInfo::Perftools_VirtualFreeEx(HANDLE process, LPVOID address,
    949                                                  SIZE_T size, DWORD type) {
    950   MallocHook::InvokeMunmapHook(address, size);
    951   return ((BOOL (WINAPI *)(HANDLE, LPVOID, SIZE_T, DWORD))
    952           function_info_[kVirtualFreeEx].origstub_fn)(
    953               process, address, size, type);
    954 }
    955 
    956 LPVOID WINAPI WindowsInfo::Perftools_MapViewOfFileEx(
    957     HANDLE hFileMappingObject, DWORD dwDesiredAccess, DWORD dwFileOffsetHigh,
    958     DWORD dwFileOffsetLow, SIZE_T dwNumberOfBytesToMap, LPVOID lpBaseAddress) {
    959   // For this function pair, you always deallocate the full block of
    960   // data that you allocate, so NewHook/DeleteHook is the right API.
    961   LPVOID result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD, DWORD,
    962                                       SIZE_T, LPVOID))
    963                    function_info_[kMapViewOfFileEx].origstub_fn)(
    964                        hFileMappingObject, dwDesiredAccess, dwFileOffsetHigh,
    965                        dwFileOffsetLow, dwNumberOfBytesToMap, lpBaseAddress);
    966   MallocHook::InvokeNewHook(result, dwNumberOfBytesToMap);
    967   return result;
    968 }
    969 
    970 BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) {
    971   MallocHook::InvokeDeleteHook(lpBaseAddress);
    972   return ((BOOL (WINAPI *)(LPCVOID))
    973           function_info_[kUnmapViewOfFile].origstub_fn)(
    974               lpBaseAddress);
    975 }
    976 
// g_load_map holds a copy of windows' refcount for how many times
// each currently loaded module has been loaded and unloaded.  We use
// it as an optimization when the same module is loaded more than
// once: as long as the refcount stays above 1, we don't need to worry
// about patching because it's already patched.  Likewise, we don't
// need to unpatch until the refcount drops to 0.  load_map is
// maintained in LoadLibraryExW and FreeLibrary, and only covers
// modules explicitly loaded/freed via those interfaces.
// NOTE: the Perftools_LoadLibraryExW and Perftools_FreeLibrary hooks
// defined below do not touch this map; they ask windows directly, via
// GetModuleHandleExW, whether a module is loaded.
    985 static std::map<HMODULE, int>* g_load_map = NULL;
    986 
    987 HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName,
    988                                                      HANDLE hFile,
    989                                                      DWORD dwFlags) {
    990   HMODULE rv;
    991   // Check to see if the modules is already loaded, flag 0 gets a
    992   // reference if it was loaded.  If it was loaded no need to call
    993   // PatchAllModules, just increase the reference count to match
    994   // what GetModuleHandleExW does internally inside windows.
    995   if (::GetModuleHandleExW(0, lpFileName, &rv)) {
    996     return rv;
    997   } else {
    998     // Not already loaded, so load it.
    999     rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD))
   1000                   function_info_[kLoadLibraryExW].origstub_fn)(
   1001                       lpFileName, hFile, dwFlags);
   1002     // This will patch any newly loaded libraries, if patching needs
   1003     // to be done.
   1004     PatchAllModules();
   1005 
   1006     return rv;
   1007   }
   1008 }
   1009 
   1010 BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) {
   1011   BOOL rv = ((BOOL (WINAPI *)(HMODULE))
   1012              function_info_[kFreeLibrary].origstub_fn)(hLibModule);
   1013 
   1014   // Check to see if the module is still loaded by passing the base
   1015   // address and seeing if it comes back with the same address.  If it
   1016   // is the same address it's still loaded, so the FreeLibrary() call
   1017   // was a noop, and there's no need to redo the patching.
   1018   HMODULE owner = NULL;
   1019   BOOL result = ::GetModuleHandleExW(
   1020       (GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
   1021        GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT),
   1022       (LPCWSTR)hLibModule,
   1023       &owner);
   1024   if (result && owner == hLibModule)
   1025     return rv;
   1026 
   1027   PatchAllModules();    // this will fix up the list of patched libraries
   1028   return rv;
   1029 }
   1030 
   1031 
   1032 // ---------------------------------------------------------------------
   1033 // PatchWindowsFunctions()
   1034 //    This is the function that is exposed to the outside world.
   1035 //    It should be called before the program becomes multi-threaded,
   1036 //    since main_executable_windows.Patch() is not thread-safe.
   1037 // ---------------------------------------------------------------------
   1038 
   1039 void PatchWindowsFunctions() {
   1040   // This does the libc patching in every module, and the main executable.
   1041   PatchAllModules();
   1042   main_executable_windows.Patch();
   1043 }
   1044 
   1045 #if 0
   1046 // It's possible to unpatch all the functions when we are exiting.
   1047 
   1048 // The idea is to handle properly windows-internal data that is
   1049 // allocated before PatchWindowsFunctions is called.  If all
   1050 // destruction happened in reverse order from construction, then we
   1051 // could call UnpatchWindowsFunctions at just the right time, so that
   1052 // that early-allocated data would be freed using the windows
   1053 // allocation functions rather than tcmalloc.  The problem is that
   1054 // windows allocates some structures lazily, so it would allocate them
   1055 // late (using tcmalloc) and then try to deallocate them late as well.
   1056 // So instead of unpatching, we just modify all the tcmalloc routines
// so they call through to the libc routines if the memory in
   1058 // question doesn't seem to have been allocated with tcmalloc.  I keep
   1059 // this unpatch code around for reference.
   1060 
   1061 void UnpatchWindowsFunctions() {
   1062   // We need to go back to the system malloc/etc at global destruct time,
   1063   // so objects that were constructed before tcmalloc, using the system
   1064   // malloc, can destroy themselves using the system free.  This depends
   1065   // on DLLs unloading in the reverse order in which they load!
   1066   //
   1067   // We also go back to the default HeapAlloc/etc, just for consistency.
   1068   // Who knows, it may help avoid weird bugs in some situations.
   1069   main_executable_windows.Unpatch();
   1070   main_executable.Unpatch();
   1071   if (libc1.is_valid()) libc1.Unpatch();
   1072   if (libc2.is_valid()) libc2.Unpatch();
   1073   if (libc3.is_valid()) libc3.Unpatch();
   1074   if (libc4.is_valid()) libc4.Unpatch();
   1075   if (libc5.is_valid()) libc5.Unpatch();
   1076   if (libc6.is_valid()) libc6.Unpatch();
   1077   if (libc7.is_valid()) libc7.Unpatch();
   1078   if (libc8.is_valid()) libc8.Unpatch();
   1079 }
   1080 #endif
   1081