Home | History | Annotate | Download | only in vm
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * Android's method call profiling goodies.
     19  */
     20 #include "Dalvik.h"
     21 #include <interp/InterpDefs.h>
     22 
     23 #include <stdlib.h>
     24 #include <stddef.h>
     25 #include <string.h>
     26 #include <sys/time.h>
     27 #include <time.h>
     28 #include <sys/mman.h>
     29 #include <sched.h>
     30 #include <errno.h>
     31 #include <fcntl.h>
     32 
     33 #include <cutils/open_memstream.h>
     34 
     35 #ifdef HAVE_ANDROID_OS
     36 # define UPDATE_MAGIC_PAGE      1
     37 #endif
     38 
     39 /*
     40  * File format:
     41  *  header
     42  *  record 0
     43  *  record 1
     44  *  ...
     45  *
     46  * Header format:
     47  *  u4  magic ('SLOW')
     48  *  u2  version
     49  *  u2  offset to data
     50  *  u8  start date/time in usec
     51  *  u2  record size in bytes (version >= 2 only)
     52  *  ... padding to 32 bytes
     53  *
     54  * Record format v1:
     55  *  u1  thread ID
     56  *  u4  method ID | method action
     57  *  u4  time delta since start, in usec
     58  *
     59  * Record format v2:
     60  *  u2  thread ID
     61  *  u4  method ID | method action
     62  *  u4  time delta since start, in usec
     63  *
     64  * Record format v3:
     65  *  u2  thread ID
     66  *  u4  method ID | method action
     67  *  u4  time delta since start, in usec
     68  *  u4  wall time since start, in usec (when clock == "dual" only)
     69  *
     70  * 32 bits of microseconds is 70 minutes.
     71  *
     72  * All values are stored in little-endian order.
     73  */
     74 #define TRACE_REC_SIZE_SINGLE_CLOCK  10 // using v2
     75 #define TRACE_REC_SIZE_DUAL_CLOCK    14 // using v3 with two timestamps
     76 #define TRACE_MAGIC         0x574f4c53
     77 #define TRACE_HEADER_LEN    32
     78 
     79 #define FILL_PATTERN        0xeeeeeeee
     80 
     81 
     82 /*
     83  * Returns true if the thread CPU clock should be used.
     84  */
     85 static inline bool useThreadCpuClock() {
     86 #if defined(HAVE_POSIX_CLOCKS)
     87     return gDvm.profilerClockSource != kProfilerClockSourceWall;
     88 #else
     89     return false;
     90 #endif
     91 }
     92 
     93 /*
     94  * Returns true if the wall clock should be used.
     95  */
     96 static inline bool useWallClock() {
     97 #if defined(HAVE_POSIX_CLOCKS)
     98     return gDvm.profilerClockSource != kProfilerClockSourceThreadCpu;
     99 #else
    100     return true;
    101 #endif
    102 }
    103 
    104 /*
    105  * Get the wall-clock date/time, in usec.
    106  */
    107 static inline u8 getWallTimeInUsec()
    108 {
    109     struct timeval tv;
    110 
    111     gettimeofday(&tv, NULL);
    112     return tv.tv_sec * 1000000LL + tv.tv_usec;
    113 }
    114 
    115 #if defined(HAVE_POSIX_CLOCKS)
    116 /*
    117  * Get the thread-cpu time, in usec.
    118  * We use this clock when we can because it enables us to track the time that
    119  * a thread spends running and not blocked.
    120  */
    121 static inline u8 getThreadCpuTimeInUsec(Thread* thread)
    122 {
    123     clockid_t cid;
    124     struct timespec tm;
    125     pthread_getcpuclockid(thread->handle, &cid);
    126     clock_gettime(cid, &tm);
    127     if (!(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000)) {
    128         ALOGE("bad nsec: %ld", tm.tv_nsec);
    129         dvmAbort();
    130     }
    131     return tm.tv_sec * 1000000LL + tm.tv_nsec / 1000;
    132 }
    133 #endif
    134 
    135 /*
    136  * Get the clock used for stopwatch-like timing measurements on a single thread.
    137  */
    138 static inline u8 getStopwatchClock()
    139 {
    140 #if defined(HAVE_POSIX_CLOCKS)
    141     return getThreadCpuTimeInUsec(dvmThreadSelf());
    142 #else
    143     return getWallTimeInUsec();
    144 #endif
    145 }
    146 
    147 /*
    148  * Write little-endian data.
    149  */
    150 static inline void storeShortLE(u1* buf, u2 val)
    151 {
    152     *buf++ = (u1) val;
    153     *buf++ = (u1) (val >> 8);
    154 }
    155 static inline void storeIntLE(u1* buf, u4 val)
    156 {
    157     *buf++ = (u1) val;
    158     *buf++ = (u1) (val >> 8);
    159     *buf++ = (u1) (val >> 16);
    160     *buf++ = (u1) (val >> 24);
    161 }
    162 static inline void storeLongLE(u1* buf, u8 val)
    163 {
    164     *buf++ = (u1) val;
    165     *buf++ = (u1) (val >> 8);
    166     *buf++ = (u1) (val >> 16);
    167     *buf++ = (u1) (val >> 24);
    168     *buf++ = (u1) (val >> 32);
    169     *buf++ = (u1) (val >> 40);
    170     *buf++ = (u1) (val >> 48);
    171     *buf++ = (u1) (val >> 56);
    172 }
    173 
    174 /*
    175  * Gets a thread's stack trace as an array of method pointers of length pCount.
    176  * The returned array must be freed by the caller.
    177  */
    178 static const Method** getStackTrace(Thread* thread, size_t* pCount)
    179 {
    180     void* fp = thread->interpSave.curFrame;
    181     assert(thread == dvmThreadSelf() || dvmIsSuspended(thread));
    182 
    183     /* Compute the stack depth. */
    184     size_t stackDepth = 0;
    185     while (fp != NULL) {
    186         const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);
    187 
    188         if (!dvmIsBreakFrame((u4*) fp))
    189             stackDepth++;
    190 
    191         assert(fp != saveArea->prevFrame);
    192         fp = saveArea->prevFrame;
    193     }
    194     *pCount = stackDepth;
    195 
    196     /*
    197      * Allocate memory for stack trace. This must be freed later, either by
    198      * freeThreadStackTraceSamples when tracing stops or by freeThread.
    199      */
    200     const Method** stackTrace = (const Method**) malloc(sizeof(Method*) *
    201                                                         stackDepth);
    202     if (stackTrace == NULL)
    203         return NULL;
    204 
    205     /* Walk the stack a second time, filling in the stack trace. */
    206     const Method** ptr = stackTrace;
    207     fp = thread->interpSave.curFrame;
    208     while (fp != NULL) {
    209         const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);
    210         const Method* method = saveArea->method;
    211 
    212         if (!dvmIsBreakFrame((u4*) fp)) {
    213             *ptr++ = method;
    214             stackDepth--;
    215         }
    216         assert(fp != saveArea->prevFrame);
    217         fp = saveArea->prevFrame;
    218     }
    219     assert(stackDepth == 0);
    220 
    221     return stackTrace;
    222 }
    223 /*
    224  * Get a sample of the stack trace for a thread.
    225  */
    226 static void getSample(Thread* thread)
    227 {
    228     /* Get old and new stack trace for thread. */
    229     size_t newLength = 0;
    230     const Method** newStackTrace = getStackTrace(thread, &newLength);
    231     size_t oldLength = thread->stackTraceSampleLength;
    232     const Method** oldStackTrace = thread->stackTraceSample;
    233 
    234     /* Read time clocks to use for all events in this trace. */
    235     u4 cpuClockDiff = 0;
    236     u4 wallClockDiff = 0;
    237     dvmMethodTraceReadClocks(thread, &cpuClockDiff, &wallClockDiff);
    238     if (oldStackTrace == NULL) {
    239         /*
    240          * If there's no previous stack trace sample, log an entry event for
    241          * every method in the trace.
    242          */
    243         for (int i = newLength - 1; i >= 0; --i) {
    244             dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
    245                               cpuClockDiff, wallClockDiff);
    246         }
    247     } else {
    248         /*
    249          * If there's a previous stack trace, diff the traces and emit entry
    250          * and exit events accordingly.
    251          */
    252         int diffIndexOld = oldLength - 1;
    253         int diffIndexNew = newLength - 1;
    254         /* Iterate bottom-up until there's a difference between traces. */
    255         while (diffIndexOld >= 0 && diffIndexNew >= 0 &&
    256                oldStackTrace[diffIndexOld] == newStackTrace[diffIndexNew]) {
    257             diffIndexOld--;
    258             diffIndexNew--;
    259         }
    260         /* Iterate top-down over old trace until diff, emitting exit events. */
    261         for (int i = 0; i <= diffIndexOld; ++i) {
    262             dvmMethodTraceAdd(thread, oldStackTrace[i], METHOD_TRACE_EXIT,
    263                               cpuClockDiff, wallClockDiff);
    264         }
    265         /* Iterate bottom-up over new trace from diff, emitting entry events. */
    266         for (int i = diffIndexNew; i >= 0; --i) {
    267             dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
    268                               cpuClockDiff, wallClockDiff);
    269         }
    270     }
    271 
    272     /* Free the old stack trace and update the thread's stack trace sample. */
    273     free(oldStackTrace);
    274     thread->stackTraceSample = newStackTrace;
    275     thread->stackTraceSampleLength = newLength;
    276 }
    277 
    278 /*
    279  * Entry point for sampling thread. The sampling interval in microseconds is
    280  * passed in as an argument.
    281  */
    282 static void* runSamplingThread(void* arg)
    283 {
    284     int intervalUs = (int) arg;
    285     while (gDvm.methodTrace.traceEnabled) {
    286         dvmSuspendAllThreads(SUSPEND_FOR_SAMPLING);
    287 
    288         dvmLockThreadList(dvmThreadSelf());
    289         for (Thread *thread = gDvm.threadList; thread != NULL; thread = thread->next) {
    290             getSample(thread);
    291         }
    292         dvmUnlockThreadList();
    293 
    294         dvmResumeAllThreads(SUSPEND_FOR_SAMPLING);
    295 
    296         usleep(intervalUs);
    297     }
    298     return NULL;
    299 }
    300 
    301 /*
    302  * Boot-time init.
    303  */
    304 bool dvmProfilingStartup()
    305 {
    306     /*
    307      * Initialize "dmtrace" method profiling.
    308      */
    309     memset(&gDvm.methodTrace, 0, sizeof(gDvm.methodTrace));
    310     dvmInitMutex(&gDvm.methodTrace.startStopLock);
    311     pthread_cond_init(&gDvm.methodTrace.threadExitCond, NULL);
    312 
    313     assert(!dvmCheckException(dvmThreadSelf()));
    314 
    315     /*
    316      * Allocate storage for instruction counters.
    317      */
    318     gDvm.executedInstrCounts = (int*) calloc(kNumPackedOpcodes, sizeof(int));
    319     if (gDvm.executedInstrCounts == NULL)
    320         return false;
    321 
    322 #ifdef UPDATE_MAGIC_PAGE
    323     /*
    324      * If we're running on the emulator, there's a magic page into which
    325      * we can put interpreted method information.  This allows interpreted
    326      * methods to show up in the emulator's code traces.
    327      *
    328      * We could key this off of the "ro.kernel.qemu" property, but there's
    329      * no real harm in doing this on a real device.
    330      */
    331     int fd = open("/dev/qemu_trace", O_RDWR);
    332     if (fd < 0) {
    333         ALOGV("Unable to open /dev/qemu_trace");
    334     } else {
    335         gDvm.emulatorTracePage = mmap(0, SYSTEM_PAGE_SIZE, PROT_READ|PROT_WRITE,
    336                                       MAP_SHARED, fd, 0);
    337         close(fd);
    338         if (gDvm.emulatorTracePage == MAP_FAILED) {
    339             ALOGE("Unable to mmap /dev/qemu_trace");
    340             gDvm.emulatorTracePage = NULL;
    341         } else {
    342             *(u4*) gDvm.emulatorTracePage = 0;
    343         }
    344     }
    345 #else
    346     assert(gDvm.emulatorTracePage == NULL);
    347 #endif
    348 
    349     return true;
    350 }
    351 
    352 /*
    353  * Free up profiling resources.
    354  */
    355 void dvmProfilingShutdown()
    356 {
    357 #ifdef UPDATE_MAGIC_PAGE
    358     if (gDvm.emulatorTracePage != NULL)
    359         munmap(gDvm.emulatorTracePage, SYSTEM_PAGE_SIZE);
    360 #endif
    361     free(gDvm.executedInstrCounts);
    362 }
    363 
    364 /*
    365  * Update the set of active profilers
    366  */
    367 static void updateActiveProfilers(ExecutionSubModes newMode, bool enable)
    368 {
    369     int oldValue, newValue;
    370 
    371     // Update global count
    372     do {
    373         oldValue = gDvm.activeProfilers;
    374         newValue = oldValue + (enable ? 1 : -1);
    375         if (newValue < 0) {
    376             ALOGE("Can't have %d active profilers", newValue);
    377             dvmAbort();
    378         }
    379     } while (android_atomic_release_cas(oldValue, newValue,
    380             &gDvm.activeProfilers) != 0);
    381 
    382     // Tell the threads
    383     if (enable) {
    384         dvmEnableAllSubMode(newMode);
    385     } else {
    386         dvmDisableAllSubMode(newMode);
    387     }
    388 
    389 #if defined(WITH_JIT)
    390     dvmCompilerUpdateGlobalState();
    391 #endif
    392 
    393     ALOGD("+++ active profiler count now %d", newValue);
    394 }
    395 
    396 
    397 /*
    398  * Reset the "cpuClockBase" field in all threads.
    399  */
    400 static void resetCpuClockBase()
    401 {
    402     Thread* thread;
    403 
    404     dvmLockThreadList(NULL);
    405     for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
    406         thread->cpuClockBaseSet = false;
    407         thread->cpuClockBase = 0;
    408     }
    409     dvmUnlockThreadList();
    410 }
    411 
    412 /*
    413  * Free and reset the "stackTraceSample" field in all threads.
    414  */
    415 static void freeThreadStackTraceSamples()
    416 {
    417     Thread* thread;
    418 
    419     dvmLockThreadList(NULL);
    420     for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
    421         free(thread->stackTraceSample);
    422         thread->stackTraceSample = NULL;
    423     }
    424     dvmUnlockThreadList();
    425 }
    426 
    427 /*
    428  * Dump the thread list to the specified file.
    429  */
    430 static void dumpThreadList(FILE* fp) {
    431     dvmLockThreadList(NULL);
    432     for (Thread* thread = gDvm.threadList; thread != NULL; thread = thread->next) {
    433         std::string threadName(dvmGetThreadName(thread));
    434         fprintf(fp, "%d\t%s\n", thread->threadId, threadName.c_str());
    435     }
    436     dvmUnlockThreadList();
    437 }
    438 
    439 /*
    440  * This is a dvmHashForeach callback.
    441  */
    442 static int dumpMarkedMethods(void* vclazz, void* vfp)
    443 {
    444     DexStringCache stringCache;
    445     ClassObject* clazz = (ClassObject*) vclazz;
    446     FILE* fp = (FILE*) vfp;
    447     Method* meth;
    448     char* name;
    449     int i;
    450 
    451     dexStringCacheInit(&stringCache);
    452 
    453     for (i = 0; i < clazz->virtualMethodCount; i++) {
    454         meth = &clazz->virtualMethods[i];
    455         if (meth->inProfile) {
    456             name = dvmDescriptorToName(meth->clazz->descriptor);
    457             fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
    458                 name, meth->name,
    459                 dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
    460                 dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
    461             meth->inProfile = false;
    462             free(name);
    463         }
    464     }
    465 
    466     for (i = 0; i < clazz->directMethodCount; i++) {
    467         meth = &clazz->directMethods[i];
    468         if (meth->inProfile) {
    469             name = dvmDescriptorToName(meth->clazz->descriptor);
    470             fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
    471                 name, meth->name,
    472                 dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
    473                 dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
    474             meth->inProfile = false;
    475             free(name);
    476         }
    477     }
    478 
    479     dexStringCacheRelease(&stringCache);
    480 
    481     return 0;
    482 }
    483 
    484 /*
    485  * Dump the list of "marked" methods to the specified file.
    486  */
    487 static void dumpMethodList(FILE* fp)
    488 {
    489     dvmHashTableLock(gDvm.loadedClasses);
    490     dvmHashForeach(gDvm.loadedClasses, dumpMarkedMethods, (void*) fp);
    491     dvmHashTableUnlock(gDvm.loadedClasses);
    492 }
    493 
    494 /*
    495  * Start method tracing.  Method tracing is global to the VM (i.e. we
    496  * trace all threads).
    497  *
    498  * This opens the output file (if an already open fd has not been supplied,
    499  * and we're not going direct to DDMS) and allocates the data buffer.  This
    500  * takes ownership of the file descriptor, closing it on completion.
    501  *
    502  * On failure, we throw an exception and return.
    503  */
    504 void dvmMethodTraceStart(const char* traceFileName, int traceFd, int bufferSize,
    505     int flags, bool directToDdms, bool samplingEnabled, int intervalUs)
    506 {
    507     MethodTraceState* state = &gDvm.methodTrace;
    508 
    509     assert(bufferSize > 0);
    510 
    511     dvmLockMutex(&state->startStopLock);
    512     while (state->traceEnabled != 0) {
    513         ALOGI("TRACE start requested, but already in progress; stopping");
    514         dvmUnlockMutex(&state->startStopLock);
    515         dvmMethodTraceStop();
    516         dvmLockMutex(&state->startStopLock);
    517     }
    518     ALOGI("TRACE STARTED: '%s' %dKB", traceFileName, bufferSize / 1024);
    519 
    520     /*
    521      * Allocate storage and open files.
    522      *
    523      * We don't need to initialize the buffer, but doing so might remove
    524      * some fault overhead if the pages aren't mapped until touched.
    525      */
    526     state->buf = (u1*) malloc(bufferSize);
    527     if (state->buf == NULL) {
    528         dvmThrowInternalError("buffer alloc failed");
    529         goto fail;
    530     }
    531     if (!directToDdms) {
    532         if (traceFd < 0) {
    533             state->traceFile = fopen(traceFileName, "w");
    534         } else {
    535             state->traceFile = fdopen(traceFd, "w");
    536         }
    537         if (state->traceFile == NULL) {
    538             int err = errno;
    539             ALOGE("Unable to open trace file '%s': %s",
    540                 traceFileName, strerror(err));
    541             dvmThrowExceptionFmt(gDvm.exRuntimeException,
    542                 "Unable to open trace file '%s': %s",
    543                 traceFileName, strerror(err));
    544             goto fail;
    545         }
    546     }
    547     traceFd = -1;
    548     memset(state->buf, (char)FILL_PATTERN, bufferSize);
    549 
    550     state->directToDdms = directToDdms;
    551     state->bufferSize = bufferSize;
    552     state->overflow = false;
    553 
    554     /*
    555      * Enable alloc counts if we've been requested to do so.
    556      */
    557     state->flags = flags;
    558     if ((flags & TRACE_ALLOC_COUNTS) != 0)
    559         dvmStartAllocCounting();
    560 
    561     /* reset our notion of the start time for all CPU threads */
    562     resetCpuClockBase();
    563 
    564     state->startWhen = getWallTimeInUsec();
    565 
    566     if (useThreadCpuClock() && useWallClock()) {
    567         state->traceVersion = 3;
    568         state->recordSize = TRACE_REC_SIZE_DUAL_CLOCK;
    569     } else {
    570         state->traceVersion = 2;
    571         state->recordSize = TRACE_REC_SIZE_SINGLE_CLOCK;
    572     }
    573 
    574     state->samplingEnabled = samplingEnabled;
    575 
    576     /*
    577      * Output the header.
    578      */
    579     memset(state->buf, 0, TRACE_HEADER_LEN);
    580     storeIntLE(state->buf + 0, TRACE_MAGIC);
    581     storeShortLE(state->buf + 4, state->traceVersion);
    582     storeShortLE(state->buf + 6, TRACE_HEADER_LEN);
    583     storeLongLE(state->buf + 8, state->startWhen);
    584     if (state->traceVersion >= 3) {
    585         storeShortLE(state->buf + 16, state->recordSize);
    586     }
    587     state->curOffset = TRACE_HEADER_LEN;
    588 
    589     /*
    590      * Set the "enabled" flag.  Once we do this, threads will wait to be
    591      * signaled before exiting, so we have to make sure we wake them up.
    592      */
    593     android_atomic_release_store(true, &state->traceEnabled);
    594 
    595     /*
    596      * ENHANCEMENT: To trace just a single thread, modify the
    597      * following to take a Thread* argument, and set the appropriate
    598      * interpBreak flags only on the target thread.
    599      */
    600     if (samplingEnabled) {
    601         updateActiveProfilers(kSubModeSampleTrace, true);
    602         /* Start the sampling thread. */
    603         if (!dvmCreateInternalThread(&state->samplingThreadHandle,
    604                 "Sampling Thread", &runSamplingThread, (void*) intervalUs)) {
    605             dvmThrowInternalError("failed to create sampling thread");
    606             goto fail;
    607         }
    608     } else {
    609         updateActiveProfilers(kSubModeMethodTrace, true);
    610     }
    611 
    612     dvmUnlockMutex(&state->startStopLock);
    613     return;
    614 
    615 fail:
    616     if (state->traceFile != NULL) {
    617         fclose(state->traceFile);
    618         state->traceFile = NULL;
    619     }
    620     if (state->buf != NULL) {
    621         free(state->buf);
    622         state->buf = NULL;
    623     }
    624     if (traceFd >= 0)
    625         close(traceFd);
    626     dvmUnlockMutex(&state->startStopLock);
    627 }
    628 
    629 /*
    630  * Run through the data buffer and pull out the methods that were visited.
    631  * Set a mark so that we know which ones to output.
    632  */
    633 static void markTouchedMethods(int endOffset)
    634 {
    635     u1* ptr = gDvm.methodTrace.buf + TRACE_HEADER_LEN;
    636     u1* end = gDvm.methodTrace.buf + endOffset;
    637     size_t recordSize = gDvm.methodTrace.recordSize;
    638     unsigned int methodVal;
    639     Method* method;
    640 
    641     while (ptr < end) {
    642         methodVal = ptr[2] | (ptr[3] << 8) | (ptr[4] << 16)
    643                     | (ptr[5] << 24);
    644         method = (Method*) METHOD_ID(methodVal);
    645 
    646         method->inProfile = true;
    647         ptr += recordSize;
    648     }
    649 }
    650 
    651 /*
    652  * Exercises the clocks in the same way they will be during profiling.
    653  */
    654 static inline void measureClockOverhead()
    655 {
    656 #if defined(HAVE_POSIX_CLOCKS)
    657     if (useThreadCpuClock()) {
    658         getThreadCpuTimeInUsec(dvmThreadSelf());
    659     }
    660 #endif
    661     if (useWallClock()) {
    662         getWallTimeInUsec();
    663     }
    664 }
    665 
    666 /*
    667  * Compute the amount of overhead in a clock call, in nsec.
    668  *
    669  * This value is going to vary depending on what else is going on in the
    670  * system.  When examined across several runs a pattern should emerge.
    671  */
    672 static u4 getClockOverhead()
    673 {
    674     u8 calStart, calElapsed;
    675     int i;
    676 
    677     calStart = getStopwatchClock();
    678     for (i = 1000 * 4; i > 0; i--) {
    679         measureClockOverhead();
    680         measureClockOverhead();
    681         measureClockOverhead();
    682         measureClockOverhead();
    683         measureClockOverhead();
    684         measureClockOverhead();
    685         measureClockOverhead();
    686         measureClockOverhead();
    687     }
    688 
    689     calElapsed = getStopwatchClock() - calStart;
    690     return (int) (calElapsed / (8*4));
    691 }
    692 
    693 /*
    694  * Indicates if method tracing is active and what kind of tracing is active.
    695  */
    696 TracingMode dvmGetMethodTracingMode()
    697 {
    698     const MethodTraceState* state = &gDvm.methodTrace;
    699     if (!state->traceEnabled) {
    700         return TRACING_INACTIVE;
    701     } else if (state->samplingEnabled) {
    702         return SAMPLE_PROFILING_ACTIVE;
    703     } else {
    704         return METHOD_TRACING_ACTIVE;
    705     }
    706 }
    707 
    708 /*
    709  * Stop method tracing.  We write the buffer to disk and generate a key
    710  * file so we can interpret it.
    711  */
    712 void dvmMethodTraceStop()
    713 {
    714     MethodTraceState* state = &gDvm.methodTrace;
    715     bool samplingEnabled = state->samplingEnabled;
    716     u8 elapsed;
    717 
    718     /*
    719      * We need this to prevent somebody from starting a new trace while
    720      * we're in the process of stopping the old.
    721      */
    722     dvmLockMutex(&state->startStopLock);
    723 
    724     if (!state->traceEnabled) {
    725         /* somebody already stopped it, or it was never started */
    726         ALOGD("TRACE stop requested, but not running");
    727         dvmUnlockMutex(&state->startStopLock);
    728         return;
    729     } else {
    730         if (samplingEnabled) {
    731             updateActiveProfilers(kSubModeSampleTrace, false);
    732         } else {
    733             updateActiveProfilers(kSubModeMethodTrace, false);
    734         }
    735     }
    736 
    737     /* compute elapsed time */
    738     elapsed = getWallTimeInUsec() - state->startWhen;
    739 
    740     /*
    741      * Globally disable it, and allow other threads to notice.  We want
    742      * to stall here for at least as long as dvmMethodTraceAdd needs
    743      * to finish.  There's no real risk though -- it will take a while to
    744      * write the data to disk, and we don't clear the buffer pointer until
    745      * after that completes.
    746      */
    747     state->traceEnabled = false;
    748     ANDROID_MEMBAR_FULL();
    749     sched_yield();
    750     usleep(250 * 1000);
    751 
    752     if ((state->flags & TRACE_ALLOC_COUNTS) != 0)
    753         dvmStopAllocCounting();
    754 
    755     /*
    756      * It's possible under some circumstances for a thread to have advanced
    757      * the data pointer but not written the method value.  It's possible
    758      * (though less likely) for the data pointer to be advanced, or partial
    759      * data written, while we're doing work here.
    760      *
    761      * To avoid seeing partially-written data, we grab state->curOffset here,
    762      * and use our local copy from here on.  We then scan through what's
    763      * already written.  If we see the fill pattern in what should be the
    764      * method pointer, we cut things off early.  (If we don't, we'll fail
    765      * when we dereference the pointer.)
    766      *
    767      * There's a theoretical possibility of interrupting another thread
    768      * after it has partially written the method pointer, in which case
    769      * we'll likely crash when we dereference it.  The possibility of
    770      * this actually happening should be at or near zero.  Fixing it
    771      * completely could be done by writing the thread number last and
    772      * using a sentinel value to indicate a partially-written record,
    773      * but that requires memory barriers.
    774      */
    775     int finalCurOffset = state->curOffset;
    776 
    777     size_t recordSize = state->recordSize;
    778     if (finalCurOffset > TRACE_HEADER_LEN) {
    779         u4 fillVal = METHOD_ID(FILL_PATTERN);
    780         u1* scanPtr = state->buf + TRACE_HEADER_LEN;
    781 
    782         while (scanPtr < state->buf + finalCurOffset) {
    783             u4 methodVal = scanPtr[2] | (scanPtr[3] << 8) | (scanPtr[4] << 16)
    784                         | (scanPtr[5] << 24);
    785             if (METHOD_ID(methodVal) == fillVal) {
    786                 u1* scanBase = state->buf + TRACE_HEADER_LEN;
    787                 ALOGW("Found unfilled record at %d (of %d)",
    788                     (scanPtr - scanBase) / recordSize,
    789                     (finalCurOffset - TRACE_HEADER_LEN) / recordSize);
    790                 finalCurOffset = scanPtr - state->buf;
    791                 break;
    792             }
    793 
    794             scanPtr += recordSize;
    795         }
    796     }
    797 
    798     ALOGI("TRACE STOPPED%s: writing %d records",
    799         state->overflow ? " (NOTE: overflowed buffer)" : "",
    800         (finalCurOffset - TRACE_HEADER_LEN) / recordSize);
    801     if (gDvm.debuggerActive) {
    802         ALOGW("WARNING: a debugger is active; method-tracing results "
    803              "will be skewed");
    804     }
    805 
    806     /*
    807      * Do a quick calibration test to see how expensive our clock call is.
    808      */
    809     u4 clockNsec = getClockOverhead();
    810 
    811     markTouchedMethods(finalCurOffset);
    812 
    813     char* memStreamPtr;
    814     size_t memStreamSize;
    815     if (state->directToDdms) {
    816         assert(state->traceFile == NULL);
    817         state->traceFile = open_memstream(&memStreamPtr, &memStreamSize);
    818         if (state->traceFile == NULL) {
    819             /* not expected */
    820             ALOGE("Unable to open memstream");
    821             dvmAbort();
    822         }
    823     }
    824     assert(state->traceFile != NULL);
    825 
    826     fprintf(state->traceFile, "%cversion\n", TOKEN_CHAR);
    827     fprintf(state->traceFile, "%d\n", state->traceVersion);
    828     fprintf(state->traceFile, "data-file-overflow=%s\n",
    829         state->overflow ? "true" : "false");
    830     if (useThreadCpuClock()) {
    831         if (useWallClock()) {
    832             fprintf(state->traceFile, "clock=dual\n");
    833         } else {
    834             fprintf(state->traceFile, "clock=thread-cpu\n");
    835         }
    836     } else {
    837         fprintf(state->traceFile, "clock=wall\n");
    838     }
    839     fprintf(state->traceFile, "elapsed-time-usec=%llu\n", elapsed);
    840     fprintf(state->traceFile, "num-method-calls=%d\n",
    841         (finalCurOffset - TRACE_HEADER_LEN) / state->recordSize);
    842     fprintf(state->traceFile, "clock-call-overhead-nsec=%d\n", clockNsec);
    843     fprintf(state->traceFile, "vm=dalvik\n");
    844     if ((state->flags & TRACE_ALLOC_COUNTS) != 0) {
    845         fprintf(state->traceFile, "alloc-count=%d\n",
    846             gDvm.allocProf.allocCount);
    847         fprintf(state->traceFile, "alloc-size=%d\n",
    848             gDvm.allocProf.allocSize);
    849         fprintf(state->traceFile, "gc-count=%d\n",
    850             gDvm.allocProf.gcCount);
    851     }
    852     fprintf(state->traceFile, "%cthreads\n", TOKEN_CHAR);
    853     dumpThreadList(state->traceFile);
    854     fprintf(state->traceFile, "%cmethods\n", TOKEN_CHAR);
    855     dumpMethodList(state->traceFile);
    856     fprintf(state->traceFile, "%cend\n", TOKEN_CHAR);
    857 
    858     if (state->directToDdms) {
    859         /*
    860          * Data is in two places: memStreamPtr and state->buf.  Send
    861          * the whole thing to DDMS, wrapped in an MPSE packet.
    862          */
    863         fflush(state->traceFile);
    864 
    865         struct iovec iov[2];
    866         iov[0].iov_base = memStreamPtr;
    867         iov[0].iov_len = memStreamSize;
    868         iov[1].iov_base = state->buf;
    869         iov[1].iov_len = finalCurOffset;
    870         dvmDbgDdmSendChunkV(CHUNK_TYPE("MPSE"), iov, 2);
    871     } else {
    872         /* append the profiling data */
    873         if (fwrite(state->buf, finalCurOffset, 1, state->traceFile) != 1) {
    874             int err = errno;
    875             ALOGE("trace fwrite(%d) failed: %s",
    876                 finalCurOffset, strerror(err));
    877             dvmThrowExceptionFmt(gDvm.exRuntimeException,
    878                 "Trace data write failed: %s", strerror(err));
    879         }
    880     }
    881 
    882     /* done! */
    883     free(state->buf);
    884     state->buf = NULL;
    885     fclose(state->traceFile);
    886     state->traceFile = NULL;
    887 
    888     /* free and clear sampling traces held by all threads */
    889     if (samplingEnabled) {
    890         freeThreadStackTraceSamples();
    891     }
    892 
    893     /* wake any threads that were waiting for profiling to complete */
    894     dvmBroadcastCond(&state->threadExitCond);
    895     dvmUnlockMutex(&state->startStopLock);
    896 
    897     /* make sure the sampling thread has stopped */
    898     if (samplingEnabled &&
    899         pthread_join(state->samplingThreadHandle, NULL) != 0) {
    900         ALOGW("Sampling thread join failed");
    901     }
    902 }
    903 
    904 /*
    905  * Read clocks and generate time diffs for method trace events.
    906  */
    907 void dvmMethodTraceReadClocks(Thread* self, u4* cpuClockDiff,
    908                               u4* wallClockDiff)
    909 {
    910 #if defined(HAVE_POSIX_CLOCKS)
    911     if (useThreadCpuClock()) {
    912         if (!self->cpuClockBaseSet) {
    913             /* Initialize per-thread CPU clock base time on first use. */
    914             self->cpuClockBase = getThreadCpuTimeInUsec(self);
    915             self->cpuClockBaseSet = true;
    916         } else {
    917             *cpuClockDiff = getThreadCpuTimeInUsec(self) - self->cpuClockBase;
    918         }
    919     }
    920 #endif
    921     if (useWallClock()) {
    922         *wallClockDiff = getWallTimeInUsec() - gDvm.methodTrace.startWhen;
    923     }
    924 }
    925 
    926 /*
    927  * We just did something with a method.  Emit a record.
    928  *
    929  * Multiple threads may be banging on this all at once.  We use atomic ops
    930  * rather than mutexes for speed.
    931  */
    932 void dvmMethodTraceAdd(Thread* self, const Method* method, int action,
    933                        u4 cpuClockDiff, u4 wallClockDiff)
    934 {
    935     MethodTraceState* state = &gDvm.methodTrace;
    936     u4 methodVal;
    937     int oldOffset, newOffset;
    938     u1* ptr;
    939 
    940     assert(method != NULL);
    941 
    942     /*
    943      * Advance "curOffset" atomically.
    944      */
    945     do {
    946         oldOffset = state->curOffset;
    947         newOffset = oldOffset + state->recordSize;
    948         if (newOffset > state->bufferSize) {
    949             state->overflow = true;
    950             return;
    951         }
    952     } while (android_atomic_release_cas(oldOffset, newOffset,
    953             &state->curOffset) != 0);
    954 
    955     //assert(METHOD_ACTION((u4) method) == 0);
    956 
    957     methodVal = METHOD_COMBINE((u4) method, action);
    958 
    959     /*
    960      * Write data into "oldOffset".
    961      */
    962     ptr = state->buf + oldOffset;
    963     *ptr++ = (u1) self->threadId;
    964     *ptr++ = (u1) (self->threadId >> 8);
    965     *ptr++ = (u1) methodVal;
    966     *ptr++ = (u1) (methodVal >> 8);
    967     *ptr++ = (u1) (methodVal >> 16);
    968     *ptr++ = (u1) (methodVal >> 24);
    969 
    970 #if defined(HAVE_POSIX_CLOCKS)
    971     if (useThreadCpuClock()) {
    972         *ptr++ = (u1) cpuClockDiff;
    973         *ptr++ = (u1) (cpuClockDiff >> 8);
    974         *ptr++ = (u1) (cpuClockDiff >> 16);
    975         *ptr++ = (u1) (cpuClockDiff >> 24);
    976     }
    977 #endif
    978 
    979     if (useWallClock()) {
    980         *ptr++ = (u1) wallClockDiff;
    981         *ptr++ = (u1) (wallClockDiff >> 8);
    982         *ptr++ = (u1) (wallClockDiff >> 16);
    983         *ptr++ = (u1) (wallClockDiff >> 24);
    984     }
    985 }
    986 
    987 
    988 /*
    989  * Register the METHOD_TRACE_ENTER action for the fast interpreter and
    990  * JIT'ed code.
    991  */
    992 void dvmFastMethodTraceEnter(const Method* method, Thread* self)
    993 {
    994     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
    995         u4 cpuClockDiff = 0;
    996         u4 wallClockDiff = 0;
    997         dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
    998         dvmMethodTraceAdd(self, method, METHOD_TRACE_ENTER, cpuClockDiff,
    999                           wallClockDiff);
   1000     }
   1001 }
   1002 
   1003 /*
   1004  * Register the METHOD_TRACE_EXIT action for the fast interpreter and
   1005  * JIT'ed code for methods. The about-to-return callee method can be
   1006  * retrieved from self->interpSave.method.
   1007  */
   1008 void dvmFastMethodTraceExit(Thread* self)
   1009 {
   1010     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
   1011         u4 cpuClockDiff = 0;
   1012         u4 wallClockDiff = 0;
   1013         dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
   1014         dvmMethodTraceAdd(self, self->interpSave.method,
   1015                           METHOD_TRACE_EXIT, cpuClockDiff, wallClockDiff);
   1016     }
   1017 }
   1018 
   1019 /*
   1020  * Register the METHOD_TRACE_EXIT action for the fast interpreter and
   1021  * JIT'ed code for JNI methods. The about-to-return JNI callee method is passed
   1022  * in explicitly.  Also used for inline-execute.
   1023  */
   1024 void dvmFastNativeMethodTraceExit(const Method* method, Thread* self)
   1025 {
   1026     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
   1027         u4 cpuClockDiff = 0;
   1028         u4 wallClockDiff = 0;
   1029         dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
   1030         dvmMethodTraceAdd(self, method, METHOD_TRACE_EXIT, cpuClockDiff,
   1031                           wallClockDiff);
   1032     }
   1033 }
   1034 
   1035 /*
   1036  * We just did something with a method.  Emit a record by setting a value
   1037  * in a magic memory location.
   1038  */
   1039 void dvmEmitEmulatorTrace(const Method* method, int action)
   1040 {
   1041 #ifdef UPDATE_MAGIC_PAGE
   1042     /*
   1043      * We store the address of the Dalvik bytecodes to the memory-mapped
   1044      * trace page for normal methods.  We also trace calls to native
   1045      * functions by storing the address of the native function to the
   1046      * trace page.
   1047      * Abstract methods don't have any bytecodes, so we don't trace them.
   1048      * (Abstract methods are never called, but in Dalvik they can be
   1049      * because we do a "late trap" to a native method to generate the
   1050      * abstract method exception.)
   1051      */
   1052     if (dvmIsAbstractMethod(method))
   1053         return;
   1054 
   1055     u4* pMagic = (u4*) gDvm.emulatorTracePage;
   1056     u4 addr;
   1057 
   1058     if (dvmIsNativeMethod(method)) {
   1059         /*
   1060          * The "action" parameter is one of:
   1061          *   0 = ENTER
   1062          *   1 = EXIT
   1063          *   2 = UNROLL
   1064          * To help the trace tools reconstruct the runtime stack containing
   1065          * a mix of normal plus native methods, we add 4 to the action if this
   1066          * is a native method.
   1067          */
   1068         action += 4;
   1069 
   1070         /*
   1071          * Get the address of the native function.
   1072          * This isn't the right address -- how do I get it?
   1073          * Fortunately, the trace tools can get by without the address, but
   1074          * it would be nice to fix this.
   1075          */
   1076          addr = (u4) method->nativeFunc;
   1077     } else {
   1078         /*
   1079          * The dexlist output shows the &DexCode.insns offset value, which
   1080          * is offset from the start of the base DEX header. Method.insns
   1081          * is the absolute address, effectively offset from the start of
   1082          * the optimized DEX header. We either need to return the
   1083          * optimized DEX base file address offset by the right amount, or
   1084          * take the "real" address and subtract off the size of the
   1085          * optimized DEX header.
   1086          *
   1087          * Would be nice to factor this out at dexlist time, but we can't count
   1088          * on having access to the correct optimized DEX file.
   1089          */
   1090         assert(method->insns != NULL);
   1091         const DexOptHeader* pOptHdr = method->clazz->pDvmDex->pDexFile->pOptHeader;
   1092         addr = (u4) method->insns - pOptHdr->dexOffset;
   1093     }
   1094 
   1095     *(pMagic+action) = addr;
   1096     LOGVV("Set %p = 0x%08x (%s.%s)",
   1097         pMagic+action, addr, method->clazz->descriptor, method->name);
   1098 #endif
   1099 }
   1100 
   1101 /*
   1102  * The GC calls this when it's about to start.  We add a marker to the
   1103  * trace output so the tool can exclude the GC cost from the results.
   1104  */
   1105 void dvmMethodTraceGCBegin()
   1106 {
   1107     TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTraceGcMethod);
   1108 }
   1109 void dvmMethodTraceGCEnd()
   1110 {
   1111     TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTraceGcMethod);
   1112 }
   1113 
   1114 /*
   1115  * The class loader calls this when it's loading or initializing a class.
   1116  */
   1117 void dvmMethodTraceClassPrepBegin()
   1118 {
   1119     TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTraceClassPrepMethod);
   1120 }
   1121 void dvmMethodTraceClassPrepEnd()
   1122 {
   1123     TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTraceClassPrepMethod);
   1124 }
   1125 
   1126 
   1127 /*
   1128  * Enable emulator trace info.
   1129  */
   1130 void dvmEmulatorTraceStart()
   1131 {
   1132     /* If we could not map the emulator trace page, then do not enable tracing */
   1133     if (gDvm.emulatorTracePage == NULL)
   1134         return;
   1135 
   1136     /* in theory we should make this an atomic inc; in practice not important */
   1137     gDvm.emulatorTraceEnableCount++;
   1138     if (gDvm.emulatorTraceEnableCount == 1)
   1139         ALOGD("--- emulator method traces enabled");
   1140     updateActiveProfilers(kSubModeEmulatorTrace, true);
   1141 }
   1142 
   1143 /*
   1144  * Disable emulator trace info.
   1145  */
   1146 void dvmEmulatorTraceStop()
   1147 {
   1148     if (gDvm.emulatorTraceEnableCount == 0) {
   1149         ALOGE("ERROR: emulator tracing not enabled");
   1150         return;
   1151     }
   1152     /* in theory we should make this an atomic inc; in practice not important */
   1153     gDvm.emulatorTraceEnableCount--;
   1154     if (gDvm.emulatorTraceEnableCount == 0)
   1155         ALOGD("--- emulator method traces disabled");
   1156     updateActiveProfilers(kSubModeEmulatorTrace,
   1157                           (gDvm.emulatorTraceEnableCount != 0));
   1158 }
   1159 
   1160 
   1161 /*
   1162  * Start instruction counting.
   1163  */
   1164 void dvmStartInstructionCounting()
   1165 {
   1166     /* in theory we should make this an atomic inc; in practice not important */
   1167     gDvm.instructionCountEnableCount++;
   1168     updateActiveProfilers(kSubModeInstCounting, true);
   1169 }
   1170 
   1171 /*
   1172  * Stop instruction counting.
   1173  */
   1174 void dvmStopInstructionCounting()
   1175 {
   1176     if (gDvm.instructionCountEnableCount == 0) {
   1177         ALOGE("ERROR: instruction counting not enabled");
   1178         dvmAbort();
   1179     }
   1180     gDvm.instructionCountEnableCount--;
   1181     updateActiveProfilers(kSubModeInstCounting,
   1182                           (gDvm.instructionCountEnableCount != 0));
   1183 }
   1184 
   1185 
   1186 /*
   1187  * Start alloc counting.  Note this doesn't affect the "active profilers"
   1188  * count, since the interpreter loop is not involved.
   1189  */
   1190 void dvmStartAllocCounting()
   1191 {
   1192     gDvm.allocProf.enabled = true;
   1193 }
   1194 
   1195 /*
   1196  * Stop alloc counting.
   1197  */
   1198 void dvmStopAllocCounting()
   1199 {
   1200     gDvm.allocProf.enabled = false;
   1201 }
   1202