/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Android's method call profiling goodies.
 */
#include "Dalvik.h"
#include <interp/InterpDefs.h>

#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <sys/mman.h>
#include <sched.h>
#include <errno.h>
#include <fcntl.h>

#include <cutils/open_memstream.h>

#ifdef HAVE_ANDROID_OS
# define UPDATE_MAGIC_PAGE      1
#endif

/*
 * File format:
 *  header
 *  record 0
 *  record 1
 *  ...
 *
 * Header format:
 *  u4  magic ('SLOW')
 *  u2  version
 *  u2  offset to data
 *  u8  start date/time in usec
 *  u2  record size in bytes (version >= 2 only)
 *  ... padding to 32 bytes
 *
 * Record format v1:
 *  u1  thread ID
 *  u4  method ID | method action
 *  u4  time delta since start, in usec
 *
 * Record format v2:
 *  u2  thread ID
 *  u4  method ID | method action
 *  u4  time delta since start, in usec
 *
 * Record format v3:
 *  u2  thread ID
 *  u4  method ID | method action
 *  u4  time delta since start, in usec
 *  u4  wall time since start, in usec (when clock == "dual" only)
 *
 * 32 bits of microseconds is 70 minutes.
 *
 * All values are stored in little-endian order.
 */
#define TRACE_REC_SIZE_SINGLE_CLOCK  10 // using v2
#define TRACE_REC_SIZE_DUAL_CLOCK    14 // using v3 with two timestamps
#define TRACE_MAGIC         0x574f4c53
#define TRACE_HEADER_LEN    32

#define FILL_PATTERN        0xeeeeeeee

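/*
 * Illustrative layout (added note, derived from the format description above
 * and from dvmMethodTraceAdd() below): a v3 "dual clock" record occupies
 * TRACE_REC_SIZE_DUAL_CLOCK = 14 bytes:
 *
 *   bytes  0-1   thread ID (u2, little-endian)
 *   bytes  2-5   method ID | method action (u4)
 *   bytes  6-9   thread-CPU time delta since start, in usec (u4)
 *   bytes 10-13  wall-clock time delta since start, in usec (u4)
 *
 * With a single clock source (v2) only one 4-byte delta follows the method
 * word, giving the 10-byte record.
 */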
/*
 * Returns true if the thread CPU clock should be used.
 */
static inline bool useThreadCpuClock() {
#if defined(HAVE_POSIX_CLOCKS)
    return gDvm.profilerClockSource != kProfilerClockSourceWall;
#else
    return false;
#endif
}

/*
 * Returns true if the wall clock should be used.
 */
static inline bool useWallClock() {
#if defined(HAVE_POSIX_CLOCKS)
    return gDvm.profilerClockSource != kProfilerClockSourceThreadCpu;
#else
    return true;
#endif
}

/*
 * Get the wall-clock date/time, in usec.
 */
static inline u8 getWallTimeInUsec()
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    return tv.tv_sec * 1000000LL + tv.tv_usec;
}

#if defined(HAVE_POSIX_CLOCKS)
/*
 * Get the thread-cpu time, in usec.
 * We use this clock when we can because it enables us to track the time that
 * a thread spends running and not blocked.
 */
static inline u8 getThreadCpuTimeInUsec(Thread* thread)
{
    clockid_t cid;
    struct timespec tm;
    pthread_getcpuclockid(thread->handle, &cid);
    clock_gettime(cid, &tm);
    if (!(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000)) {
        ALOGE("bad nsec: %ld", tm.tv_nsec);
        dvmAbort();
    }
    return tm.tv_sec * 1000000LL + tm.tv_nsec / 1000;
}
#endif

/*
 * Get the clock used for stopwatch-like timing measurements on a single
 * thread.
 */
static inline u8 getStopwatchClock()
{
#if defined(HAVE_POSIX_CLOCKS)
    return getThreadCpuTimeInUsec(dvmThreadSelf());
#else
    return getWallTimeInUsec();
#endif
}

/*
 * Write little-endian data.
 */
static inline void storeShortLE(u1* buf, u2 val)
{
    *buf++ = (u1) val;
    *buf++ = (u1) (val >> 8);
}
static inline void storeIntLE(u1* buf, u4 val)
{
    *buf++ = (u1) val;
    *buf++ = (u1) (val >> 8);
    *buf++ = (u1) (val >> 16);
    *buf++ = (u1) (val >> 24);
}
static inline void storeLongLE(u1* buf, u8 val)
{
    *buf++ = (u1) val;
    *buf++ = (u1) (val >> 8);
    *buf++ = (u1) (val >> 16);
    *buf++ = (u1) (val >> 24);
    *buf++ = (u1) (val >> 32);
    *buf++ = (u1) (val >> 40);
    *buf++ = (u1) (val >> 48);
    *buf++ = (u1) (val >> 56);
}

/*
 * Gets a thread's stack trace as an array of method pointers of length pCount.
 * The returned array must be freed by the caller.
 */
static const Method** getStackTrace(Thread* thread, size_t* pCount)
{
    void* fp = thread->interpSave.curFrame;
    assert(thread == dvmThreadSelf() || dvmIsSuspended(thread));

    /* Compute the stack depth. */
    size_t stackDepth = 0;
    while (fp != NULL) {
        const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);

        if (!dvmIsBreakFrame((u4*) fp))
            stackDepth++;

        assert(fp != saveArea->prevFrame);
        fp = saveArea->prevFrame;
    }
    *pCount = stackDepth;

    /*
     * Allocate memory for stack trace. This must be freed later, either by
     * freeThreadStackTraceSamples when tracing stops or by freeThread.
     */
    const Method** stackTrace = (const Method**) malloc(sizeof(Method*) *
                                                        stackDepth);
    if (stackTrace == NULL)
        return NULL;

    /* Walk the stack a second time, filling in the stack trace. */
    const Method** ptr = stackTrace;
    fp = thread->interpSave.curFrame;
    while (fp != NULL) {
        const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);
        const Method* method = saveArea->method;

        if (!dvmIsBreakFrame((u4*) fp)) {
            *ptr++ = method;
            stackDepth--;
        }
        assert(fp != saveArea->prevFrame);
        fp = saveArea->prevFrame;
    }
    assert(stackDepth == 0);

    return stackTrace;
}

/*
 * Get a sample of the stack trace for a thread.
 */
static void getSample(Thread* thread)
{
    /* Get old and new stack trace for thread. */
    size_t newLength = 0;
    const Method** newStackTrace = getStackTrace(thread, &newLength);
    size_t oldLength = thread->stackTraceSampleLength;
    const Method** oldStackTrace = thread->stackTraceSample;

    /* Read time clocks to use for all events in this trace. */
    u4 cpuClockDiff = 0;
    u4 wallClockDiff = 0;
    dvmMethodTraceReadClocks(thread, &cpuClockDiff, &wallClockDiff);
    if (oldStackTrace == NULL) {
        /*
         * If there's no previous stack trace sample, log an entry event for
         * every method in the trace.
         */
        for (int i = newLength - 1; i >= 0; --i) {
            dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
                              cpuClockDiff, wallClockDiff);
        }
    } else {
        /*
         * If there's a previous stack trace, diff the traces and emit entry
         * and exit events accordingly.
         */
        int diffIndexOld = oldLength - 1;
        int diffIndexNew = newLength - 1;
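        /*
         * Illustrative example (added note): if the previous sample was
         * [b, a, main] and the new sample is [d, c, a, main] (index 0 being
         * the innermost frame), the bottom-up walk below matches "main" and
         * "a", leaving diffIndexOld at "b" and diffIndexNew at "c". The two
         * loops that follow then emit EXIT(b), ENTER(c), ENTER(d), which
         * reconstructs the call activity between the two samples.
         */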
        /* Iterate bottom-up until there's a difference between traces. */
        while (diffIndexOld >= 0 && diffIndexNew >= 0 &&
               oldStackTrace[diffIndexOld] == newStackTrace[diffIndexNew]) {
            diffIndexOld--;
            diffIndexNew--;
        }
        /* Iterate top-down over old trace until diff, emitting exit events. */
        for (int i = 0; i <= diffIndexOld; ++i) {
            dvmMethodTraceAdd(thread, oldStackTrace[i], METHOD_TRACE_EXIT,
                              cpuClockDiff, wallClockDiff);
        }
        /* Iterate bottom-up over new trace from diff, emitting entry events. */
        for (int i = diffIndexNew; i >= 0; --i) {
            dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
                              cpuClockDiff, wallClockDiff);
        }
    }

    /* Free the old stack trace and update the thread's stack trace sample. */
    free(oldStackTrace);
    thread->stackTraceSample = newStackTrace;
    thread->stackTraceSampleLength = newLength;
}

/*
 * Entry point for sampling thread. The sampling interval in microseconds is
 * passed in as an argument.
 */
static void* runSamplingThread(void* arg)
{
    int intervalUs = (int) arg;
    while (gDvm.methodTrace.traceEnabled) {
        dvmSuspendAllThreads(SUSPEND_FOR_SAMPLING);

        dvmLockThreadList(dvmThreadSelf());
        for (Thread *thread = gDvm.threadList; thread != NULL; thread = thread->next) {
            getSample(thread);
        }
        dvmUnlockThreadList();

        dvmResumeAllThreads(SUSPEND_FOR_SAMPLING);

        usleep(intervalUs);
    }
    return NULL;
}

/*
 * Boot-time init.
 */
bool dvmProfilingStartup()
{
    /*
     * Initialize "dmtrace" method profiling.
     */
    memset(&gDvm.methodTrace, 0, sizeof(gDvm.methodTrace));
    dvmInitMutex(&gDvm.methodTrace.startStopLock);
    pthread_cond_init(&gDvm.methodTrace.threadExitCond, NULL);

    assert(!dvmCheckException(dvmThreadSelf()));

    /*
     * Allocate storage for instruction counters.
     */
    gDvm.executedInstrCounts = (int*) calloc(kNumPackedOpcodes, sizeof(int));
    if (gDvm.executedInstrCounts == NULL)
        return false;

#ifdef UPDATE_MAGIC_PAGE
    /*
     * If we're running on the emulator, there's a magic page into which
     * we can put interpreted method information. This allows interpreted
     * methods to show up in the emulator's code traces.
     *
     * We could key this off of the "ro.kernel.qemu" property, but there's
     * no real harm in doing this on a real device.
     */
    int fd = open("/dev/qemu_trace", O_RDWR);
    if (fd < 0) {
        ALOGV("Unable to open /dev/qemu_trace");
    } else {
        gDvm.emulatorTracePage = mmap(0, SYSTEM_PAGE_SIZE, PROT_READ|PROT_WRITE,
                                      MAP_SHARED, fd, 0);
        close(fd);
        if (gDvm.emulatorTracePage == MAP_FAILED) {
            ALOGE("Unable to mmap /dev/qemu_trace");
            gDvm.emulatorTracePage = NULL;
        } else {
            *(u4*) gDvm.emulatorTracePage = 0;
        }
    }
#else
    assert(gDvm.emulatorTracePage == NULL);
#endif

    return true;
}

/*
 * Free up profiling resources.
 */
void dvmProfilingShutdown()
{
#ifdef UPDATE_MAGIC_PAGE
    if (gDvm.emulatorTracePage != NULL)
        munmap(gDvm.emulatorTracePage, SYSTEM_PAGE_SIZE);
#endif
    free(gDvm.executedInstrCounts);
}

/*
 * Update the set of active profilers
 */
static void updateActiveProfilers(ExecutionSubModes newMode, bool enable)
{
    int oldValue, newValue;

    // Update global count
    do {
        oldValue = gDvm.activeProfilers;
        newValue = oldValue + (enable ? 1 : -1);
        if (newValue < 0) {
            ALOGE("Can't have %d active profilers", newValue);
            dvmAbort();
        }
    } while (android_atomic_release_cas(oldValue, newValue,
                                        &gDvm.activeProfilers) != 0);

    // Tell the threads
    if (enable) {
        dvmEnableAllSubMode(newMode);
    } else {
        dvmDisableAllSubMode(newMode);
    }

#if defined(WITH_JIT)
    dvmCompilerUpdateGlobalState();
#endif

    ALOGD("+++ active profiler count now %d", newValue);
}


/*
 * Reset the "cpuClockBase" field in all threads.
 */
static void resetCpuClockBase()
{
    Thread* thread;

    dvmLockThreadList(NULL);
    for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
        thread->cpuClockBaseSet = false;
        thread->cpuClockBase = 0;
    }
    dvmUnlockThreadList();
}

/*
 * Free and reset the "stackTraceSample" field in all threads.
 */
static void freeThreadStackTraceSamples()
{
    Thread* thread;

    dvmLockThreadList(NULL);
    for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
        free(thread->stackTraceSample);
        thread->stackTraceSample = NULL;
    }
    dvmUnlockThreadList();
}

/*
 * Dump the thread list to the specified file.
 */
static void dumpThreadList(FILE* fp) {
    dvmLockThreadList(NULL);
    for (Thread* thread = gDvm.threadList; thread != NULL; thread = thread->next) {
        std::string threadName(dvmGetThreadName(thread));
        fprintf(fp, "%d\t%s\n", thread->threadId, threadName.c_str());
    }
    dvmUnlockThreadList();
}

/*
 * This is a dvmHashForeach callback.
 */
static int dumpMarkedMethods(void* vclazz, void* vfp)
{
    DexStringCache stringCache;
    ClassObject* clazz = (ClassObject*) vclazz;
    FILE* fp = (FILE*) vfp;
    Method* meth;
    char* name;
    int i;

    dexStringCacheInit(&stringCache);

    for (i = 0; i < clazz->virtualMethodCount; i++) {
        meth = &clazz->virtualMethods[i];
        if (meth->inProfile) {
            name = dvmDescriptorToName(meth->clazz->descriptor);
            fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
                    name, meth->name,
                    dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
                    dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
            meth->inProfile = false;
            free(name);
        }
    }

    for (i = 0; i < clazz->directMethodCount; i++) {
        meth = &clazz->directMethods[i];
        if (meth->inProfile) {
            name = dvmDescriptorToName(meth->clazz->descriptor);
            fprintf(fp, "0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) meth,
                    name, meth->name,
                    dexProtoGetMethodDescriptor(&meth->prototype, &stringCache),
                    dvmGetMethodSourceFile(meth), dvmLineNumFromPC(meth, 0));
            meth->inProfile = false;
            free(name);
        }
    }

    dexStringCacheRelease(&stringCache);

    return 0;
}

/*
 * Dump the list of "marked" methods to the specified file.
 */
static void dumpMethodList(FILE* fp)
{
    dvmHashTableLock(gDvm.loadedClasses);
    dvmHashForeach(gDvm.loadedClasses, dumpMarkedMethods, (void*) fp);
    dvmHashTableUnlock(gDvm.loadedClasses);
}
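/*
 * Illustrative note (the values below are made up): each marked method is
 * written by dumpMarkedMethods() above as one tab-separated line of the form
 *   <method pointer> <class> <name> <signature> <source file> <line>
 * for example:
 *   0x408c2f40  java.lang.String  charAt  (I)C  String.java  658
 */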
/*
 * Start method tracing. Method tracing is global to the VM (i.e. we
 * trace all threads).
 *
 * This opens the output file (if an already open fd has not been supplied,
 * and we're not going direct to DDMS) and allocates the data buffer. This
 * takes ownership of the file descriptor, closing it on completion.
 *
 * On failure, we throw an exception and return.
 */
void dvmMethodTraceStart(const char* traceFileName, int traceFd, int bufferSize,
    int flags, bool directToDdms, bool samplingEnabled, int intervalUs)
{
    MethodTraceState* state = &gDvm.methodTrace;

    assert(bufferSize > 0);

    dvmLockMutex(&state->startStopLock);
    while (state->traceEnabled != 0) {
        ALOGI("TRACE start requested, but already in progress; stopping");
        dvmUnlockMutex(&state->startStopLock);
        dvmMethodTraceStop();
        dvmLockMutex(&state->startStopLock);
    }
    ALOGI("TRACE STARTED: '%s' %dKB", traceFileName, bufferSize / 1024);

    /*
     * Allocate storage and open files.
     *
     * We don't need to initialize the buffer, but doing so might remove
     * some fault overhead if the pages aren't mapped until touched.
     */
    state->buf = (u1*) malloc(bufferSize);
    if (state->buf == NULL) {
        dvmThrowInternalError("buffer alloc failed");
        goto fail;
    }
    if (!directToDdms) {
        if (traceFd < 0) {
            state->traceFile = fopen(traceFileName, "w");
        } else {
            state->traceFile = fdopen(traceFd, "w");
        }
        if (state->traceFile == NULL) {
            int err = errno;
            ALOGE("Unable to open trace file '%s': %s",
                  traceFileName, strerror(err));
            dvmThrowExceptionFmt(gDvm.exRuntimeException,
                                 "Unable to open trace file '%s': %s",
                                 traceFileName, strerror(err));
            goto fail;
        }
    }
    traceFd = -1;
    memset(state->buf, (char)FILL_PATTERN, bufferSize);

    state->directToDdms = directToDdms;
    state->bufferSize = bufferSize;
    state->overflow = false;

    /*
     * Enable alloc counts if we've been requested to do so.
     */
    state->flags = flags;
    if ((flags & TRACE_ALLOC_COUNTS) != 0)
        dvmStartAllocCounting();

    /* reset our notion of the start time for all CPU threads */
    resetCpuClockBase();

    state->startWhen = getWallTimeInUsec();

    if (useThreadCpuClock() && useWallClock()) {
        state->traceVersion = 3;
        state->recordSize = TRACE_REC_SIZE_DUAL_CLOCK;
    } else {
        state->traceVersion = 2;
        state->recordSize = TRACE_REC_SIZE_SINGLE_CLOCK;
    }

    state->samplingEnabled = samplingEnabled;

    /*
     * Output the header.
     */
    memset(state->buf, 0, TRACE_HEADER_LEN);
    storeIntLE(state->buf + 0, TRACE_MAGIC);
    storeShortLE(state->buf + 4, state->traceVersion);
    storeShortLE(state->buf + 6, TRACE_HEADER_LEN);
    storeLongLE(state->buf + 8, state->startWhen);
    if (state->traceVersion >= 3) {
        storeShortLE(state->buf + 16, state->recordSize);
    }
    state->curOffset = TRACE_HEADER_LEN;

    /*
     * Set the "enabled" flag. Once we do this, threads will wait to be
     * signaled before exiting, so we have to make sure we wake them up.
     */
    android_atomic_release_store(true, &state->traceEnabled);

    /*
     * ENHANCEMENT: To trace just a single thread, modify the
     * following to take a Thread* argument, and set the appropriate
     * interpBreak flags only on the target thread.
     */
    if (samplingEnabled) {
        updateActiveProfilers(kSubModeSampleTrace, true);
        /* Start the sampling thread. */
        if (!dvmCreateInternalThread(&state->samplingThreadHandle,
                "Sampling Thread", &runSamplingThread, (void*) intervalUs)) {
            dvmThrowInternalError("failed to create sampling thread");
            goto fail;
        }
    } else {
        updateActiveProfilers(kSubModeMethodTrace, true);
    }

    dvmUnlockMutex(&state->startStopLock);
    return;

fail:
    if (state->traceFile != NULL) {
        fclose(state->traceFile);
        state->traceFile = NULL;
    }
    if (state->buf != NULL) {
        free(state->buf);
        state->buf = NULL;
    }
    if (traceFd >= 0)
        close(traceFd);
    dvmUnlockMutex(&state->startStopLock);
}

/*
 * Run through the data buffer and pull out the methods that were visited.
 * Set a mark so that we know which ones to output.
 */
static void markTouchedMethods(int endOffset)
{
    u1* ptr = gDvm.methodTrace.buf + TRACE_HEADER_LEN;
    u1* end = gDvm.methodTrace.buf + endOffset;
    size_t recordSize = gDvm.methodTrace.recordSize;
    unsigned int methodVal;
    Method* method;

    while (ptr < end) {
        methodVal = ptr[2] | (ptr[3] << 8) | (ptr[4] << 16)
                    | (ptr[5] << 24);
        method = (Method*) METHOD_ID(methodVal);

        method->inProfile = true;
        ptr += recordSize;
    }
}

/*
 * Exercises the clocks in the same way they will be during profiling.
 */
static inline void measureClockOverhead()
{
#if defined(HAVE_POSIX_CLOCKS)
    if (useThreadCpuClock()) {
        getThreadCpuTimeInUsec(dvmThreadSelf());
    }
#endif
    if (useWallClock()) {
        getWallTimeInUsec();
    }
}

/*
 * Compute the amount of overhead in a clock call, in nsec.
 *
 * This value is going to vary depending on what else is going on in the
 * system. When examined across several runs a pattern should emerge.
 */
static u4 getClockOverhead()
{
    u8 calStart, calElapsed;
    int i;

    calStart = getStopwatchClock();
    for (i = 1000 * 4; i > 0; i--) {
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
        measureClockOverhead();
    }

    calElapsed = getStopwatchClock() - calStart;
    return (int) (calElapsed / (8*4));
}
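/*
 * Note (added for clarity): the calibration loop above makes 4 * 1000
 * iterations * 8 calls = 32,000 clock calls. getStopwatchClock() returns
 * microseconds, so dividing the elapsed time by 8*4 = 32 yields an
 * approximate per-call overhead in nanoseconds
 * (usec / 32 == usec * 1000 / 32,000).
 */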

/*
 * Indicates if method tracing is active and what kind of tracing is active.
 */
TracingMode dvmGetMethodTracingMode()
{
    const MethodTraceState* state = &gDvm.methodTrace;
    if (!state->traceEnabled) {
        return TRACING_INACTIVE;
    } else if (state->samplingEnabled) {
        return SAMPLE_PROFILING_ACTIVE;
    } else {
        return METHOD_TRACING_ACTIVE;
    }
}

/*
 * Stop method tracing. We write the buffer to disk and generate a key
 * file so we can interpret it.
 */
void dvmMethodTraceStop()
{
    MethodTraceState* state = &gDvm.methodTrace;
    bool samplingEnabled = state->samplingEnabled;
    u8 elapsed;

    /*
     * We need this to prevent somebody from starting a new trace while
     * we're in the process of stopping the old.
     */
    dvmLockMutex(&state->startStopLock);

    if (!state->traceEnabled) {
        /* somebody already stopped it, or it was never started */
        ALOGD("TRACE stop requested, but not running");
        dvmUnlockMutex(&state->startStopLock);
        return;
    } else {
        if (samplingEnabled) {
            updateActiveProfilers(kSubModeSampleTrace, false);
        } else {
            updateActiveProfilers(kSubModeMethodTrace, false);
        }
    }

    /* compute elapsed time */
    elapsed = getWallTimeInUsec() - state->startWhen;

    /*
     * Globally disable it, and allow other threads to notice. We want
     * to stall here for at least as long as dvmMethodTraceAdd needs
     * to finish. There's no real risk though -- it will take a while to
     * write the data to disk, and we don't clear the buffer pointer until
     * after that completes.
     */
    state->traceEnabled = false;
    ANDROID_MEMBAR_FULL();
    sched_yield();
    usleep(250 * 1000);

    if ((state->flags & TRACE_ALLOC_COUNTS) != 0)
        dvmStopAllocCounting();

    /*
     * It's possible under some circumstances for a thread to have advanced
     * the data pointer but not written the method value. It's possible
     * (though less likely) for the data pointer to be advanced, or partial
     * data written, while we're doing work here.
     *
     * To avoid seeing partially-written data, we grab state->curOffset here,
     * and use our local copy from here on. We then scan through what's
     * already written. If we see the fill pattern in what should be the
     * method pointer, we cut things off early. (If we don't, we'll fail
     * when we dereference the pointer.)
     *
     * There's a theoretical possibility of interrupting another thread
     * after it has partially written the method pointer, in which case
     * we'll likely crash when we dereference it. The possibility of
     * this actually happening should be at or near zero. Fixing it
     * completely could be done by writing the thread number last and
     * using a sentinel value to indicate a partially-written record,
     * but that requires memory barriers.
     */
    int finalCurOffset = state->curOffset;

    size_t recordSize = state->recordSize;
    if (finalCurOffset > TRACE_HEADER_LEN) {
        u4 fillVal = METHOD_ID(FILL_PATTERN);
        u1* scanPtr = state->buf + TRACE_HEADER_LEN;

        while (scanPtr < state->buf + finalCurOffset) {
            u4 methodVal = scanPtr[2] | (scanPtr[3] << 8) | (scanPtr[4] << 16)
                           | (scanPtr[5] << 24);
            if (METHOD_ID(methodVal) == fillVal) {
                u1* scanBase = state->buf + TRACE_HEADER_LEN;
                ALOGW("Found unfilled record at %d (of %d)",
                      (scanPtr - scanBase) / recordSize,
                      (finalCurOffset - TRACE_HEADER_LEN) / recordSize);
                finalCurOffset = scanPtr - state->buf;
                break;
            }

            scanPtr += recordSize;
        }
    }

    ALOGI("TRACE STOPPED%s: writing %d records",
          state->overflow ? " (NOTE: overflowed buffer)" : "",
          (finalCurOffset - TRACE_HEADER_LEN) / recordSize);
    if (gDvm.debuggerActive) {
        ALOGW("WARNING: a debugger is active; method-tracing results "
              "will be skewed");
    }

    /*
     * Do a quick calibration test to see how expensive our clock call is.
     */
    u4 clockNsec = getClockOverhead();

    markTouchedMethods(finalCurOffset);

    char* memStreamPtr;
    size_t memStreamSize;
    if (state->directToDdms) {
        assert(state->traceFile == NULL);
        state->traceFile = open_memstream(&memStreamPtr, &memStreamSize);
        if (state->traceFile == NULL) {
            /* not expected */
            ALOGE("Unable to open memstream");
            dvmAbort();
        }
    }
    assert(state->traceFile != NULL);

    fprintf(state->traceFile, "%cversion\n", TOKEN_CHAR);
    fprintf(state->traceFile, "%d\n", state->traceVersion);
    fprintf(state->traceFile, "data-file-overflow=%s\n",
            state->overflow ? "true" : "false");
    if (useThreadCpuClock()) {
        if (useWallClock()) {
            fprintf(state->traceFile, "clock=dual\n");
        } else {
            fprintf(state->traceFile, "clock=thread-cpu\n");
        }
    } else {
        fprintf(state->traceFile, "clock=wall\n");
    }
    fprintf(state->traceFile, "elapsed-time-usec=%llu\n", elapsed);
    fprintf(state->traceFile, "num-method-calls=%d\n",
            (finalCurOffset - TRACE_HEADER_LEN) / state->recordSize);
    fprintf(state->traceFile, "clock-call-overhead-nsec=%d\n", clockNsec);
    fprintf(state->traceFile, "vm=dalvik\n");
    if ((state->flags & TRACE_ALLOC_COUNTS) != 0) {
        fprintf(state->traceFile, "alloc-count=%d\n",
                gDvm.allocProf.allocCount);
        fprintf(state->traceFile, "alloc-size=%d\n",
                gDvm.allocProf.allocSize);
        fprintf(state->traceFile, "gc-count=%d\n",
                gDvm.allocProf.gcCount);
    }
    fprintf(state->traceFile, "%cthreads\n", TOKEN_CHAR);
    dumpThreadList(state->traceFile);
    fprintf(state->traceFile, "%cmethods\n", TOKEN_CHAR);
    dumpMethodList(state->traceFile);
    fprintf(state->traceFile, "%cend\n", TOKEN_CHAR);

    if (state->directToDdms) {
        /*
         * Data is in two places: memStreamPtr and state->buf. Send
         * the whole thing to DDMS, wrapped in an MPSE packet.
         */
        fflush(state->traceFile);

        struct iovec iov[2];
        iov[0].iov_base = memStreamPtr;
        iov[0].iov_len = memStreamSize;
        iov[1].iov_base = state->buf;
        iov[1].iov_len = finalCurOffset;
        dvmDbgDdmSendChunkV(CHUNK_TYPE("MPSE"), iov, 2);
    } else {
        /* append the profiling data */
        if (fwrite(state->buf, finalCurOffset, 1, state->traceFile) != 1) {
            int err = errno;
            ALOGE("trace fwrite(%d) failed: %s",
                  finalCurOffset, strerror(err));
            dvmThrowExceptionFmt(gDvm.exRuntimeException,
                                 "Trace data write failed: %s", strerror(err));
        }
    }

    /* done! */
    free(state->buf);
    state->buf = NULL;
    fclose(state->traceFile);
    state->traceFile = NULL;

    /* free and clear sampling traces held by all threads */
    if (samplingEnabled) {
        freeThreadStackTraceSamples();
    }

    /* wake any threads that were waiting for profiling to complete */
    dvmBroadcastCond(&state->threadExitCond);
    dvmUnlockMutex(&state->startStopLock);

    /* make sure the sampling thread has stopped */
    if (samplingEnabled &&
        pthread_join(state->samplingThreadHandle, NULL) != 0) {
        ALOGW("Sampling thread join failed");
    }
}

/*
 * Read clocks and generate time diffs for method trace events.
 */
void dvmMethodTraceReadClocks(Thread* self, u4* cpuClockDiff,
    u4* wallClockDiff)
{
#if defined(HAVE_POSIX_CLOCKS)
    if (useThreadCpuClock()) {
        if (!self->cpuClockBaseSet) {
            /* Initialize per-thread CPU clock base time on first use. */
            self->cpuClockBase = getThreadCpuTimeInUsec(self);
            self->cpuClockBaseSet = true;
        } else {
            *cpuClockDiff = getThreadCpuTimeInUsec(self) - self->cpuClockBase;
        }
    }
#endif
    if (useWallClock()) {
        *wallClockDiff = getWallTimeInUsec() - gDvm.methodTrace.startWhen;
    }
}
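/*
 * Note (added, not part of the original comments): METHOD_COMBINE() and
 * METHOD_ID() are defined elsewhere (Profile.h). Per the file-format comment
 * at the top of this file, the action code (METHOD_TRACE_ENTER/EXIT/UNROLL)
 * is OR'ed into the low-order bits of the Method pointer, which is
 * presumably how markTouchedMethods() and the unfilled-record scan in
 * dvmMethodTraceStop() recover the pointer again via METHOD_ID().
 */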
/*
 * We just did something with a method. Emit a record.
 *
 * Multiple threads may be banging on this all at once. We use atomic ops
 * rather than mutexes for speed.
 */
void dvmMethodTraceAdd(Thread* self, const Method* method, int action,
    u4 cpuClockDiff, u4 wallClockDiff)
{
    MethodTraceState* state = &gDvm.methodTrace;
    u4 methodVal;
    int oldOffset, newOffset;
    u1* ptr;

    assert(method != NULL);

    /*
     * Advance "curOffset" atomically.
     */
    do {
        oldOffset = state->curOffset;
        newOffset = oldOffset + state->recordSize;
        if (newOffset > state->bufferSize) {
            state->overflow = true;
            return;
        }
    } while (android_atomic_release_cas(oldOffset, newOffset,
                                        &state->curOffset) != 0);

    //assert(METHOD_ACTION((u4) method) == 0);

    methodVal = METHOD_COMBINE((u4) method, action);

    /*
     * Write data into "oldOffset".
     */
    ptr = state->buf + oldOffset;
    *ptr++ = (u1) self->threadId;
    *ptr++ = (u1) (self->threadId >> 8);
    *ptr++ = (u1) methodVal;
    *ptr++ = (u1) (methodVal >> 8);
    *ptr++ = (u1) (methodVal >> 16);
    *ptr++ = (u1) (methodVal >> 24);

#if defined(HAVE_POSIX_CLOCKS)
    if (useThreadCpuClock()) {
        *ptr++ = (u1) cpuClockDiff;
        *ptr++ = (u1) (cpuClockDiff >> 8);
        *ptr++ = (u1) (cpuClockDiff >> 16);
        *ptr++ = (u1) (cpuClockDiff >> 24);
    }
#endif

    if (useWallClock()) {
        *ptr++ = (u1) wallClockDiff;
        *ptr++ = (u1) (wallClockDiff >> 8);
        *ptr++ = (u1) (wallClockDiff >> 16);
        *ptr++ = (u1) (wallClockDiff >> 24);
    }
}


/*
 * Register the METHOD_TRACE_ENTER action for the fast interpreter and
 * JIT'ed code.
 */
void dvmFastMethodTraceEnter(const Method* method, Thread* self)
{
    if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
        u4 cpuClockDiff = 0;
        u4 wallClockDiff = 0;
        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
        dvmMethodTraceAdd(self, method, METHOD_TRACE_ENTER, cpuClockDiff,
                          wallClockDiff);
    }
}

/*
 * Register the METHOD_TRACE_EXIT action for the fast interpreter and
 * JIT'ed code for methods. The about-to-return callee method can be
 * retrieved from self->interpSave.method.
 */
void dvmFastMethodTraceExit(Thread* self)
{
    if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
        u4 cpuClockDiff = 0;
        u4 wallClockDiff = 0;
        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
        dvmMethodTraceAdd(self, self->interpSave.method,
                          METHOD_TRACE_EXIT, cpuClockDiff, wallClockDiff);
    }
}

/*
 * Register the METHOD_TRACE_EXIT action for the fast interpreter and
 * JIT'ed code for JNI methods. The about-to-return JNI callee method is passed
 * in explicitly. Also used for inline-execute.
 */
void dvmFastNativeMethodTraceExit(const Method* method, Thread* self)
{
    if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
        u4 cpuClockDiff = 0;
        u4 wallClockDiff = 0;
        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
        dvmMethodTraceAdd(self, method, METHOD_TRACE_EXIT, cpuClockDiff,
                          wallClockDiff);
    }
}

/*
 * We just did something with a method. Emit a record by setting a value
 * in a magic memory location.
 */
void dvmEmitEmulatorTrace(const Method* method, int action)
{
#ifdef UPDATE_MAGIC_PAGE
    /*
     * We store the address of the Dalvik bytecodes to the memory-mapped
     * trace page for normal methods. We also trace calls to native
     * functions by storing the address of the native function to the
     * trace page.
     * Abstract methods don't have any bytecodes, so we don't trace them.
     * (Abstract methods are never called, but in Dalvik they can be
     * because we do a "late trap" to a native method to generate the
     * abstract method exception.)
     */
    if (dvmIsAbstractMethod(method))
        return;

    u4* pMagic = (u4*) gDvm.emulatorTracePage;
    u4 addr;

    if (dvmIsNativeMethod(method)) {
        /*
         * The "action" parameter is one of:
         *   0 = ENTER
         *   1 = EXIT
         *   2 = UNROLL
         * To help the trace tools reconstruct the runtime stack containing
         * a mix of normal plus native methods, we add 4 to the action if this
         * is a native method.
         */
        action += 4;

        /*
         * Get the address of the native function.
         * This isn't the right address -- how do I get it?
         * Fortunately, the trace tools can get by without the address, but
         * it would be nice to fix this.
         */
        addr = (u4) method->nativeFunc;
    } else {
        /*
         * The dexlist output shows the &DexCode.insns offset value, which
         * is offset from the start of the base DEX header. Method.insns
         * is the absolute address, effectively offset from the start of
         * the optimized DEX header. We either need to return the
         * optimized DEX base file address offset by the right amount, or
         * take the "real" address and subtract off the size of the
         * optimized DEX header.
         *
         * Would be nice to factor this out at dexlist time, but we can't count
         * on having access to the correct optimized DEX file.
         */
        assert(method->insns != NULL);
        const DexOptHeader* pOptHdr = method->clazz->pDvmDex->pDexFile->pOptHeader;
        addr = (u4) method->insns - pOptHdr->dexOffset;
    }

    *(pMagic+action) = addr;
    LOGVV("Set %p = 0x%08x (%s.%s)",
          pMagic+action, addr, method->clazz->descriptor, method->name);
#endif
}

/*
 * The GC calls this when it's about to start. We add a marker to the
 * trace output so the tool can exclude the GC cost from the results.
 */
void dvmMethodTraceGCBegin()
{
    TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTraceGcMethod);
}
void dvmMethodTraceGCEnd()
{
    TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTraceGcMethod);
}

/*
 * The class loader calls this when it's loading or initializing a class.
 */
void dvmMethodTraceClassPrepBegin()
{
    TRACE_METHOD_ENTER(dvmThreadSelf(), gDvm.methodTraceClassPrepMethod);
}
void dvmMethodTraceClassPrepEnd()
{
    TRACE_METHOD_EXIT(dvmThreadSelf(), gDvm.methodTraceClassPrepMethod);
}


/*
 * Enable emulator trace info.
 */
void dvmEmulatorTraceStart()
{
    /* If we could not map the emulator trace page, then do not enable tracing */
    if (gDvm.emulatorTracePage == NULL)
        return;

    /* in theory we should make this an atomic inc; in practice not important */
    gDvm.emulatorTraceEnableCount++;
    if (gDvm.emulatorTraceEnableCount == 1)
        ALOGD("--- emulator method traces enabled");
    updateActiveProfilers(kSubModeEmulatorTrace, true);
}

/*
 * Disable emulator trace info.
 */
void dvmEmulatorTraceStop()
{
    if (gDvm.emulatorTraceEnableCount == 0) {
        ALOGE("ERROR: emulator tracing not enabled");
        return;
    }
    /* in theory we should make this an atomic dec; in practice not important */
    gDvm.emulatorTraceEnableCount--;
    if (gDvm.emulatorTraceEnableCount == 0)
        ALOGD("--- emulator method traces disabled");
    updateActiveProfilers(kSubModeEmulatorTrace,
                          (gDvm.emulatorTraceEnableCount != 0));
}


/*
 * Start instruction counting.
 */
void dvmStartInstructionCounting()
{
    /* in theory we should make this an atomic inc; in practice not important */
    gDvm.instructionCountEnableCount++;
    updateActiveProfilers(kSubModeInstCounting, true);
}

/*
 * Stop instruction counting.
 */
void dvmStopInstructionCounting()
{
    if (gDvm.instructionCountEnableCount == 0) {
        ALOGE("ERROR: instruction counting not enabled");
        dvmAbort();
    }
    gDvm.instructionCountEnableCount--;
    updateActiveProfilers(kSubModeInstCounting,
                          (gDvm.instructionCountEnableCount != 0));
}


/*
 * Start alloc counting. Note this doesn't affect the "active profilers"
 * count, since the interpreter loop is not involved.
 */
void dvmStartAllocCounting()
{
    gDvm.allocProf.enabled = true;
}

/*
 * Stop alloc counting.
 */
void dvmStopAllocCounting()
{
    gDvm.allocProf.enabled = false;
}