1 /* 2 * Copyright (c) 2015, The Android Open Source Project 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google, Inc. nor the names of its contributors 15 * may be used to endorse or promote products derived from this 16 * software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 22 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 25 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 28 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <binder/IBinder.h> 33 #include <binder/IServiceManager.h> 34 #include <binder/Parcel.h> 35 36 #include <ctime> 37 #include <cutils/properties.h> 38 #include <signal.h> 39 #include <stdbool.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 44 #include <sys/resource.h> 45 #include <sys/stat.h> 46 #include <sys/time.h> 47 #include <sys/types.h> 48 #include <unistd.h> 49 50 #include <utils/Log.h> 51 #include <utils/String8.h> 52 #include <utils/Trace.h> 53 #include <zlib.h> 54 55 using namespace android; 56 57 #ifdef LOG_TAG 58 #undef LOG_TAG 59 #endif 60 61 #define LOG_TAG "anrdaemon" 62 63 static const int check_period = 1; // in sec 64 static const int tracing_check_period = 500000; // in micro sec 65 static const int cpu_stat_entries = 7; // number of cpu stat entries 66 static const int min_buffer_size = 16; 67 static const int max_buffer_size = 2048; 68 static const char *min_buffer_size_str = "16"; 69 static const char *max_buffer_size_str = "2048"; 70 static const int time_buf_size = 20; 71 static const int path_buf_size = 60; 72 73 typedef struct cpu_stat { 74 unsigned long utime, ntime, stime, itime; 75 unsigned long iowtime, irqtime, sirqtime, steal; 76 unsigned long total; 77 } cpu_stat_t; 78 79 /* 80 * Logging on/off threshold. 81 * Uint: 0.01%; default to 99.90% cpu. 82 */ 83 static int idle_threshold = 10; 84 85 static bool quit = false; 86 static bool suspend= false; 87 static bool dump_requested = false; 88 static bool err = false; 89 static char err_msg[100]; 90 static bool tracing = false; 91 92 static const char *buf_size_kb = "2048"; 93 static const char *apps = ""; 94 static uint64_t tag = 0; 95 96 static cpu_stat_t new_cpu; 97 static cpu_stat_t old_cpu; 98 99 /* Log certain kernel activity when enabled */ 100 static bool log_sched = false; 101 static bool log_stack = false; 102 static bool log_irq = false; 103 static bool log_sync = false; 104 static bool log_workq = false; 105 106 /* Paths for debugfs controls*/ 107 static const char* dfs_trace_output_path = 108 "/d/tracing/trace"; 109 static const char* dfs_irq_path = 110 "/d/tracing/events/irq/enable"; 111 static const char* dfs_sync_path = 112 "/d/tracing/events/sync/enable"; 113 static const char* dfs_workq_path = 114 "/d/tracing/events/workqueue/enable"; 115 static const char* dfs_stack_path = 116 "/d/tracing/options/stacktrace"; 117 static const char* dfs_sched_switch_path = 118 "/d/tracing/events/sched/sched_switch/enable"; 119 static const char* dfs_sched_wakeup_path = 120 "/d/tracing/events/sched/sched_wakeup/enable"; 121 static const char* dfs_control_path = 122 "/d/tracing/tracing_on"; 123 static const char* dfs_buffer_size_path = 124 "/d/tracing/buffer_size_kb"; 125 static const char* dfs_tags_property = "debug.atrace.tags.enableflags"; 126 static const char* dfs_apps_property = "debug.atrace.app_cmdlines"; 127 128 /* 129 * Read accumulated cpu data from /proc/stat 130 */ 131 static void get_cpu_stat(cpu_stat_t *cpu) { 132 FILE *fp = NULL; 133 const char *params = "cpu %lu %lu %lu %lu %lu %lu %lu %*d %*d %*d\n"; 134 135 if ((fp = fopen("/proc/stat", "r")) == NULL) { 136 err = true; 137 snprintf(err_msg, sizeof(err_msg), "can't read from /proc/stat with errno %d", errno); 138 } else { 139 if (fscanf(fp, params, &cpu->utime, &cpu->ntime, 140 &cpu->stime, &cpu->itime, &cpu->iowtime, &cpu->irqtime, 141 &cpu->sirqtime) != cpu_stat_entries) { 142 /* 143 * If failed in getting status, new_cpu won't be updated and 144 * is_heavy_loaded() will return false. 145 */ 146 ALOGE("Error in getting cpu status. Skipping this check."); 147 fclose(fp); 148 return; 149 } 150 151 cpu->total = cpu->utime + cpu->ntime + cpu->stime + cpu->itime 152 + cpu->iowtime + cpu->irqtime + cpu->sirqtime; 153 154 fclose(fp); 155 } 156 } 157 158 /* 159 * Calculate cpu usage in the past interval. 160 * If tracing is on, increase the idle threshold by 1.00% so that we do not 161 * turn on and off tracing frequently when the cpu load is right close to 162 * threshold. 163 */ 164 static bool is_heavy_load(void) { 165 unsigned long diff_idle, diff_total; 166 int threshold = idle_threshold + (tracing?100:0); 167 get_cpu_stat(&new_cpu); 168 diff_idle = new_cpu.itime - old_cpu.itime; 169 diff_total = new_cpu.total - old_cpu.total; 170 old_cpu = new_cpu; 171 return (diff_idle * 10000 < diff_total * threshold); 172 } 173 174 /* 175 * Force the userland processes to refresh their property for logging. 176 */ 177 static void dfs_poke_binder(void) { 178 sp<IServiceManager> sm = defaultServiceManager(); 179 Vector<String16> services = sm->listServices(); 180 for (size_t i = 0; i < services.size(); i++) { 181 sp<IBinder> obj = sm->checkService(services[i]); 182 if (obj != NULL) { 183 Parcel data; 184 obj->transact(IBinder::SYSPROPS_TRANSACTION, data, NULL, 0); 185 } 186 } 187 } 188 189 /* 190 * Enable/disable a debugfs property by writing 0/1 to its path. 191 */ 192 static int dfs_enable(bool enable, const char* path) { 193 int fd = open(path, O_WRONLY); 194 if (fd == -1) { 195 err = true; 196 snprintf(err_msg, sizeof(err_msg), "Can't open %s. Error: %d", path, errno); 197 return -1; 198 } 199 const char* control = (enable?"1":"0"); 200 ssize_t len = strlen(control); 201 int max_try = 10; // Fail if write was interrupted for 10 times 202 while (write(fd, control, len) != len) { 203 if (errno == EINTR && max_try-- > 0) { 204 usleep(100); 205 continue; 206 } 207 208 err = true; 209 snprintf(err_msg, sizeof(err_msg), "Error %d in writing to %s.", errno, path); 210 } 211 close(fd); 212 return (err?-1:0); 213 } 214 215 /* 216 * Set the userland tracing properties. 217 */ 218 static void dfs_set_property(uint64_t mtag, const char* mapp, bool enable) { 219 char buf[64]; 220 snprintf(buf, sizeof(buf), "%#" PRIx64, mtag); 221 if (property_set(dfs_tags_property, buf) < 0) { 222 err = true; 223 snprintf(err_msg, sizeof(err_msg), "Failed to set debug tags system properties."); 224 } 225 226 if (strlen(mapp) > 0 227 && property_set(dfs_apps_property, mapp) < 0) { 228 err = true; 229 snprintf(err_msg, sizeof(err_msg), "Failed to set debug applications."); 230 } 231 232 if (log_sched) { 233 dfs_enable(enable, dfs_sched_switch_path); 234 dfs_enable(enable, dfs_sched_wakeup_path); 235 } 236 if (log_stack) { 237 dfs_enable(enable, dfs_stack_path); 238 } 239 if (log_irq) { 240 dfs_enable(enable, dfs_irq_path); 241 } 242 if (log_sync) { 243 dfs_enable(enable, dfs_sync_path); 244 } 245 if (log_workq) { 246 dfs_enable(enable, dfs_workq_path); 247 } 248 } 249 250 /* 251 * Dump the log in a compressed format for systrace to visualize. 252 * Create a dump file "dump_of_anrdaemon.<current_time>" under /data/misc/anrd 253 */ 254 static void dump_trace() 255 { 256 time_t now = time(0); 257 struct tm tstruct; 258 char time_buf[time_buf_size]; 259 char path_buf[path_buf_size]; 260 const char* header = " done\nTRACE:\n"; 261 ssize_t header_len = strlen(header); 262 263 ALOGI("Started to dump ANRdaemon trace."); 264 265 tstruct = *localtime(&now); 266 strftime(time_buf, time_buf_size, "%Y-%m-%d.%X", &tstruct); 267 snprintf(path_buf, path_buf_size, "/data/misc/anrd/dump_of_anrdaemon.%s", time_buf); 268 int output_fd = creat(path_buf, S_IRWXU); 269 if (output_fd == -1) { 270 ALOGE("Failed to create %s. Dump aborted.", path_buf); 271 return; 272 } 273 274 if (write(output_fd, header, strlen(header)) != header_len) { 275 ALOGE("Failed to write the header."); 276 close(output_fd); 277 return; 278 } 279 280 int trace_fd = open(dfs_trace_output_path, O_RDWR); 281 if (trace_fd == -1) { 282 ALOGE("Failed to open %s. Dump aborted.", dfs_trace_output_path); 283 close(output_fd); 284 return; 285 } 286 287 z_stream zs; 288 uint8_t *in, *out; 289 int result, flush; 290 291 memset(&zs, 0, sizeof(zs)); 292 result = deflateInit(&zs, Z_DEFAULT_COMPRESSION); 293 if (result != Z_OK) { 294 ALOGE("error initializing zlib: %d\n", result); 295 close(trace_fd); 296 close(output_fd); 297 return; 298 } 299 300 const size_t bufSize = 64*1024; 301 in = (uint8_t*)malloc(bufSize); 302 out = (uint8_t*)malloc(bufSize); 303 flush = Z_NO_FLUSH; 304 305 zs.next_out = out; 306 zs.avail_out = bufSize; 307 308 do { 309 if (zs.avail_in == 0) { 310 result = read(trace_fd, in, bufSize); 311 if (result < 0) { 312 ALOGE("error reading trace: %s", strerror(errno)); 313 result = Z_STREAM_END; 314 break; 315 } else if (result == 0) { 316 flush = Z_FINISH; 317 } else { 318 zs.next_in = in; 319 zs.avail_in = result; 320 } 321 } 322 323 if (zs.avail_out == 0) { 324 result = write(output_fd, out, bufSize); 325 if ((size_t)result < bufSize) { 326 ALOGE("error writing deflated trace: %s", strerror(errno)); 327 result = Z_STREAM_END; 328 zs.avail_out = bufSize; 329 break; 330 } 331 zs.next_out = out; 332 zs.avail_out = bufSize; 333 } 334 335 } while ((result = deflate(&zs, flush)) == Z_OK); 336 337 if (result != Z_STREAM_END) { 338 ALOGE("error deflating trace: %s\n", zs.msg); 339 } 340 341 if (zs.avail_out < bufSize) { 342 size_t bytes = bufSize - zs.avail_out; 343 result = write(output_fd, out, bytes); 344 if ((size_t)result < bytes) { 345 ALOGE("error writing deflated trace: %s", strerror(errno)); 346 } 347 } 348 349 result = deflateEnd(&zs); 350 if (result != Z_OK) { 351 ALOGE("error cleaning up zlib: %d\n", result); 352 } 353 354 free(in); 355 free(out); 356 357 close(trace_fd); 358 close(output_fd); 359 360 ALOGI("Finished dump. Output file stored at: %s", path_buf); 361 } 362 363 /* 364 * Start logging when cpu usage is high. Meanwhile, moniter the cpu usage and 365 * stop logging when it drops down. 366 */ 367 static void start_tracing(void) { 368 ALOGD("High cpu usage, start logging."); 369 370 if (dfs_enable(true, dfs_control_path) != 0) { 371 ALOGE("Failed to start tracing."); 372 return; 373 } 374 tracing = true; 375 376 /* Stop logging when cpu usage drops or the daemon is suspended.*/ 377 do { 378 usleep(tracing_check_period); 379 } while (!suspend && !dump_requested && is_heavy_load()); 380 381 if (dfs_enable(false, dfs_control_path) != 0) { 382 ALOGE("Failed to stop tracing."); 383 err = true; 384 return; 385 } 386 tracing = false; 387 388 if (suspend) { 389 ALOGI("trace stopped due to suspend. Send SIGCONT to resume."); 390 } else if (dump_requested) { 391 ALOGI("trace stopped due to dump request."); 392 dump_trace(); 393 dump_requested = false; 394 } else { 395 ALOGD("Usage back to low, stop logging."); 396 } 397 } 398 399 /* 400 * Set the tracing log buffer size. 401 * Note the actual buffer size will be buf_size_kb * number of cores. 402 */ 403 static int set_tracing_buffer_size(void) { 404 int fd = open(dfs_buffer_size_path, O_WRONLY); 405 if (fd == -1) { 406 err = true; 407 snprintf(err_msg, sizeof(err_msg), "Can't open atrace buffer size file under /d/tracing."); 408 return -1; 409 } 410 ssize_t len = strlen(buf_size_kb); 411 if (write(fd, buf_size_kb, len) != len) { 412 err = true; 413 snprintf(err_msg, sizeof(err_msg), "Error in writing to atrace buffer size file."); 414 } 415 close(fd); 416 return (err?-1:0); 417 418 } 419 420 /* 421 * Main loop to moniter the cpu usage and decided whether to start logging. 422 */ 423 static void start(void) { 424 if ((set_tracing_buffer_size()) != 0) 425 return; 426 427 dfs_set_property(tag, apps, true); 428 dfs_poke_binder(); 429 430 get_cpu_stat(&old_cpu); 431 sleep(check_period); 432 433 while (!quit && !err) { 434 if (!suspend && is_heavy_load()) { 435 /* 436 * Increase process priority to make sure we can stop logging when 437 * necessary and do not overwrite the buffer 438 */ 439 setpriority(PRIO_PROCESS, 0, -20); 440 start_tracing(); 441 setpriority(PRIO_PROCESS, 0, 0); 442 } 443 sleep(check_period); 444 } 445 return; 446 } 447 448 /* 449 * If trace is not running, dump trace right away. 450 * If trace is running, request to dump trace. 451 */ 452 static void request_dump_trace() 453 { 454 if (!tracing) { 455 dump_trace(); 456 } else if (!dump_requested) { 457 dump_requested = true; 458 } 459 } 460 461 static void handle_signal(int signo) 462 { 463 switch (signo) { 464 case SIGQUIT: 465 suspend = true; 466 quit = true; 467 break; 468 case SIGSTOP: 469 suspend = true; 470 break; 471 case SIGCONT: 472 suspend = false; 473 break; 474 case SIGUSR1: 475 request_dump_trace(); 476 } 477 } 478 479 /* 480 * Set the signal handler: 481 * SIGQUIT: Reset debugfs and tracing property and terminate the daemon. 482 * SIGSTOP: Stop logging and suspend the daemon. 483 * SIGCONT: Resume the daemon as normal. 484 * SIGUSR1: Dump the logging to a compressed format for systrace to visualize. 485 */ 486 static void register_sighandler(void) 487 { 488 struct sigaction sa; 489 sigset_t block_mask; 490 491 sigemptyset(&block_mask); 492 sigaddset (&block_mask, SIGQUIT); 493 sigaddset (&block_mask, SIGSTOP); 494 sigaddset (&block_mask, SIGCONT); 495 sigaddset (&block_mask, SIGUSR1); 496 497 sa.sa_flags = 0; 498 sa.sa_mask = block_mask; 499 sa.sa_handler = handle_signal; 500 sigaction(SIGQUIT, &sa, NULL); 501 sigaction(SIGSTOP, &sa, NULL); 502 sigaction(SIGCONT, &sa, NULL); 503 sigaction(SIGUSR1, &sa, NULL); 504 } 505 506 static void show_help(void) { 507 508 fprintf(stderr, "usage: ANRdaemon [options] [categoris...]\n"); 509 fprintf(stdout, "Options includes:\n" 510 " -a appname enable app-level tracing for a comma " 511 "separated list of cmdlines\n" 512 " -t N cpu threshold for logging to start " 513 "(uint = 0.01%%, min = 5000, max = 9999, default = 9990)\n" 514 " -s N use a trace buffer size of N KB " 515 "default to 2048KB\n" 516 " -h show helps\n"); 517 fprintf(stdout, "Categoris includes:\n" 518 " am - activity manager\n" 519 " sm - sync manager\n" 520 " input - input\n" 521 " dalvik - dalvik VM\n" 522 " audio - Audio\n" 523 " gfx - Graphics\n" 524 " rs - RenderScript\n" 525 " hal - Hardware Modules\n" 526 " irq - kernel irq events\n" 527 " sched - kernel scheduler activity\n" 528 " stack - kernel stack\n" 529 " sync - kernel sync activity\n" 530 " workq - kernel work queues\n"); 531 fprintf(stdout, "Control includes:\n" 532 " SIGQUIT: terminate the process\n" 533 " SIGSTOP: suspend all function of the daemon\n" 534 " SIGCONT: resume the normal function\n" 535 " SIGUSR1: dump the current logging in a compressed form\n"); 536 exit(0); 537 } 538 539 static int get_options(int argc, char *argv[]) { 540 int opt = 0; 541 int threshold; 542 while ((opt = getopt(argc, argv, "a:s:t:h")) >= 0) { 543 switch(opt) { 544 case 'a': 545 apps = optarg; 546 break; 547 case 's': 548 if (atoi(optarg) > max_buffer_size) 549 buf_size_kb = max_buffer_size_str; 550 else if (atoi(optarg) < min_buffer_size) 551 buf_size_kb = min_buffer_size_str; 552 else 553 buf_size_kb = optarg; 554 break; 555 case 't': 556 threshold = atoi(optarg); 557 if (threshold > 9999 || threshold < 5000) { 558 fprintf(stderr, "logging threshold should be 5000-9999\n"); 559 return 1; 560 } 561 idle_threshold = 10000 - threshold; 562 break; 563 case 'h': 564 show_help(); 565 break; 566 default: 567 fprintf(stderr, "Error in getting options.\n" 568 "run \"%s -h\" for usage.\n", argv[0]); 569 return 1; 570 } 571 } 572 573 for (int i = optind; i < argc; i++) { 574 if (strcmp(argv[i], "am") == 0) { 575 tag |= ATRACE_TAG_ACTIVITY_MANAGER; 576 } else if (strcmp(argv[i], "input") == 0) { 577 tag |= ATRACE_TAG_INPUT; 578 } else if (strcmp(argv[i], "sm") == 0) { 579 tag |= ATRACE_TAG_SYNC_MANAGER; 580 } else if (strcmp(argv[i], "dalvik") == 0) { 581 tag |= ATRACE_TAG_DALVIK; 582 } else if (strcmp(argv[i], "gfx") == 0) { 583 tag |= ATRACE_TAG_GRAPHICS; 584 } else if (strcmp(argv[i], "audio") == 0) { 585 tag |= ATRACE_TAG_AUDIO; 586 } else if (strcmp(argv[i], "hal") == 0) { 587 tag |= ATRACE_TAG_HAL; 588 } else if (strcmp(argv[i], "rs") == 0) { 589 tag |= ATRACE_TAG_RS; 590 } else if (strcmp(argv[i], "sched") == 0) { 591 log_sched = true; 592 } else if (strcmp(argv[i], "stack") == 0) { 593 log_stack = true; 594 } else if (strcmp(argv[i], "workq") == 0) { 595 log_workq = true; 596 } else if (strcmp(argv[i], "irq") == 0) { 597 log_irq = true; 598 } else if (strcmp(argv[i], "sync") == 0) { 599 log_sync = true; 600 } else { 601 fprintf(stderr, "invalid category: %s\n" 602 "run \"%s -h\" for usage.\n", argv[i], argv[0]); 603 return 1; 604 } 605 } 606 607 /* If nothing is enabled, don't run */ 608 if (!tag && !log_sched && !log_stack && !log_workq && !log_irq && !log_sync) { 609 ALOGE("Specify at least one category to trace."); 610 return 1; 611 } 612 613 return 0; 614 } 615 616 int main(int argc, char *argv[]) 617 { 618 if(get_options(argc, argv) != 0) 619 return 1; 620 621 if (daemon(0, 0) != 0) 622 return 1; 623 624 register_sighandler(); 625 626 /* Clear any the trace log file by overwrite it with a new file */ 627 int fd = creat(dfs_trace_output_path, 0); 628 if (fd == -1) { 629 ALOGE("Faield to open and cleaup previous log"); 630 return 1; 631 } 632 close(fd); 633 634 ALOGI("ANRdaemon starting"); 635 start(); 636 637 if (err) 638 ALOGE("ANRdaemon stopped due to Error: %s", err_msg); 639 640 ALOGI("ANRdaemon terminated."); 641 642 return (err?1:0); 643 } 644