/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

     17 #ifndef ANDROID_ML_NN_COMMON_TRACING_H
     18 #define ANDROID_ML_NN_COMMON_TRACING_H
     19 
     20 #define ATRACE_TAG ATRACE_TAG_NNAPI
     21 #include "utils/Trace.h"
     22 
// Neural Networks API (NNAPI) systracing
//
// The primary goal of the tracing is to capture and present timings for NNAPI.
// (Other uses include providing visibility into the split of execution between
// drivers and the CPU fallback, and the ability to visualize call sequences.)
//
// The tracing has three parts:
//  1. Trace macros defined in this file and used throughout the codebase,
//     modelled after and using atrace. These implement a naming convention for
//     the tracepoints, interpreted by the systrace parser.
//  2. Android systrace (atrace) on-device capture and host-based analysis.
//  3. A systrace parser (TODO) to summarize the timings.
//
// For an overview and introduction, please refer to the "NNAPI Systrace design
// and HOWTO" (internal Docs for now). This header doesn't try to replicate all
// the information in that document. For the contract between traces in code and
// the statistics created by the systrace parser, see
// tools/systrace-parser/contract-between-code-and-parser.txt.
//
// Glossary:
// - Phase: stage in processing (e.g., Preparation, Compilation, Execution).
//   The Overall phase nests all other phases; Execution nests Inputs/Outputs,
//   Transformation, Computation and Results; optionally, Executions can be
//   nested in a Warmup or a Benchmark phase. Phases are otherwise not nested
//   (Initialization phase functions may occur inside other phases but will be
//   counted out during analysis). Nested phases (other than Initialization)
//   are analysed as a breakdown of the parent phase.
// - Layer: component in the stack (from top to bottom: App, Runtime, IPC,
//   Driver/CPU). Calls to lower layers are typically nested within calls to
//   upper layers.
// - Bucket: unit of timing analysis, the combination of Phase and Layer (and
//   thus also typically nested).
// - Detail: specific unit being executed, typically a function.

// Convenience macros to be used in the code (phases defined below).
// (They are macros so that the string concatenation is done at compile time.)
//
// These exist in three variants:
// - Simple (NNTRACE_<layer and potentially phase>) - to be used when only one
//   Phase is active within a scope.
// - "Switch" (NNTRACE_<...>_SWITCH) - to be used when multiple Phases
//   share a scope (e.g., transformation of data and computation in the same
//   function).
// - "Subtract" (NNTRACE_<...>_SUBTRACT) - to be used when nesting is violated
//   and the time should be subtracted from the parent scope.
// Arguments:
// - phase: one of the NNTRACE_PHASE_* macros defined below.
// - detail: free-form string constant, typically the function name.
// Example usage:
//   // Simple
//   int ANeuralNetworksMemory_createFromFd(...) {
//     NNTRACE_RT(NNTRACE_PHASE_PREPARATION, "ANeuralNetworksMemory_createFromFd");
//   }
//   // Switch
//   bool concatenationFloat32(...) {
//     NNTRACE_TRANS("concatenationFloat32");  // Transformation of data begins
//     ...
//     NNTRACE_COMP_SWITCH("optimized_ops::Concatenation"); // Transformation
//                                                          // ends and computation
//                                                          // begins
//   }
//   // Subtract
//   static int compile(...) {
//     NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModel");
//     device->getInterface()->prepareModel(..., preparedModelCallback);
//     preparedModelCallback->wait();
//   }
//   ErrorStatus VersionedIDevice::prepareModel(...) {
//     ... IPC work ...
//     {
//       NNTRACE_FULL_SUBTRACT(NNTRACE_LAYER_RUNTIME, NNTRACE_PHASE_COMPILATION,
//                             "VersionedIDevice::prepareModel");
//       ... Runtime work ...
//     }
//     ... IPC work ...
//   }
//
    100 // Layer Application - For native applications (e.g., unit tests)
    101 #define NNTRACE_APP(phase, detail) NNTRACE_FULL(NNTRACE_LAYER_APPLICATION, phase, detail)
    102 #define NNTRACE_APP_SWITCH(phase, detail) \
    103         NNTRACE_FULL_SWITCH(NNTRACE_LAYER_APPLICATION, phase, detail)
    104 // Layer Runtime - For the NNAPI runtime
    105 #define NNTRACE_RT(phase, detail) NNTRACE_FULL(NNTRACE_LAYER_RUNTIME, phase, detail)
    106 #define NNTRACE_RT_SWITCH(phase, detail) NNTRACE_FULL_SWITCH(NNTRACE_LAYER_RUNTIME, phase, detail)
    107 // Layer CPU - CPU executor
    108 #define NNTRACE_CPU(phase, detail) NNTRACE_FULL(NNTRACE_LAYER_CPU, phase, detail)
    109 #define NNTRACE_COMP(detail) NNTRACE_FULL(NNTRACE_LAYER_CPU, \
    110                                           NNTRACE_PHASE_COMPUTATION, detail)
    111 #define NNTRACE_COMP_SWITCH(detail) NNTRACE_FULL_SWITCH(NNTRACE_LAYER_CPU, \
    112                                                         NNTRACE_PHASE_COMPUTATION, detail)
    113 #define NNTRACE_TRANS(detail) NNTRACE_FULL(NNTRACE_LAYER_CPU, \
    114                                            NNTRACE_PHASE_TRANSFORMATION, detail)
    115 
    116 // Fully specified macros to be used when no convenience wrapper exists for your
    117 // need.
    118 #define NNTRACE_FULL(layer, phase, detail) NNTRACE_NAME_1(("[NN_" layer "_" phase "]" detail))
    119 #define NNTRACE_FULL_SWITCH(layer, phase, detail) \
    120         NNTRACE_NAME_SWITCH(("[SW][NN_" layer "_" phase "]" detail))
    121 #define NNTRACE_FULL_SUBTRACT(layer, phase, detail) \
    122         NNTRACE_NAME_1(("[SUB][NN_" layer "_" phase "]" detail))
    123 // Raw macro without scoping requirements, for special cases
    124 #define NNTRACE_FULL_RAW(layer, phase, detail) android::ScopedTrace PASTE(___tracer, __LINE__) \
    125         (ATRACE_TAG, ("[NN_" layer "_" phase "]" detail))
    126 
    127 // Tracing buckets - for calculating timing summaries over.
    128 //
    129 // Application-only phases
    130 #define NNTRACE_PHASE_OVERALL   "PO"    // Overall program, e.g., one benchmark case
    131 #define NNTRACE_PHASE_WARMUP    "PWU"   // Warmup (nesting multiple executions)
    132 #define NNTRACE_PHASE_BENCHMARK "PBM"   // Benchmark (nesting multiple executions)
    133 // Main phases, usable by all layers
    134 #define NNTRACE_PHASE_INITIALIZATION "PI" // Initialization - not related to a model
    135 #define NNTRACE_PHASE_PREPARATION "PP"  // Model construction
    136 #define NNTRACE_PHASE_COMPILATION "PC"  // Model compilation
    137 #define NNTRACE_PHASE_EXECUTION "PE"    // Executing the model
    138 #define NNTRACE_PHASE_TERMINATION "PT"  // Tearing down
    139 #define NNTRACE_PHASE_UNSPECIFIED "PU"  // Helper code called from multiple phases
    140 // Subphases of execution
    141 #define NNTRACE_PHASE_INPUTS_AND_OUTPUTS "PIO"  // Setting inputs/outputs and allocating buffers
    142 #define NNTRACE_PHASE_TRANSFORMATION "PTR"      // Transforming data for computation
    143 #define NNTRACE_PHASE_COMPUTATION "PCO"         // Computing operations' outputs
    144 #define NNTRACE_PHASE_RESULTS "PR"              // Reading out results
    145 // Layers
    146 #define NNTRACE_LAYER_APPLICATION "LA"
    147 #define NNTRACE_LAYER_RUNTIME "LR"
    148 #define NNTRACE_LAYER_IPC "LI"
    149 #define NNTRACE_LAYER_DRIVER "LD"
    150 #define NNTRACE_LAYER_CPU "LC"
    151 #define NNTRACE_LAYER_OTHER "LO"
    152 #define NNTRACE_LAYER_UTILITY "LU"              // Code used from multiple layers
    153 
    154 
    155 // Implementation
    156 //
    157 // Almost same as ATRACE_NAME, but enforcing explicit distinction between
    158 // phase-per-scope and switching phases.
    159 //
    160 // Basic trace, one per scope allowed to enforce disjointness
    161 #define NNTRACE_NAME_1(name) android::ScopedTrace ___tracer_1(ATRACE_TAG, name)
    162 // Switching trace, more than one per scope allowed, translated by
    163 // systrace_parser.py. This is mainly useful for tracing multiple phases through
    164 // one function / scope.
    165 #define NNTRACE_NAME_SWITCH(name) android::ScopedTrace PASTE(___tracer, __LINE__) \
    166         (ATRACE_TAG, name); \
    167         (void)___tracer_1  // ensure switch is only used after a basic trace
    168 
    169 
    170 // Disallow use of raw ATRACE macros
    171 #undef ATRACE_NAME
    172 #undef ATRACE_CALL
    173 
    174 #endif // ANDROID_ML_NN_COMMON_TRACING_H
    175