/*
 * Performance events:
 *
 *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
 *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
 *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
 *
 * Data type definitions, declarations, prototypes.
 *
 *    Started by: Thomas Gleixner and Ingo Molnar
 *
 * For licensing details see kernel-base/COPYING
 */
#ifndef _UAPI_LINUX_PERF_EVENT_H
#define _UAPI_LINUX_PERF_EVENT_H

#include <asm/byteorder.h>
#include <linux/ioctl.h>
#include <linux/types.h>

/*
 * User-space ABI bits:
 */

/*
 * attr.type
 */
enum perf_type_id {
  PERF_TYPE_HARDWARE = 0,
  PERF_TYPE_SOFTWARE = 1,
  PERF_TYPE_TRACEPOINT = 2,
  PERF_TYPE_HW_CACHE = 3,
  PERF_TYPE_RAW = 4,
  PERF_TYPE_BREAKPOINT = 5,

  PERF_TYPE_MAX, /* non-ABI */
};

/*
 * Generalized performance event event_id types, used by the
 * attr.config parameter of the sys_perf_event_open()
 * syscall:
 */
enum perf_hw_id {
  /*
   * Common hardware events, generalized by the kernel:
   */
  PERF_COUNT_HW_CPU_CYCLES = 0,
  PERF_COUNT_HW_INSTRUCTIONS = 1,
  PERF_COUNT_HW_CACHE_REFERENCES = 2,
  PERF_COUNT_HW_CACHE_MISSES = 3,
  PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
  PERF_COUNT_HW_BRANCH_MISSES = 5,
  PERF_COUNT_HW_BUS_CYCLES = 6,
  PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
  PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
  PERF_COUNT_HW_REF_CPU_CYCLES = 9,

  PERF_COUNT_HW_MAX, /* non-ABI */
};
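
/*
 * Example (a minimal sketch, not part of the ABI): opening a counter
 * for one of the generalized hardware events above. glibc provides no
 * perf_event_open() wrapper, so the raw syscall is used; pid = 0 and
 * cpu = -1 mean "this thread, on any CPU".
 *
 *   #include <linux/perf_event.h>
 *   #include <string.h>
 *   #include <sys/syscall.h>
 *   #include <unistd.h>
 *
 *   int open_instruction_counter(void)
 *   {
 *     struct perf_event_attr attr;
 *
 *     memset(&attr, 0, sizeof(attr));
 *     attr.type = PERF_TYPE_HARDWARE;
 *     attr.size = sizeof(attr);
 *     attr.config = PERF_COUNT_HW_INSTRUCTIONS;
 *     attr.disabled = 1;       # start stopped, enable via ioctl later
 *     attr.exclude_kernel = 1; # count user space only
 *
 *     return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
 *   }
 */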

/*
 * Generalized hardware cache events:
 *
 *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
 *       { read, write, prefetch } x
 *       { accesses, misses }
 */
enum perf_hw_cache_id {
  PERF_COUNT_HW_CACHE_L1D = 0,
  PERF_COUNT_HW_CACHE_L1I = 1,
  PERF_COUNT_HW_CACHE_LL = 2,
  PERF_COUNT_HW_CACHE_DTLB = 3,
  PERF_COUNT_HW_CACHE_ITLB = 4,
  PERF_COUNT_HW_CACHE_BPU = 5,
  PERF_COUNT_HW_CACHE_NODE = 6,

  PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
};

enum perf_hw_cache_op_id {
  PERF_COUNT_HW_CACHE_OP_READ = 0,
  PERF_COUNT_HW_CACHE_OP_WRITE = 1,
  PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,

  PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
};

enum perf_hw_cache_op_result_id {
  PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
  PERF_COUNT_HW_CACHE_RESULT_MISS = 1,

  PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
};
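
/*
 * The three enums above combine into attr.config for a
 * PERF_TYPE_HW_CACHE event as (see perf_event_open(2)):
 *
 *   config = id | (op_id << 8) | (op_result_id << 16);
 *
 * Example (sketch): L1 data cache read misses:
 *
 *   attr.type = PERF_TYPE_HW_CACHE;
 *   attr.config = PERF_COUNT_HW_CACHE_L1D |
 *                 (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *                 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
 */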

/*
 * Special "software" events provided by the kernel, even if the hardware
 * does not support performance events. These events count various
 * software and physical events of the kernel (and allow profiling
 * them as well):
 */
enum perf_sw_ids {
  PERF_COUNT_SW_CPU_CLOCK = 0,
  PERF_COUNT_SW_TASK_CLOCK = 1,
  PERF_COUNT_SW_PAGE_FAULTS = 2,
  PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
  PERF_COUNT_SW_CPU_MIGRATIONS = 4,
  PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
  PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
  PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
  PERF_COUNT_SW_EMULATION_FAULTS = 8,
  PERF_COUNT_SW_DUMMY = 9,

  PERF_COUNT_SW_MAX, /* non-ABI */
};
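
/*
 * Example (sketch): software events use PERF_TYPE_SOFTWARE with one of
 * the ids above in attr.config, e.g. counting context switches of the
 * calling thread:
 *
 *   attr.type = PERF_TYPE_SOFTWARE;
 *   attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES;
 *   fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
 */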

/*
 * Bits that can be set in attr.sample_type to request information
 * in the overflow packets.
 */
enum perf_event_sample_format {
  PERF_SAMPLE_IP = 1U << 0,
  PERF_SAMPLE_TID = 1U << 1,
  PERF_SAMPLE_TIME = 1U << 2,
  PERF_SAMPLE_ADDR = 1U << 3,
  PERF_SAMPLE_READ = 1U << 4,
  PERF_SAMPLE_CALLCHAIN = 1U << 5,
  PERF_SAMPLE_ID = 1U << 6,
  PERF_SAMPLE_CPU = 1U << 7,
  PERF_SAMPLE_PERIOD = 1U << 8,
  PERF_SAMPLE_STREAM_ID = 1U << 9,
  PERF_SAMPLE_RAW = 1U << 10,
  PERF_SAMPLE_BRANCH_STACK = 1U << 11,
  PERF_SAMPLE_REGS_USER = 1U << 12,
  PERF_SAMPLE_STACK_USER = 1U << 13,
  PERF_SAMPLE_WEIGHT = 1U << 14,
  PERF_SAMPLE_DATA_SRC = 1U << 15,
  PERF_SAMPLE_IDENTIFIER = 1U << 16,
  PERF_SAMPLE_TRANSACTION = 1U << 17,

  PERF_SAMPLE_MAX = 1U << 18, /* non-ABI */
};
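
/*
 * Example (sketch): a sampling event that records the instruction
 * pointer, pid/tid and a timestamp every 100000 CPU cycles:
 *
 *   attr.type = PERF_TYPE_HARDWARE;
 *   attr.config = PERF_COUNT_HW_CPU_CYCLES;
 *   attr.sample_period = 100000;
 *   attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
 *
 * Each overflow then emits a PERF_RECORD_SAMPLE containing exactly the
 * requested fields, in the order defined for that record below.
 */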

/*
 * values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK
 * is set
 *
 * If the user does not pass priv level information via branch_sample_type,
 * the kernel uses the event's priv level. Branch and event priv levels do
 * not have to match. Branch priv level is checked for permissions.
 *
 * The branch types can be combined, however BRANCH_ANY covers all types
 * of branches and therefore it supersedes all the other types.
 */
enum perf_branch_sample_type {
  PERF_SAMPLE_BRANCH_USER = 1U << 0,   /* user branches */
  PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */
  PERF_SAMPLE_BRANCH_HV = 1U << 2,     /* hypervisor branches */

  PERF_SAMPLE_BRANCH_ANY = 1U << 3,        /* any branch types */
  PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4,   /* any call branch */
  PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */
  PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6,   /* indirect calls */
  PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7,   /* transaction aborts */
  PERF_SAMPLE_BRANCH_IN_TX = 1U << 8,      /* in transaction */
  PERF_SAMPLE_BRANCH_NO_TX = 1U << 9,      /* not in transaction */
  PERF_SAMPLE_BRANCH_COND = 1U << 10,      /* conditional branches */

  PERF_SAMPLE_BRANCH_MAX = 1U << 11, /* non-ABI */
};

#define PERF_SAMPLE_BRANCH_PLM_ALL \
  (PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV)
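
/*
 * Example (sketch): sampling user-space branches of any type takes both
 * the sample_type flag and a branch_sample_type selection:
 *
 *   attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
 *   attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER;
 */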

/*
 * Values to determine ABI of the registers dump.
 */
enum perf_sample_regs_abi {
  PERF_SAMPLE_REGS_ABI_NONE = 0,
  PERF_SAMPLE_REGS_ABI_32 = 1,
  PERF_SAMPLE_REGS_ABI_64 = 2,
};

/*
 * Values for the memory transaction event qualifier, mostly for
 * abort events. Multiple bits can be set.
 */
enum {
  PERF_TXN_ELISION = (1 << 0),        /* From elision */
  PERF_TXN_TRANSACTION = (1 << 1),    /* From transaction */
  PERF_TXN_SYNC = (1 << 2),           /* Abort related to the instruction */
  PERF_TXN_ASYNC = (1 << 3),          /* Abort not related to the instruction */
  PERF_TXN_RETRY = (1 << 4),          /* Retry possible */
  PERF_TXN_CONFLICT = (1 << 5),       /* Conflict abort */
  PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
  PERF_TXN_CAPACITY_READ = (1 << 7),  /* Capacity read abort */

  PERF_TXN_MAX = (1 << 8), /* non-ABI */

  /* bits 32..63 are reserved for the abort code */

  PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
  PERF_TXN_ABORT_SHIFT = 32,
};
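
/*
 * Example (sketch): given a PERF_SAMPLE_TRANSACTION value 'txn', the
 * qualifier bits can be tested directly and the architecture-specific
 * abort code extracted from the upper half:
 *
 *   if (txn & PERF_TXN_CONFLICT)
 *     ...;
 *   abort_code = (txn & PERF_TXN_ABORT_MASK) >> PERF_TXN_ABORT_SHIFT;
 */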

/*
 * The format of the data returned by read() on a perf event fd,
 * as specified by attr.read_format:
 *
 * struct read_format {
 *	{ u64		value;
 *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
 *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64		id;           } && PERF_FORMAT_ID
 *	} && !PERF_FORMAT_GROUP
 *
 *	{ u64		nr;
 *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
 *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64		value;
 *	    { u64	id;           } && PERF_FORMAT_ID
 *	  }		cntr[nr];
 *	} && PERF_FORMAT_GROUP
 * };
 */
enum perf_event_read_format {
  PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
  PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
  PERF_FORMAT_ID = 1U << 2,
  PERF_FORMAT_GROUP = 1U << 3,

  PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
};
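
/*
 * Example (a sketch for a non-group event): with
 * read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING
 * a read() returns three u64 values, which allow scaling the count when
 * the event was multiplexed (beware of u64 overflow for large counts):
 *
 *   struct {
 *     __u64 value;
 *     __u64 time_enabled;
 *     __u64 time_running;
 *   } rf;
 *
 *   read(fd, &rf, sizeof(rf));
 *   if (rf.time_running && rf.time_running < rf.time_enabled)
 *     scaled = rf.value * rf.time_enabled / rf.time_running;
 *   else
 *     scaled = rf.value;
 */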

#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
                               /* add: sample_stack_user */

/*
 * Hardware event_id to monitor via a performance monitoring event:
 */
struct perf_event_attr {
  /*
   * Major type: hardware/software/tracepoint/etc.
   */
  __u32 type;

  /*
   * Size of the attr structure, for fwd/bwd compat.
   */
  __u32 size;

  /*
   * Type specific configuration information.
   */
  __u64 config;

  union {
    __u64 sample_period;
    __u64 sample_freq;
  };

  __u64 sample_type;
  __u64 read_format;

  __u64 disabled : 1,     /* off by default        */
      inherit : 1,        /* children inherit it   */
      pinned : 1,         /* must always be on PMU */
      exclusive : 1,      /* only group on PMU     */
      exclude_user : 1,   /* don't count user      */
      exclude_kernel : 1, /* ditto kernel          */
      exclude_hv : 1,     /* ditto hypervisor      */
      exclude_idle : 1,   /* don't count when idle */
      mmap : 1,           /* include mmap data     */
      comm : 1,           /* include comm data     */
      freq : 1,           /* use freq, not period  */
      inherit_stat : 1,   /* per task counts       */
      enable_on_exec : 1, /* next exec enables     */
      task : 1,           /* trace fork/exit       */
      watermark : 1,      /* wakeup_watermark      */
      /*
       * precise_ip:
       *
       *  0 - SAMPLE_IP can have arbitrary skid
       *  1 - SAMPLE_IP must have constant skid
       *  2 - SAMPLE_IP requested to have 0 skid
       *  3 - SAMPLE_IP must have 0 skid
       *
       *  See also PERF_RECORD_MISC_EXACT_IP
       */
      precise_ip : 2,    /* skid constraint       */
      mmap_data : 1,     /* non-exec mmap data    */
      sample_id_all : 1, /* sample_type all events */

      exclude_host : 1,  /* don't count in host   */
      exclude_guest : 1, /* don't count in guest  */

      exclude_callchain_kernel : 1, /* exclude kernel callchains */
      exclude_callchain_user : 1,   /* exclude user callchains */
      mmap2 : 1,                    /* include mmap with inode data */
      comm_exec : 1,                /* flag comm events that are due to an exec */
      __reserved_1 : 39;

  union {
    __u32 wakeup_events;    /* wakeup every n events */
    __u32 wakeup_watermark; /* bytes before wakeup   */
  };

  __u32 bp_type;
  union {
    __u64 bp_addr;
    __u64 config1; /* extension of config */
  };
  union {
    __u64 bp_len;
    __u64 config2; /* extension of config1 */
  };
  __u64 branch_sample_type; /* enum perf_branch_sample_type */

  /*
   * Defines set of user regs to dump on samples.
   * See asm/perf_regs.h for details.
   */
  __u64 sample_regs_user;

  /*
   * Defines size of the user stack to dump on samples.
   */
  __u32 sample_stack_user;

  /* Align to u64. */
  __u32 __reserved_2;
};

#define perf_flags(attr) (*(&(attr)->read_format + 1))

/*
 * Ioctls that can be done on a perf event fd:
 */
#define PERF_EVENT_IOC_ENABLE _IO('$', 0)
#define PERF_EVENT_IOC_DISABLE _IO('$', 1)
#define PERF_EVENT_IOC_REFRESH _IO('$', 2)
#define PERF_EVENT_IOC_RESET _IO('$', 3)
#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
#define PERF_EVENT_IOC_SET_OUTPUT _IO('$', 5)
#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)

enum perf_event_ioc_flags {
  PERF_IOC_FLAG_GROUP = 1U << 0,
};
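
/*
 * Example (sketch): a counter opened with attr.disabled = 1 can be
 * reset and enabled around the code of interest; passing
 * PERF_IOC_FLAG_GROUP applies the operation to the whole event group:
 *
 *   ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
 *   ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
 *   ... code being measured ...
 *   ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
 */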

/*
 * Structure of the page that can be mapped via mmap
 */
struct perf_event_mmap_page {
  __u32 version;        /* version number of this structure */
  __u32 compat_version; /* lowest version this is compat with */

  /*
   * Bits needed to read the hw events in user-space.
   *
   *   u32 seq, time_mult, time_shift, idx, width;
   *   u64 count, enabled, running;
   *   u64 cyc, time_offset;
   *   s64 pmc = 0;
   *
   *   do {
   *     seq = pc->lock;
   *     barrier()
   *
   *     enabled = pc->time_enabled;
   *     running = pc->time_running;
   *
   *     if (pc->cap_user_time && enabled != running) {
   *       cyc = rdtsc();
   *       time_offset = pc->time_offset;
   *       time_mult   = pc->time_mult;
   *       time_shift  = pc->time_shift;
   *     }
   *
   *     idx = pc->index;
   *     count = pc->offset;
   *     if (pc->cap_user_rdpmc && idx) {
   *       width = pc->pmc_width;
   *       pmc = rdpmc(idx - 1);
   *     }
   *
   *     barrier();
   *   } while (pc->lock != seq);
   *
   * NOTE: for obvious reasons this only works on self-monitoring
   *       processes.
   */
  __u32 lock;         /* seqlock for synchronization */
  __u32 index;        /* hardware event identifier */
  __s64 offset;       /* add to hardware event value */
  __u64 time_enabled; /* time event active */
  __u64 time_running; /* time event on cpu */
  union {
    __u64 capabilities;
    struct {
      __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */
          cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */

          cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read
                                 counts */
          cap_user_time : 1,      /* The time_* fields are used */
          cap_user_time_zero : 1, /* The time_zero field is used */
          cap_____res : 59;
    };
  };

  /*
   * If cap_user_rdpmc this field provides the bit-width of the value
   * read using the rdpmc() or equivalent instruction. This can be used
   * to sign extend the result like:
   *
   *   pmc <<= 64 - width;
   *   pmc >>= 64 - width; // signed shift right
   *   count += pmc;
   */
  __u16 pmc_width;

  /*
   * If cap_user_time the below fields can be used to compute the time
   * delta since time_enabled (in ns) using rdtsc or similar.
   *
   *   u64 quot, rem;
   *   u64 delta;
   *
   *   quot = (cyc >> time_shift);
   *   rem = cyc & ((1 << time_shift) - 1);
   *   delta = time_offset + quot * time_mult +
   *              ((rem * time_mult) >> time_shift);
   *
   * Where time_offset, time_mult, time_shift and cyc are read in the
   * seqcount loop described above. This delta can then be added to
   * enabled and possibly running (if idx), improving the scaling:
   *
   *   enabled += delta;
   *   if (idx)
   *     running += delta;
   *
   *   quot = count / running;
   *   rem  = count % running;
   *   count = quot * enabled + (rem * enabled) / running;
   */
  __u16 time_shift;
  __u32 time_mult;
  __u64 time_offset;
  /*
   * If cap_user_time_zero, the hardware clock (e.g. TSC) can be calculated
   * from sample timestamps.
   *
   *   time = timestamp - time_zero;
   *   quot = time / time_mult;
   *   rem  = time % time_mult;
   *   cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
   *
   * And vice versa:
   *
   *   quot = cyc >> time_shift;
   *   rem  = cyc & ((1 << time_shift) - 1);
   *   timestamp = time_zero + quot * time_mult +
   *               ((rem * time_mult) >> time_shift);
   */
  __u64 time_zero;
  __u32 size; /* Header size up to __reserved[] fields. */

  /*
   * Hole for extension of the self monitor capabilities
   */

  __u8 __reserved[118 * 8 + 4]; /* align to 1k. */

  /*
   * Control data for the mmap() data buffer.
   *
   * User-space reading the @data_head value should issue an smp_rmb()
   * after reading this value.
   *
   * When the mapping is PROT_WRITE the @data_tail value should be
   * written by userspace to reflect the last read data, after issuing
   * an smp_mb() to separate the data read from the ->data_tail store.
   * In this case the kernel will not overwrite unread data.
   *
   * See perf_output_put_handle() for the data ordering.
   */
  __u64 data_head; /* head in the data section */
  __u64 data_tail; /* user-space written tail */
};
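
/*
 * Example (a sketch, error handling omitted): mapping the control page
 * plus a 2^n-page data area and consuming records. smp_rmb()/smp_mb()
 * stand for suitable user-space barriers (e.g. C11 acquire/release
 * atomics); note that a record may wrap at the end of the data area,
 * so a robust reader copies such records out before parsing them.
 *
 *   size_t page = sysconf(_SC_PAGESIZE);
 *   size_t mask = 8 * page - 1; # 8 data pages, must be a power of two
 *   struct perf_event_mmap_page *pc =
 *       mmap(NULL, 9 * page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *   char *data = (char *)pc + page;
 *
 *   __u64 tail = pc->data_tail;
 *   __u64 head = pc->data_head;
 *   smp_rmb();
 *   while (tail < head) {
 *     struct perf_event_header *hdr = (void *)(data + (tail & mask));
 *     # ... process the record ...
 *     tail += hdr->size;
 *   }
 *   smp_mb();
 *   pc->data_tail = tail;
 */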

#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
#define PERF_RECORD_MISC_KERNEL (1 << 0)
#define PERF_RECORD_MISC_USER (2 << 0)
#define PERF_RECORD_MISC_HYPERVISOR (3 << 0)
#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0)
#define PERF_RECORD_MISC_GUEST_USER (5 << 0)

/*
 * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
 * different events so can reuse the same bit position.
 */
#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
#define PERF_RECORD_MISC_COMM_EXEC (1 << 13)
/*
 * Indicates that the content of PERF_SAMPLE_IP points to
 * the actual instruction that triggered the event. See also
 * perf_event_attr::precise_ip.
 */
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
/*
 * Reserve the last bit to indicate some extended misc field
 */
#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)

struct perf_event_header {
  __u32 type;
  __u16 misc;
  __u16 size;
};
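
/*
 * Example (sketch): every record in the mmap data area starts with this
 * header; header.size covers the whole record, so a reader can dispatch
 * on the type and skip records it does not understand:
 *
 *   struct perf_event_header *hdr = ...;
 *
 *   switch (hdr->type) {
 *   case PERF_RECORD_SAMPLE:
 *     if (hdr->misc & PERF_RECORD_MISC_EXACT_IP)
 *       ...; # the sampled IP is the precise triggering instruction
 *     break;
 *   default:
 *     break; # unknown type: skip hdr->size bytes
 *   }
 */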

enum perf_event_type {
  /*
   * If perf_event_attr.sample_id_all is set then all event types will
   * have the sample_type selected fields related to where/when
   * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU,
   * IDENTIFIER) described in PERF_RECORD_SAMPLE below; they are stashed
   * just after the perf_event_header and the fields already present for
   * the existing record types, i.e. at the end of the payload. That way
   * a newer perf.data file will be supported by older perf tools, with
   * these new optional fields being ignored.
   *
   * struct sample_id {
   * 	{ u32			pid, tid; } && PERF_SAMPLE_TID
   * 	{ u64			time;     } && PERF_SAMPLE_TIME
   * 	{ u64			id;       } && PERF_SAMPLE_ID
   * 	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
   * 	{ u32			cpu, res; } && PERF_SAMPLE_CPU
   *	{ u64			id;	  } && PERF_SAMPLE_IDENTIFIER
   * } && perf_event_attr::sample_id_all
   *
   * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.  The
   * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed
   * relative to header.size.
   */

  /*
   * The MMAP events record the PROT_EXEC mappings so that we can
   * correlate userspace IPs to code. They have the following structure:
   *
   * struct {
   *	struct perf_event_header	header;
   *
   *	u32				pid, tid;
   *	u64				addr;
   *	u64				len;
   *	u64				pgoff;
   *	char				filename[];
   * 	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_MMAP = 1,

  /*
   * struct {
   *	struct perf_event_header	header;
   *	u64				id;
   *	u64				lost;
   * 	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_LOST = 2,

  /*
   * struct {
   *	struct perf_event_header	header;
   *
   *	u32				pid, tid;
   *	char				comm[];
   * 	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_COMM = 3,

  /*
   * struct {
   *	struct perf_event_header	header;
   *	u32				pid, ppid;
   *	u32				tid, ptid;
   *	u64				time;
   * 	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_EXIT = 4,

  /*
   * struct {
   *	struct perf_event_header	header;
   *	u64				time;
   *	u64				id;
   *	u64				stream_id;
   * 	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_THROTTLE = 5,
  PERF_RECORD_UNTHROTTLE = 6,

  /*
   * struct {
   *	struct perf_event_header	header;
   *	u32				pid, ppid;
   *	u32				tid, ptid;
   *	u64				time;
   * 	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_FORK = 7,

  /*
   * struct {
   *	struct perf_event_header	header;
   *	u32				pid, tid;
   *
   *	struct read_format		values;
   * 	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_READ = 8,

  /*
   * struct {
   *	struct perf_event_header	header;
   *
   *	#
   *	# Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.
   *	# The advantage of PERF_SAMPLE_IDENTIFIER is that its position
   *	# is fixed relative to header.
   *	#
   *
   *	{ u64			id;	  } && PERF_SAMPLE_IDENTIFIER
   *	{ u64			ip;	  } && PERF_SAMPLE_IP
   *	{ u32			pid, tid; } && PERF_SAMPLE_TID
   *	{ u64			time;     } && PERF_SAMPLE_TIME
   *	{ u64			addr;     } && PERF_SAMPLE_ADDR
   *	{ u64			id;	  } && PERF_SAMPLE_ID
   *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
   *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
   *	{ u64			period;   } && PERF_SAMPLE_PERIOD
   *
   *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
   *
   *	{ u64			nr,
   *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
   *
   *	#
   *	# The RAW record below is opaque data wrt the ABI
   *	#
   *	# That is, the ABI doesn't make any promises wrt the
   *	# stability of its content, it may vary depending
   *	# on event, hardware, kernel version and phase of
   *	# the moon.
   *	#
   *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
   *	#
   *
   *	{ u32			size;
   *	  char			data[size]; } && PERF_SAMPLE_RAW
   *
   *	{ u64			nr;
   *	  { u64 from, to, flags } lbr[nr]; } && PERF_SAMPLE_BRANCH_STACK
   *
   * 	{ u64			abi; # enum perf_sample_regs_abi
   * 	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
   *
   * 	{ u64			size;
   * 	  char			data[size];
   * 	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
   *
   *	{ u64			weight;   } && PERF_SAMPLE_WEIGHT
   *	{ u64			data_src; } && PERF_SAMPLE_DATA_SRC
   *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
   * };
   */
  PERF_RECORD_SAMPLE = 9,

  /*
   * The MMAP2 records are an augmented version of MMAP; they add
   * maj, min, ino numbers to be used to uniquely identify each mapping.
   *
   * struct {
   *	struct perf_event_header	header;
   *
   *	u32				pid, tid;
   *	u64				addr;
   *	u64				len;
   *	u64				pgoff;
   *	u32				maj;
   *	u32				min;
   *	u64				ino;
   *	u64				ino_generation;
   *	u32				prot, flags;
   *	char				filename[];
   * 	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_MMAP2 = 10,

  /*
   * Records that new data landed in the AUX buffer part.
   *
   * struct {
   * 	struct perf_event_header	header;
   *
   * 	u64				aux_offset;
   * 	u64				aux_size;
   *	u64				flags;
   * 	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_AUX = 11,

  /*
   * Indicates that instruction trace has started
   *
   * struct {
   *	struct perf_event_header	header;
   *	u32				pid;
   *	u32				tid;
   * };
   */
  PERF_RECORD_ITRACE_START = 12,

  /*
   * Records the dropped/lost sample number.
   *
   * struct {
   *	struct perf_event_header	header;
   *
   *	u64				lost;
   *	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_LOST_SAMPLES = 13,

  /*
   * Records a context switch in or out (flagged by
   * PERF_RECORD_MISC_SWITCH_OUT). See also
   * PERF_RECORD_SWITCH_CPU_WIDE.
   *
   * struct {
   *	struct perf_event_header	header;
   *	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_SWITCH = 14,

  /*
   * CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and
   * next_prev_tid that are the next (switching out) or previous
   * (switching in) pid/tid.
   *
   * struct {
   *	struct perf_event_header	header;
   *	u32				next_prev_pid;
   *	u32				next_prev_tid;
   *	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_SWITCH_CPU_WIDE = 15,

  /*
   * struct {
   *	struct perf_event_header	header;
   *	u32				pid;
   *	u32				tid;
   *	u64				nr_namespaces;
   *	{ u64				dev, inode; } [nr_namespaces];
   *	struct sample_id		sample_id;
   * };
   */
  PERF_RECORD_NAMESPACES = 16,

  PERF_RECORD_MAX, /* non-ABI */
};

#define PERF_MAX_STACK_DEPTH 127

enum perf_callchain_context {
  PERF_CONTEXT_HV = (__u64)-32,
  PERF_CONTEXT_KERNEL = (__u64)-128,
  PERF_CONTEXT_USER = (__u64)-512,

  PERF_CONTEXT_GUEST = (__u64)-2048,
  PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
  PERF_CONTEXT_GUEST_USER = (__u64)-2560,

  PERF_CONTEXT_MAX = (__u64)-4095,
};
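
/*
 * Example (sketch): PERF_SAMPLE_CALLCHAIN entries mix real addresses
 * with the context markers above; a marker switches how the addresses
 * that follow it are interpreted:
 *
 *   for (i = 0; i < nr; i++) {
 *     if (ips[i] >= PERF_CONTEXT_MAX) { # a marker, not an address
 *       context = ips[i];
 *       continue;
 *     }
 *     # ips[i] is an instruction address in 'context'
 *   }
 */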

/**
 * PERF_RECORD_AUX::flags bits
 */
#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
#define PERF_AUX_FLAG_PARTIAL 0x04   /* record contains gaps */

#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#define PERF_FLAG_FD_OUTPUT (1UL << 1)
#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */

union perf_mem_data_src {
  __u64 val;
  struct {
    __u64 mem_op : 5,  /* type of opcode */
        mem_lvl : 14,  /* memory hierarchy level */
        mem_snoop : 5, /* snoop mode */
        mem_lock : 2,  /* lock instr */
        mem_dtlb : 7,  /* tlb access */
        mem_rsvd : 31;
  };
};

/* type of opcode (load/store/prefetch/exec) */
#define PERF_MEM_OP_NA 0x01     /* not available */
#define PERF_MEM_OP_LOAD 0x02   /* load instruction */
#define PERF_MEM_OP_STORE 0x04  /* store instruction */
#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
#define PERF_MEM_OP_EXEC 0x10   /* code (execution) */
#define PERF_MEM_OP_SHIFT 0

/* memory hierarchy (memory level, hit or miss) */
#define PERF_MEM_LVL_NA 0x01        /* not available */
#define PERF_MEM_LVL_HIT 0x02       /* hit level */
#define PERF_MEM_LVL_MISS 0x04      /* miss level */
#define PERF_MEM_LVL_L1 0x08        /* L1 */
#define PERF_MEM_LVL_LFB 0x10       /* Line Fill Buffer */
#define PERF_MEM_LVL_L2 0x20        /* L2 */
#define PERF_MEM_LVL_L3 0x40        /* L3 */
#define PERF_MEM_LVL_LOC_RAM 0x80   /* Local DRAM */
#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
#define PERF_MEM_LVL_IO 0x1000      /* I/O memory */
#define PERF_MEM_LVL_UNC 0x2000     /* Uncached memory */
#define PERF_MEM_LVL_SHIFT 5

/* snoop mode */
#define PERF_MEM_SNOOP_NA 0x01   /* not available */
#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
#define PERF_MEM_SNOOP_HIT 0x04  /* snoop hit */
#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
#define PERF_MEM_SNOOP_SHIFT 19

/* locked instruction */
#define PERF_MEM_LOCK_NA 0x01     /* not available */
#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
#define PERF_MEM_LOCK_SHIFT 24

/* TLB access */
#define PERF_MEM_TLB_NA 0x01   /* not available */
#define PERF_MEM_TLB_HIT 0x02  /* hit level */
#define PERF_MEM_TLB_MISS 0x04 /* miss level */
#define PERF_MEM_TLB_L1 0x08   /* L1 */
#define PERF_MEM_TLB_L2 0x10   /* L2 */
#define PERF_MEM_TLB_WK 0x20   /* Hardware Walker */
#define PERF_MEM_TLB_OS 0x40   /* OS fault handler */
#define PERF_MEM_TLB_SHIFT 26

#define PERF_MEM_S(a, s) (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
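
/*
 * Example (sketch): PERF_MEM_S() composes one field of
 * perf_mem_data_src at a time; a load that hit in the L1 cache could be
 * encoded as:
 *
 *   __u64 src = PERF_MEM_S(OP, LOAD) |
 *               PERF_MEM_S(LVL, HIT) | PERF_MEM_S(LVL, L1) |
 *               PERF_MEM_S(SNOOP, NONE) |
 *               PERF_MEM_S(LOCK, NA) | PERF_MEM_S(TLB, NA);
 */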

/*
 * single taken branch record layout:
 *
 *      from: source instruction (may not always be a branch insn)
 *        to: branch target
 *   mispred: branch target was mispredicted
 * predicted: branch target was predicted
 *
 * Support for mispred and predicted is optional. If it is not
 * supported, mispred = predicted = 0.
 *
 *     in_tx: running in a hardware transaction
 *     abort: aborting a hardware transaction
 */
struct perf_branch_entry {
  __u64 from;
  __u64 to;
  __u64 mispred : 1, /* target mispredicted */
      predicted : 1, /* target predicted */
      in_tx : 1,     /* in transaction */
      abort : 1,     /* transaction abort */
      reserved : 60;
};

#endif /* _UAPI_LINUX_PERF_EVENT_H */