diff options
Diffstat (limited to 'cddl/contrib/opensolaris/uts/common/sys')
-rw-r--r-- | cddl/contrib/opensolaris/uts/common/sys/cmn_err.h | 128 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/uts/common/sys/cpupart.h | 162 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/uts/common/sys/cpuvar.h | 737 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/uts/common/sys/ctf.h | 358 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/uts/common/sys/ctf_api.h | 241 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/uts/common/sys/dtrace.h | 2242 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h | 1298 | ||||
-rw-r--r-- | cddl/contrib/opensolaris/uts/common/sys/fasttrap.h | 93 |
8 files changed, 5259 insertions, 0 deletions
diff --git a/cddl/contrib/opensolaris/uts/common/sys/cmn_err.h b/cddl/contrib/opensolaris/uts/common/sys/cmn_err.h new file mode 100644 index 0000000..e710d8e --- /dev/null +++ b/cddl/contrib/opensolaris/uts/common/sys/cmn_err.h @@ -0,0 +1,128 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_CMN_ERR_H +#define _SYS_CMN_ERR_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(_KERNEL) && !defined(_ASM) +#include <sys/va_list.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Common error handling severity levels */ + +#define CE_CONT 0 /* continuation */ +#define CE_NOTE 1 /* notice */ +#define CE_WARN 2 /* warning */ +#define CE_PANIC 3 /* panic */ +#define CE_IGNORE 4 /* print nothing */ + +#ifndef _ASM + +#ifdef _KERNEL + +/*PRINTFLIKE2*/ +extern void cmn_err(int, const char *, ...) + __KPRINTFLIKE(2); +#pragma rarely_called(cmn_err) + +extern void vzcmn_err(zoneid_t, int, const char *, __va_list) + __KVPRINTFLIKE(3); +#pragma rarely_called(vzcmn_err) + +extern void vcmn_err(int, const char *, __va_list) + __KVPRINTFLIKE(2); +#pragma rarely_called(vcmn_err) + +/*PRINTFLIKE3*/ +extern void zcmn_err(zoneid_t, int, const char *, ...) + __KPRINTFLIKE(3); +#pragma rarely_called(zcmn_err) + +/*PRINTFLIKE1*/ +extern void printf(const char *, ...) + __KPRINTFLIKE(1); +#pragma rarely_called(printf) + +extern void vzprintf(zoneid_t, const char *, __va_list) + __KVPRINTFLIKE(2); +#pragma rarely_called(vzprintf) + +/*PRINTFLIKE2*/ +extern void zprintf(zoneid_t, const char *, ...) + __KPRINTFLIKE(2); +#pragma rarely_called(zprintf) + +extern void vprintf(const char *, __va_list) + __KVPRINTFLIKE(1); +#pragma rarely_called(vprintf) + +/*PRINTFLIKE1*/ +extern void uprintf(const char *, ...) + __KPRINTFLIKE(1); +#pragma rarely_called(uprintf) + +extern void vuprintf(const char *, __va_list) + __KVPRINTFLIKE(1); +#pragma rarely_called(vuprintf) + +/*PRINTFLIKE3*/ +extern size_t snprintf(char *, size_t, const char *, ...) + __KPRINTFLIKE(3); +extern size_t vsnprintf(char *, size_t, const char *, __va_list) + __KVPRINTFLIKE(3); +/*PRINTFLIKE2*/ +extern char *sprintf(char *, const char *, ...) + __KPRINTFLIKE(2); +extern char *vsprintf(char *, const char *, __va_list) + __KVPRINTFLIKE(2); + +/*PRINTFLIKE1*/ +extern void panic(const char *, ...) + __KPRINTFLIKE(1) __NORETURN; +#pragma rarely_called(panic) + +extern void vpanic(const char *, __va_list) + __KVPRINTFLIKE(1) __NORETURN; +#pragma rarely_called(vpanic) + +#endif /* _KERNEL */ +#endif /* !_ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_CMN_ERR_H */ diff --git a/cddl/contrib/opensolaris/uts/common/sys/cpupart.h b/cddl/contrib/opensolaris/uts/common/sys/cpupart.h new file mode 100644 index 0000000..b9e0da4 --- /dev/null +++ b/cddl/contrib/opensolaris/uts/common/sys/cpupart.h @@ -0,0 +1,162 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_CPUPART_H +#define _SYS_CPUPART_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/processor.h> +#include <sys/cpuvar.h> +#include <sys/disp.h> +#include <sys/pset.h> +#include <sys/lgrp.h> +#include <sys/lgrp_user.h> +#include <sys/pg.h> +#include <sys/bitset.h> +#include <sys/time.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +typedef int cpupartid_t; + +/* + * Special partition id. + */ +#define CP_DEFAULT 0 + +/* + * Flags for cpupart_list() + */ +#define CP_ALL 0 /* return all cpu partitions */ +#define CP_NONEMPTY 1 /* return only non-empty ones */ + +#if defined(_MACHDEP) +struct mach_cpupart { + cpuset_t mc_haltset; +}; + +extern struct mach_cpupart cp_default_mach; +#else +struct mach_cpupart; +#endif + +typedef struct cpupart { + disp_t cp_kp_queue; /* partition-wide kpreempt queue */ + cpupartid_t cp_id; /* partition ID */ + int cp_ncpus; /* number of online processors */ + struct cpupart *cp_next; /* next partition in list */ + struct cpupart *cp_prev; /* previous partition in list */ + struct cpu *cp_cpulist; /* processor list */ + struct kstat *cp_kstat; /* per-partition statistics */ + + /* + * cp_nrunnable and cp_nrunning are used to calculate load average. + */ + uint_t cp_nrunnable; /* current # of runnable threads */ + uint_t cp_nrunning; /* current # of running threads */ + + /* + * cp_updates, cp_nrunnable_cum, cp_nwaiting_cum, and cp_hp_avenrun + * are used to generate kstat information on an as-needed basis. + */ + uint64_t cp_updates; /* number of statistics updates */ + uint64_t cp_nrunnable_cum; /* cum. # of runnable threads */ + uint64_t cp_nwaiting_cum; /* cum. # of waiting threads */ + + struct loadavg_s cp_loadavg; /* cpupart loadavg */ + + klgrpset_t cp_lgrpset; /* set of lgroups on which this */ + /* partition has cpus */ + lpl_t *cp_lgrploads; /* table of load averages for this */ + /* partition, indexed by lgrp ID */ + int cp_nlgrploads; /* size of cp_lgrploads table */ + uint64_t cp_hp_avenrun[3]; /* high-precision load average */ + uint_t cp_attr; /* bitmask of attributes */ + lgrp_gen_t cp_gen; /* generation number */ + lgrp_id_t cp_lgrp_hint; /* last home lgroup chosen */ + bitset_t cp_cmt_pgs; /* CMT PGs represented */ + + struct mach_cpupart *cp_mach; /* mach-specific */ +} cpupart_t; + +typedef struct cpupart_kstat { + kstat_named_t cpk_updates; /* number of updates */ + kstat_named_t cpk_runnable; /* cum # of runnable threads */ + kstat_named_t cpk_waiting; /* cum # waiting for I/O */ + kstat_named_t cpk_ncpus; /* current # of CPUs */ + kstat_named_t cpk_avenrun_1min; /* 1-minute load average */ + kstat_named_t cpk_avenrun_5min; /* 5-minute load average */ + kstat_named_t cpk_avenrun_15min; /* 15-minute load average */ +} cpupart_kstat_t; + +/* + * Macro to obtain the maximum run priority for the global queue associated + * with given cpu partition. + */ +#define CP_MAXRUNPRI(cp) ((cp)->cp_kp_queue.disp_maxrunpri) + +/* + * This macro is used to determine if the given thread must surrender + * CPU to higher priority runnable threads on one of its dispatch queues. + * This should really be defined in <sys/disp.h> but it is not because + * including <sys/cpupart.h> there would cause recursive includes. + */ +#define DISP_MUST_SURRENDER(t) \ + ((DISP_MAXRUNPRI(t) > DISP_PRIO(t)) || \ + (CP_MAXRUNPRI(t->t_cpupart) > DISP_PRIO(t))) + +extern cpupart_t cp_default; +extern cpupart_t *cp_list_head; +extern uint_t cp_numparts; +extern uint_t cp_numparts_nonempty; + +extern void cpupart_initialize_default(); +extern cpupart_t *cpupart_find(psetid_t); +extern int cpupart_create(psetid_t *); +extern int cpupart_destroy(psetid_t); +extern psetid_t cpupart_query_cpu(cpu_t *); +extern int cpupart_attach_cpu(psetid_t, cpu_t *, int); +extern int cpupart_get_cpus(psetid_t *, processorid_t *, uint_t *); +extern int cpupart_bind_thread(kthread_id_t, psetid_t, int, void *, + void *); +extern void cpupart_kpqalloc(pri_t); +extern int cpupart_get_loadavg(psetid_t, int *, int); +extern uint_t cpupart_list(psetid_t *, uint_t, int); +extern int cpupart_setattr(psetid_t, uint_t); +extern int cpupart_getattr(psetid_t, uint_t *); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_CPUPART_H */ diff --git a/cddl/contrib/opensolaris/uts/common/sys/cpuvar.h b/cddl/contrib/opensolaris/uts/common/sys/cpuvar.h new file mode 100644 index 0000000..c7b76b3 --- /dev/null +++ b/cddl/contrib/opensolaris/uts/common/sys/cpuvar.h @@ -0,0 +1,737 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_CPUVAR_H +#define _SYS_CPUVAR_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/thread.h> +#include <sys/sysinfo.h> /* has cpu_stat_t definition */ +#include <sys/disp.h> +#include <sys/processor.h> + +#if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP) +#include <sys/machcpuvar.h> +#endif + +#include <sys/types.h> +#include <sys/file.h> +#include <sys/bitmap.h> +#include <sys/rwlock.h> +#include <sys/msacct.h> +#if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL) && \ + (defined(__i386) || defined(__amd64)) +#include <asm/cpuvar.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct squeue_set_s; + +#define CPU_CACHE_COHERENCE_SIZE 64 +#define S_LOADAVG_SZ 11 +#define S_MOVAVG_SZ 10 + +struct loadavg_s { + int lg_cur; /* current loadavg entry */ + unsigned int lg_len; /* number entries recorded */ + hrtime_t lg_total; /* used to temporarily hold load totals */ + hrtime_t lg_loads[S_LOADAVG_SZ]; /* table of recorded entries */ +}; + +/* + * For fast event tracing. + */ +struct ftrace_record; +typedef struct ftrace_data { + int ftd_state; /* ftrace flags */ + kmutex_t ftd_unused; /* ftrace buffer lock, unused */ + struct ftrace_record *ftd_cur; /* current record */ + struct ftrace_record *ftd_first; /* first record */ + struct ftrace_record *ftd_last; /* last record */ +} ftrace_data_t; + +struct cyc_cpu; +struct nvlist; + +/* + * Per-CPU data. + * + * Be careful adding new members: if they are not the same in all modules (e.g. + * change size depending on a #define), CTF uniquification can fail to work + * properly. Furthermore, this is transitive in that it applies recursively to + * all types pointed to by cpu_t. + */ +typedef struct cpu { + processorid_t cpu_id; /* CPU number */ + processorid_t cpu_seqid; /* sequential CPU id (0..ncpus-1) */ + volatile cpu_flag_t cpu_flags; /* flags indicating CPU state */ + struct cpu *cpu_self; /* pointer to itself */ + kthread_t *cpu_thread; /* current thread */ + kthread_t *cpu_idle_thread; /* idle thread for this CPU */ + kthread_t *cpu_pause_thread; /* pause thread for this CPU */ + klwp_id_t cpu_lwp; /* current lwp (if any) */ + klwp_id_t cpu_fpowner; /* currently loaded fpu owner */ + struct cpupart *cpu_part; /* partition with this CPU */ + struct lgrp_ld *cpu_lpl; /* pointer to this cpu's load */ + int cpu_cache_offset; /* see kmem.c for details */ + + /* + * Links to other CPUs. It is safe to walk these lists if + * one of the following is true: + * - cpu_lock held + * - preemption disabled via kpreempt_disable + * - PIL >= DISP_LEVEL + * - acting thread is an interrupt thread + * - all other CPUs are paused + */ + struct cpu *cpu_next; /* next existing CPU */ + struct cpu *cpu_prev; /* prev existing CPU */ + struct cpu *cpu_next_onln; /* next online (enabled) CPU */ + struct cpu *cpu_prev_onln; /* prev online (enabled) CPU */ + struct cpu *cpu_next_part; /* next CPU in partition */ + struct cpu *cpu_prev_part; /* prev CPU in partition */ + struct cpu *cpu_next_lgrp; /* next CPU in latency group */ + struct cpu *cpu_prev_lgrp; /* prev CPU in latency group */ + struct cpu *cpu_next_lpl; /* next CPU in lgrp partition */ + struct cpu *cpu_prev_lpl; + + struct cpu_pg *cpu_pg; /* cpu's processor groups */ + + void *cpu_reserved[4]; /* reserved for future use */ + + /* + * Scheduling variables. + */ + disp_t *cpu_disp; /* dispatch queue data */ + /* + * Note that cpu_disp is set before the CPU is added to the system + * and is never modified. Hence, no additional locking is needed + * beyond what's necessary to access the cpu_t structure. + */ + char cpu_runrun; /* scheduling flag - set to preempt */ + char cpu_kprunrun; /* force kernel preemption */ + pri_t cpu_chosen_level; /* priority at which cpu */ + /* was chosen for scheduling */ + kthread_t *cpu_dispthread; /* thread selected for dispatch */ + disp_lock_t cpu_thread_lock; /* dispatcher lock on current thread */ + uint8_t cpu_disp_flags; /* flags used by dispatcher */ + /* + * The following field is updated when ever the cpu_dispthread + * changes. Also in places, where the current thread(cpu_dispthread) + * priority changes. This is used in disp_lowpri_cpu() + */ + pri_t cpu_dispatch_pri; /* priority of cpu_dispthread */ + clock_t cpu_last_swtch; /* last time switched to new thread */ + + /* + * Interrupt data. + */ + caddr_t cpu_intr_stack; /* interrupt stack */ + kthread_t *cpu_intr_thread; /* interrupt thread list */ + uint_t cpu_intr_actv; /* interrupt levels active (bitmask) */ + int cpu_base_spl; /* priority for highest rupt active */ + + /* + * Statistics. + */ + cpu_stats_t cpu_stats; /* per-CPU statistics */ + struct kstat *cpu_info_kstat; /* kstat for cpu info */ + + uintptr_t cpu_profile_pc; /* kernel PC in profile interrupt */ + uintptr_t cpu_profile_upc; /* user PC in profile interrupt */ + uintptr_t cpu_profile_pil; /* PIL when profile interrupted */ + + ftrace_data_t cpu_ftrace; /* per cpu ftrace data */ + + clock_t cpu_deadman_lbolt; /* used by deadman() */ + uint_t cpu_deadman_countdown; /* used by deadman() */ + + kmutex_t cpu_cpc_ctxlock; /* protects context for idle thread */ + kcpc_ctx_t *cpu_cpc_ctx; /* performance counter context */ + + /* + * Configuration information for the processor_info system call. + */ + processor_info_t cpu_type_info; /* config info */ + time_t cpu_state_begin; /* when CPU entered current state */ + char cpu_cpr_flags; /* CPR related info */ + struct cyc_cpu *cpu_cyclic; /* per cpu cyclic subsystem data */ + struct squeue_set_s *cpu_squeue_set; /* per cpu squeue set */ + struct nvlist *cpu_props; /* pool-related properties */ + + krwlock_t cpu_ft_lock; /* DTrace: fasttrap lock */ + uintptr_t cpu_dtrace_caller; /* DTrace: caller, if any */ + hrtime_t cpu_dtrace_chillmark; /* DTrace: chill mark time */ + hrtime_t cpu_dtrace_chilled; /* DTrace: total chill time */ + volatile uint16_t cpu_mstate; /* cpu microstate */ + volatile uint16_t cpu_mstate_gen; /* generation counter */ + volatile hrtime_t cpu_mstate_start; /* cpu microstate start time */ + volatile hrtime_t cpu_acct[NCMSTATES]; /* cpu microstate data */ + hrtime_t cpu_intracct[NCMSTATES]; /* interrupt mstate data */ + hrtime_t cpu_waitrq; /* cpu run-queue wait time */ + struct loadavg_s cpu_loadavg; /* loadavg info for this cpu */ + + char *cpu_idstr; /* for printing and debugging */ + char *cpu_brandstr; /* for printing */ + + /* + * Sum of all device interrupt weights that are currently directed at + * this cpu. Cleared at start of interrupt redistribution. + */ + int32_t cpu_intr_weight; + void *cpu_vm_data; + + struct cpu_physid *cpu_physid; /* physical associations */ + + uint64_t cpu_curr_clock; /* current clock freq in Hz */ + char *cpu_supp_freqs; /* supported freqs in Hz */ + + /* + * Interrupt load factor used by dispatcher & softcall + */ + hrtime_t cpu_intrlast; /* total interrupt time (nsec) */ + int cpu_intrload; /* interrupt load factor (0-99%) */ + + /* + * New members must be added /before/ this member, as the CTF tools + * rely on this being the last field before cpu_m, so they can + * correctly calculate the offset when synthetically adding the cpu_m + * member in objects that do not have it. This fixup is required for + * uniquification to work correctly. + */ + uintptr_t cpu_m_pad; + +#if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP) + struct machcpu cpu_m; /* per architecture info */ +#endif +} cpu_t; + +/* + * The cpu_core structure consists of per-CPU state available in any context. + * On some architectures, this may mean that the page(s) containing the + * NCPU-sized array of cpu_core structures must be locked in the TLB -- it + * is up to the platform to assure that this is performed properly. Note that + * the structure is sized to avoid false sharing. + */ +#define CPUC_SIZE (sizeof (uint16_t) + sizeof (uintptr_t) + \ + sizeof (kmutex_t)) +#define CPUC_PADSIZE CPU_CACHE_COHERENCE_SIZE - CPUC_SIZE + +typedef struct cpu_core { + uint16_t cpuc_dtrace_flags; /* DTrace flags */ + uint8_t cpuc_pad[CPUC_PADSIZE]; /* padding */ + uintptr_t cpuc_dtrace_illval; /* DTrace illegal value */ + kmutex_t cpuc_pid_lock; /* DTrace pid provider lock */ +} cpu_core_t; + +#ifdef _KERNEL +extern cpu_core_t cpu_core[]; +#endif /* _KERNEL */ + +/* + * CPU_ON_INTR() macro. Returns non-zero if currently on interrupt stack. + * Note that this isn't a test for a high PIL. For example, cpu_intr_actv + * does not get updated when we go through sys_trap from TL>0 at high PIL. + * getpil() should be used instead to check for PIL levels. + */ +#define CPU_ON_INTR(cpup) ((cpup)->cpu_intr_actv >> (LOCK_LEVEL + 1)) + +#if defined(_KERNEL) || defined(_KMEMUSER) + +#define INTR_STACK_SIZE MAX(DEFAULTSTKSZ, PAGESIZE) + +/* MEMBERS PROTECTED BY "atomicity": cpu_flags */ + +/* + * Flags in the CPU structure. + * + * These are protected by cpu_lock (except during creation). + * + * Offlined-CPUs have three stages of being offline: + * + * CPU_ENABLE indicates that the CPU is participating in I/O interrupts + * that can be directed at a number of different CPUs. If CPU_ENABLE + * is off, the CPU will not be given interrupts that can be sent elsewhere, + * but will still get interrupts from devices associated with that CPU only, + * and from other CPUs. + * + * CPU_OFFLINE indicates that the dispatcher should not allow any threads + * other than interrupt threads to run on that CPU. A CPU will not have + * CPU_OFFLINE set if there are any bound threads (besides interrupts). + * + * CPU_QUIESCED is set if p_offline was able to completely turn idle the + * CPU and it will not have to run interrupt threads. In this case it'll + * stay in the idle loop until CPU_QUIESCED is turned off. + * + * CPU_FROZEN is used only by CPR to mark CPUs that have been successfully + * suspended (in the suspend path), or have yet to be resumed (in the resume + * case). + * + * On some platforms CPUs can be individually powered off. + * The following flags are set for powered off CPUs: CPU_QUIESCED, + * CPU_OFFLINE, and CPU_POWEROFF. The following flags are cleared: + * CPU_RUNNING, CPU_READY, CPU_EXISTS, and CPU_ENABLE. + */ +#define CPU_RUNNING 0x001 /* CPU running */ +#define CPU_READY 0x002 /* CPU ready for cross-calls */ +#define CPU_QUIESCED 0x004 /* CPU will stay in idle */ +#define CPU_EXISTS 0x008 /* CPU is configured */ +#define CPU_ENABLE 0x010 /* CPU enabled for interrupts */ +#define CPU_OFFLINE 0x020 /* CPU offline via p_online */ +#define CPU_POWEROFF 0x040 /* CPU is powered off */ +#define CPU_FROZEN 0x080 /* CPU is frozen via CPR suspend */ +#define CPU_SPARE 0x100 /* CPU offline available for use */ +#define CPU_FAULTED 0x200 /* CPU offline diagnosed faulty */ + +#define FMT_CPU_FLAGS \ + "\20\12fault\11spare\10frozen" \ + "\7poweroff\6offline\5enable\4exist\3quiesced\2ready\1run" + +#define CPU_ACTIVE(cpu) (((cpu)->cpu_flags & CPU_OFFLINE) == 0) + +/* + * Flags for cpu_offline(), cpu_faulted(), and cpu_spare(). + */ +#define CPU_FORCED 0x0001 /* Force CPU offline */ + +/* + * DTrace flags. + */ +#define CPU_DTRACE_NOFAULT 0x0001 /* Don't fault */ +#define CPU_DTRACE_DROP 0x0002 /* Drop this ECB */ +#define CPU_DTRACE_BADADDR 0x0004 /* DTrace fault: bad address */ +#define CPU_DTRACE_BADALIGN 0x0008 /* DTrace fault: bad alignment */ +#define CPU_DTRACE_DIVZERO 0x0010 /* DTrace fault: divide by zero */ +#define CPU_DTRACE_ILLOP 0x0020 /* DTrace fault: illegal operation */ +#define CPU_DTRACE_NOSCRATCH 0x0040 /* DTrace fault: out of scratch */ +#define CPU_DTRACE_KPRIV 0x0080 /* DTrace fault: bad kernel access */ +#define CPU_DTRACE_UPRIV 0x0100 /* DTrace fault: bad user access */ +#define CPU_DTRACE_TUPOFLOW 0x0200 /* DTrace fault: tuple stack overflow */ +#if defined(__sparc) +#define CPU_DTRACE_FAKERESTORE 0x0400 /* pid provider hint to getreg */ +#endif +#define CPU_DTRACE_ENTRY 0x0800 /* pid provider hint to ustack() */ +#define CPU_DTRACE_BADSTACK 0x1000 /* DTrace fault: bad stack */ + +#define CPU_DTRACE_FAULT (CPU_DTRACE_BADADDR | CPU_DTRACE_BADALIGN | \ + CPU_DTRACE_DIVZERO | CPU_DTRACE_ILLOP | \ + CPU_DTRACE_NOSCRATCH | CPU_DTRACE_KPRIV | \ + CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW | \ + CPU_DTRACE_BADSTACK) +#define CPU_DTRACE_ERROR (CPU_DTRACE_FAULT | CPU_DTRACE_DROP) + +/* + * Dispatcher flags + * These flags must be changed only by the current CPU. + */ +#define CPU_DISP_DONTSTEAL 0x01 /* CPU undergoing context swtch */ +#define CPU_DISP_HALTED 0x02 /* CPU halted waiting for interrupt */ + + +#endif /* _KERNEL || _KMEMUSER */ + +#if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP) + +/* + * Macros for manipulating sets of CPUs as a bitmap. Note that this + * bitmap may vary in size depending on the maximum CPU id a specific + * platform supports. This may be different than the number of CPUs + * the platform supports, since CPU ids can be sparse. We define two + * sets of macros; one for platforms where the maximum CPU id is less + * than the number of bits in a single word (32 in a 32-bit kernel, + * 64 in a 64-bit kernel), and one for platforms that require bitmaps + * of more than one word. + */ + +#define CPUSET_WORDS BT_BITOUL(NCPU) +#define CPUSET_NOTINSET ((uint_t)-1) + +#if CPUSET_WORDS > 1 + +typedef struct cpuset { + ulong_t cpub[CPUSET_WORDS]; +} cpuset_t; + +/* + * Private functions for manipulating cpusets that do not fit in a + * single word. These should not be used directly; instead the + * CPUSET_* macros should be used so the code will be portable + * across different definitions of NCPU. + */ +extern void cpuset_all(cpuset_t *); +extern void cpuset_all_but(cpuset_t *, uint_t); +extern int cpuset_isnull(cpuset_t *); +extern int cpuset_cmp(cpuset_t *, cpuset_t *); +extern void cpuset_only(cpuset_t *, uint_t); +extern uint_t cpuset_find(cpuset_t *); +extern void cpuset_bounds(cpuset_t *, uint_t *, uint_t *); + +#define CPUSET_ALL(set) cpuset_all(&(set)) +#define CPUSET_ALL_BUT(set, cpu) cpuset_all_but(&(set), cpu) +#define CPUSET_ONLY(set, cpu) cpuset_only(&(set), cpu) +#define CPU_IN_SET(set, cpu) BT_TEST((set).cpub, cpu) +#define CPUSET_ADD(set, cpu) BT_SET((set).cpub, cpu) +#define CPUSET_DEL(set, cpu) BT_CLEAR((set).cpub, cpu) +#define CPUSET_ISNULL(set) cpuset_isnull(&(set)) +#define CPUSET_ISEQUAL(set1, set2) cpuset_cmp(&(set1), &(set2)) + +/* + * Find one CPU in the cpuset. + * Sets "cpu" to the id of the found CPU, or CPUSET_NOTINSET if no cpu + * could be found. (i.e. empty set) + */ +#define CPUSET_FIND(set, cpu) { \ + cpu = cpuset_find(&(set)); \ +} + +/* + * Determine the smallest and largest CPU id in the set. Returns + * CPUSET_NOTINSET in smallest and largest when set is empty. + */ +#define CPUSET_BOUNDS(set, smallest, largest) { \ + cpuset_bounds(&(set), &(smallest), &(largest)); \ +} + +/* + * Atomic cpuset operations + * These are safe to use for concurrent cpuset manipulations. + * "xdel" and "xadd" are exclusive operations, that set "result" to "0" + * if the add or del was successful, or "-1" if not successful. + * (e.g. attempting to add a cpu to a cpuset that's already there, or + * deleting a cpu that's not in the cpuset) + */ + +#define CPUSET_ATOMIC_DEL(set, cpu) BT_ATOMIC_CLEAR((set).cpub, (cpu)) +#define CPUSET_ATOMIC_ADD(set, cpu) BT_ATOMIC_SET((set).cpub, (cpu)) + +#define CPUSET_ATOMIC_XADD(set, cpu, result) \ + BT_ATOMIC_SET_EXCL((set).cpub, cpu, result) + +#define CPUSET_ATOMIC_XDEL(set, cpu, result) \ + BT_ATOMIC_CLEAR_EXCL((set).cpub, cpu, result) + + +#define CPUSET_OR(set1, set2) { \ + int _i; \ + for (_i = 0; _i < CPUSET_WORDS; _i++) \ + (set1).cpub[_i] |= (set2).cpub[_i]; \ +} + +#define CPUSET_XOR(set1, set2) { \ + int _i; \ + for (_i = 0; _i < CPUSET_WORDS; _i++) \ + (set1).cpub[_i] ^= (set2).cpub[_i]; \ +} + +#define CPUSET_AND(set1, set2) { \ + int _i; \ + for (_i = 0; _i < CPUSET_WORDS; _i++) \ + (set1).cpub[_i] &= (set2).cpub[_i]; \ +} + +#define CPUSET_ZERO(set) { \ + int _i; \ + for (_i = 0; _i < CPUSET_WORDS; _i++) \ + (set).cpub[_i] = 0; \ +} + +#elif CPUSET_WORDS == 1 + +typedef ulong_t cpuset_t; /* a set of CPUs */ + +#define CPUSET(cpu) (1UL << (cpu)) + +#define CPUSET_ALL(set) ((void)((set) = ~0UL)) +#define CPUSET_ALL_BUT(set, cpu) ((void)((set) = ~CPUSET(cpu))) +#define CPUSET_ONLY(set, cpu) ((void)((set) = CPUSET(cpu))) +#define CPU_IN_SET(set, cpu) ((set) & CPUSET(cpu)) +#define CPUSET_ADD(set, cpu) ((void)((set) |= CPUSET(cpu))) +#define CPUSET_DEL(set, cpu) ((void)((set) &= ~CPUSET(cpu))) +#define CPUSET_ISNULL(set) ((set) == 0) +#define CPUSET_ISEQUAL(set1, set2) ((set1) == (set2)) +#define CPUSET_OR(set1, set2) ((void)((set1) |= (set2))) +#define CPUSET_XOR(set1, set2) ((void)((set1) ^= (set2))) +#define CPUSET_AND(set1, set2) ((void)((set1) &= (set2))) +#define CPUSET_ZERO(set) ((void)((set) = 0)) + +#define CPUSET_FIND(set, cpu) { \ + cpu = (uint_t)(lowbit(set) - 1); \ +} + +#define CPUSET_BOUNDS(set, smallest, largest) { \ + smallest = (uint_t)(lowbit(set) - 1); \ + largest = (uint_t)(highbit(set) - 1); \ +} + +#define CPUSET_ATOMIC_DEL(set, cpu) atomic_and_long(&(set), ~CPUSET(cpu)) +#define CPUSET_ATOMIC_ADD(set, cpu) atomic_or_long(&(set), CPUSET(cpu)) + +#define CPUSET_ATOMIC_XADD(set, cpu, result) \ + { result = atomic_set_long_excl(&(set), (cpu)); } + +#define CPUSET_ATOMIC_XDEL(set, cpu, result) \ + { result = atomic_clear_long_excl(&(set), (cpu)); } + +#else /* CPUSET_WORDS <= 0 */ + +#error NCPU is undefined or invalid + +#endif /* CPUSET_WORDS */ + +extern cpuset_t cpu_seqid_inuse; + +#endif /* (_KERNEL || _KMEMUSER) && _MACHDEP */ + +#define CPU_CPR_OFFLINE 0x0 +#define CPU_CPR_ONLINE 0x1 +#define CPU_CPR_IS_OFFLINE(cpu) (((cpu)->cpu_cpr_flags & CPU_CPR_ONLINE) == 0) +#define CPU_CPR_IS_ONLINE(cpu) ((cpu)->cpu_cpr_flags & CPU_CPR_ONLINE) +#define CPU_SET_CPR_FLAGS(cpu, flag) ((cpu)->cpu_cpr_flags |= flag) + +#if defined(_KERNEL) || defined(_KMEMUSER) + +extern struct cpu *cpu[]; /* indexed by CPU number */ +extern cpu_t *cpu_list; /* list of CPUs */ +extern cpu_t *cpu_active; /* list of active CPUs */ +extern int ncpus; /* number of CPUs present */ +extern int ncpus_online; /* number of CPUs not quiesced */ +extern int max_ncpus; /* max present before ncpus is known */ +extern int boot_max_ncpus; /* like max_ncpus but for real */ +extern processorid_t max_cpuid; /* maximum CPU number */ +extern struct cpu *cpu_inmotion; /* offline or partition move target */ +extern cpu_t *clock_cpu_list; + +#if defined(__i386) || defined(__amd64) +extern struct cpu *curcpup(void); +#define CPU (curcpup()) /* Pointer to current CPU */ +#else +#define CPU (curthread->t_cpu) /* Pointer to current CPU */ +#endif + +/* + * CPU_CURRENT indicates to thread_affinity_set to use CPU->cpu_id + * as the target and to grab cpu_lock instead of requiring the caller + * to grab it. + */ +#define CPU_CURRENT -3 + +/* + * Per-CPU statistics + * + * cpu_stats_t contains numerous system and VM-related statistics, in the form + * of gauges or monotonically-increasing event occurrence counts. + */ + +#define CPU_STATS_ENTER_K() kpreempt_disable() +#define CPU_STATS_EXIT_K() kpreempt_enable() + +#define CPU_STATS_ADD_K(class, stat, amount) \ + { kpreempt_disable(); /* keep from switching CPUs */\ + CPU_STATS_ADDQ(CPU, class, stat, amount); \ + kpreempt_enable(); \ + } + +#define CPU_STATS_ADDQ(cp, class, stat, amount) { \ + extern void __dtrace_probe___cpu_##class##info_##stat(uint_t, \ + uint64_t *, cpu_t *); \ + uint64_t *stataddr = &((cp)->cpu_stats.class.stat); \ + __dtrace_probe___cpu_##class##info_##stat((amount), \ + stataddr, cp); \ + *(stataddr) += (amount); \ +} + +#define CPU_STATS(cp, stat) \ + ((cp)->cpu_stats.stat) + +#endif /* _KERNEL || _KMEMUSER */ + +/* + * CPU support routines. + */ +#if defined(_KERNEL) && defined(__STDC__) /* not for genassym.c */ + +struct zone; + +void cpu_list_init(cpu_t *); +void cpu_add_unit(cpu_t *); +void cpu_del_unit(int cpuid); +void cpu_add_active(cpu_t *); +void cpu_kstat_init(cpu_t *); +void cpu_visibility_add(cpu_t *, struct zone *); +void cpu_visibility_remove(cpu_t *, struct zone *); +void cpu_visibility_configure(cpu_t *, struct zone *); +void cpu_visibility_unconfigure(cpu_t *, struct zone *); +void cpu_visibility_online(cpu_t *, struct zone *); +void cpu_visibility_offline(cpu_t *, struct zone *); +void cpu_create_intrstat(cpu_t *); +void cpu_delete_intrstat(cpu_t *); +int cpu_kstat_intrstat_update(kstat_t *, int); +void cpu_intr_swtch_enter(kthread_t *); +void cpu_intr_swtch_exit(kthread_t *); + +void mbox_lock_init(void); /* initialize cross-call locks */ +void mbox_init(int cpun); /* initialize cross-calls */ +void poke_cpu(int cpun); /* interrupt another CPU (to preempt) */ + +/* + * values for safe_list. Pause state that CPUs are in. + */ +#define PAUSE_IDLE 0 /* normal state */ +#define PAUSE_READY 1 /* paused thread ready to spl */ +#define PAUSE_WAIT 2 /* paused thread is spl-ed high */ +#define PAUSE_DIE 3 /* tell pause thread to leave */ +#define PAUSE_DEAD 4 /* pause thread has left */ + +void mach_cpu_pause(volatile char *); + +void pause_cpus(cpu_t *off_cp); +void start_cpus(void); +int cpus_paused(void); + +void cpu_pause_init(void); +cpu_t *cpu_get(processorid_t cpun); /* get the CPU struct associated */ + +int cpu_online(cpu_t *cp); /* take cpu online */ +int cpu_offline(cpu_t *cp, int flags); /* take cpu offline */ +int cpu_spare(cpu_t *cp, int flags); /* take cpu to spare */ +int cpu_faulted(cpu_t *cp, int flags); /* take cpu to faulted */ +int cpu_poweron(cpu_t *cp); /* take powered-off cpu to offline */ +int cpu_poweroff(cpu_t *cp); /* take offline cpu to powered-off */ + +cpu_t *cpu_intr_next(cpu_t *cp); /* get next online CPU taking intrs */ +int cpu_intr_count(cpu_t *cp); /* count # of CPUs handling intrs */ +int cpu_intr_on(cpu_t *cp); /* CPU taking I/O interrupts? */ +void cpu_intr_enable(cpu_t *cp); /* enable I/O interrupts */ +int cpu_intr_disable(cpu_t *cp); /* disable I/O interrupts */ +void cpu_intr_alloc(cpu_t *cp, int n); /* allocate interrupt threads */ + +/* + * Routines for checking CPU states. + */ +int cpu_is_online(cpu_t *); /* check if CPU is online */ +int cpu_is_nointr(cpu_t *); /* check if CPU can service intrs */ +int cpu_is_active(cpu_t *); /* check if CPU can run threads */ +int cpu_is_offline(cpu_t *); /* check if CPU is offline */ +int cpu_is_poweredoff(cpu_t *); /* check if CPU is powered off */ + +int cpu_flagged_online(cpu_flag_t); /* flags show CPU is online */ +int cpu_flagged_nointr(cpu_flag_t); /* flags show CPU not handling intrs */ +int cpu_flagged_active(cpu_flag_t); /* flags show CPU scheduling threads */ +int cpu_flagged_offline(cpu_flag_t); /* flags show CPU is offline */ +int cpu_flagged_poweredoff(cpu_flag_t); /* flags show CPU is powered off */ + +/* + * The processor_info(2) state of a CPU is a simplified representation suitable + * for use by an application program. Kernel subsystems should utilize the + * internal per-CPU state as given by the cpu_flags member of the cpu structure, + * as this information may include platform- or architecture-specific state + * critical to a subsystem's disposition of a particular CPU. + */ +void cpu_set_state(cpu_t *); /* record/timestamp current state */ +int cpu_get_state(cpu_t *); /* get current cpu state */ +const char *cpu_get_state_str(cpu_t *); /* get current cpu state as string */ + + +void cpu_set_supp_freqs(cpu_t *, const char *); /* set the CPU supported */ + /* frequencies */ + +int cpu_configure(int); +int cpu_unconfigure(int); +void cpu_destroy_bound_threads(cpu_t *cp); + +extern int cpu_bind_thread(kthread_t *tp, processorid_t bind, + processorid_t *obind, int *error); +extern int cpu_unbind(processorid_t cpu_id); +extern void thread_affinity_set(kthread_t *t, int cpu_id); +extern void thread_affinity_clear(kthread_t *t); +extern void affinity_set(int cpu_id); +extern void affinity_clear(void); +extern void init_cpu_mstate(struct cpu *, int); +extern void term_cpu_mstate(struct cpu *); +extern void new_cpu_mstate(int, hrtime_t); +extern void get_cpu_mstate(struct cpu *, hrtime_t *); +extern void thread_nomigrate(void); +extern void thread_allowmigrate(void); +extern void weakbinding_stop(void); +extern void weakbinding_start(void); + +/* + * The following routines affect the CPUs participation in interrupt processing, + * if that is applicable on the architecture. This only affects interrupts + * which aren't directed at the processor (not cross calls). + * + * cpu_disable_intr returns non-zero if interrupts were previously enabled. + */ +int cpu_disable_intr(struct cpu *cp); /* stop issuing interrupts to cpu */ +void cpu_enable_intr(struct cpu *cp); /* start issuing interrupts to cpu */ + +/* + * The mutex cpu_lock protects cpu_flags for all CPUs, as well as the ncpus + * and ncpus_online counts. + */ +extern kmutex_t cpu_lock; /* lock protecting CPU data */ + +typedef enum { + CPU_INIT, + CPU_CONFIG, + CPU_UNCONFIG, + CPU_ON, + CPU_OFF, + CPU_CPUPART_IN, + CPU_CPUPART_OUT +} cpu_setup_t; + +typedef int cpu_setup_func_t(cpu_setup_t, int, void *); + +/* + * Routines used to register interest in cpu's being added to or removed + * from the system. + */ +extern void register_cpu_setup_func(cpu_setup_func_t *, void *); +extern void unregister_cpu_setup_func(cpu_setup_func_t *, void *); +extern void cpu_state_change_notify(int, cpu_setup_t); + +/* + * Create various strings that describe the given CPU for the + * processor_info system call and configuration-related kstats. + */ +#define CPU_IDSTRLEN 100 + +extern void init_cpu_info(struct cpu *); +extern void cpu_vm_data_init(struct cpu *); +extern void cpu_vm_data_destroy(struct cpu *); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_CPUVAR_H */ diff --git a/cddl/contrib/opensolaris/uts/common/sys/ctf.h b/cddl/contrib/opensolaris/uts/common/sys/ctf.h new file mode 100644 index 0000000..065e985 --- /dev/null +++ b/cddl/contrib/opensolaris/uts/common/sys/ctf.h @@ -0,0 +1,358 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _CTF_H +#define _CTF_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * CTF - Compact ANSI-C Type Format + * + * This file format can be used to compactly represent the information needed + * by a debugger to interpret the ANSI-C types used by a given program. + * Traditionally, this kind of information is generated by the compiler when + * invoked with the -g flag and is stored in "stabs" strings or in the more + * modern DWARF format. CTF provides a representation of only the information + * that is relevant to debugging a complex, optimized C program such as the + * operating system kernel in a form that is significantly more compact than + * the equivalent stabs or DWARF representation. The format is data-model + * independent, so consumers do not need different code depending on whether + * they are 32-bit or 64-bit programs. CTF assumes that a standard ELF symbol + * table is available for use in the debugger, and uses the structure and data + * of the symbol table to avoid storing redundant information. The CTF data + * may be compressed on disk or in memory, indicated by a bit in the header. + * CTF may be interpreted in a raw disk file, or it may be stored in an ELF + * section, typically named .SUNW_ctf. Data structures are aligned so that + * a raw CTF file or CTF ELF section may be manipulated using mmap(2). + * + * The CTF file or section itself has the following structure: + * + * +--------+--------+---------+----------+-------+--------+ + * | file | type | data | function | data | string | + * | header | labels | objects | info | types | table | + * +--------+--------+---------+----------+-------+--------+ + * + * The file header stores a magic number and version information, encoding + * flags, and the byte offset of each of the sections relative to the end of the + * header itself. If the CTF data has been uniquified against another set of + * CTF data, a reference to that data also appears in the the header. This + * reference is the name of the label corresponding to the types uniquified + * against. + * + * Following the header is a list of labels, used to group the types included in + * the data types section. Each label is accompanied by a type ID i. A given + * label refers to the group of types whose IDs are in the range [0, i]. + * + * Data object and function records are stored in the same order as they appear + * in the corresponding symbol table, except that symbols marked SHN_UNDEF are + * not stored and symbols that have no type data are padded out with zeroes. + * For each data object, the type ID (a small integer) is recorded. For each + * function, the type ID of the return type and argument types is recorded. + * + * The data types section is a list of variable size records that represent each + * type, in order by their ID. The types themselves form a directed graph, + * where each node may contain one or more outgoing edges to other type nodes, + * denoted by their ID. + * + * Strings are recorded as a string table ID (0 or 1) and a byte offset into the + * string table. String table 0 is the internal CTF string table. String table + * 1 is the external string table, which is the string table associated with the + * ELF symbol table for this object. CTF does not record any strings that are + * already in the symbol table, and the CTF string table does not contain any + * duplicated strings. + * + * If the CTF data has been merged with another parent CTF object, some outgoing + * edges may refer to type nodes that exist in another CTF object. The debugger + * and libctf library are responsible for connecting the appropriate objects + * together so that the full set of types can be explored and manipulated. + */ + +#define CTF_MAX_TYPE 0xffff /* max type identifier value */ +#define CTF_MAX_NAME 0x7fffffff /* max offset into a string table */ +#define CTF_MAX_VLEN 0x3ff /* max struct, union, enum members or args */ +#define CTF_MAX_INTOFF 0xff /* max offset of intrinsic value in bits */ +#define CTF_MAX_INTBITS 0xffff /* max size of an intrinsic in bits */ + +/* See ctf_type_t */ +#define CTF_MAX_SIZE 0xfffe /* max size of a type in bytes */ +#define CTF_LSIZE_SENT 0xffff /* sentinel for ctt_size */ +#define CTF_MAX_LSIZE UINT64_MAX + +typedef struct ctf_preamble { + ushort_t ctp_magic; /* magic number (CTF_MAGIC) */ + uchar_t ctp_version; /* data format version number (CTF_VERSION) */ + uchar_t ctp_flags; /* flags (see below) */ +} ctf_preamble_t; + +typedef struct ctf_header { + ctf_preamble_t cth_preamble; + uint_t cth_parlabel; /* ref to name of parent lbl uniq'd against */ + uint_t cth_parname; /* ref to basename of parent */ + uint_t cth_lbloff; /* offset of label section */ + uint_t cth_objtoff; /* offset of object section */ + uint_t cth_funcoff; /* offset of function section */ + uint_t cth_typeoff; /* offset of type section */ + uint_t cth_stroff; /* offset of string section */ + uint_t cth_strlen; /* length of string section in bytes */ +} ctf_header_t; + +#define cth_magic cth_preamble.ctp_magic +#define cth_version cth_preamble.ctp_version +#define cth_flags cth_preamble.ctp_flags + +#ifdef CTF_OLD_VERSIONS + +typedef struct ctf_header_v1 { + ctf_preamble_t cth_preamble; + uint_t cth_objtoff; + uint_t cth_funcoff; + uint_t cth_typeoff; + uint_t cth_stroff; + uint_t cth_strlen; +} ctf_header_v1_t; + +#endif /* CTF_OLD_VERSIONS */ + +#define CTF_MAGIC 0xcff1 /* magic number identifying header */ + +/* data format version number */ +#define CTF_VERSION_1 1 +#define CTF_VERSION_2 2 +#define CTF_VERSION CTF_VERSION_2 /* current version */ + +#define CTF_F_COMPRESS 0x1 /* data buffer is compressed */ + +typedef struct ctf_lblent { + uint_t ctl_label; /* ref to name of label */ + uint_t ctl_typeidx; /* last type associated with this label */ +} ctf_lblent_t; + +typedef struct ctf_stype { + uint_t ctt_name; /* reference to name in string table */ + ushort_t ctt_info; /* encoded kind, variant length (see below) */ + union { + ushort_t _size; /* size of entire type in bytes */ + ushort_t _type; /* reference to another type */ + } _u; +} ctf_stype_t; + +/* + * type sizes, measured in bytes, come in two flavors. 99% of them fit within + * (USHRT_MAX - 1), and thus can be stored in the ctt_size member of a + * ctf_stype_t. The maximum value for these sizes is CTF_MAX_SIZE. The sizes + * larger than CTF_MAX_SIZE must be stored in the ctt_lsize member of a + * ctf_type_t. Use of this member is indicated by the presence of + * CTF_LSIZE_SENT in ctt_size. + */ +typedef struct ctf_type { + uint_t ctt_name; /* reference to name in string table */ + ushort_t ctt_info; /* encoded kind, variant length (see below) */ + union { + ushort_t _size; /* always CTF_LSIZE_SENT */ + ushort_t _type; /* do not use */ + } _u; + uint_t ctt_lsizehi; /* high 32 bits of type size in bytes */ + uint_t ctt_lsizelo; /* low 32 bits of type size in bytes */ +} ctf_type_t; + +#define ctt_size _u._size /* for fundamental types that have a size */ +#define ctt_type _u._type /* for types that reference another type */ + +/* + * The following macros compose and decompose values for ctt_info and + * ctt_name, as well as other structures that contain name references. + * + * ------------------------ + * ctt_info: | kind | isroot | vlen | + * ------------------------ + * 15 11 10 9 0 + * + * kind = CTF_INFO_KIND(c.ctt_info); <-- CTF_K_* value (see below) + * vlen = CTF_INFO_VLEN(c.ctt_info); <-- length of variable data list + * + * stid = CTF_NAME_STID(c.ctt_name); <-- string table id number (0 or 1) + * offset = CTF_NAME_OFFSET(c.ctt_name); <-- string table byte offset + * + * c.ctt_info = CTF_TYPE_INFO(kind, vlen); + * c.ctt_name = CTF_TYPE_NAME(stid, offset); + */ + +#define CTF_INFO_KIND(info) (((info) & 0xf800) >> 11) +#define CTF_INFO_ISROOT(info) (((info) & 0x0400) >> 10) +#define CTF_INFO_VLEN(info) (((info) & CTF_MAX_VLEN)) + +#define CTF_NAME_STID(name) ((name) >> 31) +#define CTF_NAME_OFFSET(name) ((name) & 0x7fffffff) + +#define CTF_TYPE_INFO(kind, isroot, vlen) \ + (((kind) << 11) | (((isroot) ? 1 : 0) << 10) | ((vlen) & CTF_MAX_VLEN)) + +#define CTF_TYPE_NAME(stid, offset) \ + (((stid) << 31) | ((offset) & 0x7fffffff)) + +#define CTF_TYPE_ISPARENT(id) ((id) < 0x8000) +#define CTF_TYPE_ISCHILD(id) ((id) > 0x7fff) + +#define CTF_TYPE_TO_INDEX(id) ((id) & 0x7fff) +#define CTF_INDEX_TO_TYPE(id, child) ((child) ? ((id) | 0x8000) : (id)) +#define CTF_PARENT_SHIFT 15 + +#define CTF_STRTAB_0 0 /* symbolic define for string table id 0 */ +#define CTF_STRTAB_1 1 /* symbolic define for string table id 1 */ + +#define CTF_TYPE_LSIZE(cttp) \ + (((uint64_t)(cttp)->ctt_lsizehi) << 32 | (cttp)->ctt_lsizelo) +#define CTF_SIZE_TO_LSIZE_HI(size) ((uint32_t)((uint64_t)(size) >> 32)) +#define CTF_SIZE_TO_LSIZE_LO(size) ((uint32_t)(size)) + +#ifdef CTF_OLD_VERSIONS + +#define CTF_INFO_KIND_V1(info) (((info) & 0xf000) >> 12) +#define CTF_INFO_ISROOT_V1(info) (((info) & 0x0800) >> 11) +#define CTF_INFO_VLEN_V1(info) (((info) & 0x07ff)) + +#define CTF_TYPE_INFO_V1(kind, isroot, vlen) \ + (((kind) << 12) | (((isroot) ? 1 : 0) << 11) | ((vlen) & 0x07ff)) + +#endif /* CTF_OLD_VERSIONS */ + +/* + * Values for CTF_TYPE_KIND(). If the kind has an associated data list, + * CTF_INFO_VLEN() will extract the number of elements in the list, and + * the type of each element is shown in the comments below. + */ +#define CTF_K_UNKNOWN 0 /* unknown type (used for padding) */ +#define CTF_K_INTEGER 1 /* variant data is CTF_INT_DATA() (see below) */ +#define CTF_K_FLOAT 2 /* variant data is CTF_FP_DATA() (see below) */ +#define CTF_K_POINTER 3 /* ctt_type is referenced type */ +#define CTF_K_ARRAY 4 /* variant data is single ctf_array_t */ +#define CTF_K_FUNCTION 5 /* ctt_type is return type, variant data is */ + /* list of argument types (ushort_t's) */ +#define CTF_K_STRUCT 6 /* variant data is list of ctf_member_t's */ +#define CTF_K_UNION 7 /* variant data is list of ctf_member_t's */ +#define CTF_K_ENUM 8 /* variant data is list of ctf_enum_t's */ +#define CTF_K_FORWARD 9 /* no additional data; ctt_name is tag */ +#define CTF_K_TYPEDEF 10 /* ctt_type is referenced type */ +#define CTF_K_VOLATILE 11 /* ctt_type is base type */ +#define CTF_K_CONST 12 /* ctt_type is base type */ +#define CTF_K_RESTRICT 13 /* ctt_type is base type */ + +#define CTF_K_MAX 31 /* Maximum possible CTF_K_* value */ + +/* + * Values for ctt_type when kind is CTF_K_INTEGER. The flags, offset in bits, + * and size in bits are encoded as a single word using the following macros. + */ +#define CTF_INT_ENCODING(data) (((data) & 0xff000000) >> 24) +#define CTF_INT_OFFSET(data) (((data) & 0x00ff0000) >> 16) +#define CTF_INT_BITS(data) (((data) & 0x0000ffff)) + +#define CTF_INT_DATA(encoding, offset, bits) \ + (((encoding) << 24) | ((offset) << 16) | (bits)) + +#define CTF_INT_SIGNED 0x01 /* integer is signed (otherwise unsigned) */ +#define CTF_INT_CHAR 0x02 /* character display format */ +#define CTF_INT_BOOL 0x04 /* boolean display format */ +#define CTF_INT_VARARGS 0x08 /* varargs display format */ + +/* + * Values for ctt_type when kind is CTF_K_FLOAT. The encoding, offset in bits, + * and size in bits are encoded as a single word using the following macros. + */ +#define CTF_FP_ENCODING(data) (((data) & 0xff000000) >> 24) +#define CTF_FP_OFFSET(data) (((data) & 0x00ff0000) >> 16) +#define CTF_FP_BITS(data) (((data) & 0x0000ffff)) + +#define CTF_FP_DATA(encoding, offset, bits) \ + (((encoding) << 24) | ((offset) << 16) | (bits)) + +#define CTF_FP_SINGLE 1 /* IEEE 32-bit float encoding */ +#define CTF_FP_DOUBLE 2 /* IEEE 64-bit float encoding */ +#define CTF_FP_CPLX 3 /* Complex encoding */ +#define CTF_FP_DCPLX 4 /* Double complex encoding */ +#define CTF_FP_LDCPLX 5 /* Long double complex encoding */ +#define CTF_FP_LDOUBLE 6 /* Long double encoding */ +#define CTF_FP_INTRVL 7 /* Interval (2x32-bit) encoding */ +#define CTF_FP_DINTRVL 8 /* Double interval (2x64-bit) encoding */ +#define CTF_FP_LDINTRVL 9 /* Long double interval (2x128-bit) encoding */ +#define CTF_FP_IMAGRY 10 /* Imaginary (32-bit) encoding */ +#define CTF_FP_DIMAGRY 11 /* Long imaginary (64-bit) encoding */ +#define CTF_FP_LDIMAGRY 12 /* Long double imaginary (128-bit) encoding */ + +#define CTF_FP_MAX 12 /* Maximum possible CTF_FP_* value */ + +typedef struct ctf_array { + ushort_t cta_contents; /* reference to type of array contents */ + ushort_t cta_index; /* reference to type of array index */ + uint_t cta_nelems; /* number of elements */ +} ctf_array_t; + +/* + * Most structure members have bit offsets that can be expressed using a + * short. Some don't. ctf_member_t is used for structs which cannot + * contain any of these large offsets, whereas ctf_lmember_t is used in the + * latter case. If ctt_size for a given struct is >= 8192 bytes, all members + * will be stored as type ctf_lmember_t. + */ + +#define CTF_LSTRUCT_THRESH 8192 + +typedef struct ctf_member { + uint_t ctm_name; /* reference to name in string table */ + ushort_t ctm_type; /* reference to type of member */ + ushort_t ctm_offset; /* offset of this member in bits */ +} ctf_member_t; + +typedef struct ctf_lmember { + uint_t ctlm_name; /* reference to name in string table */ + ushort_t ctlm_type; /* reference to type of member */ + ushort_t ctlm_pad; /* padding */ + uint_t ctlm_offsethi; /* high 32 bits of member offset in bits */ + uint_t ctlm_offsetlo; /* low 32 bits of member offset in bits */ +} ctf_lmember_t; + +#define CTF_LMEM_OFFSET(ctlmp) \ + (((uint64_t)(ctlmp)->ctlm_offsethi) << 32 | (ctlmp)->ctlm_offsetlo) +#define CTF_OFFSET_TO_LMEMHI(offset) ((uint32_t)((uint64_t)(offset) >> 32)) +#define CTF_OFFSET_TO_LMEMLO(offset) ((uint32_t)(offset)) + +typedef struct ctf_enum { + uint_t cte_name; /* reference to name in string table */ + int cte_value; /* value associated with this name */ +} ctf_enum_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _CTF_H */ diff --git a/cddl/contrib/opensolaris/uts/common/sys/ctf_api.h b/cddl/contrib/opensolaris/uts/common/sys/ctf_api.h new file mode 100644 index 0000000..17b0b72 --- /dev/null +++ b/cddl/contrib/opensolaris/uts/common/sys/ctf_api.h @@ -0,0 +1,241 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * This header file defines the interfaces available from the CTF debugger + * library, libctf, and an equivalent kernel module. This API can be used by + * a debugger to operate on data in the Compact ANSI-C Type Format (CTF). + * This is NOT a public interface, although it may eventually become one in + * the fullness of time after we gain more experience with the interfaces. + * + * In the meantime, be aware that any program linked with this API in this + * release of Solaris is almost guaranteed to break in the next release. + * + * In short, do not user this header file or the CTF routines for any purpose. + */ + +#ifndef _CTF_API_H +#define _CTF_API_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/elf.h> +#include <sys/ctf.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Clients can open one or more CTF containers and obtain a pointer to an + * opaque ctf_file_t. Types are identified by an opaque ctf_id_t token. + * These opaque definitions allow libctf to evolve without breaking clients. + */ +typedef struct ctf_file ctf_file_t; +typedef long ctf_id_t; + +/* + * If the debugger needs to provide the CTF library with a set of raw buffers + * for use as the CTF data, symbol table, and string table, it can do so by + * filling in ctf_sect_t structures and passing them to ctf_bufopen(): + */ +typedef struct ctf_sect { + const char *cts_name; /* section name (if any) */ + ulong_t cts_type; /* section type (ELF SHT_... value) */ + ulong_t cts_flags; /* section flags (ELF SHF_... value) */ + const void *cts_data; /* pointer to section data */ + size_t cts_size; /* size of data in bytes */ + size_t cts_entsize; /* size of each section entry (symtab only) */ + off64_t cts_offset; /* file offset of this section (if any) */ +} ctf_sect_t; + +/* + * Encoding information for integers, floating-point values, and certain other + * intrinsics can be obtained by calling ctf_type_encoding(), below. The flags + * field will contain values appropriate for the type defined in <sys/ctf.h>. + */ +typedef struct ctf_encoding { + uint_t cte_format; /* data format (CTF_INT_* or CTF_FP_* flags) */ + uint_t cte_offset; /* offset of value in bits */ + uint_t cte_bits; /* size of storage in bits */ +} ctf_encoding_t; + +typedef struct ctf_membinfo { + ctf_id_t ctm_type; /* type of struct or union member */ + ulong_t ctm_offset; /* offset of member in bits */ +} ctf_membinfo_t; + +typedef struct ctf_arinfo { + ctf_id_t ctr_contents; /* type of array contents */ + ctf_id_t ctr_index; /* type of array index */ + uint_t ctr_nelems; /* number of elements */ +} ctf_arinfo_t; + +typedef struct ctf_funcinfo { + ctf_id_t ctc_return; /* function return type */ + uint_t ctc_argc; /* number of typed arguments to function */ + uint_t ctc_flags; /* function attributes (see below) */ +} ctf_funcinfo_t; + +typedef struct ctf_lblinfo { + ctf_id_t ctb_typeidx; /* last type associated with the label */ +} ctf_lblinfo_t; + +#define CTF_FUNC_VARARG 0x1 /* function arguments end with varargs */ + +/* + * Functions that return integer status or a ctf_id_t use the following value + * to indicate failure. ctf_errno() can be used to obtain an error code. + */ +#define CTF_ERR (-1L) + +/* + * The CTF data model is inferred to be the caller's data model or the data + * model of the given object, unless ctf_setmodel() is explicitly called. + */ +#define CTF_MODEL_ILP32 1 /* object data model is ILP32 */ +#define CTF_MODEL_LP64 2 /* object data model is LP64 */ +#ifdef _LP64 +#define CTF_MODEL_NATIVE CTF_MODEL_LP64 +#else +#define CTF_MODEL_NATIVE CTF_MODEL_ILP32 +#endif + +/* + * Dynamic CTF containers can be created using ctf_create(). The ctf_add_* + * routines can be used to add new definitions to the dynamic container. + * New types are labeled as root or non-root to determine whether they are + * visible at the top-level program scope when subsequently doing a lookup. + */ +#define CTF_ADD_NONROOT 0 /* type only visible in nested scope */ +#define CTF_ADD_ROOT 1 /* type visible at top-level scope */ + +/* + * These typedefs are used to define the signature for callback functions + * that can be used with the iteration and visit functions below: + */ +typedef int ctf_visit_f(const char *, ctf_id_t, ulong_t, int, void *); +typedef int ctf_member_f(const char *, ctf_id_t, ulong_t, void *); +typedef int ctf_enum_f(const char *, int, void *); +typedef int ctf_type_f(ctf_id_t, void *); +typedef int ctf_label_f(const char *, const ctf_lblinfo_t *, void *); + +extern ctf_file_t *ctf_bufopen(const ctf_sect_t *, const ctf_sect_t *, + const ctf_sect_t *, int *); +extern ctf_file_t *ctf_fdopen(int, int *); +extern ctf_file_t *ctf_open(const char *, int *); +extern ctf_file_t *ctf_create(int *); +extern void ctf_close(ctf_file_t *); + +extern ctf_file_t *ctf_parent_file(ctf_file_t *); +extern const char *ctf_parent_name(ctf_file_t *); + +extern int ctf_import(ctf_file_t *, ctf_file_t *); +extern int ctf_setmodel(ctf_file_t *, int); +extern int ctf_getmodel(ctf_file_t *); + +extern void ctf_setspecific(ctf_file_t *, void *); +extern void *ctf_getspecific(ctf_file_t *); + +extern int ctf_errno(ctf_file_t *); +extern const char *ctf_errmsg(int); +extern int ctf_version(int); + +extern int ctf_func_info(ctf_file_t *, ulong_t, ctf_funcinfo_t *); +extern int ctf_func_args(ctf_file_t *, ulong_t, uint_t, ctf_id_t *); + +extern ctf_id_t ctf_lookup_by_name(ctf_file_t *, const char *); +extern ctf_id_t ctf_lookup_by_symbol(ctf_file_t *, ulong_t); + +extern ctf_id_t ctf_type_resolve(ctf_file_t *, ctf_id_t); +extern ssize_t ctf_type_lname(ctf_file_t *, ctf_id_t, char *, size_t); +extern char *ctf_type_name(ctf_file_t *, ctf_id_t, char *, size_t); +extern ssize_t ctf_type_size(ctf_file_t *, ctf_id_t); +extern ssize_t ctf_type_align(ctf_file_t *, ctf_id_t); +extern int ctf_type_kind(ctf_file_t *, ctf_id_t); +extern ctf_id_t ctf_type_reference(ctf_file_t *, ctf_id_t); +extern ctf_id_t ctf_type_pointer(ctf_file_t *, ctf_id_t); +extern int ctf_type_encoding(ctf_file_t *, ctf_id_t, ctf_encoding_t *); +extern int ctf_type_visit(ctf_file_t *, ctf_id_t, ctf_visit_f *, void *); +extern int ctf_type_cmp(ctf_file_t *, ctf_id_t, ctf_file_t *, ctf_id_t); +extern int ctf_type_compat(ctf_file_t *, ctf_id_t, ctf_file_t *, ctf_id_t); + +extern int ctf_member_info(ctf_file_t *, ctf_id_t, const char *, + ctf_membinfo_t *); +extern int ctf_array_info(ctf_file_t *, ctf_id_t, ctf_arinfo_t *); + +extern const char *ctf_enum_name(ctf_file_t *, ctf_id_t, int); +extern int ctf_enum_value(ctf_file_t *, ctf_id_t, const char *, int *); + +extern const char *ctf_label_topmost(ctf_file_t *); +extern int ctf_label_info(ctf_file_t *, const char *, ctf_lblinfo_t *); + +extern int ctf_member_iter(ctf_file_t *, ctf_id_t, ctf_member_f *, void *); +extern int ctf_enum_iter(ctf_file_t *, ctf_id_t, ctf_enum_f *, void *); +extern int ctf_type_iter(ctf_file_t *, ctf_type_f *, void *); +extern int ctf_label_iter(ctf_file_t *, ctf_label_f *, void *); + +extern ctf_id_t ctf_add_array(ctf_file_t *, uint_t, const ctf_arinfo_t *); +extern ctf_id_t ctf_add_const(ctf_file_t *, uint_t, ctf_id_t); +extern ctf_id_t ctf_add_enum(ctf_file_t *, uint_t, const char *); +extern ctf_id_t ctf_add_float(ctf_file_t *, uint_t, + const char *, const ctf_encoding_t *); +extern ctf_id_t ctf_add_forward(ctf_file_t *, uint_t, const char *, uint_t); +extern ctf_id_t ctf_add_function(ctf_file_t *, uint_t, + const ctf_funcinfo_t *, const ctf_id_t *); +extern ctf_id_t ctf_add_integer(ctf_file_t *, uint_t, + const char *, const ctf_encoding_t *); +extern ctf_id_t ctf_add_pointer(ctf_file_t *, uint_t, ctf_id_t); +extern ctf_id_t ctf_add_type(ctf_file_t *, ctf_file_t *, ctf_id_t); +extern ctf_id_t ctf_add_typedef(ctf_file_t *, uint_t, const char *, ctf_id_t); +extern ctf_id_t ctf_add_restrict(ctf_file_t *, uint_t, ctf_id_t); +extern ctf_id_t ctf_add_struct(ctf_file_t *, uint_t, const char *); +extern ctf_id_t ctf_add_union(ctf_file_t *, uint_t, const char *); +extern ctf_id_t ctf_add_volatile(ctf_file_t *, uint_t, ctf_id_t); + +extern int ctf_add_enumerator(ctf_file_t *, ctf_id_t, const char *, int); +extern int ctf_add_member(ctf_file_t *, ctf_id_t, const char *, ctf_id_t); + +extern int ctf_set_array(ctf_file_t *, ctf_id_t, const ctf_arinfo_t *); + +extern int ctf_update(ctf_file_t *); +extern int ctf_discard(ctf_file_t *); +extern int ctf_write(ctf_file_t *, int); + +#ifdef _KERNEL + +struct module; +extern ctf_file_t *ctf_modopen(struct module *, int *); + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _CTF_API_H */ diff --git a/cddl/contrib/opensolaris/uts/common/sys/dtrace.h b/cddl/contrib/opensolaris/uts/common/sys/dtrace.h new file mode 100644 index 0000000..b6e52ec --- /dev/null +++ b/cddl/contrib/opensolaris/uts/common/sys/dtrace.h @@ -0,0 +1,2242 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DTRACE_H +#define _SYS_DTRACE_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * DTrace Dynamic Tracing Software: Kernel Interfaces + * + * Note: The contents of this file are private to the implementation of the + * Solaris system and DTrace subsystem and are subject to change at any time + * without notice. Applications and drivers using these interfaces will fail + * to run on future releases. These interfaces should not be used for any + * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB). + * Please refer to the "Solaris Dynamic Tracing Guide" for more information. + */ + +#ifndef _ASM + +#include <sys/types.h> +#include <sys/modctl.h> +#include <sys/processor.h> +#include <sys/systm.h> +#include <sys/ctf_api.h> +#include <sys/cyclic.h> +#include <sys/int_limits.h> + +/* + * DTrace Universal Constants and Typedefs + */ +#define DTRACE_CPUALL -1 /* all CPUs */ +#define DTRACE_IDNONE 0 /* invalid probe identifier */ +#define DTRACE_EPIDNONE 0 /* invalid enabled probe identifier */ +#define DTRACE_AGGIDNONE 0 /* invalid aggregation identifier */ +#define DTRACE_AGGVARIDNONE 0 /* invalid aggregation variable ID */ +#define DTRACE_CACHEIDNONE 0 /* invalid predicate cache */ +#define DTRACE_PROVNONE 0 /* invalid provider identifier */ +#define DTRACE_METAPROVNONE 0 /* invalid meta-provider identifier */ +#define DTRACE_ARGNONE -1 /* invalid argument index */ + +#define DTRACE_PROVNAMELEN 64 +#define DTRACE_MODNAMELEN 64 +#define DTRACE_FUNCNAMELEN 128 +#define DTRACE_NAMELEN 64 +#define DTRACE_FULLNAMELEN (DTRACE_PROVNAMELEN + DTRACE_MODNAMELEN + \ + DTRACE_FUNCNAMELEN + DTRACE_NAMELEN + 4) +#define DTRACE_ARGTYPELEN 128 + +typedef uint32_t dtrace_id_t; /* probe identifier */ +typedef uint32_t dtrace_epid_t; /* enabled probe identifier */ +typedef uint32_t dtrace_aggid_t; /* aggregation identifier */ +typedef int64_t dtrace_aggvarid_t; /* aggregation variable identifier */ +typedef uint16_t dtrace_actkind_t; /* action kind */ +typedef int64_t dtrace_optval_t; /* option value */ +typedef uint32_t dtrace_cacheid_t; /* predicate cache identifier */ + +typedef enum dtrace_probespec { + DTRACE_PROBESPEC_NONE = -1, + DTRACE_PROBESPEC_PROVIDER = 0, + DTRACE_PROBESPEC_MOD, + DTRACE_PROBESPEC_FUNC, + DTRACE_PROBESPEC_NAME +} dtrace_probespec_t; + +/* + * DTrace Intermediate Format (DIF) + * + * The following definitions describe the DTrace Intermediate Format (DIF), a + * a RISC-like instruction set and program encoding used to represent + * predicates and actions that can be bound to DTrace probes. The constants + * below defining the number of available registers are suggested minimums; the + * compiler should use DTRACEIOC_CONF to dynamically obtain the number of + * registers provided by the current DTrace implementation. + */ +#define DIF_VERSION_1 1 /* DIF version 1: Solaris 10 Beta */ +#define DIF_VERSION_2 2 /* DIF version 2: Solaris 10 FCS */ +#define DIF_VERSION DIF_VERSION_2 /* latest DIF instruction set version */ +#define DIF_DIR_NREGS 8 /* number of DIF integer registers */ +#define DIF_DTR_NREGS 8 /* number of DIF tuple registers */ + +#define DIF_OP_OR 1 /* or r1, r2, rd */ +#define DIF_OP_XOR 2 /* xor r1, r2, rd */ +#define DIF_OP_AND 3 /* and r1, r2, rd */ +#define DIF_OP_SLL 4 /* sll r1, r2, rd */ +#define DIF_OP_SRL 5 /* srl r1, r2, rd */ +#define DIF_OP_SUB 6 /* sub r1, r2, rd */ +#define DIF_OP_ADD 7 /* add r1, r2, rd */ +#define DIF_OP_MUL 8 /* mul r1, r2, rd */ +#define DIF_OP_SDIV 9 /* sdiv r1, r2, rd */ +#define DIF_OP_UDIV 10 /* udiv r1, r2, rd */ +#define DIF_OP_SREM 11 /* srem r1, r2, rd */ +#define DIF_OP_UREM 12 /* urem r1, r2, rd */ +#define DIF_OP_NOT 13 /* not r1, rd */ +#define DIF_OP_MOV 14 /* mov r1, rd */ +#define DIF_OP_CMP 15 /* cmp r1, r2 */ +#define DIF_OP_TST 16 /* tst r1 */ +#define DIF_OP_BA 17 /* ba label */ +#define DIF_OP_BE 18 /* be label */ +#define DIF_OP_BNE 19 /* bne label */ +#define DIF_OP_BG 20 /* bg label */ +#define DIF_OP_BGU 21 /* bgu label */ +#define DIF_OP_BGE 22 /* bge label */ +#define DIF_OP_BGEU 23 /* bgeu label */ +#define DIF_OP_BL 24 /* bl label */ +#define DIF_OP_BLU 25 /* blu label */ +#define DIF_OP_BLE 26 /* ble label */ +#define DIF_OP_BLEU 27 /* bleu label */ +#define DIF_OP_LDSB 28 /* ldsb [r1], rd */ +#define DIF_OP_LDSH 29 /* ldsh [r1], rd */ +#define DIF_OP_LDSW 30 /* ldsw [r1], rd */ +#define DIF_OP_LDUB 31 /* ldub [r1], rd */ +#define DIF_OP_LDUH 32 /* lduh [r1], rd */ +#define DIF_OP_LDUW 33 /* lduw [r1], rd */ +#define DIF_OP_LDX 34 /* ldx [r1], rd */ +#define DIF_OP_RET 35 /* ret rd */ +#define DIF_OP_NOP 36 /* nop */ +#define DIF_OP_SETX 37 /* setx intindex, rd */ +#define DIF_OP_SETS 38 /* sets strindex, rd */ +#define DIF_OP_SCMP 39 /* scmp r1, r2 */ +#define DIF_OP_LDGA 40 /* ldga var, ri, rd */ +#define DIF_OP_LDGS 41 /* ldgs var, rd */ +#define DIF_OP_STGS 42 /* stgs var, rs */ +#define DIF_OP_LDTA 43 /* ldta var, ri, rd */ +#define DIF_OP_LDTS 44 /* ldts var, rd */ +#define DIF_OP_STTS 45 /* stts var, rs */ +#define DIF_OP_SRA 46 /* sra r1, r2, rd */ +#define DIF_OP_CALL 47 /* call subr, rd */ +#define DIF_OP_PUSHTR 48 /* pushtr type, rs, rr */ +#define DIF_OP_PUSHTV 49 /* pushtv type, rs, rv */ +#define DIF_OP_POPTS 50 /* popts */ +#define DIF_OP_FLUSHTS 51 /* flushts */ +#define DIF_OP_LDGAA 52 /* ldgaa var, rd */ +#define DIF_OP_LDTAA 53 /* ldtaa var, rd */ +#define DIF_OP_STGAA 54 /* stgaa var, rs */ +#define DIF_OP_STTAA 55 /* sttaa var, rs */ +#define DIF_OP_LDLS 56 /* ldls var, rd */ +#define DIF_OP_STLS 57 /* stls var, rs */ +#define DIF_OP_ALLOCS 58 /* allocs r1, rd */ +#define DIF_OP_COPYS 59 /* copys r1, r2, rd */ +#define DIF_OP_STB 60 /* stb r1, [rd] */ +#define DIF_OP_STH 61 /* sth r1, [rd] */ +#define DIF_OP_STW 62 /* stw r1, [rd] */ +#define DIF_OP_STX 63 /* stx r1, [rd] */ +#define DIF_OP_ULDSB 64 /* uldsb [r1], rd */ +#define DIF_OP_ULDSH 65 /* uldsh [r1], rd */ +#define DIF_OP_ULDSW 66 /* uldsw [r1], rd */ +#define DIF_OP_ULDUB 67 /* uldub [r1], rd */ +#define DIF_OP_ULDUH 68 /* ulduh [r1], rd */ +#define DIF_OP_ULDUW 69 /* ulduw [r1], rd */ +#define DIF_OP_ULDX 70 /* uldx [r1], rd */ +#define DIF_OP_RLDSB 71 /* rldsb [r1], rd */ +#define DIF_OP_RLDSH 72 /* rldsh [r1], rd */ +#define DIF_OP_RLDSW 73 /* rldsw [r1], rd */ +#define DIF_OP_RLDUB 74 /* rldub [r1], rd */ +#define DIF_OP_RLDUH 75 /* rlduh [r1], rd */ +#define DIF_OP_RLDUW 76 /* rlduw [r1], rd */ +#define DIF_OP_RLDX 77 /* rldx [r1], rd */ +#define DIF_OP_XLATE 78 /* xlate xlrindex, rd */ +#define DIF_OP_XLARG 79 /* xlarg xlrindex, rd */ + +#define DIF_INTOFF_MAX 0xffff /* highest integer table offset */ +#define DIF_STROFF_MAX 0xffff /* highest string table offset */ +#define DIF_REGISTER_MAX 0xff /* highest register number */ +#define DIF_VARIABLE_MAX 0xffff /* highest variable identifier */ +#define DIF_SUBROUTINE_MAX 0xffff /* highest subroutine code */ + +#define DIF_VAR_ARRAY_MIN 0x0000 /* lowest numbered array variable */ +#define DIF_VAR_ARRAY_UBASE 0x0080 /* lowest user-defined array */ +#define DIF_VAR_ARRAY_MAX 0x00ff /* highest numbered array variable */ + +#define DIF_VAR_OTHER_MIN 0x0100 /* lowest numbered scalar or assc */ +#define DIF_VAR_OTHER_UBASE 0x0500 /* lowest user-defined scalar or assc */ +#define DIF_VAR_OTHER_MAX 0xffff /* highest numbered scalar or assc */ + +#define DIF_VAR_ARGS 0x0000 /* arguments array */ +#define DIF_VAR_REGS 0x0001 /* registers array */ +#define DIF_VAR_UREGS 0x0002 /* user registers array */ +#define DIF_VAR_CURTHREAD 0x0100 /* thread pointer */ +#define DIF_VAR_TIMESTAMP 0x0101 /* timestamp */ +#define DIF_VAR_VTIMESTAMP 0x0102 /* virtual timestamp */ +#define DIF_VAR_IPL 0x0103 /* interrupt priority level */ +#define DIF_VAR_EPID 0x0104 /* enabled probe ID */ +#define DIF_VAR_ID 0x0105 /* probe ID */ +#define DIF_VAR_ARG0 0x0106 /* first argument */ +#define DIF_VAR_ARG1 0x0107 /* second argument */ +#define DIF_VAR_ARG2 0x0108 /* third argument */ +#define DIF_VAR_ARG3 0x0109 /* fourth argument */ +#define DIF_VAR_ARG4 0x010a /* fifth argument */ +#define DIF_VAR_ARG5 0x010b /* sixth argument */ +#define DIF_VAR_ARG6 0x010c /* seventh argument */ +#define DIF_VAR_ARG7 0x010d /* eighth argument */ +#define DIF_VAR_ARG8 0x010e /* ninth argument */ +#define DIF_VAR_ARG9 0x010f /* tenth argument */ +#define DIF_VAR_STACKDEPTH 0x0110 /* stack depth */ +#define DIF_VAR_CALLER 0x0111 /* caller */ +#define DIF_VAR_PROBEPROV 0x0112 /* probe provider */ +#define DIF_VAR_PROBEMOD 0x0113 /* probe module */ +#define DIF_VAR_PROBEFUNC 0x0114 /* probe function */ +#define DIF_VAR_PROBENAME 0x0115 /* probe name */ +#define DIF_VAR_PID 0x0116 /* process ID */ +#define DIF_VAR_TID 0x0117 /* (per-process) thread ID */ +#define DIF_VAR_EXECNAME 0x0118 /* name of executable */ +#define DIF_VAR_ZONENAME 0x0119 /* zone name associated with process */ +#define DIF_VAR_WALLTIMESTAMP 0x011a /* wall-clock timestamp */ +#define DIF_VAR_USTACKDEPTH 0x011b /* user-land stack depth */ +#define DIF_VAR_UCALLER 0x011c /* user-level caller */ +#define DIF_VAR_PPID 0x011d /* parent process ID */ +#define DIF_VAR_UID 0x011e /* process user ID */ +#define DIF_VAR_GID 0x011f /* process group ID */ +#define DIF_VAR_ERRNO 0x0120 /* thread errno */ + +#define DIF_SUBR_RAND 0 +#define DIF_SUBR_MUTEX_OWNED 1 +#define DIF_SUBR_MUTEX_OWNER 2 +#define DIF_SUBR_MUTEX_TYPE_ADAPTIVE 3 +#define DIF_SUBR_MUTEX_TYPE_SPIN 4 +#define DIF_SUBR_RW_READ_HELD 5 +#define DIF_SUBR_RW_WRITE_HELD 6 +#define DIF_SUBR_RW_ISWRITER 7 +#define DIF_SUBR_COPYIN 8 +#define DIF_SUBR_COPYINSTR 9 +#define DIF_SUBR_SPECULATION 10 +#define DIF_SUBR_PROGENYOF 11 +#define DIF_SUBR_STRLEN 12 +#define DIF_SUBR_COPYOUT 13 +#define DIF_SUBR_COPYOUTSTR 14 +#define DIF_SUBR_ALLOCA 15 +#define DIF_SUBR_BCOPY 16 +#define DIF_SUBR_COPYINTO 17 +#define DIF_SUBR_MSGDSIZE 18 +#define DIF_SUBR_MSGSIZE 19 +#define DIF_SUBR_GETMAJOR 20 +#define DIF_SUBR_GETMINOR 21 +#define DIF_SUBR_DDI_PATHNAME 22 +#define DIF_SUBR_STRJOIN 23 +#define DIF_SUBR_LLTOSTR 24 +#define DIF_SUBR_BASENAME 25 +#define DIF_SUBR_DIRNAME 26 +#define DIF_SUBR_CLEANPATH 27 +#define DIF_SUBR_STRCHR 28 +#define DIF_SUBR_STRRCHR 29 +#define DIF_SUBR_STRSTR 30 +#define DIF_SUBR_STRTOK 31 +#define DIF_SUBR_SUBSTR 32 +#define DIF_SUBR_INDEX 33 +#define DIF_SUBR_RINDEX 34 +#define DIF_SUBR_HTONS 35 +#define DIF_SUBR_HTONL 36 +#define DIF_SUBR_HTONLL 37 +#define DIF_SUBR_NTOHS 38 +#define DIF_SUBR_NTOHL 39 +#define DIF_SUBR_NTOHLL 40 +#define DIF_SUBR_INET_NTOP 41 +#define DIF_SUBR_INET_NTOA 42 +#define DIF_SUBR_INET_NTOA6 43 + +#define DIF_SUBR_MAX 43 /* max subroutine value */ + +typedef uint32_t dif_instr_t; + +#define DIF_INSTR_OP(i) (((i) >> 24) & 0xff) +#define DIF_INSTR_R1(i) (((i) >> 16) & 0xff) +#define DIF_INSTR_R2(i) (((i) >> 8) & 0xff) +#define DIF_INSTR_RD(i) ((i) & 0xff) +#define DIF_INSTR_RS(i) ((i) & 0xff) +#define DIF_INSTR_LABEL(i) ((i) & 0xffffff) +#define DIF_INSTR_VAR(i) (((i) >> 8) & 0xffff) +#define DIF_INSTR_INTEGER(i) (((i) >> 8) & 0xffff) +#define DIF_INSTR_STRING(i) (((i) >> 8) & 0xffff) +#define DIF_INSTR_SUBR(i) (((i) >> 8) & 0xffff) +#define DIF_INSTR_TYPE(i) (((i) >> 16) & 0xff) +#define DIF_INSTR_XLREF(i) (((i) >> 8) & 0xffff) + +#define DIF_INSTR_FMT(op, r1, r2, d) \ + (((op) << 24) | ((r1) << 16) | ((r2) << 8) | (d)) + +#define DIF_INSTR_NOT(r1, d) (DIF_INSTR_FMT(DIF_OP_NOT, r1, 0, d)) +#define DIF_INSTR_MOV(r1, d) (DIF_INSTR_FMT(DIF_OP_MOV, r1, 0, d)) +#define DIF_INSTR_CMP(op, r1, r2) (DIF_INSTR_FMT(op, r1, r2, 0)) +#define DIF_INSTR_TST(r1) (DIF_INSTR_FMT(DIF_OP_TST, r1, 0, 0)) +#define DIF_INSTR_BRANCH(op, label) (((op) << 24) | (label)) +#define DIF_INSTR_LOAD(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) +#define DIF_INSTR_STORE(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) +#define DIF_INSTR_SETX(i, d) ((DIF_OP_SETX << 24) | ((i) << 8) | (d)) +#define DIF_INSTR_SETS(s, d) ((DIF_OP_SETS << 24) | ((s) << 8) | (d)) +#define DIF_INSTR_RET(d) (DIF_INSTR_FMT(DIF_OP_RET, 0, 0, d)) +#define DIF_INSTR_NOP (DIF_OP_NOP << 24) +#define DIF_INSTR_LDA(op, v, r, d) (DIF_INSTR_FMT(op, v, r, d)) +#define DIF_INSTR_LDV(op, v, d) (((op) << 24) | ((v) << 8) | (d)) +#define DIF_INSTR_STV(op, v, rs) (((op) << 24) | ((v) << 8) | (rs)) +#define DIF_INSTR_CALL(s, d) ((DIF_OP_CALL << 24) | ((s) << 8) | (d)) +#define DIF_INSTR_PUSHTS(op, t, r2, rs) (DIF_INSTR_FMT(op, t, r2, rs)) +#define DIF_INSTR_POPTS (DIF_OP_POPTS << 24) +#define DIF_INSTR_FLUSHTS (DIF_OP_FLUSHTS << 24) +#define DIF_INSTR_ALLOCS(r1, d) (DIF_INSTR_FMT(DIF_OP_ALLOCS, r1, 0, d)) +#define DIF_INSTR_COPYS(r1, r2, d) (DIF_INSTR_FMT(DIF_OP_COPYS, r1, r2, d)) +#define DIF_INSTR_XLATE(op, r, d) (((op) << 24) | ((r) << 8) | (d)) + +#define DIF_REG_R0 0 /* %r0 is always set to zero */ + +/* + * A DTrace Intermediate Format Type (DIF Type) is used to represent the types + * of variables, function and associative array arguments, and the return type + * for each DIF object (shown below). It contains a description of the type, + * its size in bytes, and a module identifier. + */ +typedef struct dtrace_diftype { + uint8_t dtdt_kind; /* type kind (see below) */ + uint8_t dtdt_ckind; /* type kind in CTF */ + uint8_t dtdt_flags; /* type flags (see below) */ + uint8_t dtdt_pad; /* reserved for future use */ + uint32_t dtdt_size; /* type size in bytes (unless string) */ +} dtrace_diftype_t; + +#define DIF_TYPE_CTF 0 /* type is a CTF type */ +#define DIF_TYPE_STRING 1 /* type is a D string */ + +#define DIF_TF_BYREF 0x1 /* type is passed by reference */ + +/* + * A DTrace Intermediate Format variable record is used to describe each of the + * variables referenced by a given DIF object. It contains an integer variable + * identifier along with variable scope and properties, as shown below. The + * size of this structure must be sizeof (int) aligned. + */ +typedef struct dtrace_difv { + uint32_t dtdv_name; /* variable name index in dtdo_strtab */ + uint32_t dtdv_id; /* variable reference identifier */ + uint8_t dtdv_kind; /* variable kind (see below) */ + uint8_t dtdv_scope; /* variable scope (see below) */ + uint16_t dtdv_flags; /* variable flags (see below) */ + dtrace_diftype_t dtdv_type; /* variable type (see above) */ +} dtrace_difv_t; + +#define DIFV_KIND_ARRAY 0 /* variable is an array of quantities */ +#define DIFV_KIND_SCALAR 1 /* variable is a scalar quantity */ + +#define DIFV_SCOPE_GLOBAL 0 /* variable has global scope */ +#define DIFV_SCOPE_THREAD 1 /* variable has thread scope */ +#define DIFV_SCOPE_LOCAL 2 /* variable has local scope */ + +#define DIFV_F_REF 0x1 /* variable is referenced by DIFO */ +#define DIFV_F_MOD 0x2 /* variable is written by DIFO */ + +/* + * DTrace Actions + * + * The upper byte determines the class of the action; the low bytes determines + * the specific action within that class. The classes of actions are as + * follows: + * + * [ no class ] <= May record process- or kernel-related data + * DTRACEACT_PROC <= Only records process-related data + * DTRACEACT_PROC_DESTRUCTIVE <= Potentially destructive to processes + * DTRACEACT_KERNEL <= Only records kernel-related data + * DTRACEACT_KERNEL_DESTRUCTIVE <= Potentially destructive to the kernel + * DTRACEACT_SPECULATIVE <= Speculation-related action + * DTRACEACT_AGGREGATION <= Aggregating action + */ +#define DTRACEACT_NONE 0 /* no action */ +#define DTRACEACT_DIFEXPR 1 /* action is DIF expression */ +#define DTRACEACT_EXIT 2 /* exit() action */ +#define DTRACEACT_PRINTF 3 /* printf() action */ +#define DTRACEACT_PRINTA 4 /* printa() action */ +#define DTRACEACT_LIBACT 5 /* library-controlled action */ + +#define DTRACEACT_PROC 0x0100 +#define DTRACEACT_USTACK (DTRACEACT_PROC + 1) +#define DTRACEACT_JSTACK (DTRACEACT_PROC + 2) +#define DTRACEACT_USYM (DTRACEACT_PROC + 3) +#define DTRACEACT_UMOD (DTRACEACT_PROC + 4) +#define DTRACEACT_UADDR (DTRACEACT_PROC + 5) + +#define DTRACEACT_PROC_DESTRUCTIVE 0x0200 +#define DTRACEACT_STOP (DTRACEACT_PROC_DESTRUCTIVE + 1) +#define DTRACEACT_RAISE (DTRACEACT_PROC_DESTRUCTIVE + 2) +#define DTRACEACT_SYSTEM (DTRACEACT_PROC_DESTRUCTIVE + 3) +#define DTRACEACT_FREOPEN (DTRACEACT_PROC_DESTRUCTIVE + 4) + +#define DTRACEACT_PROC_CONTROL 0x0300 + +#define DTRACEACT_KERNEL 0x0400 +#define DTRACEACT_STACK (DTRACEACT_KERNEL + 1) +#define DTRACEACT_SYM (DTRACEACT_KERNEL + 2) +#define DTRACEACT_MOD (DTRACEACT_KERNEL + 3) + +#define DTRACEACT_KERNEL_DESTRUCTIVE 0x0500 +#define DTRACEACT_BREAKPOINT (DTRACEACT_KERNEL_DESTRUCTIVE + 1) +#define DTRACEACT_PANIC (DTRACEACT_KERNEL_DESTRUCTIVE + 2) +#define DTRACEACT_CHILL (DTRACEACT_KERNEL_DESTRUCTIVE + 3) + +#define DTRACEACT_SPECULATIVE 0x0600 +#define DTRACEACT_SPECULATE (DTRACEACT_SPECULATIVE + 1) +#define DTRACEACT_COMMIT (DTRACEACT_SPECULATIVE + 2) +#define DTRACEACT_DISCARD (DTRACEACT_SPECULATIVE + 3) + +#define DTRACEACT_CLASS(x) ((x) & 0xff00) + +#define DTRACEACT_ISDESTRUCTIVE(x) \ + (DTRACEACT_CLASS(x) == DTRACEACT_PROC_DESTRUCTIVE || \ + DTRACEACT_CLASS(x) == DTRACEACT_KERNEL_DESTRUCTIVE) + +#define DTRACEACT_ISSPECULATIVE(x) \ + (DTRACEACT_CLASS(x) == DTRACEACT_SPECULATIVE) + +#define DTRACEACT_ISPRINTFLIKE(x) \ + ((x) == DTRACEACT_PRINTF || (x) == DTRACEACT_PRINTA || \ + (x) == DTRACEACT_SYSTEM || (x) == DTRACEACT_FREOPEN) + +/* + * DTrace Aggregating Actions + * + * These are functions f(x) for which the following is true: + * + * f(f(x_0) U f(x_1) U ... U f(x_n)) = f(x_0 U x_1 U ... U x_n) + * + * where x_n is a set of arbitrary data. Aggregating actions are in their own + * DTrace action class, DTTRACEACT_AGGREGATION. The macros provided here allow + * for easier processing of the aggregation argument and data payload for a few + * aggregating actions (notably: quantize(), lquantize(), and ustack()). + */ +#define DTRACEACT_AGGREGATION 0x0700 +#define DTRACEAGG_COUNT (DTRACEACT_AGGREGATION + 1) +#define DTRACEAGG_MIN (DTRACEACT_AGGREGATION + 2) +#define DTRACEAGG_MAX (DTRACEACT_AGGREGATION + 3) +#define DTRACEAGG_AVG (DTRACEACT_AGGREGATION + 4) +#define DTRACEAGG_SUM (DTRACEACT_AGGREGATION + 5) +#define DTRACEAGG_STDDEV (DTRACEACT_AGGREGATION + 6) +#define DTRACEAGG_QUANTIZE (DTRACEACT_AGGREGATION + 7) +#define DTRACEAGG_LQUANTIZE (DTRACEACT_AGGREGATION + 8) + +#define DTRACEACT_ISAGG(x) \ + (DTRACEACT_CLASS(x) == DTRACEACT_AGGREGATION) + +#define DTRACE_QUANTIZE_NBUCKETS \ + (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) + +#define DTRACE_QUANTIZE_ZEROBUCKET ((sizeof (uint64_t) * NBBY) - 1) + +#define DTRACE_QUANTIZE_BUCKETVAL(buck) \ + (int64_t)((buck) < DTRACE_QUANTIZE_ZEROBUCKET ? \ + -(1LL << (DTRACE_QUANTIZE_ZEROBUCKET - 1 - (buck))) : \ + (buck) == DTRACE_QUANTIZE_ZEROBUCKET ? 0 : \ + 1LL << ((buck) - DTRACE_QUANTIZE_ZEROBUCKET - 1)) + +#define DTRACE_LQUANTIZE_STEPSHIFT 48 +#define DTRACE_LQUANTIZE_STEPMASK ((uint64_t)UINT16_MAX << 48) +#define DTRACE_LQUANTIZE_LEVELSHIFT 32 +#define DTRACE_LQUANTIZE_LEVELMASK ((uint64_t)UINT16_MAX << 32) +#define DTRACE_LQUANTIZE_BASESHIFT 0 +#define DTRACE_LQUANTIZE_BASEMASK UINT32_MAX + +#define DTRACE_LQUANTIZE_STEP(x) \ + (uint16_t)(((x) & DTRACE_LQUANTIZE_STEPMASK) >> \ + DTRACE_LQUANTIZE_STEPSHIFT) + +#define DTRACE_LQUANTIZE_LEVELS(x) \ + (uint16_t)(((x) & DTRACE_LQUANTIZE_LEVELMASK) >> \ + DTRACE_LQUANTIZE_LEVELSHIFT) + +#define DTRACE_LQUANTIZE_BASE(x) \ + (int32_t)(((x) & DTRACE_LQUANTIZE_BASEMASK) >> \ + DTRACE_LQUANTIZE_BASESHIFT) + +#define DTRACE_USTACK_NFRAMES(x) (uint32_t)((x) & UINT32_MAX) +#define DTRACE_USTACK_STRSIZE(x) (uint32_t)((x) >> 32) +#define DTRACE_USTACK_ARG(x, y) \ + ((((uint64_t)(y)) << 32) | ((x) & UINT32_MAX)) + +#ifndef _LP64 +#ifndef _LITTLE_ENDIAN +#define DTRACE_PTR(type, name) uint32_t name##pad; type *name +#else +#define DTRACE_PTR(type, name) type *name; uint32_t name##pad +#endif +#else +#define DTRACE_PTR(type, name) type *name +#endif + +/* + * DTrace Object Format (DOF) + * + * DTrace programs can be persistently encoded in the DOF format so that they + * may be embedded in other programs (for example, in an ELF file) or in the + * dtrace driver configuration file for use in anonymous tracing. The DOF + * format is versioned and extensible so that it can be revised and so that + * internal data structures can be modified or extended compatibly. All DOF + * structures use fixed-size types, so the 32-bit and 64-bit representations + * are identical and consumers can use either data model transparently. + * + * The file layout is structured as follows: + * + * +---------------+-------------------+----- ... ----+---- ... ------+ + * | dof_hdr_t | dof_sec_t[ ... ] | loadable | non-loadable | + * | (file header) | (section headers) | section data | section data | + * +---------------+-------------------+----- ... ----+---- ... ------+ + * |<------------ dof_hdr.dofh_loadsz --------------->| | + * |<------------ dof_hdr.dofh_filesz ------------------------------->| + * + * The file header stores meta-data including a magic number, data model for + * the instrumentation, data encoding, and properties of the DIF code within. + * The header describes its own size and the size of the section headers. By + * convention, an array of section headers follows the file header, and then + * the data for all loadable sections and unloadable sections. This permits + * consumer code to easily download the headers and all loadable data into the + * DTrace driver in one contiguous chunk, omitting other extraneous sections. + * + * The section headers describe the size, offset, alignment, and section type + * for each section. Sections are described using a set of #defines that tell + * the consumer what kind of data is expected. Sections can contain links to + * other sections by storing a dof_secidx_t, an index into the section header + * array, inside of the section data structures. The section header includes + * an entry size so that sections with data arrays can grow their structures. + * + * The DOF data itself can contain many snippets of DIF (i.e. >1 DIFOs), which + * are represented themselves as a collection of related DOF sections. This + * permits us to change the set of sections associated with a DIFO over time, + * and also permits us to encode DIFOs that contain different sets of sections. + * When a DOF section wants to refer to a DIFO, it stores the dof_secidx_t of a + * section of type DOF_SECT_DIFOHDR. This section's data is then an array of + * dof_secidx_t's which in turn denote the sections associated with this DIFO. + * + * This loose coupling of the file structure (header and sections) to the + * structure of the DTrace program itself (ECB descriptions, action + * descriptions, and DIFOs) permits activities such as relocation processing + * to occur in a single pass without having to understand D program structure. + * + * Finally, strings are always stored in ELF-style string tables along with a + * string table section index and string table offset. Therefore strings in + * DOF are always arbitrary-length and not bound to the current implementation. + */ + +#define DOF_ID_SIZE 16 /* total size of dofh_ident[] in bytes */ + +typedef struct dof_hdr { + uint8_t dofh_ident[DOF_ID_SIZE]; /* identification bytes (see below) */ + uint32_t dofh_flags; /* file attribute flags (if any) */ + uint32_t dofh_hdrsize; /* size of file header in bytes */ + uint32_t dofh_secsize; /* size of section header in bytes */ + uint32_t dofh_secnum; /* number of section headers */ + uint64_t dofh_secoff; /* file offset of section headers */ + uint64_t dofh_loadsz; /* file size of loadable portion */ + uint64_t dofh_filesz; /* file size of entire DOF file */ + uint64_t dofh_pad; /* reserved for future use */ +} dof_hdr_t; + +#define DOF_ID_MAG0 0 /* first byte of magic number */ +#define DOF_ID_MAG1 1 /* second byte of magic number */ +#define DOF_ID_MAG2 2 /* third byte of magic number */ +#define DOF_ID_MAG3 3 /* fourth byte of magic number */ +#define DOF_ID_MODEL 4 /* DOF data model (see below) */ +#define DOF_ID_ENCODING 5 /* DOF data encoding (see below) */ +#define DOF_ID_VERSION 6 /* DOF file format major version (see below) */ +#define DOF_ID_DIFVERS 7 /* DIF instruction set version */ +#define DOF_ID_DIFIREG 8 /* DIF integer registers used by compiler */ +#define DOF_ID_DIFTREG 9 /* DIF tuple registers used by compiler */ +#define DOF_ID_PAD 10 /* start of padding bytes (all zeroes) */ + +#define DOF_MAG_MAG0 0x7F /* DOF_ID_MAG[0-3] */ +#define DOF_MAG_MAG1 'D' +#define DOF_MAG_MAG2 'O' +#define DOF_MAG_MAG3 'F' + +#define DOF_MAG_STRING "\177DOF" +#define DOF_MAG_STRLEN 4 + +#define DOF_MODEL_NONE 0 /* DOF_ID_MODEL */ +#define DOF_MODEL_ILP32 1 +#define DOF_MODEL_LP64 2 + +#ifdef _LP64 +#define DOF_MODEL_NATIVE DOF_MODEL_LP64 +#else +#define DOF_MODEL_NATIVE DOF_MODEL_ILP32 +#endif + +#define DOF_ENCODE_NONE 0 /* DOF_ID_ENCODING */ +#define DOF_ENCODE_LSB 1 +#define DOF_ENCODE_MSB 2 + +#ifdef _BIG_ENDIAN +#define DOF_ENCODE_NATIVE DOF_ENCODE_MSB +#else +#define DOF_ENCODE_NATIVE DOF_ENCODE_LSB +#endif + +#define DOF_VERSION_1 1 /* DOF version 1: Solaris 10 FCS */ +#define DOF_VERSION_2 2 /* DOF version 2: Solaris Express 6/06 */ +#define DOF_VERSION DOF_VERSION_2 /* Latest DOF version */ + +#define DOF_FL_VALID 0 /* mask of all valid dofh_flags bits */ + +typedef uint32_t dof_secidx_t; /* section header table index type */ +typedef uint32_t dof_stridx_t; /* string table index type */ + +#define DOF_SECIDX_NONE (-1U) /* null value for section indices */ +#define DOF_STRIDX_NONE (-1U) /* null value for string indices */ + +typedef struct dof_sec { + uint32_t dofs_type; /* section type (see below) */ + uint32_t dofs_align; /* section data memory alignment */ + uint32_t dofs_flags; /* section flags (if any) */ + uint32_t dofs_entsize; /* size of section entry (if table) */ + uint64_t dofs_offset; /* offset of section data within file */ + uint64_t dofs_size; /* size of section data in bytes */ +} dof_sec_t; + +#define DOF_SECT_NONE 0 /* null section */ +#define DOF_SECT_COMMENTS 1 /* compiler comments */ +#define DOF_SECT_SOURCE 2 /* D program source code */ +#define DOF_SECT_ECBDESC 3 /* dof_ecbdesc_t */ +#define DOF_SECT_PROBEDESC 4 /* dof_probedesc_t */ +#define DOF_SECT_ACTDESC 5 /* dof_actdesc_t array */ +#define DOF_SECT_DIFOHDR 6 /* dof_difohdr_t (variable length) */ +#define DOF_SECT_DIF 7 /* uint32_t array of byte code */ +#define DOF_SECT_STRTAB 8 /* string table */ +#define DOF_SECT_VARTAB 9 /* dtrace_difv_t array */ +#define DOF_SECT_RELTAB 10 /* dof_relodesc_t array */ +#define DOF_SECT_TYPTAB 11 /* dtrace_diftype_t array */ +#define DOF_SECT_URELHDR 12 /* dof_relohdr_t (user relocations) */ +#define DOF_SECT_KRELHDR 13 /* dof_relohdr_t (kernel relocations) */ +#define DOF_SECT_OPTDESC 14 /* dof_optdesc_t array */ +#define DOF_SECT_PROVIDER 15 /* dof_provider_t */ +#define DOF_SECT_PROBES 16 /* dof_probe_t array */ +#define DOF_SECT_PRARGS 17 /* uint8_t array (probe arg mappings) */ +#define DOF_SECT_PROFFS 18 /* uint32_t array (probe arg offsets) */ +#define DOF_SECT_INTTAB 19 /* uint64_t array */ +#define DOF_SECT_UTSNAME 20 /* struct utsname */ +#define DOF_SECT_XLTAB 21 /* dof_xlref_t array */ +#define DOF_SECT_XLMEMBERS 22 /* dof_xlmember_t array */ +#define DOF_SECT_XLIMPORT 23 /* dof_xlator_t */ +#define DOF_SECT_XLEXPORT 24 /* dof_xlator_t */ +#define DOF_SECT_PREXPORT 25 /* dof_secidx_t array (exported objs) */ +#define DOF_SECT_PRENOFFS 26 /* uint32_t array (enabled offsets) */ + +#define DOF_SECF_LOAD 1 /* section should be loaded */ + +typedef struct dof_ecbdesc { + dof_secidx_t dofe_probes; /* link to DOF_SECT_PROBEDESC */ + dof_secidx_t dofe_pred; /* link to DOF_SECT_DIFOHDR */ + dof_secidx_t dofe_actions; /* link to DOF_SECT_ACTDESC */ + uint32_t dofe_pad; /* reserved for future use */ + uint64_t dofe_uarg; /* user-supplied library argument */ +} dof_ecbdesc_t; + +typedef struct dof_probedesc { + dof_secidx_t dofp_strtab; /* link to DOF_SECT_STRTAB section */ + dof_stridx_t dofp_provider; /* provider string */ + dof_stridx_t dofp_mod; /* module string */ + dof_stridx_t dofp_func; /* function string */ + dof_stridx_t dofp_name; /* name string */ + uint32_t dofp_id; /* probe identifier (or zero) */ +} dof_probedesc_t; + +typedef struct dof_actdesc { + dof_secidx_t dofa_difo; /* link to DOF_SECT_DIFOHDR */ + dof_secidx_t dofa_strtab; /* link to DOF_SECT_STRTAB section */ + uint32_t dofa_kind; /* action kind (DTRACEACT_* constant) */ + uint32_t dofa_ntuple; /* number of subsequent tuple actions */ + uint64_t dofa_arg; /* kind-specific argument */ + uint64_t dofa_uarg; /* user-supplied argument */ +} dof_actdesc_t; + +typedef struct dof_difohdr { + dtrace_diftype_t dofd_rtype; /* return type for this fragment */ + dof_secidx_t dofd_links[1]; /* variable length array of indices */ +} dof_difohdr_t; + +typedef struct dof_relohdr { + dof_secidx_t dofr_strtab; /* link to DOF_SECT_STRTAB for names */ + dof_secidx_t dofr_relsec; /* link to DOF_SECT_RELTAB for relos */ + dof_secidx_t dofr_tgtsec; /* link to section we are relocating */ +} dof_relohdr_t; + +typedef struct dof_relodesc { + dof_stridx_t dofr_name; /* string name of relocation symbol */ + uint32_t dofr_type; /* relo type (DOF_RELO_* constant) */ + uint64_t dofr_offset; /* byte offset for relocation */ + uint64_t dofr_data; /* additional type-specific data */ +} dof_relodesc_t; + +#define DOF_RELO_NONE 0 /* empty relocation entry */ +#define DOF_RELO_SETX 1 /* relocate setx value */ + +typedef struct dof_optdesc { + uint32_t dofo_option; /* option identifier */ + dof_secidx_t dofo_strtab; /* string table, if string option */ + uint64_t dofo_value; /* option value or string index */ +} dof_optdesc_t; + +typedef uint32_t dof_attr_t; /* encoded stability attributes */ + +#define DOF_ATTR(n, d, c) (((n) << 24) | ((d) << 16) | ((c) << 8)) +#define DOF_ATTR_NAME(a) (((a) >> 24) & 0xff) +#define DOF_ATTR_DATA(a) (((a) >> 16) & 0xff) +#define DOF_ATTR_CLASS(a) (((a) >> 8) & 0xff) + +typedef struct dof_provider { + dof_secidx_t dofpv_strtab; /* link to DOF_SECT_STRTAB section */ + dof_secidx_t dofpv_probes; /* link to DOF_SECT_PROBES section */ + dof_secidx_t dofpv_prargs; /* link to DOF_SECT_PRARGS section */ + dof_secidx_t dofpv_proffs; /* link to DOF_SECT_PROFFS section */ + dof_stridx_t dofpv_name; /* provider name string */ + dof_attr_t dofpv_provattr; /* provider attributes */ + dof_attr_t dofpv_modattr; /* module attributes */ + dof_attr_t dofpv_funcattr; /* function attributes */ + dof_attr_t dofpv_nameattr; /* name attributes */ + dof_attr_t dofpv_argsattr; /* args attributes */ + dof_secidx_t dofpv_prenoffs; /* link to DOF_SECT_PRENOFFS section */ +} dof_provider_t; + +typedef struct dof_probe { + uint64_t dofpr_addr; /* probe base address or offset */ + dof_stridx_t dofpr_func; /* probe function string */ + dof_stridx_t dofpr_name; /* probe name string */ + dof_stridx_t dofpr_nargv; /* native argument type strings */ + dof_stridx_t dofpr_xargv; /* translated argument type strings */ + uint32_t dofpr_argidx; /* index of first argument mapping */ + uint32_t dofpr_offidx; /* index of first offset entry */ + uint8_t dofpr_nargc; /* native argument count */ + uint8_t dofpr_xargc; /* translated argument count */ + uint16_t dofpr_noffs; /* number of offset entries for probe */ + uint32_t dofpr_enoffidx; /* index of first is-enabled offset */ + uint16_t dofpr_nenoffs; /* number of is-enabled offsets */ + uint16_t dofpr_pad1; /* reserved for future use */ + uint32_t dofpr_pad2; /* reserved for future use */ +} dof_probe_t; + +typedef struct dof_xlator { + dof_secidx_t dofxl_members; /* link to DOF_SECT_XLMEMBERS section */ + dof_secidx_t dofxl_strtab; /* link to DOF_SECT_STRTAB section */ + dof_stridx_t dofxl_argv; /* input parameter type strings */ + uint32_t dofxl_argc; /* input parameter list length */ + dof_stridx_t dofxl_type; /* output type string name */ + dof_attr_t dofxl_attr; /* output stability attributes */ +} dof_xlator_t; + +typedef struct dof_xlmember { + dof_secidx_t dofxm_difo; /* member link to DOF_SECT_DIFOHDR */ + dof_stridx_t dofxm_name; /* member name */ + dtrace_diftype_t dofxm_type; /* member type */ +} dof_xlmember_t; + +typedef struct dof_xlref { + dof_secidx_t dofxr_xlator; /* link to DOF_SECT_XLATORS section */ + uint32_t dofxr_member; /* index of referenced dof_xlmember */ + uint32_t dofxr_argn; /* index of argument for DIF_OP_XLARG */ +} dof_xlref_t; + +/* + * DTrace Intermediate Format Object (DIFO) + * + * A DIFO is used to store the compiled DIF for a D expression, its return + * type, and its string and variable tables. The string table is a single + * buffer of character data into which sets instructions and variable + * references can reference strings using a byte offset. The variable table + * is an array of dtrace_difv_t structures that describe the name and type of + * each variable and the id used in the DIF code. This structure is described + * above in the DIF section of this header file. The DIFO is used at both + * user-level (in the library) and in the kernel, but the structure is never + * passed between the two: the DOF structures form the only interface. As a + * result, the definition can change depending on the presence of _KERNEL. + */ +typedef struct dtrace_difo { + dif_instr_t *dtdo_buf; /* instruction buffer */ + uint64_t *dtdo_inttab; /* integer table (optional) */ + char *dtdo_strtab; /* string table (optional) */ + dtrace_difv_t *dtdo_vartab; /* variable table (optional) */ + uint_t dtdo_len; /* length of instruction buffer */ + uint_t dtdo_intlen; /* length of integer table */ + uint_t dtdo_strlen; /* length of string table */ + uint_t dtdo_varlen; /* length of variable table */ + dtrace_diftype_t dtdo_rtype; /* return type */ + uint_t dtdo_refcnt; /* owner reference count */ + uint_t dtdo_destructive; /* invokes destructive subroutines */ +#ifndef _KERNEL + dof_relodesc_t *dtdo_kreltab; /* kernel relocations */ + dof_relodesc_t *dtdo_ureltab; /* user relocations */ + struct dt_node **dtdo_xlmtab; /* translator references */ + uint_t dtdo_krelen; /* length of krelo table */ + uint_t dtdo_urelen; /* length of urelo table */ + uint_t dtdo_xlmlen; /* length of translator table */ +#endif +} dtrace_difo_t; + +/* + * DTrace Enabling Description Structures + * + * When DTrace is tracking the description of a DTrace enabling entity (probe, + * predicate, action, ECB, record, etc.), it does so in a description + * structure. These structures all end in "desc", and are used at both + * user-level and in the kernel -- but (with the exception of + * dtrace_probedesc_t) they are never passed between them. Typically, + * user-level will use the description structures when assembling an enabling. + * It will then distill those description structures into a DOF object (see + * above), and send it into the kernel. The kernel will again use the + * description structures to create a description of the enabling as it reads + * the DOF. When the description is complete, the enabling will be actually + * created -- turning it into the structures that represent the enabling + * instead of merely describing it. Not surprisingly, the description + * structures bear a strong resemblance to the DOF structures that act as their + * conduit. + */ +struct dtrace_predicate; + +typedef struct dtrace_probedesc { + dtrace_id_t dtpd_id; /* probe identifier */ + char dtpd_provider[DTRACE_PROVNAMELEN]; /* probe provider name */ + char dtpd_mod[DTRACE_MODNAMELEN]; /* probe module name */ + char dtpd_func[DTRACE_FUNCNAMELEN]; /* probe function name */ + char dtpd_name[DTRACE_NAMELEN]; /* probe name */ +} dtrace_probedesc_t; + +typedef struct dtrace_repldesc { + dtrace_probedesc_t dtrpd_match; /* probe descr. to match */ + dtrace_probedesc_t dtrpd_create; /* probe descr. to create */ +} dtrace_repldesc_t; + +typedef struct dtrace_preddesc { + dtrace_difo_t *dtpdd_difo; /* pointer to DIF object */ + struct dtrace_predicate *dtpdd_predicate; /* pointer to predicate */ +} dtrace_preddesc_t; + +typedef struct dtrace_actdesc { + dtrace_difo_t *dtad_difo; /* pointer to DIF object */ + struct dtrace_actdesc *dtad_next; /* next action */ + dtrace_actkind_t dtad_kind; /* kind of action */ + uint32_t dtad_ntuple; /* number in tuple */ + uint64_t dtad_arg; /* action argument */ + uint64_t dtad_uarg; /* user argument */ + int dtad_refcnt; /* reference count */ +} dtrace_actdesc_t; + +typedef struct dtrace_ecbdesc { + dtrace_actdesc_t *dted_action; /* action description(s) */ + dtrace_preddesc_t dted_pred; /* predicate description */ + dtrace_probedesc_t dted_probe; /* probe description */ + uint64_t dted_uarg; /* library argument */ + int dted_refcnt; /* reference count */ +} dtrace_ecbdesc_t; + +/* + * DTrace Metadata Description Structures + * + * DTrace separates the trace data stream from the metadata stream. The only + * metadata tokens placed in the data stream are enabled probe identifiers + * (EPIDs) or (in the case of aggregations) aggregation identifiers. In order + * to determine the structure of the data, DTrace consumers pass the token to + * the kernel, and receive in return a corresponding description of the enabled + * probe (via the dtrace_eprobedesc structure) or the aggregation (via the + * dtrace_aggdesc structure). Both of these structures are expressed in terms + * of record descriptions (via the dtrace_recdesc structure) that describe the + * exact structure of the data. Some record descriptions may also contain a + * format identifier; this additional bit of metadata can be retrieved from the + * kernel, for which a format description is returned via the dtrace_fmtdesc + * structure. Note that all four of these structures must be bitness-neutral + * to allow for a 32-bit DTrace consumer on a 64-bit kernel. + */ +typedef struct dtrace_recdesc { + dtrace_actkind_t dtrd_action; /* kind of action */ + uint32_t dtrd_size; /* size of record */ + uint32_t dtrd_offset; /* offset in ECB's data */ + uint16_t dtrd_alignment; /* required alignment */ + uint16_t dtrd_format; /* format, if any */ + uint64_t dtrd_arg; /* action argument */ + uint64_t dtrd_uarg; /* user argument */ +} dtrace_recdesc_t; + +typedef struct dtrace_eprobedesc { + dtrace_epid_t dtepd_epid; /* enabled probe ID */ + dtrace_id_t dtepd_probeid; /* probe ID */ + uint64_t dtepd_uarg; /* library argument */ + uint32_t dtepd_size; /* total size */ + int dtepd_nrecs; /* number of records */ + dtrace_recdesc_t dtepd_rec[1]; /* records themselves */ +} dtrace_eprobedesc_t; + +typedef struct dtrace_aggdesc { + DTRACE_PTR(char, dtagd_name); /* not filled in by kernel */ + dtrace_aggvarid_t dtagd_varid; /* not filled in by kernel */ + int dtagd_flags; /* not filled in by kernel */ + dtrace_aggid_t dtagd_id; /* aggregation ID */ + dtrace_epid_t dtagd_epid; /* enabled probe ID */ + uint32_t dtagd_size; /* size in bytes */ + int dtagd_nrecs; /* number of records */ + uint32_t dtagd_pad; /* explicit padding */ + dtrace_recdesc_t dtagd_rec[1]; /* record descriptions */ +} dtrace_aggdesc_t; + +typedef struct dtrace_fmtdesc { + DTRACE_PTR(char, dtfd_string); /* format string */ + int dtfd_length; /* length of format string */ + uint16_t dtfd_format; /* format identifier */ +} dtrace_fmtdesc_t; + +#define DTRACE_SIZEOF_EPROBEDESC(desc) \ + (sizeof (dtrace_eprobedesc_t) + ((desc)->dtepd_nrecs ? \ + (((desc)->dtepd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) + +#define DTRACE_SIZEOF_AGGDESC(desc) \ + (sizeof (dtrace_aggdesc_t) + ((desc)->dtagd_nrecs ? \ + (((desc)->dtagd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) + +/* + * DTrace Option Interface + * + * Run-time DTrace options are set and retrieved via DOF_SECT_OPTDESC sections + * in a DOF image. The dof_optdesc structure contains an option identifier and + * an option value. The valid option identifiers are found below; the mapping + * between option identifiers and option identifying strings is maintained at + * user-level. Note that the value of DTRACEOPT_UNSET is such that all of the + * following are potentially valid option values: all positive integers, zero + * and negative one. Some options (notably "bufpolicy" and "bufresize") take + * predefined tokens as their values; these are defined with + * DTRACEOPT_{option}_{token}. + */ +#define DTRACEOPT_BUFSIZE 0 /* buffer size */ +#define DTRACEOPT_BUFPOLICY 1 /* buffer policy */ +#define DTRACEOPT_DYNVARSIZE 2 /* dynamic variable size */ +#define DTRACEOPT_AGGSIZE 3 /* aggregation size */ +#define DTRACEOPT_SPECSIZE 4 /* speculation size */ +#define DTRACEOPT_NSPEC 5 /* number of speculations */ +#define DTRACEOPT_STRSIZE 6 /* string size */ +#define DTRACEOPT_CLEANRATE 7 /* dynvar cleaning rate */ +#define DTRACEOPT_CPU 8 /* CPU to trace */ +#define DTRACEOPT_BUFRESIZE 9 /* buffer resizing policy */ +#define DTRACEOPT_GRABANON 10 /* grab anonymous state, if any */ +#define DTRACEOPT_FLOWINDENT 11 /* indent function entry/return */ +#define DTRACEOPT_QUIET 12 /* only output explicitly traced data */ +#define DTRACEOPT_STACKFRAMES 13 /* number of stack frames */ +#define DTRACEOPT_USTACKFRAMES 14 /* number of user stack frames */ +#define DTRACEOPT_AGGRATE 15 /* aggregation snapshot rate */ +#define DTRACEOPT_SWITCHRATE 16 /* buffer switching rate */ +#define DTRACEOPT_STATUSRATE 17 /* status rate */ +#define DTRACEOPT_DESTRUCTIVE 18 /* destructive actions allowed */ +#define DTRACEOPT_STACKINDENT 19 /* output indent for stack traces */ +#define DTRACEOPT_RAWBYTES 20 /* always print bytes in raw form */ +#define DTRACEOPT_JSTACKFRAMES 21 /* number of jstack() frames */ +#define DTRACEOPT_JSTACKSTRSIZE 22 /* size of jstack() string table */ +#define DTRACEOPT_AGGSORTKEY 23 /* sort aggregations by key */ +#define DTRACEOPT_AGGSORTREV 24 /* reverse-sort aggregations */ +#define DTRACEOPT_AGGSORTPOS 25 /* agg. position to sort on */ +#define DTRACEOPT_AGGSORTKEYPOS 26 /* agg. key position to sort on */ +#define DTRACEOPT_MAX 27 /* number of options */ + +#define DTRACEOPT_UNSET (dtrace_optval_t)-2 /* unset option */ + +#define DTRACEOPT_BUFPOLICY_RING 0 /* ring buffer */ +#define DTRACEOPT_BUFPOLICY_FILL 1 /* fill buffer, then stop */ +#define DTRACEOPT_BUFPOLICY_SWITCH 2 /* switch buffers */ + +#define DTRACEOPT_BUFRESIZE_AUTO 0 /* automatic resizing */ +#define DTRACEOPT_BUFRESIZE_MANUAL 1 /* manual resizing */ + +/* + * DTrace Buffer Interface + * + * In order to get a snapshot of the principal or aggregation buffer, + * user-level passes a buffer description to the kernel with the dtrace_bufdesc + * structure. This describes which CPU user-level is interested in, and + * where user-level wishes the kernel to snapshot the buffer to (the + * dtbd_data field). The kernel uses the same structure to pass back some + * information regarding the buffer: the size of data actually copied out, the + * number of drops, the number of errors, and the offset of the oldest record. + * If the buffer policy is a "switch" policy, taking a snapshot of the + * principal buffer has the additional effect of switching the active and + * inactive buffers. Taking a snapshot of the aggregation buffer _always_ has + * the additional effect of switching the active and inactive buffers. + */ +typedef struct dtrace_bufdesc { + uint64_t dtbd_size; /* size of buffer */ + uint32_t dtbd_cpu; /* CPU or DTRACE_CPUALL */ + uint32_t dtbd_errors; /* number of errors */ + uint64_t dtbd_drops; /* number of drops */ + DTRACE_PTR(char, dtbd_data); /* data */ + uint64_t dtbd_oldest; /* offset of oldest record */ +} dtrace_bufdesc_t; + +/* + * DTrace Status + * + * The status of DTrace is relayed via the dtrace_status structure. This + * structure contains members to count drops other than the capacity drops + * available via the buffer interface (see above). This consists of dynamic + * drops (including capacity dynamic drops, rinsing drops and dirty drops), and + * speculative drops (including capacity speculative drops, drops due to busy + * speculative buffers and drops due to unavailable speculative buffers). + * Additionally, the status structure contains a field to indicate the number + * of "fill"-policy buffers have been filled and a boolean field to indicate + * that exit() has been called. If the dtst_exiting field is non-zero, no + * further data will be generated until tracing is stopped (at which time any + * enablings of the END action will be processed); if user-level sees that + * this field is non-zero, tracing should be stopped as soon as possible. + */ +typedef struct dtrace_status { + uint64_t dtst_dyndrops; /* dynamic drops */ + uint64_t dtst_dyndrops_rinsing; /* dyn drops due to rinsing */ + uint64_t dtst_dyndrops_dirty; /* dyn drops due to dirty */ + uint64_t dtst_specdrops; /* speculative drops */ + uint64_t dtst_specdrops_busy; /* spec drops due to busy */ + uint64_t dtst_specdrops_unavail; /* spec drops due to unavail */ + uint64_t dtst_errors; /* total errors */ + uint64_t dtst_filled; /* number of filled bufs */ + uint64_t dtst_stkstroverflows; /* stack string tab overflows */ + uint64_t dtst_dblerrors; /* errors in ERROR probes */ + char dtst_killed; /* non-zero if killed */ + char dtst_exiting; /* non-zero if exit() called */ + char dtst_pad[6]; /* pad out to 64-bit align */ +} dtrace_status_t; + +/* + * DTrace Configuration + * + * User-level may need to understand some elements of the kernel DTrace + * configuration in order to generate correct DIF. This information is + * conveyed via the dtrace_conf structure. + */ +typedef struct dtrace_conf { + uint_t dtc_difversion; /* supported DIF version */ + uint_t dtc_difintregs; /* # of DIF integer registers */ + uint_t dtc_diftupregs; /* # of DIF tuple registers */ + uint_t dtc_ctfmodel; /* CTF data model */ + uint_t dtc_pad[8]; /* reserved for future use */ +} dtrace_conf_t; + +/* + * DTrace Faults + * + * The constants below DTRACEFLT_LIBRARY indicate probe processing faults; + * constants at or above DTRACEFLT_LIBRARY indicate faults in probe + * postprocessing at user-level. Probe processing faults induce an ERROR + * probe and are replicated in unistd.d to allow users' ERROR probes to decode + * the error condition using thse symbolic labels. + */ +#define DTRACEFLT_UNKNOWN 0 /* Unknown fault */ +#define DTRACEFLT_BADADDR 1 /* Bad address */ +#define DTRACEFLT_BADALIGN 2 /* Bad alignment */ +#define DTRACEFLT_ILLOP 3 /* Illegal operation */ +#define DTRACEFLT_DIVZERO 4 /* Divide-by-zero */ +#define DTRACEFLT_NOSCRATCH 5 /* Out of scratch space */ +#define DTRACEFLT_KPRIV 6 /* Illegal kernel access */ +#define DTRACEFLT_UPRIV 7 /* Illegal user access */ +#define DTRACEFLT_TUPOFLOW 8 /* Tuple stack overflow */ +#define DTRACEFLT_BADSTACK 9 /* Bad stack */ + +#define DTRACEFLT_LIBRARY 1000 /* Library-level fault */ + +/* + * DTrace Argument Types + * + * Because it would waste both space and time, argument types do not reside + * with the probe. In order to determine argument types for args[X] + * variables, the D compiler queries for argument types on a probe-by-probe + * basis. (This optimizes for the common case that arguments are either not + * used or used in an untyped fashion.) Typed arguments are specified with a + * string of the type name in the dtragd_native member of the argument + * description structure. Typed arguments may be further translated to types + * of greater stability; the provider indicates such a translated argument by + * filling in the dtargd_xlate member with the string of the translated type. + * Finally, the provider may indicate which argument value a given argument + * maps to by setting the dtargd_mapping member -- allowing a single argument + * to map to multiple args[X] variables. + */ +typedef struct dtrace_argdesc { + dtrace_id_t dtargd_id; /* probe identifier */ + int dtargd_ndx; /* arg number (-1 iff none) */ + int dtargd_mapping; /* value mapping */ + char dtargd_native[DTRACE_ARGTYPELEN]; /* native type name */ + char dtargd_xlate[DTRACE_ARGTYPELEN]; /* translated type name */ +} dtrace_argdesc_t; + +/* + * DTrace Stability Attributes + * + * Each DTrace provider advertises the name and data stability of each of its + * probe description components, as well as its architectural dependencies. + * The D compiler can query the provider attributes (dtrace_pattr_t below) in + * order to compute the properties of an input program and report them. + */ +typedef uint8_t dtrace_stability_t; /* stability code (see attributes(5)) */ +typedef uint8_t dtrace_class_t; /* architectural dependency class */ + +#define DTRACE_STABILITY_INTERNAL 0 /* private to DTrace itself */ +#define DTRACE_STABILITY_PRIVATE 1 /* private to Sun (see docs) */ +#define DTRACE_STABILITY_OBSOLETE 2 /* scheduled for removal */ +#define DTRACE_STABILITY_EXTERNAL 3 /* not controlled by Sun */ +#define DTRACE_STABILITY_UNSTABLE 4 /* new or rapidly changing */ +#define DTRACE_STABILITY_EVOLVING 5 /* less rapidly changing */ +#define DTRACE_STABILITY_STABLE 6 /* mature interface from Sun */ +#define DTRACE_STABILITY_STANDARD 7 /* industry standard */ +#define DTRACE_STABILITY_MAX 7 /* maximum valid stability */ + +#define DTRACE_CLASS_UNKNOWN 0 /* unknown architectural dependency */ +#define DTRACE_CLASS_CPU 1 /* CPU-module-specific */ +#define DTRACE_CLASS_PLATFORM 2 /* platform-specific (uname -i) */ +#define DTRACE_CLASS_GROUP 3 /* hardware-group-specific (uname -m) */ +#define DTRACE_CLASS_ISA 4 /* ISA-specific (uname -p) */ +#define DTRACE_CLASS_COMMON 5 /* common to all systems */ +#define DTRACE_CLASS_MAX 5 /* maximum valid class */ + +#define DTRACE_PRIV_NONE 0x0000 +#define DTRACE_PRIV_KERNEL 0x0001 +#define DTRACE_PRIV_USER 0x0002 +#define DTRACE_PRIV_PROC 0x0004 +#define DTRACE_PRIV_OWNER 0x0008 +#define DTRACE_PRIV_ZONEOWNER 0x0010 + +#define DTRACE_PRIV_ALL \ + (DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER | \ + DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER | DTRACE_PRIV_ZONEOWNER) + +typedef struct dtrace_ppriv { + uint32_t dtpp_flags; /* privilege flags */ + uid_t dtpp_uid; /* user ID */ + zoneid_t dtpp_zoneid; /* zone ID */ +} dtrace_ppriv_t; + +typedef struct dtrace_attribute { + dtrace_stability_t dtat_name; /* entity name stability */ + dtrace_stability_t dtat_data; /* entity data stability */ + dtrace_class_t dtat_class; /* entity data dependency */ +} dtrace_attribute_t; + +typedef struct dtrace_pattr { + dtrace_attribute_t dtpa_provider; /* provider attributes */ + dtrace_attribute_t dtpa_mod; /* module attributes */ + dtrace_attribute_t dtpa_func; /* function attributes */ + dtrace_attribute_t dtpa_name; /* name attributes */ + dtrace_attribute_t dtpa_args; /* args[] attributes */ +} dtrace_pattr_t; + +typedef struct dtrace_providerdesc { + char dtvd_name[DTRACE_PROVNAMELEN]; /* provider name */ + dtrace_pattr_t dtvd_attr; /* stability attributes */ + dtrace_ppriv_t dtvd_priv; /* privileges required */ +} dtrace_providerdesc_t; + +/* + * DTrace Pseudodevice Interface + * + * DTrace is controlled through ioctl(2)'s to the in-kernel dtrace:dtrace + * pseudodevice driver. These ioctls comprise the user-kernel interface to + * DTrace. + */ +#define DTRACEIOC (('d' << 24) | ('t' << 16) | ('r' << 8)) +#define DTRACEIOC_PROVIDER (DTRACEIOC | 1) /* provider query */ +#define DTRACEIOC_PROBES (DTRACEIOC | 2) /* probe query */ +#define DTRACEIOC_BUFSNAP (DTRACEIOC | 4) /* snapshot buffer */ +#define DTRACEIOC_PROBEMATCH (DTRACEIOC | 5) /* match probes */ +#define DTRACEIOC_ENABLE (DTRACEIOC | 6) /* enable probes */ +#define DTRACEIOC_AGGSNAP (DTRACEIOC | 7) /* snapshot agg. */ +#define DTRACEIOC_EPROBE (DTRACEIOC | 8) /* get eprobe desc. */ +#define DTRACEIOC_PROBEARG (DTRACEIOC | 9) /* get probe arg */ +#define DTRACEIOC_CONF (DTRACEIOC | 10) /* get config. */ +#define DTRACEIOC_STATUS (DTRACEIOC | 11) /* get status */ +#define DTRACEIOC_GO (DTRACEIOC | 12) /* start tracing */ +#define DTRACEIOC_STOP (DTRACEIOC | 13) /* stop tracing */ +#define DTRACEIOC_AGGDESC (DTRACEIOC | 15) /* get agg. desc. */ +#define DTRACEIOC_FORMAT (DTRACEIOC | 16) /* get format str */ +#define DTRACEIOC_DOFGET (DTRACEIOC | 17) /* get DOF */ +#define DTRACEIOC_REPLICATE (DTRACEIOC | 18) /* replicate enab */ + +/* + * DTrace Helpers + * + * In general, DTrace establishes probes in processes and takes actions on + * processes without knowing their specific user-level structures. Instead of + * existing in the framework, process-specific knowledge is contained by the + * enabling D program -- which can apply process-specific knowledge by making + * appropriate use of DTrace primitives like copyin() and copyinstr() to + * operate on user-level data. However, there may exist some specific probes + * of particular semantic relevance that the application developer may wish to + * explicitly export. For example, an application may wish to export a probe + * at the point that it begins and ends certain well-defined transactions. In + * addition to providing probes, programs may wish to offer assistance for + * certain actions. For example, in highly dynamic environments (e.g., Java), + * it may be difficult to obtain a stack trace in terms of meaningful symbol + * names (the translation from instruction addresses to corresponding symbol + * names may only be possible in situ); these environments may wish to define + * a series of actions to be applied in situ to obtain a meaningful stack + * trace. + * + * These two mechanisms -- user-level statically defined tracing and assisting + * DTrace actions -- are provided via DTrace _helpers_. Helpers are specified + * via DOF, but unlike enabling DOF, helper DOF may contain definitions of + * providers, probes and their arguments. If a helper wishes to provide + * action assistance, probe descriptions and corresponding DIF actions may be + * specified in the helper DOF. For such helper actions, however, the probe + * description describes the specific helper: all DTrace helpers have the + * provider name "dtrace" and the module name "helper", and the name of the + * helper is contained in the function name (for example, the ustack() helper + * is named "ustack"). Any helper-specific name may be contained in the name + * (for example, if a helper were to have a constructor, it might be named + * "dtrace:helper:<helper>:init"). Helper actions are only called when the + * action that they are helping is taken. Helper actions may only return DIF + * expressions, and may only call the following subroutines: + * + * alloca() <= Allocates memory out of the consumer's scratch space + * bcopy() <= Copies memory to scratch space + * copyin() <= Copies memory from user-level into consumer's scratch + * copyinto() <= Copies memory into a specific location in scratch + * copyinstr() <= Copies a string into a specific location in scratch + * + * Helper actions may only access the following built-in variables: + * + * curthread <= Current kthread_t pointer + * tid <= Current thread identifier + * pid <= Current process identifier + * ppid <= Parent process identifier + * uid <= Current user ID + * gid <= Current group ID + * execname <= Current executable name + * zonename <= Current zone name + * + * Helper actions may not manipulate or allocate dynamic variables, but they + * may have clause-local and statically-allocated global variables. The + * helper action variable state is specific to the helper action -- variables + * used by the helper action may not be accessed outside of the helper + * action, and the helper action may not access variables that like outside + * of it. Helper actions may not load from kernel memory at-large; they are + * restricting to loading current user state (via copyin() and variants) and + * scratch space. As with probe enablings, helper actions are executed in + * program order. The result of the helper action is the result of the last + * executing helper expression. + * + * Helpers -- composed of either providers/probes or probes/actions (or both) + * -- are added by opening the "helper" minor node, and issuing an ioctl(2) + * (DTRACEHIOC_ADDDOF) that specifies the dof_helper_t structure. This + * encapsulates the name and base address of the user-level library or + * executable publishing the helpers and probes as well as the DOF that + * contains the definitions of those helpers and probes. + * + * The DTRACEHIOC_ADD and DTRACEHIOC_REMOVE are left in place for legacy + * helpers and should no longer be used. No other ioctls are valid on the + * helper minor node. + */ +#define DTRACEHIOC (('d' << 24) | ('t' << 16) | ('h' << 8)) +#define DTRACEHIOC_ADD (DTRACEHIOC | 1) /* add helper */ +#define DTRACEHIOC_REMOVE (DTRACEHIOC | 2) /* remove helper */ +#define DTRACEHIOC_ADDDOF (DTRACEHIOC | 3) /* add helper DOF */ + +typedef struct dof_helper { + char dofhp_mod[DTRACE_MODNAMELEN]; /* executable or library name */ + uint64_t dofhp_addr; /* base address of object */ + uint64_t dofhp_dof; /* address of helper DOF */ +} dof_helper_t; + +#define DTRACEMNR_DTRACE "dtrace" /* node for DTrace ops */ +#define DTRACEMNR_HELPER "helper" /* node for helpers */ +#define DTRACEMNRN_DTRACE 0 /* minor for DTrace ops */ +#define DTRACEMNRN_HELPER 1 /* minor for helpers */ +#define DTRACEMNRN_CLONE 2 /* first clone minor */ + +#ifdef _KERNEL + +/* + * DTrace Provider API + * + * The following functions are implemented by the DTrace framework and are + * used to implement separate in-kernel DTrace providers. Common functions + * are provided in uts/common/os/dtrace.c. ISA-dependent subroutines are + * defined in uts/<isa>/dtrace/dtrace_asm.s or uts/<isa>/dtrace/dtrace_isa.c. + * + * The provider API has two halves: the API that the providers consume from + * DTrace, and the API that providers make available to DTrace. + * + * 1 Framework-to-Provider API + * + * 1.1 Overview + * + * The Framework-to-Provider API is represented by the dtrace_pops structure + * that the provider passes to the framework when registering itself. This + * structure consists of the following members: + * + * dtps_provide() <-- Provide all probes, all modules + * dtps_provide_module() <-- Provide all probes in specified module + * dtps_enable() <-- Enable specified probe + * dtps_disable() <-- Disable specified probe + * dtps_suspend() <-- Suspend specified probe + * dtps_resume() <-- Resume specified probe + * dtps_getargdesc() <-- Get the argument description for args[X] + * dtps_getargval() <-- Get the value for an argX or args[X] variable + * dtps_usermode() <-- Find out if the probe was fired in user mode + * dtps_destroy() <-- Destroy all state associated with this probe + * + * 1.2 void dtps_provide(void *arg, const dtrace_probedesc_t *spec) + * + * 1.2.1 Overview + * + * Called to indicate that the provider should provide all probes. If the + * specified description is non-NULL, dtps_provide() is being called because + * no probe matched a specified probe -- if the provider has the ability to + * create custom probes, it may wish to create a probe that matches the + * specified description. + * + * 1.2.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_register(). The + * second argument is a pointer to a probe description that the provider may + * wish to consider when creating custom probes. The provider is expected to + * call back into the DTrace framework via dtrace_probe_create() to create + * any necessary probes. dtps_provide() may be called even if the provider + * has made available all probes; the provider should check the return value + * of dtrace_probe_create() to handle this case. Note that the provider need + * not implement both dtps_provide() and dtps_provide_module(); see + * "Arguments and Notes" for dtrace_register(), below. + * + * 1.2.3 Return value + * + * None. + * + * 1.2.4 Caller's context + * + * dtps_provide() is typically called from open() or ioctl() context, but may + * be called from other contexts as well. The DTrace framework is locked in + * such a way that providers may not register or unregister. This means that + * the provider may not call any DTrace API that affects its registration with + * the framework, including dtrace_register(), dtrace_unregister(), + * dtrace_invalidate(), and dtrace_condense(). However, the context is such + * that the provider may (and indeed, is expected to) call probe-related + * DTrace routines, including dtrace_probe_create(), dtrace_probe_lookup(), + * and dtrace_probe_arg(). + * + * 1.3 void dtps_provide_module(void *arg, struct modctl *mp) + * + * 1.3.1 Overview + * + * Called to indicate that the provider should provide all probes in the + * specified module. + * + * 1.3.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_register(). The + * second argument is a pointer to a modctl structure that indicates the + * module for which probes should be created. + * + * 1.3.3 Return value + * + * None. + * + * 1.3.4 Caller's context + * + * dtps_provide_module() may be called from open() or ioctl() context, but + * may also be called from a module loading context. mod_lock is held, and + * the DTrace framework is locked in such a way that providers may not + * register or unregister. This means that the provider may not call any + * DTrace API that affects its registration with the framework, including + * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and + * dtrace_condense(). However, the context is such that the provider may (and + * indeed, is expected to) call probe-related DTrace routines, including + * dtrace_probe_create(), dtrace_probe_lookup(), and dtrace_probe_arg(). Note + * that the provider need not implement both dtps_provide() and + * dtps_provide_module(); see "Arguments and Notes" for dtrace_register(), + * below. + * + * 1.4 void dtps_enable(void *arg, dtrace_id_t id, void *parg) + * + * 1.4.1 Overview + * + * Called to enable the specified probe. + * + * 1.4.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_register(). The + * second argument is the identifier of the probe to be enabled. The third + * argument is the probe argument as passed to dtrace_probe_create(). + * dtps_enable() will be called when a probe transitions from not being + * enabled at all to having one or more ECB. The number of ECBs associated + * with the probe may change without subsequent calls into the provider. + * When the number of ECBs drops to zero, the provider will be explicitly + * told to disable the probe via dtps_disable(). dtrace_probe() should never + * be called for a probe identifier that hasn't been explicitly enabled via + * dtps_enable(). + * + * 1.4.3 Return value + * + * None. + * + * 1.4.4 Caller's context + * + * The DTrace framework is locked in such a way that it may not be called + * back into at all. cpu_lock is held. mod_lock is not held and may not + * be acquired. + * + * 1.5 void dtps_disable(void *arg, dtrace_id_t id, void *parg) + * + * 1.5.1 Overview + * + * Called to disable the specified probe. + * + * 1.5.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_register(). The + * second argument is the identifier of the probe to be disabled. The third + * argument is the probe argument as passed to dtrace_probe_create(). + * dtps_disable() will be called when a probe transitions from being enabled + * to having zero ECBs. dtrace_probe() should never be called for a probe + * identifier that has been explicitly enabled via dtps_disable(). + * + * 1.5.3 Return value + * + * None. + * + * 1.5.4 Caller's context + * + * The DTrace framework is locked in such a way that it may not be called + * back into at all. cpu_lock is held. mod_lock is not held and may not + * be acquired. + * + * 1.6 void dtps_suspend(void *arg, dtrace_id_t id, void *parg) + * + * 1.6.1 Overview + * + * Called to suspend the specified enabled probe. This entry point is for + * providers that may need to suspend some or all of their probes when CPUs + * are being powered on or when the boot monitor is being entered for a + * prolonged period of time. + * + * 1.6.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_register(). The + * second argument is the identifier of the probe to be suspended. The + * third argument is the probe argument as passed to dtrace_probe_create(). + * dtps_suspend will only be called on an enabled probe. Providers that + * provide a dtps_suspend entry point will want to take roughly the action + * that it takes for dtps_disable. + * + * 1.6.3 Return value + * + * None. + * + * 1.6.4 Caller's context + * + * Interrupts are disabled. The DTrace framework is in a state such that the + * specified probe cannot be disabled or destroyed for the duration of + * dtps_suspend(). As interrupts are disabled, the provider is afforded + * little latitude; the provider is expected to do no more than a store to + * memory. + * + * 1.7 void dtps_resume(void *arg, dtrace_id_t id, void *parg) + * + * 1.7.1 Overview + * + * Called to resume the specified enabled probe. This entry point is for + * providers that may need to resume some or all of their probes after the + * completion of an event that induced a call to dtps_suspend(). + * + * 1.7.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_register(). The + * second argument is the identifier of the probe to be resumed. The + * third argument is the probe argument as passed to dtrace_probe_create(). + * dtps_resume will only be called on an enabled probe. Providers that + * provide a dtps_resume entry point will want to take roughly the action + * that it takes for dtps_enable. + * + * 1.7.3 Return value + * + * None. + * + * 1.7.4 Caller's context + * + * Interrupts are disabled. The DTrace framework is in a state such that the + * specified probe cannot be disabled or destroyed for the duration of + * dtps_resume(). As interrupts are disabled, the provider is afforded + * little latitude; the provider is expected to do no more than a store to + * memory. + * + * 1.8 void dtps_getargdesc(void *arg, dtrace_id_t id, void *parg, + * dtrace_argdesc_t *desc) + * + * 1.8.1 Overview + * + * Called to retrieve the argument description for an args[X] variable. + * + * 1.8.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_register(). The + * second argument is the identifier of the current probe. The third + * argument is the probe argument as passed to dtrace_probe_create(). The + * fourth argument is a pointer to the argument description. This + * description is both an input and output parameter: it contains the + * index of the desired argument in the dtargd_ndx field, and expects + * the other fields to be filled in upon return. If there is no argument + * corresponding to the specified index, the dtargd_ndx field should be set + * to DTRACE_ARGNONE. + * + * 1.8.3 Return value + * + * None. The dtargd_ndx, dtargd_native, dtargd_xlate and dtargd_mapping + * members of the dtrace_argdesc_t structure are all output values. + * + * 1.8.4 Caller's context + * + * dtps_getargdesc() is called from ioctl() context. mod_lock is held, and + * the DTrace framework is locked in such a way that providers may not + * register or unregister. This means that the provider may not call any + * DTrace API that affects its registration with the framework, including + * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and + * dtrace_condense(). + * + * 1.9 uint64_t dtps_getargval(void *arg, dtrace_id_t id, void *parg, + * int argno, int aframes) + * + * 1.9.1 Overview + * + * Called to retrieve a value for an argX or args[X] variable. + * + * 1.9.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_register(). The + * second argument is the identifier of the current probe. The third + * argument is the probe argument as passed to dtrace_probe_create(). The + * fourth argument is the number of the argument (the X in the example in + * 1.9.1). The fifth argument is the number of stack frames that were used + * to get from the actual place in the code that fired the probe to + * dtrace_probe() itself, the so-called artificial frames. This argument may + * be used to descend an appropriate number of frames to find the correct + * values. If this entry point is left NULL, the dtrace_getarg() built-in + * function is used. + * + * 1.9.3 Return value + * + * The value of the argument. + * + * 1.9.4 Caller's context + * + * This is called from within dtrace_probe() meaning that interrupts + * are disabled. No locks should be taken within this entry point. + * + * 1.10 int dtps_usermode(void *arg, dtrace_id_t id, void *parg) + * + * 1.10.1 Overview + * + * Called to determine if the probe was fired in a user context. + * + * 1.10.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_register(). The + * second argument is the identifier of the current probe. The third + * argument is the probe argument as passed to dtrace_probe_create(). This + * entry point must not be left NULL for providers whose probes allow for + * mixed mode tracing, that is to say those probes that can fire during + * kernel- _or_ user-mode execution + * + * 1.10.3 Return value + * + * A boolean value. + * + * 1.10.4 Caller's context + * + * This is called from within dtrace_probe() meaning that interrupts + * are disabled. No locks should be taken within this entry point. + * + * 1.11 void dtps_destroy(void *arg, dtrace_id_t id, void *parg) + * + * 1.11.1 Overview + * + * Called to destroy the specified probe. + * + * 1.11.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_register(). The + * second argument is the identifier of the probe to be destroyed. The third + * argument is the probe argument as passed to dtrace_probe_create(). The + * provider should free all state associated with the probe. The framework + * guarantees that dtps_destroy() is only called for probes that have either + * been disabled via dtps_disable() or were never enabled via dtps_enable(). + * Once dtps_disable() has been called for a probe, no further call will be + * made specifying the probe. + * + * 1.11.3 Return value + * + * None. + * + * 1.11.4 Caller's context + * + * The DTrace framework is locked in such a way that it may not be called + * back into at all. mod_lock is held. cpu_lock is not held, and may not be + * acquired. + * + * + * 2 Provider-to-Framework API + * + * 2.1 Overview + * + * The Provider-to-Framework API provides the mechanism for the provider to + * register itself with the DTrace framework, to create probes, to lookup + * probes and (most importantly) to fire probes. The Provider-to-Framework + * consists of: + * + * dtrace_register() <-- Register a provider with the DTrace framework + * dtrace_unregister() <-- Remove a provider's DTrace registration + * dtrace_invalidate() <-- Invalidate the specified provider + * dtrace_condense() <-- Remove a provider's unenabled probes + * dtrace_attached() <-- Indicates whether or not DTrace has attached + * dtrace_probe_create() <-- Create a DTrace probe + * dtrace_probe_lookup() <-- Lookup a DTrace probe based on its name + * dtrace_probe_arg() <-- Return the probe argument for a specific probe + * dtrace_probe() <-- Fire the specified probe + * + * 2.2 int dtrace_register(const char *name, const dtrace_pattr_t *pap, + * uint32_t priv, cred_t *cr, const dtrace_pops_t *pops, void *arg, + * dtrace_provider_id_t *idp) + * + * 2.2.1 Overview + * + * dtrace_register() registers the calling provider with the DTrace + * framework. It should generally be called by DTrace providers in their + * attach(9E) entry point. + * + * 2.2.2 Arguments and Notes + * + * The first argument is the name of the provider. The second argument is a + * pointer to the stability attributes for the provider. The third argument + * is the privilege flags for the provider, and must be some combination of: + * + * DTRACE_PRIV_NONE <= All users may enable probes from this provider + * + * DTRACE_PRIV_PROC <= Any user with privilege of PRIV_DTRACE_PROC may + * enable probes from this provider + * + * DTRACE_PRIV_USER <= Any user with privilege of PRIV_DTRACE_USER may + * enable probes from this provider + * + * DTRACE_PRIV_KERNEL <= Any user with privilege of PRIV_DTRACE_KERNEL + * may enable probes from this provider + * + * DTRACE_PRIV_OWNER <= This flag places an additional constraint on + * the privilege requirements above. These probes + * require either (a) a user ID matching the user + * ID of the cred passed in the fourth argument + * or (b) the PRIV_PROC_OWNER privilege. + * + * DTRACE_PRIV_ZONEOWNER<= This flag places an additional constraint on + * the privilege requirements above. These probes + * require either (a) a zone ID matching the zone + * ID of the cred passed in the fourth argument + * or (b) the PRIV_PROC_ZONE privilege. + * + * Note that these flags designate the _visibility_ of the probes, not + * the conditions under which they may or may not fire. + * + * The fourth argument is the credential that is associated with the + * provider. This argument should be NULL if the privilege flags don't + * include DTRACE_PRIV_OWNER or DTRACE_PRIV_ZONEOWNER. If non-NULL, the + * framework stashes the uid and zoneid represented by this credential + * for use at probe-time, in implicit predicates. These limit visibility + * of the probes to users and/or zones which have sufficient privilege to + * access them. + * + * The fifth argument is a DTrace provider operations vector, which provides + * the implementation for the Framework-to-Provider API. (See Section 1, + * above.) This must be non-NULL, and each member must be non-NULL. The + * exceptions to this are (1) the dtps_provide() and dtps_provide_module() + * members (if the provider so desires, _one_ of these members may be left + * NULL -- denoting that the provider only implements the other) and (2) + * the dtps_suspend() and dtps_resume() members, which must either both be + * NULL or both be non-NULL. + * + * The sixth argument is a cookie to be specified as the first argument for + * each function in the Framework-to-Provider API. This argument may have + * any value. + * + * The final argument is a pointer to dtrace_provider_id_t. If + * dtrace_register() successfully completes, the provider identifier will be + * stored in the memory pointed to be this argument. This argument must be + * non-NULL. + * + * 2.2.3 Return value + * + * On success, dtrace_register() returns 0 and stores the new provider's + * identifier into the memory pointed to by the idp argument. On failure, + * dtrace_register() returns an errno: + * + * EINVAL The arguments passed to dtrace_register() were somehow invalid. + * This may because a parameter that must be non-NULL was NULL, + * because the name was invalid (either empty or an illegal + * provider name) or because the attributes were invalid. + * + * No other failure code is returned. + * + * 2.2.4 Caller's context + * + * dtrace_register() may induce calls to dtrace_provide(); the provider must + * hold no locks across dtrace_register() that may also be acquired by + * dtrace_provide(). cpu_lock and mod_lock must not be held. + * + * 2.3 int dtrace_unregister(dtrace_provider_t id) + * + * 2.3.1 Overview + * + * Unregisters the specified provider from the DTrace framework. It should + * generally be called by DTrace providers in their detach(9E) entry point. + * + * 2.3.2 Arguments and Notes + * + * The only argument is the provider identifier, as returned from a + * successful call to dtrace_register(). As a result of calling + * dtrace_unregister(), the DTrace framework will call back into the provider + * via the dtps_destroy() entry point. Once dtrace_unregister() successfully + * completes, however, the DTrace framework will no longer make calls through + * the Framework-to-Provider API. + * + * 2.3.3 Return value + * + * On success, dtrace_unregister returns 0. On failure, dtrace_unregister() + * returns an errno: + * + * EBUSY There are currently processes that have the DTrace pseudodevice + * open, or there exists an anonymous enabling that hasn't yet + * been claimed. + * + * No other failure code is returned. + * + * 2.3.4 Caller's context + * + * Because a call to dtrace_unregister() may induce calls through the + * Framework-to-Provider API, the caller may not hold any lock across + * dtrace_register() that is also acquired in any of the Framework-to- + * Provider API functions. Additionally, mod_lock may not be held. + * + * 2.4 void dtrace_invalidate(dtrace_provider_id_t id) + * + * 2.4.1 Overview + * + * Invalidates the specified provider. All subsequent probe lookups for the + * specified provider will fail, but its probes will not be removed. + * + * 2.4.2 Arguments and note + * + * The only argument is the provider identifier, as returned from a + * successful call to dtrace_register(). In general, a provider's probes + * always remain valid; dtrace_invalidate() is a mechanism for invalidating + * an entire provider, regardless of whether or not probes are enabled or + * not. Note that dtrace_invalidate() will _not_ prevent already enabled + * probes from firing -- it will merely prevent any new enablings of the + * provider's probes. + * + * 2.5 int dtrace_condense(dtrace_provider_id_t id) + * + * 2.5.1 Overview + * + * Removes all the unenabled probes for the given provider. This function is + * not unlike dtrace_unregister(), except that it doesn't remove the + * provider just as many of its associated probes as it can. + * + * 2.5.2 Arguments and Notes + * + * As with dtrace_unregister(), the sole argument is the provider identifier + * as returned from a successful call to dtrace_register(). As a result of + * calling dtrace_condense(), the DTrace framework will call back into the + * given provider's dtps_destroy() entry point for each of the provider's + * unenabled probes. + * + * 2.5.3 Return value + * + * Currently, dtrace_condense() always returns 0. However, consumers of this + * function should check the return value as appropriate; its behavior may + * change in the future. + * + * 2.5.4 Caller's context + * + * As with dtrace_unregister(), the caller may not hold any lock across + * dtrace_condense() that is also acquired in the provider's entry points. + * Also, mod_lock may not be held. + * + * 2.6 int dtrace_attached() + * + * 2.6.1 Overview + * + * Indicates whether or not DTrace has attached. + * + * 2.6.2 Arguments and Notes + * + * For most providers, DTrace makes initial contact beyond registration. + * That is, once a provider has registered with DTrace, it waits to hear + * from DTrace to create probes. However, some providers may wish to + * proactively create probes without first being told by DTrace to do so. + * If providers wish to do this, they must first call dtrace_attached() to + * determine if DTrace itself has attached. If dtrace_attached() returns 0, + * the provider must not make any other Provider-to-Framework API call. + * + * 2.6.3 Return value + * + * dtrace_attached() returns 1 if DTrace has attached, 0 otherwise. + * + * 2.7 int dtrace_probe_create(dtrace_provider_t id, const char *mod, + * const char *func, const char *name, int aframes, void *arg) + * + * 2.7.1 Overview + * + * Creates a probe with specified module name, function name, and name. + * + * 2.7.2 Arguments and Notes + * + * The first argument is the provider identifier, as returned from a + * successful call to dtrace_register(). The second, third, and fourth + * arguments are the module name, function name, and probe name, + * respectively. Of these, module name and function name may both be NULL + * (in which case the probe is considered to be unanchored), or they may both + * be non-NULL. The name must be non-NULL, and must point to a non-empty + * string. + * + * The fifth argument is the number of artificial stack frames that will be + * found on the stack when dtrace_probe() is called for the new probe. These + * artificial frames will be automatically be pruned should the stack() or + * stackdepth() functions be called as part of one of the probe's ECBs. If + * the parameter doesn't add an artificial frame, this parameter should be + * zero. + * + * The final argument is a probe argument that will be passed back to the + * provider when a probe-specific operation is called. (e.g., via + * dtps_enable(), dtps_disable(), etc.) + * + * Note that it is up to the provider to be sure that the probe that it + * creates does not already exist -- if the provider is unsure of the probe's + * existence, it should assure its absence with dtrace_probe_lookup() before + * calling dtrace_probe_create(). + * + * 2.7.3 Return value + * + * dtrace_probe_create() always succeeds, and always returns the identifier + * of the newly-created probe. + * + * 2.7.4 Caller's context + * + * While dtrace_probe_create() is generally expected to be called from + * dtps_provide() and/or dtps_provide_module(), it may be called from other + * non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. + * + * 2.8 dtrace_id_t dtrace_probe_lookup(dtrace_provider_t id, const char *mod, + * const char *func, const char *name) + * + * 2.8.1 Overview + * + * Looks up a probe based on provdider and one or more of module name, + * function name and probe name. + * + * 2.8.2 Arguments and Notes + * + * The first argument is the provider identifier, as returned from a + * successful call to dtrace_register(). The second, third, and fourth + * arguments are the module name, function name, and probe name, + * respectively. Any of these may be NULL; dtrace_probe_lookup() will return + * the identifier of the first probe that is provided by the specified + * provider and matches all of the non-NULL matching criteria. + * dtrace_probe_lookup() is generally used by a provider to be check the + * existence of a probe before creating it with dtrace_probe_create(). + * + * 2.8.3 Return value + * + * If the probe exists, returns its identifier. If the probe does not exist, + * return DTRACE_IDNONE. + * + * 2.8.4 Caller's context + * + * While dtrace_probe_lookup() is generally expected to be called from + * dtps_provide() and/or dtps_provide_module(), it may also be called from + * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. + * + * 2.9 void *dtrace_probe_arg(dtrace_provider_t id, dtrace_id_t probe) + * + * 2.9.1 Overview + * + * Returns the probe argument associated with the specified probe. + * + * 2.9.2 Arguments and Notes + * + * The first argument is the provider identifier, as returned from a + * successful call to dtrace_register(). The second argument is a probe + * identifier, as returned from dtrace_probe_lookup() or + * dtrace_probe_create(). This is useful if a probe has multiple + * provider-specific components to it: the provider can create the probe + * once with provider-specific state, and then add to the state by looking + * up the probe based on probe identifier. + * + * 2.9.3 Return value + * + * Returns the argument associated with the specified probe. If the + * specified probe does not exist, or if the specified probe is not provided + * by the specified provider, NULL is returned. + * + * 2.9.4 Caller's context + * + * While dtrace_probe_arg() is generally expected to be called from + * dtps_provide() and/or dtps_provide_module(), it may also be called from + * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. + * + * 2.10 void dtrace_probe(dtrace_id_t probe, uintptr_t arg0, uintptr_t arg1, + * uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) + * + * 2.10.1 Overview + * + * The epicenter of DTrace: fires the specified probes with the specified + * arguments. + * + * 2.10.2 Arguments and Notes + * + * The first argument is a probe identifier as returned by + * dtrace_probe_create() or dtrace_probe_lookup(). The second through sixth + * arguments are the values to which the D variables "arg0" through "arg4" + * will be mapped. + * + * dtrace_probe() should be called whenever the specified probe has fired -- + * however the provider defines it. + * + * 2.10.3 Return value + * + * None. + * + * 2.10.4 Caller's context + * + * dtrace_probe() may be called in virtually any context: kernel, user, + * interrupt, high-level interrupt, with arbitrary adaptive locks held, with + * dispatcher locks held, with interrupts disabled, etc. The only latitude + * that must be afforded to DTrace is the ability to make calls within + * itself (and to its in-kernel subroutines) and the ability to access + * arbitrary (but mapped) memory. On some platforms, this constrains + * context. For example, on UltraSPARC, dtrace_probe() cannot be called + * from any context in which TL is greater than zero. dtrace_probe() may + * also not be called from any routine which may be called by dtrace_probe() + * -- which includes functions in the DTrace framework and some in-kernel + * DTrace subroutines. All such functions "dtrace_"; providers that + * instrument the kernel arbitrarily should be sure to not instrument these + * routines. + */ +typedef struct dtrace_pops { + void (*dtps_provide)(void *arg, const dtrace_probedesc_t *spec); + void (*dtps_provide_module)(void *arg, struct modctl *mp); + void (*dtps_enable)(void *arg, dtrace_id_t id, void *parg); + void (*dtps_disable)(void *arg, dtrace_id_t id, void *parg); + void (*dtps_suspend)(void *arg, dtrace_id_t id, void *parg); + void (*dtps_resume)(void *arg, dtrace_id_t id, void *parg); + void (*dtps_getargdesc)(void *arg, dtrace_id_t id, void *parg, + dtrace_argdesc_t *desc); + uint64_t (*dtps_getargval)(void *arg, dtrace_id_t id, void *parg, + int argno, int aframes); + int (*dtps_usermode)(void *arg, dtrace_id_t id, void *parg); + void (*dtps_destroy)(void *arg, dtrace_id_t id, void *parg); +} dtrace_pops_t; + +typedef uintptr_t dtrace_provider_id_t; + +extern int dtrace_register(const char *, const dtrace_pattr_t *, uint32_t, + cred_t *, const dtrace_pops_t *, void *, dtrace_provider_id_t *); +extern int dtrace_unregister(dtrace_provider_id_t); +extern int dtrace_condense(dtrace_provider_id_t); +extern void dtrace_invalidate(dtrace_provider_id_t); +extern dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t, const char *, + const char *, const char *); +extern dtrace_id_t dtrace_probe_create(dtrace_provider_id_t, const char *, + const char *, const char *, int, void *); +extern void *dtrace_probe_arg(dtrace_provider_id_t, dtrace_id_t); +extern void dtrace_probe(dtrace_id_t, uintptr_t arg0, uintptr_t arg1, + uintptr_t arg2, uintptr_t arg3, uintptr_t arg4); + +/* + * DTrace Meta Provider API + * + * The following functions are implemented by the DTrace framework and are + * used to implement meta providers. Meta providers plug into the DTrace + * framework and are used to instantiate new providers on the fly. At + * present, there is only one type of meta provider and only one meta + * provider may be registered with the DTrace framework at a time. The + * sole meta provider type provides user-land static tracing facilities + * by taking meta probe descriptions and adding a corresponding provider + * into the DTrace framework. + * + * 1 Framework-to-Provider + * + * 1.1 Overview + * + * The Framework-to-Provider API is represented by the dtrace_mops structure + * that the meta provider passes to the framework when registering itself as + * a meta provider. This structure consists of the following members: + * + * dtms_create_probe() <-- Add a new probe to a created provider + * dtms_provide_pid() <-- Create a new provider for a given process + * dtms_remove_pid() <-- Remove a previously created provider + * + * 1.2 void dtms_create_probe(void *arg, void *parg, + * dtrace_helper_probedesc_t *probedesc); + * + * 1.2.1 Overview + * + * Called by the DTrace framework to create a new probe in a provider + * created by this meta provider. + * + * 1.2.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_meta_register(). + * The second argument is the provider cookie for the associated provider; + * this is obtained from the return value of dtms_provide_pid(). The third + * argument is the helper probe description. + * + * 1.2.3 Return value + * + * None + * + * 1.2.4 Caller's context + * + * dtms_create_probe() is called from either ioctl() or module load context. + * The DTrace framework is locked in such a way that meta providers may not + * register or unregister. This means that the meta provider cannot call + * dtrace_meta_register() or dtrace_meta_unregister(). However, the context is + * such that the provider may (and is expected to) call provider-related + * DTrace provider APIs including dtrace_probe_create(). + * + * 1.3 void *dtms_provide_pid(void *arg, dtrace_meta_provider_t *mprov, + * pid_t pid) + * + * 1.3.1 Overview + * + * Called by the DTrace framework to instantiate a new provider given the + * description of the provider and probes in the mprov argument. The + * meta provider should call dtrace_register() to insert the new provider + * into the DTrace framework. + * + * 1.3.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_meta_register(). + * The second argument is a pointer to a structure describing the new + * helper provider. The third argument is the process identifier for + * process associated with this new provider. Note that the name of the + * provider as passed to dtrace_register() should be the contatenation of + * the dtmpb_provname member of the mprov argument and the processs + * identifier as a string. + * + * 1.3.3 Return value + * + * The cookie for the provider that the meta provider creates. This is + * the same value that it passed to dtrace_register(). + * + * 1.3.4 Caller's context + * + * dtms_provide_pid() is called from either ioctl() or module load context. + * The DTrace framework is locked in such a way that meta providers may not + * register or unregister. This means that the meta provider cannot call + * dtrace_meta_register() or dtrace_meta_unregister(). However, the context + * is such that the provider may -- and is expected to -- call + * provider-related DTrace provider APIs including dtrace_register(). + * + * 1.4 void dtms_remove_pid(void *arg, dtrace_meta_provider_t *mprov, + * pid_t pid) + * + * 1.4.1 Overview + * + * Called by the DTrace framework to remove a provider that had previously + * been instantiated via the dtms_provide_pid() entry point. The meta + * provider need not remove the provider immediately, but this entry + * point indicates that the provider should be removed as soon as possible + * using the dtrace_unregister() API. + * + * 1.4.2 Arguments and notes + * + * The first argument is the cookie as passed to dtrace_meta_register(). + * The second argument is a pointer to a structure describing the helper + * provider. The third argument is the process identifier for process + * associated with this new provider. + * + * 1.4.3 Return value + * + * None + * + * 1.4.4 Caller's context + * + * dtms_remove_pid() is called from either ioctl() or exit() context. + * The DTrace framework is locked in such a way that meta providers may not + * register or unregister. This means that the meta provider cannot call + * dtrace_meta_register() or dtrace_meta_unregister(). However, the context + * is such that the provider may -- and is expected to -- call + * provider-related DTrace provider APIs including dtrace_unregister(). + */ +typedef struct dtrace_helper_probedesc { + char *dthpb_mod; /* probe module */ + char *dthpb_func; /* probe function */ + char *dthpb_name; /* probe name */ + uint64_t dthpb_base; /* base address */ + uint32_t *dthpb_offs; /* offsets array */ + uint32_t *dthpb_enoffs; /* is-enabled offsets array */ + uint32_t dthpb_noffs; /* offsets count */ + uint32_t dthpb_nenoffs; /* is-enabled offsets count */ + uint8_t *dthpb_args; /* argument mapping array */ + uint8_t dthpb_xargc; /* translated argument count */ + uint8_t dthpb_nargc; /* native argument count */ + char *dthpb_xtypes; /* translated types strings */ + char *dthpb_ntypes; /* native types strings */ +} dtrace_helper_probedesc_t; + +typedef struct dtrace_helper_provdesc { + char *dthpv_provname; /* provider name */ + dtrace_pattr_t dthpv_pattr; /* stability attributes */ +} dtrace_helper_provdesc_t; + +typedef struct dtrace_mops { + void (*dtms_create_probe)(void *, void *, dtrace_helper_probedesc_t *); + void *(*dtms_provide_pid)(void *, dtrace_helper_provdesc_t *, pid_t); + void (*dtms_remove_pid)(void *, dtrace_helper_provdesc_t *, pid_t); +} dtrace_mops_t; + +typedef uintptr_t dtrace_meta_provider_id_t; + +extern int dtrace_meta_register(const char *, const dtrace_mops_t *, void *, + dtrace_meta_provider_id_t *); +extern int dtrace_meta_unregister(dtrace_meta_provider_id_t); + +/* + * DTrace Kernel Hooks + * + * The following functions are implemented by the base kernel and form a set of + * hooks used by the DTrace framework. DTrace hooks are implemented in either + * uts/common/os/dtrace_subr.c, an ISA-specific assembly file, or in a + * uts/<platform>/os/dtrace_subr.c corresponding to each hardware platform. + */ + +typedef enum dtrace_vtime_state { + DTRACE_VTIME_INACTIVE = 0, /* No DTrace, no TNF */ + DTRACE_VTIME_ACTIVE, /* DTrace virtual time, no TNF */ + DTRACE_VTIME_INACTIVE_TNF, /* No DTrace, TNF active */ + DTRACE_VTIME_ACTIVE_TNF /* DTrace virtual time _and_ TNF */ +} dtrace_vtime_state_t; + +extern dtrace_vtime_state_t dtrace_vtime_active; +extern void dtrace_vtime_switch(kthread_t *next); +extern void dtrace_vtime_enable_tnf(void); +extern void dtrace_vtime_disable_tnf(void); +extern void dtrace_vtime_enable(void); +extern void dtrace_vtime_disable(void); + +struct regs; + +extern int (*dtrace_pid_probe_ptr)(struct regs *); +extern int (*dtrace_return_probe_ptr)(struct regs *); +extern void (*dtrace_fasttrap_fork_ptr)(proc_t *, proc_t *); +extern void (*dtrace_fasttrap_exec_ptr)(proc_t *); +extern void (*dtrace_fasttrap_exit_ptr)(proc_t *); +extern void dtrace_fasttrap_fork(proc_t *, proc_t *); + +typedef uintptr_t dtrace_icookie_t; +typedef void (*dtrace_xcall_t)(void *); + +extern dtrace_icookie_t dtrace_interrupt_disable(void); +extern void dtrace_interrupt_enable(dtrace_icookie_t); + +extern void dtrace_membar_producer(void); +extern void dtrace_membar_consumer(void); + +extern void (*dtrace_cpu_init)(processorid_t); +extern void (*dtrace_modload)(struct modctl *); +extern void (*dtrace_modunload)(struct modctl *); +extern void (*dtrace_helpers_cleanup)(); +extern void (*dtrace_helpers_fork)(proc_t *parent, proc_t *child); +extern void (*dtrace_cpustart_init)(); +extern void (*dtrace_cpustart_fini)(); + +extern void (*dtrace_debugger_init)(); +extern void (*dtrace_debugger_fini)(); +extern dtrace_cacheid_t dtrace_predcache_id; + +extern hrtime_t dtrace_gethrtime(void); +extern void dtrace_sync(void); +extern void dtrace_toxic_ranges(void (*)(uintptr_t, uintptr_t)); +extern void dtrace_xcall(processorid_t, dtrace_xcall_t, void *); +extern void dtrace_vpanic(const char *, __va_list); +extern void dtrace_panic(const char *, ...); + +extern int dtrace_safe_defer_signal(void); +extern void dtrace_safe_synchronous_signal(void); + +extern int dtrace_mach_aframes(void); + +#if defined(__i386) || defined(__amd64) +extern int dtrace_instr_size(uchar_t *instr); +extern int dtrace_instr_size_isa(uchar_t *, model_t, int *); +extern void dtrace_invop_add(int (*)(uintptr_t, uintptr_t *, uintptr_t)); +extern void dtrace_invop_remove(int (*)(uintptr_t, uintptr_t *, uintptr_t)); +extern void dtrace_invop_callsite(void); +#endif + +#ifdef __sparc +extern int dtrace_blksuword32(uintptr_t, uint32_t *, int); +extern void dtrace_getfsr(uint64_t *); +#endif + +#define DTRACE_CPUFLAG_ISSET(flag) \ + (cpu_core[CPU->cpu_id].cpuc_dtrace_flags & (flag)) + +#define DTRACE_CPUFLAG_SET(flag) \ + (cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= (flag)) + +#define DTRACE_CPUFLAG_CLEAR(flag) \ + (cpu_core[CPU->cpu_id].cpuc_dtrace_flags &= ~(flag)) + +#endif /* _KERNEL */ + +#endif /* _ASM */ + +#if defined(__i386) || defined(__amd64) + +#define DTRACE_INVOP_PUSHL_EBP 1 +#define DTRACE_INVOP_POPL_EBP 2 +#define DTRACE_INVOP_LEAVE 3 +#define DTRACE_INVOP_NOP 4 +#define DTRACE_INVOP_RET 5 + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DTRACE_H */ diff --git a/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h b/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h new file mode 100644 index 0000000..fed537e --- /dev/null +++ b/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h @@ -0,0 +1,1298 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_DTRACE_IMPL_H +#define _SYS_DTRACE_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * DTrace Dynamic Tracing Software: Kernel Implementation Interfaces + * + * Note: The contents of this file are private to the implementation of the + * Solaris system and DTrace subsystem and are subject to change at any time + * without notice. Applications and drivers using these interfaces will fail + * to run on future releases. These interfaces should not be used for any + * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB). + * Please refer to the "Solaris Dynamic Tracing Guide" for more information. + */ + +#include <sys/dtrace.h> + +/* + * DTrace Implementation Constants and Typedefs + */ +#define DTRACE_MAXPROPLEN 128 +#define DTRACE_DYNVAR_CHUNKSIZE 256 + +struct dtrace_probe; +struct dtrace_ecb; +struct dtrace_predicate; +struct dtrace_action; +struct dtrace_provider; +struct dtrace_state; + +typedef struct dtrace_probe dtrace_probe_t; +typedef struct dtrace_ecb dtrace_ecb_t; +typedef struct dtrace_predicate dtrace_predicate_t; +typedef struct dtrace_action dtrace_action_t; +typedef struct dtrace_provider dtrace_provider_t; +typedef struct dtrace_meta dtrace_meta_t; +typedef struct dtrace_state dtrace_state_t; +typedef uint32_t dtrace_optid_t; +typedef uint32_t dtrace_specid_t; +typedef uint64_t dtrace_genid_t; + +/* + * DTrace Probes + * + * The probe is the fundamental unit of the DTrace architecture. Probes are + * created by DTrace providers, and managed by the DTrace framework. A probe + * is identified by a unique <provider, module, function, name> tuple, and has + * a unique probe identifier assigned to it. (Some probes are not associated + * with a specific point in text; these are called _unanchored probes_ and have + * no module or function associated with them.) Probes are represented as a + * dtrace_probe structure. To allow quick lookups based on each element of the + * probe tuple, probes are hashed by each of provider, module, function and + * name. (If a lookup is performed based on a regular expression, a + * dtrace_probekey is prepared, and a linear search is performed.) Each probe + * is additionally pointed to by a linear array indexed by its identifier. The + * identifier is the provider's mechanism for indicating to the DTrace + * framework that a probe has fired: the identifier is passed as the first + * argument to dtrace_probe(), where it is then mapped into the corresponding + * dtrace_probe structure. From the dtrace_probe structure, dtrace_probe() can + * iterate over the probe's list of enabling control blocks; see "DTrace + * Enabling Control Blocks", below.) + */ +struct dtrace_probe { + dtrace_id_t dtpr_id; /* probe identifier */ + dtrace_ecb_t *dtpr_ecb; /* ECB list; see below */ + dtrace_ecb_t *dtpr_ecb_last; /* last ECB in list */ + void *dtpr_arg; /* provider argument */ + dtrace_cacheid_t dtpr_predcache; /* predicate cache ID */ + int dtpr_aframes; /* artificial frames */ + dtrace_provider_t *dtpr_provider; /* pointer to provider */ + char *dtpr_mod; /* probe's module name */ + char *dtpr_func; /* probe's function name */ + char *dtpr_name; /* probe's name */ + dtrace_probe_t *dtpr_nextmod; /* next in module hash */ + dtrace_probe_t *dtpr_prevmod; /* previous in module hash */ + dtrace_probe_t *dtpr_nextfunc; /* next in function hash */ + dtrace_probe_t *dtpr_prevfunc; /* previous in function hash */ + dtrace_probe_t *dtpr_nextname; /* next in name hash */ + dtrace_probe_t *dtpr_prevname; /* previous in name hash */ + dtrace_genid_t dtpr_gen; /* probe generation ID */ +}; + +typedef int dtrace_probekey_f(const char *, const char *, int); + +typedef struct dtrace_probekey { + const char *dtpk_prov; /* provider name to match */ + dtrace_probekey_f *dtpk_pmatch; /* provider matching function */ + const char *dtpk_mod; /* module name to match */ + dtrace_probekey_f *dtpk_mmatch; /* module matching function */ + const char *dtpk_func; /* func name to match */ + dtrace_probekey_f *dtpk_fmatch; /* func matching function */ + const char *dtpk_name; /* name to match */ + dtrace_probekey_f *dtpk_nmatch; /* name matching function */ + dtrace_id_t dtpk_id; /* identifier to match */ +} dtrace_probekey_t; + +typedef struct dtrace_hashbucket { + struct dtrace_hashbucket *dthb_next; /* next on hash chain */ + dtrace_probe_t *dthb_chain; /* chain of probes */ + int dthb_len; /* number of probes here */ +} dtrace_hashbucket_t; + +typedef struct dtrace_hash { + dtrace_hashbucket_t **dth_tab; /* hash table */ + int dth_size; /* size of hash table */ + int dth_mask; /* mask to index into table */ + int dth_nbuckets; /* total number of buckets */ + uintptr_t dth_nextoffs; /* offset of next in probe */ + uintptr_t dth_prevoffs; /* offset of prev in probe */ + uintptr_t dth_stroffs; /* offset of str in probe */ +} dtrace_hash_t; + +/* + * DTrace Enabling Control Blocks + * + * When a provider wishes to fire a probe, it calls into dtrace_probe(), + * passing the probe identifier as the first argument. As described above, + * dtrace_probe() maps the identifier into a pointer to a dtrace_probe_t + * structure. This structure contains information about the probe, and a + * pointer to the list of Enabling Control Blocks (ECBs). Each ECB points to + * DTrace consumer state, and contains an optional predicate, and a list of + * actions. (Shown schematically below.) The ECB abstraction allows a single + * probe to be multiplexed across disjoint consumers, or across disjoint + * enablings of a single probe within one consumer. + * + * Enabling Control Block + * dtrace_ecb_t + * +------------------------+ + * | dtrace_epid_t ---------+--------------> Enabled Probe ID (EPID) + * | dtrace_state_t * ------+--------------> State associated with this ECB + * | dtrace_predicate_t * --+---------+ + * | dtrace_action_t * -----+----+ | + * | dtrace_ecb_t * ---+ | | | Predicate (if any) + * +-------------------+----+ | | dtrace_predicate_t + * | | +---> +--------------------+ + * | | | dtrace_difo_t * ---+----> DIFO + * | | +--------------------+ + * | | + * Next ECB | | Action + * (if any) | | dtrace_action_t + * : +--> +-------------------+ + * : | dtrace_actkind_t -+------> kind + * v | dtrace_difo_t * --+------> DIFO (if any) + * | dtrace_recdesc_t -+------> record descr. + * | dtrace_action_t * +------+ + * +-------------------+ | + * | Next action + * +-------------------------------+ (if any) + * | + * | Action + * | dtrace_action_t + * +--> +-------------------+ + * | dtrace_actkind_t -+------> kind + * | dtrace_difo_t * --+------> DIFO (if any) + * | dtrace_action_t * +------+ + * +-------------------+ | + * | Next action + * +-------------------------------+ (if any) + * | + * : + * v + * + * + * dtrace_probe() iterates over the ECB list. If the ECB needs less space + * than is available in the principal buffer, the ECB is processed: if the + * predicate is non-NULL, the DIF object is executed. If the result is + * non-zero, the action list is processed, with each action being executed + * accordingly. When the action list has been completely executed, processing + * advances to the next ECB. processing advances to the next ECB. If the + * result is non-zero; For each ECB, it first determines the The ECB + * abstraction allows disjoint consumers to multiplex on single probes. + */ +struct dtrace_ecb { + dtrace_epid_t dte_epid; /* enabled probe ID */ + uint32_t dte_alignment; /* required alignment */ + size_t dte_needed; /* bytes needed */ + size_t dte_size; /* total size of payload */ + dtrace_predicate_t *dte_predicate; /* predicate, if any */ + dtrace_action_t *dte_action; /* actions, if any */ + dtrace_ecb_t *dte_next; /* next ECB on probe */ + dtrace_state_t *dte_state; /* pointer to state */ + uint32_t dte_cond; /* security condition */ + dtrace_probe_t *dte_probe; /* pointer to probe */ + dtrace_action_t *dte_action_last; /* last action on ECB */ + uint64_t dte_uarg; /* library argument */ +}; + +struct dtrace_predicate { + dtrace_difo_t *dtp_difo; /* DIF object */ + dtrace_cacheid_t dtp_cacheid; /* cache identifier */ + int dtp_refcnt; /* reference count */ +}; + +struct dtrace_action { + dtrace_actkind_t dta_kind; /* kind of action */ + uint16_t dta_intuple; /* boolean: in aggregation */ + uint32_t dta_refcnt; /* reference count */ + dtrace_difo_t *dta_difo; /* pointer to DIFO */ + dtrace_recdesc_t dta_rec; /* record description */ + dtrace_action_t *dta_prev; /* previous action */ + dtrace_action_t *dta_next; /* next action */ +}; + +typedef struct dtrace_aggregation { + dtrace_action_t dtag_action; /* action; must be first */ + dtrace_aggid_t dtag_id; /* identifier */ + dtrace_ecb_t *dtag_ecb; /* corresponding ECB */ + dtrace_action_t *dtag_first; /* first action in tuple */ + uint32_t dtag_base; /* base of aggregation */ + uint8_t dtag_hasarg; /* boolean: has argument */ + uint64_t dtag_initial; /* initial value */ + void (*dtag_aggregate)(uint64_t *, uint64_t, uint64_t); +} dtrace_aggregation_t; + +/* + * DTrace Buffers + * + * Principal buffers, aggregation buffers, and speculative buffers are all + * managed with the dtrace_buffer structure. By default, this structure + * includes twin data buffers -- dtb_tomax and dtb_xamot -- that serve as the + * active and passive buffers, respectively. For speculative buffers, + * dtb_xamot will be NULL; for "ring" and "fill" buffers, dtb_xamot will point + * to a scratch buffer. For all buffer types, the dtrace_buffer structure is + * always allocated on a per-CPU basis; a single dtrace_buffer structure is + * never shared among CPUs. (That is, there is never true sharing of the + * dtrace_buffer structure; to prevent false sharing of the structure, it must + * always be aligned to the coherence granularity -- generally 64 bytes.) + * + * One of the critical design decisions of DTrace is that a given ECB always + * stores the same quantity and type of data. This is done to assure that the + * only metadata required for an ECB's traced data is the EPID. That is, from + * the EPID, the consumer can determine the data layout. (The data buffer + * layout is shown schematically below.) By assuring that one can determine + * data layout from the EPID, the metadata stream can be separated from the + * data stream -- simplifying the data stream enormously. + * + * base of data buffer ---> +------+--------------------+------+ + * | EPID | data | EPID | + * +------+--------+------+----+------+ + * | data | EPID | data | + * +---------------+------+-----------+ + * | data, cont. | + * +------+--------------------+------+ + * | EPID | data | | + * +------+--------------------+ | + * | || | + * | || | + * | \/ | + * : : + * . . + * . . + * . . + * : : + * | | + * limit of data buffer ---> +----------------------------------+ + * + * When evaluating an ECB, dtrace_probe() determines if the ECB's needs of the + * principal buffer (both scratch and payload) exceed the available space. If + * the ECB's needs exceed available space (and if the principal buffer policy + * is the default "switch" policy), the ECB is dropped, the buffer's drop count + * is incremented, and processing advances to the next ECB. If the ECB's needs + * can be met with the available space, the ECB is processed, but the offset in + * the principal buffer is only advanced if the ECB completes processing + * without error. + * + * When a buffer is to be switched (either because the buffer is the principal + * buffer with a "switch" policy or because it is an aggregation buffer), a + * cross call is issued to the CPU associated with the buffer. In the cross + * call context, interrupts are disabled, and the active and the inactive + * buffers are atomically switched. This involves switching the data pointers, + * copying the various state fields (offset, drops, errors, etc.) into their + * inactive equivalents, and clearing the state fields. Because interrupts are + * disabled during this procedure, the switch is guaranteed to appear atomic to + * dtrace_probe(). + * + * DTrace Ring Buffering + * + * To process a ring buffer correctly, one must know the oldest valid record. + * Processing starts at the oldest record in the buffer and continues until + * the end of the buffer is reached. Processing then resumes starting with + * the record stored at offset 0 in the buffer, and continues until the + * youngest record is processed. If trace records are of a fixed-length, + * determining the oldest record is trivial: + * + * - If the ring buffer has not wrapped, the oldest record is the record + * stored at offset 0. + * + * - If the ring buffer has wrapped, the oldest record is the record stored + * at the current offset. + * + * With variable length records, however, just knowing the current offset + * doesn't suffice for determining the oldest valid record: assuming that one + * allows for arbitrary data, one has no way of searching forward from the + * current offset to find the oldest valid record. (That is, one has no way + * of separating data from metadata.) It would be possible to simply refuse to + * process any data in the ring buffer between the current offset and the + * limit, but this leaves (potentially) an enormous amount of otherwise valid + * data unprocessed. + * + * To effect ring buffering, we track two offsets in the buffer: the current + * offset and the _wrapped_ offset. If a request is made to reserve some + * amount of data, and the buffer has wrapped, the wrapped offset is + * incremented until the wrapped offset minus the current offset is greater + * than or equal to the reserve request. This is done by repeatedly looking + * up the ECB corresponding to the EPID at the current wrapped offset, and + * incrementing the wrapped offset by the size of the data payload + * corresponding to that ECB. If this offset is greater than or equal to the + * limit of the data buffer, the wrapped offset is set to 0. Thus, the + * current offset effectively "chases" the wrapped offset around the buffer. + * Schematically: + * + * base of data buffer ---> +------+--------------------+------+ + * | EPID | data | EPID | + * +------+--------+------+----+------+ + * | data | EPID | data | + * +---------------+------+-----------+ + * | data, cont. | + * +------+---------------------------+ + * | EPID | data | + * current offset ---> +------+---------------------------+ + * | invalid data | + * wrapped offset ---> +------+--------------------+------+ + * | EPID | data | EPID | + * +------+--------+------+----+------+ + * | data | EPID | data | + * +---------------+------+-----------+ + * : : + * . . + * . ... valid data ... . + * . . + * : : + * +------+-------------+------+------+ + * | EPID | data | EPID | data | + * +------+------------++------+------+ + * | data, cont. | leftover | + * limit of data buffer ---> +-------------------+--------------+ + * + * If the amount of requested buffer space exceeds the amount of space + * available between the current offset and the end of the buffer: + * + * (1) all words in the data buffer between the current offset and the limit + * of the data buffer (marked "leftover", above) are set to + * DTRACE_EPIDNONE + * + * (2) the wrapped offset is set to zero + * + * (3) the iteration process described above occurs until the wrapped offset + * is greater than the amount of desired space. + * + * The wrapped offset is implemented by (re-)using the inactive offset. + * In a "switch" buffer policy, the inactive offset stores the offset in + * the inactive buffer; in a "ring" buffer policy, it stores the wrapped + * offset. + * + * DTrace Scratch Buffering + * + * Some ECBs may wish to allocate dynamically-sized temporary scratch memory. + * To accommodate such requests easily, scratch memory may be allocated in + * the buffer beyond the current offset plus the needed memory of the current + * ECB. If there isn't sufficient room in the buffer for the requested amount + * of scratch space, the allocation fails and an error is generated. Scratch + * memory is tracked in the dtrace_mstate_t and is automatically freed when + * the ECB ceases processing. Note that ring buffers cannot allocate their + * scratch from the principal buffer -- lest they needlessly overwrite older, + * valid data. Ring buffers therefore have their own dedicated scratch buffer + * from which scratch is allocated. + */ +#define DTRACEBUF_RING 0x0001 /* bufpolicy set to "ring" */ +#define DTRACEBUF_FILL 0x0002 /* bufpolicy set to "fill" */ +#define DTRACEBUF_NOSWITCH 0x0004 /* do not switch buffer */ +#define DTRACEBUF_WRAPPED 0x0008 /* ring buffer has wrapped */ +#define DTRACEBUF_DROPPED 0x0010 /* drops occurred */ +#define DTRACEBUF_ERROR 0x0020 /* errors occurred */ +#define DTRACEBUF_FULL 0x0040 /* "fill" buffer is full */ +#define DTRACEBUF_CONSUMED 0x0080 /* buffer has been consumed */ +#define DTRACEBUF_INACTIVE 0x0100 /* buffer is not yet active */ + +typedef struct dtrace_buffer { + uint64_t dtb_offset; /* current offset in buffer */ + uint64_t dtb_size; /* size of buffer */ + uint32_t dtb_flags; /* flags */ + uint32_t dtb_drops; /* number of drops */ + caddr_t dtb_tomax; /* active buffer */ + caddr_t dtb_xamot; /* inactive buffer */ + uint32_t dtb_xamot_flags; /* inactive flags */ + uint32_t dtb_xamot_drops; /* drops in inactive buffer */ + uint64_t dtb_xamot_offset; /* offset in inactive buffer */ + uint32_t dtb_errors; /* number of errors */ + uint32_t dtb_xamot_errors; /* errors in inactive buffer */ +#ifndef _LP64 + uint64_t dtb_pad1; +#endif +} dtrace_buffer_t; + +/* + * DTrace Aggregation Buffers + * + * Aggregation buffers use much of the same mechanism as described above + * ("DTrace Buffers"). However, because an aggregation is fundamentally a + * hash, there exists dynamic metadata associated with an aggregation buffer + * that is not associated with other kinds of buffers. This aggregation + * metadata is _only_ relevant for the in-kernel implementation of + * aggregations; it is not actually relevant to user-level consumers. To do + * this, we allocate dynamic aggregation data (hash keys and hash buckets) + * starting below the _limit_ of the buffer, and we allocate data from the + * _base_ of the buffer. When the aggregation buffer is copied out, _only_ the + * data is copied out; the metadata is simply discarded. Schematically, + * aggregation buffers look like: + * + * base of data buffer ---> +-------+------+-----------+-------+ + * | aggid | key | value | aggid | + * +-------+------+-----------+-------+ + * | key | + * +-------+-------+-----+------------+ + * | value | aggid | key | value | + * +-------+------++-----+------+-----+ + * | aggid | key | value | | + * +-------+------+-------------+ | + * | || | + * | || | + * | \/ | + * : : + * . . + * . . + * . . + * : : + * | /\ | + * | || +------------+ + * | || | | + * +---------------------+ | + * | hash keys | + * | (dtrace_aggkey structures) | + * | | + * +----------------------------------+ + * | hash buckets | + * | (dtrace_aggbuffer structure) | + * | | + * limit of data buffer ---> +----------------------------------+ + * + * + * As implied above, just as we assure that ECBs always store a constant + * amount of data, we assure that a given aggregation -- identified by its + * aggregation ID -- always stores data of a constant quantity and type. + * As with EPIDs, this allows the aggregation ID to serve as the metadata for a + * given record. + * + * Note that the size of the dtrace_aggkey structure must be sizeof (uintptr_t) + * aligned. (If this the structure changes such that this becomes false, an + * assertion will fail in dtrace_aggregate().) + */ +typedef struct dtrace_aggkey { + uint32_t dtak_hashval; /* hash value */ + uint32_t dtak_action:4; /* action -- 4 bits */ + uint32_t dtak_size:28; /* size -- 28 bits */ + caddr_t dtak_data; /* data pointer */ + struct dtrace_aggkey *dtak_next; /* next in hash chain */ +} dtrace_aggkey_t; + +typedef struct dtrace_aggbuffer { + uintptr_t dtagb_hashsize; /* number of buckets */ + uintptr_t dtagb_free; /* free list of keys */ + dtrace_aggkey_t **dtagb_hash; /* hash table */ +} dtrace_aggbuffer_t; + +/* + * DTrace Speculations + * + * Speculations have a per-CPU buffer and a global state. Once a speculation + * buffer has been comitted or discarded, it cannot be reused until all CPUs + * have taken the same action (commit or discard) on their respective + * speculative buffer. However, because DTrace probes may execute in arbitrary + * context, other CPUs cannot simply be cross-called at probe firing time to + * perform the necessary commit or discard. The speculation states thus + * optimize for the case that a speculative buffer is only active on one CPU at + * the time of a commit() or discard() -- for if this is the case, other CPUs + * need not take action, and the speculation is immediately available for + * reuse. If the speculation is active on multiple CPUs, it must be + * asynchronously cleaned -- potentially leading to a higher rate of dirty + * speculative drops. The speculation states are as follows: + * + * DTRACESPEC_INACTIVE <= Initial state; inactive speculation + * DTRACESPEC_ACTIVE <= Allocated, but not yet speculatively traced to + * DTRACESPEC_ACTIVEONE <= Speculatively traced to on one CPU + * DTRACESPEC_ACTIVEMANY <= Speculatively traced to on more than one CPU + * DTRACESPEC_COMMITTING <= Currently being commited on one CPU + * DTRACESPEC_COMMITTINGMANY <= Currently being commited on many CPUs + * DTRACESPEC_DISCARDING <= Currently being discarded on many CPUs + * + * The state transition diagram is as follows: + * + * +----------------------------------------------------------+ + * | | + * | +------------+ | + * | +-------------------| COMMITTING |<-----------------+ | + * | | +------------+ | | + * | | copied spec. ^ commit() on | | discard() on + * | | into principal | active CPU | | active CPU + * | | | commit() | | + * V V | | | + * +----------+ +--------+ +-----------+ + * | INACTIVE |---------------->| ACTIVE |--------------->| ACTIVEONE | + * +----------+ speculation() +--------+ speculate() +-----------+ + * ^ ^ | | | + * | | | discard() | | + * | | asynchronously | discard() on | | speculate() + * | | cleaned V inactive CPU | | on inactive + * | | +------------+ | | CPU + * | +-------------------| DISCARDING |<-----------------+ | + * | +------------+ | + * | asynchronously ^ | + * | copied spec. | discard() | + * | into principal +------------------------+ | + * | | V + * +----------------+ commit() +------------+ + * | COMMITTINGMANY |<----------------------------------| ACTIVEMANY | + * +----------------+ +------------+ + */ +typedef enum dtrace_speculation_state { + DTRACESPEC_INACTIVE = 0, + DTRACESPEC_ACTIVE, + DTRACESPEC_ACTIVEONE, + DTRACESPEC_ACTIVEMANY, + DTRACESPEC_COMMITTING, + DTRACESPEC_COMMITTINGMANY, + DTRACESPEC_DISCARDING +} dtrace_speculation_state_t; + +typedef struct dtrace_speculation { + dtrace_speculation_state_t dtsp_state; /* current speculation state */ + int dtsp_cleaning; /* non-zero if being cleaned */ + dtrace_buffer_t *dtsp_buffer; /* speculative buffer */ +} dtrace_speculation_t; + +/* + * DTrace Dynamic Variables + * + * The dynamic variable problem is obviously decomposed into two subproblems: + * allocating new dynamic storage, and freeing old dynamic storage. The + * presence of the second problem makes the first much more complicated -- or + * rather, the absence of the second renders the first trivial. This is the + * case with aggregations, for which there is effectively no deallocation of + * dynamic storage. (Or more accurately, all dynamic storage is deallocated + * when a snapshot is taken of the aggregation.) As DTrace dynamic variables + * allow for both dynamic allocation and dynamic deallocation, the + * implementation of dynamic variables is quite a bit more complicated than + * that of their aggregation kin. + * + * We observe that allocating new dynamic storage is tricky only because the + * size can vary -- the allocation problem is much easier if allocation sizes + * are uniform. We further observe that in D, the size of dynamic variables is + * actually _not_ dynamic -- dynamic variable sizes may be determined by static + * analysis of DIF text. (This is true even of putatively dynamically-sized + * objects like strings and stacks, the sizes of which are dictated by the + * "stringsize" and "stackframes" variables, respectively.) We exploit this by + * performing this analysis on all DIF before enabling any probes. For each + * dynamic load or store, we calculate the dynamically-allocated size plus the + * size of the dtrace_dynvar structure plus the storage required to key the + * data. For all DIF, we take the largest value and dub it the _chunksize_. + * We then divide dynamic memory into two parts: a hash table that is wide + * enough to have every chunk in its own bucket, and a larger region of equal + * chunksize units. Whenever we wish to dynamically allocate a variable, we + * always allocate a single chunk of memory. Depending on the uniformity of + * allocation, this will waste some amount of memory -- but it eliminates the + * non-determinism inherent in traditional heap fragmentation. + * + * Dynamic objects are allocated by storing a non-zero value to them; they are + * deallocated by storing a zero value to them. Dynamic variables are + * complicated enormously by being shared between CPUs. In particular, + * consider the following scenario: + * + * CPU A CPU B + * +---------------------------------+ +---------------------------------+ + * | | | | + * | allocates dynamic object a[123] | | | + * | by storing the value 345 to it | | | + * | ---------> | + * | | | wishing to load from object | + * | | | a[123], performs lookup in | + * | | | dynamic variable space | + * | <--------- | + * | deallocates object a[123] by | | | + * | storing 0 to it | | | + * | | | | + * | allocates dynamic object b[567] | | performs load from a[123] | + * | by storing the value 789 to it | | | + * : : : : + * . . . . + * + * This is obviously a race in the D program, but there are nonetheless only + * two valid values for CPU B's load from a[123]: 345 or 0. Most importantly, + * CPU B may _not_ see the value 789 for a[123]. + * + * There are essentially two ways to deal with this: + * + * (1) Explicitly spin-lock variables. That is, if CPU B wishes to load + * from a[123], it needs to lock a[123] and hold the lock for the + * duration that it wishes to manipulate it. + * + * (2) Avoid reusing freed chunks until it is known that no CPU is referring + * to them. + * + * The implementation of (1) is rife with complexity, because it requires the + * user of a dynamic variable to explicitly decree when they are done using it. + * Were all variables by value, this perhaps wouldn't be debilitating -- but + * dynamic variables of non-scalar types are tracked by reference. That is, if + * a dynamic variable is, say, a string, and that variable is to be traced to, + * say, the principal buffer, the DIF emulation code returns to the main + * dtrace_probe() loop a pointer to the underlying storage, not the contents of + * the storage. Further, code calling on DIF emulation would have to be aware + * that the DIF emulation has returned a reference to a dynamic variable that + * has been potentially locked. The variable would have to be unlocked after + * the main dtrace_probe() loop is finished with the variable, and the main + * dtrace_probe() loop would have to be careful to not call any further DIF + * emulation while the variable is locked to avoid deadlock. More generally, + * if one were to implement (1), DIF emulation code dealing with dynamic + * variables could only deal with one dynamic variable at a time (lest deadlock + * result). To sum, (1) exports too much subtlety to the users of dynamic + * variables -- increasing maintenance burden and imposing serious constraints + * on future DTrace development. + * + * The implementation of (2) is also complex, but the complexity is more + * manageable. We need to be sure that when a variable is deallocated, it is + * not placed on a traditional free list, but rather on a _dirty_ list. Once a + * variable is on a dirty list, it cannot be found by CPUs performing a + * subsequent lookup of the variable -- but it may still be in use by other + * CPUs. To assure that all CPUs that may be seeing the old variable have + * cleared out of probe context, a dtrace_sync() can be issued. Once the + * dtrace_sync() has completed, it can be known that all CPUs are done + * manipulating the dynamic variable -- the dirty list can be atomically + * appended to the free list. Unfortunately, there's a slight hiccup in this + * mechanism: dtrace_sync() may not be issued from probe context. The + * dtrace_sync() must be therefore issued asynchronously from non-probe + * context. For this we rely on the DTrace cleaner, a cyclic that runs at the + * "cleanrate" frequency. To ease this implementation, we define several chunk + * lists: + * + * - Dirty. Deallocated chunks, not yet cleaned. Not available. + * + * - Rinsing. Formerly dirty chunks that are currently being asynchronously + * cleaned. Not available, but will be shortly. Dynamic variable + * allocation may not spin or block for availability, however. + * + * - Clean. Clean chunks, ready for allocation -- but not on the free list. + * + * - Free. Available for allocation. + * + * Moreover, to avoid absurd contention, _each_ of these lists is implemented + * on a per-CPU basis. This is only for performance, not correctness; chunks + * may be allocated from another CPU's free list. The algorithm for allocation + * then is this: + * + * (1) Attempt to atomically allocate from current CPU's free list. If list + * is non-empty and allocation is successful, allocation is complete. + * + * (2) If the clean list is non-empty, atomically move it to the free list, + * and reattempt (1). + * + * (3) If the dynamic variable space is in the CLEAN state, look for free + * and clean lists on other CPUs by setting the current CPU to the next + * CPU, and reattempting (1). If the next CPU is the current CPU (that + * is, if all CPUs have been checked), atomically switch the state of + * the dynamic variable space based on the following: + * + * - If no free chunks were found and no dirty chunks were found, + * atomically set the state to EMPTY. + * + * - If dirty chunks were found, atomically set the state to DIRTY. + * + * - If rinsing chunks were found, atomically set the state to RINSING. + * + * (4) Based on state of dynamic variable space state, increment appropriate + * counter to indicate dynamic drops (if in EMPTY state) vs. dynamic + * dirty drops (if in DIRTY state) vs. dynamic rinsing drops (if in + * RINSING state). Fail the allocation. + * + * The cleaning cyclic operates with the following algorithm: for all CPUs + * with a non-empty dirty list, atomically move the dirty list to the rinsing + * list. Perform a dtrace_sync(). For all CPUs with a non-empty rinsing list, + * atomically move the rinsing list to the clean list. Perform another + * dtrace_sync(). By this point, all CPUs have seen the new clean list; the + * state of the dynamic variable space can be restored to CLEAN. + * + * There exist two final races that merit explanation. The first is a simple + * allocation race: + * + * CPU A CPU B + * +---------------------------------+ +---------------------------------+ + * | | | | + * | allocates dynamic object a[123] | | allocates dynamic object a[123] | + * | by storing the value 345 to it | | by storing the value 567 to it | + * | | | | + * : : : : + * . . . . + * + * Again, this is a race in the D program. It can be resolved by having a[123] + * hold the value 345 or a[123] hold the value 567 -- but it must be true that + * a[123] have only _one_ of these values. (That is, the racing CPUs may not + * put the same element twice on the same hash chain.) This is resolved + * simply: before the allocation is undertaken, the start of the new chunk's + * hash chain is noted. Later, after the allocation is complete, the hash + * chain is atomically switched to point to the new element. If this fails + * (because of either concurrent allocations or an allocation concurrent with a + * deletion), the newly allocated chunk is deallocated to the dirty list, and + * the whole process of looking up (and potentially allocating) the dynamic + * variable is reattempted. + * + * The final race is a simple deallocation race: + * + * CPU A CPU B + * +---------------------------------+ +---------------------------------+ + * | | | | + * | deallocates dynamic object | | deallocates dynamic object | + * | a[123] by storing the value 0 | | a[123] by storing the value 0 | + * | to it | | to it | + * | | | | + * : : : : + * . . . . + * + * Once again, this is a race in the D program, but it is one that we must + * handle without corrupting the underlying data structures. Because + * deallocations require the deletion of a chunk from the middle of a hash + * chain, we cannot use a single-word atomic operation to remove it. For this, + * we add a spin lock to the hash buckets that is _only_ used for deallocations + * (allocation races are handled as above). Further, this spin lock is _only_ + * held for the duration of the delete; before control is returned to the DIF + * emulation code, the hash bucket is unlocked. + */ +typedef struct dtrace_key { + uint64_t dttk_value; /* data value or data pointer */ + uint64_t dttk_size; /* 0 if by-val, >0 if by-ref */ +} dtrace_key_t; + +typedef struct dtrace_tuple { + uint32_t dtt_nkeys; /* number of keys in tuple */ + uint32_t dtt_pad; /* padding */ + dtrace_key_t dtt_key[1]; /* array of tuple keys */ +} dtrace_tuple_t; + +typedef struct dtrace_dynvar { + uint64_t dtdv_hashval; /* hash value -- 0 if free */ + struct dtrace_dynvar *dtdv_next; /* next on list or hash chain */ + void *dtdv_data; /* pointer to data */ + dtrace_tuple_t dtdv_tuple; /* tuple key */ +} dtrace_dynvar_t; + +typedef enum dtrace_dynvar_op { + DTRACE_DYNVAR_ALLOC, + DTRACE_DYNVAR_NOALLOC, + DTRACE_DYNVAR_DEALLOC +} dtrace_dynvar_op_t; + +typedef struct dtrace_dynhash { + dtrace_dynvar_t *dtdh_chain; /* hash chain for this bucket */ + uintptr_t dtdh_lock; /* deallocation lock */ +#ifdef _LP64 + uintptr_t dtdh_pad[6]; /* pad to avoid false sharing */ +#else + uintptr_t dtdh_pad[14]; /* pad to avoid false sharing */ +#endif +} dtrace_dynhash_t; + +typedef struct dtrace_dstate_percpu { + dtrace_dynvar_t *dtdsc_free; /* free list for this CPU */ + dtrace_dynvar_t *dtdsc_dirty; /* dirty list for this CPU */ + dtrace_dynvar_t *dtdsc_rinsing; /* rinsing list for this CPU */ + dtrace_dynvar_t *dtdsc_clean; /* clean list for this CPU */ + uint64_t dtdsc_drops; /* number of capacity drops */ + uint64_t dtdsc_dirty_drops; /* number of dirty drops */ + uint64_t dtdsc_rinsing_drops; /* number of rinsing drops */ +#ifdef _LP64 + uint64_t dtdsc_pad; /* pad to avoid false sharing */ +#else + uint64_t dtdsc_pad[2]; /* pad to avoid false sharing */ +#endif +} dtrace_dstate_percpu_t; + +typedef enum dtrace_dstate_state { + DTRACE_DSTATE_CLEAN = 0, + DTRACE_DSTATE_EMPTY, + DTRACE_DSTATE_DIRTY, + DTRACE_DSTATE_RINSING +} dtrace_dstate_state_t; + +typedef struct dtrace_dstate { + void *dtds_base; /* base of dynamic var. space */ + size_t dtds_size; /* size of dynamic var. space */ + size_t dtds_hashsize; /* number of buckets in hash */ + size_t dtds_chunksize; /* size of each chunk */ + dtrace_dynhash_t *dtds_hash; /* pointer to hash table */ + dtrace_dstate_state_t dtds_state; /* current dynamic var. state */ + dtrace_dstate_percpu_t *dtds_percpu; /* per-CPU dyn. var. state */ +} dtrace_dstate_t; + +/* + * DTrace Variable State + * + * The DTrace variable state tracks user-defined variables in its dtrace_vstate + * structure. Each DTrace consumer has exactly one dtrace_vstate structure, + * but some dtrace_vstate structures may exist without a corresponding DTrace + * consumer (see "DTrace Helpers", below). As described in <sys/dtrace.h>, + * user-defined variables can have one of three scopes: + * + * DIFV_SCOPE_GLOBAL => global scope + * DIFV_SCOPE_THREAD => thread-local scope (i.e. "self->" variables) + * DIFV_SCOPE_LOCAL => clause-local scope (i.e. "this->" variables) + * + * The variable state tracks variables by both their scope and their allocation + * type: + * + * - The dtvs_globals and dtvs_locals members each point to an array of + * dtrace_statvar structures. These structures contain both the variable + * metadata (dtrace_difv structures) and the underlying storage for all + * statically allocated variables, including statically allocated + * DIFV_SCOPE_GLOBAL variables and all DIFV_SCOPE_LOCAL variables. + * + * - The dtvs_tlocals member points to an array of dtrace_difv structures for + * DIFV_SCOPE_THREAD variables. As such, this array tracks _only_ the + * variable metadata for DIFV_SCOPE_THREAD variables; the underlying storage + * is allocated out of the dynamic variable space. + * + * - The dtvs_dynvars member is the dynamic variable state associated with the + * variable state. The dynamic variable state (described in "DTrace Dynamic + * Variables", above) tracks all DIFV_SCOPE_THREAD variables and all + * dynamically-allocated DIFV_SCOPE_GLOBAL variables. + */ +typedef struct dtrace_statvar { + uint64_t dtsv_data; /* data or pointer to it */ + size_t dtsv_size; /* size of pointed-to data */ + int dtsv_refcnt; /* reference count */ + dtrace_difv_t dtsv_var; /* variable metadata */ +} dtrace_statvar_t; + +typedef struct dtrace_vstate { + dtrace_state_t *dtvs_state; /* back pointer to state */ + dtrace_statvar_t **dtvs_globals; /* statically-allocated glbls */ + int dtvs_nglobals; /* number of globals */ + dtrace_difv_t *dtvs_tlocals; /* thread-local metadata */ + int dtvs_ntlocals; /* number of thread-locals */ + dtrace_statvar_t **dtvs_locals; /* clause-local data */ + int dtvs_nlocals; /* number of clause-locals */ + dtrace_dstate_t dtvs_dynvars; /* dynamic variable state */ +} dtrace_vstate_t; + +/* + * DTrace Machine State + * + * In the process of processing a fired probe, DTrace needs to track and/or + * cache some per-CPU state associated with that particular firing. This is + * state that is always discarded after the probe firing has completed, and + * much of it is not specific to any DTrace consumer, remaining valid across + * all ECBs. This state is tracked in the dtrace_mstate structure. + */ +#define DTRACE_MSTATE_ARGS 0x00000001 +#define DTRACE_MSTATE_PROBE 0x00000002 +#define DTRACE_MSTATE_EPID 0x00000004 +#define DTRACE_MSTATE_TIMESTAMP 0x00000008 +#define DTRACE_MSTATE_STACKDEPTH 0x00000010 +#define DTRACE_MSTATE_CALLER 0x00000020 +#define DTRACE_MSTATE_IPL 0x00000040 +#define DTRACE_MSTATE_FLTOFFS 0x00000080 +#define DTRACE_MSTATE_WALLTIMESTAMP 0x00000100 +#define DTRACE_MSTATE_USTACKDEPTH 0x00000200 +#define DTRACE_MSTATE_UCALLER 0x00000400 + +typedef struct dtrace_mstate { + uintptr_t dtms_scratch_base; /* base of scratch space */ + uintptr_t dtms_scratch_ptr; /* current scratch pointer */ + size_t dtms_scratch_size; /* scratch size */ + uint32_t dtms_present; /* variables that are present */ + uint64_t dtms_arg[5]; /* cached arguments */ + dtrace_epid_t dtms_epid; /* current EPID */ + uint64_t dtms_timestamp; /* cached timestamp */ + hrtime_t dtms_walltimestamp; /* cached wall timestamp */ + int dtms_stackdepth; /* cached stackdepth */ + int dtms_ustackdepth; /* cached ustackdepth */ + struct dtrace_probe *dtms_probe; /* current probe */ + uintptr_t dtms_caller; /* cached caller */ + uint64_t dtms_ucaller; /* cached user-level caller */ + int dtms_ipl; /* cached interrupt pri lev */ + int dtms_fltoffs; /* faulting DIFO offset */ + uintptr_t dtms_strtok; /* saved strtok() pointer */ + uint32_t dtms_access; /* memory access rights */ + dtrace_difo_t *dtms_difo; /* current dif object */ +} dtrace_mstate_t; + +#define DTRACE_COND_OWNER 0x1 +#define DTRACE_COND_USERMODE 0x2 +#define DTRACE_COND_ZONEOWNER 0x4 + +#define DTRACE_PROBEKEY_MAXDEPTH 8 /* max glob recursion depth */ + +/* + * Access flag used by dtrace_mstate.dtms_access. + */ +#define DTRACE_ACCESS_KERNEL 0x1 /* the priv to read kmem */ + + +/* + * DTrace Activity + * + * Each DTrace consumer is in one of several states, which (for purposes of + * avoiding yet-another overloading of the noun "state") we call the current + * _activity_. The activity transitions on dtrace_go() (from DTRACIOCGO), on + * dtrace_stop() (from DTRACIOCSTOP) and on the exit() action. Activities may + * only transition in one direction; the activity transition diagram is a + * directed acyclic graph. The activity transition diagram is as follows: + * + * + * +----------+ +--------+ +--------+ + * | INACTIVE |------------------>| WARMUP |------------------>| ACTIVE | + * +----------+ dtrace_go(), +--------+ dtrace_go(), +--------+ + * before BEGIN | after BEGIN | | | + * | | | | + * exit() action | | | | + * from BEGIN ECB | | | | + * | | | | + * v | | | + * +----------+ exit() action | | | + * +-----------------------------| DRAINING |<-------------------+ | | + * | +----------+ | | + * | | | | + * | dtrace_stop(), | | | + * | before END | | | + * | | | | + * | v | | + * | +---------+ +----------+ | | + * | | STOPPED |<----------------| COOLDOWN |<----------------------+ | + * | +---------+ dtrace_stop(), +----------+ dtrace_stop(), | + * | after END before END | + * | | + * | +--------+ | + * +----------------------------->| KILLED |<--------------------------+ + * deadman timeout or +--------+ deadman timeout or + * killed consumer killed consumer + * + * Note that once a DTrace consumer has stopped tracing, there is no way to + * restart it; if a DTrace consumer wishes to restart tracing, it must reopen + * the DTrace pseudodevice. + */ +typedef enum dtrace_activity { + DTRACE_ACTIVITY_INACTIVE = 0, /* not yet running */ + DTRACE_ACTIVITY_WARMUP, /* while starting */ + DTRACE_ACTIVITY_ACTIVE, /* running */ + DTRACE_ACTIVITY_DRAINING, /* before stopping */ + DTRACE_ACTIVITY_COOLDOWN, /* while stopping */ + DTRACE_ACTIVITY_STOPPED, /* after stopping */ + DTRACE_ACTIVITY_KILLED /* killed */ +} dtrace_activity_t; + +/* + * DTrace Helper Implementation + * + * A description of the helper architecture may be found in <sys/dtrace.h>. + * Each process contains a pointer to its helpers in its p_dtrace_helpers + * member. This is a pointer to a dtrace_helpers structure, which contains an + * array of pointers to dtrace_helper structures, helper variable state (shared + * among a process's helpers) and a generation count. (The generation count is + * used to provide an identifier when a helper is added so that it may be + * subsequently removed.) The dtrace_helper structure is self-explanatory, + * containing pointers to the objects needed to execute the helper. Note that + * helpers are _duplicated_ across fork(2), and destroyed on exec(2). No more + * than dtrace_helpers_max are allowed per-process. + */ +#define DTRACE_HELPER_ACTION_USTACK 0 +#define DTRACE_NHELPER_ACTIONS 1 + +typedef struct dtrace_helper_action { + int dtha_generation; /* helper action generation */ + int dtha_nactions; /* number of actions */ + dtrace_difo_t *dtha_predicate; /* helper action predicate */ + dtrace_difo_t **dtha_actions; /* array of actions */ + struct dtrace_helper_action *dtha_next; /* next helper action */ +} dtrace_helper_action_t; + +typedef struct dtrace_helper_provider { + int dthp_generation; /* helper provider generation */ + uint32_t dthp_ref; /* reference count */ + dof_helper_t dthp_prov; /* DOF w/ provider and probes */ +} dtrace_helper_provider_t; + +typedef struct dtrace_helpers { + dtrace_helper_action_t **dthps_actions; /* array of helper actions */ + dtrace_vstate_t dthps_vstate; /* helper action var. state */ + dtrace_helper_provider_t **dthps_provs; /* array of providers */ + uint_t dthps_nprovs; /* count of providers */ + uint_t dthps_maxprovs; /* provider array size */ + int dthps_generation; /* current generation */ + pid_t dthps_pid; /* pid of associated proc */ + int dthps_deferred; /* helper in deferred list */ + struct dtrace_helpers *dthps_next; /* next pointer */ + struct dtrace_helpers *dthps_prev; /* prev pointer */ +} dtrace_helpers_t; + +/* + * DTrace Helper Action Tracing + * + * Debugging helper actions can be arduous. To ease the development and + * debugging of helpers, DTrace contains a tracing-framework-within-a-tracing- + * framework: helper tracing. If dtrace_helptrace_enabled is non-zero (which + * it is by default on DEBUG kernels), all helper activity will be traced to a + * global, in-kernel ring buffer. Each entry includes a pointer to the specific + * helper, the location within the helper, and a trace of all local variables. + * The ring buffer may be displayed in a human-readable format with the + * ::dtrace_helptrace mdb(1) dcmd. + */ +#define DTRACE_HELPTRACE_NEXT (-1) +#define DTRACE_HELPTRACE_DONE (-2) +#define DTRACE_HELPTRACE_ERR (-3) + +typedef struct dtrace_helptrace { + dtrace_helper_action_t *dtht_helper; /* helper action */ + int dtht_where; /* where in helper action */ + int dtht_nlocals; /* number of locals */ + int dtht_fault; /* type of fault (if any) */ + int dtht_fltoffs; /* DIF offset */ + uint64_t dtht_illval; /* faulting value */ + uint64_t dtht_locals[1]; /* local variables */ +} dtrace_helptrace_t; + +/* + * DTrace Credentials + * + * In probe context, we have limited flexibility to examine the credentials + * of the DTrace consumer that created a particular enabling. We use + * the Least Privilege interfaces to cache the consumer's cred pointer and + * some facts about that credential in a dtrace_cred_t structure. These + * can limit the consumer's breadth of visibility and what actions the + * consumer may take. + */ +#define DTRACE_CRV_ALLPROC 0x01 +#define DTRACE_CRV_KERNEL 0x02 +#define DTRACE_CRV_ALLZONE 0x04 + +#define DTRACE_CRV_ALL (DTRACE_CRV_ALLPROC | DTRACE_CRV_KERNEL | \ + DTRACE_CRV_ALLZONE) + +#define DTRACE_CRA_PROC 0x0001 +#define DTRACE_CRA_PROC_CONTROL 0x0002 +#define DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER 0x0004 +#define DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE 0x0008 +#define DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG 0x0010 +#define DTRACE_CRA_KERNEL 0x0020 +#define DTRACE_CRA_KERNEL_DESTRUCTIVE 0x0040 + +#define DTRACE_CRA_ALL (DTRACE_CRA_PROC | \ + DTRACE_CRA_PROC_CONTROL | \ + DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER | \ + DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE | \ + DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG | \ + DTRACE_CRA_KERNEL | \ + DTRACE_CRA_KERNEL_DESTRUCTIVE) + +typedef struct dtrace_cred { + cred_t *dcr_cred; + uint8_t dcr_destructive; + uint8_t dcr_visible; + uint16_t dcr_action; +} dtrace_cred_t; + +/* + * DTrace Consumer State + * + * Each DTrace consumer has an associated dtrace_state structure that contains + * its in-kernel DTrace state -- including options, credentials, statistics and + * pointers to ECBs, buffers, speculations and formats. A dtrace_state + * structure is also allocated for anonymous enablings. When anonymous state + * is grabbed, the grabbing consumers dts_anon pointer is set to the grabbed + * dtrace_state structure. + */ +struct dtrace_state { + dev_t dts_dev; /* device */ + int dts_necbs; /* total number of ECBs */ + dtrace_ecb_t **dts_ecbs; /* array of ECBs */ + dtrace_epid_t dts_epid; /* next EPID to allocate */ + size_t dts_needed; /* greatest needed space */ + struct dtrace_state *dts_anon; /* anon. state, if grabbed */ + dtrace_activity_t dts_activity; /* current activity */ + dtrace_vstate_t dts_vstate; /* variable state */ + dtrace_buffer_t *dts_buffer; /* principal buffer */ + dtrace_buffer_t *dts_aggbuffer; /* aggregation buffer */ + dtrace_speculation_t *dts_speculations; /* speculation array */ + int dts_nspeculations; /* number of speculations */ + int dts_naggregations; /* number of aggregations */ + dtrace_aggregation_t **dts_aggregations; /* aggregation array */ + vmem_t *dts_aggid_arena; /* arena for aggregation IDs */ + uint64_t dts_errors; /* total number of errors */ + uint32_t dts_speculations_busy; /* number of spec. busy */ + uint32_t dts_speculations_unavail; /* number of spec unavail */ + uint32_t dts_stkstroverflows; /* stack string tab overflows */ + uint32_t dts_dblerrors; /* errors in ERROR probes */ + uint32_t dts_reserve; /* space reserved for END */ + hrtime_t dts_laststatus; /* time of last status */ + cyclic_id_t dts_cleaner; /* cleaning cyclic */ + cyclic_id_t dts_deadman; /* deadman cyclic */ + hrtime_t dts_alive; /* time last alive */ + char dts_speculates; /* boolean: has speculations */ + char dts_destructive; /* boolean: has dest. actions */ + int dts_nformats; /* number of formats */ + char **dts_formats; /* format string array */ + dtrace_optval_t dts_options[DTRACEOPT_MAX]; /* options */ + dtrace_cred_t dts_cred; /* credentials */ + size_t dts_nretained; /* number of retained enabs */ +}; + +struct dtrace_provider { + dtrace_pattr_t dtpv_attr; /* provider attributes */ + dtrace_ppriv_t dtpv_priv; /* provider privileges */ + dtrace_pops_t dtpv_pops; /* provider operations */ + char *dtpv_name; /* provider name */ + void *dtpv_arg; /* provider argument */ + uint_t dtpv_defunct; /* boolean: defunct provider */ + struct dtrace_provider *dtpv_next; /* next provider */ +}; + +struct dtrace_meta { + dtrace_mops_t dtm_mops; /* meta provider operations */ + char *dtm_name; /* meta provider name */ + void *dtm_arg; /* meta provider user arg */ + uint64_t dtm_count; /* no. of associated provs. */ +}; + +/* + * DTrace Enablings + * + * A dtrace_enabling structure is used to track a collection of ECB + * descriptions -- before they have been turned into actual ECBs. This is + * created as a result of DOF processing, and is generally used to generate + * ECBs immediately thereafter. However, enablings are also generally + * retained should the probes they describe be created at a later time; as + * each new module or provider registers with the framework, the retained + * enablings are reevaluated, with any new match resulting in new ECBs. To + * prevent probes from being matched more than once, the enabling tracks the + * last probe generation matched, and only matches probes from subsequent + * generations. + */ +typedef struct dtrace_enabling { + dtrace_ecbdesc_t **dten_desc; /* all ECB descriptions */ + int dten_ndesc; /* number of ECB descriptions */ + int dten_maxdesc; /* size of ECB array */ + dtrace_vstate_t *dten_vstate; /* associated variable state */ + dtrace_genid_t dten_probegen; /* matched probe generation */ + dtrace_ecbdesc_t *dten_current; /* current ECB description */ + int dten_error; /* current error value */ + int dten_primed; /* boolean: set if primed */ + struct dtrace_enabling *dten_prev; /* previous enabling */ + struct dtrace_enabling *dten_next; /* next enabling */ +} dtrace_enabling_t; + +/* + * DTrace Anonymous Enablings + * + * Anonymous enablings are DTrace enablings that are not associated with a + * controlling process, but rather derive their enabling from DOF stored as + * properties in the dtrace.conf file. If there is an anonymous enabling, a + * DTrace consumer state and enabling are created on attach. The state may be + * subsequently grabbed by the first consumer specifying the "grabanon" + * option. As long as an anonymous DTrace enabling exists, dtrace(7D) will + * refuse to unload. + */ +typedef struct dtrace_anon { + dtrace_state_t *dta_state; /* DTrace consumer state */ + dtrace_enabling_t *dta_enabling; /* pointer to enabling */ + processorid_t dta_beganon; /* which CPU BEGIN ran on */ +} dtrace_anon_t; + +/* + * DTrace Error Debugging + */ +#ifdef DEBUG +#define DTRACE_ERRDEBUG +#endif + +#ifdef DTRACE_ERRDEBUG + +typedef struct dtrace_errhash { + const char *dter_msg; /* error message */ + int dter_count; /* number of times seen */ +} dtrace_errhash_t; + +#define DTRACE_ERRHASHSZ 256 /* must be > number of err msgs */ + +#endif /* DTRACE_ERRDEBUG */ + +/* + * DTrace Toxic Ranges + * + * DTrace supports safe loads from probe context; if the address turns out to + * be invalid, a bit will be set by the kernel indicating that DTrace + * encountered a memory error, and DTrace will propagate the error to the user + * accordingly. However, there may exist some regions of memory in which an + * arbitrary load can change system state, and from which it is impossible to + * recover from such a load after it has been attempted. Examples of this may + * include memory in which programmable I/O registers are mapped (for which a + * read may have some implications for the device) or (in the specific case of + * UltraSPARC-I and -II) the virtual address hole. The platform is required + * to make DTrace aware of these toxic ranges; DTrace will then check that + * target addresses are not in a toxic range before attempting to issue a + * safe load. + */ +typedef struct dtrace_toxrange { + uintptr_t dtt_base; /* base of toxic range */ + uintptr_t dtt_limit; /* limit of toxic range */ +} dtrace_toxrange_t; + +extern uint64_t dtrace_getarg(int, int); +extern greg_t dtrace_getfp(void); +extern int dtrace_getipl(void); +extern uintptr_t dtrace_caller(int); +extern uint32_t dtrace_cas32(uint32_t *, uint32_t, uint32_t); +extern void *dtrace_casptr(void *, void *, void *); +extern void dtrace_copyin(uintptr_t, uintptr_t, size_t, volatile uint16_t *); +extern void dtrace_copyinstr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); +extern void dtrace_copyout(uintptr_t, uintptr_t, size_t, volatile uint16_t *); +extern void dtrace_copyoutstr(uintptr_t, uintptr_t, size_t, + volatile uint16_t *); +extern void dtrace_getpcstack(pc_t *, int, int, uint32_t *); +extern ulong_t dtrace_getreg(struct regs *, uint_t); +extern int dtrace_getstackdepth(int); +extern void dtrace_getupcstack(uint64_t *, int); +extern void dtrace_getufpstack(uint64_t *, uint64_t *, int); +extern int dtrace_getustackdepth(void); +extern uintptr_t dtrace_fulword(void *); +extern uint8_t dtrace_fuword8(void *); +extern uint16_t dtrace_fuword16(void *); +extern uint32_t dtrace_fuword32(void *); +extern uint64_t dtrace_fuword64(void *); +extern void dtrace_probe_error(dtrace_state_t *, dtrace_epid_t, int, int, + int, uintptr_t); +extern int dtrace_assfail(const char *, const char *, int); +extern int dtrace_attached(void); +extern hrtime_t dtrace_gethrestime(); + +#ifdef __sparc +extern void dtrace_flush_windows(void); +extern void dtrace_flush_user_windows(void); +extern uint_t dtrace_getotherwin(void); +extern uint_t dtrace_getfprs(void); +#else +extern void dtrace_copy(uintptr_t, uintptr_t, size_t); +extern void dtrace_copystr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); +#endif + +/* + * DTrace Assertions + * + * DTrace calls ASSERT from probe context. To assure that a failed ASSERT + * does not induce a markedly more catastrophic failure (e.g., one from which + * a dump cannot be gleaned), DTrace must define its own ASSERT to be one that + * may safely be called from probe context. This header file must thus be + * included by any DTrace component that calls ASSERT from probe context, and + * _only_ by those components. (The only exception to this is kernel + * debugging infrastructure at user-level that doesn't depend on calling + * ASSERT.) + */ +#undef ASSERT +#ifdef DEBUG +#define ASSERT(EX) ((void)((EX) || \ + dtrace_assfail(#EX, __FILE__, __LINE__))) +#else +#define ASSERT(X) ((void)0) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DTRACE_IMPL_H */ diff --git a/cddl/contrib/opensolaris/uts/common/sys/fasttrap.h b/cddl/contrib/opensolaris/uts/common/sys/fasttrap.h new file mode 100644 index 0000000..7f80314 --- /dev/null +++ b/cddl/contrib/opensolaris/uts/common/sys/fasttrap.h @@ -0,0 +1,93 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_FASTTRAP_H +#define _SYS_FASTTRAP_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/fasttrap_isa.h> +#include <sys/dtrace.h> +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define FASTTRAPIOC (('m' << 24) | ('r' << 16) | ('f' << 8)) +#define FASTTRAPIOC_MAKEPROBE (FASTTRAPIOC | 1) +#define FASTTRAPIOC_GETINSTR (FASTTRAPIOC | 2) + +typedef enum fasttrap_probe_type { + DTFTP_NONE = 0, + DTFTP_ENTRY, + DTFTP_RETURN, + DTFTP_OFFSETS, + DTFTP_POST_OFFSETS, + DTFTP_IS_ENABLED +} fasttrap_probe_type_t; + +typedef struct fasttrap_probe_spec { + pid_t ftps_pid; + fasttrap_probe_type_t ftps_type; + + char ftps_func[DTRACE_FUNCNAMELEN]; + char ftps_mod[DTRACE_MODNAMELEN]; + + uint64_t ftps_pc; + uint64_t ftps_size; + uint64_t ftps_noffs; + uint64_t ftps_offs[1]; +} fasttrap_probe_spec_t; + +typedef struct fasttrap_instr_query { + uint64_t ftiq_pc; + pid_t ftiq_pid; + fasttrap_instr_t ftiq_instr; +} fasttrap_instr_query_t; + +/* + * To support the fasttrap provider from very early in a process's life, + * the run-time linker, ld.so.1, has a program header of type PT_SUNWDTRACE + * which points to a data object which must be PT_SUNWDTRACE_SIZE bytes. + * This structure mimics the fasttrap provider section of the ulwp_t structure. + * When the fasttrap provider is changed to require new or different + * instructions, the data object in ld.so.1 and the thread initializers in libc + * (libc_init() and _thrp_create()) need to be updated to include the new + * instructions, and PT_SUNWDTRACE needs to be changed to a new unique number + * (while the old value gets assigned something like PT_SUNWDTRACE_1). Since the + * linker must be backward compatible with old Solaris releases, it must have + * program headers for each of the PT_SUNWDTRACE versions. The kernel's + * elfexec() function only has to look for the latest version of the + * PT_SUNWDTRACE program header. + */ +#define PT_SUNWDTRACE_SIZE FASTTRAP_SUNWDTRACE_SIZE + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FASTTRAP_H */ |