diff options
author | jb <jb@FreeBSD.org> | 2008-05-23 05:59:42 +0000 |
---|---|---|
committer | jb <jb@FreeBSD.org> | 2008-05-23 05:59:42 +0000 |
commit | be12e1a50471524fb128cc615816dd08e60b353d (patch) | |
tree | 19da43b8882490dac0da41ddf4c15e8294765564 | |
parent | d6da92763e434b5a352932bf992e8eda0f1de268 (diff) | |
download | FreeBSD-src-be12e1a50471524fb128cc615816dd08e60b353d.zip FreeBSD-src-be12e1a50471524fb128cc615816dd08e60b353d.tar.gz |
Custom DTrace kernel module files plus FreeBSD-specific DTrace providers.
30 files changed, 15340 insertions, 0 deletions
diff --git a/sys/cddl/dev/dtmalloc/dtmalloc.c b/sys/cddl/dev/dtmalloc/dtmalloc.c new file mode 100644 index 0000000..ca822f9 --- /dev/null +++ b/sys/cddl/dev/dtmalloc/dtmalloc.c @@ -0,0 +1,220 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Portions Copyright 2006-2008 John Birrell jb@freebsd.org + * + * $FreeBSD$ + * + */ + +#include <sys/cdefs.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/module.h> + +#include <sys/dtrace.h> +#include <sys/dtrace_bsd.h> + +static d_open_t dtmalloc_open; +static int dtmalloc_unload(void); +static void dtmalloc_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); +static void dtmalloc_provide(void *, dtrace_probedesc_t *); +static void dtmalloc_destroy(void *, dtrace_id_t, void *); +static void dtmalloc_enable(void *, dtrace_id_t, void *); +static void dtmalloc_disable(void *, dtrace_id_t, void *); +static void dtmalloc_load(void *); + +static struct cdevsw dtmalloc_cdevsw = { + .d_version = D_VERSION, + .d_open = dtmalloc_open, + .d_name = "dtmalloc", +}; + +static dtrace_pattr_t dtmalloc_attr = { +{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, +}; + +static dtrace_pops_t dtmalloc_pops = { + dtmalloc_provide, + NULL, + dtmalloc_enable, + dtmalloc_disable, + NULL, + NULL, + dtmalloc_getargdesc, + NULL, + NULL, + dtmalloc_destroy +}; + +static struct cdev *dtmalloc_cdev; +static dtrace_provider_id_t dtmalloc_id; + +static void +dtmalloc_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) +{ + const char *p = NULL; + + switch (desc->dtargd_ndx) { + case 0: + p = "struct malloc_type *"; + break; + case 1: + p = "struct malloc_type_internal *"; + break; + case 2: + p = "struct malloc_type_stats *"; + break; + case 3: + p = "unsigned long"; + break; + case 4: + p = "int"; + break; + default: + desc->dtargd_ndx = DTRACE_ARGNONE; + break; + } + + if (p != NULL) + strlcpy(desc->dtargd_native, p, sizeof(desc->dtargd_native)); + + return; +} + +static void +dtmalloc_type_cb(struct malloc_type *mtp, void *arg __unused) +{ + char name[DTRACE_FUNCNAMELEN]; + struct malloc_type_internal *mtip = mtp->ks_handle; + + strlcpy(name, mtp->ks_shortdesc, sizeof(name)); + + if (dtrace_probe_lookup(dtmalloc_id, NULL, name, "malloc") != 0) + return; + + (void) dtrace_probe_create(dtmalloc_id, NULL, name, "malloc", 0, + &mtip->mti_probes[DTMALLOC_PROBE_MALLOC]); + (void) dtrace_probe_create(dtmalloc_id, NULL, name, "free", 0, + &mtip->mti_probes[DTMALLOC_PROBE_FREE]); +} + +static void +dtmalloc_provide(void *arg, dtrace_probedesc_t *desc) +{ + if (desc != NULL) + return; + + malloc_type_list(dtmalloc_type_cb, desc); +} + +static void +dtmalloc_destroy(void *arg, dtrace_id_t id, void *parg) +{ +} + +static void +dtmalloc_enable(void *arg, dtrace_id_t id, void *parg) +{ + uint32_t *p = parg; + *p = id; +} + +static void +dtmalloc_disable(void *arg, dtrace_id_t id, void *parg) +{ + uint32_t *p = parg; + *p = 0; +} + +static void +dtmalloc_load(void *dummy) +{ + /* Create the /dev/dtrace/dtmalloc entry. */ + dtmalloc_cdev = make_dev(&dtmalloc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "dtrace/dtmalloc"); + + if (dtrace_register("dtmalloc", &dtmalloc_attr, DTRACE_PRIV_USER, + NULL, &dtmalloc_pops, NULL, &dtmalloc_id) != 0) + return; + + dtrace_malloc_probe = dtrace_probe; +} + + +static int +dtmalloc_unload() +{ + int error = 0; + + dtrace_malloc_probe = NULL; + + if ((error = dtrace_unregister(dtmalloc_id)) != 0) + return (error); + + destroy_dev(dtmalloc_cdev); + + return (error); +} + +static int +dtmalloc_modevent(module_t mod __unused, int type, void *data __unused) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + break; + + case MOD_UNLOAD: + break; + + case MOD_SHUTDOWN: + break; + + default: + error = EOPNOTSUPP; + break; + + } + + return (error); +} + +static int +dtmalloc_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) +{ + return (0); +} + +SYSINIT(dtmalloc_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, dtmalloc_load, NULL); +SYSUNINIT(dtmalloc_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, dtmalloc_unload, NULL); + +DEV_MODULE(dtmalloc, dtmalloc_modevent, NULL); +MODULE_VERSION(dtmalloc, 1); +MODULE_DEPEND(dtmalloc, dtrace, 1, 1, 1); +MODULE_DEPEND(dtmalloc, opensolaris, 1, 1, 1); diff --git a/sys/cddl/dev/dtrace/amd64/dis_tables.c b/sys/cddl/dev/dtrace/amd64/dis_tables.c new file mode 100644 index 0000000..5a5bc25 --- /dev/null +++ b/sys/cddl/dev/dtrace/amd64/dis_tables.c @@ -0,0 +1,3193 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +#if defined(sun) +#pragma ident "@(#)dis_tables.c 1.11 06/03/02 SMI" +#endif + +#include "dis_tables.h" + +/* BEGIN CSTYLED */ + +/* + * Disassembly begins in dis_distable, which is equivalent to the One-byte + * Opcode Map in the Intel IA32 ISA Reference (page A-6 in my copy). The + * decoding loops then traverse out through the other tables as necessary to + * decode a given instruction. + * + * The behavior of this file can be controlled by one of the following flags: + * + * DIS_TEXT Include text for disassembly + * DIS_MEM Include memory-size calculations + * + * Either or both of these can be defined. + * + * This file is not, and will never be, cstyled. If anything, the tables should + * be taken out another tab stop or two so nothing overlaps. + */ + +/* + * These functions must be provided for the consumer to do disassembly. + */ +#ifdef DIS_TEXT +extern char *strncpy(char *, const char *, size_t); +extern size_t strlen(const char *); +extern int strcmp(const char *, const char *); +extern int strncmp(const char *, const char *, size_t); +extern size_t strlcat(char *, const char *, size_t); +#endif + + +#define TERM NULL /* used to indicate that the 'indirect' */ + /* field terminates - no pointer. */ + +/* Used to decode instructions. */ +typedef struct instable { + const struct instable *it_indirect; /* for decode op codes */ + uchar_t it_adrmode; +#ifdef DIS_TEXT + char it_name[NCPS]; + uint_t it_suffix:1; /* mneu + "w", "l", or "d" */ +#endif +#ifdef DIS_MEM + uint_t it_size:16; +#endif + uint_t it_invalid64:1; /* opcode invalid in amd64 */ + uint_t it_always64:1; /* 64 bit when in 64 bit mode */ + uint_t it_invalid32:1; /* invalid in IA32 */ + uint_t it_stackop:1; /* push/pop stack operation */ +} instable_t; + +/* + * Instruction formats. + */ +enum { + UNKNOWN, + MRw, + IMlw, + IMw, + IR, + OA, + AO, + MS, + SM, + Mv, + Mw, + M, /* register or memory */ + Mb, /* register or memory, always byte sized */ + MO, /* memory only (no registers) */ + PREF, + SWAPGS, + R, + RA, + SEG, + MR, + RM, + IA, + MA, + SD, + AD, + SA, + D, + INM, + SO, + BD, + I, + P, + V, + DSHIFT, /* for double shift that has an 8-bit immediate */ + U, + OVERRIDE, + NORM, /* instructions w/o ModR/M byte, no memory access */ + IMPLMEM, /* instructions w/o ModR/M byte, implicit mem access */ + O, /* for call */ + JTAB, /* jump table */ + IMUL, /* for 186 iimul instr */ + CBW, /* so data16 can be evaluated for cbw and variants */ + MvI, /* for 186 logicals */ + ENTER, /* for 186 enter instr */ + RMw, /* for 286 arpl instr */ + Ib, /* for push immediate byte */ + F, /* for 287 instructions */ + FF, /* for 287 instructions */ + FFC, /* for 287 instructions */ + DM, /* 16-bit data */ + AM, /* 16-bit addr */ + LSEG, /* for 3-bit seg reg encoding */ + MIb, /* for 386 logicals */ + SREG, /* for 386 special registers */ + PREFIX, /* a REP instruction prefix */ + LOCK, /* a LOCK instruction prefix */ + INT3, /* The int 3 instruction, which has a fake operand */ + INTx, /* The normal int instruction, with explicit int num */ + DSHIFTcl, /* for double shift that implicitly uses %cl */ + CWD, /* so data16 can be evaluated for cwd and variants */ + RET, /* single immediate 16-bit operand */ + MOVZ, /* for movs and movz, with different size operands */ + XADDB, /* for xaddb */ + MOVSXZ, /* AMD64 mov sign extend 32 to 64 bit instruction */ + +/* + * MMX/SIMD addressing modes. + */ + + MMO, /* Prefixable MMX/SIMD-Int mm/mem -> mm */ + MMOIMPL, /* Prefixable MMX/SIMD-Int mm -> mm (mem) */ + MMO3P, /* Prefixable MMX/SIMD-Int mm -> r32,imm8 */ + MMOM3, /* Prefixable MMX/SIMD-Int mm -> r32 */ + MMOS, /* Prefixable MMX/SIMD-Int mm -> mm/mem */ + MMOMS, /* Prefixable MMX/SIMD-Int mm -> mem */ + MMOPM, /* MMX/SIMD-Int mm/mem -> mm,imm8 */ + MMOPRM, /* Prefixable MMX/SIMD-Int r32/mem -> mm,imm8 */ + MMOSH, /* Prefixable MMX mm,imm8 */ + MM, /* MMX/SIMD-Int mm/mem -> mm */ + MMS, /* MMX/SIMD-Int mm -> mm/mem */ + MMSH, /* MMX mm,imm8 */ + XMMO, /* Prefixable SIMD xmm/mem -> xmm */ + XMMOS, /* Prefixable SIMD xmm -> xmm/mem */ + XMMOPM, /* Prefixable SIMD xmm/mem w/to xmm,imm8 */ + XMMOMX, /* Prefixable SIMD mm/mem -> xmm */ + XMMOX3, /* Prefixable SIMD xmm -> r32 */ + XMMOXMM, /* Prefixable SIMD xmm/mem -> mm */ + XMMOM, /* Prefixable SIMD xmm -> mem */ + XMMOMS, /* Prefixable SIMD mem -> xmm */ + XMM, /* SIMD xmm/mem -> xmm */ + XMMXIMPL, /* SIMD xmm -> xmm (mem) */ + XMM3P, /* SIMD xmm -> r32,imm8 */ + XMMP, /* SIMD xmm/mem w/to xmm,imm8 */ + XMMPRM, /* SIMD r32/mem -> xmm,imm8 */ + XMMS, /* SIMD xmm -> xmm/mem */ + XMMM, /* SIMD mem -> xmm */ + XMMMS, /* SIMD xmm -> mem */ + XMM3MX, /* SIMD r32/mem -> xmm */ + XMM3MXS, /* SIMD xmm -> r32/mem */ + XMMSH, /* SIMD xmm,imm8 */ + XMMXM3, /* SIMD xmm/mem -> r32 */ + XMMX3, /* SIMD xmm -> r32 */ + XMMXMM, /* SIMD xmm/mem -> mm */ + XMMMX, /* SIMD mm -> xmm */ + XMMXM, /* SIMD xmm -> mm */ + XMMFENCE, /* SIMD lfence or mfence */ + XMMSFNC /* SIMD sfence (none or mem) */ +}; + +#define FILL 0x90 /* Fill byte used for alignment (nop) */ + +/* +** Register numbers for the i386 +*/ +#define EAX_REGNO 0 +#define ECX_REGNO 1 +#define EDX_REGNO 2 +#define EBX_REGNO 3 +#define ESP_REGNO 4 +#define EBP_REGNO 5 +#define ESI_REGNO 6 +#define EDI_REGNO 7 + +/* + * modes for immediate values + */ +#define MODE_NONE 0 +#define MODE_IPREL 1 /* signed IP relative value */ +#define MODE_SIGNED 2 /* sign extended immediate */ +#define MODE_IMPLIED 3 /* constant value implied from opcode */ +#define MODE_OFFSET 4 /* offset part of an address */ + +/* + * The letters used in these macros are: + * IND - indirect to another to another table + * "T" - means to Terminate indirections (this is the final opcode) + * "S" - means "operand length suffix required" + * "NS" - means "no suffix" which is the operand length suffix of the opcode + * "Z" - means instruction size arg required + * "u" - means the opcode is invalid in IA32 but valid in amd64 + * "x" - means the opcode is invalid in amd64, but not IA32 + * "y" - means the operand size is always 64 bits in 64 bit mode + * "p" - means push/pop stack operation + */ + +#if defined(DIS_TEXT) && defined(DIS_MEM) +#define IND(table) {table, 0, "", 0, 0, 0, 0, 0, 0} +#define INDx(table) {table, 0, "", 0, 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, name, 0, 0, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, name, 0, 0, 0, 0, 1, 0} +#define TNSx(name, amode) {TERM, amode, name, 0, 0, 1, 0, 0, 0} +#define TNSy(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0, 1} +#define TNSZ(name, amode, sz) {TERM, amode, name, 0, sz, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, name, 0, sz, 0, 1, 0, 0} +#define TS(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0, 0} +#define TSx(name, amode) {TERM, amode, name, 1, 0, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, name, 1, 0, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, name, 1, sz, 0, 0, 0, 0} +#define TSZx(name, amode, sz) {TERM, amode, name, 1, sz, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, amode, name, 1, sz, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, "", 0, 0, 0, 0, 0} +#elif defined(DIS_TEXT) +#define IND(table) {table, 0, "", 0, 0, 0, 0, 0} +#define INDx(table) {table, 0, "", 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, name, 0, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0} +#define TNSx(name, amode) {TERM, amode, name, 0, 1, 0, 0, 0} +#define TNSy(name, amode) {TERM, amode, name, 0, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, name, 0, 0, 1, 0, 1} +#define TNSZ(name, amode, sz) {TERM, amode, name, 0, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, name, 0, 0, 1, 0, 0} +#define TS(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0} +#define TSx(name, amode) {TERM, amode, name, 1, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, name, 1, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, name, 1, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, name, 1, 0, 0, 0, 0} +#define TSZx(name, amode, sz) {TERM, amode, name, 1, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, amode, name, 1, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, "", 0, 0, 0, 0, 0} +#elif defined(DIS_MEM) +#define IND(table) {table, 0, 0, 0, 0, 0, 0} +#define INDx(table) {table, 0, 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, 0, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, 0, 0, 0, 1, 0} +#define TNSy(name, amode) {TERM, amode, 0, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, 0, 0, 1, 0, 1} +#define TNSx(name, amode) {TERM, amode, 0, 1, 0, 0, 0} +#define TNSZ(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, sz, 0, 1, 0, 0} +#define TS(name, amode) {TERM, amode, 0, 0, 0, 0, 0} +#define TSx(name, amode) {TERM, amode, 0, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, 0, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, 0, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0} +#define TSZx(name, amode, sz) {TERM, amode, sz, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, amode, sz, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, 0, 0, 0, 0, 0} +#else +#define IND(table) {table[0], 0, 0, 0, 0, 0} +#define INDx(table) {table[0], 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, 0, 0, 1, 0} +#define TNSy(name, amode) {TERM, amode, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, 0, 1, 0, 1} +#define TNSx(name, amode) {TERM, amode, 1, 0, 0, 0} +#define TNSZ(name, amode, sz) {TERM, amode, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, 0, 1, 0, 0} +#define TS(name, amode) {TERM, amode, 0, 0, 0, 0} +#define TSx(name, amode) {TERM, amode, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, 0, 0, 0, 0} +#define TSZx(name, amode, sz) {TERM, amode, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, amode, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, 0, 0, 0, 0} +#endif + +#ifdef DIS_TEXT +/* + * this decodes the r_m field for mode's 0, 1, 2 in 16 bit mode + */ +const char *const dis_addr16[3][8] = { +"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di)", "", + "(%bx)", +"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di", "(%bp)", + "(%bx)", +"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di)", "(%bp)", + "(%bx)", +}; + + +/* + * This decodes 32 bit addressing mode r_m field for modes 0, 1, 2 + */ +const char *const dis_addr32_mode0[16] = { + "(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "", "(%esi)", "(%edi)", + "(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "", "(%r14d)", "(%r15d)" +}; + +const char *const dis_addr32_mode12[16] = { + "(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "(%ebp)", "(%esi)", "(%edi)", + "(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "(%r13d)", "(%r14d)", "(%r15d)" +}; + +/* + * This decodes 64 bit addressing mode r_m field for modes 0, 1, 2 + */ +const char *const dis_addr64_mode0[16] = { + "(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rip)", "(%rsi)", "(%rdi)", + "(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%rip)", "(%r14)", "(%r15)" +}; +const char *const dis_addr64_mode12[16] = { + "(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rbp)", "(%rsi)", "(%rdi)", + "(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%r13)", "(%r14)", "(%r15)" +}; + +/* + * decode for scale from SIB byte + */ +const char *const dis_scale_factor[4] = { ")", ",2)", ",4)", ",8)" }; + +/* + * register decoding for normal references to registers (ie. not addressing) + */ +const char *const dis_REG8[16] = { + "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh", + "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" +}; + +const char *const dis_REG8_REX[16] = { + "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", + "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" +}; + +const char *const dis_REG16[16] = { + "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", + "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w" +}; + +const char *const dis_REG32[16] = { + "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", + "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" +}; + +const char *const dis_REG64[16] = { + "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" +}; + +const char *const dis_DEBUGREG[16] = { + "%db0", "%db1", "%db2", "%db3", "%db4", "%db5", "%db6", "%db7", + "%db8", "%db9", "%db10", "%db11", "%db12", "%db13", "%db14", "%db15" +}; + +const char *const dis_CONTROLREG[16] = { + "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5?", "%cr6?", "%cr7?", + "%cr8", "%cr9?", "%cr10?", "%cr11?", "%cr12?", "%cr13?", "%cr14?", "%cr15?" +}; + +const char *const dis_TESTREG[16] = { + "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7", + "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7" +}; + +const char *const dis_MMREG[16] = { + "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7", + "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" +}; + +const char *const dis_XMMREG[16] = { + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15" +}; + +const char *const dis_SEGREG[16] = { + "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "<reserved>", "<reserved>", + "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "<reserved>", "<reserved>" +}; + +/* + * SIMD predicate suffixes + */ +const char *const dis_PREDSUFFIX[8] = { + "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord" +}; + + + +#endif /* DIS_TEXT */ + + + + +/* + * "decode table" for 64 bit mode MOVSXD instruction (opcode 0x63) + */ +const instable_t dis_opMOVSLD = TNS("movslq",MOVSXZ); + +/* + * "decode table" for pause and clflush instructions + */ +const instable_t dis_opPause = TNS("pause", NORM); + +/* + * Decode table for 0x0F00 opcodes + */ +const instable_t dis_op0F00[8] = { + +/* [0] */ TNS("sldt",M), TNS("str",M), TNSy("lldt",M), TNSy("ltr",M), +/* [4] */ TNSZ("verr",M,2), TNSZ("verw",M,2), INVALID, INVALID, +}; + + +/* + * Decode table for 0x0F01 opcodes + */ +const instable_t dis_op0F01[8] = { + +/* [0] */ TNSZ("sgdt",MO,6), TNSZ("sidt",MO,6), TNSZ("lgdt",MO,6), TNSZ("lidt",MO,6), +/* [4] */ TNSZ("smsw",M,2), INVALID, TNSZ("lmsw",M,2), TNS("invlpg",SWAPGS), +}; + +/* + * Decode table for 0x0F18 opcodes -- SIMD prefetch + */ +const instable_t dis_op0F18[8] = { + +/* [0] */ TNS("prefetchnta",PREF),TNS("prefetcht0",PREF), TNS("prefetcht1",PREF), TNS("prefetcht2",PREF), +/* [4] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Decode table for 0x0FAE opcodes -- SIMD state save/restore + */ +const instable_t dis_op0FAE[8] = { +/* [0] */ TNSZ("fxsave",M,512), TNSZ("fxrstor",M,512), TNS("ldmxcsr",M), TNS("stmxcsr",M), +/* [4] */ INVALID, TNS("lfence",XMMFENCE), TNS("mfence",XMMFENCE), TNS("sfence",XMMSFNC), +}; + +/* + * Decode table for 0x0FBA opcodes + */ + +const instable_t dis_op0FBA[8] = { + +/* [0] */ INVALID, INVALID, INVALID, INVALID, +/* [4] */ TS("bt",MIb), TS("bts",MIb), TS("btr",MIb), TS("btc",MIb), +}; + +/* + * Decode table for 0x0FC7 opcode + */ + +const instable_t dis_op0FC7[8] = { + +/* [0] */ INVALID, TNS("cmpxchg8b",M), INVALID, INVALID, +/* [4] */ INVALID, INVALID, INVALID, INVALID, +}; + + +/* + * Decode table for 0x0FC8 opcode -- 486 bswap instruction + * + *bit pattern: 0000 1111 1100 1reg + */ +const instable_t dis_op0FC8[4] = { +/* [0] */ TNS("bswap",R), INVALID, INVALID, INVALID, +}; + +/* + * Decode table for 0x0F71, 0x0F72, and 0x0F73 opcodes -- MMX instructions + */ +const instable_t dis_op0F7123[4][8] = { +{ +/* [70].0 */ INVALID, INVALID, INVALID, INVALID, +/* .4 */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [71].0 */ INVALID, INVALID, TNS("psrlw",MMOSH), INVALID, +/* .4 */ TNS("psraw",MMOSH), INVALID, TNS("psllw",MMOSH), INVALID, +}, { +/* [72].0 */ INVALID, INVALID, TNS("psrld",MMOSH), INVALID, +/* .4 */ TNS("psrad",MMOSH), INVALID, TNS("pslld",MMOSH), INVALID, +}, { +/* [73].0 */ INVALID, INVALID, TNS("psrlq",MMOSH), TNS("INVALID",MMOSH), +/* .4 */ INVALID, INVALID, TNS("psllq",MMOSH), TNS("INVALID",MMOSH), +} }; + +/* + * Decode table for SIMD extensions to above 0x0F71-0x0F73 opcodes. + */ +const instable_t dis_opSIMD7123[32] = { +/* [70].0 */ INVALID, INVALID, INVALID, INVALID, +/* .4 */ INVALID, INVALID, INVALID, INVALID, + +/* [71].0 */ INVALID, INVALID, TNS("psrlw",XMMSH), INVALID, +/* .4 */ TNS("psraw",XMMSH), INVALID, TNS("psllw",XMMSH), INVALID, + +/* [72].0 */ INVALID, INVALID, TNS("psrld",XMMSH), INVALID, +/* .4 */ TNS("psrad",XMMSH), INVALID, TNS("pslld",XMMSH), INVALID, + +/* [73].0 */ INVALID, INVALID, TNS("psrlq",XMMSH), TNS("psrldq",XMMSH), +/* .4 */ INVALID, INVALID, TNS("psllq",XMMSH), TNS("pslldq",XMMSH), +}; + +/* + * SIMD instructions have been wedged into the existing IA32 instruction + * set through the use of prefixes. That is, while 0xf0 0x58 may be + * addps, 0xf3 0xf0 0x58 (literally, repz addps) is a completely different + * instruction - addss. At present, three prefixes have been coopted in + * this manner - address size (0x66), repnz (0xf2) and repz (0xf3). The + * following tables are used to provide the prefixed instruction names. + * The arrays are sparse, but they're fast. + */ + +/* + * Decode table for SIMD instructions with the address size (0x66) prefix. + */ +const instable_t dis_opSIMDdata16[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("movupd",XMM,16), TNSZ("movupd",XMMS,16), TNSZ("movlpd",XMMM,8), TNSZ("movlpd",XMMMS,8), +/* [14] */ TNSZ("unpcklpd",XMM,16),TNSZ("unpckhpd",XMM,16),TNSZ("movhpd",XMMM,8), TNSZ("movhpd",XMMMS,8), +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ TNSZ("movapd",XMM,16), TNSZ("movapd",XMMS,16), TNSZ("cvtpi2pd",XMMOMX,8),TNSZ("movntpd",XMMOMS,16), +/* [2C] */ TNSZ("cvttpd2pi",XMMXMM,16),TNSZ("cvtpd2pi",XMMXMM,16),TNSZ("ucomisd",XMM,8),TNSZ("comisd",XMM,8), + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ TNS("movmskpd",XMMOX3), TNSZ("sqrtpd",XMM,16), INVALID, INVALID, +/* [54] */ TNSZ("andpd",XMM,16), TNSZ("andnpd",XMM,16), TNSZ("orpd",XMM,16), TNSZ("xorpd",XMM,16), +/* [58] */ TNSZ("addpd",XMM,16), TNSZ("mulpd",XMM,16), TNSZ("cvtpd2ps",XMM,16),TNSZ("cvtps2dq",XMM,16), +/* [5C] */ TNSZ("subpd",XMM,16), TNSZ("minpd",XMM,16), TNSZ("divpd",XMM,16), TNSZ("maxpd",XMM,16), + +/* [60] */ TNSZ("punpcklbw",XMM,16),TNSZ("punpcklwd",XMM,16),TNSZ("punpckldq",XMM,16),TNSZ("packsswb",XMM,16), +/* [64] */ TNSZ("pcmpgtb",XMM,16), TNSZ("pcmpgtw",XMM,16), TNSZ("pcmpgtd",XMM,16), TNSZ("packuswb",XMM,16), +/* [68] */ TNSZ("punpckhbw",XMM,16),TNSZ("punpckhwd",XMM,16),TNSZ("punpckhdq",XMM,16),TNSZ("packssdw",XMM,16), +/* [6C] */ TNSZ("punpcklqdq",XMM,16),TNSZ("punpckhqdq",XMM,16),TNSZ("movd",XMM3MX,4),TNSZ("movdqa",XMM,16), + +/* [70] */ TNSZ("pshufd",XMMP,16), INVALID, INVALID, INVALID, +/* [74] */ TNSZ("pcmpeqb",XMM,16), TNSZ("pcmpeqw",XMM,16), TNSZ("pcmpeqd",XMM,16), INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, TNSZ("movd",XMM3MXS,4), TNSZ("movdqa",XMMS,16), + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("cmppd",XMMP,16), INVALID, +/* [C4] */ TNSZ("pinsrw",XMMPRM,2),TNS("pextrw",XMM3P), TNSZ("shufpd",XMMP,16), INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, TNSZ("psrlw",XMM,16), TNSZ("psrld",XMM,16), TNSZ("psrlq",XMM,16), +/* [D4] */ TNSZ("paddq",XMM,16), TNSZ("pmullw",XMM,16), TNSZ("movq",XMMS,8), TNS("pmovmskb",XMMX3), +/* [D8] */ TNSZ("psubusb",XMM,16), TNSZ("psubusw",XMM,16), TNSZ("pminub",XMM,16), TNSZ("pand",XMM,16), +/* [DC] */ TNSZ("paddusb",XMM,16), TNSZ("paddusw",XMM,16), TNSZ("pmaxub",XMM,16), TNSZ("pandn",XMM,16), + +/* [E0] */ TNSZ("pavgb",XMM,16), TNSZ("psraw",XMM,16), TNSZ("psrad",XMM,16), TNSZ("pavgw",XMM,16), +/* [E4] */ TNSZ("pmulhuw",XMM,16), TNSZ("pmulhw",XMM,16), TNSZ("cvttpd2dq",XMM,16),TNSZ("movntdq",XMMS,16), +/* [E8] */ TNSZ("psubsb",XMM,16), TNSZ("psubsw",XMM,16), TNSZ("pminsw",XMM,16), TNSZ("por",XMM,16), +/* [EC] */ TNSZ("paddsb",XMM,16), TNSZ("paddsw",XMM,16), TNSZ("pmaxsw",XMM,16), TNSZ("pxor",XMM,16), + +/* [F0] */ INVALID, TNSZ("psllw",XMM,16), TNSZ("pslld",XMM,16), TNSZ("psllq",XMM,16), +/* [F4] */ TNSZ("pmuludq",XMM,16), TNSZ("pmaddwd",XMM,16), TNSZ("psadbw",XMM,16), TNSZ("maskmovdqu", XMMXIMPL,16), +/* [F8] */ TNSZ("psubb",XMM,16), TNSZ("psubw",XMM,16), TNSZ("psubd",XMM,16), TNSZ("psubq",XMM,16), +/* [FC] */ TNSZ("paddb",XMM,16), TNSZ("paddw",XMM,16), TNSZ("paddd",XMM,16), INVALID, +}; + +/* + * Decode table for SIMD instructions with the repnz (0xf2) prefix. + */ +const instable_t dis_opSIMDrepnz[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("movsd",XMM,8), TNSZ("movsd",XMMS,8), INVALID, INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, TNSZ("cvtsi2sd",XMM3MX,4),INVALID, +/* [2C] */ TNSZ("cvttsd2si",XMMXM3,8),TNSZ("cvtsd2si",XMMXM3,8),INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, TNSZ("sqrtsd",XMM,8), INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("addsd",XMM,8), TNSZ("mulsd",XMM,8), TNSZ("cvtsd2ss",XMM,8), INVALID, +/* [5C] */ TNSZ("subsd",XMM,8), TNSZ("minsd",XMM,8), TNSZ("divsd",XMM,8), TNSZ("maxsd",XMM,8), + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ TNSZ("pshuflw",XMMP,16),INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("cmpsd",XMMP,8), INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, TNS("movdq2q",XMMXM), INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, TNSZ("cvtpd2dq",XMM,16),INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Decode table for SIMD instructions with the repz (0xf3) prefix. + */ +const instable_t dis_opSIMDrepz[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("movss",XMM,4), TNSZ("movss",XMMS,4), INVALID, INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, TNSZ("cvtsi2ss",XMM3MX,4),INVALID, +/* [2C] */ TNSZ("cvttss2si",XMMXM3,4),TNSZ("cvtss2si",XMMXM3,4),INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, TNSZ("sqrtss",XMM,4), TNSZ("rsqrtss",XMM,4), TNSZ("rcpss",XMM,4), +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("addss",XMM,4), TNSZ("mulss",XMM,4), TNSZ("cvtss2sd",XMM,4), TNSZ("cvttps2dq",XMM,16), +/* [5C] */ TNSZ("subss",XMM,4), TNSZ("minss",XMM,4), TNSZ("divss",XMM,4), TNSZ("maxss",XMM,4), + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, TNSZ("movdqu",XMM,16), + +/* [70] */ TNSZ("pshufhw",XMMP,16),INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, TNSZ("movq",XMM,8), TNSZ("movdqu",XMMS,16), + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("cmpss",XMMP,4), INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, TNS("movq2dq",XMMMX), INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, TNSZ("cvtdq2pd",XMM,8), INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Decode table for 0x0F opcodes + */ + +const instable_t dis_op0F[16][16] = { +{ +/* [00] */ IND(dis_op0F00), IND(dis_op0F01), TNS("lar",MR), TNS("lsl",MR), +/* [04] */ INVALID, TNS("syscall",NORM), TNS("clts",NORM), TNS("sysret",NORM), +/* [08] */ TNS("invd",NORM), TNS("wbinvd",NORM), INVALID, TNS("ud2",NORM), +/* [0C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [10] */ TNSZ("movups",XMMO,16), TNSZ("movups",XMMOS,16),TNSZ("movlps",XMMO,8), TNSZ("movlps",XMMOS,8), +/* [14] */ TNSZ("unpcklps",XMMO,16),TNSZ("unpckhps",XMMO,16),TNSZ("movhps",XMMOM,8),TNSZ("movhps",XMMOMS,8), +/* [18] */ IND(dis_op0F18), INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [20] */ TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), +/* [24] */ TSx("mov",SREG), INVALID, TSx("mov",SREG), INVALID, +/* [28] */ TNSZ("movaps",XMMO,16), TNSZ("movaps",XMMOS,16),TNSZ("cvtpi2ps",XMMOMX,8),TNSZ("movntps",XMMOS,16), +/* [2C] */ TNSZ("cvttps2pi",XMMOXMM,8),TNSZ("cvtps2pi",XMMOXMM,8),TNSZ("ucomiss",XMMO,4),TNSZ("comiss",XMMO,4), +}, { +/* [30] */ TNS("wrmsr",NORM), TNS("rdtsc",NORM), TNS("rdmsr",NORM), TNS("rdpmc",NORM), +/* [34] */ TNSx("sysenter",NORM), TNSx("sysexit",NORM), INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [40] */ TS("cmovx.o",MR), TS("cmovx.no",MR), TS("cmovx.b",MR), TS("cmovx.ae",MR), +/* [44] */ TS("cmovx.e",MR), TS("cmovx.ne",MR), TS("cmovx.be",MR), TS("cmovx.a",MR), +/* [48] */ TS("cmovx.s",MR), TS("cmovx.ns",MR), TS("cmovx.pe",MR), TS("cmovx.po",MR), +/* [4C] */ TS("cmovx.l",MR), TS("cmovx.ge",MR), TS("cmovx.le",MR), TS("cmovx.g",MR), +}, { +/* [50] */ TNS("movmskps",XMMOX3), TNSZ("sqrtps",XMMO,16), TNSZ("rsqrtps",XMMO,16),TNSZ("rcpps",XMMO,16), +/* [54] */ TNSZ("andps",XMMO,16), TNSZ("andnps",XMMO,16), TNSZ("orps",XMMO,16), TNSZ("xorps",XMMO,16), +/* [58] */ TNSZ("addps",XMMO,16), TNSZ("mulps",XMMO,16), TNSZ("cvtps2pd",XMMO,8),TNSZ("cvtdq2ps",XMMO,16), +/* [5C] */ TNSZ("subps",XMMO,16), TNSZ("minps",XMMO,16), TNSZ("divps",XMMO,16), TNSZ("maxps",XMMO,16), +}, { +/* [60] */ TNSZ("punpcklbw",MMO,4),TNSZ("punpcklwd",MMO,4),TNSZ("punpckldq",MMO,4),TNSZ("packsswb",MMO,8), +/* [64] */ TNSZ("pcmpgtb",MMO,8), TNSZ("pcmpgtw",MMO,8), TNSZ("pcmpgtd",MMO,8), TNSZ("packuswb",MMO,8), +/* [68] */ TNSZ("punpckhbw",MMO,8),TNSZ("punpckhwd",MMO,8),TNSZ("punpckhdq",MMO,8),TNSZ("packssdw",MMO,8), +/* [6C] */ TNSZ("INVALID",MMO,0), TNSZ("INVALID",MMO,0), TNSZ("movd",MMO,4), TNSZ("movq",MMO,8), +}, { +/* [70] */ TNSZ("pshufw",MMOPM,8), TNS("psrXXX",MR), TNS("psrXXX",MR), TNS("psrXXX",MR), +/* [74] */ TNSZ("pcmpeqb",MMO,8), TNSZ("pcmpeqw",MMO,8), TNSZ("pcmpeqd",MMO,8), TNS("emms",NORM), +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, TNSZ("movd",MMOS,4), TNSZ("movq",MMOS,8), +}, { +/* [80] */ TNS("jo",D), TNS("jno",D), TNS("jb",D), TNS("jae",D), +/* [84] */ TNS("je",D), TNS("jne",D), TNS("jbe",D), TNS("ja",D), +/* [88] */ TNS("js",D), TNS("jns",D), TNS("jp",D), TNS("jnp",D), +/* [8C] */ TNS("jl",D), TNS("jge",D), TNS("jle",D), TNS("jg",D), +}, { +/* [90] */ TNS("seto",Mb), TNS("setno",Mb), TNS("setb",Mb), TNS("setae",Mb), +/* [94] */ TNS("sete",Mb), TNS("setne",Mb), TNS("setbe",Mb), TNS("seta",Mb), +/* [98] */ TNS("sets",Mb), TNS("setns",Mb), TNS("setp",Mb), TNS("setnp",Mb), +/* [9C] */ TNS("setl",Mb), TNS("setge",Mb), TNS("setle",Mb), TNS("setg",Mb), +}, { +/* [A0] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("cpuid",NORM), TS("bt",RMw), +/* [A4] */ TS("shld",DSHIFT), TS("shld",DSHIFTcl), INVALID, INVALID, +/* [A8] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("rsm",NORM), TS("bts",RMw), +/* [AC] */ TS("shrd",DSHIFT), TS("shrd",DSHIFTcl), IND(dis_op0FAE), TS("imul",MRw), +}, { +/* [B0] */ TNS("cmpxchgb",RMw), TS("cmpxchg",RMw), TS("lss",MR), TS("btr",RMw), +/* [B4] */ TS("lfs",MR), TS("lgs",MR), TS("movzb",MOVZ), TNS("movzwl",MOVZ), +/* [B8] */ INVALID, INVALID, IND(dis_op0FBA), TS("btc",RMw), +/* [BC] */ TS("bsf",MRw), TS("bsr",MRw), TS("movsb",MOVZ), TNS("movswl",MOVZ), +}, { +/* [C0] */ TNS("xaddb",XADDB), TS("xadd",RMw), TNSZ("cmpps",XMMOPM,16),TNS("movnti",RM), +/* [C4] */ TNSZ("pinsrw",MMOPRM,2),TNS("pextrw",MMO3P), TNSZ("shufps",XMMOPM,16),IND(dis_op0FC7), +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [D0] */ INVALID, TNSZ("psrlw",MMO,8), TNSZ("psrld",MMO,8), TNSZ("psrlq",MMO,8), +/* [D4] */ TNSZ("paddq",MMO,8), TNSZ("pmullw",MMO,8), TNSZ("INVALID",MMO,0), TNS("pmovmskb",MMOM3), +/* [D8] */ TNSZ("psubusb",MMO,8), TNSZ("psubusw",MMO,8), TNSZ("pminub",MMO,8), TNSZ("pand",MMO,8), +/* [DC] */ TNSZ("paddusb",MMO,8), TNSZ("paddusw",MMO,8), TNSZ("pmaxub",MMO,8), TNSZ("pandn",MMO,8), +}, { +/* [E0] */ TNSZ("pavgb",MMO,8), TNSZ("psraw",MMO,8), TNSZ("psrad",MMO,8), TNSZ("pavgw",MMO,8), +/* [E4] */ TNSZ("pmulhuw",MMO,8), TNSZ("pmulhw",MMO,8), TNS("INVALID",XMMO), TNSZ("movntq",MMOMS,8), +/* [E8] */ TNSZ("psubsb",MMO,8), TNSZ("psubsw",MMO,8), TNSZ("pminsw",MMO,8), TNSZ("por",MMO,8), +/* [EC] */ TNSZ("paddsb",MMO,8), TNSZ("paddsw",MMO,8), TNSZ("pmaxsw",MMO,8), TNSZ("pxor",MMO,8), +}, { +/* [F0] */ INVALID, TNSZ("psllw",MMO,8), TNSZ("pslld",MMO,8), TNSZ("psllq",MMO,8), +/* [F4] */ TNSZ("pmuludq",MMO,8), TNSZ("pmaddwd",MMO,8), TNSZ("psadbw",MMO,8), TNSZ("maskmovq",MMOIMPL,8), +/* [F8] */ TNSZ("psubb",MMO,8), TNSZ("psubw",MMO,8), TNSZ("psubd",MMO,8), TNSZ("psubq",MMO,8), +/* [FC] */ TNSZ("paddb",MMO,8), TNSZ("paddw",MMO,8), TNSZ("paddd",MMO,8), INVALID, +} }; + + +/* + * Decode table for 0x80 opcodes + */ + +const instable_t dis_op80[8] = { + +/* [0] */ TNS("addb",IMlw), TNS("orb",IMw), TNS("adcb",IMlw), TNS("sbbb",IMlw), +/* [4] */ TNS("andb",IMw), TNS("subb",IMlw), TNS("xorb",IMw), TNS("cmpb",IMlw), +}; + + +/* + * Decode table for 0x81 opcodes. + */ + +const instable_t dis_op81[8] = { + +/* [0] */ TS("add",IMlw), TS("or",IMw), TS("adc",IMlw), TS("sbb",IMlw), +/* [4] */ TS("and",IMw), TS("sub",IMlw), TS("xor",IMw), TS("cmp",IMlw), +}; + + +/* + * Decode table for 0x82 opcodes. + */ + +const instable_t dis_op82[8] = { + +/* [0] */ TNSx("addb",IMlw), TNSx("orb",IMlw), TNSx("adcb",IMlw), TNSx("sbbb",IMlw), +/* [4] */ TNSx("andb",IMlw), TNSx("subb",IMlw), TNSx("xorb",IMlw), TNSx("cmpb",IMlw), +}; +/* + * Decode table for 0x83 opcodes. + */ + +const instable_t dis_op83[8] = { + +/* [0] */ TS("add",IMlw), TS("or",IMlw), TS("adc",IMlw), TS("sbb",IMlw), +/* [4] */ TS("and",IMlw), TS("sub",IMlw), TS("xor",IMlw), TS("cmp",IMlw), +}; + +/* + * Decode table for 0xC0 opcodes. + */ + +const instable_t dis_opC0[8] = { + +/* [0] */ TNS("rolb",MvI), TNS("rorb",MvI), TNS("rclb",MvI), TNS("rcrb",MvI), +/* [4] */ TNS("shlb",MvI), TNS("shrb",MvI), INVALID, TNS("sarb",MvI), +}; + +/* + * Decode table for 0xD0 opcodes. + */ + +const instable_t dis_opD0[8] = { + +/* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), +/* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), +}; + +/* + * Decode table for 0xC1 opcodes. + * 186 instruction set + */ + +const instable_t dis_opC1[8] = { + +/* [0] */ TS("rol",MvI), TS("ror",MvI), TS("rcl",MvI), TS("rcr",MvI), +/* [4] */ TS("shl",MvI), TS("shr",MvI), TS("sal",MvI), TS("sar",MvI), +}; + +/* + * Decode table for 0xD1 opcodes. + */ + +const instable_t dis_opD1[8] = { + +/* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), +/* [4] */ TS("shl",Mv), TS("shr",Mv), TS("sal",Mv), TS("sar",Mv), +}; + + +/* + * Decode table for 0xD2 opcodes. + */ + +const instable_t dis_opD2[8] = { + +/* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), +/* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), +}; +/* + * Decode table for 0xD3 opcodes. + */ + +const instable_t dis_opD3[8] = { + +/* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), +/* [4] */ TS("shl",Mv), TS("shr",Mv), TS("salb",Mv), TS("sar",Mv), +}; + + +/* + * Decode table for 0xF6 opcodes. + */ + +const instable_t dis_opF6[8] = { + +/* [0] */ TNS("testb",IMw), TNS("testb",IMw), TNS("notb",Mw), TNS("negb",Mw), +/* [4] */ TNS("mulb",MA), TNS("imulb",MA), TNS("divb",MA), TNS("idivb",MA), +}; + + +/* + * Decode table for 0xF7 opcodes. + */ + +const instable_t dis_opF7[8] = { + +/* [0] */ TS("test",IMw), TS("test",IMw), TS("not",Mw), TS("neg",Mw), +/* [4] */ TS("mul",MA), TS("imul",MA), TS("div",MA), TS("idiv",MA), +}; + + +/* + * Decode table for 0xFE opcodes. + */ + +const instable_t dis_opFE[8] = { + +/* [0] */ TNS("incb",Mw), TNS("decb",Mw), INVALID, INVALID, +/* [4] */ INVALID, INVALID, INVALID, INVALID, +}; +/* + * Decode table for 0xFF opcodes. + */ + +const instable_t dis_opFF[8] = { + +/* [0] */ TS("inc",Mw), TS("dec",Mw), TNSyp("call",INM), TNS("lcall",INM), +/* [4] */ TNSy("jmp",INM), TNS("ljmp",INM), TSp("push",M), INVALID, +}; + +/* for 287 instructions, which are a mess to decode */ + +const instable_t dis_opFP1n2[8][8] = { +{ +/* bit pattern: 1101 1xxx MODxx xR/M */ +/* [0,0] */ TNS("fadds",M), TNS("fmuls",M), TNS("fcoms",M), TNS("fcomps",M), +/* [0,4] */ TNS("fsubs",M), TNS("fsubrs",M), TNS("fdivs",M), TNS("fdivrs",M), +}, { +/* [1,0] */ TNS("flds",M), INVALID, TNS("fsts",M), TNS("fstps",M), +/* [1,4] */ TNSZ("fldenv",M,28), TNSZ("fldcw",M,2), TNSZ("fnstenv",M,28), TNSZ("fnstcw",M,2), +}, { +/* [2,0] */ TNS("fiaddl",M), TNS("fimull",M), TNS("ficoml",M), TNS("ficompl",M), +/* [2,4] */ TNS("fisubl",M), TNS("fisubrl",M), TNS("fidivl",M), TNS("fidivrl",M), +}, { +/* [3,0] */ TNS("fildl",M), INVALID, TNS("fistl",M), TNS("fistpl",M), +/* [3,4] */ INVALID, TNSZ("fldt",M,10), INVALID, TNSZ("fstpt",M,10), +}, { +/* [4,0] */ TNSZ("faddl",M,8), TNSZ("fmull",M,8), TNSZ("fcoml",M,8), TNSZ("fcompl",M,8), +/* [4,1] */ TNSZ("fsubl",M,8), TNSZ("fsubrl",M,8), TNSZ("fdivl",M,8), TNSZ("fdivrl",M,8), +}, { +/* [5,0] */ TNSZ("fldl",M,8), INVALID, TNSZ("fstl",M,8), TNSZ("fstpl",M,8), +/* [5,4] */ TNSZ("frstor",M,108), INVALID, TNSZ("fnsave",M,108), TNSZ("fnstsw",M,2), +}, { +/* [6,0] */ TNSZ("fiadd",M,2), TNSZ("fimul",M,2), TNSZ("ficom",M,2), TNSZ("ficomp",M,2), +/* [6,4] */ TNSZ("fisub",M,2), TNSZ("fisubr",M,2), TNSZ("fidiv",M,2), TNSZ("fidivr",M,2), +}, { +/* [7,0] */ TNSZ("fild",M,2), INVALID, TNSZ("fist",M,2), TNSZ("fistp",M,2), +/* [7,4] */ TNSZ("fbld",M,10), TNSZ("fildll",M,8), TNSZ("fbstp",M,10), TNSZ("fistpll",M,8), +} }; + +const instable_t dis_opFP3[8][8] = { +{ +/* bit pattern: 1101 1xxx 11xx xREG */ +/* [0,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), +/* [0,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), +}, { +/* [1,0] */ TNS("fld",F), TNS("fxch",F), TNS("fnop",NORM), TNS("fstp",F), +/* [1,4] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [2,0] */ INVALID, INVALID, INVALID, INVALID, +/* [2,4] */ INVALID, TNS("fucompp",NORM), INVALID, INVALID, +}, { +/* [3,0] */ INVALID, INVALID, INVALID, INVALID, +/* [3,4] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [4,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), +/* [4,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), +}, { +/* [5,0] */ TNS("ffree",F), TNS("fxch",F), TNS("fst",F), TNS("fstp",F), +/* [5,4] */ TNS("fucom",F), TNS("fucomp",F), INVALID, INVALID, +}, { +/* [6,0] */ TNS("faddp",FF), TNS("fmulp",FF), TNS("fcomp",F), TNS("fcompp",NORM), +/* [6,4] */ TNS("fsubp",FF), TNS("fsubrp",FF), TNS("fdivp",FF), TNS("fdivrp",FF), +}, { +/* [7,0] */ TNS("ffree",F), TNS("fxch",F), TNS("fstp",F), TNS("fstp",F), +/* [7,4] */ TNS("fnstsw",M), TNS("fucomip",FFC), TNS("fcomip",FFC), INVALID, +} }; + +const instable_t dis_opFP4[4][8] = { +{ +/* bit pattern: 1101 1001 111x xxxx */ +/* [0,0] */ TNS("fchs",NORM), TNS("fabs",NORM), INVALID, INVALID, +/* [0,4] */ TNS("ftst",NORM), TNS("fxam",NORM), TNS("ftstp",NORM), INVALID, +}, { +/* [1,0] */ TNS("fld1",NORM), TNS("fldl2t",NORM), TNS("fldl2e",NORM), TNS("fldpi",NORM), +/* [1,4] */ TNS("fldlg2",NORM), TNS("fldln2",NORM), TNS("fldz",NORM), INVALID, +}, { +/* [2,0] */ TNS("f2xm1",NORM), TNS("fyl2x",NORM), TNS("fptan",NORM), TNS("fpatan",NORM), +/* [2,4] */ TNS("fxtract",NORM), TNS("fprem1",NORM), TNS("fdecstp",NORM), TNS("fincstp",NORM), +}, { +/* [3,0] */ TNS("fprem",NORM), TNS("fyl2xp1",NORM), TNS("fsqrt",NORM), TNS("fsincos",NORM), +/* [3,4] */ TNS("frndint",NORM), TNS("fscale",NORM), TNS("fsin",NORM), TNS("fcos",NORM), +} }; + +const instable_t dis_opFP5[8] = { +/* bit pattern: 1101 1011 111x xxxx */ +/* [0] */ TNS("feni",NORM), TNS("fdisi",NORM), TNS("fnclex",NORM), TNS("fninit",NORM), +/* [4] */ TNS("fsetpm",NORM), TNS("frstpm",NORM), INVALID, INVALID, +}; + +const instable_t dis_opFP6[8] = { +/* bit pattern: 1101 1011 11yy yxxx */ +/* [00] */ TNS("fcmov.nb",FF), TNS("fcmov.ne",FF), TNS("fcmov.nbe",FF), TNS("fcmov.nu",FF), +/* [04] */ INVALID, TNS("fucomi",F), TNS("fcomi",F), INVALID, +}; + +const instable_t dis_opFP7[8] = { +/* bit pattern: 1101 1010 11yy yxxx */ +/* [00] */ TNS("fcmov.b",FF), TNS("fcmov.e",FF), TNS("fcmov.be",FF), TNS("fcmov.u",FF), +/* [04] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Main decode table for the op codes. The first two nibbles + * will be used as an index into the table. If there is a + * a need to further decode an instruction, the array to be + * referenced is indicated with the other two entries being + * empty. + */ + +const instable_t dis_distable[16][16] = { +{ +/* [0,0] */ TNS("addb",RMw), TS("add",RMw), TNS("addb",MRw), TS("add",MRw), +/* [0,4] */ TNS("addb",IA), TS("add",IA), TSx("push",SEG), TSx("pop",SEG), +/* [0,8] */ TNS("orb",RMw), TS("or",RMw), TNS("orb",MRw), TS("or",MRw), +/* [0,C] */ TNS("orb",IA), TS("or",IA), TSx("push",SEG), IND(&dis_op0F[0][0]), +}, { +/* [1,0] */ TNS("adcb",RMw), TS("adc",RMw), TNS("adcb",MRw), TS("adc",MRw), +/* [1,4] */ TNS("adcb",IA), TS("adc",IA), TSx("push",SEG), TSx("pop",SEG), +/* [1,8] */ TNS("sbbb",RMw), TS("sbb",RMw), TNS("sbbb",MRw), TS("sbb",MRw), +/* [1,C] */ TNS("sbbb",IA), TS("sbb",IA), TSx("push",SEG), TSx("pop",SEG), +}, { +/* [2,0] */ TNS("andb",RMw), TS("and",RMw), TNS("andb",MRw), TS("and",MRw), +/* [2,4] */ TNS("andb",IA), TS("and",IA), TNSx("%es:",OVERRIDE), TNSx("daa",NORM), +/* [2,8] */ TNS("subb",RMw), TS("sub",RMw), TNS("subb",MRw), TS("sub",MRw), +/* [2,C] */ TNS("subb",IA), TS("sub",IA), TNSx("%cs:",OVERRIDE), TNSx("das",NORM), +}, { +/* [3,0] */ TNS("xorb",RMw), TS("xor",RMw), TNS("xorb",MRw), TS("xor",MRw), +/* [3,4] */ TNS("xorb",IA), TS("xor",IA), TNSx("%ss:",OVERRIDE), TNSx("aaa",NORM), +/* [3,8] */ TNS("cmpb",RMw), TS("cmp",RMw), TNS("cmpb",MRw), TS("cmp",MRw), +/* [3,C] */ TNS("cmpb",IA), TS("cmp",IA), TNSx("%ds:",OVERRIDE), TNSx("aas",NORM), +}, { +/* [4,0] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), +/* [4,4] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), +/* [4,8] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), +/* [4,C] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), +}, { +/* [5,0] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), +/* [5,4] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), +/* [5,8] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), +/* [5,C] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), +}, { +/* [6,0] */ TSZx("pusha",IMPLMEM,28),TSZx("popa",IMPLMEM,28), TSx("bound",MR), TNS("arpl",RMw), +/* [6,4] */ TNS("%fs:",OVERRIDE), TNS("%gs:",OVERRIDE), TNS("data16",DM), TNS("addr16",AM), +/* [6,8] */ TSp("push",I), TS("imul",IMUL), TSp("push",Ib), TS("imul",IMUL), +/* [6,C] */ TNSZ("insb",IMPLMEM,1), TSZ("ins",IMPLMEM,4), TNSZ("outsb",IMPLMEM,1),TSZ("outs",IMPLMEM,4), +}, { +/* [7,0] */ TNSy("jo",BD), TNSy("jno",BD), TNSy("jb",BD), TNSy("jae",BD), +/* [7,4] */ TNSy("je",BD), TNSy("jne",BD), TNSy("jbe",BD), TNSy("ja",BD), +/* [7,8] */ TNSy("js",BD), TNSy("jns",BD), TNSy("jp",BD), TNSy("jnp",BD), +/* [7,C] */ TNSy("jl",BD), TNSy("jge",BD), TNSy("jle",BD), TNSy("jg",BD), +}, { +/* [8,0] */ IND(dis_op80), IND(dis_op81), INDx(dis_op82), IND(dis_op83), +/* [8,4] */ TNS("testb",RMw), TS("test",RMw), TNS("xchgb",RMw), TS("xchg",RMw), +/* [8,8] */ TNS("movb",RMw), TS("mov",RMw), TNS("movb",MRw), TS("mov",MRw), +/* [8,C] */ TNS("movw",SM), TS("lea",MR), TNS("movw",MS), TSp("pop",M), +}, { +/* [9,0] */ TNS("nop",NORM), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), +/* [9,4] */ TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), +/* [9,8] */ TNS("cXtX",CBW), TNS("cXtX",CWD), TNSx("lcall",SO), TNS("fwait",NORM), +/* [9,C] */ TSZy("pushf",IMPLMEM,4),TSZy("popf",IMPLMEM,4), TNSx("sahf",NORM), TNSx("lahf",NORM), +}, { +/* [A,0] */ TNS("movb",OA), TS("mov",OA), TNS("movb",AO), TS("mov",AO), +/* [A,4] */ TNSZ("movsb",SD,1), TS("movs",SD), TNSZ("cmpsb",SD,1), TS("cmps",SD), +/* [A,8] */ TNS("testb",IA), TS("test",IA), TNS("stosb",AD), TS("stos",AD), +/* [A,C] */ TNS("lodsb",SA), TS("lods",SA), TNS("scasb",AD), TS("scas",AD), +}, { +/* [B,0] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), +/* [B,4] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), +/* [B,8] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), +/* [B,C] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), +}, { +/* [C,0] */ IND(dis_opC0), IND(dis_opC1), TNSyp("ret",RET), TNSyp("ret",NORM), +/* [C,4] */ TNSx("les",MR), TNSx("lds",MR), TNS("movb",IMw), TS("mov",IMw), +/* [C,8] */ TNSyp("enter",ENTER), TNSyp("leave",NORM), TNS("lret",RET), TNS("lret",NORM), +/* [C,C] */ TNS("int",INT3), TNS("int",INTx), TNSx("into",NORM), TNS("iret",NORM), +}, { +/* [D,0] */ IND(dis_opD0), IND(dis_opD1), IND(dis_opD2), IND(dis_opD3), +/* [D,4] */ TNSx("aam",U), TNSx("aad",U), TNSx("falc",NORM), TNSZ("xlat",IMPLMEM,1), + +/* 287 instructions. Note that although the indirect field */ +/* indicates opFP1n2 for further decoding, this is not necessarily */ +/* the case since the opFP arrays are not partitioned according to key1 */ +/* and key2. opFP1n2 is given only to indicate that we haven't */ +/* finished decoding the instruction. */ +/* [D,8] */ IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), +/* [D,C] */ IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), +}, { +/* [E,0] */ TNSy("loopnz",BD), TNSy("loopz",BD), TNSy("loop",BD), TNSy("jcxz",BD), +/* [E,4] */ TNS("inb",P), TS("in",P), TNS("outb",P), TS("out",P), +/* [E,8] */ TNSyp("call",D), TNSy("jmp",D), TNSx("ljmp",SO), TNSy("jmp",BD), +/* [E,C] */ TNS("inb",V), TS("in",V), TNS("outb",V), TS("out",V), +}, { +/* [F,0] */ TNS("lock",LOCK), TNS("icebp", NORM), TNS("repnz",PREFIX), TNS("repz",PREFIX), +/* [F,4] */ TNS("hlt",NORM), TNS("cmc",NORM), IND(dis_opF6), IND(dis_opF7), +/* [F,8] */ TNS("clc",NORM), TNS("stc",NORM), TNS("cli",NORM), TNS("sti",NORM), +/* [F,C] */ TNS("cld",NORM), TNS("std",NORM), IND(dis_opFE), IND(dis_opFF), +} }; + +/* END CSTYLED */ + +/* + * common functions to decode and disassemble an x86 or amd64 instruction + */ + +/* + * These are the individual fields of a REX prefix. Note that a REX + * prefix with none of these set is still needed to: + * - use the MOVSXD (sign extend 32 to 64 bits) instruction + * - access the %sil, %dil, %bpl, %spl registers + */ +#define REX_W 0x08 /* 64 bit operand size when set */ +#define REX_R 0x04 /* high order bit extension of ModRM reg field */ +#define REX_X 0x02 /* high order bit extension of SIB index field */ +#define REX_B 0x01 /* extends ModRM r_m, SIB base, or opcode reg */ + +static uint_t opnd_size; /* SIZE16, SIZE32 or SIZE64 */ +static uint_t addr_size; /* SIZE16, SIZE32 or SIZE64 */ + +/* + * Even in 64 bit mode, usually only 4 byte immediate operands are supported. + */ +static int isize[] = {1, 2, 4, 4}; +static int isize64[] = {1, 2, 4, 8}; + +/* + * Just a bunch of useful macros. + */ +#define WBIT(x) (x & 0x1) /* to get w bit */ +#define REGNO(x) (x & 0x7) /* to get 3 bit register */ +#define VBIT(x) ((x)>>1 & 0x1) /* to get 'v' bit */ +#define OPSIZE(osize, wbit) ((wbit) ? isize[osize] : 1) +#define OPSIZE64(osize, wbit) ((wbit) ? isize64[osize] : 1) + +#define REG_ONLY 3 /* mode to indicate a register operand (not memory) */ + +#define BYTE_OPND 0 /* w-bit value indicating byte register */ +#define LONG_OPND 1 /* w-bit value indicating opnd_size register */ +#define MM_OPND 2 /* "value" used to indicate a mmx reg */ +#define XMM_OPND 3 /* "value" used to indicate a xmm reg */ +#define SEG_OPND 4 /* "value" used to indicate a segment reg */ +#define CONTROL_OPND 5 /* "value" used to indicate a control reg */ +#define DEBUG_OPND 6 /* "value" used to indicate a debug reg */ +#define TEST_OPND 7 /* "value" used to indicate a test reg */ +#define WORD_OPND 8 /* w-bit value indicating word size reg */ + +/* + * Get the next byte and separate the op code into the high and low nibbles. + */ +static int +dtrace_get_opcode(dis86_t *x, uint_t *high, uint_t *low) +{ + int byte; + + /* + * x86 instructions have a maximum length of 15 bytes. Bail out if + * we try to read more. + */ + if (x->d86_len >= 15) + return (x->d86_error = 1); + + if (x->d86_error) + return (1); + byte = x->d86_get_byte(x->d86_data); + if (byte < 0) + return (x->d86_error = 1); + x->d86_bytes[x->d86_len++] = byte; + *low = byte & 0xf; /* ----xxxx low 4 bits */ + *high = byte >> 4 & 0xf; /* xxxx---- bits 7 to 4 */ + return (0); +} + +/* + * Get and decode an SIB (scaled index base) byte + */ +static void +dtrace_get_SIB(dis86_t *x, uint_t *ss, uint_t *index, uint_t *base) +{ + int byte; + + if (x->d86_error) + return; + + byte = x->d86_get_byte(x->d86_data); + if (byte < 0) { + x->d86_error = 1; + return; + } + x->d86_bytes[x->d86_len++] = byte; + + *base = byte & 0x7; + *index = (byte >> 3) & 0x7; + *ss = (byte >> 6) & 0x3; +} + +/* + * Get the byte following the op code and separate it into the + * mode, register, and r/m fields. + */ +static void +dtrace_get_modrm(dis86_t *x, uint_t *mode, uint_t *reg, uint_t *r_m) +{ + if (x->d86_got_modrm == 0) { + if (x->d86_rmindex == -1) + x->d86_rmindex = x->d86_len; + dtrace_get_SIB(x, mode, reg, r_m); + x->d86_got_modrm = 1; + } +} + +/* + * Adjust register selection based on any REX prefix bits present. + */ +/*ARGSUSED*/ +static void +dtrace_rex_adjust(uint_t rex_prefix, uint_t mode, uint_t *reg, uint_t *r_m) +{ + if (reg != NULL && r_m == NULL) { + if (rex_prefix & REX_B) + *reg += 8; + } else { + if (reg != NULL && (REX_R & rex_prefix) != 0) + *reg += 8; + if (r_m != NULL && (REX_B & rex_prefix) != 0) + *r_m += 8; + } +} + +/* + * Get an immediate operand of the given size, with sign extension. + */ +static void +dtrace_imm_opnd(dis86_t *x, int wbit, int size, int opindex) +{ + int i; + int byte; + int valsize = 0; + + if (x->d86_numopnds < opindex + 1) + x->d86_numopnds = opindex + 1; + + switch (wbit) { + case BYTE_OPND: + valsize = 1; + break; + case LONG_OPND: + if (x->d86_opnd_size == SIZE16) + valsize = 2; + else if (x->d86_opnd_size == SIZE32) + valsize = 4; + else + valsize = 8; + break; + case MM_OPND: + case XMM_OPND: + case SEG_OPND: + case CONTROL_OPND: + case DEBUG_OPND: + case TEST_OPND: + valsize = size; + break; + case WORD_OPND: + valsize = 2; + break; + } + if (valsize < size) + valsize = size; + + if (x->d86_error) + return; + x->d86_opnd[opindex].d86_value = 0; + for (i = 0; i < size; ++i) { + byte = x->d86_get_byte(x->d86_data); + if (byte < 0) { + x->d86_error = 1; + return; + } + x->d86_bytes[x->d86_len++] = byte; + x->d86_opnd[opindex].d86_value |= (uint64_t)byte << (i * 8); + } + /* Do sign extension */ + if (x->d86_bytes[x->d86_len - 1] & 0x80) { + for (; i < valsize; i++) + x->d86_opnd[opindex].d86_value |= + (uint64_t)0xff << (i* 8); + } +#ifdef DIS_TEXT + x->d86_opnd[opindex].d86_mode = MODE_SIGNED; + x->d86_opnd[opindex].d86_value_size = valsize; + x->d86_imm_bytes += size; +#endif +} + +/* + * Get an ip relative operand of the given size, with sign extension. + */ +static void +dtrace_disp_opnd(dis86_t *x, int wbit, int size, int opindex) +{ + dtrace_imm_opnd(x, wbit, size, opindex); +#ifdef DIS_TEXT + x->d86_opnd[opindex].d86_mode = MODE_IPREL; +#endif +} + +/* + * Check to see if there is a segment override prefix pending. + * If so, print it in the current 'operand' location and set + * the override flag back to false. + */ +/*ARGSUSED*/ +static void +dtrace_check_override(dis86_t *x, int opindex) +{ +#ifdef DIS_TEXT + if (x->d86_seg_prefix) { + (void) strlcat(x->d86_opnd[opindex].d86_prefix, + x->d86_seg_prefix, PFIXLEN); + } +#endif + x->d86_seg_prefix = NULL; +} + + +/* + * Process a single instruction Register or Memory operand. + * + * mode = addressing mode from ModRM byte + * r_m = r_m (or reg if mode == 3) field from ModRM byte + * wbit = indicates which register (8bit, 16bit, ... MMX, etc.) set to use. + * o = index of operand that we are processing (0, 1 or 2) + * + * the value of reg or r_m must have already been adjusted for any REX prefix. + */ +/*ARGSUSED*/ +static void +dtrace_get_operand(dis86_t *x, uint_t mode, uint_t r_m, int wbit, int opindex) +{ + int have_SIB = 0; /* flag presence of scale-index-byte */ + uint_t ss; /* scale-factor from opcode */ + uint_t index; /* index register number */ + uint_t base; /* base register number */ + int dispsize; /* size of displacement in bytes */ +#ifdef DIS_TEXT + char *opnd = x->d86_opnd[opindex].d86_opnd; +#endif + + if (x->d86_numopnds < opindex + 1) + x->d86_numopnds = opindex + 1; + + if (x->d86_error) + return; + + /* + * first handle a simple register + */ + if (mode == REG_ONLY) { +#ifdef DIS_TEXT + switch (wbit) { + case MM_OPND: + (void) strlcat(opnd, dis_MMREG[r_m], OPLEN); + break; + case XMM_OPND: + (void) strlcat(opnd, dis_XMMREG[r_m], OPLEN); + break; + case SEG_OPND: + (void) strlcat(opnd, dis_SEGREG[r_m], OPLEN); + break; + case CONTROL_OPND: + (void) strlcat(opnd, dis_CONTROLREG[r_m], OPLEN); + break; + case DEBUG_OPND: + (void) strlcat(opnd, dis_DEBUGREG[r_m], OPLEN); + break; + case TEST_OPND: + (void) strlcat(opnd, dis_TESTREG[r_m], OPLEN); + break; + case BYTE_OPND: + if (x->d86_rex_prefix == 0) + (void) strlcat(opnd, dis_REG8[r_m], OPLEN); + else + (void) strlcat(opnd, dis_REG8_REX[r_m], OPLEN); + break; + case WORD_OPND: + (void) strlcat(opnd, dis_REG16[r_m], OPLEN); + break; + case LONG_OPND: + if (x->d86_opnd_size == SIZE16) + (void) strlcat(opnd, dis_REG16[r_m], OPLEN); + else if (x->d86_opnd_size == SIZE32) + (void) strlcat(opnd, dis_REG32[r_m], OPLEN); + else + (void) strlcat(opnd, dis_REG64[r_m], OPLEN); + break; + } +#endif /* DIS_TEXT */ + return; + } + + /* + * if symbolic representation, skip override prefix, if any + */ + dtrace_check_override(x, opindex); + + /* + * Handle 16 bit memory references first, since they decode + * the mode values more simply. + * mode 1 is r_m + 8 bit displacement + * mode 2 is r_m + 16 bit displacement + * mode 0 is just r_m, unless r_m is 6 which is 16 bit disp + */ + if (x->d86_addr_size == SIZE16) { + if ((mode == 0 && r_m == 6) || mode == 2) + dtrace_imm_opnd(x, WORD_OPND, 2, opindex); + else if (mode == 1) + dtrace_imm_opnd(x, BYTE_OPND, 1, opindex); +#ifdef DIS_TEXT + if (mode == 0 && r_m == 6) + x->d86_opnd[opindex].d86_mode = MODE_SIGNED; + else if (mode == 0) + x->d86_opnd[opindex].d86_mode = MODE_NONE; + else + x->d86_opnd[opindex].d86_mode = MODE_OFFSET; + (void) strlcat(opnd, dis_addr16[mode][r_m], OPLEN); +#endif + return; + } + + /* + * 32 and 64 bit addressing modes are more complex since they + * can involve an SIB (scaled index and base) byte to decode. + */ + if (r_m == ESP_REGNO || r_m == ESP_REGNO + 8) { + have_SIB = 1; + dtrace_get_SIB(x, &ss, &index, &base); + if (x->d86_error) + return; + if (base != 5 || mode != 0) + if (x->d86_rex_prefix & REX_B) + base += 8; + if (x->d86_rex_prefix & REX_X) + index += 8; + } else { + base = r_m; + } + + /* + * Compute the displacement size and get its bytes + */ + dispsize = 0; + + if (mode == 1) + dispsize = 1; + else if (mode == 2) + dispsize = 4; + else if ((r_m & 7) == EBP_REGNO || + (have_SIB && (base & 7) == EBP_REGNO)) + dispsize = 4; + + if (dispsize > 0) { + dtrace_imm_opnd(x, dispsize == 4 ? LONG_OPND : BYTE_OPND, + dispsize, opindex); + if (x->d86_error) + return; + } + +#ifdef DIS_TEXT + if (dispsize > 0) + x->d86_opnd[opindex].d86_mode = MODE_OFFSET; + + if (have_SIB == 0) { + if (x->d86_mode == SIZE32) { + if (mode == 0) + (void) strlcat(opnd, dis_addr32_mode0[r_m], + OPLEN); + else + (void) strlcat(opnd, dis_addr32_mode12[r_m], + OPLEN); + } else { + if (mode == 0) + (void) strlcat(opnd, dis_addr64_mode0[r_m], + OPLEN); + else + (void) strlcat(opnd, dis_addr64_mode12[r_m], + OPLEN); + } + } else { + uint_t need_paren = 0; + char **regs; + if (x->d86_mode == SIZE32) /* NOTE this is not addr_size! */ + regs = (char **)dis_REG32; + else + regs = (char **)dis_REG64; + + /* + * print the base (if any) + */ + if (base == EBP_REGNO && mode == 0) { + if (index != ESP_REGNO) { + (void) strlcat(opnd, "(", OPLEN); + need_paren = 1; + } + } else { + (void) strlcat(opnd, "(", OPLEN); + (void) strlcat(opnd, regs[base], OPLEN); + need_paren = 1; + } + + /* + * print the index (if any) + */ + if (index != ESP_REGNO) { + (void) strlcat(opnd, ",", OPLEN); + (void) strlcat(opnd, regs[index], OPLEN); + (void) strlcat(opnd, dis_scale_factor[ss], OPLEN); + } else + if (need_paren) + (void) strlcat(opnd, ")", OPLEN); + } +#endif +} + +/* + * Operand sequence for standard instruction involving one register + * and one register/memory operand. + * wbit indicates a byte(0) or opnd_size(1) operation + * vbit indicates direction (0 for "opcode r,r_m") or (1 for "opcode r_m, r") + */ +#define STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, vbit) { \ + dtrace_get_modrm(x, &mode, ®, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, vbit); \ + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1 - vbit); \ +} + +/* + * Similar to above, but allows for the two operands to be of different + * classes (ie. wbit). + * wbit is for the r_m operand + * w2 is for the reg operand + */ +#define MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, w2, vbit) { \ + dtrace_get_modrm(x, &mode, ®, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, vbit); \ + dtrace_get_operand(x, REG_ONLY, reg, w2, 1 - vbit); \ +} + +/* + * Similar, but for 2 operands plus an immediate. + */ +#define THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, immsize) { \ + dtrace_get_modrm(x, &mode, ®, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, 1); \ + dtrace_get_operand(x, REG_ONLY, reg, w2, 2); \ + dtrace_imm_opnd(x, wbit, immsize, 0); \ +} + +/* + * Dissassemble a single x86 or amd64 instruction. + * + * Mode determines the default operating mode (SIZE16, SIZE32 or SIZE64) + * for interpreting instructions. + * + * returns non-zero for bad opcode + */ +int +dtrace_disx86(dis86_t *x, uint_t cpu_mode) +{ + const instable_t *dp = NULL; /* decode table being used */ +#ifdef DIS_TEXT + uint_t i; +#endif +#ifdef DIS_MEM + uint_t nomem = 0; +#define NOMEM (nomem = 1) +#else +#define NOMEM /* nothing */ +#endif + uint_t wbit = 0; /* opcode wbit, 0 is 8 bit, !0 for opnd_size */ + uint_t w2; /* wbit value for second operand */ + uint_t vbit; + uint_t mode = 0; /* mode value from ModRM byte */ + uint_t reg; /* reg value from ModRM byte */ + uint_t r_m; /* r_m value from ModRM byte */ + + uint_t opcode1; /* high nibble of 1st byte */ + uint_t opcode2; /* low nibble of 1st byte */ + uint_t opcode3; /* extra opcode bits usually from ModRM byte */ + uint_t opcode4; /* high nibble of 2nd byte */ + uint_t opcode5; /* low nibble of 2ne byte */ + uint_t opcode6; /* high nibble of 3rd byte */ + uint_t opcode7; /* low nibble of 3rd byte */ + uint_t opcode_bytes = 1; + + /* + * legacy prefixes come in 5 flavors, you should have only one of each + */ + uint_t opnd_size_prefix = 0; + uint_t addr_size_prefix = 0; + uint_t segment_prefix = 0; + uint_t lock_prefix = 0; + uint_t rep_prefix = 0; + uint_t rex_prefix = 0; /* amd64 register extension prefix */ + size_t off; + + x->d86_len = 0; + x->d86_rmindex = -1; + x->d86_error = 0; +#ifdef DIS_TEXT + x->d86_numopnds = 0; + x->d86_seg_prefix = NULL; + x->d86_mneu[0] = 0; + for (i = 0; i < 3; ++i) { + x->d86_opnd[i].d86_opnd[0] = 0; + x->d86_opnd[i].d86_prefix[0] = 0; + x->d86_opnd[i].d86_value_size = 0; + x->d86_opnd[i].d86_value = 0; + x->d86_opnd[i].d86_mode = MODE_NONE; + } +#endif + x->d86_error = 0; + x->d86_memsize = 0; + + if (cpu_mode == SIZE16) { + opnd_size = SIZE16; + addr_size = SIZE16; + } else if (cpu_mode == SIZE32) { + opnd_size = SIZE32; + addr_size = SIZE32; + } else { + opnd_size = SIZE32; + addr_size = SIZE64; + } + + /* + * Get one opcode byte and check for zero padding that follows + * jump tables. + */ + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + + if (opcode1 == 0 && opcode2 == 0 && + x->d86_check_func != NULL && x->d86_check_func(x->d86_data)) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mneu, ".byte\t0", OPLEN); +#endif + goto done; + } + + /* + * Gather up legacy x86 prefix bytes. + */ + for (;;) { + uint_t *which_prefix = NULL; + + dp = &dis_distable[opcode1][opcode2]; + + switch (dp->it_adrmode) { + case PREFIX: + which_prefix = &rep_prefix; + break; + case LOCK: + which_prefix = &lock_prefix; + break; + case OVERRIDE: + which_prefix = &segment_prefix; +#ifdef DIS_TEXT + x->d86_seg_prefix = (char *)dp->it_name; +#endif + if (dp->it_invalid64 && cpu_mode == SIZE64) + goto error; + break; + case AM: + which_prefix = &addr_size_prefix; + break; + case DM: + which_prefix = &opnd_size_prefix; + break; + } + if (which_prefix == NULL) + break; + *which_prefix = (opcode1 << 4) | opcode2; + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + } + + /* + * Handle amd64 mode PREFIX values. + * Some of the segment prefixes are no-ops. (only FS/GS actually work) + * We might have a REX prefix (opcodes 0x40-0x4f) + */ + if (cpu_mode == SIZE64) { + if (segment_prefix != 0x64 && segment_prefix != 0x65) + segment_prefix = 0; + + if (opcode1 == 0x4) { + rex_prefix = (opcode1 << 4) | opcode2; + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + dp = &dis_distable[opcode1][opcode2]; + } + } + + /* + * Deal with selection of operand and address size now. + * Note that the REX.W bit being set causes opnd_size_prefix to be + * ignored. + */ + if (cpu_mode == SIZE64) { + if (rex_prefix & 0x08) + opnd_size = SIZE64; + else if (opnd_size_prefix) + opnd_size = SIZE16; + + if (addr_size_prefix) + addr_size = SIZE32; + } else if (cpu_mode == SIZE32) { + if (opnd_size_prefix) + opnd_size = SIZE16; + if (addr_size_prefix) + addr_size = SIZE16; + } else { + if (opnd_size_prefix) + opnd_size = SIZE32; + if (addr_size_prefix) + addr_size = SIZE32; + } + + /* + * The pause instruction - a repz'd nop. This doesn't fit + * with any of the other prefix goop added for SSE, so we'll + * special-case it here. + */ + if (rep_prefix == 0xf3 && opcode1 == 0x9 && opcode2 == 0x0) { + rep_prefix = 0; + dp = &dis_opPause; + } + + /* + * Some 386 instructions have 2 bytes of opcode before the mod_r/m + * byte so we may need to perform a table indirection. + */ + if (dp->it_indirect == dis_op0F[0]) { + if (dtrace_get_opcode(x, &opcode4, &opcode5) != 0) + goto error; + opcode_bytes = 2; + if (opcode4 == 0x7 && opcode5 >= 0x1 && opcode5 <= 0x3) { + uint_t subcode; + + if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) + goto error; + opcode_bytes = 3; + subcode = ((opcode6 & 0x3) << 1) | + ((opcode7 & 0x8) >> 3); + dp = &dis_op0F7123[opcode5][subcode]; + } else if ((opcode4 == 0xc) && (opcode5 >= 0x8)) { + dp = &dis_op0FC8[0]; + } else { + dp = &dis_op0F[opcode4][opcode5]; + } + } + + /* + * If still not at a TERM decode entry, then a ModRM byte + * exists and its fields further decode the instruction. + */ + x->d86_got_modrm = 0; + if (dp->it_indirect != TERM) { + dtrace_get_modrm(x, &mode, &opcode3, &r_m); + if (x->d86_error) + goto error; + reg = opcode3; + + /* + * decode 287 instructions (D8-DF) from opcodeN + */ + if (opcode1 == 0xD && opcode2 >= 0x8) { + if (opcode2 == 0xB && mode == 0x3 && opcode3 == 4) + dp = &dis_opFP5[r_m]; + else if (opcode2 == 0xA && mode == 0x3 && opcode3 < 4) + dp = &dis_opFP7[opcode3]; + else if (opcode2 == 0xB && mode == 0x3) + dp = &dis_opFP6[opcode3]; + else if (opcode2 == 0x9 && mode == 0x3 && opcode3 >= 4) + dp = &dis_opFP4[opcode3 - 4][r_m]; + else if (mode == 0x3) + dp = &dis_opFP3[opcode2 - 8][opcode3]; + else + dp = &dis_opFP1n2[opcode2 - 8][opcode3]; + } else { + dp = dp->it_indirect + opcode3; + } + } + + /* + * In amd64 bit mode, ARPL opcode is changed to MOVSXD + * (sign extend 32bit to 64 bit) + */ + if (cpu_mode == SIZE64 && opcode1 == 0x6 && opcode2 == 0x3) + dp = &dis_opMOVSLD; + + /* + * at this point we should have a correct (or invalid) opcode + */ + if ((cpu_mode == SIZE64 && dp->it_invalid64) || + (cpu_mode != SIZE64 && dp->it_invalid32)) + goto error; + if (dp->it_indirect != TERM) + goto error; + + /* + * deal with MMX/SSE opcodes which are changed by prefixes + */ + switch (dp->it_adrmode) { + case MMO: + case MMOIMPL: + case MMO3P: + case MMOM3: + case MMOMS: + case MMOPM: + case MMOPRM: + case MMOS: + case XMMO: + case XMMOM: + case XMMOMS: + case XMMOPM: + case XMMOS: + case XMMOMX: + case XMMOX3: + case XMMOXMM: + /* + * This is horrible. Some SIMD instructions take the + * form 0x0F 0x?? ..., which is easily decoded using the + * existing tables. Other SIMD instructions use various + * prefix bytes to overload existing instructions. For + * Example, addps is F0, 58, whereas addss is F3 (repz), + * F0, 58. Presumably someone got a raise for this. + * + * If we see one of the instructions which can be + * modified in this way (if we've got one of the SIMDO* + * address modes), we'll check to see if the last prefix + * was a repz. If it was, we strip the prefix from the + * mnemonic, and we indirect using the dis_opSIMDrepz + * table. + */ + + /* + * Calculate our offset in dis_op0F + */ + if ((uintptr_t)dp - (uintptr_t)dis_op0F > sizeof (dis_op0F)) + goto error; + + off = ((uintptr_t)dp - (uintptr_t)dis_op0F) / + sizeof (instable_t); + + /* + * Rewrite if this instruction used one of the magic prefixes. + */ + if (rep_prefix) { + if (rep_prefix == 0xf2) + dp = &dis_opSIMDrepnz[off]; + else + dp = &dis_opSIMDrepz[off]; + rep_prefix = 0; + } else if (opnd_size_prefix) { + dp = &dis_opSIMDdata16[off]; + opnd_size_prefix = 0; + if (opnd_size == SIZE16) + opnd_size = SIZE32; + } + break; + + case MMOSH: + /* + * As with the "normal" SIMD instructions, the MMX + * shuffle instructions are overloaded. These + * instructions, however, are special in that they use + * an extra byte, and thus an extra table. As of this + * writing, they only use the opnd_size prefix. + */ + + /* + * Calculate our offset in dis_op0F7123 + */ + if ((uintptr_t)dp - (uintptr_t)dis_op0F7123 > + sizeof (dis_op0F7123)) + goto error; + + if (opnd_size_prefix) { + off = ((uintptr_t)dp - (uintptr_t)dis_op0F7123) / + sizeof (instable_t); + dp = &dis_opSIMD7123[off]; + opnd_size_prefix = 0; + if (opnd_size == SIZE16) + opnd_size = SIZE32; + } + break; + } + + /* + * In 64 bit mode, some opcodes automatically use opnd_size == SIZE64. + */ + if (cpu_mode == SIZE64) + if (dp->it_always64 || (opnd_size == SIZE32 && dp->it_stackop)) + opnd_size = SIZE64; + +#ifdef DIS_TEXT + /* + * At this point most instructions can format the opcode mnemonic + * including the prefixes. + */ + if (lock_prefix) + (void) strlcat(x->d86_mneu, "lock ", OPLEN); + + if (rep_prefix == 0xf2) + (void) strlcat(x->d86_mneu, "repnz ", OPLEN); + else if (rep_prefix == 0xf3) + (void) strlcat(x->d86_mneu, "repz ", OPLEN); + + if (cpu_mode == SIZE64 && addr_size_prefix) + (void) strlcat(x->d86_mneu, "addr32 ", OPLEN); + + if (dp->it_adrmode != CBW && + dp->it_adrmode != CWD && + dp->it_adrmode != XMMSFNC) { + if (strcmp(dp->it_name, "INVALID") == 0) + goto error; + (void) strlcat(x->d86_mneu, dp->it_name, OPLEN); + if (dp->it_suffix) { + char *types[] = {"", "w", "l", "q"}; + if (opcode_bytes == 2 && opcode4 == 4) { + /* It's a cmovx.yy. Replace the suffix x */ + for (i = 5; i < OPLEN; i++) { + if (x->d86_mneu[i] == '.') + break; + } + x->d86_mneu[i - 1] = *types[opnd_size]; + } else { + (void) strlcat(x->d86_mneu, types[opnd_size], + OPLEN); + } + } + } +#endif + + /* + * Process operands based on the addressing modes. + */ + x->d86_mode = cpu_mode; + x->d86_rex_prefix = rex_prefix; + x->d86_opnd_size = opnd_size; + x->d86_addr_size = addr_size; + vbit = 0; /* initialize for mem/reg -> reg */ + switch (dp->it_adrmode) { + /* + * amd64 instruction to sign extend 32 bit reg/mem operands + * into 64 bit register values + */ + case MOVSXZ: +#ifdef DIS_TEXT + if (rex_prefix == 0) + (void) strncpy(x->d86_mneu, "movzld", OPLEN); +#endif + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + x->d86_opnd_size = SIZE64; + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + x->d86_opnd_size = opnd_size = SIZE32; + wbit = LONG_OPND; + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + /* + * movsbl movsbw movsbq (0x0FBE) or movswl movswq (0x0FBF) + * movzbl movzbw movzbq (0x0FB6) or mobzwl movzwq (0x0FB7) + * wbit lives in 2nd byte, note that operands + * are different sized + */ + case MOVZ: + if (rex_prefix & REX_W) { + /* target register size = 64 bit */ + x->d86_mneu[5] = 'q'; + } + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + x->d86_opnd_size = opnd_size = SIZE16; + wbit = WBIT(opcode5); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + /* + * imul instruction, with either 8-bit or longer immediate + * opcode 0x6B for byte, sign-extended displacement, 0x69 for word(s) + */ + case IMUL: + wbit = LONG_OPND; + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, + OPSIZE(opnd_size, opcode2 == 0x9)); + break; + + /* memory or register operand to register, with 'w' bit */ + case MRw: + wbit = WBIT(opcode2); + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); + break; + + /* register to memory or register operand, with 'w' bit */ + /* arpl happens to fit here also because it is odd */ + case RMw: + if (opcode_bytes == 2) + wbit = WBIT(opcode5); + else + wbit = WBIT(opcode2); + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* xaddb instruction */ + case XADDB: + wbit = 0; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* MMX register to memory or register operand */ + case MMS: + case MMOS: +#ifdef DIS_TEXT + wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; +#else + wbit = LONG_OPND; +#endif + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); + break; + + /* MMX register to memory */ + case MMOMS: + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode == REG_ONLY) + goto error; + wbit = MM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); + break; + + /* Double shift. Has immediate operand specifying the shift. */ + case DSHIFT: + wbit = LONG_OPND; + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 2); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + /* + * Double shift. With no immediate operand, specifies using %cl. + */ + case DSHIFTcl: + wbit = LONG_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* immediate to memory or register operand */ + case IMlw: + wbit = WBIT(opcode2); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); + /* + * Have long immediate for opcode 0x81, but not 0x80 nor 0x83 + */ + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, opcode2 == 1), 0); + break; + + /* immediate to memory or register operand with the */ + /* 'w' bit present */ + case IMw: + wbit = WBIT(opcode2); + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); + break; + + /* immediate to register with register in low 3 bits */ + /* of op code */ + case IR: + /* w-bit here (with regs) is bit 3 */ + wbit = opcode2 >>3 & 0x1; + reg = REGNO(opcode2); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + mode = REG_ONLY; + r_m = reg; + dtrace_get_operand(x, mode, r_m, wbit, 1); + dtrace_imm_opnd(x, wbit, OPSIZE64(opnd_size, wbit), 0); + break; + + /* MMX immediate shift of register */ + case MMSH: + case MMOSH: + wbit = MM_OPND; + goto mm_shift; /* in next case */ + + /* SIMD immediate shift of register */ + case XMMSH: + wbit = XMM_OPND; +mm_shift: + reg = REGNO(opcode7); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + dtrace_imm_opnd(x, wbit, 1, 0); + NOMEM; + break; + + /* accumulator to memory operand */ + case AO: + vbit = 1; + /*FALLTHROUGH*/ + + /* memory operand to accumulator */ + case OA: + wbit = WBIT(opcode2); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1 - vbit); + dtrace_imm_opnd(x, wbit, OPSIZE64(addr_size, LONG_OPND), vbit); +#ifdef DIS_TEXT + x->d86_opnd[vbit].d86_mode = MODE_OFFSET; +#endif + break; + + + /* segment register to memory or register operand */ + case SM: + vbit = 1; + /*FALLTHROUGH*/ + + /* memory or register operand to segment register */ + case MS: + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, LONG_OPND, vbit); + dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 1 - vbit); + break; + + /* + * rotate or shift instructions, which may shift by 1 or + * consult the cl register, depending on the 'v' bit + */ + case Mv: + vbit = VBIT(opcode2); + wbit = WBIT(opcode2); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); +#ifdef DIS_TEXT + if (vbit) { + (void) strlcat(x->d86_opnd[0].d86_opnd, "%cl", OPLEN); + } else { + x->d86_opnd[0].d86_mode = MODE_SIGNED; + x->d86_opnd[0].d86_value_size = 1; + x->d86_opnd[0].d86_value = 1; + } +#endif + break; + /* + * immediate rotate or shift instructions + */ + case MvI: + wbit = WBIT(opcode2); +normal_imm_mem: + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + /* bit test instructions */ + case MIb: + wbit = LONG_OPND; + goto normal_imm_mem; + + /* single memory or register operand with 'w' bit present */ + case Mw: + wbit = WBIT(opcode2); +just_mem: + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + case SWAPGS: + if (cpu_mode == SIZE64 && mode == 3 && r_m == 0) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mneu, "swapgs", OPLEN); +#endif + NOMEM; + break; + } + /*FALLTHROUGH*/ + + /* prefetch instruction - memory operand, but no memory acess */ + case PREF: + NOMEM; + /*FALLTHROUGH*/ + + /* single memory or register operand */ + case M: + wbit = LONG_OPND; + goto just_mem; + + /* single memory or register byte operand */ + case Mb: + wbit = BYTE_OPND; + goto just_mem; + + case MO: + /* Similar to M, but only memory (no direct registers) */ + wbit = LONG_OPND; + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode == 3) + goto error; + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + /* move special register to register or reverse if vbit */ + case SREG: + switch (opcode5) { + + case 2: + vbit = 1; + /*FALLTHROUGH*/ + case 0: + wbit = CONTROL_OPND; + break; + + case 3: + vbit = 1; + /*FALLTHROUGH*/ + case 1: + wbit = DEBUG_OPND; + break; + + case 6: + vbit = 1; + /*FALLTHROUGH*/ + case 4: + wbit = TEST_OPND; + break; + + } + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, wbit, vbit); + dtrace_get_operand(x, REG_ONLY, r_m, LONG_OPND, 1 - vbit); + NOMEM; + break; + + /* + * single register operand with register in the low 3 + * bits of op code + */ + case R: + if (opcode_bytes == 2) + reg = REGNO(opcode5); + else + reg = REGNO(opcode2); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); + NOMEM; + break; + + /* + * register to accumulator with register in the low 3 + * bits of op code, xchg instructions + */ + case RA: + NOMEM; + reg = REGNO(opcode2); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, LONG_OPND, 1); + break; + + /* + * single segment register operand, with register in + * bits 3-4 of op code byte + */ + case SEG: + NOMEM; + reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 0x3; + dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0); + break; + + /* + * single segment register operand, with register in + * bits 3-5 of op code + */ + case LSEG: + NOMEM; + /* long seg reg from opcode */ + reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 0x7; + dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0); + break; + + /* memory or register operand to register */ + case MR: + wbit = LONG_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); + break; + + case RM: + wbit = LONG_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* MMX/SIMD-Int memory or mm reg to mm reg */ + case MM: + case MMO: +#ifdef DIS_TEXT + wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; +#else + wbit = LONG_OPND; +#endif + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); + break; + + case MMOIMPL: +#ifdef DIS_TEXT + wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; +#else + wbit = LONG_OPND; +#endif + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + dtrace_get_operand(x, REG_ONLY, reg, MM_OPND, 1); + mode = 0; /* change for memory access size... */ + break; + + /* MMX/SIMD-Int and SIMD-FP predicated mm reg to r32 */ + case MMO3P: + wbit = MM_OPND; + goto xmm3p; + case XMM3P: + wbit = XMM_OPND; +xmm3p: + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 1); + NOMEM; + break; + + /* MMX/SIMD-Int predicated r32/mem to mm reg */ + case MMOPRM: + wbit = LONG_OPND; + w2 = MM_OPND; + goto xmmprm; + case XMMPRM: + wbit = LONG_OPND; + w2 = XMM_OPND; +xmmprm: + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, 1); + break; + + /* MMX/SIMD-Int predicated mm/mem to mm reg */ + case MMOPM: + wbit = w2 = MM_OPND; + goto xmmprm; + + /* MMX/SIMD-Int mm reg to r32 */ + case MMOM3: + NOMEM; + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + wbit = MM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); + break; + + /* SIMD memory or xmm reg operand to xmm reg */ + case XMM: + case XMMO: + case XMMXIMPL: + wbit = XMM_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); + + if (dp->it_adrmode == XMMXIMPL && mode != REG_ONLY) + goto error; + +#ifdef DIS_TEXT + /* + * movlps and movhlps share opcodes. They differ in the + * addressing modes allowed for their operands. + * movhps and movlhps behave similarly. + */ + if (mode == REG_ONLY) { + if (strcmp(dp->it_name, "movlps") == 0) + (void) strncpy(x->d86_mneu, "movhlps", OPLEN); + else if (strcmp(dp->it_name, "movhps") == 0) + (void) strncpy(x->d86_mneu, "movlhps", OPLEN); + } +#endif + if (dp->it_adrmode == XMMXIMPL) + mode = 0; /* change for memory access size... */ + break; + + /* SIMD xmm reg to memory or xmm reg */ + case XMMS: + case XMMOS: + case XMMMS: + case XMMOMS: + dtrace_get_modrm(x, &mode, ®, &r_m); +#ifdef DIS_TEXT + if ((strcmp(dp->it_name, "movlps") == 0 || + strcmp(dp->it_name, "movhps") == 0 || + strcmp(dp->it_name, "movntps") == 0) && + mode == REG_ONLY) + goto error; +#endif + wbit = XMM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); + break; + + /* SIMD memory to xmm reg */ + case XMMM: + case XMMOM: + wbit = XMM_OPND; + dtrace_get_modrm(x, &mode, ®, &r_m); +#ifdef DIS_TEXT + if (mode == REG_ONLY) { + if (strcmp(dp->it_name, "movhps") == 0) + (void) strncpy(x->d86_mneu, "movlhps", OPLEN); + else + goto error; + } +#endif + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); + break; + + /* SIMD memory or r32 to xmm reg */ + case XMM3MX: + wbit = LONG_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); + break; + + case XMM3MXS: + wbit = LONG_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); + break; + + /* SIMD memory or mm reg to xmm reg */ + case XMMOMX: + /* SIMD mm to xmm */ + case XMMMX: + wbit = MM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); + break; + + /* SIMD memory or xmm reg to mm reg */ + case XMMXMM: + case XMMOXMM: + case XMMXM: + wbit = XMM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); + break; + + + /* SIMD memory or xmm reg to r32 */ + case XMMXM3: + wbit = XMM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); + break; + + /* SIMD xmm to r32 */ + case XMMX3: + case XMMOX3: + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, XMM_OPND, 0); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + NOMEM; + break; + + /* SIMD predicated memory or xmm reg with/to xmm reg */ + case XMMP: + case XMMOPM: + wbit = XMM_OPND; + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); + +#ifdef DIS_TEXT + /* + * cmpps and cmpss vary their instruction name based + * on the value of imm8. Other XMMP instructions, + * such as shufps, require explicit specification of + * the predicate. + */ + if (dp->it_name[0] == 'c' && + dp->it_name[1] == 'm' && + dp->it_name[2] == 'p' && + strlen(dp->it_name) == 5) { + uchar_t pred = x->d86_opnd[0].d86_value & 0xff; + + if (pred >= (sizeof (dis_PREDSUFFIX) / sizeof (char *))) + goto error; + + (void) strncpy(x->d86_mneu, "cmp", OPLEN); + (void) strlcat(x->d86_mneu, dis_PREDSUFFIX[pred], + OPLEN); + (void) strlcat(x->d86_mneu, + dp->it_name + strlen(dp->it_name) - 2, + OPLEN); + x->d86_opnd[0] = x->d86_opnd[1]; + x->d86_opnd[1] = x->d86_opnd[2]; + x->d86_numopnds = 2; + } +#endif + break; + + /* immediate operand to accumulator */ + case IA: + wbit = WBIT(opcode2); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); + NOMEM; + break; + + /* memory or register operand to accumulator */ + case MA: + wbit = WBIT(opcode2); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + /* si register to di register used to reference memory */ + case SD: +#ifdef DIS_TEXT + dtrace_check_override(x, 0); + x->d86_numopnds = 2; + if (addr_size == SIZE64) { + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", + OPLEN); + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", + OPLEN); + } else if (addr_size == SIZE32) { + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", + OPLEN); + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", + OPLEN); + } else { + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", + OPLEN); + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%di)", + OPLEN); + } +#endif + wbit = LONG_OPND; + break; + + /* accumulator to di register */ + case AD: + wbit = WBIT(opcode2); +#ifdef DIS_TEXT + dtrace_check_override(x, 1); + x->d86_numopnds = 2; + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 0); + if (addr_size == SIZE64) + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", + OPLEN); + else if (addr_size == SIZE32) + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", + OPLEN); + else + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%di)", + OPLEN); +#endif + break; + + /* si register to accumulator */ + case SA: + wbit = WBIT(opcode2); +#ifdef DIS_TEXT + dtrace_check_override(x, 0); + x->d86_numopnds = 2; + if (addr_size == SIZE64) + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", + OPLEN); + else if (addr_size == SIZE32) + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", + OPLEN); + else + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", + OPLEN); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); +#endif + break; + + /* + * single operand, a 16/32 bit displacement + */ + case D: + wbit = LONG_OPND; + dtrace_disp_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0); + NOMEM; + break; + + /* jmp/call indirect to memory or register operand */ + case INM: +#ifdef DIS_TEXT + (void) strlcat(x->d86_opnd[0].d86_prefix, "*", OPLEN); +#endif + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, LONG_OPND, 0); + wbit = LONG_OPND; + break; + + /* + * for long jumps and long calls -- a new code segment + * register and an offset in IP -- stored in object + * code in reverse order. Note - not valid in amd64 + */ + case SO: + dtrace_check_override(x, 1); + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 1); +#ifdef DIS_TEXT + x->d86_opnd[1].d86_mode = MODE_SIGNED; +#endif + /* will now get segment operand */ + dtrace_imm_opnd(x, wbit, 2, 0); + break; + + /* + * jmp/call. single operand, 8 bit displacement. + * added to current EIP in 'compofff' + */ + case BD: + dtrace_disp_opnd(x, BYTE_OPND, 1, 0); + NOMEM; + break; + + /* single 32/16 bit immediate operand */ + case I: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0); + break; + + /* single 8 bit immediate operand */ + case Ib: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + case ENTER: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, 2, 0); + dtrace_imm_opnd(x, wbit, 1, 1); + switch (opnd_size) { + case SIZE64: + x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 8; + break; + case SIZE32: + x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 4; + break; + case SIZE16: + x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 2; + break; + } + + break; + + /* 16-bit immediate operand */ + case RET: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, 2, 0); + break; + + /* single 8 bit port operand */ + case P: + dtrace_check_override(x, 0); + dtrace_imm_opnd(x, BYTE_OPND, 1, 0); + NOMEM; + break; + + /* single operand, dx register (variable port instruction) */ + case V: + x->d86_numopnds = 1; + dtrace_check_override(x, 0); +#ifdef DIS_TEXT + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%dx)", OPLEN); +#endif + NOMEM; + break; + + /* + * The int instruction, which has two forms: + * int 3 (breakpoint) or + * int n, where n is indicated in the subsequent + * byte (format Ib). The int 3 instruction (opcode 0xCC), + * where, although the 3 looks like an operand, + * it is implied by the opcode. It must be converted + * to the correct base and output. + */ + case INT3: +#ifdef DIS_TEXT + x->d86_numopnds = 1; + x->d86_opnd[0].d86_mode = MODE_SIGNED; + x->d86_opnd[0].d86_value_size = 1; + x->d86_opnd[0].d86_value = 3; +#endif + NOMEM; + break; + + /* single 8 bit immediate operand */ + case INTx: + dtrace_imm_opnd(x, BYTE_OPND, 1, 0); + NOMEM; + break; + + /* an unused byte must be discarded */ + case U: + if (x->d86_get_byte(x->d86_data) < 0) + goto error; + x->d86_len++; + NOMEM; + break; + + case CBW: +#ifdef DIS_TEXT + if (opnd_size == SIZE16) + (void) strlcat(x->d86_mneu, "cbtw", OPLEN); + else if (opnd_size == SIZE32) + (void) strlcat(x->d86_mneu, "cwtl", OPLEN); + else + (void) strlcat(x->d86_mneu, "cltq", OPLEN); +#endif + wbit = LONG_OPND; + NOMEM; + break; + + case CWD: +#ifdef DIS_TEXT + if (opnd_size == SIZE16) + (void) strlcat(x->d86_mneu, "cwtd", OPLEN); + else if (opnd_size == SIZE32) + (void) strlcat(x->d86_mneu, "cltd", OPLEN); + else + (void) strlcat(x->d86_mneu, "cqtd", OPLEN); +#endif + wbit = LONG_OPND; + NOMEM; + break; + + case XMMSFNC: + /* + * sfence is sfence if mode is REG_ONLY. If mode isn't + * REG_ONLY, mnemonic should be 'clflush'. + */ + dtrace_get_modrm(x, &mode, ®, &r_m); + + /* sfence doesn't take operands */ +#ifdef DIS_TEXT + if (mode == REG_ONLY) { + (void) strlcat(x->d86_mneu, "sfence", OPLEN); + } else { + (void) strlcat(x->d86_mneu, "clflush", OPLEN); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); + NOMEM; + } +#else + if (mode != REG_ONLY) { + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); + NOMEM; + } +#endif + break; + + /* + * no disassembly, the mnemonic was all there was so go on + */ + case NORM: + if (dp->it_invalid32 && cpu_mode != SIZE64) + goto error; + NOMEM; + /*FALLTHROUGH*/ + case IMPLMEM: + break; + + case XMMFENCE: + /* + * Only the following exact byte sequences are allowed: + * + * 0f ae e8 lfence + * 0f ae f0 mfence + */ + if ((uint8_t)x->d86_bytes[x->d86_len - 1] != 0xe8 && + (uint8_t)x->d86_bytes[x->d86_len - 1] != 0xf0) + goto error; + + break; + + + /* float reg */ + case F: +#ifdef DIS_TEXT + x->d86_numopnds = 1; + (void) strlcat(x->d86_opnd[0].d86_opnd, "%st(X)", OPLEN); + x->d86_opnd[0].d86_opnd[4] = r_m + '0'; +#endif + NOMEM; + break; + + /* float reg to float reg, with ret bit present */ + case FF: + vbit = opcode2 >> 2 & 0x1; /* vbit = 1: st -> st(i) */ + /*FALLTHROUGH*/ + case FFC: /* case for vbit always = 0 */ +#ifdef DIS_TEXT + x->d86_numopnds = 2; + (void) strlcat(x->d86_opnd[1 - vbit].d86_opnd, "%st", OPLEN); + (void) strlcat(x->d86_opnd[vbit].d86_opnd, "%st(X)", OPLEN); + x->d86_opnd[vbit].d86_opnd[4] = r_m + '0'; +#endif + NOMEM; + break; + + /* an invalid op code */ + case AM: + case DM: + case OVERRIDE: + case PREFIX: + case UNKNOWN: + NOMEM; + default: + goto error; + } /* end switch */ + if (x->d86_error) + goto error; + +done: +#ifdef DIS_MEM + /* + * compute the size of any memory accessed by the instruction + */ + if (x->d86_memsize != 0) { + return (0); + } else if (dp->it_stackop) { + switch (opnd_size) { + case SIZE16: + x->d86_memsize = 2; + break; + case SIZE32: + x->d86_memsize = 4; + break; + case SIZE64: + x->d86_memsize = 8; + break; + } + } else if (nomem || mode == REG_ONLY) { + x->d86_memsize = 0; + + } else if (dp->it_size != 0) { + /* + * In 64 bit mode descriptor table entries + * go up to 10 bytes and popf/pushf are always 8 bytes + */ + if (x->d86_mode == SIZE64 && dp->it_size == 6) + x->d86_memsize = 10; + else if (x->d86_mode == SIZE64 && opcode1 == 0x9 && + (opcode2 == 0xc || opcode2 == 0xd)) + x->d86_memsize = 8; + else + x->d86_memsize = dp->it_size; + + } else if (wbit == 0) { + x->d86_memsize = 1; + + } else if (wbit == LONG_OPND) { + if (opnd_size == SIZE64) + x->d86_memsize = 8; + else if (opnd_size == SIZE32) + x->d86_memsize = 4; + else + x->d86_memsize = 2; + + } else if (wbit == SEG_OPND) { + x->d86_memsize = 4; + + } else { + x->d86_memsize = 8; + } +#endif + return (0); + +error: +#ifdef DIS_TEXT + (void) strlcat(x->d86_mneu, "undef", OPLEN); +#endif + return (1); +} + +#ifdef DIS_TEXT + +/* + * Some instructions should have immediate operands printed + * as unsigned integers. We compare against this table. + */ +static char *unsigned_ops[] = { + "or", "and", "xor", "test", "in", "out", "lcall", "ljmp", + "rcr", "rcl", "ror", "rol", "shl", "shr", "sal", "psr", "psl", + 0 +}; + +static int +isunsigned_op(char *opcode) +{ + char *where; + int i; + int is_unsigned = 0; + + /* + * Work back to start of last mnemonic, since we may have + * prefixes on some opcodes. + */ + where = opcode + strlen(opcode) - 1; + while (where > opcode && *where != ' ') + --where; + if (*where == ' ') + ++where; + + for (i = 0; unsigned_ops[i]; ++i) { + if (strncmp(where, unsigned_ops[i], + strlen(unsigned_ops[i]))) + continue; + is_unsigned = 1; + break; + } + return (is_unsigned); +} + +/* ARGSUSED */ +void +dtrace_disx86_str(dis86_t *dis, uint_t mode, uintptr_t pc, char *buf, + size_t buflen) +{ + int i; + + dis->d86_sprintf_func(buf, buflen, "%-6s ", dis->d86_mneu); + + /* + * For PC-relative jumps, the pc is really the next pc after executing + * this instruction, so increment it appropriately. + */ + pc += dis->d86_len; + + for (i = 0; i < dis->d86_numopnds; i++) { + d86opnd_t *op = &dis->d86_opnd[i]; + int64_t sv; + uint64_t mask; + + if (i != 0) + (void) strlcat(buf, ",", buflen); + + (void) strlcat(buf, op->d86_prefix, buflen); + + sv = op->d86_value; + + switch (op->d86_mode) { + + case MODE_NONE: + + (void) strlcat(buf, op->d86_opnd, buflen); + break; + + case MODE_SIGNED: + case MODE_IMPLIED: + case MODE_OFFSET: + + if (dis->d86_seg_prefix) + (void) strlcat(buf, dis->d86_seg_prefix, + buflen); + + switch (op->d86_value_size) { + case 1: + sv = (int8_t)sv; + mask = 0xff; + break; + case 2: + sv = (int16_t)sv; + mask = 0xffff; + break; + case 4: + sv = (int32_t)sv; + mask = 0xffffffff; + break; + case 8: + mask = 0xffffffffffffffffULL; + break; + } + + if (op->d86_mode == MODE_SIGNED || + op->d86_mode == MODE_IMPLIED) + (void) strlcat(buf, "$", buflen); + + if (sv < 0 && sv > -0xffff && + !isunsigned_op(dis->d86_mneu)) { + dis->d86_sprintf_func(buf + strlen(buf), + buflen - strlen(buf), + (dis->d86_flags & DIS_OP_OCTAL) ? + "-0%llo" : "-0x%llx", -sv & mask); + } else { + dis->d86_sprintf_func(buf + strlen(buf), + buflen - strlen(buf), + (dis->d86_flags & DIS_OP_OCTAL) ? + "0%llo" : "0x%llx", sv & mask); + } + (void) strlcat(buf, op->d86_opnd, buflen); + break; + + case MODE_IPREL: + + switch (op->d86_value_size) { + case 1: + sv = (int8_t)sv; + break; + case 2: + sv = (int16_t)sv; + break; + case 4: + sv = (int32_t)sv; + break; + } + + if (sv < 0) + dis->d86_sprintf_func(buf + strlen(buf), + buflen - strlen(buf), + (dis->d86_flags & DIS_OP_OCTAL) ? + "-0%llo" : "-0x%llx", -sv - dis->d86_len); + else + dis->d86_sprintf_func(buf + strlen(buf), + buflen - strlen(buf), + (dis->d86_flags & DIS_OP_OCTAL) ? + "+0%llo" : "+0x%llx", sv + dis->d86_len); + + (void) strlcat(buf, "\t<", buflen); + + if (dis->d86_sym_lookup == NULL || + dis->d86_sym_lookup(dis->d86_data, pc + sv, + buf + strlen(buf), buflen - strlen(buf)) != 0) + dis->d86_sprintf_func(buf + strlen(buf), + buflen - strlen(buf), + (dis->d86_flags & DIS_OP_OCTAL) ? + "0%llo" : "0x%llx", pc + sv); + + (void) strlcat(buf, ">", buflen); + + break; + } + } +} + +#endif /* DIS_TEXT */ diff --git a/sys/cddl/dev/dtrace/amd64/dis_tables.h b/sys/cddl/dev/dtrace/amd64/dis_tables.h new file mode 100644 index 0000000..b45a8c5 --- /dev/null +++ b/sys/cddl/dev/dtrace/amd64/dis_tables.h @@ -0,0 +1,112 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +#ifndef _DIS_TABLES_H +#define _DIS_TABLES_H + +#if defined(sun) +#pragma ident "@(#)dis_tables.h 1.7 06/03/02 SMI" +#endif + +/* + * Constants and prototypes for the IA32 disassembler backend. See dis_tables.c + * for usage information and documentation. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> +#include <sys/param.h> + +/* + * values for cpu mode + */ +#define SIZE16 1 +#define SIZE32 2 +#define SIZE64 3 + +#define OPLEN 256 +#define PFIXLEN 8 +#define NCPS 12 /* number of chars per symbol */ + +/* + * data structures that must be provided to dtrace_dis86() + */ +typedef struct d86opnd { + char d86_opnd[OPLEN]; /* symbolic rep of operand */ + char d86_prefix[PFIXLEN]; /* any prefix string or "" */ + uint_t d86_mode; /* mode for immediate */ + uint_t d86_value_size; /* size in bytes of d86_value */ + uint64_t d86_value; /* immediate value of opnd */ +} d86opnd_t; + +typedef struct dis86 { + uint_t d86_mode; + uint_t d86_error; + uint_t d86_len; /* instruction length */ + int d86_rmindex; /* index of modrm byte or -1 */ + uint_t d86_memsize; /* size of memory referenced */ + char d86_bytes[16]; /* bytes of instruction */ + char d86_mneu[OPLEN]; + uint_t d86_numopnds; + uint_t d86_rex_prefix; /* value of REX prefix if !0 */ + char *d86_seg_prefix; /* segment prefix, if any */ + uint_t d86_opnd_size; + uint_t d86_addr_size; + uint_t d86_got_modrm; + struct d86opnd d86_opnd[3]; /* up to 3 operands */ + int (*d86_check_func)(void *); + int (*d86_get_byte)(void *); +#ifdef DIS_TEXT + int (*d86_sym_lookup)(void *, uint64_t, char *, size_t); + int (*d86_sprintf_func)(char *, size_t, const char *, ...); + int d86_flags; + uint_t d86_imm_bytes; +#endif + void *d86_data; +} dis86_t; + +extern int dtrace_disx86(dis86_t *x, uint_t cpu_mode); + +#define DIS_OP_OCTAL 0x1 /* Print all numbers in octal */ + +#ifdef DIS_TEXT +extern void dtrace_disx86_str(dis86_t *x, uint_t cpu_mode, uintptr_t pc, + char *buf, size_t len); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _DIS_TABLES_H */ diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_asm.S b/sys/cddl/dev/dtrace/amd64/dtrace_asm.S new file mode 100644 index 0000000..cf8314c --- /dev/null +++ b/sys/cddl/dev/dtrace/amd64/dtrace_asm.S @@ -0,0 +1,573 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Portions Copyright 2008 John Birrell <jb@freebsd.org> + * + * $FreeBSD$ + * + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#define _ASM + +#include <machine/asmacros.h> +#include <sys/cpuvar_defs.h> +#include <sys/dtrace.h> + +#include "assym.s" + +#define INTR_POP \ + MEXITCOUNT; \ + movq TF_RDI(%rsp),%rdi; \ + movq TF_RSI(%rsp),%rsi; \ + movq TF_RDX(%rsp),%rdx; \ + movq TF_RCX(%rsp),%rcx; \ + movq TF_R8(%rsp),%r8; \ + movq TF_R9(%rsp),%r9; \ + movq TF_RAX(%rsp),%rax; \ + movq TF_RBX(%rsp),%rbx; \ + movq TF_RBP(%rsp),%rbp; \ + movq TF_R10(%rsp),%r10; \ + movq TF_R11(%rsp),%r11; \ + movq TF_R12(%rsp),%r12; \ + movq TF_R13(%rsp),%r13; \ + movq TF_R14(%rsp),%r14; \ + movq TF_R15(%rsp),%r15; \ + testb $SEL_RPL_MASK,TF_CS(%rsp); \ + jz 1f; \ + cli; \ + swapgs; \ +1: addq $TF_RIP,%rsp; + + + .globl calltrap + .type calltrap,@function + ENTRY(dtrace_invop_start) + + /* + * #BP traps with %rip set to the next address. We need to decrement + * the value to indicate the address of the int3 (0xcc) instruction + * that we substituted. + */ + movq TF_RIP(%rsp), %rdi + decq %rdi + movq TF_RSP(%rsp), %rsi + movq TF_RAX(%rsp), %rdx + pushq (%rsi) + movq %rsp, %rsi + call dtrace_invop + ALTENTRY(dtrace_invop_callsite) + addq $8, %rsp + cmpl $DTRACE_INVOP_PUSHL_EBP, %eax + je bp_push + cmpl $DTRACE_INVOP_LEAVE, %eax + je bp_leave + cmpl $DTRACE_INVOP_NOP, %eax + je bp_nop + cmpl $DTRACE_INVOP_RET, %eax + je bp_ret + + /* When all else fails handle the trap in the usual way. */ + jmpq *dtrace_invop_calltrap_addr + +bp_push: + /* + * We must emulate a "pushq %rbp". To do this, we pull the stack + * down 8 bytes, and then store the base pointer. + */ + INTR_POP + subq $16, %rsp /* make room for %rbp */ + pushq %rax /* push temp */ + movq 24(%rsp), %rax /* load calling RIP */ + movq %rax, 8(%rsp) /* store calling RIP */ + movq 32(%rsp), %rax /* load calling CS */ + movq %rax, 16(%rsp) /* store calling CS */ + movq 40(%rsp), %rax /* load calling RFLAGS */ + movq %rax, 24(%rsp) /* store calling RFLAGS */ + movq 48(%rsp), %rax /* load calling RSP */ + subq $8, %rax /* make room for %rbp */ + movq %rax, 32(%rsp) /* store calling RSP */ + movq 56(%rsp), %rax /* load calling SS */ + movq %rax, 40(%rsp) /* store calling SS */ + movq 32(%rsp), %rax /* reload calling RSP */ + movq %rbp, (%rax) /* store %rbp there */ + popq %rax /* pop off temp */ + iretq /* return from interrupt */ + /*NOTREACHED*/ + +bp_leave: + /* + * We must emulate a "leave", which is the same as a "movq %rbp, %rsp" + * followed by a "popq %rbp". This is quite a bit simpler on amd64 + * than it is on i386 -- we can exploit the fact that the %rsp is + * explicitly saved to effect the pop without having to reshuffle + * the other data pushed for the trap. + */ + INTR_POP + pushq %rax /* push temp */ + movq 8(%rsp), %rax /* load calling RIP */ + movq %rax, 8(%rsp) /* store calling RIP */ + movq (%rbp), %rax /* get new %rbp */ + addq $8, %rbp /* adjust new %rsp */ + movq %rbp, 32(%rsp) /* store new %rsp */ + movq %rax, %rbp /* set new %rbp */ + popq %rax /* pop off temp */ + iretq /* return from interrupt */ + /*NOTREACHED*/ + +bp_nop: + /* We must emulate a "nop". */ + INTR_POP + iretq + /*NOTREACHED*/ + +bp_ret: + INTR_POP + pushq %rax /* push temp */ + movq 32(%rsp), %rax /* load %rsp */ + movq (%rax), %rax /* load calling RIP */ + movq %rax, 8(%rsp) /* store calling RIP */ + addq $8, 32(%rsp) /* adjust new %rsp */ + popq %rax /* pop off temp */ + iretq /* return from interrupt */ + /*NOTREACHED*/ + + END(dtrace_invop_start) + +/* +void dtrace_invop_init(void) +*/ + ENTRY(dtrace_invop_init) + movq $dtrace_invop_start, dtrace_invop_jump_addr(%rip) + ret + END(dtrace_invop_init) + +/* +void dtrace_invop_uninit(void) +*/ + ENTRY(dtrace_invop_uninit) + movq $0, dtrace_invop_jump_addr(%rip) + ret + END(dtrace_invop_uninit) + +/* +greg_t dtrace_getfp(void) +*/ + ENTRY(dtrace_getfp) + movq %rbp, %rax + ret + END(dtrace_getfp) + +/* +uint32_t +dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new) +*/ + ENTRY(dtrace_cas32) + movl %esi, %eax + lock + cmpxchgl %edx, (%rdi) + ret + END(dtrace_cas32) + +/* +void * +dtrace_casptr(void *target, void *cmp, void *new) +*/ + ENTRY(dtrace_casptr) + movq %rsi, %rax + lock + cmpxchgq %rdx, (%rdi) + ret + END(dtrace_casptr) + +/* +uintptr_t +dtrace_caller(int aframes) +*/ + ENTRY(dtrace_caller) + movq $-1, %rax + ret + END(dtrace_caller) + +/* +void +dtrace_copy(uintptr_t src, uintptr_t dest, size_t size) +*/ + ENTRY(dtrace_copy) + pushq %rbp + movq %rsp, %rbp + + xchgq %rdi, %rsi /* make %rsi source, %rdi dest */ + movq %rdx, %rcx /* load count */ + repz /* repeat for count ... */ + smovb /* move from %ds:rsi to %ed:rdi */ + leave + ret + END(dtrace_copy) + +/* +void +dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size, + volatile uint16_t *flags) +*/ + ENTRY(dtrace_copystr) + pushq %rbp + movq %rsp, %rbp + +0: + movb (%rdi), %al /* load from source */ + movb %al, (%rsi) /* store to destination */ + addq $1, %rdi /* increment source pointer */ + addq $1, %rsi /* increment destination pointer */ + subq $1, %rdx /* decrement remaining count */ + cmpb $0, %al + je 2f + testq $0xfff, %rdx /* test if count is 4k-aligned */ + jnz 1f /* if not, continue with copying */ + testq $CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */ + jnz 2f +1: + cmpq $0, %rdx + jne 0b +2: + leave + ret + + END(dtrace_copystr) + +/* +uintptr_t +dtrace_fulword(void *addr) +*/ + ENTRY(dtrace_fulword) + movq (%rdi), %rax + ret + END(dtrace_fulword) + +/* +uint8_t +dtrace_fuword8_nocheck(void *addr) +*/ + ENTRY(dtrace_fuword8_nocheck) + xorq %rax, %rax + movb (%rdi), %al + ret + END(dtrace_fuword8_nocheck) + +/* +uint16_t +dtrace_fuword16_nocheck(void *addr) +*/ + ENTRY(dtrace_fuword16_nocheck) + xorq %rax, %rax + movw (%rdi), %ax + ret + END(dtrace_fuword16_nocheck) + +/* +uint32_t +dtrace_fuword32_nocheck(void *addr) +*/ + ENTRY(dtrace_fuword32_nocheck) + xorq %rax, %rax + movl (%rdi), %eax + ret + END(dtrace_fuword32_nocheck) + +/* +uint64_t +dtrace_fuword64_nocheck(void *addr) +*/ + ENTRY(dtrace_fuword64_nocheck) + movq (%rdi), %rax + ret + END(dtrace_fuword64_nocheck) + +/* +void +dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which, + int fault, int fltoffs, uintptr_t illval) +*/ + ENTRY(dtrace_probe_error) + pushq %rbp + movq %rsp, %rbp + subq $0x8, %rsp + movq %r9, (%rsp) + movq %r8, %r9 + movq %rcx, %r8 + movq %rdx, %rcx + movq %rsi, %rdx + movq %rdi, %rsi + movl dtrace_probeid_error(%rip), %edi + call dtrace_probe + addq $0x8, %rsp + leave + ret + END(dtrace_probe_error) + +/* +void +dtrace_membar_producer(void) +*/ + ENTRY(dtrace_membar_producer) + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + END(dtrace_membar_producer) + +/* +void +dtrace_membar_consumer(void) +*/ + ENTRY(dtrace_membar_consumer) + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + END(dtrace_membar_consumer) + +/* +dtrace_icookie_t +dtrace_interrupt_disable(void) +*/ + ENTRY(dtrace_interrupt_disable) + pushfq + popq %rax + cli + ret + END(dtrace_interrupt_disable) + +/* +void +dtrace_interrupt_enable(dtrace_icookie_t cookie) +*/ + ENTRY(dtrace_interrupt_enable) + pushq %rdi + popfq + ret + END(dtrace_interrupt_enable) + +/* + * The panic() and cmn_err() functions invoke vpanic() as a common entry point + * into the panic code implemented in panicsys(). vpanic() is responsible + * for passing through the format string and arguments, and constructing a + * regs structure on the stack into which it saves the current register + * values. If we are not dying due to a fatal trap, these registers will + * then be preserved in panicbuf as the current processor state. Before + * invoking panicsys(), vpanic() activates the first panic trigger (see + * common/os/panic.c) and switches to the panic_stack if successful. Note that + * DTrace takes a slightly different panic path if it must panic from probe + * context. Instead of calling panic, it calls into dtrace_vpanic(), which + * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and + * branches back into vpanic(). + */ + +/* +void +vpanic(const char *format, va_list alist) +*/ + ENTRY(vpanic) /* Initial stack layout: */ + + pushq %rbp /* | %rip | 0x60 */ + movq %rsp, %rbp /* | %rbp | 0x58 */ + pushfq /* | rfl | 0x50 */ + pushq %r11 /* | %r11 | 0x48 */ + pushq %r10 /* | %r10 | 0x40 */ + pushq %rbx /* | %rbx | 0x38 */ + pushq %rax /* | %rax | 0x30 */ + pushq %r9 /* | %r9 | 0x28 */ + pushq %r8 /* | %r8 | 0x20 */ + pushq %rcx /* | %rcx | 0x18 */ + pushq %rdx /* | %rdx | 0x10 */ + pushq %rsi /* | %rsi | 0x8 alist */ + pushq %rdi /* | %rdi | 0x0 format */ + + movq %rsp, %rbx /* %rbx = current %rsp */ + + leaq panic_quiesce(%rip), %rdi /* %rdi = &panic_quiesce */ + call panic_trigger /* %eax = panic_trigger() */ + +vpanic_common: + /* + * The panic_trigger result is in %eax from the call above, and + * dtrace_panic places it in %eax before branching here. + * The rdmsr instructions that follow below will clobber %eax so + * we stash the panic_trigger result in %r11d. + */ + movl %eax, %r11d + cmpl $0, %r11d + je 0f + + /* + * If panic_trigger() was successful, we are the first to initiate a + * panic: we now switch to the reserved panic_stack before continuing. + */ + leaq panic_stack(%rip), %rsp + addq $PANICSTKSIZE, %rsp +0: subq $REGSIZE, %rsp + /* + * Now that we've got everything set up, store the register values as + * they were when we entered vpanic() to the designated location in + * the regs structure we allocated on the stack. + */ +#ifdef notyet + movq 0x0(%rbx), %rcx + movq %rcx, REGOFF_RDI(%rsp) + movq 0x8(%rbx), %rcx + movq %rcx, REGOFF_RSI(%rsp) + movq 0x10(%rbx), %rcx + movq %rcx, REGOFF_RDX(%rsp) + movq 0x18(%rbx), %rcx + movq %rcx, REGOFF_RCX(%rsp) + movq 0x20(%rbx), %rcx + + movq %rcx, REGOFF_R8(%rsp) + movq 0x28(%rbx), %rcx + movq %rcx, REGOFF_R9(%rsp) + movq 0x30(%rbx), %rcx + movq %rcx, REGOFF_RAX(%rsp) + movq 0x38(%rbx), %rcx + movq %rcx, REGOFF_RBX(%rsp) + movq 0x58(%rbx), %rcx + + movq %rcx, REGOFF_RBP(%rsp) + movq 0x40(%rbx), %rcx + movq %rcx, REGOFF_R10(%rsp) + movq 0x48(%rbx), %rcx + movq %rcx, REGOFF_R11(%rsp) + movq %r12, REGOFF_R12(%rsp) + + movq %r13, REGOFF_R13(%rsp) + movq %r14, REGOFF_R14(%rsp) + movq %r15, REGOFF_R15(%rsp) + + xorl %ecx, %ecx + movw %ds, %cx + movq %rcx, REGOFF_DS(%rsp) + movw %es, %cx + movq %rcx, REGOFF_ES(%rsp) + movw %fs, %cx + movq %rcx, REGOFF_FS(%rsp) + movw %gs, %cx + movq %rcx, REGOFF_GS(%rsp) + + movq $0, REGOFF_TRAPNO(%rsp) + + movq $0, REGOFF_ERR(%rsp) + leaq vpanic(%rip), %rcx + movq %rcx, REGOFF_RIP(%rsp) + movw %cs, %cx + movzwq %cx, %rcx + movq %rcx, REGOFF_CS(%rsp) + movq 0x50(%rbx), %rcx + movq %rcx, REGOFF_RFL(%rsp) + movq %rbx, %rcx + addq $0x60, %rcx + movq %rcx, REGOFF_RSP(%rsp) + movw %ss, %cx + movzwq %cx, %rcx + movq %rcx, REGOFF_SS(%rsp) + + /* + * panicsys(format, alist, rp, on_panic_stack) + */ + movq REGOFF_RDI(%rsp), %rdi /* format */ + movq REGOFF_RSI(%rsp), %rsi /* alist */ + movq %rsp, %rdx /* struct regs */ + movl %r11d, %ecx /* on_panic_stack */ + call panicsys + addq $REGSIZE, %rsp +#endif + popq %rdi + popq %rsi + popq %rdx + popq %rcx + popq %r8 + popq %r9 + popq %rax + popq %rbx + popq %r10 + popq %r11 + popfq + leave + ret + END(vpanic) + +/* +void +dtrace_vpanic(const char *format, va_list alist) +*/ + ENTRY(dtrace_vpanic) /* Initial stack layout: */ + + pushq %rbp /* | %rip | 0x60 */ + movq %rsp, %rbp /* | %rbp | 0x58 */ + pushfq /* | rfl | 0x50 */ + pushq %r11 /* | %r11 | 0x48 */ + pushq %r10 /* | %r10 | 0x40 */ + pushq %rbx /* | %rbx | 0x38 */ + pushq %rax /* | %rax | 0x30 */ + pushq %r9 /* | %r9 | 0x28 */ + pushq %r8 /* | %r8 | 0x20 */ + pushq %rcx /* | %rcx | 0x18 */ + pushq %rdx /* | %rdx | 0x10 */ + pushq %rsi /* | %rsi | 0x8 alist */ + pushq %rdi /* | %rdi | 0x0 format */ + + movq %rsp, %rbx /* %rbx = current %rsp */ + + leaq panic_quiesce(%rip), %rdi /* %rdi = &panic_quiesce */ + call dtrace_panic_trigger /* %eax = dtrace_panic_trigger() */ + jmp vpanic_common + + END(dtrace_vpanic) + +/* +int +panic_trigger(int *tp) +*/ + ENTRY(panic_trigger) + xorl %eax, %eax + movl $0xdefacedd, %edx + lock + xchgl %edx, (%rdi) + cmpl $0, %edx + je 0f + movl $0, %eax + ret +0: movl $1, %eax + ret + END(panic_trigger) + +/* +int +dtrace_panic_trigger(int *tp) +*/ + ENTRY(dtrace_panic_trigger) + xorl %eax, %eax + movl $0xdefacedd, %edx + lock + xchgl %edx, (%rdi) + cmpl $0, %edx + je 0f + movl $0, %eax + ret +0: movl $1, %eax + ret + END(dtrace_panic_trigger) diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_isa.c b/sys/cddl/dev/dtrace/amd64/dtrace_isa.c new file mode 100644 index 0000000..8cd2a0f --- /dev/null +++ b/sys/cddl/dev/dtrace/amd64/dtrace_isa.c @@ -0,0 +1,612 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#include <sys/cdefs.h> + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/stack.h> +#include <sys/pcpu.h> + +#include <machine/frame.h> +#include <machine/md_var.h> +#include <machine/reg.h> +#include <machine/stack.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> + +extern uintptr_t kernbase; +uintptr_t kernelbase = (uintptr_t) &kernbase; + +uint8_t dtrace_fuword8_nocheck(void *); +uint16_t dtrace_fuword16_nocheck(void *); +uint32_t dtrace_fuword32_nocheck(void *); +uint64_t dtrace_fuword64_nocheck(void *); + +void +dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, + uint32_t *intrpc) +{ + int depth = 0; + register_t rbp; + struct amd64_frame *frame; + vm_offset_t callpc; + pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller; + + if (intrpc != 0) + pcstack[depth++] = (pc_t) intrpc; + + aframes++; + + __asm __volatile("movq %%rbp,%0" : "=r" (rbp)); + + frame = (struct amd64_frame *)rbp; + while (depth < pcstack_limit) { + if (!INKERNEL((long) frame)) + break; + + callpc = frame->f_retaddr; + + if (!INKERNEL(callpc)) + break; + + if (aframes > 0) { + aframes--; + if ((aframes == 0) && (caller != 0)) { + pcstack[depth++] = caller; + } + } + else { + pcstack[depth++] = callpc; + } + + if (frame->f_frame <= frame || + (vm_offset_t)frame->f_frame >= + (vm_offset_t)rbp + KSTACK_PAGES * PAGE_SIZE) + break; + frame = frame->f_frame; + } + + for (; depth < pcstack_limit; depth++) { + pcstack[depth] = 0; + } +} + +static int +dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, + uintptr_t sp) +{ + volatile uint16_t *flags = + (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; + struct amd64_frame *frame; + int ret = 0; + + ASSERT(pcstack == NULL || pcstack_limit > 0); + + while (pc != 0 && sp != 0) { + ret++; + if (pcstack != NULL) { + *pcstack++ = (uint64_t)pc; + pcstack_limit--; + if (pcstack_limit <= 0) + break; + } + + frame = (struct amd64_frame *) sp; + + pc = dtrace_fulword(&frame->f_retaddr); + sp = dtrace_fulword(&frame->f_frame); + + /* + * This is totally bogus: if we faulted, we're going to clear + * the fault and break. This is to deal with the apparently + * broken Java stacks on x86. + */ + if (*flags & CPU_DTRACE_FAULT) { + *flags &= ~CPU_DTRACE_FAULT; + break; + } + } + + return (ret); +} + +void +dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) +{ + proc_t *p = curproc; + struct trapframe *tf; + uintptr_t pc, sp; + volatile uint16_t *flags = + (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; + int n; + + if (*flags & CPU_DTRACE_FAULT) + return; + + if (pcstack_limit <= 0) + return; + + /* + * If there's no user context we still need to zero the stack. + */ + if (p == NULL || (tf = curthread->td_frame) == NULL) + goto zero; + + *pcstack++ = (uint64_t)p->p_pid; + pcstack_limit--; + + if (pcstack_limit <= 0) + return; + + pc = tf->tf_rip; + sp = tf->tf_rsp; + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + *pcstack++ = (uint64_t)pc; + pcstack_limit--; + if (pcstack_limit <= 0) + return; + + pc = dtrace_fulword((void *) sp); + } + + n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp); + ASSERT(n >= 0); + ASSERT(n <= pcstack_limit); + + pcstack += n; + pcstack_limit -= n; + +zero: + while (pcstack_limit-- > 0) + *pcstack++ = 0; +} + +int +dtrace_getustackdepth(void) +{ + proc_t *p = curproc; + struct trapframe *tf; + uintptr_t pc, sp; + int n = 0; + + if (p == NULL || (tf = curthread->td_frame) == NULL) + return (0); + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) + return (-1); + + pc = tf->tf_rip; + sp = tf->tf_rsp; + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + n++; + + pc = dtrace_fulword((void *) sp); + } + + n += dtrace_getustack_common(NULL, 0, pc, sp); + + return (n); +} + +#ifdef notyet +void +dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = curproc; + struct regs *rp; + uintptr_t pc, sp, oldcontext; + volatile uint16_t *flags = + (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; + size_t s1, s2; + + if (*flags & CPU_DTRACE_FAULT) + return; + + if (pcstack_limit <= 0) + return; + + /* + * If there's no user context we still need to zero the stack. + */ + if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) + goto zero; + + *pcstack++ = (uint64_t)p->p_pid; + pcstack_limit--; + + if (pcstack_limit <= 0) + return; + + pc = rp->r_pc; + sp = rp->r_fp; + oldcontext = lwp->lwp_oldcontext; + + s1 = sizeof (struct xframe) + 2 * sizeof (long); + s2 = s1 + sizeof (siginfo_t); + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + *pcstack++ = (uint64_t)pc; + *fpstack++ = 0; + pcstack_limit--; + if (pcstack_limit <= 0) + return; + + if (p->p_model == DATAMODEL_NATIVE) + pc = dtrace_fulword((void *)rp->r_sp); + else + pc = dtrace_fuword32((void *)rp->r_sp); + } + + while (pc != 0 && sp != 0) { + *pcstack++ = (uint64_t)pc; + *fpstack++ = sp; + pcstack_limit--; + if (pcstack_limit <= 0) + break; + + if (oldcontext == sp + s1 || oldcontext == sp + s2) { + ucontext_t *ucp = (ucontext_t *)oldcontext; + greg_t *gregs = ucp->uc_mcontext.gregs; + + sp = dtrace_fulword(&gregs[REG_FP]); + pc = dtrace_fulword(&gregs[REG_PC]); + + oldcontext = dtrace_fulword(&ucp->uc_link); + } else { + struct xframe *fr = (struct xframe *)sp; + + pc = dtrace_fulword(&fr->fr_savpc); + sp = dtrace_fulword(&fr->fr_savfp); + } + + /* + * This is totally bogus: if we faulted, we're going to clear + * the fault and break. This is to deal with the apparently + * broken Java stacks on x86. + */ + if (*flags & CPU_DTRACE_FAULT) { + *flags &= ~CPU_DTRACE_FAULT; + break; + } + } + +zero: + while (pcstack_limit-- > 0) + *pcstack++ = NULL; +} +#endif + +/*ARGSUSED*/ +uint64_t +dtrace_getarg(int arg, int aframes) +{ + uintptr_t val; + struct amd64_frame *fp = (struct amd64_frame *)dtrace_getfp(); + uintptr_t *stack; + int i; + + /* + * A total of 6 arguments are passed via registers; any argument with + * index of 5 or lower is therefore in a register. + */ + int inreg = 5; + + for (i = 1; i <= aframes; i++) { + fp = fp->f_frame; + + if (fp->f_retaddr == (long)dtrace_invop_callsite) { + /* + * In the case of amd64, we will use the pointer to the + * regs structure that was pushed when we took the + * trap. To get this structure, we must increment + * beyond the frame structure, and then again beyond + * the calling RIP stored in dtrace_invop(). If the + * argument that we're seeking is passed on the stack, + * we'll pull the true stack pointer out of the saved + * registers and decrement our argument by the number + * of arguments passed in registers; if the argument + * we're seeking is passed in regsiters, we can just + * load it directly. + */ + struct reg *rp = (struct reg *)((uintptr_t)&fp[1] + + sizeof (uintptr_t)); + + if (arg <= inreg) { + stack = (uintptr_t *)&rp->r_rdi; + } else { + stack = (uintptr_t *)(rp->r_rsp); + arg -= inreg; + } + goto load; + } + + } + + /* + * We know that we did not come through a trap to get into + * dtrace_probe() -- the provider simply called dtrace_probe() + * directly. As this is the case, we need to shift the argument + * that we're looking for: the probe ID is the first argument to + * dtrace_probe(), so the argument n will actually be found where + * one would expect to find argument (n + 1). + */ + arg++; + + if (arg <= inreg) { + /* + * This shouldn't happen. If the argument is passed in a + * register then it should have been, well, passed in a + * register... + */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + + arg -= (inreg + 1); + stack = (uintptr_t *)&fp[1]; + +load: + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + val = stack[arg]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (val); + return (0); +} + +int +dtrace_getstackdepth(int aframes) +{ + int depth = 0; + struct amd64_frame *frame; + vm_offset_t rbp; + + aframes++; + rbp = dtrace_getfp(); + frame = (struct amd64_frame *)rbp; + depth++; + for(;;) { + if (!INKERNEL((long) frame)) + break; + if (!INKERNEL((long) frame->f_frame)) + break; + depth++; + if (frame->f_frame <= frame || + (vm_offset_t)frame->f_frame >= + (vm_offset_t)rbp + KSTACK_PAGES * PAGE_SIZE) + break; + frame = frame->f_frame; + } + if (depth < aframes) + return 0; + else + return depth - aframes; +} + +#ifdef notyet +ulong_t +dtrace_getreg(struct regs *rp, uint_t reg) +{ +#if defined(__amd64) + int regmap[] = { + REG_GS, /* GS */ + REG_FS, /* FS */ + REG_ES, /* ES */ + REG_DS, /* DS */ + REG_RDI, /* EDI */ + REG_RSI, /* ESI */ + REG_RBP, /* EBP */ + REG_RSP, /* ESP */ + REG_RBX, /* EBX */ + REG_RDX, /* EDX */ + REG_RCX, /* ECX */ + REG_RAX, /* EAX */ + REG_TRAPNO, /* TRAPNO */ + REG_ERR, /* ERR */ + REG_RIP, /* EIP */ + REG_CS, /* CS */ + REG_RFL, /* EFL */ + REG_RSP, /* UESP */ + REG_SS /* SS */ + }; + + if (reg <= SS) { + if (reg >= sizeof (regmap) / sizeof (int)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + + reg = regmap[reg]; + } else { + reg -= SS + 1; + } + + switch (reg) { + case REG_RDI: + return (rp->r_rdi); + case REG_RSI: + return (rp->r_rsi); + case REG_RDX: + return (rp->r_rdx); + case REG_RCX: + return (rp->r_rcx); + case REG_R8: + return (rp->r_r8); + case REG_R9: + return (rp->r_r9); + case REG_RAX: + return (rp->r_rax); + case REG_RBX: + return (rp->r_rbx); + case REG_RBP: + return (rp->r_rbp); + case REG_R10: + return (rp->r_r10); + case REG_R11: + return (rp->r_r11); + case REG_R12: + return (rp->r_r12); + case REG_R13: + return (rp->r_r13); + case REG_R14: + return (rp->r_r14); + case REG_R15: + return (rp->r_r15); + case REG_DS: + return (rp->r_ds); + case REG_ES: + return (rp->r_es); + case REG_FS: + return (rp->r_fs); + case REG_GS: + return (rp->r_gs); + case REG_TRAPNO: + return (rp->r_trapno); + case REG_ERR: + return (rp->r_err); + case REG_RIP: + return (rp->r_rip); + case REG_CS: + return (rp->r_cs); + case REG_SS: + return (rp->r_ss); + case REG_RFL: + return (rp->r_rfl); + case REG_RSP: + return (rp->r_rsp); + default: + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + +#else + if (reg > SS) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + + return ((&rp->r_gs)[reg]); +#endif +} +#endif + +static int +dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) +{ + ASSERT(kaddr >= kernelbase && kaddr + size >= kaddr); + + if (uaddr + size >= kernelbase || uaddr + size < uaddr) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[curcpu].cpuc_dtrace_illval = uaddr; + return (0); + } + + return (1); +} + +void +dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copy(uaddr, kaddr, size); +} + +void +dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copy(kaddr, uaddr, size); +} + +void +dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copystr(uaddr, kaddr, size, flags); +} + +void +dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copystr(kaddr, uaddr, size, flags); +} + +uint8_t +dtrace_fuword8(void *uaddr) +{ + if ((uintptr_t)uaddr >= kernelbase) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword8_nocheck(uaddr)); +} + +uint16_t +dtrace_fuword16(void *uaddr) +{ + if ((uintptr_t)uaddr >= kernelbase) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword16_nocheck(uaddr)); +} + +uint32_t +dtrace_fuword32(void *uaddr) +{ + if ((uintptr_t)uaddr >= kernelbase) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword32_nocheck(uaddr)); +} + +uint64_t +dtrace_fuword64(void *uaddr) +{ + if ((uintptr_t)uaddr >= kernelbase) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword64_nocheck(uaddr)); +} diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c new file mode 100644 index 0000000..4bf0f03 --- /dev/null +++ b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c @@ -0,0 +1,507 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + * + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/kmem.h> +#include <sys/smp.h> +#include <sys/dtrace_impl.h> +#include <sys/dtrace_bsd.h> +#include <machine/clock.h> +#include <machine/frame.h> +#include <vm/pmap.h> + +extern uintptr_t kernelbase; +extern uintptr_t dtrace_in_probe_addr; +extern int dtrace_in_probe; + +int dtrace_invop(uintptr_t, uintptr_t *, uintptr_t); + +typedef struct dtrace_invop_hdlr { + int (*dtih_func)(uintptr_t, uintptr_t *, uintptr_t); + struct dtrace_invop_hdlr *dtih_next; +} dtrace_invop_hdlr_t; + +dtrace_invop_hdlr_t *dtrace_invop_hdlr; + +int +dtrace_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) +{ + dtrace_invop_hdlr_t *hdlr; + int rval; + + for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) + if ((rval = hdlr->dtih_func(addr, stack, eax)) != 0) + return (rval); + + return (0); +} + +void +dtrace_invop_add(int (*func)(uintptr_t, uintptr_t *, uintptr_t)) +{ + dtrace_invop_hdlr_t *hdlr; + + hdlr = kmem_alloc(sizeof (dtrace_invop_hdlr_t), KM_SLEEP); + hdlr->dtih_func = func; + hdlr->dtih_next = dtrace_invop_hdlr; + dtrace_invop_hdlr = hdlr; +} + +void +dtrace_invop_remove(int (*func)(uintptr_t, uintptr_t *, uintptr_t)) +{ + dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlr, *prev = NULL; + + for (;;) { + if (hdlr == NULL) + panic("attempt to remove non-existent invop handler"); + + if (hdlr->dtih_func == func) + break; + + prev = hdlr; + hdlr = hdlr->dtih_next; + } + + if (prev == NULL) { + ASSERT(dtrace_invop_hdlr == hdlr); + dtrace_invop_hdlr = hdlr->dtih_next; + } else { + ASSERT(dtrace_invop_hdlr != hdlr); + prev->dtih_next = hdlr->dtih_next; + } + + kmem_free(hdlr, 0); +} + +/*ARGSUSED*/ +void +dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) +{ + (*func)(0, (uintptr_t) addr_PTmap); +} + +void +dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) +{ + cpumask_t cpus; + + critical_enter(); + + if (cpu == DTRACE_CPUALL) + cpus = all_cpus; + else + cpus = (cpumask_t) (1 << cpu); + + /* If the current CPU is in the set, call the function directly: */ + if ((cpus & (1 << curcpu)) != 0) { + (*func)(arg); + + /* Mask the current CPU from the set */ + cpus &= ~(1 << curcpu); + } + + /* If there are any CPUs in the set, cross-call to those CPUs */ + if (cpus != 0) + smp_rendezvous_cpus(cpus, NULL, func, smp_no_rendevous_barrier, arg); + + critical_exit(); +} + +static void +dtrace_sync_func(void) +{ +} + +void +dtrace_sync(void) +{ + dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL); +} + +#ifdef notyet +int (*dtrace_fasttrap_probe_ptr)(struct regs *); +int (*dtrace_pid_probe_ptr)(struct regs *); +int (*dtrace_return_probe_ptr)(struct regs *); + +void +dtrace_user_probe(struct regs *rp, caddr_t addr, processorid_t cpuid) +{ + krwlock_t *rwp; + proc_t *p = curproc; + extern void trap(struct regs *, caddr_t, processorid_t); + + if (USERMODE(rp->r_cs) || (rp->r_ps & PS_VM)) { + if (curthread->t_cred != p->p_cred) { + cred_t *oldcred = curthread->t_cred; + /* + * DTrace accesses t_cred in probe context. t_cred + * must always be either NULL, or point to a valid, + * allocated cred structure. + */ + curthread->t_cred = crgetcred(); + crfree(oldcred); + } + } + + if (rp->r_trapno == T_DTRACE_RET) { + uint8_t step = curthread->t_dtrace_step; + uint8_t ret = curthread->t_dtrace_ret; + uintptr_t npc = curthread->t_dtrace_npc; + + if (curthread->t_dtrace_ast) { + aston(curthread); + curthread->t_sig_check = 1; + } + + /* + * Clear all user tracing flags. + */ + curthread->t_dtrace_ft = 0; + + /* + * If we weren't expecting to take a return probe trap, kill + * the process as though it had just executed an unassigned + * trap instruction. + */ + if (step == 0) { + tsignal(curthread, SIGILL); + return; + } + + /* + * If we hit this trap unrelated to a return probe, we're + * just here to reset the AST flag since we deferred a signal + * until after we logically single-stepped the instruction we + * copied out. + */ + if (ret == 0) { + rp->r_pc = npc; + return; + } + + /* + * We need to wait until after we've called the + * dtrace_return_probe_ptr function pointer to set %pc. + */ + rwp = &CPU->cpu_ft_lock; + rw_enter(rwp, RW_READER); + if (dtrace_return_probe_ptr != NULL) + (void) (*dtrace_return_probe_ptr)(rp); + rw_exit(rwp); + rp->r_pc = npc; + + } else if (rp->r_trapno == T_DTRACE_PROBE) { + rwp = &CPU->cpu_ft_lock; + rw_enter(rwp, RW_READER); + if (dtrace_fasttrap_probe_ptr != NULL) + (void) (*dtrace_fasttrap_probe_ptr)(rp); + rw_exit(rwp); + + } else if (rp->r_trapno == T_BPTFLT) { + uint8_t instr; + rwp = &CPU->cpu_ft_lock; + + /* + * The DTrace fasttrap provider uses the breakpoint trap + * (int 3). We let DTrace take the first crack at handling + * this trap; if it's not a probe that DTrace knowns about, + * we call into the trap() routine to handle it like a + * breakpoint placed by a conventional debugger. + */ + rw_enter(rwp, RW_READER); + if (dtrace_pid_probe_ptr != NULL && + (*dtrace_pid_probe_ptr)(rp) == 0) { + rw_exit(rwp); + return; + } + rw_exit(rwp); + + /* + * If the instruction that caused the breakpoint trap doesn't + * look like an int 3 anymore, it may be that this tracepoint + * was removed just after the user thread executed it. In + * that case, return to user land to retry the instuction. + */ + if (fuword8((void *)(rp->r_pc - 1), &instr) == 0 && + instr != FASTTRAP_INSTR) { + rp->r_pc--; + return; + } + + trap(rp, addr, cpuid); + + } else { + trap(rp, addr, cpuid); + } +} + +void +dtrace_safe_synchronous_signal(void) +{ + kthread_t *t = curthread; + struct regs *rp = lwptoregs(ttolwp(t)); + size_t isz = t->t_dtrace_npc - t->t_dtrace_pc; + + ASSERT(t->t_dtrace_on); + + /* + * If we're not in the range of scratch addresses, we're not actually + * tracing user instructions so turn off the flags. If the instruction + * we copied out caused a synchonous trap, reset the pc back to its + * original value and turn off the flags. + */ + if (rp->r_pc < t->t_dtrace_scrpc || + rp->r_pc > t->t_dtrace_astpc + isz) { + t->t_dtrace_ft = 0; + } else if (rp->r_pc == t->t_dtrace_scrpc || + rp->r_pc == t->t_dtrace_astpc) { + rp->r_pc = t->t_dtrace_pc; + t->t_dtrace_ft = 0; + } +} + +int +dtrace_safe_defer_signal(void) +{ + kthread_t *t = curthread; + struct regs *rp = lwptoregs(ttolwp(t)); + size_t isz = t->t_dtrace_npc - t->t_dtrace_pc; + + ASSERT(t->t_dtrace_on); + + /* + * If we're not in the range of scratch addresses, we're not actually + * tracing user instructions so turn off the flags. + */ + if (rp->r_pc < t->t_dtrace_scrpc || + rp->r_pc > t->t_dtrace_astpc + isz) { + t->t_dtrace_ft = 0; + return (0); + } + + /* + * If we've executed the original instruction, but haven't performed + * the jmp back to t->t_dtrace_npc or the clean up of any registers + * used to emulate %rip-relative instructions in 64-bit mode, do that + * here and take the signal right away. We detect this condition by + * seeing if the program counter is the range [scrpc + isz, astpc). + */ + if (t->t_dtrace_astpc - rp->r_pc < + t->t_dtrace_astpc - t->t_dtrace_scrpc - isz) { +#ifdef __amd64 + /* + * If there is a scratch register and we're on the + * instruction immediately after the modified instruction, + * restore the value of that scratch register. + */ + if (t->t_dtrace_reg != 0 && + rp->r_pc == t->t_dtrace_scrpc + isz) { + switch (t->t_dtrace_reg) { + case REG_RAX: + rp->r_rax = t->t_dtrace_regv; + break; + case REG_RCX: + rp->r_rcx = t->t_dtrace_regv; + break; + case REG_R8: + rp->r_r8 = t->t_dtrace_regv; + break; + case REG_R9: + rp->r_r9 = t->t_dtrace_regv; + break; + } + } +#endif + rp->r_pc = t->t_dtrace_npc; + t->t_dtrace_ft = 0; + return (0); + } + + /* + * Otherwise, make sure we'll return to the kernel after executing + * the copied out instruction and defer the signal. + */ + if (!t->t_dtrace_step) { + ASSERT(rp->r_pc < t->t_dtrace_astpc); + rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc; + t->t_dtrace_step = 1; + } + + t->t_dtrace_ast = 1; + + return (1); +} +#endif + +static int64_t tgt_cpu_tsc; +static int64_t hst_cpu_tsc; +static int64_t tsc_skew[MAXCPU]; + +static void +dtrace_gethrtime_init_sync(void *arg) +{ +#ifdef CHECK_SYNC + /* + * Delay this function from returning on one + * of the CPUs to check that the synchronisation + * works. + */ + uintptr_t cpu = (uintptr_t) arg; + + if (cpu == curcpu) { + int i; + for (i = 0; i < 1000000000; i++) + tgt_cpu_tsc = rdtsc(); + tgt_cpu_tsc = 0; + } +#endif +} + +static void +dtrace_gethrtime_init_cpu(void *arg) +{ + uintptr_t cpu = (uintptr_t) arg; + + if (cpu == curcpu) + tgt_cpu_tsc = rdtsc(); + else + hst_cpu_tsc = rdtsc(); +} + +static void +dtrace_gethrtime_init(void *arg) +{ + cpumask_t map; + int i; + struct pcpu *cp; + + /* The current CPU is the reference one. */ + tsc_skew[curcpu] = 0; + + for (i = 0; i <= mp_maxid; i++) { + if (i == curcpu) + continue; + + if ((cp = pcpu_find(i)) == NULL) + continue; + + map = 0; + map |= (1 << curcpu); + map |= (1 << i); + + smp_rendezvous_cpus(map, dtrace_gethrtime_init_sync, + dtrace_gethrtime_init_cpu, + smp_no_rendevous_barrier, (void *)(uintptr_t) i); + + tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc; + } +} + +SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init, NULL); + +/* + * DTrace needs a high resolution time function which can + * be called from a probe context and guaranteed not to have + * instrumented with probes itself. + * + * Returns nanoseconds since boot. + */ +uint64_t +dtrace_gethrtime() +{ + return ((rdtsc() + tsc_skew[curcpu]) * (int64_t) 1000000000 / tsc_freq); +} + +uint64_t +dtrace_gethrestime(void) +{ + printf("%s(%d): XXX\n",__func__,__LINE__); + return (0); +} + +/* Function to handle DTrace traps during probes. See amd64/amd64/trap.c */ +int +dtrace_trap(struct trapframe *frame, u_int type) +{ + /* + * A trap can occur while DTrace executes a probe. Before + * executing the probe, DTrace blocks re-scheduling and sets + * a flag in it's per-cpu flags to indicate that it doesn't + * want to fault. On returning from the the probe, the no-fault + * flag is cleared and finally re-scheduling is enabled. + * + * Check if DTrace has enabled 'no-fault' mode: + * + */ + if ((cpu_core[curcpu].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0) { + /* + * There are only a couple of trap types that are expected. + * All the rest will be handled in the usual way. + */ + switch (type) { + /* Privilieged instruction fault. */ + case T_PRIVINFLT: + break; + /* General protection fault. */ + case T_PROTFLT: + /* Flag an illegal operation. */ + cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; + + /* + * Offset the instruction pointer to the instruction + * following the one causing the fault. + */ + frame->tf_rip += dtrace_instr_size((u_char *) frame->tf_rip); + return (1); + /* Page fault. */ + case T_PAGEFLT: + /* Flag a bad address. */ + cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; + cpu_core[curcpu].cpuc_dtrace_illval = frame->tf_addr; + + /* + * Offset the instruction pointer to the instruction + * following the one causing the fault. + */ + frame->tf_rip += dtrace_instr_size((u_char *) frame->tf_rip); + return (1); + default: + /* Handle all other traps in the usual way. */ + break; + } + } + + /* Handle the trap in the usual way. */ + return (0); +} diff --git a/sys/cddl/dev/dtrace/amd64/instr_size.c b/sys/cddl/dev/dtrace/amd64/instr_size.c new file mode 100644 index 0000000..418d9f1 --- /dev/null +++ b/sys/cddl/dev/dtrace/amd64/instr_size.c @@ -0,0 +1,132 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +#if defined(sun) +#pragma ident "@(#)instr_size.c 1.14 05/07/08 SMI" +#endif + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/proc.h> +#if defined(sun) +#include <sys/cmn_err.h> +#include <sys/archsystm.h> +#include <sys/copyops.h> +#include <vm/seg_enum.h> +#include <sys/privregs.h> +#else +typedef u_int model_t; +#define DATAMODEL_NATIVE 0 +int dtrace_instr_size(uchar_t *); +#endif + +#include <dis_tables.h> + +/* + * This subsystem (with the minor exception of the instr_size() function) is + * is called from DTrace probe context. This imposes several requirements on + * the implementation: + * + * 1. External subsystems and functions may not be referenced. The one current + * exception is for cmn_err, but only to signal the detection of table + * errors. Assuming the tables are correct, no combination of input is to + * trigger a cmn_err call. + * + * 2. These functions can't be allowed to be traced. To prevent this, + * all functions in the probe path (everything except instr_size()) must + * have names that begin with "dtrace_". + */ + +typedef enum dis_isize { + DIS_ISIZE_INSTR, + DIS_ISIZE_OPERAND +} dis_isize_t; + + +/* + * get a byte from instruction stream + */ +static int +dtrace_dis_get_byte(void *p) +{ + int ret; + uchar_t **instr = p; + + ret = **instr; + *instr += 1; + + return (ret); +} + +/* + * Returns either the size of a given instruction, in bytes, or the size of that + * instruction's memory access (if any), depending on the value of `which'. + * If a programming error in the tables is detected, the system will panic to + * ease diagnosis. Invalid instructions will not be flagged. They will appear + * to have an instruction size between 1 and the actual size, and will be + * reported as having no memory impact. + */ +/* ARGSUSED2 */ +static int +dtrace_dis_isize(uchar_t *instr, dis_isize_t which, model_t model, int *rmindex) +{ + int sz; + dis86_t x; + uint_t mode = SIZE64; + +#if defined(sun) + mode = (model == DATAMODEL_LP64) ? SIZE64 : SIZE32; +#endif + + x.d86_data = (void **)&instr; + x.d86_get_byte = dtrace_dis_get_byte; + x.d86_check_func = NULL; + + if (dtrace_disx86(&x, mode) != 0) + return (-1); + + if (which == DIS_ISIZE_INSTR) + sz = x.d86_len; /* length of the instruction */ + else + sz = x.d86_memsize; /* length of memory operand */ + + if (rmindex != NULL) + *rmindex = x.d86_rmindex; + return (sz); +} + +int +dtrace_instr_size(uchar_t *instr) +{ + return (dtrace_dis_isize(instr, DIS_ISIZE_INSTR, DATAMODEL_NATIVE, + NULL)); +} diff --git a/sys/cddl/dev/dtrace/dtrace_anon.c b/sys/cddl/dev/dtrace/dtrace_anon.c new file mode 100644 index 0000000..b81ec5b --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_anon.c @@ -0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ + +/* + * DTrace Anonymous Enabling Functions + */ +static void +dtrace_anon_init(void *dummy) +{ + dtrace_state_t *state = NULL; + dtrace_enabling_t *enab; + + mutex_enter(&cpu_lock); + mutex_enter(&dtrace_provider_lock); + mutex_enter(&dtrace_lock); + + dtrace_anon_property(); + + mutex_exit(&cpu_lock); + + /* + * If there are already providers, we must ask them to provide their + * probes, and then match any anonymous enabling against them. Note + * that there should be no other retained enablings at this time: + * the only retained enablings at this time should be the anonymous + * enabling. + */ + if (dtrace_anon.dta_enabling != NULL) { + ASSERT(dtrace_retained == dtrace_anon.dta_enabling); + + dtrace_enabling_provide(NULL); + state = dtrace_anon.dta_state; + + /* + * We couldn't hold cpu_lock across the above call to + * dtrace_enabling_provide(), but we must hold it to actually + * enable the probes. We have to drop all of our locks, pick + * up cpu_lock, and regain our locks before matching the + * retained anonymous enabling. + */ + mutex_exit(&dtrace_lock); + mutex_exit(&dtrace_provider_lock); + + mutex_enter(&cpu_lock); + mutex_enter(&dtrace_provider_lock); + mutex_enter(&dtrace_lock); + + if ((enab = dtrace_anon.dta_enabling) != NULL) + (void) dtrace_enabling_match(enab, NULL); + + mutex_exit(&cpu_lock); + } + + mutex_exit(&dtrace_provider_lock); + mutex_exit(&dtrace_lock); + + if (state != NULL) { + /* + * If we created any anonymous state, set it going now. + */ + (void) dtrace_state_go(state, &dtrace_anon.dta_beganon); + } +} diff --git a/sys/cddl/dev/dtrace/dtrace_cddl.h b/sys/cddl/dev/dtrace/dtrace_cddl.h new file mode 100644 index 0000000..75fe864 --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_cddl.h @@ -0,0 +1,134 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + * + */ + +#ifndef _DTRACE_CDDL_H_ +#define _DTRACE_CDDL_H_ + +#include <sys/proc.h> + +#define LOCK_LEVEL 10 + +/* + * Kernel DTrace extension to 'struct proc' for FreeBSD. + */ +typedef struct kdtrace_proc { + int p_dtrace_probes; /* Are there probes for this proc? */ + u_int64_t p_dtrace_count; /* Number of DTrace tracepoints */ + void *p_dtrace_helpers; /* DTrace helpers, if any */ + +} kdtrace_proc_t; + +/* + * Kernel DTrace extension to 'struct thread' for FreeBSD. + */ +typedef struct kdtrace_thread { + u_int8_t td_dtrace_stop; /* Indicates a DTrace-desired stop */ + u_int8_t td_dtrace_sig; /* Signal sent via DTrace's raise() */ + u_int td_predcache; /* DTrace predicate cache */ + u_int64_t td_dtrace_vtime; /* DTrace virtual time */ + u_int64_t td_dtrace_start; /* DTrace slice start time */ + + union __tdu { + struct __tds { + u_int8_t _td_dtrace_on; + /* Hit a fasttrap tracepoint. */ + u_int8_t _td_dtrace_step; + /* About to return to kernel. */ + u_int8_t _td_dtrace_ret; + /* Handling a return probe. */ + u_int8_t _td_dtrace_ast; + /* Saved ast flag. */ + } _tds; + u_long _td_dtrace_ft; /* Bitwise or of these flags. */ + } _tdu; +#define td_dtrace_ft _tdu._td_dtrace_ft +#define td_dtrace_on _tdu._tds._td_dtrace_on +#define td_dtrace_step _tdu._tds._td_dtrace_step +#define td_dtrace_ret _tdu._tds._td_dtrace_ret +#define td_dtrace_ast _tdu._tds._td_dtrace_ast + + uintptr_t td_dtrace_pc; /* DTrace saved pc from fasttrap. */ + uintptr_t td_dtrace_npc; /* DTrace next pc from fasttrap. */ + uintptr_t td_dtrace_scrpc; + /* DTrace per-thread scratch location. */ + uintptr_t td_dtrace_astpc; + /* DTrace return sequence location. */ + u_int64_t td_hrtime; /* Last time on cpu. */ + int td_errno; /* Syscall return value. */ +} kdtrace_thread_t; + +/* + * Definitions to reference fields in the FreeBSD DTrace structures defined + * above using the names of fields in similar structures in Solaris. Note + * that the separation on FreeBSD is a licensing constraint designed to + * keep the GENERIC kernel BSD licensed. + */ +#define t_dtrace_vtime td_dtrace->td_dtrace_vtime +#define t_dtrace_start td_dtrace->td_dtrace_start +#define t_dtrace_stop td_dtrace->td_dtrace_stop +#define t_dtrace_sig td_dtrace->td_dtrace_sig +#define t_predcache td_dtrace->td_predcache +#define p_dtrace_helpers p_dtrace->p_dtrace_helpers + +/* + * Definitions for fields in struct proc which are named differntly in FreeBSD. + */ +#define p_cred p_ucred +#define p_parent p_pptr + +/* + * Definitions for fields in struct thread which are named differntly in FreeBSD. + */ +#define t_procp td_proc +#define t_tid td_tid +#define t_did td_tid + + +int priv_policy(const cred_t *, int, boolean_t, int, const char *); +boolean_t priv_policy_only(const cred_t *, int, boolean_t); +boolean_t priv_policy_choice(const cred_t *, int, boolean_t); + +/* + * Test privilege. Audit success or failure, allow privilege debugging. + * Returns 0 for success, err for failure. + */ +#define PRIV_POLICY(cred, priv, all, err, reason) \ + priv_policy((cred), (priv), (all), (err), (reason)) + +/* + * Test privilege. Audit success only, no privilege debugging. + * Returns 1 for success, and 0 for failure. + */ +#define PRIV_POLICY_CHOICE(cred, priv, all) \ + priv_policy_choice((cred), (priv), (all)) + +/* + * Test privilege. No priv_debugging, no auditing. + * Returns 1 for success, and 0 for failure. + */ + +#define PRIV_POLICY_ONLY(cred, priv, all) \ + priv_policy_only((cred), (priv), (all)) + +#endif /* !_DTRACE_CDDL_H_ */ diff --git a/sys/cddl/dev/dtrace/dtrace_clone.c b/sys/cddl/dev/dtrace/dtrace_clone.c new file mode 100644 index 0000000..52e48ff --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_clone.c @@ -0,0 +1,61 @@ +/*- + * Copyright (C) 2006 John Birrell <jb@freebsd.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice(s), this list of conditions and the following disclaimer as + * the first lines of this file unmodified other than the possible + * addition of one or more copyright notices. + * 2. Redistributions in binary form must reproduce the above copyright + * notice(s), this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * $FreeBSD$ + * + */ + +static void +dtrace_clone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) +{ + int u = -1; + size_t len; + + if (*dev != NULL) + return; + + len = strlen(name); + + if (len != 6 && len != 13) + return; + + if (bcmp(name,"dtrace",6) != 0) + return; + + if (len == 13 && bcmp(name,"dtrace/dtrace",13) != 0) + return; + + /* Clone the device to the new minor number. */ + if (clone_create(&dtrace_clones, &dtrace_cdevsw, &u, dev, 0) != 0) + /* Create the /dev/dtrace/dtraceNN entry. */ + *dev = make_dev_cred(&dtrace_cdevsw, unit2minor(u), cred, + UID_ROOT, GID_WHEEL, 0600, "dtrace/dtrace%d", u); + if (*dev != NULL) { + dev_ref(*dev); + (*dev)->si_flags |= SI_CHEAPCLONE; + } +} diff --git a/sys/cddl/dev/dtrace/dtrace_debug.c b/sys/cddl/dev/dtrace/dtrace_debug.c new file mode 100644 index 0000000..24a7a09 --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_debug.c @@ -0,0 +1,596 @@ +/*- + * Copyright (C) 2008 John Birrell <jb@freebsd.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice(s), this list of conditions and the following disclaimer as + * the first lines of this file unmodified other than the possible + * addition of one or more copyright notices. + * 2. Redistributions in binary form must reproduce the above copyright + * notice(s), this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifdef DEBUG + +#if defined(__amd64__) +static __inline int +dtrace_cmpset_long(volatile u_long *dst, u_long exp, u_long src) +{ + u_char res; + + __asm __volatile( + " lock ; " + " cmpxchgq %2,%1 ; " + " sete %0 ; " + "1: " + "# dtrace_cmpset_long" + : "=a" (res), /* 0 */ + "=m" (*dst) /* 1 */ + : "r" (src), /* 2 */ + "a" (exp), /* 3 */ + "m" (*dst) /* 4 */ + : "memory"); + + return (res); +} +#elif defined(__i386__) +static __inline int +dtrace_cmpset_long(volatile u_long *dst, u_long exp, u_long src) +{ + u_char res; + + __asm __volatile( + " lock ; " + " cmpxchgl %2,%1 ; " + " sete %0 ; " + "1: " + "# dtrace_cmpset_long" + : "=a" (res), /* 0 */ + "=m" (*dst) /* 1 */ + : "r" (src), /* 2 */ + "a" (exp), /* 3 */ + "m" (*dst) /* 4 */ + : "memory"); + + return (res); +} +#endif + +#define DTRACE_DEBUG_BUFR_SIZE (32 * 1024) + +struct dtrace_debug_data { + char bufr[DTRACE_DEBUG_BUFR_SIZE]; + char *first; + char *last; + char *next; +} dtrace_debug_data[MAXCPU]; + +static char dtrace_debug_bufr[DTRACE_DEBUG_BUFR_SIZE]; + +static volatile u_long dtrace_debug_flag[MAXCPU]; + +static void +dtrace_debug_lock(int cpu) +{ + while (dtrace_cmpset_long(&dtrace_debug_flag[cpu], 0, 1) == 0) + /* Loop until the lock is obtained. */ + ; +} + +static void +dtrace_debug_unlock(int cpu) +{ + dtrace_debug_flag[cpu] = 0; +} + +static void +dtrace_debug_init(void *dummy) +{ + int i; + struct dtrace_debug_data *d; + + for (i = 0; i <= mp_maxid; i++) { + if (pcpu_find(i) == NULL) + continue; + + d = &dtrace_debug_data[i]; + + if (d->first == NULL) { + d->first = d->bufr; + d->next = d->bufr; + d->last = d->bufr + DTRACE_DEBUG_BUFR_SIZE - 1; + *(d->last) = '\0'; + } + } +} + +SYSINIT(dtrace_debug_init, SI_SUB_KDTRACE, SI_ORDER_ANY, dtrace_debug_init, NULL); +SYSINIT(dtrace_debug_smpinit, SI_SUB_SMP, SI_ORDER_ANY, dtrace_debug_init, NULL); + +static void +dtrace_debug_output(void) +{ + char *p; + int i; + struct dtrace_debug_data *d; + uintptr_t count; + + for (i = 0; i <= mp_maxid; i++) { + if (pcpu_find(i) == NULL) + continue; + + dtrace_debug_lock(i); + + d = &dtrace_debug_data[i]; + + count = 0; + + if (d->first < d->next) { + char *p1 = dtrace_debug_bufr; + + count = (uintptr_t) d->next - (uintptr_t) d->first; + + for (p = d->first; p < d->next; p++) + *p1++ = *p; + } else if (d->next > d->first) { + char *p1 = dtrace_debug_bufr; + + count = (uintptr_t) d->last - (uintptr_t) d->first; + + for (p = d->first; p < d->last; p++) + *p1++ = *p; + + count += (uintptr_t) d->next - (uintptr_t) d->bufr; + + for (p = d->bufr; p < d->next; p++) + *p1++ = *p; + } + + d->first = d->bufr; + d->next = d->bufr; + + dtrace_debug_unlock(i); + + if (count > 0) { + char *last = dtrace_debug_bufr + count; + + p = dtrace_debug_bufr; + + while (p < last) { + if (*p == '\0') { + p++; + continue; + } + + printf("%s", p); + + p += strlen(p); + } + } + } +} + +/* + * Functions below here are called from the probe context, so they can't call + * _any_ functions outside the dtrace module without running foul of the function + * boundary trace provider (fbt). The purpose of these functions is limited to + * buffering debug strings for output when the probe completes on the current CPU. + */ + +static __inline void +dtrace_debug__putc(char c) +{ + struct dtrace_debug_data *d = &dtrace_debug_data[curcpu]; + + *d->next++ = c; + + if (d->next == d->last) + d->next = d->bufr; + + *(d->next) = '\0'; + + if (d->next == d->first) + d->first++; + + if (d->first == d->last) + d->first = d->bufr; +} + +static void __used +dtrace_debug_putc(char c) +{ + dtrace_debug_lock(curcpu); + + dtrace_debug__putc(c); + + dtrace_debug_unlock(curcpu); +} + +static void __used +dtrace_debug_puts(const char *s) +{ + dtrace_debug_lock(curcpu); + + while (*s != '\0') + dtrace_debug__putc(*s++); + + dtrace_debug__putc('\0'); + + dtrace_debug_unlock(curcpu); +} + +/* + * Snaffled from sys/kern/subr_prf.c + * + * Put a NUL-terminated ASCII number (base <= 36) in a buffer in reverse + * order; return an optional length and a pointer to the last character + * written in the buffer (i.e., the first character of the string). + * The buffer pointed to by `nbuf' must have length >= MAXNBUF. + */ +static char * +dtrace_debug_ksprintn(char *nbuf, uintmax_t num, int base, int *lenp, int upper) +{ + char *p, c; + + p = nbuf; + *p = '\0'; + do { + c = hex2ascii(num % base); + *++p = upper ? toupper(c) : c; + } while (num /= base); + if (lenp) + *lenp = p - nbuf; + return (p); +} + +#define MAXNBUF (sizeof(intmax_t) * NBBY + 1) + +static void +dtrace_debug_vprintf(const char *fmt, va_list ap) +{ + char nbuf[MAXNBUF]; + const char *p, *percent, *q; + u_char *up; + int ch, n; + uintmax_t num; + int base, lflag, qflag, tmp, width, ladjust, sharpflag, neg, sign, dot; + int cflag, hflag, jflag, tflag, zflag; + int dwidth, upper; + int radix = 10; + char padc; + int stop = 0, retval = 0; + + num = 0; + + if (fmt == NULL) + fmt = "(fmt null)\n"; + + for (;;) { + padc = ' '; + width = 0; + while ((ch = (u_char)*fmt++) != '%' || stop) { + if (ch == '\0') { + dtrace_debug__putc('\0'); + return; + } + dtrace_debug__putc(ch); + } + percent = fmt - 1; + qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0; + sign = 0; dot = 0; dwidth = 0; upper = 0; + cflag = 0; hflag = 0; jflag = 0; tflag = 0; zflag = 0; +reswitch: switch (ch = (u_char)*fmt++) { + case '.': + dot = 1; + goto reswitch; + case '#': + sharpflag = 1; + goto reswitch; + case '+': + sign = 1; + goto reswitch; + case '-': + ladjust = 1; + goto reswitch; + case '%': + dtrace_debug__putc(ch); + break; + case '*': + if (!dot) { + width = va_arg(ap, int); + if (width < 0) { + ladjust = !ladjust; + width = -width; + } + } else { + dwidth = va_arg(ap, int); + } + goto reswitch; + case '0': + if (!dot) { + padc = '0'; + goto reswitch; + } + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + for (n = 0;; ++fmt) { + n = n * 10 + ch - '0'; + ch = *fmt; + if (ch < '0' || ch > '9') + break; + } + if (dot) + dwidth = n; + else + width = n; + goto reswitch; + case 'b': + num = (u_int)va_arg(ap, int); + p = va_arg(ap, char *); + for (q = dtrace_debug_ksprintn(nbuf, num, *p++, NULL, 0); *q;) + dtrace_debug__putc(*q--); + + if (num == 0) + break; + + for (tmp = 0; *p;) { + n = *p++; + if (num & (1 << (n - 1))) { + dtrace_debug__putc(tmp ? ',' : '<'); + for (; (n = *p) > ' '; ++p) + dtrace_debug__putc(n); + tmp = 1; + } else + for (; *p > ' '; ++p) + continue; + } + if (tmp) + dtrace_debug__putc('>'); + break; + case 'c': + dtrace_debug__putc(va_arg(ap, int)); + break; + case 'D': + up = va_arg(ap, u_char *); + p = va_arg(ap, char *); + if (!width) + width = 16; + while(width--) { + dtrace_debug__putc(hex2ascii(*up >> 4)); + dtrace_debug__putc(hex2ascii(*up & 0x0f)); + up++; + if (width) + for (q=p;*q;q++) + dtrace_debug__putc(*q); + } + break; + case 'd': + case 'i': + base = 10; + sign = 1; + goto handle_sign; + case 'h': + if (hflag) { + hflag = 0; + cflag = 1; + } else + hflag = 1; + goto reswitch; + case 'j': + jflag = 1; + goto reswitch; + case 'l': + if (lflag) { + lflag = 0; + qflag = 1; + } else + lflag = 1; + goto reswitch; + case 'n': + if (jflag) + *(va_arg(ap, intmax_t *)) = retval; + else if (qflag) + *(va_arg(ap, quad_t *)) = retval; + else if (lflag) + *(va_arg(ap, long *)) = retval; + else if (zflag) + *(va_arg(ap, size_t *)) = retval; + else if (hflag) + *(va_arg(ap, short *)) = retval; + else if (cflag) + *(va_arg(ap, char *)) = retval; + else + *(va_arg(ap, int *)) = retval; + break; + case 'o': + base = 8; + goto handle_nosign; + case 'p': + base = 16; + sharpflag = (width == 0); + sign = 0; + num = (uintptr_t)va_arg(ap, void *); + goto number; + case 'q': + qflag = 1; + goto reswitch; + case 'r': + base = radix; + if (sign) + goto handle_sign; + goto handle_nosign; + case 's': + p = va_arg(ap, char *); + if (p == NULL) + p = "(null)"; + if (!dot) + n = strlen (p); + else + for (n = 0; n < dwidth && p[n]; n++) + continue; + + width -= n; + + if (!ladjust && width > 0) + while (width--) + dtrace_debug__putc(padc); + while (n--) + dtrace_debug__putc(*p++); + if (ladjust && width > 0) + while (width--) + dtrace_debug__putc(padc); + break; + case 't': + tflag = 1; + goto reswitch; + case 'u': + base = 10; + goto handle_nosign; + case 'X': + upper = 1; + case 'x': + base = 16; + goto handle_nosign; + case 'y': + base = 16; + sign = 1; + goto handle_sign; + case 'z': + zflag = 1; + goto reswitch; +handle_nosign: + sign = 0; + if (jflag) + num = va_arg(ap, uintmax_t); + else if (qflag) + num = va_arg(ap, u_quad_t); + else if (tflag) + num = va_arg(ap, ptrdiff_t); + else if (lflag) + num = va_arg(ap, u_long); + else if (zflag) + num = va_arg(ap, size_t); + else if (hflag) + num = (u_short)va_arg(ap, int); + else if (cflag) + num = (u_char)va_arg(ap, int); + else + num = va_arg(ap, u_int); + goto number; +handle_sign: + if (jflag) + num = va_arg(ap, intmax_t); + else if (qflag) + num = va_arg(ap, quad_t); + else if (tflag) + num = va_arg(ap, ptrdiff_t); + else if (lflag) + num = va_arg(ap, long); + else if (zflag) + num = va_arg(ap, size_t); + else if (hflag) + num = (short)va_arg(ap, int); + else if (cflag) + num = (char)va_arg(ap, int); + else + num = va_arg(ap, int); +number: + if (sign && (intmax_t)num < 0) { + neg = 1; + num = -(intmax_t)num; + } + p = dtrace_debug_ksprintn(nbuf, num, base, &tmp, upper); + if (sharpflag && num != 0) { + if (base == 8) + tmp++; + else if (base == 16) + tmp += 2; + } + if (neg) + tmp++; + + if (!ladjust && padc != '0' && width + && (width -= tmp) > 0) + while (width--) + dtrace_debug__putc(padc); + if (neg) + dtrace_debug__putc('-'); + if (sharpflag && num != 0) { + if (base == 8) { + dtrace_debug__putc('0'); + } else if (base == 16) { + dtrace_debug__putc('0'); + dtrace_debug__putc('x'); + } + } + if (!ladjust && width && (width -= tmp) > 0) + while (width--) + dtrace_debug__putc(padc); + + while (*p) + dtrace_debug__putc(*p--); + + if (ladjust && width && (width -= tmp) > 0) + while (width--) + dtrace_debug__putc(padc); + + break; + default: + while (percent < fmt) + dtrace_debug__putc(*percent++); + /* + * Since we ignore an formatting argument it is no + * longer safe to obey the remaining formatting + * arguments as the arguments will no longer match + * the format specs. + */ + stop = 1; + break; + } + } + + dtrace_debug__putc('\0'); +} + +void +dtrace_debug_printf(const char *fmt, ...) +{ + va_list ap; + + dtrace_debug_lock(curcpu); + + va_start(ap, fmt); + + dtrace_debug_vprintf(fmt, ap); + + va_end(ap); + + dtrace_debug_unlock(curcpu); +} + +#else + +#define dtrace_debug_output() +#define dtrace_debug_puts(_s) +#define dtrace_debug_printf(fmt, ...) + +#endif diff --git a/sys/cddl/dev/dtrace/dtrace_hacks.c b/sys/cddl/dev/dtrace/dtrace_hacks.c new file mode 100644 index 0000000..21da9f8 --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_hacks.c @@ -0,0 +1,13 @@ +/* $FreeBSD$ */ +/* XXX Hacks.... */ + +dtrace_cacheid_t dtrace_predcache_id; + +int panic_quiesce; +char panic_stack[PANICSTKSIZE]; + +boolean_t +priv_policy_only(const cred_t *a, int b, boolean_t c) +{ + return 0; +} diff --git a/sys/cddl/dev/dtrace/dtrace_ioctl.c b/sys/cddl/dev/dtrace/dtrace_ioctl.c new file mode 100644 index 0000000..3866c3d --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_ioctl.c @@ -0,0 +1,777 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + * + */ + +static int dtrace_verbose_ioctl; +SYSCTL_INT(_debug_dtrace, OID_AUTO, verbose_ioctl, CTLFLAG_RW, &dtrace_verbose_ioctl, 0, ""); + +#define DTRACE_IOCTL_PRINTF(fmt, ...) if (dtrace_verbose_ioctl) printf(fmt, ## __VA_ARGS__ ) + +/* ARGSUSED */ +static int +dtrace_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, + int flags __unused, struct thread *td) +{ + dtrace_state_t *state = dev->si_drv1; + int error = 0; + if (state == NULL) + return (EINVAL); + + if (state->dts_anon) { + ASSERT(dtrace_anon.dta_state == NULL); + state = state->dts_anon; + } + + switch (cmd) { + case DTRACEIOC_AGGDESC: { + dtrace_aggdesc_t **paggdesc = (dtrace_aggdesc_t **) addr; + dtrace_aggdesc_t aggdesc; + dtrace_action_t *act; + dtrace_aggregation_t *agg; + int nrecs; + uint32_t offs; + dtrace_recdesc_t *lrec; + void *buf; + size_t size; + uintptr_t dest; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_AGGDESC\n",__func__,__LINE__); + + if (copyin((void *) *paggdesc, &aggdesc, sizeof (aggdesc)) != 0) + return (EFAULT); + + mutex_enter(&dtrace_lock); + + if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { + mutex_exit(&dtrace_lock); + return (EINVAL); + } + + aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; + + nrecs = aggdesc.dtagd_nrecs; + aggdesc.dtagd_nrecs = 0; + + offs = agg->dtag_base; + lrec = &agg->dtag_action.dta_rec; + aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; + + for (act = agg->dtag_first; ; act = act->dta_next) { + ASSERT(act->dta_intuple || + DTRACEACT_ISAGG(act->dta_kind)); + + /* + * If this action has a record size of zero, it + * denotes an argument to the aggregating action. + * Because the presence of this record doesn't (or + * shouldn't) affect the way the data is interpreted, + * we don't copy it out to save user-level the + * confusion of dealing with a zero-length record. + */ + if (act->dta_rec.dtrd_size == 0) { + ASSERT(agg->dtag_hasarg); + continue; + } + + aggdesc.dtagd_nrecs++; + + if (act == &agg->dtag_action) + break; + } + + /* + * Now that we have the size, we need to allocate a temporary + * buffer in which to store the complete description. We need + * the temporary buffer to be able to drop dtrace_lock() + * across the copyout(), below. + */ + size = sizeof (dtrace_aggdesc_t) + + (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); + + buf = kmem_alloc(size, KM_SLEEP); + dest = (uintptr_t)buf; + + bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); + dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); + + for (act = agg->dtag_first; ; act = act->dta_next) { + dtrace_recdesc_t rec = act->dta_rec; + + /* + * See the comment in the above loop for why we pass + * over zero-length records. + */ + if (rec.dtrd_size == 0) { + ASSERT(agg->dtag_hasarg); + continue; + } + + if (nrecs-- == 0) + break; + + rec.dtrd_offset -= offs; + bcopy(&rec, (void *)dest, sizeof (rec)); + dest += sizeof (dtrace_recdesc_t); + + if (act == &agg->dtag_action) + break; + } + + mutex_exit(&dtrace_lock); + + if (copyout(buf, (void *) *paggdesc, dest - (uintptr_t)buf) != 0) { + kmem_free(buf, size); + return (EFAULT); + } + + kmem_free(buf, size); + return (0); + } + case DTRACEIOC_AGGSNAP: + case DTRACEIOC_BUFSNAP: { + dtrace_bufdesc_t **pdesc = (dtrace_bufdesc_t **) addr; + dtrace_bufdesc_t desc; + caddr_t cached; + dtrace_buffer_t *buf; + + dtrace_debug_output(); + + if (copyin((void *) *pdesc, &desc, sizeof (desc)) != 0) + return (EFAULT); + + DTRACE_IOCTL_PRINTF("%s(%d): %s curcpu %d cpu %d\n", + __func__,__LINE__, + cmd == DTRACEIOC_AGGSNAP ? + "DTRACEIOC_AGGSNAP":"DTRACEIOC_BUFSNAP", + curcpu, desc.dtbd_cpu); + + if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) + return (ENOENT); + if (pcpu_find(desc.dtbd_cpu) == NULL) + return (ENOENT); + + mutex_enter(&dtrace_lock); + + if (cmd == DTRACEIOC_BUFSNAP) { + buf = &state->dts_buffer[desc.dtbd_cpu]; + } else { + buf = &state->dts_aggbuffer[desc.dtbd_cpu]; + } + + if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { + size_t sz = buf->dtb_offset; + + if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { + mutex_exit(&dtrace_lock); + return (EBUSY); + } + + /* + * If this buffer has already been consumed, we're + * going to indicate that there's nothing left here + * to consume. + */ + if (buf->dtb_flags & DTRACEBUF_CONSUMED) { + mutex_exit(&dtrace_lock); + + desc.dtbd_size = 0; + desc.dtbd_drops = 0; + desc.dtbd_errors = 0; + desc.dtbd_oldest = 0; + sz = sizeof (desc); + + if (copyout(&desc, (void *) *pdesc, sz) != 0) + return (EFAULT); + + return (0); + } + + /* + * If this is a ring buffer that has wrapped, we want + * to copy the whole thing out. + */ + if (buf->dtb_flags & DTRACEBUF_WRAPPED) { + dtrace_buffer_polish(buf); + sz = buf->dtb_size; + } + + if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { + mutex_exit(&dtrace_lock); + return (EFAULT); + } + + desc.dtbd_size = sz; + desc.dtbd_drops = buf->dtb_drops; + desc.dtbd_errors = buf->dtb_errors; + desc.dtbd_oldest = buf->dtb_xamot_offset; + + mutex_exit(&dtrace_lock); + + if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0) + return (EFAULT); + + buf->dtb_flags |= DTRACEBUF_CONSUMED; + + return (0); + } + + if (buf->dtb_tomax == NULL) { + ASSERT(buf->dtb_xamot == NULL); + mutex_exit(&dtrace_lock); + return (ENOENT); + } + + cached = buf->dtb_tomax; + ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); + + dtrace_xcall(desc.dtbd_cpu, + (dtrace_xcall_t)dtrace_buffer_switch, buf); + + state->dts_errors += buf->dtb_xamot_errors; + + /* + * If the buffers did not actually switch, then the cross call + * did not take place -- presumably because the given CPU is + * not in the ready set. If this is the case, we'll return + * ENOENT. + */ + if (buf->dtb_tomax == cached) { + ASSERT(buf->dtb_xamot != cached); + mutex_exit(&dtrace_lock); + return (ENOENT); + } + + ASSERT(cached == buf->dtb_xamot); + + DTRACE_IOCTL_PRINTF("%s(%d): copyout the buffer snapshot\n",__func__,__LINE__); + + /* + * We have our snapshot; now copy it out. + */ + if (copyout(buf->dtb_xamot, desc.dtbd_data, + buf->dtb_xamot_offset) != 0) { + mutex_exit(&dtrace_lock); + return (EFAULT); + } + + desc.dtbd_size = buf->dtb_xamot_offset; + desc.dtbd_drops = buf->dtb_xamot_drops; + desc.dtbd_errors = buf->dtb_xamot_errors; + desc.dtbd_oldest = 0; + + mutex_exit(&dtrace_lock); + + DTRACE_IOCTL_PRINTF("%s(%d): copyout buffer desc: size %zd drops %lu errors %lu\n",__func__,__LINE__,(size_t) desc.dtbd_size,(u_long) desc.dtbd_drops,(u_long) desc.dtbd_errors); + + /* + * Finally, copy out the buffer description. + */ + if (copyout(&desc, (void *) *pdesc, sizeof (desc)) != 0) + return (EFAULT); + + return (0); + } + case DTRACEIOC_CONF: { + dtrace_conf_t conf; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_CONF\n",__func__,__LINE__); + + bzero(&conf, sizeof (conf)); + conf.dtc_difversion = DIF_VERSION; + conf.dtc_difintregs = DIF_DIR_NREGS; + conf.dtc_diftupregs = DIF_DTR_NREGS; + conf.dtc_ctfmodel = CTF_MODEL_NATIVE; + + *((dtrace_conf_t *) addr) = conf; + + return (0); + } + case DTRACEIOC_DOFGET: { + dof_hdr_t **pdof = (dof_hdr_t **) addr; + dof_hdr_t hdr, *dof = *pdof; + int rval; + uint64_t len; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_DOFGET\n",__func__,__LINE__); + + if (copyin((void *)dof, &hdr, sizeof (hdr)) != 0) + return (EFAULT); + + mutex_enter(&dtrace_lock); + dof = dtrace_dof_create(state); + mutex_exit(&dtrace_lock); + + len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); + rval = copyout(dof, (void *) *pdof, len); + dtrace_dof_destroy(dof); + + return (rval == 0 ? 0 : EFAULT); + } + case DTRACEIOC_ENABLE: { + dof_hdr_t *dof = NULL; + dtrace_enabling_t *enab = NULL; + dtrace_vstate_t *vstate; + int err = 0; + int rval; + dtrace_enable_io_t *p = (dtrace_enable_io_t *) addr; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_ENABLE\n",__func__,__LINE__); + + /* + * If a NULL argument has been passed, we take this as our + * cue to reevaluate our enablings. + */ + if (p->dof == NULL) { + dtrace_enabling_matchall(); + + return (0); + } + + if ((dof = dtrace_dof_copyin((uintptr_t) p->dof, &rval)) == NULL) + return (EINVAL); + + mutex_enter(&cpu_lock); + mutex_enter(&dtrace_lock); + vstate = &state->dts_vstate; + + if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { + mutex_exit(&dtrace_lock); + mutex_exit(&cpu_lock); + dtrace_dof_destroy(dof); + return (EBUSY); + } + + if (dtrace_dof_slurp(dof, vstate, td->td_ucred, &enab, 0, B_TRUE) != 0) { + mutex_exit(&dtrace_lock); + mutex_exit(&cpu_lock); + dtrace_dof_destroy(dof); + return (EINVAL); + } + + if ((rval = dtrace_dof_options(dof, state)) != 0) { + dtrace_enabling_destroy(enab); + mutex_exit(&dtrace_lock); + mutex_exit(&cpu_lock); + dtrace_dof_destroy(dof); + return (rval); + } + + if ((err = dtrace_enabling_match(enab, &p->n_matched)) == 0) { + err = dtrace_enabling_retain(enab); + } else { + dtrace_enabling_destroy(enab); + } + + mutex_exit(&cpu_lock); + mutex_exit(&dtrace_lock); + dtrace_dof_destroy(dof); + + return (err); + } + case DTRACEIOC_EPROBE: { + dtrace_eprobedesc_t **pepdesc = (dtrace_eprobedesc_t **) addr; + dtrace_eprobedesc_t epdesc; + dtrace_ecb_t *ecb; + dtrace_action_t *act; + void *buf; + size_t size; + uintptr_t dest; + int nrecs; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_EPROBE\n",__func__,__LINE__); + + if (copyin((void *)*pepdesc, &epdesc, sizeof (epdesc)) != 0) + return (EFAULT); + + mutex_enter(&dtrace_lock); + + if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { + mutex_exit(&dtrace_lock); + return (EINVAL); + } + + if (ecb->dte_probe == NULL) { + mutex_exit(&dtrace_lock); + return (EINVAL); + } + + epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; + epdesc.dtepd_uarg = ecb->dte_uarg; + epdesc.dtepd_size = ecb->dte_size; + + nrecs = epdesc.dtepd_nrecs; + epdesc.dtepd_nrecs = 0; + for (act = ecb->dte_action; act != NULL; act = act->dta_next) { + if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) + continue; + + epdesc.dtepd_nrecs++; + } + + /* + * Now that we have the size, we need to allocate a temporary + * buffer in which to store the complete description. We need + * the temporary buffer to be able to drop dtrace_lock() + * across the copyout(), below. + */ + size = sizeof (dtrace_eprobedesc_t) + + (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); + + buf = kmem_alloc(size, KM_SLEEP); + dest = (uintptr_t)buf; + + bcopy(&epdesc, (void *)dest, sizeof (epdesc)); + dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); + + for (act = ecb->dte_action; act != NULL; act = act->dta_next) { + if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) + continue; + + if (nrecs-- == 0) + break; + + bcopy(&act->dta_rec, (void *)dest, + sizeof (dtrace_recdesc_t)); + dest += sizeof (dtrace_recdesc_t); + } + + mutex_exit(&dtrace_lock); + + if (copyout(buf, (void *) *pepdesc, dest - (uintptr_t)buf) != 0) { + kmem_free(buf, size); + return (EFAULT); + } + + kmem_free(buf, size); + return (0); + } + case DTRACEIOC_FORMAT: { + dtrace_fmtdesc_t *fmt = (dtrace_fmtdesc_t *) addr; + char *str; + int len; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_FORMAT\n",__func__,__LINE__); + + mutex_enter(&dtrace_lock); + + if (fmt->dtfd_format == 0 || + fmt->dtfd_format > state->dts_nformats) { + mutex_exit(&dtrace_lock); + return (EINVAL); + } + + /* + * Format strings are allocated contiguously and they are + * never freed; if a format index is less than the number + * of formats, we can assert that the format map is non-NULL + * and that the format for the specified index is non-NULL. + */ + ASSERT(state->dts_formats != NULL); + str = state->dts_formats[fmt->dtfd_format - 1]; + ASSERT(str != NULL); + + len = strlen(str) + 1; + + if (len > fmt->dtfd_length) { + fmt->dtfd_length = len; + } else { + if (copyout(str, fmt->dtfd_string, len) != 0) { + mutex_exit(&dtrace_lock); + return (EINVAL); + } + } + + mutex_exit(&dtrace_lock); + return (0); + } + case DTRACEIOC_GO: { + int rval; + processorid_t *cpuid = (processorid_t *) addr; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_GO\n",__func__,__LINE__); + + rval = dtrace_state_go(state, cpuid); + + return (rval); + } + case DTRACEIOC_PROBEARG: { + dtrace_argdesc_t *desc = (dtrace_argdesc_t *) addr; + dtrace_probe_t *probe; + dtrace_provider_t *prov; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROBEARG\n",__func__,__LINE__); + + if (desc->dtargd_id == DTRACE_IDNONE) + return (EINVAL); + + if (desc->dtargd_ndx == DTRACE_ARGNONE) + return (EINVAL); + + mutex_enter(&dtrace_provider_lock); + mutex_enter(&mod_lock); + mutex_enter(&dtrace_lock); + + if (desc->dtargd_id > dtrace_nprobes) { + mutex_exit(&dtrace_lock); + mutex_exit(&mod_lock); + mutex_exit(&dtrace_provider_lock); + return (EINVAL); + } + + if ((probe = dtrace_probes[desc->dtargd_id - 1]) == NULL) { + mutex_exit(&dtrace_lock); + mutex_exit(&mod_lock); + mutex_exit(&dtrace_provider_lock); + return (EINVAL); + } + + mutex_exit(&dtrace_lock); + + prov = probe->dtpr_provider; + + if (prov->dtpv_pops.dtps_getargdesc == NULL) { + /* + * There isn't any typed information for this probe. + * Set the argument number to DTRACE_ARGNONE. + */ + desc->dtargd_ndx = DTRACE_ARGNONE; + } else { + desc->dtargd_native[0] = '\0'; + desc->dtargd_xlate[0] = '\0'; + desc->dtargd_mapping = desc->dtargd_ndx; + + prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, + probe->dtpr_id, probe->dtpr_arg, desc); + } + + mutex_exit(&mod_lock); + mutex_exit(&dtrace_provider_lock); + + return (0); + } + case DTRACEIOC_PROBEMATCH: + case DTRACEIOC_PROBES: { + dtrace_probedesc_t *p_desc = (dtrace_probedesc_t *) addr; + dtrace_probe_t *probe = NULL; + dtrace_probekey_t pkey; + dtrace_id_t i; + int m = 0; + uint32_t priv = 0; + uid_t uid = 0; + zoneid_t zoneid = 0; + + DTRACE_IOCTL_PRINTF("%s(%d): %s\n",__func__,__LINE__, + cmd == DTRACEIOC_PROBEMATCH ? + "DTRACEIOC_PROBEMATCH":"DTRACEIOC_PROBES"); + + p_desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; + p_desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; + p_desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; + p_desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; + + /* + * Before we attempt to match this probe, we want to give + * all providers the opportunity to provide it. + */ + if (p_desc->dtpd_id == DTRACE_IDNONE) { + mutex_enter(&dtrace_provider_lock); + dtrace_probe_provide(p_desc, NULL); + mutex_exit(&dtrace_provider_lock); + p_desc->dtpd_id++; + } + + if (cmd == DTRACEIOC_PROBEMATCH) { + dtrace_probekey(p_desc, &pkey); + pkey.dtpk_id = DTRACE_IDNONE; + } + + dtrace_cred2priv(td->td_ucred, &priv, &uid, &zoneid); + + mutex_enter(&dtrace_lock); + + if (cmd == DTRACEIOC_PROBEMATCH) { + for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) { + if ((probe = dtrace_probes[i - 1]) != NULL && + (m = dtrace_match_probe(probe, &pkey, + priv, uid, zoneid)) != 0) + break; + } + + if (m < 0) { + mutex_exit(&dtrace_lock); + return (EINVAL); + } + + } else { + for (i = p_desc->dtpd_id; i <= dtrace_nprobes; i++) { + if ((probe = dtrace_probes[i - 1]) != NULL && + dtrace_match_priv(probe, priv, uid, zoneid)) + break; + } + } + + if (probe == NULL) { + mutex_exit(&dtrace_lock); + return (ESRCH); + } + + dtrace_probe_description(probe, p_desc); + mutex_exit(&dtrace_lock); + + return (0); + } + case DTRACEIOC_PROVIDER: { + dtrace_providerdesc_t *pvd = (dtrace_providerdesc_t *) addr; + dtrace_provider_t *pvp; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_PROVIDER\n",__func__,__LINE__); + + pvd->dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; + mutex_enter(&dtrace_provider_lock); + + for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { + if (strcmp(pvp->dtpv_name, pvd->dtvd_name) == 0) + break; + } + + mutex_exit(&dtrace_provider_lock); + + if (pvp == NULL) + return (ESRCH); + + bcopy(&pvp->dtpv_priv, &pvd->dtvd_priv, sizeof (dtrace_ppriv_t)); + bcopy(&pvp->dtpv_attr, &pvd->dtvd_attr, sizeof (dtrace_pattr_t)); + + return (0); + } + case DTRACEIOC_REPLICATE: { + dtrace_repldesc_t *desc = (dtrace_repldesc_t *) addr; + dtrace_probedesc_t *match = &desc->dtrpd_match; + dtrace_probedesc_t *create = &desc->dtrpd_create; + int err; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_REPLICATE\n",__func__,__LINE__); + + match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; + match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; + match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; + match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; + + create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; + create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; + create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; + create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; + + mutex_enter(&dtrace_lock); + err = dtrace_enabling_replicate(state, match, create); + mutex_exit(&dtrace_lock); + + return (err); + } + case DTRACEIOC_STATUS: { + dtrace_status_t *stat = (dtrace_status_t *) addr; + dtrace_dstate_t *dstate; + int i, j; + uint64_t nerrs; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STATUS\n",__func__,__LINE__); + + /* + * See the comment in dtrace_state_deadman() for the reason + * for setting dts_laststatus to INT64_MAX before setting + * it to the correct value. + */ + state->dts_laststatus = INT64_MAX; + dtrace_membar_producer(); + state->dts_laststatus = dtrace_gethrtime(); + + bzero(stat, sizeof (*stat)); + + mutex_enter(&dtrace_lock); + + if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { + mutex_exit(&dtrace_lock); + return (ENOENT); + } + + if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) + stat->dtst_exiting = 1; + + nerrs = state->dts_errors; + dstate = &state->dts_vstate.dtvs_dynvars; + + for (i = 0; i < NCPU; i++) { +#if !defined(sun) + if (pcpu_find(i) == NULL) + continue; +#endif + dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; + + stat->dtst_dyndrops += dcpu->dtdsc_drops; + stat->dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; + stat->dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; + + if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) + stat->dtst_filled++; + + nerrs += state->dts_buffer[i].dtb_errors; + + for (j = 0; j < state->dts_nspeculations; j++) { + dtrace_speculation_t *spec; + dtrace_buffer_t *buf; + + spec = &state->dts_speculations[j]; + buf = &spec->dtsp_buffer[i]; + stat->dtst_specdrops += buf->dtb_xamot_drops; + } + } + + stat->dtst_specdrops_busy = state->dts_speculations_busy; + stat->dtst_specdrops_unavail = state->dts_speculations_unavail; + stat->dtst_stkstroverflows = state->dts_stkstroverflows; + stat->dtst_dblerrors = state->dts_dblerrors; + stat->dtst_killed = + (state->dts_activity == DTRACE_ACTIVITY_KILLED); + stat->dtst_errors = nerrs; + + mutex_exit(&dtrace_lock); + + return (0); + } + case DTRACEIOC_STOP: { + int rval; + processorid_t *cpuid = (processorid_t *) addr; + + DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STOP\n",__func__,__LINE__); + + mutex_enter(&dtrace_lock); + rval = dtrace_state_stop(state, cpuid); + mutex_exit(&dtrace_lock); + + return (rval); + } + default: + error = ENOTTY; + } + return (error); +} diff --git a/sys/cddl/dev/dtrace/dtrace_load.c b/sys/cddl/dev/dtrace/dtrace_load.c new file mode 100644 index 0000000..368d53e --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_load.c @@ -0,0 +1,164 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + * + */ + +static void +dtrace_ap_start(void *dummy) +{ + int i; + + mutex_enter(&cpu_lock); + + /* Setup the rest of the CPUs. */ + for (i = 1; i <= mp_maxid; i++) { + if (pcpu_find(i) == NULL) + continue; + + (void) dtrace_cpu_setup(CPU_CONFIG, i); + } + + mutex_exit(&cpu_lock); +} + +SYSINIT(dtrace_ap_start, SI_SUB_SMP, SI_ORDER_ANY, dtrace_ap_start, NULL); + +static void +dtrace_load(void *dummy) +{ + dtrace_provider_id_t id; + + /* Hook into the trap handler. */ + dtrace_trap_func = dtrace_trap; + + /* Hang our hook for thread switches. */ + dtrace_vtime_switch_func = dtrace_vtime_switch; + + /* Hang our hook for exceptions. */ + dtrace_invop_init(); + + /* + * XXX This is a short term hack to avoid having to comment + * out lots and lots of lock/unlock calls. + */ + mutex_init(&mod_lock,"XXX mod_lock hack", MUTEX_DEFAULT, NULL); + + /* + * Initialise the mutexes without 'witness' because the dtrace + * code is mostly written to wait for memory. To have the + * witness code change a malloc() from M_WAITOK to M_NOWAIT + * because a lock is held would surely create a panic in a + * low memory situation. And that low memory situation might be + * the very problem we are trying to trace. + */ + mutex_init(&dtrace_lock,"dtrace probe state", MUTEX_DEFAULT, NULL); + mutex_init(&dtrace_provider_lock,"dtrace provider state", MUTEX_DEFAULT, NULL); + mutex_init(&dtrace_meta_lock,"dtrace meta-provider state", MUTEX_DEFAULT, NULL); + mutex_init(&dtrace_errlock,"dtrace error lock", MUTEX_DEFAULT, NULL); + + mutex_enter(&dtrace_provider_lock); + mutex_enter(&dtrace_lock); + mutex_enter(&cpu_lock); + + ASSERT(MUTEX_HELD(&cpu_lock)); + + dtrace_arena = new_unrhdr(1, INT_MAX, &dtrace_unr_mtx); + + dtrace_state_cache = kmem_cache_create("dtrace_state_cache", + sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN, + NULL, NULL, NULL, NULL, NULL, 0); + + ASSERT(MUTEX_HELD(&cpu_lock)); + dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), + offsetof(dtrace_probe_t, dtpr_nextmod), + offsetof(dtrace_probe_t, dtpr_prevmod)); + + dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func), + offsetof(dtrace_probe_t, dtpr_nextfunc), + offsetof(dtrace_probe_t, dtpr_prevfunc)); + + dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name), + offsetof(dtrace_probe_t, dtpr_nextname), + offsetof(dtrace_probe_t, dtpr_prevname)); + + if (dtrace_retain_max < 1) { + cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; " + "setting to 1", dtrace_retain_max); + dtrace_retain_max = 1; + } + + /* + * Now discover our toxic ranges. + */ + dtrace_toxic_ranges(dtrace_toxrange_add); + + /* + * Before we register ourselves as a provider to our own framework, + * we would like to assert that dtrace_provider is NULL -- but that's + * not true if we were loaded as a dependency of a DTrace provider. + * Once we've registered, we can assert that dtrace_provider is our + * pseudo provider. + */ + (void) dtrace_register("dtrace", &dtrace_provider_attr, + DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id); + + ASSERT(dtrace_provider != NULL); + ASSERT((dtrace_provider_id_t)dtrace_provider == id); + + dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) + dtrace_provider, NULL, NULL, "BEGIN", 0, NULL); + dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) + dtrace_provider, NULL, NULL, "END", 0, NULL); + dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) + dtrace_provider, NULL, NULL, "ERROR", 1, NULL); + + mutex_exit(&cpu_lock); + + /* + * If DTrace helper tracing is enabled, we need to allocate the + * trace buffer and initialize the values. + */ + if (dtrace_helptrace_enabled) { + ASSERT(dtrace_helptrace_buffer == NULL); + dtrace_helptrace_buffer = + kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP); + dtrace_helptrace_next = 0; + } + + mutex_exit(&dtrace_lock); + mutex_exit(&dtrace_provider_lock); + + mutex_enter(&cpu_lock); + + /* Setup the boot CPU */ + (void) dtrace_cpu_setup(CPU_CONFIG, 0); + + mutex_exit(&cpu_lock); + + /* Enable device cloning. */ + clone_setup(&dtrace_clones); + + /* Setup device cloning events. */ + eh_tag = EVENTHANDLER_REGISTER(dev_clone, dtrace_clone, 0, 1000); + + return; +} diff --git a/sys/cddl/dev/dtrace/dtrace_modevent.c b/sys/cddl/dev/dtrace/dtrace_modevent.c new file mode 100644 index 0000000..8d31853 --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_modevent.c @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + * + */ + +/* ARGSUSED */ +static int +dtrace_modevent(module_t mod __unused, int type, void *data __unused) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + break; + + case MOD_UNLOAD: + break; + + case MOD_SHUTDOWN: + break; + + default: + error = EOPNOTSUPP; + break; + + } + return (error); +} diff --git a/sys/cddl/dev/dtrace/dtrace_sysctl.c b/sys/cddl/dev/dtrace/dtrace_sysctl.c new file mode 100644 index 0000000..00ed709 --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_sysctl.c @@ -0,0 +1,82 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + * + */ + +int dtrace_debug = 0; +TUNABLE_INT("debug.dtrace.debug", &dtrace_debug); +SYSCTL_INT(_debug_dtrace, OID_AUTO, debug, CTLFLAG_RW, &dtrace_debug, 0, ""); + +/* Report registered DTrace providers. */ +static int +sysctl_dtrace_providers(SYSCTL_HANDLER_ARGS) +{ + char *p_name = NULL; + dtrace_provider_t + *prov = dtrace_provider; + int error = 0; + size_t len = 0; + + mutex_enter(&dtrace_provider_lock); + mutex_enter(&dtrace_lock); + + /* Compute the length of the space-separated provider name string. */ + while (prov != NULL) { + len += strlen(prov->dtpv_name) + 1; + prov = prov->dtpv_next; + } + + if ((p_name = kmem_alloc(len, KM_SLEEP)) == NULL) + error = ENOMEM; + else { + /* Start with an empty string. */ + *p_name = '\0'; + + /* Point to the first provider again. */ + prov = dtrace_provider; + + /* Loop through the providers, appending the names. */ + while (prov != NULL) { + if (prov != dtrace_provider) + (void) strlcat(p_name, " ", len); + + (void) strlcat(p_name, prov->dtpv_name, len); + + prov = prov->dtpv_next; + } + } + + mutex_exit(&dtrace_lock); + mutex_exit(&dtrace_provider_lock); + + if (p_name != NULL) { + error = sysctl_handle_string(oidp, p_name, len, req); + + kmem_free(p_name, 0); + } + + return (error); +} + +SYSCTL_PROC(_debug_dtrace, OID_AUTO, providers, CTLTYPE_STRING | CTLFLAG_RD, + 0, 0, sysctl_dtrace_providers, "A", ""); + diff --git a/sys/cddl/dev/dtrace/dtrace_test.c b/sys/cddl/dev/dtrace/dtrace_test.c new file mode 100644 index 0000000..d484fb2 --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_test.c @@ -0,0 +1,73 @@ +/*- + * Copyright 2008 John Birrell <jb@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#include <sys/cdefs.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/vnode.h> + +/* + * These are variables that the DTrace test suite references in the + * Solaris kernel. We define them here so that the tests function + * unaltered. + */ +int kmem_flags; + +typedef struct vnode vnode_t; +vnode_t dummy; +vnode_t *rootvp = &dummy; + +static int +dtrace_test_modevent(module_t mod, int type, void *data) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + break; + + case MOD_UNLOAD: + break; + + case MOD_SHUTDOWN: + break; + + default: + error = EOPNOTSUPP; + break; + + } + return (error); +} + +DEV_MODULE(dtrace_test, dtrace_test_modevent, NULL); +MODULE_VERSION(dtrace_test, 1); +MODULE_DEPEND(dtrace_test, dtraceall, 1, 1, 1); diff --git a/sys/cddl/dev/dtrace/dtrace_unload.c b/sys/cddl/dev/dtrace/dtrace_unload.c new file mode 100644 index 0000000..eb14543 --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_unload.c @@ -0,0 +1,138 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + * + */ + +static int +dtrace_unload() +{ + dtrace_state_t *state; + int error = 0; + + /* + * Check if there is still an event handler callback + * registered. + */ + if (eh_tag != 0) { + /* De-register the device cloning event handler. */ + EVENTHANDLER_DEREGISTER(dev_clone, eh_tag); + eh_tag = 0; + + /* Stop device cloning. */ + clone_cleanup(&dtrace_clones); + } + + mutex_enter(&dtrace_provider_lock); + mutex_enter(&dtrace_lock); + mutex_enter(&cpu_lock); + + ASSERT(dtrace_opens == 0); + + if (dtrace_helpers > 0) { + mutex_exit(&cpu_lock); + mutex_exit(&dtrace_lock); + mutex_exit(&dtrace_provider_lock); + return (EBUSY); + } + + if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { + mutex_exit(&cpu_lock); + mutex_exit(&dtrace_lock); + mutex_exit(&dtrace_provider_lock); + return (EBUSY); + } + + dtrace_provider = NULL; + + if ((state = dtrace_anon_grab()) != NULL) { + /* + * If there were ECBs on this state, the provider should + * have not been allowed to detach; assert that there is + * none. + */ + ASSERT(state->dts_necbs == 0); + dtrace_state_destroy(state); + } + + bzero(&dtrace_anon, sizeof (dtrace_anon_t)); + + mutex_exit(&cpu_lock); + + if (dtrace_helptrace_enabled) { + kmem_free(dtrace_helptrace_buffer, 0); + dtrace_helptrace_buffer = NULL; + } + + if (dtrace_probes != NULL) { + kmem_free(dtrace_probes, 0); + dtrace_probes = NULL; + dtrace_nprobes = 0; + } + + dtrace_hash_destroy(dtrace_bymod); + dtrace_hash_destroy(dtrace_byfunc); + dtrace_hash_destroy(dtrace_byname); + dtrace_bymod = NULL; + dtrace_byfunc = NULL; + dtrace_byname = NULL; + + kmem_cache_destroy(dtrace_state_cache); + + delete_unrhdr(dtrace_arena); + + if (dtrace_toxrange != NULL) { + kmem_free(dtrace_toxrange, 0); + dtrace_toxrange = NULL; + dtrace_toxranges = 0; + dtrace_toxranges_max = 0; + } + + ASSERT(dtrace_vtime_references == 0); + ASSERT(dtrace_opens == 0); + ASSERT(dtrace_retained == NULL); + + mutex_exit(&dtrace_lock); + mutex_exit(&dtrace_provider_lock); + + mutex_destroy(&dtrace_meta_lock); + mutex_destroy(&dtrace_provider_lock); + mutex_destroy(&dtrace_lock); + mutex_destroy(&dtrace_errlock); + + /* XXX Hack */ + mutex_destroy(&mod_lock); + + /* Reset our hook for exceptions. */ + dtrace_invop_uninit(); + + /* + * Reset our hook for thread switches, but ensure that vtime isn't + * active first. + */ + dtrace_vtime_active = 0; + dtrace_vtime_switch_func = NULL; + + /* Unhook from the trap handler. */ + dtrace_trap_func = NULL; + + return (error); +} diff --git a/sys/cddl/dev/dtrace/dtrace_vtime.c b/sys/cddl/dev/dtrace/dtrace_vtime.c new file mode 100644 index 0000000..a3fa7f7 --- /dev/null +++ b/sys/cddl/dev/dtrace/dtrace_vtime.c @@ -0,0 +1,101 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +void +dtrace_vtime_enable(void) +{ + dtrace_vtime_state_t state, nstate = 0; + + do { + state = dtrace_vtime_active; + + switch (state) { + case DTRACE_VTIME_INACTIVE: + nstate = DTRACE_VTIME_ACTIVE; + break; + + case DTRACE_VTIME_INACTIVE_TNF: + nstate = DTRACE_VTIME_ACTIVE_TNF; + break; + + case DTRACE_VTIME_ACTIVE: + case DTRACE_VTIME_ACTIVE_TNF: + panic("DTrace virtual time already enabled"); + /*NOTREACHED*/ + } + + } while (dtrace_cas32((uint32_t *)&dtrace_vtime_active, + state, nstate) != state); +} + +void +dtrace_vtime_disable(void) +{ + dtrace_vtime_state_t state, nstate = 0; + + do { + state = dtrace_vtime_active; + + switch (state) { + case DTRACE_VTIME_ACTIVE: + nstate = DTRACE_VTIME_INACTIVE; + break; + + case DTRACE_VTIME_ACTIVE_TNF: + nstate = DTRACE_VTIME_INACTIVE_TNF; + break; + + case DTRACE_VTIME_INACTIVE: + case DTRACE_VTIME_INACTIVE_TNF: + panic("DTrace virtual time already disabled"); + /*NOTREACHED*/ + } + + } while (dtrace_cas32((uint32_t *)&dtrace_vtime_active, + state, nstate) != state); +} + +void +dtrace_vtime_switch(kthread_t *next) +{ + dtrace_icookie_t cookie; + hrtime_t ts; + + cookie = dtrace_interrupt_disable(); + ts = dtrace_gethrtime(); + + if (curthread->t_dtrace_start != 0) { + curthread->t_dtrace_vtime += ts - curthread->t_dtrace_start; + curthread->t_dtrace_start = 0; + } + + if (next != NULL) + next->t_dtrace_start = ts; + + dtrace_interrupt_enable(cookie); +} diff --git a/sys/cddl/dev/dtrace/i386/dis_tables.c b/sys/cddl/dev/dtrace/i386/dis_tables.c new file mode 100644 index 0000000..5a5bc25 --- /dev/null +++ b/sys/cddl/dev/dtrace/i386/dis_tables.c @@ -0,0 +1,3193 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +#if defined(sun) +#pragma ident "@(#)dis_tables.c 1.11 06/03/02 SMI" +#endif + +#include "dis_tables.h" + +/* BEGIN CSTYLED */ + +/* + * Disassembly begins in dis_distable, which is equivalent to the One-byte + * Opcode Map in the Intel IA32 ISA Reference (page A-6 in my copy). The + * decoding loops then traverse out through the other tables as necessary to + * decode a given instruction. + * + * The behavior of this file can be controlled by one of the following flags: + * + * DIS_TEXT Include text for disassembly + * DIS_MEM Include memory-size calculations + * + * Either or both of these can be defined. + * + * This file is not, and will never be, cstyled. If anything, the tables should + * be taken out another tab stop or two so nothing overlaps. + */ + +/* + * These functions must be provided for the consumer to do disassembly. + */ +#ifdef DIS_TEXT +extern char *strncpy(char *, const char *, size_t); +extern size_t strlen(const char *); +extern int strcmp(const char *, const char *); +extern int strncmp(const char *, const char *, size_t); +extern size_t strlcat(char *, const char *, size_t); +#endif + + +#define TERM NULL /* used to indicate that the 'indirect' */ + /* field terminates - no pointer. */ + +/* Used to decode instructions. */ +typedef struct instable { + const struct instable *it_indirect; /* for decode op codes */ + uchar_t it_adrmode; +#ifdef DIS_TEXT + char it_name[NCPS]; + uint_t it_suffix:1; /* mneu + "w", "l", or "d" */ +#endif +#ifdef DIS_MEM + uint_t it_size:16; +#endif + uint_t it_invalid64:1; /* opcode invalid in amd64 */ + uint_t it_always64:1; /* 64 bit when in 64 bit mode */ + uint_t it_invalid32:1; /* invalid in IA32 */ + uint_t it_stackop:1; /* push/pop stack operation */ +} instable_t; + +/* + * Instruction formats. + */ +enum { + UNKNOWN, + MRw, + IMlw, + IMw, + IR, + OA, + AO, + MS, + SM, + Mv, + Mw, + M, /* register or memory */ + Mb, /* register or memory, always byte sized */ + MO, /* memory only (no registers) */ + PREF, + SWAPGS, + R, + RA, + SEG, + MR, + RM, + IA, + MA, + SD, + AD, + SA, + D, + INM, + SO, + BD, + I, + P, + V, + DSHIFT, /* for double shift that has an 8-bit immediate */ + U, + OVERRIDE, + NORM, /* instructions w/o ModR/M byte, no memory access */ + IMPLMEM, /* instructions w/o ModR/M byte, implicit mem access */ + O, /* for call */ + JTAB, /* jump table */ + IMUL, /* for 186 iimul instr */ + CBW, /* so data16 can be evaluated for cbw and variants */ + MvI, /* for 186 logicals */ + ENTER, /* for 186 enter instr */ + RMw, /* for 286 arpl instr */ + Ib, /* for push immediate byte */ + F, /* for 287 instructions */ + FF, /* for 287 instructions */ + FFC, /* for 287 instructions */ + DM, /* 16-bit data */ + AM, /* 16-bit addr */ + LSEG, /* for 3-bit seg reg encoding */ + MIb, /* for 386 logicals */ + SREG, /* for 386 special registers */ + PREFIX, /* a REP instruction prefix */ + LOCK, /* a LOCK instruction prefix */ + INT3, /* The int 3 instruction, which has a fake operand */ + INTx, /* The normal int instruction, with explicit int num */ + DSHIFTcl, /* for double shift that implicitly uses %cl */ + CWD, /* so data16 can be evaluated for cwd and variants */ + RET, /* single immediate 16-bit operand */ + MOVZ, /* for movs and movz, with different size operands */ + XADDB, /* for xaddb */ + MOVSXZ, /* AMD64 mov sign extend 32 to 64 bit instruction */ + +/* + * MMX/SIMD addressing modes. + */ + + MMO, /* Prefixable MMX/SIMD-Int mm/mem -> mm */ + MMOIMPL, /* Prefixable MMX/SIMD-Int mm -> mm (mem) */ + MMO3P, /* Prefixable MMX/SIMD-Int mm -> r32,imm8 */ + MMOM3, /* Prefixable MMX/SIMD-Int mm -> r32 */ + MMOS, /* Prefixable MMX/SIMD-Int mm -> mm/mem */ + MMOMS, /* Prefixable MMX/SIMD-Int mm -> mem */ + MMOPM, /* MMX/SIMD-Int mm/mem -> mm,imm8 */ + MMOPRM, /* Prefixable MMX/SIMD-Int r32/mem -> mm,imm8 */ + MMOSH, /* Prefixable MMX mm,imm8 */ + MM, /* MMX/SIMD-Int mm/mem -> mm */ + MMS, /* MMX/SIMD-Int mm -> mm/mem */ + MMSH, /* MMX mm,imm8 */ + XMMO, /* Prefixable SIMD xmm/mem -> xmm */ + XMMOS, /* Prefixable SIMD xmm -> xmm/mem */ + XMMOPM, /* Prefixable SIMD xmm/mem w/to xmm,imm8 */ + XMMOMX, /* Prefixable SIMD mm/mem -> xmm */ + XMMOX3, /* Prefixable SIMD xmm -> r32 */ + XMMOXMM, /* Prefixable SIMD xmm/mem -> mm */ + XMMOM, /* Prefixable SIMD xmm -> mem */ + XMMOMS, /* Prefixable SIMD mem -> xmm */ + XMM, /* SIMD xmm/mem -> xmm */ + XMMXIMPL, /* SIMD xmm -> xmm (mem) */ + XMM3P, /* SIMD xmm -> r32,imm8 */ + XMMP, /* SIMD xmm/mem w/to xmm,imm8 */ + XMMPRM, /* SIMD r32/mem -> xmm,imm8 */ + XMMS, /* SIMD xmm -> xmm/mem */ + XMMM, /* SIMD mem -> xmm */ + XMMMS, /* SIMD xmm -> mem */ + XMM3MX, /* SIMD r32/mem -> xmm */ + XMM3MXS, /* SIMD xmm -> r32/mem */ + XMMSH, /* SIMD xmm,imm8 */ + XMMXM3, /* SIMD xmm/mem -> r32 */ + XMMX3, /* SIMD xmm -> r32 */ + XMMXMM, /* SIMD xmm/mem -> mm */ + XMMMX, /* SIMD mm -> xmm */ + XMMXM, /* SIMD xmm -> mm */ + XMMFENCE, /* SIMD lfence or mfence */ + XMMSFNC /* SIMD sfence (none or mem) */ +}; + +#define FILL 0x90 /* Fill byte used for alignment (nop) */ + +/* +** Register numbers for the i386 +*/ +#define EAX_REGNO 0 +#define ECX_REGNO 1 +#define EDX_REGNO 2 +#define EBX_REGNO 3 +#define ESP_REGNO 4 +#define EBP_REGNO 5 +#define ESI_REGNO 6 +#define EDI_REGNO 7 + +/* + * modes for immediate values + */ +#define MODE_NONE 0 +#define MODE_IPREL 1 /* signed IP relative value */ +#define MODE_SIGNED 2 /* sign extended immediate */ +#define MODE_IMPLIED 3 /* constant value implied from opcode */ +#define MODE_OFFSET 4 /* offset part of an address */ + +/* + * The letters used in these macros are: + * IND - indirect to another to another table + * "T" - means to Terminate indirections (this is the final opcode) + * "S" - means "operand length suffix required" + * "NS" - means "no suffix" which is the operand length suffix of the opcode + * "Z" - means instruction size arg required + * "u" - means the opcode is invalid in IA32 but valid in amd64 + * "x" - means the opcode is invalid in amd64, but not IA32 + * "y" - means the operand size is always 64 bits in 64 bit mode + * "p" - means push/pop stack operation + */ + +#if defined(DIS_TEXT) && defined(DIS_MEM) +#define IND(table) {table, 0, "", 0, 0, 0, 0, 0, 0} +#define INDx(table) {table, 0, "", 0, 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, name, 0, 0, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, name, 0, 0, 0, 0, 1, 0} +#define TNSx(name, amode) {TERM, amode, name, 0, 0, 1, 0, 0, 0} +#define TNSy(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0, 1} +#define TNSZ(name, amode, sz) {TERM, amode, name, 0, sz, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, name, 0, sz, 0, 1, 0, 0} +#define TS(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0, 0} +#define TSx(name, amode) {TERM, amode, name, 1, 0, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, name, 1, 0, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, name, 1, sz, 0, 0, 0, 0} +#define TSZx(name, amode, sz) {TERM, amode, name, 1, sz, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, amode, name, 1, sz, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, "", 0, 0, 0, 0, 0} +#elif defined(DIS_TEXT) +#define IND(table) {table, 0, "", 0, 0, 0, 0, 0} +#define INDx(table) {table, 0, "", 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, name, 0, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0} +#define TNSx(name, amode) {TERM, amode, name, 0, 1, 0, 0, 0} +#define TNSy(name, amode) {TERM, amode, name, 0, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, name, 0, 0, 1, 0, 1} +#define TNSZ(name, amode, sz) {TERM, amode, name, 0, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, name, 0, 0, 1, 0, 0} +#define TS(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0} +#define TSx(name, amode) {TERM, amode, name, 1, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, name, 1, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, name, 1, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, name, 1, 0, 0, 0, 0} +#define TSZx(name, amode, sz) {TERM, amode, name, 1, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, amode, name, 1, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, "", 0, 0, 0, 0, 0} +#elif defined(DIS_MEM) +#define IND(table) {table, 0, 0, 0, 0, 0, 0} +#define INDx(table) {table, 0, 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, 0, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, 0, 0, 0, 1, 0} +#define TNSy(name, amode) {TERM, amode, 0, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, 0, 0, 1, 0, 1} +#define TNSx(name, amode) {TERM, amode, 0, 1, 0, 0, 0} +#define TNSZ(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, sz, 0, 1, 0, 0} +#define TS(name, amode) {TERM, amode, 0, 0, 0, 0, 0} +#define TSx(name, amode) {TERM, amode, 0, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, 0, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, 0, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0} +#define TSZx(name, amode, sz) {TERM, amode, sz, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, amode, sz, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, 0, 0, 0, 0, 0} +#else +#define IND(table) {table[0], 0, 0, 0, 0, 0} +#define INDx(table) {table[0], 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, 0, 0, 1, 0} +#define TNSy(name, amode) {TERM, amode, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, 0, 1, 0, 1} +#define TNSx(name, amode) {TERM, amode, 1, 0, 0, 0} +#define TNSZ(name, amode, sz) {TERM, amode, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, 0, 1, 0, 0} +#define TS(name, amode) {TERM, amode, 0, 0, 0, 0} +#define TSx(name, amode) {TERM, amode, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, 0, 0, 0, 0} +#define TSZx(name, amode, sz) {TERM, amode, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, amode, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, 0, 0, 0, 0} +#endif + +#ifdef DIS_TEXT +/* + * this decodes the r_m field for mode's 0, 1, 2 in 16 bit mode + */ +const char *const dis_addr16[3][8] = { +"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di)", "", + "(%bx)", +"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di", "(%bp)", + "(%bx)", +"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di)", "(%bp)", + "(%bx)", +}; + + +/* + * This decodes 32 bit addressing mode r_m field for modes 0, 1, 2 + */ +const char *const dis_addr32_mode0[16] = { + "(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "", "(%esi)", "(%edi)", + "(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "", "(%r14d)", "(%r15d)" +}; + +const char *const dis_addr32_mode12[16] = { + "(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "(%ebp)", "(%esi)", "(%edi)", + "(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "(%r13d)", "(%r14d)", "(%r15d)" +}; + +/* + * This decodes 64 bit addressing mode r_m field for modes 0, 1, 2 + */ +const char *const dis_addr64_mode0[16] = { + "(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rip)", "(%rsi)", "(%rdi)", + "(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%rip)", "(%r14)", "(%r15)" +}; +const char *const dis_addr64_mode12[16] = { + "(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rbp)", "(%rsi)", "(%rdi)", + "(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%r13)", "(%r14)", "(%r15)" +}; + +/* + * decode for scale from SIB byte + */ +const char *const dis_scale_factor[4] = { ")", ",2)", ",4)", ",8)" }; + +/* + * register decoding for normal references to registers (ie. not addressing) + */ +const char *const dis_REG8[16] = { + "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh", + "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" +}; + +const char *const dis_REG8_REX[16] = { + "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", + "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" +}; + +const char *const dis_REG16[16] = { + "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", + "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w" +}; + +const char *const dis_REG32[16] = { + "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", + "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" +}; + +const char *const dis_REG64[16] = { + "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" +}; + +const char *const dis_DEBUGREG[16] = { + "%db0", "%db1", "%db2", "%db3", "%db4", "%db5", "%db6", "%db7", + "%db8", "%db9", "%db10", "%db11", "%db12", "%db13", "%db14", "%db15" +}; + +const char *const dis_CONTROLREG[16] = { + "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5?", "%cr6?", "%cr7?", + "%cr8", "%cr9?", "%cr10?", "%cr11?", "%cr12?", "%cr13?", "%cr14?", "%cr15?" +}; + +const char *const dis_TESTREG[16] = { + "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7", + "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7" +}; + +const char *const dis_MMREG[16] = { + "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7", + "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" +}; + +const char *const dis_XMMREG[16] = { + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15" +}; + +const char *const dis_SEGREG[16] = { + "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "<reserved>", "<reserved>", + "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "<reserved>", "<reserved>" +}; + +/* + * SIMD predicate suffixes + */ +const char *const dis_PREDSUFFIX[8] = { + "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord" +}; + + + +#endif /* DIS_TEXT */ + + + + +/* + * "decode table" for 64 bit mode MOVSXD instruction (opcode 0x63) + */ +const instable_t dis_opMOVSLD = TNS("movslq",MOVSXZ); + +/* + * "decode table" for pause and clflush instructions + */ +const instable_t dis_opPause = TNS("pause", NORM); + +/* + * Decode table for 0x0F00 opcodes + */ +const instable_t dis_op0F00[8] = { + +/* [0] */ TNS("sldt",M), TNS("str",M), TNSy("lldt",M), TNSy("ltr",M), +/* [4] */ TNSZ("verr",M,2), TNSZ("verw",M,2), INVALID, INVALID, +}; + + +/* + * Decode table for 0x0F01 opcodes + */ +const instable_t dis_op0F01[8] = { + +/* [0] */ TNSZ("sgdt",MO,6), TNSZ("sidt",MO,6), TNSZ("lgdt",MO,6), TNSZ("lidt",MO,6), +/* [4] */ TNSZ("smsw",M,2), INVALID, TNSZ("lmsw",M,2), TNS("invlpg",SWAPGS), +}; + +/* + * Decode table for 0x0F18 opcodes -- SIMD prefetch + */ +const instable_t dis_op0F18[8] = { + +/* [0] */ TNS("prefetchnta",PREF),TNS("prefetcht0",PREF), TNS("prefetcht1",PREF), TNS("prefetcht2",PREF), +/* [4] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Decode table for 0x0FAE opcodes -- SIMD state save/restore + */ +const instable_t dis_op0FAE[8] = { +/* [0] */ TNSZ("fxsave",M,512), TNSZ("fxrstor",M,512), TNS("ldmxcsr",M), TNS("stmxcsr",M), +/* [4] */ INVALID, TNS("lfence",XMMFENCE), TNS("mfence",XMMFENCE), TNS("sfence",XMMSFNC), +}; + +/* + * Decode table for 0x0FBA opcodes + */ + +const instable_t dis_op0FBA[8] = { + +/* [0] */ INVALID, INVALID, INVALID, INVALID, +/* [4] */ TS("bt",MIb), TS("bts",MIb), TS("btr",MIb), TS("btc",MIb), +}; + +/* + * Decode table for 0x0FC7 opcode + */ + +const instable_t dis_op0FC7[8] = { + +/* [0] */ INVALID, TNS("cmpxchg8b",M), INVALID, INVALID, +/* [4] */ INVALID, INVALID, INVALID, INVALID, +}; + + +/* + * Decode table for 0x0FC8 opcode -- 486 bswap instruction + * + *bit pattern: 0000 1111 1100 1reg + */ +const instable_t dis_op0FC8[4] = { +/* [0] */ TNS("bswap",R), INVALID, INVALID, INVALID, +}; + +/* + * Decode table for 0x0F71, 0x0F72, and 0x0F73 opcodes -- MMX instructions + */ +const instable_t dis_op0F7123[4][8] = { +{ +/* [70].0 */ INVALID, INVALID, INVALID, INVALID, +/* .4 */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [71].0 */ INVALID, INVALID, TNS("psrlw",MMOSH), INVALID, +/* .4 */ TNS("psraw",MMOSH), INVALID, TNS("psllw",MMOSH), INVALID, +}, { +/* [72].0 */ INVALID, INVALID, TNS("psrld",MMOSH), INVALID, +/* .4 */ TNS("psrad",MMOSH), INVALID, TNS("pslld",MMOSH), INVALID, +}, { +/* [73].0 */ INVALID, INVALID, TNS("psrlq",MMOSH), TNS("INVALID",MMOSH), +/* .4 */ INVALID, INVALID, TNS("psllq",MMOSH), TNS("INVALID",MMOSH), +} }; + +/* + * Decode table for SIMD extensions to above 0x0F71-0x0F73 opcodes. + */ +const instable_t dis_opSIMD7123[32] = { +/* [70].0 */ INVALID, INVALID, INVALID, INVALID, +/* .4 */ INVALID, INVALID, INVALID, INVALID, + +/* [71].0 */ INVALID, INVALID, TNS("psrlw",XMMSH), INVALID, +/* .4 */ TNS("psraw",XMMSH), INVALID, TNS("psllw",XMMSH), INVALID, + +/* [72].0 */ INVALID, INVALID, TNS("psrld",XMMSH), INVALID, +/* .4 */ TNS("psrad",XMMSH), INVALID, TNS("pslld",XMMSH), INVALID, + +/* [73].0 */ INVALID, INVALID, TNS("psrlq",XMMSH), TNS("psrldq",XMMSH), +/* .4 */ INVALID, INVALID, TNS("psllq",XMMSH), TNS("pslldq",XMMSH), +}; + +/* + * SIMD instructions have been wedged into the existing IA32 instruction + * set through the use of prefixes. That is, while 0xf0 0x58 may be + * addps, 0xf3 0xf0 0x58 (literally, repz addps) is a completely different + * instruction - addss. At present, three prefixes have been coopted in + * this manner - address size (0x66), repnz (0xf2) and repz (0xf3). The + * following tables are used to provide the prefixed instruction names. + * The arrays are sparse, but they're fast. + */ + +/* + * Decode table for SIMD instructions with the address size (0x66) prefix. + */ +const instable_t dis_opSIMDdata16[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("movupd",XMM,16), TNSZ("movupd",XMMS,16), TNSZ("movlpd",XMMM,8), TNSZ("movlpd",XMMMS,8), +/* [14] */ TNSZ("unpcklpd",XMM,16),TNSZ("unpckhpd",XMM,16),TNSZ("movhpd",XMMM,8), TNSZ("movhpd",XMMMS,8), +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ TNSZ("movapd",XMM,16), TNSZ("movapd",XMMS,16), TNSZ("cvtpi2pd",XMMOMX,8),TNSZ("movntpd",XMMOMS,16), +/* [2C] */ TNSZ("cvttpd2pi",XMMXMM,16),TNSZ("cvtpd2pi",XMMXMM,16),TNSZ("ucomisd",XMM,8),TNSZ("comisd",XMM,8), + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ TNS("movmskpd",XMMOX3), TNSZ("sqrtpd",XMM,16), INVALID, INVALID, +/* [54] */ TNSZ("andpd",XMM,16), TNSZ("andnpd",XMM,16), TNSZ("orpd",XMM,16), TNSZ("xorpd",XMM,16), +/* [58] */ TNSZ("addpd",XMM,16), TNSZ("mulpd",XMM,16), TNSZ("cvtpd2ps",XMM,16),TNSZ("cvtps2dq",XMM,16), +/* [5C] */ TNSZ("subpd",XMM,16), TNSZ("minpd",XMM,16), TNSZ("divpd",XMM,16), TNSZ("maxpd",XMM,16), + +/* [60] */ TNSZ("punpcklbw",XMM,16),TNSZ("punpcklwd",XMM,16),TNSZ("punpckldq",XMM,16),TNSZ("packsswb",XMM,16), +/* [64] */ TNSZ("pcmpgtb",XMM,16), TNSZ("pcmpgtw",XMM,16), TNSZ("pcmpgtd",XMM,16), TNSZ("packuswb",XMM,16), +/* [68] */ TNSZ("punpckhbw",XMM,16),TNSZ("punpckhwd",XMM,16),TNSZ("punpckhdq",XMM,16),TNSZ("packssdw",XMM,16), +/* [6C] */ TNSZ("punpcklqdq",XMM,16),TNSZ("punpckhqdq",XMM,16),TNSZ("movd",XMM3MX,4),TNSZ("movdqa",XMM,16), + +/* [70] */ TNSZ("pshufd",XMMP,16), INVALID, INVALID, INVALID, +/* [74] */ TNSZ("pcmpeqb",XMM,16), TNSZ("pcmpeqw",XMM,16), TNSZ("pcmpeqd",XMM,16), INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, TNSZ("movd",XMM3MXS,4), TNSZ("movdqa",XMMS,16), + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("cmppd",XMMP,16), INVALID, +/* [C4] */ TNSZ("pinsrw",XMMPRM,2),TNS("pextrw",XMM3P), TNSZ("shufpd",XMMP,16), INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, TNSZ("psrlw",XMM,16), TNSZ("psrld",XMM,16), TNSZ("psrlq",XMM,16), +/* [D4] */ TNSZ("paddq",XMM,16), TNSZ("pmullw",XMM,16), TNSZ("movq",XMMS,8), TNS("pmovmskb",XMMX3), +/* [D8] */ TNSZ("psubusb",XMM,16), TNSZ("psubusw",XMM,16), TNSZ("pminub",XMM,16), TNSZ("pand",XMM,16), +/* [DC] */ TNSZ("paddusb",XMM,16), TNSZ("paddusw",XMM,16), TNSZ("pmaxub",XMM,16), TNSZ("pandn",XMM,16), + +/* [E0] */ TNSZ("pavgb",XMM,16), TNSZ("psraw",XMM,16), TNSZ("psrad",XMM,16), TNSZ("pavgw",XMM,16), +/* [E4] */ TNSZ("pmulhuw",XMM,16), TNSZ("pmulhw",XMM,16), TNSZ("cvttpd2dq",XMM,16),TNSZ("movntdq",XMMS,16), +/* [E8] */ TNSZ("psubsb",XMM,16), TNSZ("psubsw",XMM,16), TNSZ("pminsw",XMM,16), TNSZ("por",XMM,16), +/* [EC] */ TNSZ("paddsb",XMM,16), TNSZ("paddsw",XMM,16), TNSZ("pmaxsw",XMM,16), TNSZ("pxor",XMM,16), + +/* [F0] */ INVALID, TNSZ("psllw",XMM,16), TNSZ("pslld",XMM,16), TNSZ("psllq",XMM,16), +/* [F4] */ TNSZ("pmuludq",XMM,16), TNSZ("pmaddwd",XMM,16), TNSZ("psadbw",XMM,16), TNSZ("maskmovdqu", XMMXIMPL,16), +/* [F8] */ TNSZ("psubb",XMM,16), TNSZ("psubw",XMM,16), TNSZ("psubd",XMM,16), TNSZ("psubq",XMM,16), +/* [FC] */ TNSZ("paddb",XMM,16), TNSZ("paddw",XMM,16), TNSZ("paddd",XMM,16), INVALID, +}; + +/* + * Decode table for SIMD instructions with the repnz (0xf2) prefix. + */ +const instable_t dis_opSIMDrepnz[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("movsd",XMM,8), TNSZ("movsd",XMMS,8), INVALID, INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, TNSZ("cvtsi2sd",XMM3MX,4),INVALID, +/* [2C] */ TNSZ("cvttsd2si",XMMXM3,8),TNSZ("cvtsd2si",XMMXM3,8),INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, TNSZ("sqrtsd",XMM,8), INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("addsd",XMM,8), TNSZ("mulsd",XMM,8), TNSZ("cvtsd2ss",XMM,8), INVALID, +/* [5C] */ TNSZ("subsd",XMM,8), TNSZ("minsd",XMM,8), TNSZ("divsd",XMM,8), TNSZ("maxsd",XMM,8), + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ TNSZ("pshuflw",XMMP,16),INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("cmpsd",XMMP,8), INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, TNS("movdq2q",XMMXM), INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, TNSZ("cvtpd2dq",XMM,16),INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Decode table for SIMD instructions with the repz (0xf3) prefix. + */ +const instable_t dis_opSIMDrepz[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("movss",XMM,4), TNSZ("movss",XMMS,4), INVALID, INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, TNSZ("cvtsi2ss",XMM3MX,4),INVALID, +/* [2C] */ TNSZ("cvttss2si",XMMXM3,4),TNSZ("cvtss2si",XMMXM3,4),INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, TNSZ("sqrtss",XMM,4), TNSZ("rsqrtss",XMM,4), TNSZ("rcpss",XMM,4), +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("addss",XMM,4), TNSZ("mulss",XMM,4), TNSZ("cvtss2sd",XMM,4), TNSZ("cvttps2dq",XMM,16), +/* [5C] */ TNSZ("subss",XMM,4), TNSZ("minss",XMM,4), TNSZ("divss",XMM,4), TNSZ("maxss",XMM,4), + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, TNSZ("movdqu",XMM,16), + +/* [70] */ TNSZ("pshufhw",XMMP,16),INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, TNSZ("movq",XMM,8), TNSZ("movdqu",XMMS,16), + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("cmpss",XMMP,4), INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, TNS("movq2dq",XMMMX), INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, TNSZ("cvtdq2pd",XMM,8), INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Decode table for 0x0F opcodes + */ + +const instable_t dis_op0F[16][16] = { +{ +/* [00] */ IND(dis_op0F00), IND(dis_op0F01), TNS("lar",MR), TNS("lsl",MR), +/* [04] */ INVALID, TNS("syscall",NORM), TNS("clts",NORM), TNS("sysret",NORM), +/* [08] */ TNS("invd",NORM), TNS("wbinvd",NORM), INVALID, TNS("ud2",NORM), +/* [0C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [10] */ TNSZ("movups",XMMO,16), TNSZ("movups",XMMOS,16),TNSZ("movlps",XMMO,8), TNSZ("movlps",XMMOS,8), +/* [14] */ TNSZ("unpcklps",XMMO,16),TNSZ("unpckhps",XMMO,16),TNSZ("movhps",XMMOM,8),TNSZ("movhps",XMMOMS,8), +/* [18] */ IND(dis_op0F18), INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [20] */ TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), +/* [24] */ TSx("mov",SREG), INVALID, TSx("mov",SREG), INVALID, +/* [28] */ TNSZ("movaps",XMMO,16), TNSZ("movaps",XMMOS,16),TNSZ("cvtpi2ps",XMMOMX,8),TNSZ("movntps",XMMOS,16), +/* [2C] */ TNSZ("cvttps2pi",XMMOXMM,8),TNSZ("cvtps2pi",XMMOXMM,8),TNSZ("ucomiss",XMMO,4),TNSZ("comiss",XMMO,4), +}, { +/* [30] */ TNS("wrmsr",NORM), TNS("rdtsc",NORM), TNS("rdmsr",NORM), TNS("rdpmc",NORM), +/* [34] */ TNSx("sysenter",NORM), TNSx("sysexit",NORM), INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [40] */ TS("cmovx.o",MR), TS("cmovx.no",MR), TS("cmovx.b",MR), TS("cmovx.ae",MR), +/* [44] */ TS("cmovx.e",MR), TS("cmovx.ne",MR), TS("cmovx.be",MR), TS("cmovx.a",MR), +/* [48] */ TS("cmovx.s",MR), TS("cmovx.ns",MR), TS("cmovx.pe",MR), TS("cmovx.po",MR), +/* [4C] */ TS("cmovx.l",MR), TS("cmovx.ge",MR), TS("cmovx.le",MR), TS("cmovx.g",MR), +}, { +/* [50] */ TNS("movmskps",XMMOX3), TNSZ("sqrtps",XMMO,16), TNSZ("rsqrtps",XMMO,16),TNSZ("rcpps",XMMO,16), +/* [54] */ TNSZ("andps",XMMO,16), TNSZ("andnps",XMMO,16), TNSZ("orps",XMMO,16), TNSZ("xorps",XMMO,16), +/* [58] */ TNSZ("addps",XMMO,16), TNSZ("mulps",XMMO,16), TNSZ("cvtps2pd",XMMO,8),TNSZ("cvtdq2ps",XMMO,16), +/* [5C] */ TNSZ("subps",XMMO,16), TNSZ("minps",XMMO,16), TNSZ("divps",XMMO,16), TNSZ("maxps",XMMO,16), +}, { +/* [60] */ TNSZ("punpcklbw",MMO,4),TNSZ("punpcklwd",MMO,4),TNSZ("punpckldq",MMO,4),TNSZ("packsswb",MMO,8), +/* [64] */ TNSZ("pcmpgtb",MMO,8), TNSZ("pcmpgtw",MMO,8), TNSZ("pcmpgtd",MMO,8), TNSZ("packuswb",MMO,8), +/* [68] */ TNSZ("punpckhbw",MMO,8),TNSZ("punpckhwd",MMO,8),TNSZ("punpckhdq",MMO,8),TNSZ("packssdw",MMO,8), +/* [6C] */ TNSZ("INVALID",MMO,0), TNSZ("INVALID",MMO,0), TNSZ("movd",MMO,4), TNSZ("movq",MMO,8), +}, { +/* [70] */ TNSZ("pshufw",MMOPM,8), TNS("psrXXX",MR), TNS("psrXXX",MR), TNS("psrXXX",MR), +/* [74] */ TNSZ("pcmpeqb",MMO,8), TNSZ("pcmpeqw",MMO,8), TNSZ("pcmpeqd",MMO,8), TNS("emms",NORM), +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, TNSZ("movd",MMOS,4), TNSZ("movq",MMOS,8), +}, { +/* [80] */ TNS("jo",D), TNS("jno",D), TNS("jb",D), TNS("jae",D), +/* [84] */ TNS("je",D), TNS("jne",D), TNS("jbe",D), TNS("ja",D), +/* [88] */ TNS("js",D), TNS("jns",D), TNS("jp",D), TNS("jnp",D), +/* [8C] */ TNS("jl",D), TNS("jge",D), TNS("jle",D), TNS("jg",D), +}, { +/* [90] */ TNS("seto",Mb), TNS("setno",Mb), TNS("setb",Mb), TNS("setae",Mb), +/* [94] */ TNS("sete",Mb), TNS("setne",Mb), TNS("setbe",Mb), TNS("seta",Mb), +/* [98] */ TNS("sets",Mb), TNS("setns",Mb), TNS("setp",Mb), TNS("setnp",Mb), +/* [9C] */ TNS("setl",Mb), TNS("setge",Mb), TNS("setle",Mb), TNS("setg",Mb), +}, { +/* [A0] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("cpuid",NORM), TS("bt",RMw), +/* [A4] */ TS("shld",DSHIFT), TS("shld",DSHIFTcl), INVALID, INVALID, +/* [A8] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("rsm",NORM), TS("bts",RMw), +/* [AC] */ TS("shrd",DSHIFT), TS("shrd",DSHIFTcl), IND(dis_op0FAE), TS("imul",MRw), +}, { +/* [B0] */ TNS("cmpxchgb",RMw), TS("cmpxchg",RMw), TS("lss",MR), TS("btr",RMw), +/* [B4] */ TS("lfs",MR), TS("lgs",MR), TS("movzb",MOVZ), TNS("movzwl",MOVZ), +/* [B8] */ INVALID, INVALID, IND(dis_op0FBA), TS("btc",RMw), +/* [BC] */ TS("bsf",MRw), TS("bsr",MRw), TS("movsb",MOVZ), TNS("movswl",MOVZ), +}, { +/* [C0] */ TNS("xaddb",XADDB), TS("xadd",RMw), TNSZ("cmpps",XMMOPM,16),TNS("movnti",RM), +/* [C4] */ TNSZ("pinsrw",MMOPRM,2),TNS("pextrw",MMO3P), TNSZ("shufps",XMMOPM,16),IND(dis_op0FC7), +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [D0] */ INVALID, TNSZ("psrlw",MMO,8), TNSZ("psrld",MMO,8), TNSZ("psrlq",MMO,8), +/* [D4] */ TNSZ("paddq",MMO,8), TNSZ("pmullw",MMO,8), TNSZ("INVALID",MMO,0), TNS("pmovmskb",MMOM3), +/* [D8] */ TNSZ("psubusb",MMO,8), TNSZ("psubusw",MMO,8), TNSZ("pminub",MMO,8), TNSZ("pand",MMO,8), +/* [DC] */ TNSZ("paddusb",MMO,8), TNSZ("paddusw",MMO,8), TNSZ("pmaxub",MMO,8), TNSZ("pandn",MMO,8), +}, { +/* [E0] */ TNSZ("pavgb",MMO,8), TNSZ("psraw",MMO,8), TNSZ("psrad",MMO,8), TNSZ("pavgw",MMO,8), +/* [E4] */ TNSZ("pmulhuw",MMO,8), TNSZ("pmulhw",MMO,8), TNS("INVALID",XMMO), TNSZ("movntq",MMOMS,8), +/* [E8] */ TNSZ("psubsb",MMO,8), TNSZ("psubsw",MMO,8), TNSZ("pminsw",MMO,8), TNSZ("por",MMO,8), +/* [EC] */ TNSZ("paddsb",MMO,8), TNSZ("paddsw",MMO,8), TNSZ("pmaxsw",MMO,8), TNSZ("pxor",MMO,8), +}, { +/* [F0] */ INVALID, TNSZ("psllw",MMO,8), TNSZ("pslld",MMO,8), TNSZ("psllq",MMO,8), +/* [F4] */ TNSZ("pmuludq",MMO,8), TNSZ("pmaddwd",MMO,8), TNSZ("psadbw",MMO,8), TNSZ("maskmovq",MMOIMPL,8), +/* [F8] */ TNSZ("psubb",MMO,8), TNSZ("psubw",MMO,8), TNSZ("psubd",MMO,8), TNSZ("psubq",MMO,8), +/* [FC] */ TNSZ("paddb",MMO,8), TNSZ("paddw",MMO,8), TNSZ("paddd",MMO,8), INVALID, +} }; + + +/* + * Decode table for 0x80 opcodes + */ + +const instable_t dis_op80[8] = { + +/* [0] */ TNS("addb",IMlw), TNS("orb",IMw), TNS("adcb",IMlw), TNS("sbbb",IMlw), +/* [4] */ TNS("andb",IMw), TNS("subb",IMlw), TNS("xorb",IMw), TNS("cmpb",IMlw), +}; + + +/* + * Decode table for 0x81 opcodes. + */ + +const instable_t dis_op81[8] = { + +/* [0] */ TS("add",IMlw), TS("or",IMw), TS("adc",IMlw), TS("sbb",IMlw), +/* [4] */ TS("and",IMw), TS("sub",IMlw), TS("xor",IMw), TS("cmp",IMlw), +}; + + +/* + * Decode table for 0x82 opcodes. + */ + +const instable_t dis_op82[8] = { + +/* [0] */ TNSx("addb",IMlw), TNSx("orb",IMlw), TNSx("adcb",IMlw), TNSx("sbbb",IMlw), +/* [4] */ TNSx("andb",IMlw), TNSx("subb",IMlw), TNSx("xorb",IMlw), TNSx("cmpb",IMlw), +}; +/* + * Decode table for 0x83 opcodes. + */ + +const instable_t dis_op83[8] = { + +/* [0] */ TS("add",IMlw), TS("or",IMlw), TS("adc",IMlw), TS("sbb",IMlw), +/* [4] */ TS("and",IMlw), TS("sub",IMlw), TS("xor",IMlw), TS("cmp",IMlw), +}; + +/* + * Decode table for 0xC0 opcodes. + */ + +const instable_t dis_opC0[8] = { + +/* [0] */ TNS("rolb",MvI), TNS("rorb",MvI), TNS("rclb",MvI), TNS("rcrb",MvI), +/* [4] */ TNS("shlb",MvI), TNS("shrb",MvI), INVALID, TNS("sarb",MvI), +}; + +/* + * Decode table for 0xD0 opcodes. + */ + +const instable_t dis_opD0[8] = { + +/* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), +/* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), +}; + +/* + * Decode table for 0xC1 opcodes. + * 186 instruction set + */ + +const instable_t dis_opC1[8] = { + +/* [0] */ TS("rol",MvI), TS("ror",MvI), TS("rcl",MvI), TS("rcr",MvI), +/* [4] */ TS("shl",MvI), TS("shr",MvI), TS("sal",MvI), TS("sar",MvI), +}; + +/* + * Decode table for 0xD1 opcodes. + */ + +const instable_t dis_opD1[8] = { + +/* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), +/* [4] */ TS("shl",Mv), TS("shr",Mv), TS("sal",Mv), TS("sar",Mv), +}; + + +/* + * Decode table for 0xD2 opcodes. + */ + +const instable_t dis_opD2[8] = { + +/* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), +/* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), +}; +/* + * Decode table for 0xD3 opcodes. + */ + +const instable_t dis_opD3[8] = { + +/* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), +/* [4] */ TS("shl",Mv), TS("shr",Mv), TS("salb",Mv), TS("sar",Mv), +}; + + +/* + * Decode table for 0xF6 opcodes. + */ + +const instable_t dis_opF6[8] = { + +/* [0] */ TNS("testb",IMw), TNS("testb",IMw), TNS("notb",Mw), TNS("negb",Mw), +/* [4] */ TNS("mulb",MA), TNS("imulb",MA), TNS("divb",MA), TNS("idivb",MA), +}; + + +/* + * Decode table for 0xF7 opcodes. + */ + +const instable_t dis_opF7[8] = { + +/* [0] */ TS("test",IMw), TS("test",IMw), TS("not",Mw), TS("neg",Mw), +/* [4] */ TS("mul",MA), TS("imul",MA), TS("div",MA), TS("idiv",MA), +}; + + +/* + * Decode table for 0xFE opcodes. + */ + +const instable_t dis_opFE[8] = { + +/* [0] */ TNS("incb",Mw), TNS("decb",Mw), INVALID, INVALID, +/* [4] */ INVALID, INVALID, INVALID, INVALID, +}; +/* + * Decode table for 0xFF opcodes. + */ + +const instable_t dis_opFF[8] = { + +/* [0] */ TS("inc",Mw), TS("dec",Mw), TNSyp("call",INM), TNS("lcall",INM), +/* [4] */ TNSy("jmp",INM), TNS("ljmp",INM), TSp("push",M), INVALID, +}; + +/* for 287 instructions, which are a mess to decode */ + +const instable_t dis_opFP1n2[8][8] = { +{ +/* bit pattern: 1101 1xxx MODxx xR/M */ +/* [0,0] */ TNS("fadds",M), TNS("fmuls",M), TNS("fcoms",M), TNS("fcomps",M), +/* [0,4] */ TNS("fsubs",M), TNS("fsubrs",M), TNS("fdivs",M), TNS("fdivrs",M), +}, { +/* [1,0] */ TNS("flds",M), INVALID, TNS("fsts",M), TNS("fstps",M), +/* [1,4] */ TNSZ("fldenv",M,28), TNSZ("fldcw",M,2), TNSZ("fnstenv",M,28), TNSZ("fnstcw",M,2), +}, { +/* [2,0] */ TNS("fiaddl",M), TNS("fimull",M), TNS("ficoml",M), TNS("ficompl",M), +/* [2,4] */ TNS("fisubl",M), TNS("fisubrl",M), TNS("fidivl",M), TNS("fidivrl",M), +}, { +/* [3,0] */ TNS("fildl",M), INVALID, TNS("fistl",M), TNS("fistpl",M), +/* [3,4] */ INVALID, TNSZ("fldt",M,10), INVALID, TNSZ("fstpt",M,10), +}, { +/* [4,0] */ TNSZ("faddl",M,8), TNSZ("fmull",M,8), TNSZ("fcoml",M,8), TNSZ("fcompl",M,8), +/* [4,1] */ TNSZ("fsubl",M,8), TNSZ("fsubrl",M,8), TNSZ("fdivl",M,8), TNSZ("fdivrl",M,8), +}, { +/* [5,0] */ TNSZ("fldl",M,8), INVALID, TNSZ("fstl",M,8), TNSZ("fstpl",M,8), +/* [5,4] */ TNSZ("frstor",M,108), INVALID, TNSZ("fnsave",M,108), TNSZ("fnstsw",M,2), +}, { +/* [6,0] */ TNSZ("fiadd",M,2), TNSZ("fimul",M,2), TNSZ("ficom",M,2), TNSZ("ficomp",M,2), +/* [6,4] */ TNSZ("fisub",M,2), TNSZ("fisubr",M,2), TNSZ("fidiv",M,2), TNSZ("fidivr",M,2), +}, { +/* [7,0] */ TNSZ("fild",M,2), INVALID, TNSZ("fist",M,2), TNSZ("fistp",M,2), +/* [7,4] */ TNSZ("fbld",M,10), TNSZ("fildll",M,8), TNSZ("fbstp",M,10), TNSZ("fistpll",M,8), +} }; + +const instable_t dis_opFP3[8][8] = { +{ +/* bit pattern: 1101 1xxx 11xx xREG */ +/* [0,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), +/* [0,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), +}, { +/* [1,0] */ TNS("fld",F), TNS("fxch",F), TNS("fnop",NORM), TNS("fstp",F), +/* [1,4] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [2,0] */ INVALID, INVALID, INVALID, INVALID, +/* [2,4] */ INVALID, TNS("fucompp",NORM), INVALID, INVALID, +}, { +/* [3,0] */ INVALID, INVALID, INVALID, INVALID, +/* [3,4] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [4,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), +/* [4,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), +}, { +/* [5,0] */ TNS("ffree",F), TNS("fxch",F), TNS("fst",F), TNS("fstp",F), +/* [5,4] */ TNS("fucom",F), TNS("fucomp",F), INVALID, INVALID, +}, { +/* [6,0] */ TNS("faddp",FF), TNS("fmulp",FF), TNS("fcomp",F), TNS("fcompp",NORM), +/* [6,4] */ TNS("fsubp",FF), TNS("fsubrp",FF), TNS("fdivp",FF), TNS("fdivrp",FF), +}, { +/* [7,0] */ TNS("ffree",F), TNS("fxch",F), TNS("fstp",F), TNS("fstp",F), +/* [7,4] */ TNS("fnstsw",M), TNS("fucomip",FFC), TNS("fcomip",FFC), INVALID, +} }; + +const instable_t dis_opFP4[4][8] = { +{ +/* bit pattern: 1101 1001 111x xxxx */ +/* [0,0] */ TNS("fchs",NORM), TNS("fabs",NORM), INVALID, INVALID, +/* [0,4] */ TNS("ftst",NORM), TNS("fxam",NORM), TNS("ftstp",NORM), INVALID, +}, { +/* [1,0] */ TNS("fld1",NORM), TNS("fldl2t",NORM), TNS("fldl2e",NORM), TNS("fldpi",NORM), +/* [1,4] */ TNS("fldlg2",NORM), TNS("fldln2",NORM), TNS("fldz",NORM), INVALID, +}, { +/* [2,0] */ TNS("f2xm1",NORM), TNS("fyl2x",NORM), TNS("fptan",NORM), TNS("fpatan",NORM), +/* [2,4] */ TNS("fxtract",NORM), TNS("fprem1",NORM), TNS("fdecstp",NORM), TNS("fincstp",NORM), +}, { +/* [3,0] */ TNS("fprem",NORM), TNS("fyl2xp1",NORM), TNS("fsqrt",NORM), TNS("fsincos",NORM), +/* [3,4] */ TNS("frndint",NORM), TNS("fscale",NORM), TNS("fsin",NORM), TNS("fcos",NORM), +} }; + +const instable_t dis_opFP5[8] = { +/* bit pattern: 1101 1011 111x xxxx */ +/* [0] */ TNS("feni",NORM), TNS("fdisi",NORM), TNS("fnclex",NORM), TNS("fninit",NORM), +/* [4] */ TNS("fsetpm",NORM), TNS("frstpm",NORM), INVALID, INVALID, +}; + +const instable_t dis_opFP6[8] = { +/* bit pattern: 1101 1011 11yy yxxx */ +/* [00] */ TNS("fcmov.nb",FF), TNS("fcmov.ne",FF), TNS("fcmov.nbe",FF), TNS("fcmov.nu",FF), +/* [04] */ INVALID, TNS("fucomi",F), TNS("fcomi",F), INVALID, +}; + +const instable_t dis_opFP7[8] = { +/* bit pattern: 1101 1010 11yy yxxx */ +/* [00] */ TNS("fcmov.b",FF), TNS("fcmov.e",FF), TNS("fcmov.be",FF), TNS("fcmov.u",FF), +/* [04] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Main decode table for the op codes. The first two nibbles + * will be used as an index into the table. If there is a + * a need to further decode an instruction, the array to be + * referenced is indicated with the other two entries being + * empty. + */ + +const instable_t dis_distable[16][16] = { +{ +/* [0,0] */ TNS("addb",RMw), TS("add",RMw), TNS("addb",MRw), TS("add",MRw), +/* [0,4] */ TNS("addb",IA), TS("add",IA), TSx("push",SEG), TSx("pop",SEG), +/* [0,8] */ TNS("orb",RMw), TS("or",RMw), TNS("orb",MRw), TS("or",MRw), +/* [0,C] */ TNS("orb",IA), TS("or",IA), TSx("push",SEG), IND(&dis_op0F[0][0]), +}, { +/* [1,0] */ TNS("adcb",RMw), TS("adc",RMw), TNS("adcb",MRw), TS("adc",MRw), +/* [1,4] */ TNS("adcb",IA), TS("adc",IA), TSx("push",SEG), TSx("pop",SEG), +/* [1,8] */ TNS("sbbb",RMw), TS("sbb",RMw), TNS("sbbb",MRw), TS("sbb",MRw), +/* [1,C] */ TNS("sbbb",IA), TS("sbb",IA), TSx("push",SEG), TSx("pop",SEG), +}, { +/* [2,0] */ TNS("andb",RMw), TS("and",RMw), TNS("andb",MRw), TS("and",MRw), +/* [2,4] */ TNS("andb",IA), TS("and",IA), TNSx("%es:",OVERRIDE), TNSx("daa",NORM), +/* [2,8] */ TNS("subb",RMw), TS("sub",RMw), TNS("subb",MRw), TS("sub",MRw), +/* [2,C] */ TNS("subb",IA), TS("sub",IA), TNSx("%cs:",OVERRIDE), TNSx("das",NORM), +}, { +/* [3,0] */ TNS("xorb",RMw), TS("xor",RMw), TNS("xorb",MRw), TS("xor",MRw), +/* [3,4] */ TNS("xorb",IA), TS("xor",IA), TNSx("%ss:",OVERRIDE), TNSx("aaa",NORM), +/* [3,8] */ TNS("cmpb",RMw), TS("cmp",RMw), TNS("cmpb",MRw), TS("cmp",MRw), +/* [3,C] */ TNS("cmpb",IA), TS("cmp",IA), TNSx("%ds:",OVERRIDE), TNSx("aas",NORM), +}, { +/* [4,0] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), +/* [4,4] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), +/* [4,8] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), +/* [4,C] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), +}, { +/* [5,0] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), +/* [5,4] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), +/* [5,8] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), +/* [5,C] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), +}, { +/* [6,0] */ TSZx("pusha",IMPLMEM,28),TSZx("popa",IMPLMEM,28), TSx("bound",MR), TNS("arpl",RMw), +/* [6,4] */ TNS("%fs:",OVERRIDE), TNS("%gs:",OVERRIDE), TNS("data16",DM), TNS("addr16",AM), +/* [6,8] */ TSp("push",I), TS("imul",IMUL), TSp("push",Ib), TS("imul",IMUL), +/* [6,C] */ TNSZ("insb",IMPLMEM,1), TSZ("ins",IMPLMEM,4), TNSZ("outsb",IMPLMEM,1),TSZ("outs",IMPLMEM,4), +}, { +/* [7,0] */ TNSy("jo",BD), TNSy("jno",BD), TNSy("jb",BD), TNSy("jae",BD), +/* [7,4] */ TNSy("je",BD), TNSy("jne",BD), TNSy("jbe",BD), TNSy("ja",BD), +/* [7,8] */ TNSy("js",BD), TNSy("jns",BD), TNSy("jp",BD), TNSy("jnp",BD), +/* [7,C] */ TNSy("jl",BD), TNSy("jge",BD), TNSy("jle",BD), TNSy("jg",BD), +}, { +/* [8,0] */ IND(dis_op80), IND(dis_op81), INDx(dis_op82), IND(dis_op83), +/* [8,4] */ TNS("testb",RMw), TS("test",RMw), TNS("xchgb",RMw), TS("xchg",RMw), +/* [8,8] */ TNS("movb",RMw), TS("mov",RMw), TNS("movb",MRw), TS("mov",MRw), +/* [8,C] */ TNS("movw",SM), TS("lea",MR), TNS("movw",MS), TSp("pop",M), +}, { +/* [9,0] */ TNS("nop",NORM), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), +/* [9,4] */ TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), +/* [9,8] */ TNS("cXtX",CBW), TNS("cXtX",CWD), TNSx("lcall",SO), TNS("fwait",NORM), +/* [9,C] */ TSZy("pushf",IMPLMEM,4),TSZy("popf",IMPLMEM,4), TNSx("sahf",NORM), TNSx("lahf",NORM), +}, { +/* [A,0] */ TNS("movb",OA), TS("mov",OA), TNS("movb",AO), TS("mov",AO), +/* [A,4] */ TNSZ("movsb",SD,1), TS("movs",SD), TNSZ("cmpsb",SD,1), TS("cmps",SD), +/* [A,8] */ TNS("testb",IA), TS("test",IA), TNS("stosb",AD), TS("stos",AD), +/* [A,C] */ TNS("lodsb",SA), TS("lods",SA), TNS("scasb",AD), TS("scas",AD), +}, { +/* [B,0] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), +/* [B,4] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), +/* [B,8] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), +/* [B,C] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), +}, { +/* [C,0] */ IND(dis_opC0), IND(dis_opC1), TNSyp("ret",RET), TNSyp("ret",NORM), +/* [C,4] */ TNSx("les",MR), TNSx("lds",MR), TNS("movb",IMw), TS("mov",IMw), +/* [C,8] */ TNSyp("enter",ENTER), TNSyp("leave",NORM), TNS("lret",RET), TNS("lret",NORM), +/* [C,C] */ TNS("int",INT3), TNS("int",INTx), TNSx("into",NORM), TNS("iret",NORM), +}, { +/* [D,0] */ IND(dis_opD0), IND(dis_opD1), IND(dis_opD2), IND(dis_opD3), +/* [D,4] */ TNSx("aam",U), TNSx("aad",U), TNSx("falc",NORM), TNSZ("xlat",IMPLMEM,1), + +/* 287 instructions. Note that although the indirect field */ +/* indicates opFP1n2 for further decoding, this is not necessarily */ +/* the case since the opFP arrays are not partitioned according to key1 */ +/* and key2. opFP1n2 is given only to indicate that we haven't */ +/* finished decoding the instruction. */ +/* [D,8] */ IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), +/* [D,C] */ IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), +}, { +/* [E,0] */ TNSy("loopnz",BD), TNSy("loopz",BD), TNSy("loop",BD), TNSy("jcxz",BD), +/* [E,4] */ TNS("inb",P), TS("in",P), TNS("outb",P), TS("out",P), +/* [E,8] */ TNSyp("call",D), TNSy("jmp",D), TNSx("ljmp",SO), TNSy("jmp",BD), +/* [E,C] */ TNS("inb",V), TS("in",V), TNS("outb",V), TS("out",V), +}, { +/* [F,0] */ TNS("lock",LOCK), TNS("icebp", NORM), TNS("repnz",PREFIX), TNS("repz",PREFIX), +/* [F,4] */ TNS("hlt",NORM), TNS("cmc",NORM), IND(dis_opF6), IND(dis_opF7), +/* [F,8] */ TNS("clc",NORM), TNS("stc",NORM), TNS("cli",NORM), TNS("sti",NORM), +/* [F,C] */ TNS("cld",NORM), TNS("std",NORM), IND(dis_opFE), IND(dis_opFF), +} }; + +/* END CSTYLED */ + +/* + * common functions to decode and disassemble an x86 or amd64 instruction + */ + +/* + * These are the individual fields of a REX prefix. Note that a REX + * prefix with none of these set is still needed to: + * - use the MOVSXD (sign extend 32 to 64 bits) instruction + * - access the %sil, %dil, %bpl, %spl registers + */ +#define REX_W 0x08 /* 64 bit operand size when set */ +#define REX_R 0x04 /* high order bit extension of ModRM reg field */ +#define REX_X 0x02 /* high order bit extension of SIB index field */ +#define REX_B 0x01 /* extends ModRM r_m, SIB base, or opcode reg */ + +static uint_t opnd_size; /* SIZE16, SIZE32 or SIZE64 */ +static uint_t addr_size; /* SIZE16, SIZE32 or SIZE64 */ + +/* + * Even in 64 bit mode, usually only 4 byte immediate operands are supported. + */ +static int isize[] = {1, 2, 4, 4}; +static int isize64[] = {1, 2, 4, 8}; + +/* + * Just a bunch of useful macros. + */ +#define WBIT(x) (x & 0x1) /* to get w bit */ +#define REGNO(x) (x & 0x7) /* to get 3 bit register */ +#define VBIT(x) ((x)>>1 & 0x1) /* to get 'v' bit */ +#define OPSIZE(osize, wbit) ((wbit) ? isize[osize] : 1) +#define OPSIZE64(osize, wbit) ((wbit) ? isize64[osize] : 1) + +#define REG_ONLY 3 /* mode to indicate a register operand (not memory) */ + +#define BYTE_OPND 0 /* w-bit value indicating byte register */ +#define LONG_OPND 1 /* w-bit value indicating opnd_size register */ +#define MM_OPND 2 /* "value" used to indicate a mmx reg */ +#define XMM_OPND 3 /* "value" used to indicate a xmm reg */ +#define SEG_OPND 4 /* "value" used to indicate a segment reg */ +#define CONTROL_OPND 5 /* "value" used to indicate a control reg */ +#define DEBUG_OPND 6 /* "value" used to indicate a debug reg */ +#define TEST_OPND 7 /* "value" used to indicate a test reg */ +#define WORD_OPND 8 /* w-bit value indicating word size reg */ + +/* + * Get the next byte and separate the op code into the high and low nibbles. + */ +static int +dtrace_get_opcode(dis86_t *x, uint_t *high, uint_t *low) +{ + int byte; + + /* + * x86 instructions have a maximum length of 15 bytes. Bail out if + * we try to read more. + */ + if (x->d86_len >= 15) + return (x->d86_error = 1); + + if (x->d86_error) + return (1); + byte = x->d86_get_byte(x->d86_data); + if (byte < 0) + return (x->d86_error = 1); + x->d86_bytes[x->d86_len++] = byte; + *low = byte & 0xf; /* ----xxxx low 4 bits */ + *high = byte >> 4 & 0xf; /* xxxx---- bits 7 to 4 */ + return (0); +} + +/* + * Get and decode an SIB (scaled index base) byte + */ +static void +dtrace_get_SIB(dis86_t *x, uint_t *ss, uint_t *index, uint_t *base) +{ + int byte; + + if (x->d86_error) + return; + + byte = x->d86_get_byte(x->d86_data); + if (byte < 0) { + x->d86_error = 1; + return; + } + x->d86_bytes[x->d86_len++] = byte; + + *base = byte & 0x7; + *index = (byte >> 3) & 0x7; + *ss = (byte >> 6) & 0x3; +} + +/* + * Get the byte following the op code and separate it into the + * mode, register, and r/m fields. + */ +static void +dtrace_get_modrm(dis86_t *x, uint_t *mode, uint_t *reg, uint_t *r_m) +{ + if (x->d86_got_modrm == 0) { + if (x->d86_rmindex == -1) + x->d86_rmindex = x->d86_len; + dtrace_get_SIB(x, mode, reg, r_m); + x->d86_got_modrm = 1; + } +} + +/* + * Adjust register selection based on any REX prefix bits present. + */ +/*ARGSUSED*/ +static void +dtrace_rex_adjust(uint_t rex_prefix, uint_t mode, uint_t *reg, uint_t *r_m) +{ + if (reg != NULL && r_m == NULL) { + if (rex_prefix & REX_B) + *reg += 8; + } else { + if (reg != NULL && (REX_R & rex_prefix) != 0) + *reg += 8; + if (r_m != NULL && (REX_B & rex_prefix) != 0) + *r_m += 8; + } +} + +/* + * Get an immediate operand of the given size, with sign extension. + */ +static void +dtrace_imm_opnd(dis86_t *x, int wbit, int size, int opindex) +{ + int i; + int byte; + int valsize = 0; + + if (x->d86_numopnds < opindex + 1) + x->d86_numopnds = opindex + 1; + + switch (wbit) { + case BYTE_OPND: + valsize = 1; + break; + case LONG_OPND: + if (x->d86_opnd_size == SIZE16) + valsize = 2; + else if (x->d86_opnd_size == SIZE32) + valsize = 4; + else + valsize = 8; + break; + case MM_OPND: + case XMM_OPND: + case SEG_OPND: + case CONTROL_OPND: + case DEBUG_OPND: + case TEST_OPND: + valsize = size; + break; + case WORD_OPND: + valsize = 2; + break; + } + if (valsize < size) + valsize = size; + + if (x->d86_error) + return; + x->d86_opnd[opindex].d86_value = 0; + for (i = 0; i < size; ++i) { + byte = x->d86_get_byte(x->d86_data); + if (byte < 0) { + x->d86_error = 1; + return; + } + x->d86_bytes[x->d86_len++] = byte; + x->d86_opnd[opindex].d86_value |= (uint64_t)byte << (i * 8); + } + /* Do sign extension */ + if (x->d86_bytes[x->d86_len - 1] & 0x80) { + for (; i < valsize; i++) + x->d86_opnd[opindex].d86_value |= + (uint64_t)0xff << (i* 8); + } +#ifdef DIS_TEXT + x->d86_opnd[opindex].d86_mode = MODE_SIGNED; + x->d86_opnd[opindex].d86_value_size = valsize; + x->d86_imm_bytes += size; +#endif +} + +/* + * Get an ip relative operand of the given size, with sign extension. + */ +static void +dtrace_disp_opnd(dis86_t *x, int wbit, int size, int opindex) +{ + dtrace_imm_opnd(x, wbit, size, opindex); +#ifdef DIS_TEXT + x->d86_opnd[opindex].d86_mode = MODE_IPREL; +#endif +} + +/* + * Check to see if there is a segment override prefix pending. + * If so, print it in the current 'operand' location and set + * the override flag back to false. + */ +/*ARGSUSED*/ +static void +dtrace_check_override(dis86_t *x, int opindex) +{ +#ifdef DIS_TEXT + if (x->d86_seg_prefix) { + (void) strlcat(x->d86_opnd[opindex].d86_prefix, + x->d86_seg_prefix, PFIXLEN); + } +#endif + x->d86_seg_prefix = NULL; +} + + +/* + * Process a single instruction Register or Memory operand. + * + * mode = addressing mode from ModRM byte + * r_m = r_m (or reg if mode == 3) field from ModRM byte + * wbit = indicates which register (8bit, 16bit, ... MMX, etc.) set to use. + * o = index of operand that we are processing (0, 1 or 2) + * + * the value of reg or r_m must have already been adjusted for any REX prefix. + */ +/*ARGSUSED*/ +static void +dtrace_get_operand(dis86_t *x, uint_t mode, uint_t r_m, int wbit, int opindex) +{ + int have_SIB = 0; /* flag presence of scale-index-byte */ + uint_t ss; /* scale-factor from opcode */ + uint_t index; /* index register number */ + uint_t base; /* base register number */ + int dispsize; /* size of displacement in bytes */ +#ifdef DIS_TEXT + char *opnd = x->d86_opnd[opindex].d86_opnd; +#endif + + if (x->d86_numopnds < opindex + 1) + x->d86_numopnds = opindex + 1; + + if (x->d86_error) + return; + + /* + * first handle a simple register + */ + if (mode == REG_ONLY) { +#ifdef DIS_TEXT + switch (wbit) { + case MM_OPND: + (void) strlcat(opnd, dis_MMREG[r_m], OPLEN); + break; + case XMM_OPND: + (void) strlcat(opnd, dis_XMMREG[r_m], OPLEN); + break; + case SEG_OPND: + (void) strlcat(opnd, dis_SEGREG[r_m], OPLEN); + break; + case CONTROL_OPND: + (void) strlcat(opnd, dis_CONTROLREG[r_m], OPLEN); + break; + case DEBUG_OPND: + (void) strlcat(opnd, dis_DEBUGREG[r_m], OPLEN); + break; + case TEST_OPND: + (void) strlcat(opnd, dis_TESTREG[r_m], OPLEN); + break; + case BYTE_OPND: + if (x->d86_rex_prefix == 0) + (void) strlcat(opnd, dis_REG8[r_m], OPLEN); + else + (void) strlcat(opnd, dis_REG8_REX[r_m], OPLEN); + break; + case WORD_OPND: + (void) strlcat(opnd, dis_REG16[r_m], OPLEN); + break; + case LONG_OPND: + if (x->d86_opnd_size == SIZE16) + (void) strlcat(opnd, dis_REG16[r_m], OPLEN); + else if (x->d86_opnd_size == SIZE32) + (void) strlcat(opnd, dis_REG32[r_m], OPLEN); + else + (void) strlcat(opnd, dis_REG64[r_m], OPLEN); + break; + } +#endif /* DIS_TEXT */ + return; + } + + /* + * if symbolic representation, skip override prefix, if any + */ + dtrace_check_override(x, opindex); + + /* + * Handle 16 bit memory references first, since they decode + * the mode values more simply. + * mode 1 is r_m + 8 bit displacement + * mode 2 is r_m + 16 bit displacement + * mode 0 is just r_m, unless r_m is 6 which is 16 bit disp + */ + if (x->d86_addr_size == SIZE16) { + if ((mode == 0 && r_m == 6) || mode == 2) + dtrace_imm_opnd(x, WORD_OPND, 2, opindex); + else if (mode == 1) + dtrace_imm_opnd(x, BYTE_OPND, 1, opindex); +#ifdef DIS_TEXT + if (mode == 0 && r_m == 6) + x->d86_opnd[opindex].d86_mode = MODE_SIGNED; + else if (mode == 0) + x->d86_opnd[opindex].d86_mode = MODE_NONE; + else + x->d86_opnd[opindex].d86_mode = MODE_OFFSET; + (void) strlcat(opnd, dis_addr16[mode][r_m], OPLEN); +#endif + return; + } + + /* + * 32 and 64 bit addressing modes are more complex since they + * can involve an SIB (scaled index and base) byte to decode. + */ + if (r_m == ESP_REGNO || r_m == ESP_REGNO + 8) { + have_SIB = 1; + dtrace_get_SIB(x, &ss, &index, &base); + if (x->d86_error) + return; + if (base != 5 || mode != 0) + if (x->d86_rex_prefix & REX_B) + base += 8; + if (x->d86_rex_prefix & REX_X) + index += 8; + } else { + base = r_m; + } + + /* + * Compute the displacement size and get its bytes + */ + dispsize = 0; + + if (mode == 1) + dispsize = 1; + else if (mode == 2) + dispsize = 4; + else if ((r_m & 7) == EBP_REGNO || + (have_SIB && (base & 7) == EBP_REGNO)) + dispsize = 4; + + if (dispsize > 0) { + dtrace_imm_opnd(x, dispsize == 4 ? LONG_OPND : BYTE_OPND, + dispsize, opindex); + if (x->d86_error) + return; + } + +#ifdef DIS_TEXT + if (dispsize > 0) + x->d86_opnd[opindex].d86_mode = MODE_OFFSET; + + if (have_SIB == 0) { + if (x->d86_mode == SIZE32) { + if (mode == 0) + (void) strlcat(opnd, dis_addr32_mode0[r_m], + OPLEN); + else + (void) strlcat(opnd, dis_addr32_mode12[r_m], + OPLEN); + } else { + if (mode == 0) + (void) strlcat(opnd, dis_addr64_mode0[r_m], + OPLEN); + else + (void) strlcat(opnd, dis_addr64_mode12[r_m], + OPLEN); + } + } else { + uint_t need_paren = 0; + char **regs; + if (x->d86_mode == SIZE32) /* NOTE this is not addr_size! */ + regs = (char **)dis_REG32; + else + regs = (char **)dis_REG64; + + /* + * print the base (if any) + */ + if (base == EBP_REGNO && mode == 0) { + if (index != ESP_REGNO) { + (void) strlcat(opnd, "(", OPLEN); + need_paren = 1; + } + } else { + (void) strlcat(opnd, "(", OPLEN); + (void) strlcat(opnd, regs[base], OPLEN); + need_paren = 1; + } + + /* + * print the index (if any) + */ + if (index != ESP_REGNO) { + (void) strlcat(opnd, ",", OPLEN); + (void) strlcat(opnd, regs[index], OPLEN); + (void) strlcat(opnd, dis_scale_factor[ss], OPLEN); + } else + if (need_paren) + (void) strlcat(opnd, ")", OPLEN); + } +#endif +} + +/* + * Operand sequence for standard instruction involving one register + * and one register/memory operand. + * wbit indicates a byte(0) or opnd_size(1) operation + * vbit indicates direction (0 for "opcode r,r_m") or (1 for "opcode r_m, r") + */ +#define STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, vbit) { \ + dtrace_get_modrm(x, &mode, ®, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, vbit); \ + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1 - vbit); \ +} + +/* + * Similar to above, but allows for the two operands to be of different + * classes (ie. wbit). + * wbit is for the r_m operand + * w2 is for the reg operand + */ +#define MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, w2, vbit) { \ + dtrace_get_modrm(x, &mode, ®, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, vbit); \ + dtrace_get_operand(x, REG_ONLY, reg, w2, 1 - vbit); \ +} + +/* + * Similar, but for 2 operands plus an immediate. + */ +#define THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, immsize) { \ + dtrace_get_modrm(x, &mode, ®, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, 1); \ + dtrace_get_operand(x, REG_ONLY, reg, w2, 2); \ + dtrace_imm_opnd(x, wbit, immsize, 0); \ +} + +/* + * Dissassemble a single x86 or amd64 instruction. + * + * Mode determines the default operating mode (SIZE16, SIZE32 or SIZE64) + * for interpreting instructions. + * + * returns non-zero for bad opcode + */ +int +dtrace_disx86(dis86_t *x, uint_t cpu_mode) +{ + const instable_t *dp = NULL; /* decode table being used */ +#ifdef DIS_TEXT + uint_t i; +#endif +#ifdef DIS_MEM + uint_t nomem = 0; +#define NOMEM (nomem = 1) +#else +#define NOMEM /* nothing */ +#endif + uint_t wbit = 0; /* opcode wbit, 0 is 8 bit, !0 for opnd_size */ + uint_t w2; /* wbit value for second operand */ + uint_t vbit; + uint_t mode = 0; /* mode value from ModRM byte */ + uint_t reg; /* reg value from ModRM byte */ + uint_t r_m; /* r_m value from ModRM byte */ + + uint_t opcode1; /* high nibble of 1st byte */ + uint_t opcode2; /* low nibble of 1st byte */ + uint_t opcode3; /* extra opcode bits usually from ModRM byte */ + uint_t opcode4; /* high nibble of 2nd byte */ + uint_t opcode5; /* low nibble of 2ne byte */ + uint_t opcode6; /* high nibble of 3rd byte */ + uint_t opcode7; /* low nibble of 3rd byte */ + uint_t opcode_bytes = 1; + + /* + * legacy prefixes come in 5 flavors, you should have only one of each + */ + uint_t opnd_size_prefix = 0; + uint_t addr_size_prefix = 0; + uint_t segment_prefix = 0; + uint_t lock_prefix = 0; + uint_t rep_prefix = 0; + uint_t rex_prefix = 0; /* amd64 register extension prefix */ + size_t off; + + x->d86_len = 0; + x->d86_rmindex = -1; + x->d86_error = 0; +#ifdef DIS_TEXT + x->d86_numopnds = 0; + x->d86_seg_prefix = NULL; + x->d86_mneu[0] = 0; + for (i = 0; i < 3; ++i) { + x->d86_opnd[i].d86_opnd[0] = 0; + x->d86_opnd[i].d86_prefix[0] = 0; + x->d86_opnd[i].d86_value_size = 0; + x->d86_opnd[i].d86_value = 0; + x->d86_opnd[i].d86_mode = MODE_NONE; + } +#endif + x->d86_error = 0; + x->d86_memsize = 0; + + if (cpu_mode == SIZE16) { + opnd_size = SIZE16; + addr_size = SIZE16; + } else if (cpu_mode == SIZE32) { + opnd_size = SIZE32; + addr_size = SIZE32; + } else { + opnd_size = SIZE32; + addr_size = SIZE64; + } + + /* + * Get one opcode byte and check for zero padding that follows + * jump tables. + */ + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + + if (opcode1 == 0 && opcode2 == 0 && + x->d86_check_func != NULL && x->d86_check_func(x->d86_data)) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mneu, ".byte\t0", OPLEN); +#endif + goto done; + } + + /* + * Gather up legacy x86 prefix bytes. + */ + for (;;) { + uint_t *which_prefix = NULL; + + dp = &dis_distable[opcode1][opcode2]; + + switch (dp->it_adrmode) { + case PREFIX: + which_prefix = &rep_prefix; + break; + case LOCK: + which_prefix = &lock_prefix; + break; + case OVERRIDE: + which_prefix = &segment_prefix; +#ifdef DIS_TEXT + x->d86_seg_prefix = (char *)dp->it_name; +#endif + if (dp->it_invalid64 && cpu_mode == SIZE64) + goto error; + break; + case AM: + which_prefix = &addr_size_prefix; + break; + case DM: + which_prefix = &opnd_size_prefix; + break; + } + if (which_prefix == NULL) + break; + *which_prefix = (opcode1 << 4) | opcode2; + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + } + + /* + * Handle amd64 mode PREFIX values. + * Some of the segment prefixes are no-ops. (only FS/GS actually work) + * We might have a REX prefix (opcodes 0x40-0x4f) + */ + if (cpu_mode == SIZE64) { + if (segment_prefix != 0x64 && segment_prefix != 0x65) + segment_prefix = 0; + + if (opcode1 == 0x4) { + rex_prefix = (opcode1 << 4) | opcode2; + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + dp = &dis_distable[opcode1][opcode2]; + } + } + + /* + * Deal with selection of operand and address size now. + * Note that the REX.W bit being set causes opnd_size_prefix to be + * ignored. + */ + if (cpu_mode == SIZE64) { + if (rex_prefix & 0x08) + opnd_size = SIZE64; + else if (opnd_size_prefix) + opnd_size = SIZE16; + + if (addr_size_prefix) + addr_size = SIZE32; + } else if (cpu_mode == SIZE32) { + if (opnd_size_prefix) + opnd_size = SIZE16; + if (addr_size_prefix) + addr_size = SIZE16; + } else { + if (opnd_size_prefix) + opnd_size = SIZE32; + if (addr_size_prefix) + addr_size = SIZE32; + } + + /* + * The pause instruction - a repz'd nop. This doesn't fit + * with any of the other prefix goop added for SSE, so we'll + * special-case it here. + */ + if (rep_prefix == 0xf3 && opcode1 == 0x9 && opcode2 == 0x0) { + rep_prefix = 0; + dp = &dis_opPause; + } + + /* + * Some 386 instructions have 2 bytes of opcode before the mod_r/m + * byte so we may need to perform a table indirection. + */ + if (dp->it_indirect == dis_op0F[0]) { + if (dtrace_get_opcode(x, &opcode4, &opcode5) != 0) + goto error; + opcode_bytes = 2; + if (opcode4 == 0x7 && opcode5 >= 0x1 && opcode5 <= 0x3) { + uint_t subcode; + + if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) + goto error; + opcode_bytes = 3; + subcode = ((opcode6 & 0x3) << 1) | + ((opcode7 & 0x8) >> 3); + dp = &dis_op0F7123[opcode5][subcode]; + } else if ((opcode4 == 0xc) && (opcode5 >= 0x8)) { + dp = &dis_op0FC8[0]; + } else { + dp = &dis_op0F[opcode4][opcode5]; + } + } + + /* + * If still not at a TERM decode entry, then a ModRM byte + * exists and its fields further decode the instruction. + */ + x->d86_got_modrm = 0; + if (dp->it_indirect != TERM) { + dtrace_get_modrm(x, &mode, &opcode3, &r_m); + if (x->d86_error) + goto error; + reg = opcode3; + + /* + * decode 287 instructions (D8-DF) from opcodeN + */ + if (opcode1 == 0xD && opcode2 >= 0x8) { + if (opcode2 == 0xB && mode == 0x3 && opcode3 == 4) + dp = &dis_opFP5[r_m]; + else if (opcode2 == 0xA && mode == 0x3 && opcode3 < 4) + dp = &dis_opFP7[opcode3]; + else if (opcode2 == 0xB && mode == 0x3) + dp = &dis_opFP6[opcode3]; + else if (opcode2 == 0x9 && mode == 0x3 && opcode3 >= 4) + dp = &dis_opFP4[opcode3 - 4][r_m]; + else if (mode == 0x3) + dp = &dis_opFP3[opcode2 - 8][opcode3]; + else + dp = &dis_opFP1n2[opcode2 - 8][opcode3]; + } else { + dp = dp->it_indirect + opcode3; + } + } + + /* + * In amd64 bit mode, ARPL opcode is changed to MOVSXD + * (sign extend 32bit to 64 bit) + */ + if (cpu_mode == SIZE64 && opcode1 == 0x6 && opcode2 == 0x3) + dp = &dis_opMOVSLD; + + /* + * at this point we should have a correct (or invalid) opcode + */ + if ((cpu_mode == SIZE64 && dp->it_invalid64) || + (cpu_mode != SIZE64 && dp->it_invalid32)) + goto error; + if (dp->it_indirect != TERM) + goto error; + + /* + * deal with MMX/SSE opcodes which are changed by prefixes + */ + switch (dp->it_adrmode) { + case MMO: + case MMOIMPL: + case MMO3P: + case MMOM3: + case MMOMS: + case MMOPM: + case MMOPRM: + case MMOS: + case XMMO: + case XMMOM: + case XMMOMS: + case XMMOPM: + case XMMOS: + case XMMOMX: + case XMMOX3: + case XMMOXMM: + /* + * This is horrible. Some SIMD instructions take the + * form 0x0F 0x?? ..., which is easily decoded using the + * existing tables. Other SIMD instructions use various + * prefix bytes to overload existing instructions. For + * Example, addps is F0, 58, whereas addss is F3 (repz), + * F0, 58. Presumably someone got a raise for this. + * + * If we see one of the instructions which can be + * modified in this way (if we've got one of the SIMDO* + * address modes), we'll check to see if the last prefix + * was a repz. If it was, we strip the prefix from the + * mnemonic, and we indirect using the dis_opSIMDrepz + * table. + */ + + /* + * Calculate our offset in dis_op0F + */ + if ((uintptr_t)dp - (uintptr_t)dis_op0F > sizeof (dis_op0F)) + goto error; + + off = ((uintptr_t)dp - (uintptr_t)dis_op0F) / + sizeof (instable_t); + + /* + * Rewrite if this instruction used one of the magic prefixes. + */ + if (rep_prefix) { + if (rep_prefix == 0xf2) + dp = &dis_opSIMDrepnz[off]; + else + dp = &dis_opSIMDrepz[off]; + rep_prefix = 0; + } else if (opnd_size_prefix) { + dp = &dis_opSIMDdata16[off]; + opnd_size_prefix = 0; + if (opnd_size == SIZE16) + opnd_size = SIZE32; + } + break; + + case MMOSH: + /* + * As with the "normal" SIMD instructions, the MMX + * shuffle instructions are overloaded. These + * instructions, however, are special in that they use + * an extra byte, and thus an extra table. As of this + * writing, they only use the opnd_size prefix. + */ + + /* + * Calculate our offset in dis_op0F7123 + */ + if ((uintptr_t)dp - (uintptr_t)dis_op0F7123 > + sizeof (dis_op0F7123)) + goto error; + + if (opnd_size_prefix) { + off = ((uintptr_t)dp - (uintptr_t)dis_op0F7123) / + sizeof (instable_t); + dp = &dis_opSIMD7123[off]; + opnd_size_prefix = 0; + if (opnd_size == SIZE16) + opnd_size = SIZE32; + } + break; + } + + /* + * In 64 bit mode, some opcodes automatically use opnd_size == SIZE64. + */ + if (cpu_mode == SIZE64) + if (dp->it_always64 || (opnd_size == SIZE32 && dp->it_stackop)) + opnd_size = SIZE64; + +#ifdef DIS_TEXT + /* + * At this point most instructions can format the opcode mnemonic + * including the prefixes. + */ + if (lock_prefix) + (void) strlcat(x->d86_mneu, "lock ", OPLEN); + + if (rep_prefix == 0xf2) + (void) strlcat(x->d86_mneu, "repnz ", OPLEN); + else if (rep_prefix == 0xf3) + (void) strlcat(x->d86_mneu, "repz ", OPLEN); + + if (cpu_mode == SIZE64 && addr_size_prefix) + (void) strlcat(x->d86_mneu, "addr32 ", OPLEN); + + if (dp->it_adrmode != CBW && + dp->it_adrmode != CWD && + dp->it_adrmode != XMMSFNC) { + if (strcmp(dp->it_name, "INVALID") == 0) + goto error; + (void) strlcat(x->d86_mneu, dp->it_name, OPLEN); + if (dp->it_suffix) { + char *types[] = {"", "w", "l", "q"}; + if (opcode_bytes == 2 && opcode4 == 4) { + /* It's a cmovx.yy. Replace the suffix x */ + for (i = 5; i < OPLEN; i++) { + if (x->d86_mneu[i] == '.') + break; + } + x->d86_mneu[i - 1] = *types[opnd_size]; + } else { + (void) strlcat(x->d86_mneu, types[opnd_size], + OPLEN); + } + } + } +#endif + + /* + * Process operands based on the addressing modes. + */ + x->d86_mode = cpu_mode; + x->d86_rex_prefix = rex_prefix; + x->d86_opnd_size = opnd_size; + x->d86_addr_size = addr_size; + vbit = 0; /* initialize for mem/reg -> reg */ + switch (dp->it_adrmode) { + /* + * amd64 instruction to sign extend 32 bit reg/mem operands + * into 64 bit register values + */ + case MOVSXZ: +#ifdef DIS_TEXT + if (rex_prefix == 0) + (void) strncpy(x->d86_mneu, "movzld", OPLEN); +#endif + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + x->d86_opnd_size = SIZE64; + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + x->d86_opnd_size = opnd_size = SIZE32; + wbit = LONG_OPND; + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + /* + * movsbl movsbw movsbq (0x0FBE) or movswl movswq (0x0FBF) + * movzbl movzbw movzbq (0x0FB6) or mobzwl movzwq (0x0FB7) + * wbit lives in 2nd byte, note that operands + * are different sized + */ + case MOVZ: + if (rex_prefix & REX_W) { + /* target register size = 64 bit */ + x->d86_mneu[5] = 'q'; + } + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + x->d86_opnd_size = opnd_size = SIZE16; + wbit = WBIT(opcode5); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + /* + * imul instruction, with either 8-bit or longer immediate + * opcode 0x6B for byte, sign-extended displacement, 0x69 for word(s) + */ + case IMUL: + wbit = LONG_OPND; + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, + OPSIZE(opnd_size, opcode2 == 0x9)); + break; + + /* memory or register operand to register, with 'w' bit */ + case MRw: + wbit = WBIT(opcode2); + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); + break; + + /* register to memory or register operand, with 'w' bit */ + /* arpl happens to fit here also because it is odd */ + case RMw: + if (opcode_bytes == 2) + wbit = WBIT(opcode5); + else + wbit = WBIT(opcode2); + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* xaddb instruction */ + case XADDB: + wbit = 0; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* MMX register to memory or register operand */ + case MMS: + case MMOS: +#ifdef DIS_TEXT + wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; +#else + wbit = LONG_OPND; +#endif + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); + break; + + /* MMX register to memory */ + case MMOMS: + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode == REG_ONLY) + goto error; + wbit = MM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); + break; + + /* Double shift. Has immediate operand specifying the shift. */ + case DSHIFT: + wbit = LONG_OPND; + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 2); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + /* + * Double shift. With no immediate operand, specifies using %cl. + */ + case DSHIFTcl: + wbit = LONG_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* immediate to memory or register operand */ + case IMlw: + wbit = WBIT(opcode2); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); + /* + * Have long immediate for opcode 0x81, but not 0x80 nor 0x83 + */ + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, opcode2 == 1), 0); + break; + + /* immediate to memory or register operand with the */ + /* 'w' bit present */ + case IMw: + wbit = WBIT(opcode2); + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); + break; + + /* immediate to register with register in low 3 bits */ + /* of op code */ + case IR: + /* w-bit here (with regs) is bit 3 */ + wbit = opcode2 >>3 & 0x1; + reg = REGNO(opcode2); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + mode = REG_ONLY; + r_m = reg; + dtrace_get_operand(x, mode, r_m, wbit, 1); + dtrace_imm_opnd(x, wbit, OPSIZE64(opnd_size, wbit), 0); + break; + + /* MMX immediate shift of register */ + case MMSH: + case MMOSH: + wbit = MM_OPND; + goto mm_shift; /* in next case */ + + /* SIMD immediate shift of register */ + case XMMSH: + wbit = XMM_OPND; +mm_shift: + reg = REGNO(opcode7); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + dtrace_imm_opnd(x, wbit, 1, 0); + NOMEM; + break; + + /* accumulator to memory operand */ + case AO: + vbit = 1; + /*FALLTHROUGH*/ + + /* memory operand to accumulator */ + case OA: + wbit = WBIT(opcode2); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1 - vbit); + dtrace_imm_opnd(x, wbit, OPSIZE64(addr_size, LONG_OPND), vbit); +#ifdef DIS_TEXT + x->d86_opnd[vbit].d86_mode = MODE_OFFSET; +#endif + break; + + + /* segment register to memory or register operand */ + case SM: + vbit = 1; + /*FALLTHROUGH*/ + + /* memory or register operand to segment register */ + case MS: + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, LONG_OPND, vbit); + dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 1 - vbit); + break; + + /* + * rotate or shift instructions, which may shift by 1 or + * consult the cl register, depending on the 'v' bit + */ + case Mv: + vbit = VBIT(opcode2); + wbit = WBIT(opcode2); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); +#ifdef DIS_TEXT + if (vbit) { + (void) strlcat(x->d86_opnd[0].d86_opnd, "%cl", OPLEN); + } else { + x->d86_opnd[0].d86_mode = MODE_SIGNED; + x->d86_opnd[0].d86_value_size = 1; + x->d86_opnd[0].d86_value = 1; + } +#endif + break; + /* + * immediate rotate or shift instructions + */ + case MvI: + wbit = WBIT(opcode2); +normal_imm_mem: + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + /* bit test instructions */ + case MIb: + wbit = LONG_OPND; + goto normal_imm_mem; + + /* single memory or register operand with 'w' bit present */ + case Mw: + wbit = WBIT(opcode2); +just_mem: + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + case SWAPGS: + if (cpu_mode == SIZE64 && mode == 3 && r_m == 0) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mneu, "swapgs", OPLEN); +#endif + NOMEM; + break; + } + /*FALLTHROUGH*/ + + /* prefetch instruction - memory operand, but no memory acess */ + case PREF: + NOMEM; + /*FALLTHROUGH*/ + + /* single memory or register operand */ + case M: + wbit = LONG_OPND; + goto just_mem; + + /* single memory or register byte operand */ + case Mb: + wbit = BYTE_OPND; + goto just_mem; + + case MO: + /* Similar to M, but only memory (no direct registers) */ + wbit = LONG_OPND; + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode == 3) + goto error; + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + /* move special register to register or reverse if vbit */ + case SREG: + switch (opcode5) { + + case 2: + vbit = 1; + /*FALLTHROUGH*/ + case 0: + wbit = CONTROL_OPND; + break; + + case 3: + vbit = 1; + /*FALLTHROUGH*/ + case 1: + wbit = DEBUG_OPND; + break; + + case 6: + vbit = 1; + /*FALLTHROUGH*/ + case 4: + wbit = TEST_OPND; + break; + + } + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, wbit, vbit); + dtrace_get_operand(x, REG_ONLY, r_m, LONG_OPND, 1 - vbit); + NOMEM; + break; + + /* + * single register operand with register in the low 3 + * bits of op code + */ + case R: + if (opcode_bytes == 2) + reg = REGNO(opcode5); + else + reg = REGNO(opcode2); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); + NOMEM; + break; + + /* + * register to accumulator with register in the low 3 + * bits of op code, xchg instructions + */ + case RA: + NOMEM; + reg = REGNO(opcode2); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, LONG_OPND, 1); + break; + + /* + * single segment register operand, with register in + * bits 3-4 of op code byte + */ + case SEG: + NOMEM; + reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 0x3; + dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0); + break; + + /* + * single segment register operand, with register in + * bits 3-5 of op code + */ + case LSEG: + NOMEM; + /* long seg reg from opcode */ + reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 0x7; + dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0); + break; + + /* memory or register operand to register */ + case MR: + wbit = LONG_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); + break; + + case RM: + wbit = LONG_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* MMX/SIMD-Int memory or mm reg to mm reg */ + case MM: + case MMO: +#ifdef DIS_TEXT + wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; +#else + wbit = LONG_OPND; +#endif + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); + break; + + case MMOIMPL: +#ifdef DIS_TEXT + wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; +#else + wbit = LONG_OPND; +#endif + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + dtrace_get_operand(x, REG_ONLY, reg, MM_OPND, 1); + mode = 0; /* change for memory access size... */ + break; + + /* MMX/SIMD-Int and SIMD-FP predicated mm reg to r32 */ + case MMO3P: + wbit = MM_OPND; + goto xmm3p; + case XMM3P: + wbit = XMM_OPND; +xmm3p: + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 1); + NOMEM; + break; + + /* MMX/SIMD-Int predicated r32/mem to mm reg */ + case MMOPRM: + wbit = LONG_OPND; + w2 = MM_OPND; + goto xmmprm; + case XMMPRM: + wbit = LONG_OPND; + w2 = XMM_OPND; +xmmprm: + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, 1); + break; + + /* MMX/SIMD-Int predicated mm/mem to mm reg */ + case MMOPM: + wbit = w2 = MM_OPND; + goto xmmprm; + + /* MMX/SIMD-Int mm reg to r32 */ + case MMOM3: + NOMEM; + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + wbit = MM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); + break; + + /* SIMD memory or xmm reg operand to xmm reg */ + case XMM: + case XMMO: + case XMMXIMPL: + wbit = XMM_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); + + if (dp->it_adrmode == XMMXIMPL && mode != REG_ONLY) + goto error; + +#ifdef DIS_TEXT + /* + * movlps and movhlps share opcodes. They differ in the + * addressing modes allowed for their operands. + * movhps and movlhps behave similarly. + */ + if (mode == REG_ONLY) { + if (strcmp(dp->it_name, "movlps") == 0) + (void) strncpy(x->d86_mneu, "movhlps", OPLEN); + else if (strcmp(dp->it_name, "movhps") == 0) + (void) strncpy(x->d86_mneu, "movlhps", OPLEN); + } +#endif + if (dp->it_adrmode == XMMXIMPL) + mode = 0; /* change for memory access size... */ + break; + + /* SIMD xmm reg to memory or xmm reg */ + case XMMS: + case XMMOS: + case XMMMS: + case XMMOMS: + dtrace_get_modrm(x, &mode, ®, &r_m); +#ifdef DIS_TEXT + if ((strcmp(dp->it_name, "movlps") == 0 || + strcmp(dp->it_name, "movhps") == 0 || + strcmp(dp->it_name, "movntps") == 0) && + mode == REG_ONLY) + goto error; +#endif + wbit = XMM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); + break; + + /* SIMD memory to xmm reg */ + case XMMM: + case XMMOM: + wbit = XMM_OPND; + dtrace_get_modrm(x, &mode, ®, &r_m); +#ifdef DIS_TEXT + if (mode == REG_ONLY) { + if (strcmp(dp->it_name, "movhps") == 0) + (void) strncpy(x->d86_mneu, "movlhps", OPLEN); + else + goto error; + } +#endif + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); + break; + + /* SIMD memory or r32 to xmm reg */ + case XMM3MX: + wbit = LONG_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); + break; + + case XMM3MXS: + wbit = LONG_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); + break; + + /* SIMD memory or mm reg to xmm reg */ + case XMMOMX: + /* SIMD mm to xmm */ + case XMMMX: + wbit = MM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); + break; + + /* SIMD memory or xmm reg to mm reg */ + case XMMXMM: + case XMMOXMM: + case XMMXM: + wbit = XMM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); + break; + + + /* SIMD memory or xmm reg to r32 */ + case XMMXM3: + wbit = XMM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); + break; + + /* SIMD xmm to r32 */ + case XMMX3: + case XMMOX3: + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, XMM_OPND, 0); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + NOMEM; + break; + + /* SIMD predicated memory or xmm reg with/to xmm reg */ + case XMMP: + case XMMOPM: + wbit = XMM_OPND; + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); + +#ifdef DIS_TEXT + /* + * cmpps and cmpss vary their instruction name based + * on the value of imm8. Other XMMP instructions, + * such as shufps, require explicit specification of + * the predicate. + */ + if (dp->it_name[0] == 'c' && + dp->it_name[1] == 'm' && + dp->it_name[2] == 'p' && + strlen(dp->it_name) == 5) { + uchar_t pred = x->d86_opnd[0].d86_value & 0xff; + + if (pred >= (sizeof (dis_PREDSUFFIX) / sizeof (char *))) + goto error; + + (void) strncpy(x->d86_mneu, "cmp", OPLEN); + (void) strlcat(x->d86_mneu, dis_PREDSUFFIX[pred], + OPLEN); + (void) strlcat(x->d86_mneu, + dp->it_name + strlen(dp->it_name) - 2, + OPLEN); + x->d86_opnd[0] = x->d86_opnd[1]; + x->d86_opnd[1] = x->d86_opnd[2]; + x->d86_numopnds = 2; + } +#endif + break; + + /* immediate operand to accumulator */ + case IA: + wbit = WBIT(opcode2); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); + NOMEM; + break; + + /* memory or register operand to accumulator */ + case MA: + wbit = WBIT(opcode2); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + /* si register to di register used to reference memory */ + case SD: +#ifdef DIS_TEXT + dtrace_check_override(x, 0); + x->d86_numopnds = 2; + if (addr_size == SIZE64) { + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", + OPLEN); + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", + OPLEN); + } else if (addr_size == SIZE32) { + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", + OPLEN); + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", + OPLEN); + } else { + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", + OPLEN); + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%di)", + OPLEN); + } +#endif + wbit = LONG_OPND; + break; + + /* accumulator to di register */ + case AD: + wbit = WBIT(opcode2); +#ifdef DIS_TEXT + dtrace_check_override(x, 1); + x->d86_numopnds = 2; + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 0); + if (addr_size == SIZE64) + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", + OPLEN); + else if (addr_size == SIZE32) + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", + OPLEN); + else + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%di)", + OPLEN); +#endif + break; + + /* si register to accumulator */ + case SA: + wbit = WBIT(opcode2); +#ifdef DIS_TEXT + dtrace_check_override(x, 0); + x->d86_numopnds = 2; + if (addr_size == SIZE64) + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", + OPLEN); + else if (addr_size == SIZE32) + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", + OPLEN); + else + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", + OPLEN); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); +#endif + break; + + /* + * single operand, a 16/32 bit displacement + */ + case D: + wbit = LONG_OPND; + dtrace_disp_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0); + NOMEM; + break; + + /* jmp/call indirect to memory or register operand */ + case INM: +#ifdef DIS_TEXT + (void) strlcat(x->d86_opnd[0].d86_prefix, "*", OPLEN); +#endif + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, LONG_OPND, 0); + wbit = LONG_OPND; + break; + + /* + * for long jumps and long calls -- a new code segment + * register and an offset in IP -- stored in object + * code in reverse order. Note - not valid in amd64 + */ + case SO: + dtrace_check_override(x, 1); + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 1); +#ifdef DIS_TEXT + x->d86_opnd[1].d86_mode = MODE_SIGNED; +#endif + /* will now get segment operand */ + dtrace_imm_opnd(x, wbit, 2, 0); + break; + + /* + * jmp/call. single operand, 8 bit displacement. + * added to current EIP in 'compofff' + */ + case BD: + dtrace_disp_opnd(x, BYTE_OPND, 1, 0); + NOMEM; + break; + + /* single 32/16 bit immediate operand */ + case I: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0); + break; + + /* single 8 bit immediate operand */ + case Ib: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + case ENTER: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, 2, 0); + dtrace_imm_opnd(x, wbit, 1, 1); + switch (opnd_size) { + case SIZE64: + x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 8; + break; + case SIZE32: + x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 4; + break; + case SIZE16: + x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 2; + break; + } + + break; + + /* 16-bit immediate operand */ + case RET: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, 2, 0); + break; + + /* single 8 bit port operand */ + case P: + dtrace_check_override(x, 0); + dtrace_imm_opnd(x, BYTE_OPND, 1, 0); + NOMEM; + break; + + /* single operand, dx register (variable port instruction) */ + case V: + x->d86_numopnds = 1; + dtrace_check_override(x, 0); +#ifdef DIS_TEXT + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%dx)", OPLEN); +#endif + NOMEM; + break; + + /* + * The int instruction, which has two forms: + * int 3 (breakpoint) or + * int n, where n is indicated in the subsequent + * byte (format Ib). The int 3 instruction (opcode 0xCC), + * where, although the 3 looks like an operand, + * it is implied by the opcode. It must be converted + * to the correct base and output. + */ + case INT3: +#ifdef DIS_TEXT + x->d86_numopnds = 1; + x->d86_opnd[0].d86_mode = MODE_SIGNED; + x->d86_opnd[0].d86_value_size = 1; + x->d86_opnd[0].d86_value = 3; +#endif + NOMEM; + break; + + /* single 8 bit immediate operand */ + case INTx: + dtrace_imm_opnd(x, BYTE_OPND, 1, 0); + NOMEM; + break; + + /* an unused byte must be discarded */ + case U: + if (x->d86_get_byte(x->d86_data) < 0) + goto error; + x->d86_len++; + NOMEM; + break; + + case CBW: +#ifdef DIS_TEXT + if (opnd_size == SIZE16) + (void) strlcat(x->d86_mneu, "cbtw", OPLEN); + else if (opnd_size == SIZE32) + (void) strlcat(x->d86_mneu, "cwtl", OPLEN); + else + (void) strlcat(x->d86_mneu, "cltq", OPLEN); +#endif + wbit = LONG_OPND; + NOMEM; + break; + + case CWD: +#ifdef DIS_TEXT + if (opnd_size == SIZE16) + (void) strlcat(x->d86_mneu, "cwtd", OPLEN); + else if (opnd_size == SIZE32) + (void) strlcat(x->d86_mneu, "cltd", OPLEN); + else + (void) strlcat(x->d86_mneu, "cqtd", OPLEN); +#endif + wbit = LONG_OPND; + NOMEM; + break; + + case XMMSFNC: + /* + * sfence is sfence if mode is REG_ONLY. If mode isn't + * REG_ONLY, mnemonic should be 'clflush'. + */ + dtrace_get_modrm(x, &mode, ®, &r_m); + + /* sfence doesn't take operands */ +#ifdef DIS_TEXT + if (mode == REG_ONLY) { + (void) strlcat(x->d86_mneu, "sfence", OPLEN); + } else { + (void) strlcat(x->d86_mneu, "clflush", OPLEN); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); + NOMEM; + } +#else + if (mode != REG_ONLY) { + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); + NOMEM; + } +#endif + break; + + /* + * no disassembly, the mnemonic was all there was so go on + */ + case NORM: + if (dp->it_invalid32 && cpu_mode != SIZE64) + goto error; + NOMEM; + /*FALLTHROUGH*/ + case IMPLMEM: + break; + + case XMMFENCE: + /* + * Only the following exact byte sequences are allowed: + * + * 0f ae e8 lfence + * 0f ae f0 mfence + */ + if ((uint8_t)x->d86_bytes[x->d86_len - 1] != 0xe8 && + (uint8_t)x->d86_bytes[x->d86_len - 1] != 0xf0) + goto error; + + break; + + + /* float reg */ + case F: +#ifdef DIS_TEXT + x->d86_numopnds = 1; + (void) strlcat(x->d86_opnd[0].d86_opnd, "%st(X)", OPLEN); + x->d86_opnd[0].d86_opnd[4] = r_m + '0'; +#endif + NOMEM; + break; + + /* float reg to float reg, with ret bit present */ + case FF: + vbit = opcode2 >> 2 & 0x1; /* vbit = 1: st -> st(i) */ + /*FALLTHROUGH*/ + case FFC: /* case for vbit always = 0 */ +#ifdef DIS_TEXT + x->d86_numopnds = 2; + (void) strlcat(x->d86_opnd[1 - vbit].d86_opnd, "%st", OPLEN); + (void) strlcat(x->d86_opnd[vbit].d86_opnd, "%st(X)", OPLEN); + x->d86_opnd[vbit].d86_opnd[4] = r_m + '0'; +#endif + NOMEM; + break; + + /* an invalid op code */ + case AM: + case DM: + case OVERRIDE: + case PREFIX: + case UNKNOWN: + NOMEM; + default: + goto error; + } /* end switch */ + if (x->d86_error) + goto error; + +done: +#ifdef DIS_MEM + /* + * compute the size of any memory accessed by the instruction + */ + if (x->d86_memsize != 0) { + return (0); + } else if (dp->it_stackop) { + switch (opnd_size) { + case SIZE16: + x->d86_memsize = 2; + break; + case SIZE32: + x->d86_memsize = 4; + break; + case SIZE64: + x->d86_memsize = 8; + break; + } + } else if (nomem || mode == REG_ONLY) { + x->d86_memsize = 0; + + } else if (dp->it_size != 0) { + /* + * In 64 bit mode descriptor table entries + * go up to 10 bytes and popf/pushf are always 8 bytes + */ + if (x->d86_mode == SIZE64 && dp->it_size == 6) + x->d86_memsize = 10; + else if (x->d86_mode == SIZE64 && opcode1 == 0x9 && + (opcode2 == 0xc || opcode2 == 0xd)) + x->d86_memsize = 8; + else + x->d86_memsize = dp->it_size; + + } else if (wbit == 0) { + x->d86_memsize = 1; + + } else if (wbit == LONG_OPND) { + if (opnd_size == SIZE64) + x->d86_memsize = 8; + else if (opnd_size == SIZE32) + x->d86_memsize = 4; + else + x->d86_memsize = 2; + + } else if (wbit == SEG_OPND) { + x->d86_memsize = 4; + + } else { + x->d86_memsize = 8; + } +#endif + return (0); + +error: +#ifdef DIS_TEXT + (void) strlcat(x->d86_mneu, "undef", OPLEN); +#endif + return (1); +} + +#ifdef DIS_TEXT + +/* + * Some instructions should have immediate operands printed + * as unsigned integers. We compare against this table. + */ +static char *unsigned_ops[] = { + "or", "and", "xor", "test", "in", "out", "lcall", "ljmp", + "rcr", "rcl", "ror", "rol", "shl", "shr", "sal", "psr", "psl", + 0 +}; + +static int +isunsigned_op(char *opcode) +{ + char *where; + int i; + int is_unsigned = 0; + + /* + * Work back to start of last mnemonic, since we may have + * prefixes on some opcodes. + */ + where = opcode + strlen(opcode) - 1; + while (where > opcode && *where != ' ') + --where; + if (*where == ' ') + ++where; + + for (i = 0; unsigned_ops[i]; ++i) { + if (strncmp(where, unsigned_ops[i], + strlen(unsigned_ops[i]))) + continue; + is_unsigned = 1; + break; + } + return (is_unsigned); +} + +/* ARGSUSED */ +void +dtrace_disx86_str(dis86_t *dis, uint_t mode, uintptr_t pc, char *buf, + size_t buflen) +{ + int i; + + dis->d86_sprintf_func(buf, buflen, "%-6s ", dis->d86_mneu); + + /* + * For PC-relative jumps, the pc is really the next pc after executing + * this instruction, so increment it appropriately. + */ + pc += dis->d86_len; + + for (i = 0; i < dis->d86_numopnds; i++) { + d86opnd_t *op = &dis->d86_opnd[i]; + int64_t sv; + uint64_t mask; + + if (i != 0) + (void) strlcat(buf, ",", buflen); + + (void) strlcat(buf, op->d86_prefix, buflen); + + sv = op->d86_value; + + switch (op->d86_mode) { + + case MODE_NONE: + + (void) strlcat(buf, op->d86_opnd, buflen); + break; + + case MODE_SIGNED: + case MODE_IMPLIED: + case MODE_OFFSET: + + if (dis->d86_seg_prefix) + (void) strlcat(buf, dis->d86_seg_prefix, + buflen); + + switch (op->d86_value_size) { + case 1: + sv = (int8_t)sv; + mask = 0xff; + break; + case 2: + sv = (int16_t)sv; + mask = 0xffff; + break; + case 4: + sv = (int32_t)sv; + mask = 0xffffffff; + break; + case 8: + mask = 0xffffffffffffffffULL; + break; + } + + if (op->d86_mode == MODE_SIGNED || + op->d86_mode == MODE_IMPLIED) + (void) strlcat(buf, "$", buflen); + + if (sv < 0 && sv > -0xffff && + !isunsigned_op(dis->d86_mneu)) { + dis->d86_sprintf_func(buf + strlen(buf), + buflen - strlen(buf), + (dis->d86_flags & DIS_OP_OCTAL) ? + "-0%llo" : "-0x%llx", -sv & mask); + } else { + dis->d86_sprintf_func(buf + strlen(buf), + buflen - strlen(buf), + (dis->d86_flags & DIS_OP_OCTAL) ? + "0%llo" : "0x%llx", sv & mask); + } + (void) strlcat(buf, op->d86_opnd, buflen); + break; + + case MODE_IPREL: + + switch (op->d86_value_size) { + case 1: + sv = (int8_t)sv; + break; + case 2: + sv = (int16_t)sv; + break; + case 4: + sv = (int32_t)sv; + break; + } + + if (sv < 0) + dis->d86_sprintf_func(buf + strlen(buf), + buflen - strlen(buf), + (dis->d86_flags & DIS_OP_OCTAL) ? + "-0%llo" : "-0x%llx", -sv - dis->d86_len); + else + dis->d86_sprintf_func(buf + strlen(buf), + buflen - strlen(buf), + (dis->d86_flags & DIS_OP_OCTAL) ? + "+0%llo" : "+0x%llx", sv + dis->d86_len); + + (void) strlcat(buf, "\t<", buflen); + + if (dis->d86_sym_lookup == NULL || + dis->d86_sym_lookup(dis->d86_data, pc + sv, + buf + strlen(buf), buflen - strlen(buf)) != 0) + dis->d86_sprintf_func(buf + strlen(buf), + buflen - strlen(buf), + (dis->d86_flags & DIS_OP_OCTAL) ? + "0%llo" : "0x%llx", pc + sv); + + (void) strlcat(buf, ">", buflen); + + break; + } + } +} + +#endif /* DIS_TEXT */ diff --git a/sys/cddl/dev/dtrace/i386/dis_tables.h b/sys/cddl/dev/dtrace/i386/dis_tables.h new file mode 100644 index 0000000..b45a8c5 --- /dev/null +++ b/sys/cddl/dev/dtrace/i386/dis_tables.h @@ -0,0 +1,112 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +#ifndef _DIS_TABLES_H +#define _DIS_TABLES_H + +#if defined(sun) +#pragma ident "@(#)dis_tables.h 1.7 06/03/02 SMI" +#endif + +/* + * Constants and prototypes for the IA32 disassembler backend. See dis_tables.c + * for usage information and documentation. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> +#include <sys/param.h> + +/* + * values for cpu mode + */ +#define SIZE16 1 +#define SIZE32 2 +#define SIZE64 3 + +#define OPLEN 256 +#define PFIXLEN 8 +#define NCPS 12 /* number of chars per symbol */ + +/* + * data structures that must be provided to dtrace_dis86() + */ +typedef struct d86opnd { + char d86_opnd[OPLEN]; /* symbolic rep of operand */ + char d86_prefix[PFIXLEN]; /* any prefix string or "" */ + uint_t d86_mode; /* mode for immediate */ + uint_t d86_value_size; /* size in bytes of d86_value */ + uint64_t d86_value; /* immediate value of opnd */ +} d86opnd_t; + +typedef struct dis86 { + uint_t d86_mode; + uint_t d86_error; + uint_t d86_len; /* instruction length */ + int d86_rmindex; /* index of modrm byte or -1 */ + uint_t d86_memsize; /* size of memory referenced */ + char d86_bytes[16]; /* bytes of instruction */ + char d86_mneu[OPLEN]; + uint_t d86_numopnds; + uint_t d86_rex_prefix; /* value of REX prefix if !0 */ + char *d86_seg_prefix; /* segment prefix, if any */ + uint_t d86_opnd_size; + uint_t d86_addr_size; + uint_t d86_got_modrm; + struct d86opnd d86_opnd[3]; /* up to 3 operands */ + int (*d86_check_func)(void *); + int (*d86_get_byte)(void *); +#ifdef DIS_TEXT + int (*d86_sym_lookup)(void *, uint64_t, char *, size_t); + int (*d86_sprintf_func)(char *, size_t, const char *, ...); + int d86_flags; + uint_t d86_imm_bytes; +#endif + void *d86_data; +} dis86_t; + +extern int dtrace_disx86(dis86_t *x, uint_t cpu_mode); + +#define DIS_OP_OCTAL 0x1 /* Print all numbers in octal */ + +#ifdef DIS_TEXT +extern void dtrace_disx86_str(dis86_t *x, uint_t cpu_mode, uintptr_t pc, + char *buf, size_t len); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _DIS_TABLES_H */ diff --git a/sys/cddl/dev/dtrace/i386/dtrace_asm.S b/sys/cddl/dev/dtrace/i386/dtrace_asm.S new file mode 100644 index 0000000..787a3c8 --- /dev/null +++ b/sys/cddl/dev/dtrace/i386/dtrace_asm.S @@ -0,0 +1,527 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#define _ASM + +#include <machine/asmacros.h> +#include <sys/cpuvar_defs.h> +#include <sys/dtrace.h> + +#include "assym.s" + + .globl calltrap + .type calltrap,@function + ENTRY(dtrace_invop_start) + + pushl %eax /* push %eax -- may be return value */ + pushl %esp /* push stack pointer */ + addl $48, (%esp) /* adjust to incoming args */ + pushl 40(%esp) /* push calling EIP */ + + /* + * Call dtrace_invop to let it check if the exception was + * a fbt one. The return value in %eax will tell us what + * dtrace_invop wants us to do. + */ + call dtrace_invop + + /* + * We pushed 3 times for the arguments to dtrace_invop, + * so we need to increment the stack pointer to get rid of + * those values. + */ + addl $12, %esp + ALTENTRY(dtrace_invop_callsite) + cmpl $DTRACE_INVOP_PUSHL_EBP, %eax + je invop_push + cmpl $DTRACE_INVOP_POPL_EBP, %eax + je invop_pop + cmpl $DTRACE_INVOP_LEAVE, %eax + je invop_leave + cmpl $DTRACE_INVOP_NOP, %eax + je invop_nop + + /* When all else fails handle the trap in the usual way. */ + jmpl *dtrace_invop_calltrap_addr + +invop_push: + /* + * We must emulate a "pushl %ebp". To do this, we pull the stack + * down 4 bytes, and then store the base pointer. + */ + popal + subl $4, %esp /* make room for %ebp */ + pushl %eax /* push temp */ + movl 8(%esp), %eax /* load calling EIP */ + incl %eax /* increment over LOCK prefix */ + movl %eax, 4(%esp) /* store calling EIP */ + movl 12(%esp), %eax /* load calling CS */ + movl %eax, 8(%esp) /* store calling CS */ + movl 16(%esp), %eax /* load calling EFLAGS */ + movl %eax, 12(%esp) /* store calling EFLAGS */ + movl %ebp, 16(%esp) /* push %ebp */ + popl %eax /* pop off temp */ + iret /* Return from interrupt. */ +invop_pop: + /* + * We must emulate a "popl %ebp". To do this, we do the opposite of + * the above: we remove the %ebp from the stack, and squeeze up the + * saved state from the trap. + */ + popal + pushl %eax /* push temp */ + movl 16(%esp), %ebp /* pop %ebp */ + movl 12(%esp), %eax /* load calling EFLAGS */ + movl %eax, 16(%esp) /* store calling EFLAGS */ + movl 8(%esp), %eax /* load calling CS */ + movl %eax, 12(%esp) /* store calling CS */ + movl 4(%esp), %eax /* load calling EIP */ + incl %eax /* increment over LOCK prefix */ + movl %eax, 8(%esp) /* store calling EIP */ + popl %eax /* pop off temp */ + addl $4, %esp /* adjust stack pointer */ + iret /* Return from interrupt. */ +invop_leave: + /* + * We must emulate a "leave", which is the same as a "movl %ebp, %esp" + * followed by a "popl %ebp". This looks similar to the above, but + * requires two temporaries: one for the new base pointer, and one + * for the staging register. + */ + popa + pushl %eax /* push temp */ + pushl %ebx /* push temp */ + movl %ebp, %ebx /* set temp to old %ebp */ + movl (%ebx), %ebp /* pop %ebp */ + movl 16(%esp), %eax /* load calling EFLAGS */ + movl %eax, (%ebx) /* store calling EFLAGS */ + movl 12(%esp), %eax /* load calling CS */ + movl %eax, -4(%ebx) /* store calling CS */ + movl 8(%esp), %eax /* load calling EIP */ + incl %eax /* increment over LOCK prefix */ + movl %eax, -8(%ebx) /* store calling EIP */ + movl %ebx, -4(%esp) /* temporarily store new %esp */ + popl %ebx /* pop off temp */ + popl %eax /* pop off temp */ + movl -12(%esp), %esp /* set stack pointer */ + subl $8, %esp /* adjust for three pushes, one pop */ + iret /* return from interrupt */ +invop_nop: + /* + * We must emulate a "nop". This is obviously not hard: we need only + * advance the %eip by one. + */ + popa + incl (%esp) + iret /* return from interrupt */ + + END(dtrace_invop_start) + +/* +void dtrace_invop_init(void) +*/ + ENTRY(dtrace_invop_init) + movl $dtrace_invop_start, dtrace_invop_jump_addr + ret + END(dtrace_invop_init) + +/* +void dtrace_invop_uninit(void) +*/ + ENTRY(dtrace_invop_uninit) + movl $0, dtrace_invop_jump_addr + ret + END(dtrace_invop_uninit) + +/* +greg_t dtrace_getfp(void) +*/ + + ENTRY(dtrace_getfp) + movl %ebp, %eax + ret + END(dtrace_getfp) + +/* +uint32_t dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new) +*/ + + ENTRY(dtrace_cas32) + ALTENTRY(dtrace_casptr) + movl 4(%esp), %edx + movl 8(%esp), %eax + movl 12(%esp), %ecx + lock + cmpxchgl %ecx, (%edx) + ret + END(dtrace_casptr) + END(dtrace_cas32) + +/* +uintptr_t dtrace_caller(int aframes) +*/ + + ENTRY(dtrace_caller) + movl $-1, %eax + ret + END(dtrace_caller) + +/* +void dtrace_copy(uintptr_t src, uintptr_t dest, size_t size) +*/ + + ENTRY(dtrace_copy) + pushl %ebp + movl %esp, %ebp + pushl %esi + pushl %edi + + movl 8(%ebp), %esi /* Load source address */ + movl 12(%ebp), %edi /* Load destination address */ + movl 16(%ebp), %ecx /* Load count */ + repz /* Repeat for count... */ + smovb /* move from %ds:si to %es:di */ + + popl %edi + popl %esi + movl %ebp, %esp + popl %ebp + ret + END(dtrace_copy) + +/* +void dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size) +*/ + + ENTRY(dtrace_copystr) + + pushl %ebp /* Setup stack frame */ + movl %esp, %ebp + pushl %ebx /* Save registers */ + + movl 8(%ebp), %ebx /* Load source address */ + movl 12(%ebp), %edx /* Load destination address */ + movl 16(%ebp), %ecx /* Load count */ + +0: + movb (%ebx), %al /* Load from source */ + movb %al, (%edx) /* Store to destination */ + incl %ebx /* Increment source pointer */ + incl %edx /* Increment destination pointer */ + decl %ecx /* Decrement remaining count */ + cmpb $0, %al + je 1f + cmpl $0, %ecx + jne 0b + +1: + popl %ebx + movl %ebp, %esp + popl %ebp + ret + + END(dtrace_copystr) + +/* +uintptr_t dtrace_fulword(void *addr) +*/ + + ENTRY(dtrace_fulword) + movl 4(%esp), %ecx + xorl %eax, %eax + movl (%ecx), %eax + ret + END(dtrace_fulword) + +/* +uint8_t dtrace_fuword8_nocheck(void *addr) +*/ + + ENTRY(dtrace_fuword8_nocheck) + movl 4(%esp), %ecx + xorl %eax, %eax + movzbl (%ecx), %eax + ret + END(dtrace_fuword8_nocheck) + +/* +uint16_t dtrace_fuword16_nocheck(void *addr) +*/ + + ENTRY(dtrace_fuword16_nocheck) + movl 4(%esp), %ecx + xorl %eax, %eax + movzwl (%ecx), %eax + ret + END(dtrace_fuword16_nocheck) + +/* +uint32_t dtrace_fuword32_nocheck(void *addr) +*/ + + ENTRY(dtrace_fuword32_nocheck) + movl 4(%esp), %ecx + xorl %eax, %eax + movl (%ecx), %eax + ret + END(dtrace_fuword32_nocheck) + +/* +uint64_t dtrace_fuword64_nocheck(void *addr) +*/ + + ENTRY(dtrace_fuword64_nocheck) + movl 4(%esp), %ecx + xorl %eax, %eax + xorl %edx, %edx + movl (%ecx), %eax + movl 4(%ecx), %edx + ret + END(dtrace_fuword64_nocheck) + +/* +void dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which, int fault, int fltoffs, uintptr_t illval) +*/ + + ENTRY(dtrace_probe_error) + pushl %ebp + movl %esp, %ebp + pushl 0x1c(%ebp) + pushl 0x18(%ebp) + pushl 0x14(%ebp) + pushl 0x10(%ebp) + pushl 0xc(%ebp) + pushl 0x8(%ebp) + pushl dtrace_probeid_error + call dtrace_probe + movl %ebp, %esp + popl %ebp + ret + END(dtrace_probe_error) + +/* +void dtrace_membar_producer(void) +*/ + + ENTRY(dtrace_membar_producer) + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + END(dtrace_membar_producer) + +/* +void dtrace_membar_consumer(void) +*/ + + ENTRY(dtrace_membar_consumer) + rep; ret /* use 2 byte return instruction when branch target */ + /* AMD Software Optimization Guide - Section 6.2 */ + END(dtrace_membar_consumer) + +/* +dtrace_icookie_t dtrace_interrupt_disable(void) +*/ + ENTRY(dtrace_interrupt_disable) + pushfl + popl %eax + cli + ret + END(dtrace_interrupt_disable) + +/* +void dtrace_interrupt_enable(dtrace_icookie_t cookie) +*/ + ENTRY(dtrace_interrupt_enable) + movl 4(%esp), %eax + pushl %eax + popfl + ret + END(dtrace_interrupt_enable) + +/* + * The panic() and cmn_err() functions invoke vpanic() as a common entry point + * into the panic code implemented in panicsys(). vpanic() is responsible + * for passing through the format string and arguments, and constructing a + * regs structure on the stack into which it saves the current register + * values. If we are not dying due to a fatal trap, these registers will + * then be preserved in panicbuf as the current processor state. Before + * invoking panicsys(), vpanic() activates the first panic trigger (see + * common/os/panic.c) and switches to the panic_stack if successful. Note that + * DTrace takes a slightly different panic path if it must panic from probe + * context. Instead of calling panic, it calls into dtrace_vpanic(), which + * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and + * branches back into vpanic(). + */ +/* +void vpanic(const char *format, va_list alist) +*/ + ENTRY(vpanic) /* Initial stack layout: */ + + pushl %ebp /* | %eip | 20 */ + movl %esp, %ebp /* | %ebp | 16 */ + pushl %eax /* | %eax | 12 */ + pushl %ebx /* | %ebx | 8 */ + pushl %ecx /* | %ecx | 4 */ + pushl %edx /* | %edx | 0 */ + + movl %esp, %ebx /* %ebx = current stack pointer */ + + lea panic_quiesce, %eax /* %eax = &panic_quiesce */ + pushl %eax /* push &panic_quiesce */ + call panic_trigger /* %eax = panic_trigger() */ + addl $4, %esp /* reset stack pointer */ + +vpanic_common: + cmpl $0, %eax /* if (%eax == 0) */ + je 0f /* goto 0f; */ + + /* + * If panic_trigger() was successful, we are the first to initiate a + * panic: we now switch to the reserved panic_stack before continuing. + */ + lea panic_stack, %esp /* %esp = panic_stack */ + addl $PANICSTKSIZE, %esp /* %esp += PANICSTKSIZE */ + +0: subl $REGSIZE, %esp /* allocate struct regs */ + + /* + * Now that we've got everything set up, store the register values as + * they were when we entered vpanic() to the designated location in + * the regs structure we allocated on the stack. + */ +#ifdef notyet + mov %gs, %edx + mov %edx, REGOFF_GS(%esp) + mov %fs, %edx + mov %edx, REGOFF_FS(%esp) + mov %es, %edx + mov %edx, REGOFF_ES(%esp) + mov %ds, %edx + mov %edx, REGOFF_DS(%esp) + movl %edi, REGOFF_EDI(%esp) + movl %esi, REGOFF_ESI(%esp) + movl 16(%ebx), %ecx + movl %ecx, REGOFF_EBP(%esp) + movl %ebx, %ecx + addl $20, %ecx + movl %ecx, REGOFF_ESP(%esp) + movl 8(%ebx), %ecx + movl %ecx, REGOFF_EBX(%esp) + movl 0(%ebx), %ecx + movl %ecx, REGOFF_EDX(%esp) + movl 4(%ebx), %ecx + movl %ecx, REGOFF_ECX(%esp) + movl 12(%ebx), %ecx + movl %ecx, REGOFF_EAX(%esp) + movl $0, REGOFF_TRAPNO(%esp) + movl $0, REGOFF_ERR(%esp) + lea vpanic, %ecx + movl %ecx, REGOFF_EIP(%esp) + mov %cs, %edx + movl %edx, REGOFF_CS(%esp) + pushfl + popl %ecx + movl %ecx, REGOFF_EFL(%esp) + movl $0, REGOFF_UESP(%esp) + mov %ss, %edx + movl %edx, REGOFF_SS(%esp) + + movl %esp, %ecx /* %ecx = ®s */ + pushl %eax /* push on_panic_stack */ + pushl %ecx /* push ®s */ + movl 12(%ebp), %ecx /* %ecx = alist */ + pushl %ecx /* push alist */ + movl 8(%ebp), %ecx /* %ecx = format */ + pushl %ecx /* push format */ + call panicsys /* panicsys(); */ + addl $16, %esp /* pop arguments */ + + addl $REGSIZE, %esp +#endif + popl %edx + popl %ecx + popl %ebx + popl %eax + leave + ret + END(vpanic) + +/* +void dtrace_vpanic(const char *format, va_list alist) +*/ + ENTRY(dtrace_vpanic) /* Initial stack layout: */ + + pushl %ebp /* | %eip | 20 */ + movl %esp, %ebp /* | %ebp | 16 */ + pushl %eax /* | %eax | 12 */ + pushl %ebx /* | %ebx | 8 */ + pushl %ecx /* | %ecx | 4 */ + pushl %edx /* | %edx | 0 */ + + movl %esp, %ebx /* %ebx = current stack pointer */ + + lea panic_quiesce, %eax /* %eax = &panic_quiesce */ + pushl %eax /* push &panic_quiesce */ + call dtrace_panic_trigger /* %eax = dtrace_panic_trigger() */ + addl $4, %esp /* reset stack pointer */ + jmp vpanic_common /* jump back to common code */ + + END(dtrace_vpanic) + +/* +int +panic_trigger(int *tp) +*/ + ENTRY(panic_trigger) + xorl %eax, %eax + movl $0xdefacedd, %edx + lock + xchgl %edx, (%edi) + cmpl $0, %edx + je 0f + movl $0, %eax + ret +0: movl $1, %eax + ret + END(panic_trigger) + +/* +int +dtrace_panic_trigger(int *tp) +*/ + ENTRY(dtrace_panic_trigger) + xorl %eax, %eax + movl $0xdefacedd, %edx + lock + xchgl %edx, (%edi) + cmpl $0, %edx + je 0f + movl $0, %eax + ret +0: movl $1, %eax + ret + END(dtrace_panic_trigger) diff --git a/sys/cddl/dev/dtrace/i386/dtrace_isa.c b/sys/cddl/dev/dtrace/i386/dtrace_isa.c new file mode 100644 index 0000000..bf891aa --- /dev/null +++ b/sys/cddl/dev/dtrace/i386/dtrace_isa.c @@ -0,0 +1,622 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#include <sys/cdefs.h> + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/stack.h> +#include <sys/pcpu.h> + +#include <machine/md_var.h> +#include <machine/stack.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> + +extern uintptr_t kernbase; +uintptr_t kernelbase = (uintptr_t) &kernbase; + +#define INKERNEL(va) (((vm_offset_t)(va)) >= USRSTACK && \ + ((vm_offset_t)(va)) < VM_MAX_KERNEL_ADDRESS) + +uint8_t dtrace_fuword8_nocheck(void *); +uint16_t dtrace_fuword16_nocheck(void *); +uint32_t dtrace_fuword32_nocheck(void *); +uint64_t dtrace_fuword64_nocheck(void *); + +void +dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, + uint32_t *intrpc) +{ + int depth = 0; + register_t ebp; + struct i386_frame *frame; + vm_offset_t callpc; + pc_t caller = (pc_t) solaris_cpu[curcpu].cpu_dtrace_caller; + + if (intrpc != 0) + pcstack[depth++] = (pc_t) intrpc; + + aframes++; + + __asm __volatile("movl %%ebp,%0" : "=r" (ebp)); + + frame = (struct i386_frame *)ebp; + while (depth < pcstack_limit) { + if (!INKERNEL(frame)) + break; + + callpc = frame->f_retaddr; + + if (!INKERNEL(callpc)) + break; + + if (aframes > 0) { + aframes--; + if ((aframes == 0) && (caller != 0)) { + pcstack[depth++] = caller; + } + } + else { + pcstack[depth++] = callpc; + } + + if (frame->f_frame <= frame || + (vm_offset_t)frame->f_frame >= + (vm_offset_t)ebp + KSTACK_PAGES * PAGE_SIZE) + break; + frame = frame->f_frame; + } + + for (; depth < pcstack_limit; depth++) { + pcstack[depth] = 0; + } +} + +#ifdef notyet +static int +dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, + uintptr_t sp) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = curproc; + uintptr_t oldcontext = lwp->lwp_oldcontext; + volatile uint16_t *flags = + (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; + size_t s1, s2; + int ret = 0; + + ASSERT(pcstack == NULL || pcstack_limit > 0); + + if (p->p_model == DATAMODEL_NATIVE) { + s1 = sizeof (struct frame) + 2 * sizeof (long); + s2 = s1 + sizeof (siginfo_t); + } else { + s1 = sizeof (struct frame32) + 3 * sizeof (int); + s2 = s1 + sizeof (siginfo32_t); + } + + while (pc != 0 && sp != 0) { + ret++; + if (pcstack != NULL) { + *pcstack++ = (uint64_t)pc; + pcstack_limit--; + if (pcstack_limit <= 0) + break; + } + + if (oldcontext == sp + s1 || oldcontext == sp + s2) { + if (p->p_model == DATAMODEL_NATIVE) { + ucontext_t *ucp = (ucontext_t *)oldcontext; + greg_t *gregs = ucp->uc_mcontext.gregs; + + sp = dtrace_fulword(&gregs[REG_FP]); + pc = dtrace_fulword(&gregs[REG_PC]); + + oldcontext = dtrace_fulword(&ucp->uc_link); + } else { + ucontext32_t *ucp = (ucontext32_t *)oldcontext; + greg32_t *gregs = ucp->uc_mcontext.gregs; + + sp = dtrace_fuword32(&gregs[EBP]); + pc = dtrace_fuword32(&gregs[EIP]); + + oldcontext = dtrace_fuword32(&ucp->uc_link); + } + } else { + if (p->p_model == DATAMODEL_NATIVE) { + struct frame *fr = (struct frame *)sp; + + pc = dtrace_fulword(&fr->fr_savpc); + sp = dtrace_fulword(&fr->fr_savfp); + } else { + struct frame32 *fr = (struct frame32 *)sp; + + pc = dtrace_fuword32(&fr->fr_savpc); + sp = dtrace_fuword32(&fr->fr_savfp); + } + } + + /* + * This is totally bogus: if we faulted, we're going to clear + * the fault and break. This is to deal with the apparently + * broken Java stacks on x86. + */ + if (*flags & CPU_DTRACE_FAULT) { + *flags &= ~CPU_DTRACE_FAULT; + break; + } + } + + return (ret); +} + +void +dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = curproc; + struct regs *rp; + uintptr_t pc, sp; + volatile uint16_t *flags = + (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; + int n; + + if (*flags & CPU_DTRACE_FAULT) + return; + + if (pcstack_limit <= 0) + return; + + /* + * If there's no user context we still need to zero the stack. + */ + if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) + goto zero; + + *pcstack++ = (uint64_t)p->p_pid; + pcstack_limit--; + + if (pcstack_limit <= 0) + return; + + pc = rp->r_pc; + sp = rp->r_fp; + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + *pcstack++ = (uint64_t)pc; + pcstack_limit--; + if (pcstack_limit <= 0) + return; + + if (p->p_model == DATAMODEL_NATIVE) + pc = dtrace_fulword((void *)rp->r_sp); + else + pc = dtrace_fuword32((void *)rp->r_sp); + } + + n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp); + ASSERT(n >= 0); + ASSERT(n <= pcstack_limit); + + pcstack += n; + pcstack_limit -= n; + +zero: + while (pcstack_limit-- > 0) + *pcstack++ = NULL; +} + +int +dtrace_getustackdepth(void) +{ +} + +void +dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) +{ + klwp_t *lwp = ttolwp(curthread); + proc_t *p = curproc; + struct regs *rp; + uintptr_t pc, sp, oldcontext; + volatile uint16_t *flags = + (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; + size_t s1, s2; + + if (*flags & CPU_DTRACE_FAULT) + return; + + if (pcstack_limit <= 0) + return; + + /* + * If there's no user context we still need to zero the stack. + */ + if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL) + goto zero; + + *pcstack++ = (uint64_t)p->p_pid; + pcstack_limit--; + + if (pcstack_limit <= 0) + return; + + pc = rp->r_pc; + sp = rp->r_fp; + oldcontext = lwp->lwp_oldcontext; + + if (p->p_model == DATAMODEL_NATIVE) { + s1 = sizeof (struct frame) + 2 * sizeof (long); + s2 = s1 + sizeof (siginfo_t); + } else { + s1 = sizeof (struct frame32) + 3 * sizeof (int); + s2 = s1 + sizeof (siginfo32_t); + } + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + *pcstack++ = (uint64_t)pc; + *fpstack++ = 0; + pcstack_limit--; + if (pcstack_limit <= 0) + return; + + if (p->p_model == DATAMODEL_NATIVE) + pc = dtrace_fulword((void *)rp->r_sp); + else + pc = dtrace_fuword32((void *)rp->r_sp); + } + + while (pc != 0 && sp != 0) { + *pcstack++ = (uint64_t)pc; + *fpstack++ = sp; + pcstack_limit--; + if (pcstack_limit <= 0) + break; + + if (oldcontext == sp + s1 || oldcontext == sp + s2) { + if (p->p_model == DATAMODEL_NATIVE) { + ucontext_t *ucp = (ucontext_t *)oldcontext; + greg_t *gregs = ucp->uc_mcontext.gregs; + + sp = dtrace_fulword(&gregs[REG_FP]); + pc = dtrace_fulword(&gregs[REG_PC]); + + oldcontext = dtrace_fulword(&ucp->uc_link); + } else { + ucontext_t *ucp = (ucontext_t *)oldcontext; + greg_t *gregs = ucp->uc_mcontext.gregs; + + sp = dtrace_fuword32(&gregs[EBP]); + pc = dtrace_fuword32(&gregs[EIP]); + + oldcontext = dtrace_fuword32(&ucp->uc_link); + } + } else { + if (p->p_model == DATAMODEL_NATIVE) { + struct frame *fr = (struct frame *)sp; + + pc = dtrace_fulword(&fr->fr_savpc); + sp = dtrace_fulword(&fr->fr_savfp); + } else { + struct frame32 *fr = (struct frame32 *)sp; + + pc = dtrace_fuword32(&fr->fr_savpc); + sp = dtrace_fuword32(&fr->fr_savfp); + } + } + + /* + * This is totally bogus: if we faulted, we're going to clear + * the fault and break. This is to deal with the apparently + * broken Java stacks on x86. + */ + if (*flags & CPU_DTRACE_FAULT) { + *flags &= ~CPU_DTRACE_FAULT; + break; + } + } + +zero: + while (pcstack_limit-- > 0) + *pcstack++ = NULL; +} +#endif + +uint64_t +dtrace_getarg(int arg, int aframes) +{ + uintptr_t val; + struct i386_frame *fp = (struct i386_frame *)dtrace_getfp(); + uintptr_t *stack; + int i; + + for (i = 1; i <= aframes; i++) { + fp = fp->f_frame; + + if (fp->f_retaddr == (long)dtrace_invop_callsite) { + /* + * If we pass through the invalid op handler, we will + * use the pointer that it passed to the stack as the + * second argument to dtrace_invop() as the pointer to + * the stack. When using this stack, we must step + * beyond the EIP/RIP that was pushed when the trap was + * taken -- hence the "+ 1" below. + */ + stack = ((uintptr_t **)&fp[1])[1] + 1; + goto load; + } + + } + + /* + * We know that we did not come through a trap to get into + * dtrace_probe() -- the provider simply called dtrace_probe() + * directly. As this is the case, we need to shift the argument + * that we're looking for: the probe ID is the first argument to + * dtrace_probe(), so the argument n will actually be found where + * one would expect to find argument (n + 1). + */ + arg++; + + stack = (uintptr_t *)&fp[1]; + +load: + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + val = stack[arg]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (val); +} + +int +dtrace_getstackdepth(int aframes) +{ + int depth = 0; + struct i386_frame *frame; + vm_offset_t ebp; + + aframes++; + ebp = dtrace_getfp(); + frame = (struct i386_frame *)ebp; + depth++; + for(;;) { + if (!INKERNEL((long) frame)) + break; + if (!INKERNEL((long) frame->f_frame)) + break; + depth++; + if (frame->f_frame <= frame || + (vm_offset_t)frame->f_frame >= + (vm_offset_t)ebp + KSTACK_PAGES * PAGE_SIZE) + break; + frame = frame->f_frame; + } + if (depth < aframes) + return 0; + else + return depth - aframes; +} + +#ifdef notyet +ulong_t +dtrace_getreg(struct regs *rp, uint_t reg) +{ +#if defined(__amd64) + int regmap[] = { + REG_GS, /* GS */ + REG_FS, /* FS */ + REG_ES, /* ES */ + REG_DS, /* DS */ + REG_RDI, /* EDI */ + REG_RSI, /* ESI */ + REG_RBP, /* EBP */ + REG_RSP, /* ESP */ + REG_RBX, /* EBX */ + REG_RDX, /* EDX */ + REG_RCX, /* ECX */ + REG_RAX, /* EAX */ + REG_TRAPNO, /* TRAPNO */ + REG_ERR, /* ERR */ + REG_RIP, /* EIP */ + REG_CS, /* CS */ + REG_RFL, /* EFL */ + REG_RSP, /* UESP */ + REG_SS /* SS */ + }; + + if (reg <= SS) { + if (reg >= sizeof (regmap) / sizeof (int)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + + reg = regmap[reg]; + } else { + reg -= SS + 1; + } + + switch (reg) { + case REG_RDI: + return (rp->r_rdi); + case REG_RSI: + return (rp->r_rsi); + case REG_RDX: + return (rp->r_rdx); + case REG_RCX: + return (rp->r_rcx); + case REG_R8: + return (rp->r_r8); + case REG_R9: + return (rp->r_r9); + case REG_RAX: + return (rp->r_rax); + case REG_RBX: + return (rp->r_rbx); + case REG_RBP: + return (rp->r_rbp); + case REG_R10: + return (rp->r_r10); + case REG_R11: + return (rp->r_r11); + case REG_R12: + return (rp->r_r12); + case REG_R13: + return (rp->r_r13); + case REG_R14: + return (rp->r_r14); + case REG_R15: + return (rp->r_r15); + case REG_DS: + return (rp->r_ds); + case REG_ES: + return (rp->r_es); + case REG_FS: + return (rp->r_fs); + case REG_GS: + return (rp->r_gs); + case REG_TRAPNO: + return (rp->r_trapno); + case REG_ERR: + return (rp->r_err); + case REG_RIP: + return (rp->r_rip); + case REG_CS: + return (rp->r_cs); + case REG_SS: + return (rp->r_ss); + case REG_RFL: + return (rp->r_rfl); + case REG_RSP: + return (rp->r_rsp); + default: + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + +#else + if (reg > SS) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + + return ((&rp->r_gs)[reg]); +#endif +} +#endif + +static int +dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) +{ + ASSERT(kaddr >= kernelbase && kaddr + size >= kaddr); + + if (uaddr + size >= kernelbase || uaddr + size < uaddr) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[curcpu].cpuc_dtrace_illval = uaddr; + return (0); + } + + return (1); +} + +void +dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copy(uaddr, kaddr, size); +} + +void +dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copy(kaddr, uaddr, size); +} + +void +dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copystr(uaddr, kaddr, size, flags); +} + +void +dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size, + volatile uint16_t *flags) +{ + if (dtrace_copycheck(uaddr, kaddr, size)) + dtrace_copystr(kaddr, uaddr, size, flags); +} + +uint8_t +dtrace_fuword8(void *uaddr) +{ + if ((uintptr_t)uaddr >= kernelbase) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword8_nocheck(uaddr)); +} + +uint16_t +dtrace_fuword16(void *uaddr) +{ + if ((uintptr_t)uaddr >= kernelbase) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword16_nocheck(uaddr)); +} + +uint32_t +dtrace_fuword32(void *uaddr) +{ + if ((uintptr_t)uaddr >= kernelbase) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword32_nocheck(uaddr)); +} + +uint64_t +dtrace_fuword64(void *uaddr) +{ + if ((uintptr_t)uaddr >= kernelbase) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); + cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr; + return (0); + } + return (dtrace_fuword64_nocheck(uaddr)); +} diff --git a/sys/cddl/dev/dtrace/i386/dtrace_subr.c b/sys/cddl/dev/dtrace/i386/dtrace_subr.c new file mode 100644 index 0000000..50515b6 --- /dev/null +++ b/sys/cddl/dev/dtrace/i386/dtrace_subr.c @@ -0,0 +1,503 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + * + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/kmem.h> +#include <sys/smp.h> +#include <sys/dtrace_impl.h> +#include <sys/dtrace_bsd.h> +#include <machine/clock.h> +#include <machine/frame.h> +#include <vm/pmap.h> + +extern uintptr_t kernelbase; +extern uintptr_t dtrace_in_probe_addr; +extern int dtrace_in_probe; + +int dtrace_invop(uintptr_t, uintptr_t *, uintptr_t); + +typedef struct dtrace_invop_hdlr { + int (*dtih_func)(uintptr_t, uintptr_t *, uintptr_t); + struct dtrace_invop_hdlr *dtih_next; +} dtrace_invop_hdlr_t; + +dtrace_invop_hdlr_t *dtrace_invop_hdlr; + +int +dtrace_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) +{ + dtrace_invop_hdlr_t *hdlr; + int rval; + + for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) + if ((rval = hdlr->dtih_func(addr, stack, eax)) != 0) + return (rval); + + return (0); +} + +void +dtrace_invop_add(int (*func)(uintptr_t, uintptr_t *, uintptr_t)) +{ + dtrace_invop_hdlr_t *hdlr; + + hdlr = kmem_alloc(sizeof (dtrace_invop_hdlr_t), KM_SLEEP); + hdlr->dtih_func = func; + hdlr->dtih_next = dtrace_invop_hdlr; + dtrace_invop_hdlr = hdlr; +} + +void +dtrace_invop_remove(int (*func)(uintptr_t, uintptr_t *, uintptr_t)) +{ + dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlr, *prev = NULL; + + for (;;) { + if (hdlr == NULL) + panic("attempt to remove non-existent invop handler"); + + if (hdlr->dtih_func == func) + break; + + prev = hdlr; + hdlr = hdlr->dtih_next; + } + + if (prev == NULL) { + ASSERT(dtrace_invop_hdlr == hdlr); + dtrace_invop_hdlr = hdlr->dtih_next; + } else { + ASSERT(dtrace_invop_hdlr != hdlr); + prev->dtih_next = hdlr->dtih_next; + } + + kmem_free(hdlr, 0); +} + +void +dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) +{ + (*func)(0, kernelbase); +} + +void +dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) +{ + cpumask_t cpus; + + critical_enter(); + + if (cpu == DTRACE_CPUALL) + cpus = all_cpus; + else + cpus = (cpumask_t) (1 << cpu); + + /* If the current CPU is in the set, call the function directly: */ + if ((cpus & (1 << curcpu)) != 0) { + (*func)(arg); + + /* Mask the current CPU from the set */ + cpus &= ~(1 << curcpu); + } + + /* If there are any CPUs in the set, cross-call to those CPUs */ + if (cpus != 0) + smp_rendezvous_cpus(cpus, NULL, func, smp_no_rendevous_barrier, arg); + + critical_exit(); +} + +static void +dtrace_sync_func(void) +{ +} + +void +dtrace_sync(void) +{ + dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL); +} + +#ifdef notyet +int (*dtrace_fasttrap_probe_ptr)(struct regs *); +int (*dtrace_pid_probe_ptr)(struct regs *); +int (*dtrace_return_probe_ptr)(struct regs *); + +void +dtrace_user_probe(struct regs *rp, caddr_t addr, processorid_t cpuid) +{ + krwlock_t *rwp; + proc_t *p = curproc; + extern void trap(struct regs *, caddr_t, processorid_t); + + if (USERMODE(rp->r_cs) || (rp->r_ps & PS_VM)) { + if (curthread->t_cred != p->p_cred) { + cred_t *oldcred = curthread->t_cred; + /* + * DTrace accesses t_cred in probe context. t_cred + * must always be either NULL, or point to a valid, + * allocated cred structure. + */ + curthread->t_cred = crgetcred(); + crfree(oldcred); + } + } + + if (rp->r_trapno == T_DTRACE_RET) { + uint8_t step = curthread->t_dtrace_step; + uint8_t ret = curthread->t_dtrace_ret; + uintptr_t npc = curthread->t_dtrace_npc; + + if (curthread->t_dtrace_ast) { + aston(curthread); + curthread->t_sig_check = 1; + } + + /* + * Clear all user tracing flags. + */ + curthread->t_dtrace_ft = 0; + + /* + * If we weren't expecting to take a return probe trap, kill + * the process as though it had just executed an unassigned + * trap instruction. + */ + if (step == 0) { + tsignal(curthread, SIGILL); + return; + } + + /* + * If we hit this trap unrelated to a return probe, we're + * just here to reset the AST flag since we deferred a signal + * until after we logically single-stepped the instruction we + * copied out. + */ + if (ret == 0) { + rp->r_pc = npc; + return; + } + + /* + * We need to wait until after we've called the + * dtrace_return_probe_ptr function pointer to set %pc. + */ + rwp = &CPU->cpu_ft_lock; + rw_enter(rwp, RW_READER); + if (dtrace_return_probe_ptr != NULL) + (void) (*dtrace_return_probe_ptr)(rp); + rw_exit(rwp); + rp->r_pc = npc; + + } else if (rp->r_trapno == T_DTRACE_PROBE) { + rwp = &CPU->cpu_ft_lock; + rw_enter(rwp, RW_READER); + if (dtrace_fasttrap_probe_ptr != NULL) + (void) (*dtrace_fasttrap_probe_ptr)(rp); + rw_exit(rwp); + + } else if (rp->r_trapno == T_BPTFLT) { + uint8_t instr; + rwp = &CPU->cpu_ft_lock; + + /* + * The DTrace fasttrap provider uses the breakpoint trap + * (int 3). We let DTrace take the first crack at handling + * this trap; if it's not a probe that DTrace knowns about, + * we call into the trap() routine to handle it like a + * breakpoint placed by a conventional debugger. + */ + rw_enter(rwp, RW_READER); + if (dtrace_pid_probe_ptr != NULL && + (*dtrace_pid_probe_ptr)(rp) == 0) { + rw_exit(rwp); + return; + } + rw_exit(rwp); + + /* + * If the instruction that caused the breakpoint trap doesn't + * look like an int 3 anymore, it may be that this tracepoint + * was removed just after the user thread executed it. In + * that case, return to user land to retry the instuction. + */ + if (fuword8((void *)(rp->r_pc - 1), &instr) == 0 && + instr != FASTTRAP_INSTR) { + rp->r_pc--; + return; + } + + trap(rp, addr, cpuid); + + } else { + trap(rp, addr, cpuid); + } +} + +void +dtrace_safe_synchronous_signal(void) +{ + kthread_t *t = curthread; + struct regs *rp = lwptoregs(ttolwp(t)); + size_t isz = t->t_dtrace_npc - t->t_dtrace_pc; + + ASSERT(t->t_dtrace_on); + + /* + * If we're not in the range of scratch addresses, we're not actually + * tracing user instructions so turn off the flags. If the instruction + * we copied out caused a synchonous trap, reset the pc back to its + * original value and turn off the flags. + */ + if (rp->r_pc < t->t_dtrace_scrpc || + rp->r_pc > t->t_dtrace_astpc + isz) { + t->t_dtrace_ft = 0; + } else if (rp->r_pc == t->t_dtrace_scrpc || + rp->r_pc == t->t_dtrace_astpc) { + rp->r_pc = t->t_dtrace_pc; + t->t_dtrace_ft = 0; + } +} + +int +dtrace_safe_defer_signal(void) +{ + kthread_t *t = curthread; + struct regs *rp = lwptoregs(ttolwp(t)); + size_t isz = t->t_dtrace_npc - t->t_dtrace_pc; + + ASSERT(t->t_dtrace_on); + + /* + * If we're not in the range of scratch addresses, we're not actually + * tracing user instructions so turn off the flags. + */ + if (rp->r_pc < t->t_dtrace_scrpc || + rp->r_pc > t->t_dtrace_astpc + isz) { + t->t_dtrace_ft = 0; + return (0); + } + + /* + * If we've executed the original instruction, but haven't performed + * the jmp back to t->t_dtrace_npc or the clean up of any registers + * used to emulate %rip-relative instructions in 64-bit mode, do that + * here and take the signal right away. We detect this condition by + * seeing if the program counter is the range [scrpc + isz, astpc). + */ + if (t->t_dtrace_astpc - rp->r_pc < + t->t_dtrace_astpc - t->t_dtrace_scrpc - isz) { +#ifdef __amd64 + /* + * If there is a scratch register and we're on the + * instruction immediately after the modified instruction, + * restore the value of that scratch register. + */ + if (t->t_dtrace_reg != 0 && + rp->r_pc == t->t_dtrace_scrpc + isz) { + switch (t->t_dtrace_reg) { + case REG_RAX: + rp->r_rax = t->t_dtrace_regv; + break; + case REG_RCX: + rp->r_rcx = t->t_dtrace_regv; + break; + case REG_R8: + rp->r_r8 = t->t_dtrace_regv; + break; + case REG_R9: + rp->r_r9 = t->t_dtrace_regv; + break; + } + } +#endif + rp->r_pc = t->t_dtrace_npc; + t->t_dtrace_ft = 0; + return (0); + } + + /* + * Otherwise, make sure we'll return to the kernel after executing + * the copied out instruction and defer the signal. + */ + if (!t->t_dtrace_step) { + ASSERT(rp->r_pc < t->t_dtrace_astpc); + rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc; + t->t_dtrace_step = 1; + } + + t->t_dtrace_ast = 1; + + return (1); +} +#endif + +static int64_t tgt_cpu_tsc; +static int64_t hst_cpu_tsc; +static int64_t tsc_skew[MAXCPU]; + +static void +dtrace_gethrtime_init_sync(void *arg) +{ +#ifdef CHECK_SYNC + /* + * Delay this function from returning on one + * of the CPUs to check that the synchronisation + * works. + */ + uintptr_t cpu = (uintptr_t) arg; + + if (cpu == curcpu) { + int i; + for (i = 0; i < 1000000000; i++) + tgt_cpu_tsc = rdtsc(); + tgt_cpu_tsc = 0; + } +#endif +} + +static void +dtrace_gethrtime_init_cpu(void *arg) +{ + uintptr_t cpu = (uintptr_t) arg; + + if (cpu == curcpu) + tgt_cpu_tsc = rdtsc(); + else + hst_cpu_tsc = rdtsc(); +} + +static void +dtrace_gethrtime_init(void *arg) +{ + cpumask_t map; + int i; + struct pcpu *cp; + + /* The current CPU is the reference one. */ + tsc_skew[curcpu] = 0; + + for (i = 0; i <= mp_maxid; i++) { + if (i == curcpu) + continue; + + if ((cp = pcpu_find(i)) == NULL) + continue; + + map = 0; + map |= (1 << curcpu); + map |= (1 << i); + + smp_rendezvous_cpus(map, dtrace_gethrtime_init_sync, + dtrace_gethrtime_init_cpu, + smp_no_rendevous_barrier, (void *)(uintptr_t) i); + + tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc; + } +} + +SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init, NULL); + +/* + * DTrace needs a high resolution time function which can + * be called from a probe context and guaranteed not to have + * instrumented with probes itself. + * + * Returns nanoseconds since boot. + */ +uint64_t +dtrace_gethrtime() +{ + return ((rdtsc() + tsc_skew[curcpu]) * (int64_t) 1000000000 / tsc_freq); +} + +uint64_t +dtrace_gethrestime(void) +{ + printf("%s(%d): XXX\n",__func__,__LINE__); + return (0); +} + +/* Function to handle DTrace traps during probes. See i386/i386/trap.c */ +int +dtrace_trap(struct trapframe *frame, u_int type) +{ + /* + * A trap can occur while DTrace executes a probe. Before + * executing the probe, DTrace blocks re-scheduling and sets + * a flag in it's per-cpu flags to indicate that it doesn't + * want to fault. On returning from the the probe, the no-fault + * flag is cleared and finally re-scheduling is enabled. + * + * Check if DTrace has enabled 'no-fault' mode: + * + */ + if ((cpu_core[curcpu].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0) { + /* + * There are only a couple of trap types that are expected. + * All the rest will be handled in the usual way. + */ + switch (type) { + /* General protection fault. */ + case T_PROTFLT: + /* Flag an illegal operation. */ + cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; + + /* + * Offset the instruction pointer to the instruction + * following the one causing the fault. + */ + frame->tf_eip += dtrace_instr_size((u_char *) frame->tf_eip); + return (1); + /* Page fault. */ + case T_PAGEFLT: + /* Flag a bad address. */ + cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; + cpu_core[curcpu].cpuc_dtrace_illval = rcr2(); + + /* + * Offset the instruction pointer to the instruction + * following the one causing the fault. + */ + frame->tf_eip += dtrace_instr_size((u_char *) frame->tf_eip); + return (1); + default: + /* Handle all other traps in the usual way. */ + break; + } + } + + /* Handle the trap in the usual way. */ + return (0); +} diff --git a/sys/cddl/dev/dtrace/i386/instr_size.c b/sys/cddl/dev/dtrace/i386/instr_size.c new file mode 100644 index 0000000..fb6af2d --- /dev/null +++ b/sys/cddl/dev/dtrace/i386/instr_size.c @@ -0,0 +1,132 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD$ + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +#if defined(sun) +#pragma ident "@(#)instr_size.c 1.14 05/07/08 SMI" +#endif + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/proc.h> +#if defined(sun) +#include <sys/cmn_err.h> +#include <sys/archsystm.h> +#include <sys/copyops.h> +#include <vm/seg_enum.h> +#include <sys/privregs.h> +#else +typedef u_int model_t; +#define DATAMODEL_NATIVE 0 +int dtrace_instr_size(uchar_t *); +#endif + +#include <dis_tables.h> + +/* + * This subsystem (with the minor exception of the instr_size() function) is + * is called from DTrace probe context. This imposes several requirements on + * the implementation: + * + * 1. External subsystems and functions may not be referenced. The one current + * exception is for cmn_err, but only to signal the detection of table + * errors. Assuming the tables are correct, no combination of input is to + * trigger a cmn_err call. + * + * 2. These functions can't be allowed to be traced. To prevent this, + * all functions in the probe path (everything except instr_size()) must + * have names that begin with "dtrace_". + */ + +typedef enum dis_isize { + DIS_ISIZE_INSTR, + DIS_ISIZE_OPERAND +} dis_isize_t; + + +/* + * get a byte from instruction stream + */ +static int +dtrace_dis_get_byte(void *p) +{ + int ret; + uchar_t **instr = p; + + ret = **instr; + *instr += 1; + + return (ret); +} + +/* + * Returns either the size of a given instruction, in bytes, or the size of that + * instruction's memory access (if any), depending on the value of `which'. + * If a programming error in the tables is detected, the system will panic to + * ease diagnosis. Invalid instructions will not be flagged. They will appear + * to have an instruction size between 1 and the actual size, and will be + * reported as having no memory impact. + */ +/* ARGSUSED2 */ +static int +dtrace_dis_isize(uchar_t *instr, dis_isize_t which, model_t model, int *rmindex) +{ + int sz; + dis86_t x; + uint_t mode = SIZE32; + +#if defined(sun) + mode = (model == DATAMODEL_LP64) ? SIZE64 : SIZE32; +#endif + + x.d86_data = (void **)&instr; + x.d86_get_byte = dtrace_dis_get_byte; + x.d86_check_func = NULL; + + if (dtrace_disx86(&x, mode) != 0) + return (-1); + + if (which == DIS_ISIZE_INSTR) + sz = x.d86_len; /* length of the instruction */ + else + sz = x.d86_memsize; /* length of memory operand */ + + if (rmindex != NULL) + *rmindex = x.d86_rmindex; + return (sz); +} + +int +dtrace_instr_size(uchar_t *instr) +{ + return (dtrace_dis_isize(instr, DIS_ISIZE_INSTR, DATAMODEL_NATIVE, + NULL)); +} diff --git a/sys/cddl/dev/fbt/fbt.c b/sys/cddl/dev/fbt/fbt.c new file mode 100644 index 0000000..f3ddc83 --- /dev/null +++ b/sys/cddl/dev/fbt/fbt.c @@ -0,0 +1,1411 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Portions Copyright 2006-2008 John Birrell jb@freebsd.org + * + * $FreeBSD$ + * + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/cdefs.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/cpuvar.h> +#include <sys/fcntl.h> +#include <sys/filio.h> +#include <sys/kdb.h> +#include <sys/kernel.h> +#include <sys/kmem.h> +#include <sys/kthread.h> +#include <sys/limits.h> +#include <sys/linker.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/pcpu.h> +#include <sys/poll.h> +#include <sys/proc.h> +#include <sys/selinfo.h> +#include <sys/smp.h> +#include <sys/syscall.h> +#include <sys/sysent.h> +#include <sys/sysproto.h> +#include <sys/uio.h> +#include <sys/unistd.h> +#include <machine/stdarg.h> + +#include <sys/dtrace.h> +#include <sys/dtrace_bsd.h> + +MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing"); + +#define FBT_PUSHL_EBP 0x55 +#define FBT_MOVL_ESP_EBP0_V0 0x8b +#define FBT_MOVL_ESP_EBP1_V0 0xec +#define FBT_MOVL_ESP_EBP0_V1 0x89 +#define FBT_MOVL_ESP_EBP1_V1 0xe5 +#define FBT_REX_RSP_RBP 0x48 + +#define FBT_POPL_EBP 0x5d +#define FBT_RET 0xc3 +#define FBT_RET_IMM16 0xc2 +#define FBT_LEAVE 0xc9 + +#ifdef __amd64__ +#define FBT_PATCHVAL 0xcc +#else +#define FBT_PATCHVAL 0xf0 +#endif + +static d_open_t fbt_open; +static int fbt_unload(void); +static void fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); +static void fbt_provide_module(void *, modctl_t *); +static void fbt_destroy(void *, dtrace_id_t, void *); +static void fbt_enable(void *, dtrace_id_t, void *); +static void fbt_disable(void *, dtrace_id_t, void *); +static void fbt_load(void *); +static void fbt_suspend(void *, dtrace_id_t, void *); +static void fbt_resume(void *, dtrace_id_t, void *); + +#define FBT_ENTRY "entry" +#define FBT_RETURN "return" +#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask) +#define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total */ + +static struct cdevsw fbt_cdevsw = { + .d_version = D_VERSION, + .d_open = fbt_open, + .d_name = "fbt", +}; + +static dtrace_pattr_t fbt_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +}; + +static dtrace_pops_t fbt_pops = { + NULL, + fbt_provide_module, + fbt_enable, + fbt_disable, + fbt_suspend, + fbt_resume, + fbt_getargdesc, + NULL, + NULL, + fbt_destroy +}; + +typedef struct fbt_probe { + struct fbt_probe *fbtp_hashnext; + uint8_t *fbtp_patchpoint; + int8_t fbtp_rval; + uint8_t fbtp_patchval; + uint8_t fbtp_savedval; + uintptr_t fbtp_roffset; + dtrace_id_t fbtp_id; + const char *fbtp_name; + modctl_t *fbtp_ctl; + int fbtp_loadcnt; + int fbtp_primary; + int fbtp_invop_cnt; + int fbtp_symindx; + struct fbt_probe *fbtp_next; +} fbt_probe_t; + +static struct cdev *fbt_cdev; +static dtrace_provider_id_t fbt_id; +static fbt_probe_t **fbt_probetab; +static int fbt_probetab_size; +static int fbt_probetab_mask; +static int fbt_verbose = 0; + +static void +fbt_doubletrap(void) +{ + fbt_probe_t *fbt; + int i; + + for (i = 0; i < fbt_probetab_size; i++) { + fbt = fbt_probetab[i]; + + for (; fbt != NULL; fbt = fbt->fbtp_next) + *fbt->fbtp_patchpoint = fbt->fbtp_savedval; + } +} + +static int +fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) +{ + solaris_cpu_t *cpu = &solaris_cpu[curcpu]; + uintptr_t stack0, stack1, stack2, stack3, stack4; + fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; + + for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { + if ((uintptr_t)fbt->fbtp_patchpoint == addr) { + fbt->fbtp_invop_cnt++; + if (fbt->fbtp_roffset == 0) { + int i = 0; + /* + * When accessing the arguments on the stack, + * we must protect against accessing beyond + * the stack. We can safely set NOFAULT here + * -- we know that interrupts are already + * disabled. + */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + cpu->cpu_dtrace_caller = stack[i++]; + stack0 = stack[i++]; + stack1 = stack[i++]; + stack2 = stack[i++]; + stack3 = stack[i++]; + stack4 = stack[i++]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); + + dtrace_probe(fbt->fbtp_id, stack0, stack1, + stack2, stack3, stack4); + + cpu->cpu_dtrace_caller = 0; + } else { +#ifdef __amd64__ + /* + * On amd64, we instrument the ret, not the + * leave. We therefore need to set the caller + * to assure that the top frame of a stack() + * action is correct. + */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + cpu->cpu_dtrace_caller = stack[0]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); +#endif + + dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, + rval, 0, 0, 0); + cpu->cpu_dtrace_caller = 0; + } + + return (fbt->fbtp_rval); + } + } + + return (0); +} + +static int +fbt_provide_module_function(linker_file_t lf, int symindx, + linker_symval_t *symval, void *opaque) +{ + char *modname = opaque; + const char *name = symval->name; + fbt_probe_t *fbt, *retfbt; + int j; + int size; + u_int8_t *instr, *limit; + + if (strncmp(name, "dtrace_", 7) == 0 && + strncmp(name, "dtrace_safe_", 12) != 0) { + /* + * Anything beginning with "dtrace_" may be called + * from probe context unless it explicitly indicates + * that it won't be called from probe context by + * using the prefix "dtrace_safe_". + */ + return (0); + } + + if (name[0] == '_' && name[1] == '_') + return (0); + + size = symval->size; + + instr = (u_int8_t *) symval->value; + limit = (u_int8_t *) symval->value + symval->size; + +#ifdef __amd64__ + while (instr < limit) { + if (*instr == FBT_PUSHL_EBP) + break; + + if ((size = dtrace_instr_size(instr)) <= 0) + break; + + instr += size; + } + + if (instr >= limit || *instr != FBT_PUSHL_EBP) { + /* + * We either don't save the frame pointer in this + * function, or we ran into some disassembly + * screw-up. Either way, we bail. + */ + return (0); + } +#else + if (instr[0] != FBT_PUSHL_EBP) + return (0); + + if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 && + instr[2] == FBT_MOVL_ESP_EBP1_V0) && + !(instr[1] == FBT_MOVL_ESP_EBP0_V1 && + instr[2] == FBT_MOVL_ESP_EBP1_V1)) + return (0); +#endif + + fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO); + fbt->fbtp_name = name; + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, + name, FBT_ENTRY, 3, fbt); + fbt->fbtp_patchpoint = instr; + fbt->fbtp_ctl = lf; + fbt->fbtp_loadcnt = lf->loadcnt; + fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP; + fbt->fbtp_savedval = *instr; + fbt->fbtp_patchval = FBT_PATCHVAL; + fbt->fbtp_symindx = symindx; + + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + + lf->fbt_nentries++; + + retfbt = NULL; +again: + if (instr >= limit) + return (0); + + /* + * If this disassembly fails, then we've likely walked off into + * a jump table or some other unsuitable area. Bail out of the + * disassembly now. + */ + if ((size = dtrace_instr_size(instr)) <= 0) + return (0); + +#ifdef __amd64__ + /* + * We only instrument "ret" on amd64 -- we don't yet instrument + * ret imm16, largely because the compiler doesn't seem to + * (yet) emit them in the kernel... + */ + if (*instr != FBT_RET) { + instr += size; + goto again; + } +#else + if (!(size == 1 && + (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) && + (*(instr + 1) == FBT_RET || + *(instr + 1) == FBT_RET_IMM16))) { + instr += size; + goto again; + } +#endif + + /* + * We (desperately) want to avoid erroneously instrumenting a + * jump table, especially given that our markers are pretty + * short: two bytes on x86, and just one byte on amd64. To + * determine if we're looking at a true instruction sequence + * or an inline jump table that happens to contain the same + * byte sequences, we resort to some heuristic sleeze: we + * treat this instruction as being contained within a pointer, + * and see if that pointer points to within the body of the + * function. If it does, we refuse to instrument it. + */ + for (j = 0; j < sizeof (uintptr_t); j++) { + caddr_t check = (caddr_t) instr - j; + uint8_t *ptr; + + if (check < symval->value) + break; + + if (check + sizeof (caddr_t) > (caddr_t)limit) + continue; + + ptr = *(uint8_t **)check; + + if (ptr >= (uint8_t *) symval->value && ptr < limit) { + instr += size; + goto again; + } + } + + /* + * We have a winner! + */ + fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO); + fbt->fbtp_name = name; + + if (retfbt == NULL) { + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, + name, FBT_RETURN, 3, fbt); + } else { + retfbt->fbtp_next = fbt; + fbt->fbtp_id = retfbt->fbtp_id; + } + + retfbt = fbt; + fbt->fbtp_patchpoint = instr; + fbt->fbtp_ctl = lf; + fbt->fbtp_loadcnt = lf->loadcnt; + fbt->fbtp_symindx = symindx; + +#ifndef __amd64__ + if (*instr == FBT_POPL_EBP) { + fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP; + } else { + ASSERT(*instr == FBT_LEAVE); + fbt->fbtp_rval = DTRACE_INVOP_LEAVE; + } + fbt->fbtp_roffset = + (uintptr_t)(instr - (uint8_t *) symval->value) + 1; + +#else + ASSERT(*instr == FBT_RET); + fbt->fbtp_rval = DTRACE_INVOP_RET; + fbt->fbtp_roffset = + (uintptr_t)(instr - (uint8_t *) symval->value); +#endif + + fbt->fbtp_savedval = *instr; + fbt->fbtp_patchval = FBT_PATCHVAL; + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + + lf->fbt_nentries++; + + instr += size; + goto again; +} + +static void +fbt_provide_module(void *arg, modctl_t *lf) +{ + char modname[MAXPATHLEN]; + int i; + size_t len; + + strlcpy(modname, lf->filename, sizeof(modname)); + len = strlen(modname); + if (len > 3 && strcmp(modname + len - 3, ".ko") == 0) + modname[len - 3] = '\0'; + + /* + * Employees of dtrace and their families are ineligible. Void + * where prohibited. + */ + if (strcmp(modname, "dtrace") == 0) + return; + + /* + * The cyclic timer subsystem can be built as a module and DTrace + * depends on that, so it is ineligible too. + */ + if (strcmp(modname, "cyclic") == 0) + return; + + /* + * To register with DTrace, a module must list 'dtrace' as a + * dependency in order for the kernel linker to resolve + * symbols like dtrace_register(). All modules with such a + * dependency are ineligible for FBT tracing. + */ + for (i = 0; i < lf->ndeps; i++) + if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0) + return; + + if (lf->fbt_nentries) { + /* + * This module has some FBT entries allocated; we're afraid + * to screw with it. + */ + return; + } + + /* + * List the functions in the module and the symbol values. + */ + (void) linker_file_function_listall(lf, fbt_provide_module_function, modname); +} + +static void +fbt_destroy(void *arg, dtrace_id_t id, void *parg) +{ + fbt_probe_t *fbt = parg, *next, *hash, *last; + modctl_t *ctl; + int ndx; + + do { + ctl = fbt->fbtp_ctl; + + ctl->fbt_nentries--; + + /* + * Now we need to remove this probe from the fbt_probetab. + */ + ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint); + last = NULL; + hash = fbt_probetab[ndx]; + + while (hash != fbt) { + ASSERT(hash != NULL); + last = hash; + hash = hash->fbtp_hashnext; + } + + if (last != NULL) { + last->fbtp_hashnext = fbt->fbtp_hashnext; + } else { + fbt_probetab[ndx] = fbt->fbtp_hashnext; + } + + next = fbt->fbtp_next; + free(fbt, M_FBT); + + fbt = next; + } while (fbt != NULL); +} + +static void +fbt_enable(void *arg, dtrace_id_t id, void *parg) +{ + fbt_probe_t *fbt = parg; + modctl_t *ctl = fbt->fbtp_ctl; + + ctl->nenabled++; + + /* + * Now check that our modctl has the expected load count. If it + * doesn't, this module must have been unloaded and reloaded -- and + * we're not going to touch it. + */ + if (ctl->loadcnt != fbt->fbtp_loadcnt) { + if (fbt_verbose) { + printf("fbt is failing for probe %s " + "(module %s reloaded)", + fbt->fbtp_name, ctl->filename); + } + + return; + } + + for (; fbt != NULL; fbt = fbt->fbtp_next) { + *fbt->fbtp_patchpoint = fbt->fbtp_patchval; + } +} + +static void +fbt_disable(void *arg, dtrace_id_t id, void *parg) +{ + fbt_probe_t *fbt = parg; + modctl_t *ctl = fbt->fbtp_ctl; + + ASSERT(ctl->nenabled > 0); + ctl->nenabled--; + + if ((ctl->loadcnt != fbt->fbtp_loadcnt)) + return; + + for (; fbt != NULL; fbt = fbt->fbtp_next) + *fbt->fbtp_patchpoint = fbt->fbtp_savedval; +} + +static void +fbt_suspend(void *arg, dtrace_id_t id, void *parg) +{ + fbt_probe_t *fbt = parg; + modctl_t *ctl = fbt->fbtp_ctl; + + ASSERT(ctl->nenabled > 0); + + if ((ctl->loadcnt != fbt->fbtp_loadcnt)) + return; + + for (; fbt != NULL; fbt = fbt->fbtp_next) + *fbt->fbtp_patchpoint = fbt->fbtp_savedval; +} + +static void +fbt_resume(void *arg, dtrace_id_t id, void *parg) +{ + fbt_probe_t *fbt = parg; + modctl_t *ctl = fbt->fbtp_ctl; + + ASSERT(ctl->nenabled > 0); + + if ((ctl->loadcnt != fbt->fbtp_loadcnt)) + return; + + for (; fbt != NULL; fbt = fbt->fbtp_next) + *fbt->fbtp_patchpoint = fbt->fbtp_patchval; +} + +static int +fbt_ctfoff_init(modctl_t *lf, linker_ctf_t *lc) +{ + const Elf_Sym *symp = lc->symtab;; + const char *name; + const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab; + const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t); + int i; + uint32_t *ctfoff; + uint32_t objtoff = hp->cth_objtoff; + uint32_t funcoff = hp->cth_funcoff; + ushort_t info; + ushort_t vlen; + + /* Sanity check. */ + if (hp->cth_magic != CTF_MAGIC) { + printf("Bad magic value in CTF data of '%s'\n",lf->pathname); + return (EINVAL); + } + + if (lc->symtab == NULL) { + printf("No symbol table in '%s'\n",lf->pathname); + return (EINVAL); + } + + if ((ctfoff = malloc(sizeof(uint32_t) * lc->nsym, M_LINKER, M_WAITOK)) == NULL) + return (ENOMEM); + + *lc->ctfoffp = ctfoff; + + for (i = 0; i < lc->nsym; i++, ctfoff++, symp++) { + if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) { + *ctfoff = 0xffffffff; + continue; + } + + if (symp->st_name < lc->strcnt) + name = lc->strtab + symp->st_name; + else + name = "(?)"; + + switch (ELF_ST_TYPE(symp->st_info)) { + case STT_OBJECT: + if (objtoff >= hp->cth_funcoff || + (symp->st_shndx == SHN_ABS && symp->st_value == 0)) { + *ctfoff = 0xffffffff; + break; + } + + *ctfoff = objtoff; + objtoff += sizeof (ushort_t); + break; + + case STT_FUNC: + if (funcoff >= hp->cth_typeoff) { + *ctfoff = 0xffffffff; + break; + } + + *ctfoff = funcoff; + + info = *((const ushort_t *)(ctfdata + funcoff)); + vlen = CTF_INFO_VLEN(info); + + /* + * If we encounter a zero pad at the end, just skip it. + * Otherwise skip over the function and its return type + * (+2) and the argument list (vlen). + */ + if (CTF_INFO_KIND(info) == CTF_K_UNKNOWN && vlen == 0) + funcoff += sizeof (ushort_t); /* skip pad */ + else + funcoff += sizeof (ushort_t) * (vlen + 2); + break; + + default: + *ctfoff = 0xffffffff; + break; + } + } + + return (0); +} + +static ssize_t +fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep, + ssize_t *incrementp) +{ + ssize_t size, increment; + + if (version > CTF_VERSION_1 && + tp->ctt_size == CTF_LSIZE_SENT) { + size = CTF_TYPE_LSIZE(tp); + increment = sizeof (ctf_type_t); + } else { + size = tp->ctt_size; + increment = sizeof (ctf_stype_t); + } + + if (sizep) + *sizep = size; + if (incrementp) + *incrementp = increment; + + return (size); +} + +static int +fbt_typoff_init(linker_ctf_t *lc) +{ + const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab; + const ctf_type_t *tbuf; + const ctf_type_t *tend; + const ctf_type_t *tp; + const uint8_t *ctfdata = lc->ctftab + sizeof(ctf_header_t); + int ctf_typemax = 0; + uint32_t *xp; + ulong_t pop[CTF_K_MAX + 1] = { 0 }; + + + /* Sanity check. */ + if (hp->cth_magic != CTF_MAGIC) + return (EINVAL); + + tbuf = (const ctf_type_t *) (ctfdata + hp->cth_typeoff); + tend = (const ctf_type_t *) (ctfdata + hp->cth_stroff); + + int child = hp->cth_parname != 0; + + /* + * We make two passes through the entire type section. In this first + * pass, we count the number of each type and the total number of types. + */ + for (tp = tbuf; tp < tend; ctf_typemax++) { + ushort_t kind = CTF_INFO_KIND(tp->ctt_info); + ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info); + ssize_t size, increment; + + size_t vbytes; + uint_t n; + + (void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment); + + switch (kind) { + case CTF_K_INTEGER: + case CTF_K_FLOAT: + vbytes = sizeof (uint_t); + break; + case CTF_K_ARRAY: + vbytes = sizeof (ctf_array_t); + break; + case CTF_K_FUNCTION: + vbytes = sizeof (ushort_t) * (vlen + (vlen & 1)); + break; + case CTF_K_STRUCT: + case CTF_K_UNION: + if (size < CTF_LSTRUCT_THRESH) { + ctf_member_t *mp = (ctf_member_t *) + ((uintptr_t)tp + increment); + + vbytes = sizeof (ctf_member_t) * vlen; + for (n = vlen; n != 0; n--, mp++) + child |= CTF_TYPE_ISCHILD(mp->ctm_type); + } else { + ctf_lmember_t *lmp = (ctf_lmember_t *) + ((uintptr_t)tp + increment); + + vbytes = sizeof (ctf_lmember_t) * vlen; + for (n = vlen; n != 0; n--, lmp++) + child |= + CTF_TYPE_ISCHILD(lmp->ctlm_type); + } + break; + case CTF_K_ENUM: + vbytes = sizeof (ctf_enum_t) * vlen; + break; + case CTF_K_FORWARD: + /* + * For forward declarations, ctt_type is the CTF_K_* + * kind for the tag, so bump that population count too. + * If ctt_type is unknown, treat the tag as a struct. + */ + if (tp->ctt_type == CTF_K_UNKNOWN || + tp->ctt_type >= CTF_K_MAX) + pop[CTF_K_STRUCT]++; + else + pop[tp->ctt_type]++; + /*FALLTHRU*/ + case CTF_K_UNKNOWN: + vbytes = 0; + break; + case CTF_K_POINTER: + case CTF_K_TYPEDEF: + case CTF_K_VOLATILE: + case CTF_K_CONST: + case CTF_K_RESTRICT: + child |= CTF_TYPE_ISCHILD(tp->ctt_type); + vbytes = 0; + break; + default: + printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind); + return (EIO); + } + tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes); + pop[kind]++; + } + + *lc->typlenp = ctf_typemax; + + if ((xp = malloc(sizeof(uint32_t) * ctf_typemax, M_LINKER, M_ZERO | M_WAITOK)) == NULL) + return (ENOMEM); + + *lc->typoffp = xp; + + /* type id 0 is used as a sentinel value */ + *xp++ = 0; + + /* + * In the second pass, fill in the type offset. + */ + for (tp = tbuf; tp < tend; xp++) { + ushort_t kind = CTF_INFO_KIND(tp->ctt_info); + ulong_t vlen = CTF_INFO_VLEN(tp->ctt_info); + ssize_t size, increment; + + size_t vbytes; + uint_t n; + + (void) fbt_get_ctt_size(hp->cth_version, tp, &size, &increment); + + switch (kind) { + case CTF_K_INTEGER: + case CTF_K_FLOAT: + vbytes = sizeof (uint_t); + break; + case CTF_K_ARRAY: + vbytes = sizeof (ctf_array_t); + break; + case CTF_K_FUNCTION: + vbytes = sizeof (ushort_t) * (vlen + (vlen & 1)); + break; + case CTF_K_STRUCT: + case CTF_K_UNION: + if (size < CTF_LSTRUCT_THRESH) { + ctf_member_t *mp = (ctf_member_t *) + ((uintptr_t)tp + increment); + + vbytes = sizeof (ctf_member_t) * vlen; + for (n = vlen; n != 0; n--, mp++) + child |= CTF_TYPE_ISCHILD(mp->ctm_type); + } else { + ctf_lmember_t *lmp = (ctf_lmember_t *) + ((uintptr_t)tp + increment); + + vbytes = sizeof (ctf_lmember_t) * vlen; + for (n = vlen; n != 0; n--, lmp++) + child |= + CTF_TYPE_ISCHILD(lmp->ctlm_type); + } + break; + case CTF_K_ENUM: + vbytes = sizeof (ctf_enum_t) * vlen; + break; + case CTF_K_FORWARD: + case CTF_K_UNKNOWN: + vbytes = 0; + break; + case CTF_K_POINTER: + case CTF_K_TYPEDEF: + case CTF_K_VOLATILE: + case CTF_K_CONST: + case CTF_K_RESTRICT: + vbytes = 0; + break; + default: + printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind); + return (EIO); + } + *xp = (uint32_t)((uintptr_t) tp - (uintptr_t) ctfdata); + tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes); + } + + return (0); +} + +/* + * CTF Declaration Stack + * + * In order to implement ctf_type_name(), we must convert a type graph back + * into a C type declaration. Unfortunately, a type graph represents a storage + * class ordering of the type whereas a type declaration must obey the C rules + * for operator precedence, and the two orderings are frequently in conflict. + * For example, consider these CTF type graphs and their C declarations: + * + * CTF_K_POINTER -> CTF_K_FUNCTION -> CTF_K_INTEGER : int (*)() + * CTF_K_POINTER -> CTF_K_ARRAY -> CTF_K_INTEGER : int (*)[] + * + * In each case, parentheses are used to raise operator * to higher lexical + * precedence, so the string form of the C declaration cannot be constructed by + * walking the type graph links and forming the string from left to right. + * + * The functions in this file build a set of stacks from the type graph nodes + * corresponding to the C operator precedence levels in the appropriate order. + * The code in ctf_type_name() can then iterate over the levels and nodes in + * lexical precedence order and construct the final C declaration string. + */ +typedef struct ctf_list { + struct ctf_list *l_prev; /* previous pointer or tail pointer */ + struct ctf_list *l_next; /* next pointer or head pointer */ +} ctf_list_t; + +#define ctf_list_prev(elem) ((void *)(((ctf_list_t *)(elem))->l_prev)) +#define ctf_list_next(elem) ((void *)(((ctf_list_t *)(elem))->l_next)) + +typedef enum { + CTF_PREC_BASE, + CTF_PREC_POINTER, + CTF_PREC_ARRAY, + CTF_PREC_FUNCTION, + CTF_PREC_MAX +} ctf_decl_prec_t; + +typedef struct ctf_decl_node { + ctf_list_t cd_list; /* linked list pointers */ + ctf_id_t cd_type; /* type identifier */ + uint_t cd_kind; /* type kind */ + uint_t cd_n; /* type dimension if array */ +} ctf_decl_node_t; + +typedef struct ctf_decl { + ctf_list_t cd_nodes[CTF_PREC_MAX]; /* declaration node stacks */ + int cd_order[CTF_PREC_MAX]; /* storage order of decls */ + ctf_decl_prec_t cd_qualp; /* qualifier precision */ + ctf_decl_prec_t cd_ordp; /* ordered precision */ + char *cd_buf; /* buffer for output */ + char *cd_ptr; /* buffer location */ + char *cd_end; /* buffer limit */ + size_t cd_len; /* buffer space required */ + int cd_err; /* saved error value */ +} ctf_decl_t; + +/* + * Simple doubly-linked list append routine. This implementation assumes that + * each list element contains an embedded ctf_list_t as the first member. + * An additional ctf_list_t is used to store the head (l_next) and tail + * (l_prev) pointers. The current head and tail list elements have their + * previous and next pointers set to NULL, respectively. + */ +static void +ctf_list_append(ctf_list_t *lp, void *new) +{ + ctf_list_t *p = lp->l_prev; /* p = tail list element */ + ctf_list_t *q = new; /* q = new list element */ + + lp->l_prev = q; + q->l_prev = p; + q->l_next = NULL; + + if (p != NULL) + p->l_next = q; + else + lp->l_next = q; +} + +/* + * Prepend the specified existing element to the given ctf_list_t. The + * existing pointer should be pointing at a struct with embedded ctf_list_t. + */ +static void +ctf_list_prepend(ctf_list_t *lp, void *new) +{ + ctf_list_t *p = new; /* p = new list element */ + ctf_list_t *q = lp->l_next; /* q = head list element */ + + lp->l_next = p; + p->l_prev = NULL; + p->l_next = q; + + if (q != NULL) + q->l_prev = p; + else + lp->l_prev = p; +} + +static void +ctf_decl_init(ctf_decl_t *cd, char *buf, size_t len) +{ + int i; + + bzero(cd, sizeof (ctf_decl_t)); + + for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) + cd->cd_order[i] = CTF_PREC_BASE - 1; + + cd->cd_qualp = CTF_PREC_BASE; + cd->cd_ordp = CTF_PREC_BASE; + + cd->cd_buf = buf; + cd->cd_ptr = buf; + cd->cd_end = buf + len; +} + +static void +ctf_decl_fini(ctf_decl_t *cd) +{ + ctf_decl_node_t *cdp, *ndp; + int i; + + for (i = CTF_PREC_BASE; i < CTF_PREC_MAX; i++) { + for (cdp = ctf_list_next(&cd->cd_nodes[i]); + cdp != NULL; cdp = ndp) { + ndp = ctf_list_next(cdp); + free(cdp, M_FBT); + } + } +} + +static const ctf_type_t * +ctf_lookup_by_id(linker_ctf_t *lc, ctf_id_t type) +{ + const ctf_type_t *tp; + uint32_t offset; + uint32_t *typoff = *lc->typoffp; + + if (type >= *lc->typlenp) { + printf("%s(%d): type %d exceeds max %ld\n",__func__,__LINE__,(int) type,*lc->typlenp); + return(NULL); + } + + /* Check if the type isn't cross-referenced. */ + if ((offset = typoff[type]) == 0) { + printf("%s(%d): type %d isn't cross referenced\n",__func__,__LINE__, (int) type); + return(NULL); + } + + tp = (const ctf_type_t *)(lc->ctftab + offset + sizeof(ctf_header_t)); + + return (tp); +} + +static void +fbt_array_info(linker_ctf_t *lc, ctf_id_t type, ctf_arinfo_t *arp) +{ + const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab; + const ctf_type_t *tp; + const ctf_array_t *ap; + ssize_t increment; + + bzero(arp, sizeof(*arp)); + + if ((tp = ctf_lookup_by_id(lc, type)) == NULL) + return; + + if (CTF_INFO_KIND(tp->ctt_info) != CTF_K_ARRAY) + return; + + (void) fbt_get_ctt_size(hp->cth_version, tp, NULL, &increment); + + ap = (const ctf_array_t *)((uintptr_t)tp + increment); + arp->ctr_contents = ap->cta_contents; + arp->ctr_index = ap->cta_index; + arp->ctr_nelems = ap->cta_nelems; +} + +static const char * +ctf_strptr(linker_ctf_t *lc, int name) +{ + const ctf_header_t *hp = (const ctf_header_t *) lc->ctftab;; + const char *strp = ""; + + if (name < 0 || name >= hp->cth_strlen) + return(strp); + + strp = (const char *)(lc->ctftab + hp->cth_stroff + name + sizeof(ctf_header_t)); + + return (strp); +} + +static void +ctf_decl_push(ctf_decl_t *cd, linker_ctf_t *lc, ctf_id_t type) +{ + ctf_decl_node_t *cdp; + ctf_decl_prec_t prec; + uint_t kind, n = 1; + int is_qual = 0; + + const ctf_type_t *tp; + ctf_arinfo_t ar; + + if ((tp = ctf_lookup_by_id(lc, type)) == NULL) { + cd->cd_err = ENOENT; + return; + } + + switch (kind = CTF_INFO_KIND(tp->ctt_info)) { + case CTF_K_ARRAY: + fbt_array_info(lc, type, &ar); + ctf_decl_push(cd, lc, ar.ctr_contents); + n = ar.ctr_nelems; + prec = CTF_PREC_ARRAY; + break; + + case CTF_K_TYPEDEF: + if (ctf_strptr(lc, tp->ctt_name)[0] == '\0') { + ctf_decl_push(cd, lc, tp->ctt_type); + return; + } + prec = CTF_PREC_BASE; + break; + + case CTF_K_FUNCTION: + ctf_decl_push(cd, lc, tp->ctt_type); + prec = CTF_PREC_FUNCTION; + break; + + case CTF_K_POINTER: + ctf_decl_push(cd, lc, tp->ctt_type); + prec = CTF_PREC_POINTER; + break; + + case CTF_K_VOLATILE: + case CTF_K_CONST: + case CTF_K_RESTRICT: + ctf_decl_push(cd, lc, tp->ctt_type); + prec = cd->cd_qualp; + is_qual++; + break; + + default: + prec = CTF_PREC_BASE; + } + + if ((cdp = malloc(sizeof (ctf_decl_node_t), M_FBT, M_WAITOK)) == NULL) { + cd->cd_err = EAGAIN; + return; + } + + cdp->cd_type = type; + cdp->cd_kind = kind; + cdp->cd_n = n; + + if (ctf_list_next(&cd->cd_nodes[prec]) == NULL) + cd->cd_order[prec] = cd->cd_ordp++; + + /* + * Reset cd_qualp to the highest precedence level that we've seen so + * far that can be qualified (CTF_PREC_BASE or CTF_PREC_POINTER). + */ + if (prec > cd->cd_qualp && prec < CTF_PREC_ARRAY) + cd->cd_qualp = prec; + + /* + * C array declarators are ordered inside out so prepend them. Also by + * convention qualifiers of base types precede the type specifier (e.g. + * const int vs. int const) even though the two forms are equivalent. + */ + if (kind == CTF_K_ARRAY || (is_qual && prec == CTF_PREC_BASE)) + ctf_list_prepend(&cd->cd_nodes[prec], cdp); + else + ctf_list_append(&cd->cd_nodes[prec], cdp); +} + +static void +ctf_decl_sprintf(ctf_decl_t *cd, const char *format, ...) +{ + size_t len = (size_t)(cd->cd_end - cd->cd_ptr); + va_list ap; + size_t n; + + va_start(ap, format); + n = vsnprintf(cd->cd_ptr, len, format, ap); + va_end(ap); + + cd->cd_ptr += MIN(n, len); + cd->cd_len += n; +} + +static ssize_t +fbt_type_name(linker_ctf_t *lc, ctf_id_t type, char *buf, size_t len) +{ + ctf_decl_t cd; + ctf_decl_node_t *cdp; + ctf_decl_prec_t prec, lp, rp; + int ptr, arr; + uint_t k; + + if (lc == NULL && type == CTF_ERR) + return (-1); /* simplify caller code by permitting CTF_ERR */ + + ctf_decl_init(&cd, buf, len); + ctf_decl_push(&cd, lc, type); + + if (cd.cd_err != 0) { + ctf_decl_fini(&cd); + return (-1); + } + + /* + * If the type graph's order conflicts with lexical precedence order + * for pointers or arrays, then we need to surround the declarations at + * the corresponding lexical precedence with parentheses. This can + * result in either a parenthesized pointer (*) as in int (*)() or + * int (*)[], or in a parenthesized pointer and array as in int (*[])(). + */ + ptr = cd.cd_order[CTF_PREC_POINTER] > CTF_PREC_POINTER; + arr = cd.cd_order[CTF_PREC_ARRAY] > CTF_PREC_ARRAY; + + rp = arr ? CTF_PREC_ARRAY : ptr ? CTF_PREC_POINTER : -1; + lp = ptr ? CTF_PREC_POINTER : arr ? CTF_PREC_ARRAY : -1; + + k = CTF_K_POINTER; /* avoid leading whitespace (see below) */ + + for (prec = CTF_PREC_BASE; prec < CTF_PREC_MAX; prec++) { + for (cdp = ctf_list_next(&cd.cd_nodes[prec]); + cdp != NULL; cdp = ctf_list_next(cdp)) { + + const ctf_type_t *tp = + ctf_lookup_by_id(lc, cdp->cd_type); + const char *name = ctf_strptr(lc, tp->ctt_name); + + if (k != CTF_K_POINTER && k != CTF_K_ARRAY) + ctf_decl_sprintf(&cd, " "); + + if (lp == prec) { + ctf_decl_sprintf(&cd, "("); + lp = -1; + } + + switch (cdp->cd_kind) { + case CTF_K_INTEGER: + case CTF_K_FLOAT: + case CTF_K_TYPEDEF: + ctf_decl_sprintf(&cd, "%s", name); + break; + case CTF_K_POINTER: + ctf_decl_sprintf(&cd, "*"); + break; + case CTF_K_ARRAY: + ctf_decl_sprintf(&cd, "[%u]", cdp->cd_n); + break; + case CTF_K_FUNCTION: + ctf_decl_sprintf(&cd, "()"); + break; + case CTF_K_STRUCT: + case CTF_K_FORWARD: + ctf_decl_sprintf(&cd, "struct %s", name); + break; + case CTF_K_UNION: + ctf_decl_sprintf(&cd, "union %s", name); + break; + case CTF_K_ENUM: + ctf_decl_sprintf(&cd, "enum %s", name); + break; + case CTF_K_VOLATILE: + ctf_decl_sprintf(&cd, "volatile"); + break; + case CTF_K_CONST: + ctf_decl_sprintf(&cd, "const"); + break; + case CTF_K_RESTRICT: + ctf_decl_sprintf(&cd, "restrict"); + break; + } + + k = cdp->cd_kind; + } + + if (rp == prec) + ctf_decl_sprintf(&cd, ")"); + } + + ctf_decl_fini(&cd); + return (cd.cd_len); +} + +static void +fbt_getargdesc(void *arg __unused, dtrace_id_t id __unused, void *parg, dtrace_argdesc_t *desc) +{ + const ushort_t *dp; + fbt_probe_t *fbt = parg; + linker_ctf_t lc; + modctl_t *ctl = fbt->fbtp_ctl; + int ndx = desc->dtargd_ndx; + int symindx = fbt->fbtp_symindx; + uint32_t *ctfoff; + uint32_t offset; + ushort_t info, kind, n; + + desc->dtargd_ndx = DTRACE_ARGNONE; + + /* Get a pointer to the CTF data and it's length. */ + if (linker_ctf_get(ctl, &lc) != 0) + /* No CTF data? Something wrong? *shrug* */ + return; + + /* Check if this module hasn't been initialised yet. */ + if (*lc.ctfoffp == NULL) { + /* + * Initialise the CTF object and function symindx to + * byte offset array. + */ + if (fbt_ctfoff_init(ctl, &lc) != 0) + return; + + /* Initialise the CTF type to byte offset array. */ + if (fbt_typoff_init(&lc) != 0) + return; + } + + ctfoff = *lc.ctfoffp; + + if (ctfoff == NULL || *lc.typoffp == NULL) + return; + + /* Check if the symbol index is out of range. */ + if (symindx >= lc.nsym) + return; + + /* Check if the symbol isn't cross-referenced. */ + if ((offset = ctfoff[symindx]) == 0xffffffff) + return; + + dp = (const ushort_t *)(lc.ctftab + offset + sizeof(ctf_header_t)); + + info = *dp++; + kind = CTF_INFO_KIND(info); + n = CTF_INFO_VLEN(info); + + if (kind == CTF_K_UNKNOWN && n == 0) { + printf("%s(%d): Unknown function!\n",__func__,__LINE__); + return; + } + + if (kind != CTF_K_FUNCTION) { + printf("%s(%d): Expected a function!\n",__func__,__LINE__); + return; + } + + /* Check if the requested argument doesn't exist. */ + if (ndx >= n) + return; + + /* Skip the return type and arguments up to the one requested. */ + dp += ndx + 1; + + if (fbt_type_name(&lc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0) + desc->dtargd_ndx = ndx; + + return; +} + +static void +fbt_load(void *dummy) +{ + /* Create the /dev/dtrace/fbt entry. */ + fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "dtrace/fbt"); + + /* Default the probe table size if not specified. */ + if (fbt_probetab_size == 0) + fbt_probetab_size = FBT_PROBETAB_SIZE; + + /* Choose the hash mask for the probe table. */ + fbt_probetab_mask = fbt_probetab_size - 1; + + /* Allocate memory for the probe table. */ + fbt_probetab = + malloc(fbt_probetab_size * sizeof (fbt_probe_t *), M_FBT, M_WAITOK | M_ZERO); + + dtrace_doubletrap_func = fbt_doubletrap; + dtrace_invop_add(fbt_invop); + + if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER, + NULL, &fbt_pops, NULL, &fbt_id) != 0) + return; +} + + +static int +fbt_unload() +{ + int error = 0; + + /* De-register the invalid opcode handler. */ + dtrace_invop_remove(fbt_invop); + + dtrace_doubletrap_func = NULL; + + /* De-register this DTrace provider. */ + if ((error = dtrace_unregister(fbt_id)) != 0) + return (error); + + /* Free the probe table. */ + free(fbt_probetab, M_FBT); + fbt_probetab = NULL; + fbt_probetab_mask = 0; + + destroy_dev(fbt_cdev); + + return (error); +} + +static int +fbt_modevent(module_t mod __unused, int type, void *data __unused) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + break; + + case MOD_UNLOAD: + break; + + case MOD_SHUTDOWN: + break; + + default: + error = EOPNOTSUPP; + break; + + } + + return (error); +} + +static int +fbt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) +{ + return (0); +} + +SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL); +SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL); + +DEV_MODULE(fbt, fbt_modevent, NULL); +MODULE_VERSION(fbt, 1); +MODULE_DEPEND(fbt, dtrace, 1, 1, 1); +MODULE_DEPEND(fbt, opensolaris, 1, 1, 1); diff --git a/sys/cddl/dev/profile/profile.c b/sys/cddl/dev/profile/profile.c new file mode 100644 index 0000000..ea52e2e --- /dev/null +++ b/sys/cddl/dev/profile/profile.c @@ -0,0 +1,531 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Portions Copyright 2006-2008 John Birrell jb@freebsd.org + * + * $FreeBSD$ + * + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/cdefs.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/cpuvar.h> +#include <sys/fcntl.h> +#include <sys/filio.h> +#include <sys/kdb.h> +#include <sys/kernel.h> +#include <sys/kmem.h> +#include <sys/kthread.h> +#include <sys/limits.h> +#include <sys/linker.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/poll.h> +#include <sys/proc.h> +#include <sys/selinfo.h> +#include <sys/smp.h> +#include <sys/uio.h> +#include <sys/unistd.h> +#include <machine/stdarg.h> + +#include <sys/cyclic.h> +#include <sys/dtrace.h> +#include <sys/dtrace_bsd.h> + +#define PROF_NAMELEN 15 + +#define PROF_PROFILE 0 +#define PROF_TICK 1 +#define PROF_PREFIX_PROFILE "profile-" +#define PROF_PREFIX_TICK "tick-" + +/* + * Regardless of platform, there are five artificial frames in the case of the + * profile provider: + * + * profile_fire + * cyclic_expire + * cyclic_fire + * [ cbe ] + * [ locore ] + * + * On amd64, there are two frames associated with locore: one in locore, and + * another in common interrupt dispatch code. (i386 has not been modified to + * use this common layer.) Further, on i386, the interrupted instruction + * appears as its own stack frame. All of this means that we need to add one + * frame for amd64, and then take one away for both amd64 and i386. + * + * On SPARC, the picture is further complicated because the compiler + * optimizes away tail-calls -- so the following frames are optimized away: + * + * profile_fire + * cyclic_expire + * + * This gives three frames. However, on DEBUG kernels, the cyclic_expire + * frame cannot be tail-call eliminated, yielding four frames in this case. + * + * All of the above constraints lead to the mess below. Yes, the profile + * provider should ideally figure this out on-the-fly by hiting one of its own + * probes and then walking its own stack trace. This is complicated, however, + * and the static definition doesn't seem to be overly brittle. Still, we + * allow for a manual override in case we get it completely wrong. + */ +#ifdef __amd64 +#define PROF_ARTIFICIAL_FRAMES 7 +#else +#ifdef __i386 +#define PROF_ARTIFICIAL_FRAMES 6 +#else +#ifdef __sparc +#ifdef DEBUG +#define PROF_ARTIFICIAL_FRAMES 4 +#else +#define PROF_ARTIFICIAL_FRAMES 3 +#endif +#endif +#endif +#endif + +typedef struct profile_probe { + char prof_name[PROF_NAMELEN]; + dtrace_id_t prof_id; + int prof_kind; + hrtime_t prof_interval; + cyclic_id_t prof_cyclic; +} profile_probe_t; + +typedef struct profile_probe_percpu { + hrtime_t profc_expected; + hrtime_t profc_interval; + profile_probe_t *profc_probe; +} profile_probe_percpu_t; + +static d_open_t profile_open; +static int profile_unload(void); +static void profile_create(hrtime_t, char *, int); +static void profile_destroy(void *, dtrace_id_t, void *); +static void profile_enable(void *, dtrace_id_t, void *); +static void profile_disable(void *, dtrace_id_t, void *); +static void profile_load(void *); +static void profile_provide(void *, dtrace_probedesc_t *); + +static int profile_rates[] = { + 97, 199, 499, 997, 1999, + 4001, 4999, 0, 0, 0, + 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0 +}; + +static int profile_ticks[] = { + 1, 10, 100, 500, 1000, + 5000, 0, 0, 0, 0, + 0, 0, 0, 0, 0 +}; + +/* + * profile_max defines the upper bound on the number of profile probes that + * can exist (this is to prevent malicious or clumsy users from exhausing + * system resources by creating a slew of profile probes). At mod load time, + * this gets its value from PROFILE_MAX_DEFAULT or profile-max-probes if it's + * present in the profile.conf file. + */ +#define PROFILE_MAX_DEFAULT 1000 /* default max. number of probes */ +static uint32_t profile_max = PROFILE_MAX_DEFAULT; + /* maximum number of profile probes */ +static uint32_t profile_total; /* current number of profile probes */ + +static struct cdevsw profile_cdevsw = { + .d_version = D_VERSION, + .d_open = profile_open, + .d_name = "profile", +}; + +static dtrace_pattr_t profile_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +}; + +static dtrace_pops_t profile_pops = { + profile_provide, + NULL, + profile_enable, + profile_disable, + NULL, + NULL, + NULL, + NULL, + NULL, + profile_destroy +}; + +static struct cdev *profile_cdev; +static dtrace_provider_id_t profile_id; +static hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */ +static int profile_aframes = 0; /* override */ + +static void +profile_fire(void *arg) +{ + profile_probe_percpu_t *pcpu = arg; + profile_probe_t *prof = pcpu->profc_probe; + hrtime_t late; + solaris_cpu_t *c = &solaris_cpu[curcpu]; + + late = gethrtime() - pcpu->profc_expected; + pcpu->profc_expected += pcpu->profc_interval; + + dtrace_probe(prof->prof_id, c->cpu_profile_pc, + c->cpu_profile_upc, late, 0, 0); +} + +static void +profile_tick(void *arg) +{ + profile_probe_t *prof = arg; + solaris_cpu_t *c = &solaris_cpu[curcpu]; + + dtrace_probe(prof->prof_id, c->cpu_profile_pc, + c->cpu_profile_upc, 0, 0, 0); +} + +static void +profile_create(hrtime_t interval, char *name, int kind) +{ + profile_probe_t *prof; + + if (interval < profile_interval_min) + return; + + if (dtrace_probe_lookup(profile_id, NULL, NULL, name) != 0) + return; + + atomic_add_32(&profile_total, 1); + if (profile_total > profile_max) { + atomic_add_32(&profile_total, -1); + return; + } + + prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP); + (void) strcpy(prof->prof_name, name); + prof->prof_interval = interval; + prof->prof_cyclic = CYCLIC_NONE; + prof->prof_kind = kind; + prof->prof_id = dtrace_probe_create(profile_id, + NULL, NULL, name, + profile_aframes ? profile_aframes : PROF_ARTIFICIAL_FRAMES, prof); +} + +/*ARGSUSED*/ +static void +profile_provide(void *arg, dtrace_probedesc_t *desc) +{ + int i, j, rate, kind; + hrtime_t val = 0, mult = 1, len = 0; + char *name, *suffix = NULL; + + const struct { + char *prefix; + int kind; + } types[] = { + { PROF_PREFIX_PROFILE, PROF_PROFILE }, + { PROF_PREFIX_TICK, PROF_TICK }, + { 0, 0 } + }; + + const struct { + char *name; + hrtime_t mult; + } suffixes[] = { + { "ns", NANOSEC / NANOSEC }, + { "nsec", NANOSEC / NANOSEC }, + { "us", NANOSEC / MICROSEC }, + { "usec", NANOSEC / MICROSEC }, + { "ms", NANOSEC / MILLISEC }, + { "msec", NANOSEC / MILLISEC }, + { "s", NANOSEC / SEC }, + { "sec", NANOSEC / SEC }, + { "m", NANOSEC * (hrtime_t)60 }, + { "min", NANOSEC * (hrtime_t)60 }, + { "h", NANOSEC * (hrtime_t)(60 * 60) }, + { "hour", NANOSEC * (hrtime_t)(60 * 60) }, + { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) }, + { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) }, + { "hz", 0 }, + { NULL } + }; + + if (desc == NULL) { + char n[PROF_NAMELEN]; + + /* + * If no description was provided, provide all of our probes. + */ + for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) { + if ((rate = profile_rates[i]) == 0) + continue; + + (void) snprintf(n, PROF_NAMELEN, "%s%d", + PROF_PREFIX_PROFILE, rate); + profile_create(NANOSEC / rate, n, PROF_PROFILE); + } + + for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) { + if ((rate = profile_ticks[i]) == 0) + continue; + + (void) snprintf(n, PROF_NAMELEN, "%s%d", + PROF_PREFIX_TICK, rate); + profile_create(NANOSEC / rate, n, PROF_TICK); + } + + return; + } + + name = desc->dtpd_name; + + for (i = 0; types[i].prefix != NULL; i++) { + len = strlen(types[i].prefix); + + if (strncmp(name, types[i].prefix, len) != 0) + continue; + break; + } + + if (types[i].prefix == NULL) + return; + + kind = types[i].kind; + j = strlen(name) - len; + + /* + * We need to start before any time suffix. + */ + for (j = strlen(name); j >= len; j--) { + if (name[j] >= '0' && name[j] <= '9') + break; + suffix = &name[j]; + } + + ASSERT(suffix != NULL); + + /* + * Now determine the numerical value present in the probe name. + */ + for (; j >= len; j--) { + if (name[j] < '0' || name[j] > '9') + return; + + val += (name[j] - '0') * mult; + mult *= (hrtime_t)10; + } + + if (val == 0) + return; + + /* + * Look-up the suffix to determine the multiplier. + */ + for (i = 0, mult = 0; suffixes[i].name != NULL; i++) { + if (strcasecmp(suffixes[i].name, suffix) == 0) { + mult = suffixes[i].mult; + break; + } + } + + if (suffixes[i].name == NULL && *suffix != '\0') + return; + + if (mult == 0) { + /* + * The default is frequency-per-second. + */ + val = NANOSEC / val; + } else { + val *= mult; + } + + profile_create(val, name, kind); +} + +/* ARGSUSED */ +static void +profile_destroy(void *arg, dtrace_id_t id, void *parg) +{ + profile_probe_t *prof = parg; + + ASSERT(prof->prof_cyclic == CYCLIC_NONE); + kmem_free(prof, sizeof (profile_probe_t)); + + ASSERT(profile_total >= 1); + atomic_add_32(&profile_total, -1); +} + +/*ARGSUSED*/ +static void +profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) +{ + profile_probe_t *prof = arg; + profile_probe_percpu_t *pcpu; + + pcpu = kmem_zalloc(sizeof (profile_probe_percpu_t), KM_SLEEP); + pcpu->profc_probe = prof; + + hdlr->cyh_func = profile_fire; + hdlr->cyh_arg = pcpu; + + when->cyt_interval = prof->prof_interval; + when->cyt_when = gethrtime() + when->cyt_interval; + + pcpu->profc_expected = when->cyt_when; + pcpu->profc_interval = when->cyt_interval; +} + +/*ARGSUSED*/ +static void +profile_offline(void *arg, cpu_t *cpu, void *oarg) +{ + profile_probe_percpu_t *pcpu = oarg; + + ASSERT(pcpu->profc_probe == arg); + kmem_free(pcpu, sizeof (profile_probe_percpu_t)); +} + +/* ARGSUSED */ +static void +profile_enable(void *arg, dtrace_id_t id, void *parg) +{ + profile_probe_t *prof = parg; + cyc_omni_handler_t omni; + cyc_handler_t hdlr; + cyc_time_t when; + + ASSERT(prof->prof_interval != 0); + ASSERT(MUTEX_HELD(&cpu_lock)); + + if (prof->prof_kind == PROF_TICK) { + hdlr.cyh_func = profile_tick; + hdlr.cyh_arg = prof; + + when.cyt_interval = prof->prof_interval; + when.cyt_when = gethrtime() + when.cyt_interval; + } else { + ASSERT(prof->prof_kind == PROF_PROFILE); + omni.cyo_online = profile_online; + omni.cyo_offline = profile_offline; + omni.cyo_arg = prof; + } + + if (prof->prof_kind == PROF_TICK) { + prof->prof_cyclic = cyclic_add(&hdlr, &when); + } else { + prof->prof_cyclic = cyclic_add_omni(&omni); + } +} + +/* ARGSUSED */ +static void +profile_disable(void *arg, dtrace_id_t id, void *parg) +{ + profile_probe_t *prof = parg; + + ASSERT(prof->prof_cyclic != CYCLIC_NONE); + ASSERT(MUTEX_HELD(&cpu_lock)); + + cyclic_remove(prof->prof_cyclic); + prof->prof_cyclic = CYCLIC_NONE; +} + +static void +profile_load(void *dummy) +{ + /* Create the /dev/dtrace/profile entry. */ + profile_cdev = make_dev(&profile_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "dtrace/profile"); + + if (dtrace_register("profile", &profile_attr, DTRACE_PRIV_USER, + NULL, &profile_pops, NULL, &profile_id) != 0) + return; +} + + +static int +profile_unload() +{ + int error = 0; + + if ((error = dtrace_unregister(profile_id)) != 0) + return (error); + + destroy_dev(profile_cdev); + + return (error); +} + +/* ARGSUSED */ +static int +profile_modevent(module_t mod __unused, int type, void *data __unused) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + break; + + case MOD_UNLOAD: + break; + + case MOD_SHUTDOWN: + break; + + default: + error = EOPNOTSUPP; + break; + + } + return (error); +} + +/* ARGSUSED */ +static int +profile_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) +{ + return (0); +} + +SYSINIT(profile_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_load, NULL); +SYSUNINIT(profile_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, profile_unload, NULL); + +DEV_MODULE(profile, profile_modevent, NULL); +MODULE_VERSION(profile, 1); +MODULE_DEPEND(profile, dtrace, 1, 1, 1); +MODULE_DEPEND(profile, cyclic, 1, 1, 1); +MODULE_DEPEND(profile, opensolaris, 1, 1, 1); diff --git a/sys/cddl/dev/prototype.c b/sys/cddl/dev/prototype.c new file mode 100644 index 0000000..1f7f8b5 --- /dev/null +++ b/sys/cddl/dev/prototype.c @@ -0,0 +1,144 @@ +/* + * This file is freeware. You are free to use it and add your own + * license. + * + * $FreeBSD$ + * + */ + +#include <sys/cdefs.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/module.h> + +#include <sys/dtrace.h> + +static d_open_t prototype_open; +static int prototype_unload(void); +static void prototype_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); +static void prototype_provide(void *, dtrace_probedesc_t *); +static void prototype_destroy(void *, dtrace_id_t, void *); +static void prototype_enable(void *, dtrace_id_t, void *); +static void prototype_disable(void *, dtrace_id_t, void *); +static void prototype_load(void *); + +static struct cdevsw prototype_cdevsw = { + .d_version = D_VERSION, + .d_open = prototype_open, + .d_name = "prototype", +}; + +static dtrace_pattr_t prototype_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +}; + +static dtrace_pops_t prototype_pops = { + prototype_provide, + NULL, + prototype_enable, + prototype_disable, + NULL, + NULL, + prototype_getargdesc, + NULL, + NULL, + prototype_destroy +}; + +static struct cdev *prototype_cdev; +static dtrace_provider_id_t prototype_id; + +static void +prototype_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) +{ +} + +static void +prototype_provide(void *arg, dtrace_probedesc_t *desc) +{ +} + +static void +prototype_destroy(void *arg, dtrace_id_t id, void *parg) +{ +} + +static void +prototype_enable(void *arg, dtrace_id_t id, void *parg) +{ +} + +static void +prototype_disable(void *arg, dtrace_id_t id, void *parg) +{ +} + +static void +prototype_load(void *dummy) +{ + /* Create the /dev/dtrace/prototype entry. */ + prototype_cdev = make_dev(&prototype_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "dtrace/prototype"); + + if (dtrace_register("prototype", &prototype_attr, DTRACE_PRIV_USER, + NULL, &prototype_pops, NULL, &prototype_id) != 0) + return; +} + + +static int +prototype_unload() +{ + int error = 0; + + if ((error = dtrace_unregister(prototype_id)) != 0) + return (error); + + destroy_dev(prototype_cdev); + + return (error); +} + +static int +prototype_modevent(module_t mod __unused, int type, void *data __unused) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + break; + + case MOD_UNLOAD: + break; + + case MOD_SHUTDOWN: + break; + + default: + error = EOPNOTSUPP; + break; + + } + + return (error); +} + +static int +prototype_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) +{ + return (0); +} + +SYSINIT(prototype_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, prototype_load, NULL); +SYSUNINIT(prototype_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, prototype_unload, NULL); + +DEV_MODULE(prototype, prototype_modevent, NULL); +MODULE_VERSION(prototype, 1); +MODULE_DEPEND(prototype, dtrace, 1, 1, 1); +MODULE_DEPEND(prototype, opensolaris, 1, 1, 1); diff --git a/sys/cddl/dev/sdt/sdt.c b/sys/cddl/dev/sdt/sdt.c new file mode 100644 index 0000000..d5d4172 --- /dev/null +++ b/sys/cddl/dev/sdt/sdt.c @@ -0,0 +1,254 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Portions Copyright 2006-2008 John Birrell jb@freebsd.org + * + * $FreeBSD$ + * + */ + +#ifndef KDTRACE_HOOKS +#define KDTRACE_HOOKS +#endif + +#include <sys/cdefs.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/kernel.h> +#include <sys/limits.h> +#include <sys/lock.h> +#include <sys/linker.h> +#include <sys/module.h> +#include <sys/mutex.h> + +#include <sys/dtrace.h> +#include <sys/sdt.h> + +#define SDT_ADDR2NDX(addr) (((uintptr_t)(addr)) >> 4) + +static d_open_t sdt_open; +static int sdt_unload(void); +static void sdt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); +static void sdt_provide_probes(void *, dtrace_probedesc_t *); +static void sdt_destroy(void *, dtrace_id_t, void *); +static void sdt_enable(void *, dtrace_id_t, void *); +static void sdt_disable(void *, dtrace_id_t, void *); +static void sdt_load(void *); + +static struct cdevsw sdt_cdevsw = { + .d_version = D_VERSION, + .d_open = sdt_open, + .d_name = "sdt", +}; + +static dtrace_pattr_t sdt_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +}; + +static dtrace_pops_t sdt_pops = { + sdt_provide_probes, + NULL, + sdt_enable, + sdt_disable, + NULL, + NULL, + sdt_getargdesc, + NULL, + NULL, + sdt_destroy +}; + +static struct cdev *sdt_cdev; + +static int +sdt_argtype_callback(struct sdt_argtype *argtype, void *arg) +{ + dtrace_argdesc_t *desc = arg; + + if (desc->dtargd_ndx == argtype->ndx) { + desc->dtargd_mapping = desc->dtargd_ndx; /* XXX */ + strlcpy(desc->dtargd_native, argtype->type, + sizeof(desc->dtargd_native)); + desc->dtargd_xlate[0] = '\0'; /* XXX */ + } + + return (0); +} + +static void +sdt_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) +{ + struct sdt_probe *probe = parg; + + if (desc->dtargd_ndx < probe->n_args) + (void) (sdt_argtype_listall(probe, sdt_argtype_callback, desc)); + else + desc->dtargd_ndx = DTRACE_ARGNONE; + + return; +} + +static int +sdt_probe_callback(struct sdt_probe *probe, void *arg __unused) +{ + struct sdt_provider *prov = probe->prov; + char mod[64]; + char func[64]; + char name[64]; + + /* + * Unfortunately this is necessary because the Solaris DTrace + * code mixes consts and non-consts with casts to override + * the incompatibilies. On FreeBSD, we use strict warnings + * in gcc, so we have to respect const vs non-const. + */ + strlcpy(mod, probe->mod, sizeof(mod)); + strlcpy(func, probe->func, sizeof(func)); + strlcpy(name, probe->name, sizeof(name)); + + if (dtrace_probe_lookup(prov->id, mod, func, name) != 0) + return (0); + + (void) dtrace_probe_create(prov->id, probe->mod, probe->func, + probe->name, 0, probe); + + return (0); +} + +static int +sdt_provider_entry(struct sdt_provider *prov, void *arg) +{ + return (sdt_probe_listall(prov, sdt_probe_callback, NULL)); +} + +static void +sdt_provide_probes(void *arg, dtrace_probedesc_t *desc) +{ + if (desc != NULL) + return; + + (void) sdt_provider_listall(sdt_provider_entry, NULL); +} + +static void +sdt_destroy(void *arg, dtrace_id_t id, void *parg) +{ + /* Nothing to do here. */ +} + +static void +sdt_enable(void *arg, dtrace_id_t id, void *parg) +{ + struct sdt_probe *probe = parg; + + probe->id = id; +} + +static void +sdt_disable(void *arg, dtrace_id_t id, void *parg) +{ + struct sdt_probe *probe = parg; + + probe->id = 0; +} + +static int +sdt_provider_reg_callback(struct sdt_provider *prov, void *arg __unused) +{ + return (dtrace_register(prov->name, &sdt_attr, DTRACE_PRIV_USER, + NULL, &sdt_pops, NULL, (dtrace_provider_id_t *) &prov->id)); +} + +static void +sdt_load(void *dummy) +{ + /* Create the /dev/dtrace/sdt entry. */ + sdt_cdev = make_dev(&sdt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "dtrace/sdt"); + + sdt_probe_func = dtrace_probe; + + (void) sdt_provider_listall(sdt_provider_reg_callback, NULL); +} + +static int +sdt_provider_unreg_callback(struct sdt_provider *prov, void *arg __unused) +{ + return (dtrace_unregister(prov->id)); +} + +static int +sdt_unload() +{ + int error = 0; + + sdt_probe_func = sdt_probe_stub; + + (void) sdt_provider_listall(sdt_provider_unreg_callback, NULL); + + destroy_dev(sdt_cdev); + + return (error); +} + +/* ARGSUSED */ +static int +sdt_modevent(module_t mod __unused, int type, void *data __unused) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + break; + + case MOD_UNLOAD: + break; + + case MOD_SHUTDOWN: + break; + + default: + error = EOPNOTSUPP; + break; + + } + + return (error); +} + +/* ARGSUSED */ +static int +sdt_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) +{ + return (0); +} + +SYSINIT(sdt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, sdt_load, NULL); +SYSUNINIT(sdt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, sdt_unload, NULL); + +DEV_MODULE(sdt, sdt_modevent, NULL); +MODULE_VERSION(sdt, 1); +MODULE_DEPEND(sdt, dtrace, 1, 1, 1); +MODULE_DEPEND(sdt, opensolaris, 1, 1, 1); diff --git a/sys/cddl/dev/systrace/systrace.c b/sys/cddl/dev/systrace/systrace.c new file mode 100644 index 0000000..a768245 --- /dev/null +++ b/sys/cddl/dev/systrace/systrace.c @@ -0,0 +1,292 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Portions Copyright 2006-2008 John Birrell jb@freebsd.org + * + * $FreeBSD$ + * + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/cdefs.h> +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/cpuvar.h> +#include <sys/fcntl.h> +#include <sys/filio.h> +#include <sys/kdb.h> +#include <sys/kernel.h> +#include <sys/kmem.h> +#include <sys/kthread.h> +#include <sys/limits.h> +#include <sys/linker.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/poll.h> +#include <sys/proc.h> +#include <sys/selinfo.h> +#include <sys/smp.h> +#include <sys/syscall.h> +#include <sys/sysent.h> +#include <sys/sysproto.h> +#include <sys/uio.h> +#include <sys/unistd.h> +#include <machine/stdarg.h> + +#include <sys/dtrace.h> + +#define SYSTRACE_ARTIFICIAL_FRAMES 1 + +#define SYSTRACE_SHIFT 16 +#define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT) +#define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1)) +#define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id)) +#define SYSTRACE_RETURN(id) (id) + +#if ((1 << SYSTRACE_SHIFT) <= SYS_MAXSYSCALL) +#error 1 << SYSTRACE_SHIFT must exceed number of system calls +#endif + +extern char *syscallnames[]; + +static d_open_t systrace_open; +static int systrace_unload(void); +static void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); +static void systrace_args(int, void *, u_int64_t *, int *); +static void systrace_probe(u_int32_t, int, struct sysent *, void *); +static void systrace_provide(void *, dtrace_probedesc_t *); +static void systrace_destroy(void *, dtrace_id_t, void *); +static void systrace_enable(void *, dtrace_id_t, void *); +static void systrace_disable(void *, dtrace_id_t, void *); +static void systrace_load(void *); + +static struct cdevsw systrace_cdevsw = { + .d_version = D_VERSION, + .d_open = systrace_open, + .d_name = "systrace", +}; + +static dtrace_pattr_t systrace_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +}; + +static dtrace_pops_t systrace_pops = { + systrace_provide, + NULL, + systrace_enable, + systrace_disable, + NULL, + NULL, + systrace_getargdesc, + NULL, + NULL, + systrace_destroy +}; + +static struct cdev *systrace_cdev; +static dtrace_provider_id_t systrace_id; + +/* + * The syscall arguments are processed into a DTrace argument array + * using a generated function. See sys/kern/makesyscalls.sh. + */ +#include <kern/systrace_args.c> + +static void +systrace_probe(u_int32_t id, int sysnum, struct sysent *sysent, void *params) +{ + int n_args = 0; + u_int64_t uargs[8]; + + /* + * Check if this syscall has a custom argument conversion + * function registered. If so, it is a syscall registered + * by a loaded module. + */ + if (sysent->sy_systrace_args_func != NULL) + /* + * Convert the syscall parameters using the registered + * function. + */ + (*sysent->sy_systrace_args_func)(params, uargs, &n_args); + else + /* + * Use the built-in system call argument conversion + * function to translate the syscall structure fields + * into thhe array of 64-bit values that DTrace + * expects. + */ + systrace_args(sysnum, params, uargs, &n_args); + + /* Process the probe using the converted argments. */ + dtrace_probe(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]); +} + +static void +systrace_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) +{ + int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); + + systrace_setargdesc(sysnum, desc->dtargd_ndx, desc->dtargd_native, + sizeof(desc->dtargd_native)); + + if (desc->dtargd_native[0] == '\0') + desc->dtargd_ndx = DTRACE_ARGNONE; + + return; +} + +static void +systrace_provide(void *arg, dtrace_probedesc_t *desc) +{ + int i; + + if (desc != NULL) + return; + + for (i = 0; i < SYS_MAXSYSCALL; i++) { + if (dtrace_probe_lookup(systrace_id, NULL, + syscallnames[i], "entry") != 0) + continue; + + (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], + "entry", SYSTRACE_ARTIFICIAL_FRAMES, + (void *)((uintptr_t)SYSTRACE_ENTRY(i))); + (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i], + "return", SYSTRACE_ARTIFICIAL_FRAMES, + (void *)((uintptr_t)SYSTRACE_RETURN(i))); + } +} + +static void +systrace_destroy(void *arg, dtrace_id_t id, void *parg) +{ +#ifdef DEBUG + int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); + + /* + * There's nothing to do here but assert that we have actually been + * disabled. + */ + if (SYSTRACE_ISENTRY((uintptr_t)parg)) { + ASSERT(sysent[sysnum].sy_entry == 0); + } else { + ASSERT(sysent[sysnum].sy_return == 0); + } +#endif +} + +static void +systrace_enable(void *arg, dtrace_id_t id, void *parg) +{ + int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); + + if (SYSTRACE_ISENTRY((uintptr_t)parg)) + sysent[sysnum].sy_entry = id; + else + sysent[sysnum].sy_return = id; +} + +static void +systrace_disable(void *arg, dtrace_id_t id, void *parg) +{ + int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); + + sysent[sysnum].sy_entry = 0; + sysent[sysnum].sy_return = 0; +} + +static void +systrace_load(void *dummy) +{ + /* Create the /dev/dtrace/systrace entry. */ + systrace_cdev = make_dev(&systrace_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "dtrace/systrace"); + + if (dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, + NULL, &systrace_pops, NULL, &systrace_id) != 0) + return; + + systrace_probe_func = systrace_probe; +} + + +static int +systrace_unload() +{ + int error = 0; + + if ((error = dtrace_unregister(systrace_id)) != 0) + return (error); + + systrace_probe_func = NULL; + + destroy_dev(systrace_cdev); + + return (error); +} + +static int +systrace_modevent(module_t mod __unused, int type, void *data __unused) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + break; + + case MOD_UNLOAD: + break; + + case MOD_SHUTDOWN: + break; + + default: + error = EOPNOTSUPP; + break; + + } + return (error); +} + +static int +systrace_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) +{ + return (0); +} + +SYSINIT(systrace_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, systrace_load, NULL); +SYSUNINIT(systrace_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, systrace_unload, NULL); + +DEV_MODULE(systrace, systrace_modevent, NULL); +MODULE_VERSION(systrace, 1); +MODULE_DEPEND(systrace, dtrace, 1, 1, 1); +MODULE_DEPEND(systrace, opensolaris, 1, 1, 1); |