summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-06 08:02:58 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-06 08:02:58 -0800
commit35b740e4662ef386f0c60e1b60aaf5b44db9914c (patch)
tree502a8f9499bc1b4cb3300d666dab2d01a1921224 /arch
parent423d091dfe58d3109d84c408810a7cfa82f6f184 (diff)
parent9e183426bfb52bb44bf3c443d6587e4d02478603 (diff)
downloadop-kernel-dev-35b740e4662ef386f0c60e1b60aaf5b44db9914c.zip
op-kernel-dev-35b740e4662ef386f0c60e1b60aaf5b44db9914c.tar.gz
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (106 commits) perf kvm: Fix copy & paste error in description perf script: Kill script_spec__delete perf top: Fix a memory leak perf stat: Introduce get_ratio_color() helper perf session: Remove impossible condition check perf tools: Fix feature-bits rework fallout, remove unused variable perf script: Add generic perl handler to process events perf tools: Use for_each_set_bit() to iterate over feature flags perf tools: Unify handling of features when writing feature section perf report: Accept fifos as input file perf tools: Moving code in some files perf tools: Fix out-of-bound access to struct perf_session perf tools: Continue processing header on unknown features perf tools: Improve macros for struct feature_ops perf: builtin-record: Document and check that mmap_pages must be a power of two. perf: builtin-record: Provide advice if mmap'ing fails with EPERM. perf tools: Fix truncated annotation perf script: look up thread using tid instead of pid perf tools: Look up thread names for system wide profiling perf tools: Fix comm for processes with named threads ...
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig4
-rw-r--r--arch/s390/oprofile/hwsampler.c7
-rw-r--r--arch/s390/oprofile/init.c373
-rw-r--r--arch/s390/oprofile/op_counter.h23
-rw-r--r--arch/x86/include/asm/insn.h7
-rw-r--r--arch/x86/include/asm/perf_event.h44
-rw-r--r--arch/x86/kernel/cpu/perf_event.c262
-rw-r--r--arch/x86/kernel/cpu/perf_event.h51
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c88
-rw-r--r--arch/x86/kernel/jump_label.c2
-rw-r--r--arch/x86/lib/inat.c9
-rw-r--r--arch/x86/lib/insn.c4
-rw-r--r--arch/x86/lib/x86-opcode-map.txt606
-rw-r--r--arch/x86/oprofile/Makefile3
-rw-r--r--arch/x86/oprofile/init.c30
-rw-r--r--arch/x86/oprofile/nmi_int.c27
-rw-r--r--arch/x86/oprofile/nmi_timer_int.c50
-rw-r--r--arch/x86/tools/Makefile11
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk21
-rw-r--r--arch/x86/tools/insn_sanity.c275
21 files changed, 1406 insertions, 493 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 4b0669c..2505740 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -30,6 +30,10 @@ config OPROFILE_EVENT_MULTIPLEX
config HAVE_OPROFILE
bool
+config OPROFILE_NMI_TIMER
+ def_bool y
+ depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+
config KPROBES
bool "Kprobes"
depends on MODULES
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index f43c0e4..9daee91 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -22,6 +22,7 @@
#include <asm/irq.h>
#include "hwsampler.h"
+#include "op_counter.h"
#define MAX_NUM_SDB 511
#define MIN_NUM_SDB 1
@@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
if (sample_data_ptr->P == 1) {
/* userspace sample */
unsigned int pid = sample_data_ptr->prim_asn;
+ if (!counter_config.user)
+ goto skip_sample;
rcu_read_lock();
tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
if (tsk)
@@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
rcu_read_unlock();
} else {
/* kernelspace sample */
+ if (!counter_config.kernel)
+ goto skip_sample;
regs = task_pt_regs(current);
}
@@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
!sample_data_ptr->P, tsk);
mutex_unlock(&hws_sem);
-
+ skip_sample:
sample_data_ptr++;
}
}
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index bd58b72..2297be4 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -2,10 +2,11 @@
* arch/s390/oprofile/init.c
*
* S390 Version
- * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Thomas Spatzier (tspat@de.ibm.com)
* Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
* Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
+ * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
*
* @remark Copyright 2002-2011 OProfile authors
*/
@@ -14,6 +15,8 @@
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/module.h>
+#include <asm/processor.h>
#include "../../../drivers/oprofile/oprof.h"
@@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
#ifdef CONFIG_64BIT
#include "hwsampler.h"
+#include "op_counter.h"
#define DEFAULT_INTERVAL 4127518
@@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval;
static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
-static int hwsampler_file;
+static int hwsampler_enabled;
static int hwsampler_running; /* start_mutex must be held to change */
+static int hwsampler_available;
static struct oprofile_operations timer_ops;
+struct op_counter_config counter_config;
+
+enum __force_cpu_type {
+ reserved = 0, /* do not force */
+ timer,
+};
+static int force_cpu_type;
+
+static int set_cpu_type(const char *str, struct kernel_param *kp)
+{
+ if (!strcmp(str, "timer")) {
+ force_cpu_type = timer;
+ printk(KERN_INFO "oprofile: forcing timer to be returned "
+ "as cpu type\n");
+ } else {
+ force_cpu_type = 0;
+ }
+
+ return 0;
+}
+module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
+MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
+ "(report cpu_type \"timer\"");
+
static int oprofile_hwsampler_start(void)
{
int retval;
- hwsampler_running = hwsampler_file;
+ hwsampler_running = hwsampler_enabled;
if (!hwsampler_running)
return timer_ops.start();
@@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void)
return;
}
+/*
+ * File ops used for:
+ * /dev/oprofile/0/enabled
+ * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer)
+ */
+
static ssize_t hwsampler_read(struct file *file, char __user *buf,
size_t count, loff_t *offset)
{
- return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset);
+ return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
}
static ssize_t hwsampler_write(struct file *file, char const __user *buf,
@@ -91,6 +126,9 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
if (retval <= 0)
return retval;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+
if (oprofile_started)
/*
* save to do without locking as we set
@@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
*/
return -EBUSY;
- hwsampler_file = val;
+ hwsampler_enabled = val;
return count;
}
@@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = {
.write = hwsampler_write,
};
+/*
+ * File ops used for:
+ * /dev/oprofile/0/count
+ * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer)
+ *
+ * Make sure that the value is within the hardware range.
+ */
+
+static ssize_t hw_interval_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
+ count, offset);
+}
+
+static ssize_t hw_interval_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+ if (val < oprofile_min_interval)
+ oprofile_hw_interval = oprofile_min_interval;
+ else if (val > oprofile_max_interval)
+ oprofile_hw_interval = oprofile_max_interval;
+ else
+ oprofile_hw_interval = val;
+
+ return count;
+}
+
+static const struct file_operations hw_interval_fops = {
+ .read = hw_interval_read,
+ .write = hw_interval_write,
+};
+
+/*
+ * File ops used for:
+ * /dev/oprofile/0/event
+ * Only a single event with number 0 is supported with this counter.
+ *
+ * /dev/oprofile/0/unit_mask
+ * This is a dummy file needed by the user space tools.
+ * No value other than 0 is accepted or returned.
+ */
+
+static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(0, buf, count, offset);
+}
+
+static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+ if (val != 0)
+ return -EINVAL;
+ return count;
+}
+
+static const struct file_operations zero_fops = {
+ .read = hwsampler_zero_read,
+ .write = hwsampler_zero_write,
+};
+
+/* /dev/oprofile/0/kernel file ops. */
+
+static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(counter_config.kernel,
+ buf, count, offset);
+}
+
+static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ if (val != 0 && val != 1)
+ return -EINVAL;
+
+ counter_config.kernel = val;
+
+ return count;
+}
+
+static const struct file_operations kernel_fops = {
+ .read = hwsampler_kernel_read,
+ .write = hwsampler_kernel_write,
+};
+
+/* /dev/oprofile/0/user file ops. */
+
+static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(counter_config.user,
+ buf, count, offset);
+}
+
+static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ if (val != 0 && val != 1)
+ return -EINVAL;
+
+ counter_config.user = val;
+
+ return count;
+}
+
+static const struct file_operations user_fops = {
+ .read = hwsampler_user_read,
+ .write = hwsampler_user_write,
+};
+
+
+/*
+ * File ops used for: /dev/oprofile/timer/enabled
+ * The value always has to be the inverted value of hwsampler_enabled. So
+ * no separate variable is created. That way we do not need locking.
+ */
+
+static ssize_t timer_enabled_read(struct file *file, char __user *buf,
+ size_t count, loff_t *offset)
+{
+ return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
+}
+
+static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
+ size_t count, loff_t *offset)
+{
+ unsigned long val;
+ int retval;
+
+ if (*offset)
+ return -EINVAL;
+
+ retval = oprofilefs_ulong_from_user(&val, buf, count);
+ if (retval)
+ return retval;
+
+ if (val != 0 && val != 1)
+ return -EINVAL;
+
+ /* Timer cannot be disabled without having hardware sampling. */
+ if (val == 0 && !hwsampler_available)
+ return -EINVAL;
+
+ if (oprofile_started)
+ /*
+ * save to do without locking as we set
+ * hwsampler_running in start() when start_mutex is
+ * held
+ */
+ return -EBUSY;
+
+ hwsampler_enabled = !val;
+
+ return count;
+}
+
+static const struct file_operations timer_enabled_fops = {
+ .read = timer_enabled_read,
+ .write = timer_enabled_write,
+};
+
+
static int oprofile_create_hwsampling_files(struct super_block *sb,
- struct dentry *root)
+ struct dentry *root)
{
- struct dentry *hw_dir;
+ struct dentry *dir;
+
+ dir = oprofilefs_mkdir(sb, root, "timer");
+ if (!dir)
+ return -EINVAL;
+
+ oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops);
+
+ if (!hwsampler_available)
+ return 0;
/* reinitialize default values */
- hwsampler_file = 1;
+ hwsampler_enabled = 1;
+ counter_config.kernel = 1;
+ counter_config.user = 1;
- hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
- if (!hw_dir)
- return -EINVAL;
+ if (!force_cpu_type) {
+ /*
+ * Create the counter file system. A single virtual
+ * counter is created which can be used to
+ * enable/disable hardware sampling dynamically from
+ * user space. The user space will configure a single
+ * counter with a single event. The value of 'event'
+ * and 'unit_mask' are not evaluated by the kernel code
+ * and can only be set to 0.
+ */
+
+ dir = oprofilefs_mkdir(sb, root, "0");
+ if (!dir)
+ return -EINVAL;
- oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
- oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
- &oprofile_hw_interval);
- oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
- &oprofile_min_interval);
- oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
- &oprofile_max_interval);
- oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
- &oprofile_sdbt_blocks);
+ oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops);
+ oprofilefs_create_file(sb, dir, "event", &zero_fops);
+ oprofilefs_create_file(sb, dir, "count", &hw_interval_fops);
+ oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops);
+ oprofilefs_create_file(sb, dir, "kernel", &kernel_fops);
+ oprofilefs_create_file(sb, dir, "user", &user_fops);
+ oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+ &oprofile_sdbt_blocks);
+ } else {
+ /*
+ * Hardware sampling can be used but the cpu_type is
+ * forced to timer in order to deal with legacy user
+ * space tools. The /dev/oprofile/hwsampling fs is
+ * provided in that case.
+ */
+ dir = oprofilefs_mkdir(sb, root, "hwsampling");
+ if (!dir)
+ return -EINVAL;
+
+ oprofilefs_create_file(sb, dir, "hwsampler",
+ &hwsampler_fops);
+ oprofilefs_create_file(sb, dir, "hw_interval",
+ &hw_interval_fops);
+ oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval",
+ &oprofile_min_interval);
+ oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval",
+ &oprofile_max_interval);
+ oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+ &oprofile_sdbt_blocks);
+ }
return 0;
}
static int oprofile_hwsampler_init(struct oprofile_operations *ops)
{
+ /*
+ * Initialize the timer mode infrastructure as well in order
+ * to be able to switch back dynamically. oprofile_timer_init
+ * is not supposed to fail.
+ */
+ if (oprofile_timer_init(ops))
+ BUG();
+
+ memcpy(&timer_ops, ops, sizeof(timer_ops));
+ ops->create_files = oprofile_create_hwsampling_files;
+
+ /*
+ * If the user space tools do not support newer cpu types,
+ * the force_cpu_type module parameter
+ * can be used to always return \"timer\" as cpu type.
+ */
+ if (force_cpu_type != timer) {
+ struct cpuid id;
+
+ get_cpu_id (&id);
+
+ switch (id.machine) {
+ case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
+ case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
+ default: return -ENODEV;
+ }
+ }
+
if (hwsampler_setup())
return -ENODEV;
/*
- * create hwsampler files only if hwsampler_setup() succeeds.
+ * Query the range for the sampling interval from the
+ * hardware.
*/
oprofile_min_interval = hwsampler_query_min_interval();
if (oprofile_min_interval == 0)
@@ -155,23 +466,17 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
if (oprofile_hw_interval > oprofile_max_interval)
oprofile_hw_interval = oprofile_max_interval;
- if (oprofile_timer_init(ops))
- return -ENODEV;
-
- printk(KERN_INFO "oprofile: using hardware sampling\n");
-
- memcpy(&timer_ops, ops, sizeof(timer_ops));
+ printk(KERN_INFO "oprofile: System z hardware sampling "
+ "facility found.\n");
ops->start = oprofile_hwsampler_start;
ops->stop = oprofile_hwsampler_stop;
- ops->create_files = oprofile_create_hwsampling_files;
return 0;
}
static void oprofile_hwsampler_exit(void)
{
- oprofile_timer_exit();
hwsampler_shutdown();
}
@@ -182,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
ops->backtrace = s390_backtrace;
#ifdef CONFIG_64BIT
- return oprofile_hwsampler_init(ops);
+
+ /*
+ * -ENODEV is not reported to the caller. The module itself
+ * will use the timer mode sampling as fallback and this is
+ * always available.
+ */
+ hwsampler_available = oprofile_hwsampler_init(ops) == 0;
+
+ return 0;
#else
return -ENODEV;
#endif
diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h
new file mode 100644
index 0000000..1a8d3ca
--- /dev/null
+++ b/arch/s390/oprofile/op_counter.h
@@ -0,0 +1,23 @@
+/**
+ * arch/s390/oprofile/op_counter.h
+ *
+ * Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
+ *
+ * @remark Copyright 2011 OProfile authors
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+struct op_counter_config {
+ /* `enabled' maps to the hwsampler_file variable. */
+ /* `count' maps to the oprofile_hw_interval variable. */
+ /* `event' and `unit_mask' are unused. */
+ unsigned long kernel;
+ unsigned long user;
+};
+
+extern struct op_counter_config counter_config;
+
+#endif /* OP_COUNTER_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 88c765e..74df3f1 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -137,6 +137,13 @@ static inline int insn_is_avx(struct insn *insn)
return (insn->vex_prefix.value != 0);
}
+/* Ensure this instruction is decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+ return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+ insn->displacement.got && insn->immediate.got;
+}
+
static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
{
if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f61c62f..096c975 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -57,6 +57,7 @@
(1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
+#define ARCH_PERFMON_EVENTS_COUNT 7
/*
* Intel "Architectural Performance Monitoring" CPUID
@@ -72,6 +73,19 @@ union cpuid10_eax {
unsigned int full;
};
+union cpuid10_ebx {
+ struct {
+ unsigned int no_unhalted_core_cycles:1;
+ unsigned int no_instructions_retired:1;
+ unsigned int no_unhalted_reference_cycles:1;
+ unsigned int no_llc_reference:1;
+ unsigned int no_llc_misses:1;
+ unsigned int no_branch_instruction_retired:1;
+ unsigned int no_branch_misses_retired:1;
+ } split;
+ unsigned int full;
+};
+
union cpuid10_edx {
struct {
unsigned int num_counters_fixed:5;
@@ -81,6 +95,15 @@ union cpuid10_edx {
unsigned int full;
};
+struct x86_pmu_capability {
+ int version;
+ int num_counters_gp;
+ int num_counters_fixed;
+ int bit_width_gp;
+ int bit_width_fixed;
+ unsigned int events_mask;
+ int events_mask_len;
+};
/*
* Fixed-purpose performance events:
@@ -89,23 +112,24 @@ union cpuid10_edx {
/*
* All 3 fixed-mode PMCs are configured via this single MSR:
*/
-#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
+#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
/*
* The counts are available in three separate MSRs:
*/
/* Instr_Retired.Any: */
-#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
-#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
+#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
+#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
/* CPU_CLK_Unhalted.Core: */
-#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
-#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
+#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
+#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
/* CPU_CLK_Unhalted.Ref: */
-#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
-#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
+#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
+#define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2)
+#define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)
/*
* We model BTS tracing as another fixed-mode PMC.
@@ -202,6 +226,7 @@ struct perf_guest_switch_msr {
};
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
#else
static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
@@ -209,6 +234,11 @@ static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
return NULL;
}
+static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+ memset(cap, 0, sizeof(*cap));
+}
+
static inline void perf_events_lapic_init(void) { }
#endif
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bda212..5adce10 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event)
return event->pmu == &pmu;
}
+/*
+ * Event scheduler state:
+ *
+ * Assign events iterating over all events and counters, beginning
+ * with events with least weights first. Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+ int weight;
+ int event; /* event index */
+ int counter; /* counter index */
+ int unassigned; /* number of events to be assigned left */
+ unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define SCHED_STATES_MAX 2
+
+struct perf_sched {
+ int max_weight;
+ int max_events;
+ struct event_constraint **constraints;
+ struct sched_state state;
+ int saved_states;
+ struct sched_state saved[SCHED_STATES_MAX];
+};
+
+/*
+ * Initialize interator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+ int num, int wmin, int wmax)
+{
+ int idx;
+
+ memset(sched, 0, sizeof(*sched));
+ sched->max_events = num;
+ sched->max_weight = wmax;
+ sched->constraints = c;
+
+ for (idx = 0; idx < num; idx++) {
+ if (c[idx]->weight == wmin)
+ break;
+ }
+
+ sched->state.event = idx; /* start with min weight */
+ sched->state.weight = wmin;
+ sched->state.unassigned = num;
+}
+
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+ if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+ return;
+
+ sched->saved[sched->saved_states] = sched->state;
+ sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+ if (!sched->saved_states)
+ return false;
+
+ sched->saved_states--;
+ sched->state = sched->saved[sched->saved_states];
+
+ /* continue with next counter: */
+ clear_bit(sched->state.counter++, sched->state.used);
+
+ return true;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool __perf_sched_find_counter(struct perf_sched *sched)
+{
+ struct event_constraint *c;
+ int idx;
+
+ if (!sched->state.unassigned)
+ return false;
+
+ if (sched->state.event >= sched->max_events)
+ return false;
+
+ c = sched->constraints[sched->state.event];
+
+ /* Prefer fixed purpose counters */
+ if (x86_pmu.num_counters_fixed) {
+ idx = X86_PMC_IDX_FIXED;
+ for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+ if (!__test_and_set_bit(idx, sched->state.used))
+ goto done;
+ }
+ }
+ /* Grab the first unused counter starting with idx */
+ idx = sched->state.counter;
+ for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
+ if (!__test_and_set_bit(idx, sched->state.used))
+ goto done;
+ }
+
+ return false;
+
+done:
+ sched->state.counter = idx;
+
+ if (c->overlap)
+ perf_sched_save_state(sched);
+
+ return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+ while (!__perf_sched_find_counter(sched)) {
+ if (!perf_sched_restore_state(sched))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+ struct event_constraint *c;
+
+ if (!sched->state.unassigned || !--sched->state.unassigned)
+ return false;
+
+ do {
+ /* next event */
+ sched->state.event++;
+ if (sched->state.event >= sched->max_events) {
+ /* next weight */
+ sched->state.event = 0;
+ sched->state.weight++;
+ if (sched->state.weight > sched->max_weight)
+ return false;
+ }
+ c = sched->constraints[sched->state.event];
+ } while (c->weight != sched->state.weight);
+
+ sched->state.counter = 0; /* start with first counter */
+
+ return true;
+}
+
+/*
+ * Assign a counter for each event.
+ */
+static int perf_assign_events(struct event_constraint **constraints, int n,
+ int wmin, int wmax, int *assign)
+{
+ struct perf_sched sched;
+
+ perf_sched_init(&sched, constraints, n, wmin, wmax);
+
+ do {
+ if (!perf_sched_find_counter(&sched))
+ break; /* failed */
+ if (assign)
+ assign[sched.state.event] = sched.state.counter;
+ } while (perf_sched_next_event(&sched));
+
+ return sched.state.unassigned;
+}
+
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- int i, j, w, wmax, num = 0;
+ int i, wmin, wmax, num = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
- for (i = 0; i < n; i++) {
+ for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
constraints[i] = c;
+ wmin = min(wmin, c->weight);
+ wmax = max(wmax, c->weight);
}
/*
@@ -521,60 +698,12 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (assign)
assign[i] = hwc->idx;
}
- if (i == n)
- goto done;
-
- /*
- * begin slow path
- */
-
- bitmap_zero(used_mask, X86_PMC_IDX_MAX);
- /*
- * weight = number of possible counters
- *
- * 1 = most constrained, only works on one counter
- * wmax = least constrained, works on any counter
- *
- * assign events to counters starting with most
- * constrained events.
- */
- wmax = x86_pmu.num_counters;
+ /* slow path */
+ if (i != n)
+ num = perf_assign_events(constraints, n, wmin, wmax, assign);
/*
- * when fixed event counters are present,
- * wmax is incremented by 1 to account
- * for one more choice
- */
- if (x86_pmu.num_counters_fixed)
- wmax++;
-
- for (w = 1, num = n; num && w <= wmax; w++) {
- /* for each event */
- for (i = 0; num && i < n; i++) {
- c = constraints[i];
- hwc = &cpuc->event_list[i]->hw;
-
- if (c->weight != w)
- continue;
-
- for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
- if (!test_bit(j, used_mask))
- break;
- }
-
- if (j == X86_PMC_IDX_MAX)
- break;
-
- __set_bit(j, used_mask);
-
- if (assign)
- assign[i] = j;
- num--;
- }
- }
-done:
- /*
* scheduling failed or is just a simulation,
* free resources if necessary
*/
@@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void)
static int __init init_hw_perf_events(void)
{
+ struct x86_pmu_quirk *quirk;
struct event_constraint *c;
int err;
@@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void)
pr_cont("%s PMU driver.\n", x86_pmu.name);
- if (x86_pmu.quirks)
- x86_pmu.quirks();
+ for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
+ quirk->func();
if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void)
unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters);
+ 0, x86_pmu.num_counters, 0);
if (x86_pmu.event_constraints) {
+ /*
+ * event on fixed counter2 (REF_CYCLES) only works on this
+ * counter, so do not extend mask to generic counters
+ */
for_each_event_constraint(c, x86_pmu.event_constraints) {
- if (c->cmask != X86_RAW_EVENT_MASK)
+ if (c->cmask != X86_RAW_EVENT_MASK
+ || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
continue;
+ }
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
c->weight += x86_pmu.num_counters;
@@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
return misc;
}
+
+void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+ cap->version = x86_pmu.version;
+ cap->num_counters_gp = x86_pmu.num_counters;
+ cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+ cap->bit_width_gp = x86_pmu.cntval_bits;
+ cap->bit_width_fixed = x86_pmu.cntval_bits;
+ cap->events_mask = (unsigned int)x86_pmu.events_maskl;
+ cap->events_mask_len = x86_pmu.events_mask_len;
+}
+EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d4..8944062 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -45,6 +45,7 @@ struct event_constraint {
u64 code;
u64 cmask;
int weight;
+ int overlap;
};
struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
void *kfree_on_online;
};
-#define __EVENT_CONSTRAINT(c, n, m, w) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
{ .idxmsk64 = (n) }, \
.code = (c), \
.cmask = (m), \
.weight = (w), \
+ .overlap = (o), \
}
#define EVENT_CONSTRAINT(c, n, m) \
- __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it needs to know which subsequent events will be
+ * scheduled. It may fail to schedule the events then. So we set the
+ * overlap flag for such constraints to give the scheduler a hint which
+ * events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!) which
+ * will increase scheduling cycles for an over-commited system
+ * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
+ * and its counter masks must be kept at a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
+ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
/*
* Constraint on the Event code.
@@ -235,6 +261,11 @@ union perf_capabilities {
u64 capabilities;
};
+struct x86_pmu_quirk {
+ struct x86_pmu_quirk *next;
+ void (*func)(void);
+};
+
/*
* struct x86_pmu - generic x86 pmu
*/
@@ -259,6 +290,11 @@ struct x86_pmu {
int num_counters_fixed;
int cntval_bits;
u64 cntval_mask;
+ union {
+ unsigned long events_maskl;
+ unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
+ };
+ int events_mask_len;
int apic;
u64 max_period;
struct event_constraint *
@@ -268,7 +304,7 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
struct event_constraint *event_constraints;
- void (*quirks)(void);
+ struct x86_pmu_quirk *quirks;
int perfctr_second_write;
int (*cpu_prepare)(int cpu);
@@ -309,6 +345,15 @@ struct x86_pmu {
struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
};
+#define x86_add_quirk(func_) \
+do { \
+ static struct x86_pmu_quirk __quirk __initdata = { \
+ .func = func_, \
+ }; \
+ __quirk.next = x86_pmu.quirks; \
+ x86_pmu.quirks = &__quirk; \
+} while (0)
+
#define ERF_NO_HT_SHARING 1
#define ERF_HAS_RSP_1 2
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index aeefd45..0397b23 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 121f1be..3bd37bd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
};
static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- /*
- * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
- * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
- * ratio between these counters.
- */
- /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
- /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
EVENT_CONSTRAINT_END
};
@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.guest_get_msrs = intel_guest_get_msrs,
};
-static void intel_clovertown_quirks(void)
+static __init void intel_clovertown_quirk(void)
{
/*
* PEBS is unreliable due to:
@@ -1545,19 +1541,60 @@ static void intel_clovertown_quirks(void)
x86_pmu.pebs_constraints = NULL;
}
-static void intel_sandybridge_quirks(void)
+static __init void intel_sandybridge_quirk(void)
{
printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
x86_pmu.pebs = 0;
x86_pmu.pebs_constraints = NULL;
}
+static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
+ { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
+ { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
+ { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
+ { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
+ { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
+ { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
+ { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
+};
+
+static __init void intel_arch_events_quirk(void)
+{
+ int bit;
+
+ /* disable event that reported as not presend by cpuid */
+ for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
+ intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
+ printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
+ intel_arch_events_map[bit].name);
+ }
+}
+
+static __init void intel_nehalem_quirk(void)
+{
+ union cpuid10_ebx ebx;
+
+ ebx.full = x86_pmu.events_maskl;
+ if (ebx.split.no_branch_misses_retired) {
+ /*
+ * Erratum AAJ80 detected, we work it around by using
+ * the BR_MISP_EXEC.ANY event. This will over-count
+ * branch-misses, but it's still much better than the
+ * architectural event which is often completely bogus:
+ */
+ intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+ ebx.split.no_branch_misses_retired = 0;
+ x86_pmu.events_maskl = ebx.full;
+ printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
+ }
+}
+
__init int intel_pmu_init(void)
{
union cpuid10_edx edx;
union cpuid10_eax eax;
+ union cpuid10_ebx ebx;
unsigned int unused;
- unsigned int ebx;
int version;
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -1574,8 +1611,8 @@ __init int intel_pmu_init(void)
* Check whether the Architectural PerfMon supports
* Branch Misses Retired hw_event or not.
*/
- cpuid(10, &eax.full, &ebx, &unused, &edx.full);
- if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+ cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
+ if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
return -ENODEV;
version = eax.split.version_id;
@@ -1589,6 +1626,9 @@ __init int intel_pmu_init(void)
x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
+ x86_pmu.events_maskl = ebx.full;
+ x86_pmu.events_mask_len = eax.split.mask_length;
+
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events:
@@ -1608,6 +1648,8 @@ __init int intel_pmu_init(void)
intel_ds_init();
+ x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
+
/*
* Install the hw-cache-events table:
*/
@@ -1617,7 +1659,7 @@ __init int intel_pmu_init(void)
break;
case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
- x86_pmu.quirks = intel_clovertown_quirks;
+ x86_add_quirk(intel_clovertown_quirk);
case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
case 29: /* six-core 45 nm xeon "Dunnington" */
@@ -1651,17 +1693,8 @@ __init int intel_pmu_init(void)
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
- if (ebx & 0x40) {
- /*
- * Erratum AAJ80 detected, we work it around by using
- * the BR_MISP_EXEC.ANY event. This will over-count
- * branch-misses, but it's still much better than the
- * architectural event which is often completely bogus:
- */
- intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+ x86_add_quirk(intel_nehalem_quirk);
- pr_cont("erratum AAJ80 worked around, ");
- }
pr_cont("Nehalem events, ");
break;
@@ -1701,7 +1734,7 @@ __init int intel_pmu_init(void)
break;
case 42: /* SandyBridge */
- x86_pmu.quirks = intel_sandybridge_quirks;
+ x86_add_quirk(intel_sandybridge_quirk);
case 45: /* SandyBridge, "Romely-EP" */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -1738,5 +1771,6 @@ __init int intel_pmu_init(void)
break;
}
}
+
return 0;
}
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ea9d5f2f..2889b3d 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
put_online_cpus();
}
-void arch_jump_label_transform_static(struct jump_entry *entry,
+__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
__jump_label_transform(entry, type, text_poke_early);
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index 46fc4ee..88ad5fb 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -82,9 +82,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
const insn_attr_t *table;
if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
return 0;
- table = inat_avx_tables[vex_m][vex_p];
+ /* At first, this checks the master table */
+ table = inat_avx_tables[vex_m][0];
if (!table)
return 0;
+ if (!inat_is_group(table[opcode]) && vex_p) {
+ /* If this is not a group, get attribute directly */
+ table = inat_avx_tables[vex_m][vex_p];
+ if (!table)
+ return 0;
+ }
return table[opcode];
}
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 374562e..5a1f9f3 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -202,7 +202,7 @@ void insn_get_opcode(struct insn *insn)
m = insn_vex_m_bits(insn);
p = insn_vex_p_bits(insn);
insn->attr = inat_get_avx_attribute(op, m, p);
- if (!inat_accept_vex(insn->attr))
+ if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
insn->attr = 0; /* This instruction is bad */
goto end; /* VEX has only 1 byte for opcode */
}
@@ -249,6 +249,8 @@ void insn_get_modrm(struct insn *insn)
pfx = insn_last_prefix(insn);
insn->attr = inat_get_group_attribute(mod, pfx,
insn->attr);
+ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+ insn->attr = 0; /* This is bad */
}
}
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index a793da5..5b83c51 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -1,5 +1,11 @@
# x86 Opcode Maps
#
+# This is (mostly) based on following documentations.
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2
+# (#325383-040US, October 2011)
+# - Intel(R) Advanced Vector Extensions Programming Reference
+# (#319433-011,JUNE 2011).
+#
#<Opcode maps>
# Table: table-name
# Referrer: escaped-name
@@ -15,10 +21,13 @@
# EndTable
#
# AVX Superscripts
-# (VEX): this opcode can accept VEX prefix.
-# (oVEX): this opcode requires VEX prefix.
-# (o128): this opcode only supports 128bit VEX.
-# (o256): this opcode only supports 256bit VEX.
+# (v): this opcode requires VEX prefix.
+# (v1): this opcode only supports 128bit VEX.
+#
+# Last Prefix Superscripts
+# - (66): the last prefix is 0x66
+# - (F3): the last prefix is 0xF3
+# - (F2): the last prefix is 0xF2
#
Table: one byte opcode
@@ -199,8 +208,8 @@ a0: MOV AL,Ob
a1: MOV rAX,Ov
a2: MOV Ob,AL
a3: MOV Ov,rAX
-a4: MOVS/B Xb,Yb
-a5: MOVS/W/D/Q Xv,Yv
+a4: MOVS/B Yb,Xb
+a5: MOVS/W/D/Q Yv,Xv
a6: CMPS/B Xb,Yb
a7: CMPS/W/D Xv,Yv
a8: TEST AL,Ib
@@ -233,8 +242,8 @@ c0: Grp2 Eb,Ib (1A)
c1: Grp2 Ev,Ib (1A)
c2: RETN Iw (f64)
c3: RETN
-c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix)
-c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix)
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
c6: Grp11 Eb,Ib (1A)
c7: Grp11 Ev,Iz (1A)
c8: ENTER Iw,Ib
@@ -320,14 +329,19 @@ AVXcode: 1
# 3DNow! uses the last imm byte as opcode extension.
0f: 3DNow! Pq,Qq,Ib
# 0x0f 0x10-0x1f
-10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128)
-11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128)
-12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX)
-13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128)
-14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX)
-15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX)
-16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX)
-17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128)
+# NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands
+# but it actually has operands. And also, vmovss and vmovsd only accept 128bit.
+# MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form.
+# Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming
+# Reference A.1
+10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
+11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
+12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
+13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
+14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
+15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
+16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
+17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
18: Grp16 (1A)
19:
1a:
@@ -345,14 +359,14 @@ AVXcode: 1
25:
26:
27:
-28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX)
-29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX)
-2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128)
-2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX)
-2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128)
-2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128)
-2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128)
-2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128)
+28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
+29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
+2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
+2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
+2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
+2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
+2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
+2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
# 0x0f 0x30-0x3f
30: WRMSR
31: RDTSC
@@ -388,65 +402,66 @@ AVXcode: 1
4e: CMOVLE/NG Gv,Ev
4f: CMOVNLE/G Gv,Ev
# 0x0f 0x50-0x5f
-50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX)
-51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128)
-52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128)
-53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128)
-54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX)
-55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX)
-56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX)
-57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX)
-58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128)
-59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128)
-5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128)
-5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX)
-5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128)
-5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128)
-5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128)
-5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128)
+50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
+51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
+52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
+53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
+54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
+55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
+56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
+57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
+58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
+59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
+5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
+5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
+5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
+5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
+5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
+5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
# 0x0f 0x60-0x6f
-60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128)
-61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128)
-62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128)
-63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128)
-64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128)
-65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128)
-66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128)
-67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128)
-68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128)
-69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128)
-6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128)
-6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128)
-6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128)
-6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128)
-6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128)
-6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX)
+60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
+61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
+62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
+63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
+64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
+65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
+66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
+67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
+68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
+69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
+6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
+6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
+6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
+6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
+6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
# 0x0f 0x70-0x7f
-70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128)
+70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
71: Grp12 (1A)
72: Grp13 (1A)
73: Grp14 (1A)
-74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128)
-75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128)
-76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128)
-77: emms/vzeroupper/vzeroall (VEX)
-78: VMREAD Ed/q,Gd/q
-79: VMWRITE Gd/q,Ed/q
+74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
+75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
+76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
+# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
+77: emms | vzeroupper | vzeroall
+78: VMREAD Ey,Gy
+79: VMWRITE Gy,Ey
7a:
7b:
-7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX)
-7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX)
-7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128)
-7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX)
+7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
+7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
+7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
# 0x0f 0x80-0x8f
80: JO Jz (f64)
81: JNO Jz (f64)
-82: JB/JNAE/JC Jz (f64)
-83: JNB/JAE/JNC Jz (f64)
-84: JZ/JE Jz (f64)
-85: JNZ/JNE Jz (f64)
+82: JB/JC/JNAE Jz (f64)
+83: JAE/JNB/JNC Jz (f64)
+84: JE/JZ Jz (f64)
+85: JNE/JNZ Jz (f64)
86: JBE/JNA Jz (f64)
-87: JNBE/JA Jz (f64)
+87: JA/JNBE Jz (f64)
88: JS Jz (f64)
89: JNS Jz (f64)
8a: JP/JPE Jz (f64)
@@ -502,18 +517,18 @@ b8: JMPE | POPCNT Gv,Ev (F3)
b9: Grp10 (1A)
ba: Grp8 Ev,Ib (1A)
bb: BTC Ev,Gv
-bc: BSF Gv,Ev
-bd: BSR Gv,Ev
+bc: BSF Gv,Ev | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev | LZCNT Gv,Ev (F3)
be: MOVSX Gv,Eb
bf: MOVSX Gv,Ew
# 0x0f 0xc0-0xcf
c0: XADD Eb,Gb
c1: XADD Ev,Gv
-c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX)
-c3: movnti Md/q,Gd/q
-c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128)
-c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128)
-c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX)
+c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
+c3: movnti My,Gy
+c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
+c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
+c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
c7: Grp9 (1A)
c8: BSWAP RAX/EAX/R8/R8D
c9: BSWAP RCX/ECX/R9/R9D
@@ -524,55 +539,55 @@ cd: BSWAP RBP/EBP/R13/R13D
ce: BSWAP RSI/ESI/R14/R14D
cf: BSWAP RDI/EDI/R15/R15D
# 0x0f 0xd0-0xdf
-d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX)
-d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128)
-d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128)
-d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128)
-d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128)
-d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128)
-d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
-d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128)
-d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128)
-d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128)
-da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128)
-db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128)
-dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128)
-dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128)
-de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128)
-df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128)
+d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
+d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
+d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
+d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
+d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
+d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
+d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
+d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
+d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
+d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
+da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
+dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
+dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
+de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
# 0x0f 0xe0-0xef
-e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128)
-e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128)
-e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128)
-e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128)
-e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128)
-e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128)
-e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX)
-e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX)
-e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128)
-e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128)
-ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128)
-eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128)
-ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128)
-ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128)
-ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128)
-ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128)
+e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
+e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
+e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
+e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
+e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
+e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
+e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
+e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
+e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
+ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
+ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
+ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
+ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
# 0x0f 0xf0-0xff
-f0: lddqu Vdq,Mdq (F2),(VEX)
-f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128)
-f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128)
-f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128)
-f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128)
-f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128)
-f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128)
-f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128)
-f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128)
-f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128)
-fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128)
-fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128)
-fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128)
-fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128)
-fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128)
+f0: vlddqu Vx,Mx (F2)
+f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
+f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
+f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
+f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
+f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
+f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
+f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
+f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
+f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
+fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
+fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
+fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
+fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
+fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
ff:
EndTable
@@ -580,155 +595,193 @@ Table: 3-byte opcode 1 (0x0f 0x38)
Referrer: 3-byte escape 1
AVXcode: 2
# 0x0f 0x38 0x00-0x0f
-00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128)
-01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128)
-02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128)
-03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128)
-04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128)
-05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128)
-06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128)
-07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128)
-08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128)
-09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128)
-0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128)
-0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128)
-0c: Vpermilps /r (66),(oVEX)
-0d: Vpermilpd /r (66),(oVEX)
-0e: vtestps /r (66),(oVEX)
-0f: vtestpd /r (66),(oVEX)
+00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
+01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
+02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
+03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
+04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
+05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
+06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
+07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
+08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
+09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
+0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
+0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
+0c: vpermilps Vx,Hx,Wx (66),(v)
+0d: vpermilpd Vx,Hx,Wx (66),(v)
+0e: vtestps Vx,Wx (66),(v)
+0f: vtestpd Vx,Wx (66),(v)
# 0x0f 0x38 0x10-0x1f
10: pblendvb Vdq,Wdq (66)
11:
12:
-13:
+13: vcvtph2ps Vx,Wx,Ib (66),(v)
14: blendvps Vdq,Wdq (66)
15: blendvpd Vdq,Wdq (66)
-16:
-17: ptest Vdq,Wdq (66),(VEX)
-18: vbroadcastss /r (66),(oVEX)
-19: vbroadcastsd /r (66),(oVEX),(o256)
-1a: vbroadcastf128 /r (66),(oVEX),(o256)
+16: vpermps Vqq,Hqq,Wqq (66),(v)
+17: vptest Vx,Wx (66)
+18: vbroadcastss Vx,Wd (66),(v)
+19: vbroadcastsd Vqq,Wq (66),(v)
+1a: vbroadcastf128 Vqq,Mdq (66),(v)
1b:
-1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128)
-1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128)
-1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128)
+1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
+1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
+1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
1f:
# 0x0f 0x38 0x20-0x2f
-20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128)
-21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128)
-22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128)
-23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128)
-24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128)
-25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128)
+20: vpmovsxbw Vx,Ux/Mq (66),(v1)
+21: vpmovsxbd Vx,Ux/Md (66),(v1)
+22: vpmovsxbq Vx,Ux/Mw (66),(v1)
+23: vpmovsxwd Vx,Ux/Mq (66),(v1)
+24: vpmovsxwq Vx,Ux/Md (66),(v1)
+25: vpmovsxdq Vx,Ux/Mq (66),(v1)
26:
27:
-28: pmuldq Vdq,Wdq (66),(VEX),(o128)
-29: pcmpeqq Vdq,Wdq (66),(VEX),(o128)
-2a: movntdqa Vdq,Mdq (66),(VEX),(o128)
-2b: packusdw Vdq,Wdq (66),(VEX),(o128)
-2c: vmaskmovps(ld) /r (66),(oVEX)
-2d: vmaskmovpd(ld) /r (66),(oVEX)
-2e: vmaskmovps(st) /r (66),(oVEX)
-2f: vmaskmovpd(st) /r (66),(oVEX)
+28: vpmuldq Vx,Hx,Wx (66),(v1)
+29: vpcmpeqq Vx,Hx,Wx (66),(v1)
+2a: vmovntdqa Vx,Mx (66),(v1)
+2b: vpackusdw Vx,Hx,Wx (66),(v1)
+2c: vmaskmovps Vx,Hx,Mx (66),(v)
+2d: vmaskmovpd Vx,Hx,Mx (66),(v)
+2e: vmaskmovps Mx,Hx,Vx (66),(v)
+2f: vmaskmovpd Mx,Hx,Vx (66),(v)
# 0x0f 0x38 0x30-0x3f
-30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128)
-31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128)
-32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128)
-33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128)
-34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128)
-35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128)
-36:
-37: pcmpgtq Vdq,Wdq (66),(VEX),(o128)
-38: pminsb Vdq,Wdq (66),(VEX),(o128)
-39: pminsd Vdq,Wdq (66),(VEX),(o128)
-3a: pminuw Vdq,Wdq (66),(VEX),(o128)
-3b: pminud Vdq,Wdq (66),(VEX),(o128)
-3c: pmaxsb Vdq,Wdq (66),(VEX),(o128)
-3d: pmaxsd Vdq,Wdq (66),(VEX),(o128)
-3e: pmaxuw Vdq,Wdq (66),(VEX),(o128)
-3f: pmaxud Vdq,Wdq (66),(VEX),(o128)
+30: vpmovzxbw Vx,Ux/Mq (66),(v1)
+31: vpmovzxbd Vx,Ux/Md (66),(v1)
+32: vpmovzxbq Vx,Ux/Mw (66),(v1)
+33: vpmovzxwd Vx,Ux/Mq (66),(v1)
+34: vpmovzxwq Vx,Ux/Md (66),(v1)
+35: vpmovzxdq Vx,Ux/Mq (66),(v1)
+36: vpermd Vqq,Hqq,Wqq (66),(v)
+37: vpcmpgtq Vx,Hx,Wx (66),(v1)
+38: vpminsb Vx,Hx,Wx (66),(v1)
+39: vpminsd Vx,Hx,Wx (66),(v1)
+3a: vpminuw Vx,Hx,Wx (66),(v1)
+3b: vpminud Vx,Hx,Wx (66),(v1)
+3c: vpmaxsb Vx,Hx,Wx (66),(v1)
+3d: vpmaxsd Vx,Hx,Wx (66),(v1)
+3e: vpmaxuw Vx,Hx,Wx (66),(v1)
+3f: vpmaxud Vx,Hx,Wx (66),(v1)
# 0x0f 0x38 0x40-0x8f
-40: pmulld Vdq,Wdq (66),(VEX),(o128)
-41: phminposuw Vdq,Wdq (66),(VEX),(o128)
-80: INVEPT Gd/q,Mdq (66)
-81: INVPID Gd/q,Mdq (66)
+40: vpmulld Vx,Hx,Wx (66),(v1)
+41: vphminposuw Vdq,Wdq (66),(v1)
+42:
+43:
+44:
+45: vpsrlvd/q Vx,Hx,Wx (66),(v)
+46: vpsravd Vx,Hx,Wx (66),(v)
+47: vpsllvd/q Vx,Hx,Wx (66),(v)
+# Skip 0x48-0x57
+58: vpbroadcastd Vx,Wx (66),(v)
+59: vpbroadcastq Vx,Wx (66),(v)
+5a: vbroadcasti128 Vqq,Mdq (66),(v)
+# Skip 0x5b-0x77
+78: vpbroadcastb Vx,Wx (66),(v)
+79: vpbroadcastw Vx,Wx (66),(v)
+# Skip 0x7a-0x7f
+80: INVEPT Gy,Mdq (66)
+81: INVPID Gy,Mdq (66)
+82: INVPCID Gy,Mdq (66)
+8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
+8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
# 0x0f 0x38 0x90-0xbf (FMA)
-96: vfmaddsub132pd/ps /r (66),(VEX)
-97: vfmsubadd132pd/ps /r (66),(VEX)
-98: vfmadd132pd/ps /r (66),(VEX)
-99: vfmadd132sd/ss /r (66),(VEX),(o128)
-9a: vfmsub132pd/ps /r (66),(VEX)
-9b: vfmsub132sd/ss /r (66),(VEX),(o128)
-9c: vfnmadd132pd/ps /r (66),(VEX)
-9d: vfnmadd132sd/ss /r (66),(VEX),(o128)
-9e: vfnmsub132pd/ps /r (66),(VEX)
-9f: vfnmsub132sd/ss /r (66),(VEX),(o128)
-a6: vfmaddsub213pd/ps /r (66),(VEX)
-a7: vfmsubadd213pd/ps /r (66),(VEX)
-a8: vfmadd213pd/ps /r (66),(VEX)
-a9: vfmadd213sd/ss /r (66),(VEX),(o128)
-aa: vfmsub213pd/ps /r (66),(VEX)
-ab: vfmsub213sd/ss /r (66),(VEX),(o128)
-ac: vfnmadd213pd/ps /r (66),(VEX)
-ad: vfnmadd213sd/ss /r (66),(VEX),(o128)
-ae: vfnmsub213pd/ps /r (66),(VEX)
-af: vfnmsub213sd/ss /r (66),(VEX),(o128)
-b6: vfmaddsub231pd/ps /r (66),(VEX)
-b7: vfmsubadd231pd/ps /r (66),(VEX)
-b8: vfmadd231pd/ps /r (66),(VEX)
-b9: vfmadd231sd/ss /r (66),(VEX),(o128)
-ba: vfmsub231pd/ps /r (66),(VEX)
-bb: vfmsub231sd/ss /r (66),(VEX),(o128)
-bc: vfnmadd231pd/ps /r (66),(VEX)
-bd: vfnmadd231sd/ss /r (66),(VEX),(o128)
-be: vfnmsub231pd/ps /r (66),(VEX)
-bf: vfnmsub231sd/ss /r (66),(VEX),(o128)
+90: vgatherdd/q Vx,Hx,Wx (66),(v)
+91: vgatherqd/q Vx,Hx,Wx (66),(v)
+92: vgatherdps/d Vx,Hx,Wx (66),(v)
+93: vgatherqps/d Vx,Hx,Wx (66),(v)
+94:
+95:
+96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
+97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
+98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
+99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
+9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
+9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
+9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
+a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
+a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
+a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
+ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
+ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
+af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
+b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
+b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
+b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
+bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
+bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
+bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
# 0x0f 0x38 0xc0-0xff
-db: aesimc Vdq,Wdq (66),(VEX),(o128)
-dc: aesenc Vdq,Wdq (66),(VEX),(o128)
-dd: aesenclast Vdq,Wdq (66),(VEX),(o128)
-de: aesdec Vdq,Wdq (66),(VEX),(o128)
-df: aesdeclast Vdq,Wdq (66),(VEX),(o128)
-f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2)
-f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2)
+db: VAESIMC Vdq,Wdq (66),(v1)
+dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
+dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
+de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
+df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
+f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2)
+f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2)
+f3: ANDN Gy,By,Ey (v)
+f4: Grp17 (1A)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
+f6: MULX By,Gy,rDX,Ey (F2),(v)
+f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
EndTable
Table: 3-byte opcode 2 (0x0f 0x3a)
Referrer: 3-byte escape 2
AVXcode: 3
# 0x0f 0x3a 0x00-0xff
-04: vpermilps /r,Ib (66),(oVEX)
-05: vpermilpd /r,Ib (66),(oVEX)
-06: vperm2f128 /r,Ib (66),(oVEX),(o256)
-08: roundps Vdq,Wdq,Ib (66),(VEX)
-09: roundpd Vdq,Wdq,Ib (66),(VEX)
-0a: roundss Vss,Wss,Ib (66),(VEX),(o128)
-0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128)
-0c: blendps Vdq,Wdq,Ib (66),(VEX)
-0d: blendpd Vdq,Wdq,Ib (66),(VEX)
-0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128)
-0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128)
-14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128)
-15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128)
-16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128)
-17: extractps Ed,Vdq,Ib (66),(VEX),(o128)
-18: vinsertf128 /r,Ib (66),(oVEX),(o256)
-19: vextractf128 /r,Ib (66),(oVEX),(o256)
-20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128)
-21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128)
-22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128)
-40: dpps Vdq,Wdq,Ib (66),(VEX)
-41: dppd Vdq,Wdq,Ib (66),(VEX),(o128)
-42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128)
-44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128)
-4a: vblendvps /r,Ib (66),(oVEX)
-4b: vblendvpd /r,Ib (66),(oVEX)
-4c: vpblendvb /r,Ib (66),(oVEX),(o128)
-60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128)
-61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128)
-62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128)
-63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128)
-df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128)
+00: vpermq Vqq,Wqq,Ib (66),(v)
+01: vpermpd Vqq,Wqq,Ib (66),(v)
+02: vpblendd Vx,Hx,Wx,Ib (66),(v)
+03:
+04: vpermilps Vx,Wx,Ib (66),(v)
+05: vpermilpd Vx,Wx,Ib (66),(v)
+06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
+07:
+08: vroundps Vx,Wx,Ib (66)
+09: vroundpd Vx,Wx,Ib (66)
+0a: vroundss Vss,Wss,Ib (66),(v1)
+0b: vroundsd Vsd,Wsd,Ib (66),(v1)
+0c: vblendps Vx,Hx,Wx,Ib (66)
+0d: vblendpd Vx,Hx,Wx,Ib (66)
+0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
+0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
+14: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
+15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
+16: vpextrd/q Ey,Vdq,Ib (66),(v1)
+17: vextractps Ed,Vdq,Ib (66),(v1)
+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
+19: vextractf128 Wdq,Vqq,Ib (66),(v)
+1d: vcvtps2ph Wx,Vx,Ib (66),(v)
+20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
+21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
+22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
+39: vextracti128 Wdq,Vqq,Ib (66),(v)
+40: vdpps Vx,Hx,Wx,Ib (66)
+41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
+44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
+46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
+4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
+4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
+4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
+60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
+61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
+62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
+63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
+f0: RORX Gy,Ey,Ib (F2),(v)
EndTable
GrpTable: Grp1
@@ -790,7 +843,7 @@ GrpTable: Grp5
2: CALLN Ev (f64)
3: CALLF Ep
4: JMPN Ev (f64)
-5: JMPF Ep
+5: JMPF Mp
6: PUSH Ev (d64)
7:
EndTable
@@ -807,7 +860,7 @@ EndTable
GrpTable: Grp7
0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001)
-2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
5:
@@ -824,44 +877,45 @@ EndTable
GrpTable: Grp9
1: CMPXCHG8B/16B Mq/Mdq
-6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3)
-7: VMPTRST Mq
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+7: VMPTRST Mq | VMPTRST Mq (F3)
EndTable
GrpTable: Grp10
EndTable
GrpTable: Grp11
+# Note: the operands are given by group opcode
0: MOV
EndTable
GrpTable: Grp12
-2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128)
-4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128)
-6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128)
+2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
+4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
+6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
EndTable
GrpTable: Grp13
-2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128)
-4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128)
-6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128)
+2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
+6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
EndTable
GrpTable: Grp14
-2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128)
-3: psrldq Udq,Ib (66),(11B),(VEX),(o128)
-6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128)
-7: pslldq Udq,Ib (66),(11B),(VEX),(o128)
+2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
+3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
+6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
+7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
EndTable
GrpTable: Grp15
-0: fxsave
-1: fxstor
-2: ldmxcsr (VEX)
-3: stmxcsr (VEX)
+0: fxsave | RDFSBASE Ry (F3),(11B)
+1: fxstor | RDGSBASE Ry (F3),(11B)
+2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
+3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
4: XSAVE
5: XRSTOR | lfence (11B)
-6: mfence (11B)
+6: XSAVEOPT | mfence (11B)
7: clflush | sfence (11B)
EndTable
@@ -872,6 +926,12 @@ GrpTable: Grp16
3: prefetch T2
EndTable
+GrpTable: Grp17
+1: BLSR By,Ey (v)
+2: BLSMSK By,Ey (v)
+3: BLSI By,Ey (v)
+EndTable
+
# AMD's Prefetch Group
GrpTable: GrpP
0: PREFETCH
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile
index 446902b..1599f56 100644
--- a/arch/x86/oprofile/Makefile
+++ b/arch/x86/oprofile/Makefile
@@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprof.o cpu_buffer.o buffer_sync.o \
event_buffer.o oprofile_files.o \
oprofilefs.o oprofile_stats.o \
- timer_int.o )
+ timer_int.o nmi_timer_int.o )
oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \
op_model_ppro.o op_model_p4.o
-oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c
index f148cf6..9e138d0 100644
--- a/arch/x86/oprofile/init.c
+++ b/arch/x86/oprofile/init.c
@@ -16,37 +16,23 @@
* with the NMI mode driver.
*/
+#ifdef CONFIG_X86_LOCAL_APIC
extern int op_nmi_init(struct oprofile_operations *ops);
-extern int op_nmi_timer_init(struct oprofile_operations *ops);
extern void op_nmi_exit(void);
-extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
+#else
+static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; }
+static void op_nmi_exit(void) { }
+#endif
-static int nmi_timer;
+extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
- int ret;
-
- ret = -ENODEV;
-
-#ifdef CONFIG_X86_LOCAL_APIC
- ret = op_nmi_init(ops);
-#endif
- nmi_timer = (ret != 0);
-#ifdef CONFIG_X86_IO_APIC
- if (nmi_timer)
- ret = op_nmi_timer_init(ops);
-#endif
ops->backtrace = x86_backtrace;
-
- return ret;
+ return op_nmi_init(ops);
}
-
void oprofile_arch_exit(void)
{
-#ifdef CONFIG_X86_LOCAL_APIC
- if (!nmi_timer)
- op_nmi_exit();
-#endif
+ op_nmi_exit();
}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 75f9528..26b8a85 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -595,24 +595,36 @@ static int __init p4_init(char **cpu_type)
return 0;
}
-static int force_arch_perfmon;
-static int force_cpu_type(const char *str, struct kernel_param *kp)
+enum __force_cpu_type {
+ reserved = 0, /* do not force */
+ timer,
+ arch_perfmon,
+};
+
+static int force_cpu_type;
+
+static int set_cpu_type(const char *str, struct kernel_param *kp)
{
- if (!strcmp(str, "arch_perfmon")) {
- force_arch_perfmon = 1;
+ if (!strcmp(str, "timer")) {
+ force_cpu_type = timer;
+ printk(KERN_INFO "oprofile: forcing NMI timer mode\n");
+ } else if (!strcmp(str, "arch_perfmon")) {
+ force_cpu_type = arch_perfmon;
printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
+ } else {
+ force_cpu_type = 0;
}
return 0;
}
-module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
+module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
static int __init ppro_init(char **cpu_type)
{
__u8 cpu_model = boot_cpu_data.x86_model;
struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
- if (force_arch_perfmon && cpu_has_arch_perfmon)
+ if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
return 0;
/*
@@ -679,6 +691,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
if (!cpu_has_apic)
return -ENODEV;
+ if (force_cpu_type == timer)
+ return -ENODEV;
+
switch (vendor) {
case X86_VENDOR_AMD:
/* Needs to be at least an Athlon (or hammer in 32bit mode) */
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
deleted file mode 100644
index 7f8052c..0000000
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
- * @file nmi_timer_int.c
- *
- * @remark Copyright 2003 OProfile authors
- * @remark Read the file COPYING
- *
- * @author Zwane Mwaikambo <zwane@linuxpower.ca>
- */
-
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/oprofile.h>
-#include <linux/rcupdate.h>
-#include <linux/kdebug.h>
-
-#include <asm/nmi.h>
-#include <asm/apic.h>
-#include <asm/ptrace.h>
-
-static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs)
-{
- oprofile_add_sample(regs, 0);
- return NMI_HANDLED;
-}
-
-static int timer_start(void)
-{
- if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify,
- 0, "oprofile-timer"))
- return 1;
- return 0;
-}
-
-
-static void timer_stop(void)
-{
- unregister_nmi_handler(NMI_LOCAL, "oprofile-timer");
- synchronize_sched(); /* Allow already-started NMIs to complete. */
-}
-
-
-int __init op_nmi_timer_init(struct oprofile_operations *ops)
-{
- ops->start = timer_start;
- ops->stop = timer_stop;
- ops->cpu_type = "timer";
- printk(KERN_INFO "oprofile: using NMI timer interrupt.\n");
- return 0;
-}
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index f820826..d511aa9 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -18,14 +18,21 @@ chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk
quiet_cmd_posttest = TEST $@
cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose)
-posttest: $(obj)/test_get_len vmlinux
+quiet_cmd_sanitytest = TEST $@
+ cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000
+
+posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
$(call cmd,posttest)
+ $(call cmd,sanitytest)
-hostprogs-y := test_get_len
+hostprogs-y += test_get_len insn_sanity
# -I needed for generated C source and C source which in the kernel tree.
HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
+HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
+
# Dependencies are also needed.
$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
+$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index eaf11f5..5f6a5b6 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -47,7 +47,7 @@ BEGIN {
sep_expr = "^\\|$"
group_expr = "^Grp[0-9A-Za-z]+"
- imm_expr = "^[IJAO][a-z]"
+ imm_expr = "^[IJAOL][a-z]"
imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
@@ -59,6 +59,7 @@ BEGIN {
imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
imm_flag["Ob"] = "INAT_MOFFSET"
imm_flag["Ov"] = "INAT_MOFFSET"
+ imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
@@ -70,8 +71,12 @@ BEGIN {
lprefix3_expr = "\\(F2\\)"
max_lprefix = 4
- vexok_expr = "\\(VEX\\)"
- vexonly_expr = "\\(oVEX\\)"
+ # All opcodes starting with lower-case 'v' or with (v1) superscript
+ # accepts VEX prefix
+ vexok_opcode_expr = "^v.*"
+ vexok_expr = "\\(v1\\)"
+ # All opcodes with (v) superscript supports *only* VEX prefix
+ vexonly_expr = "\\(v\\)"
prefix_expr = "\\(Prefix\\)"
prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -85,8 +90,8 @@ BEGIN {
prefix_num["SEG=GS"] = "INAT_PFX_GS"
prefix_num["SEG=SS"] = "INAT_PFX_SS"
prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
- prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2"
- prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3"
+ prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
+ prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
clear_vars()
}
@@ -310,12 +315,10 @@ function convert_operands(count,opnd, i,j,imm,mod)
if (match(opcode, fpu_expr))
flags = add_flags(flags, "INAT_MODRM")
- # check VEX only code
+ # check VEX codes
if (match(ext, vexonly_expr))
flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
-
- # check VEX only code
- if (match(ext, vexok_expr))
+ else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
flags = add_flags(flags, "INAT_VEXOK")
# check prefixes
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
new file mode 100644
index 0000000..cc2f8c1
--- /dev/null
+++ b/arch/x86/tools/insn_sanity.c
@@ -0,0 +1,275 @@
+/*
+ * x86 decoder sanity test - based on test_get_insn.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ * Copyright (C) Hitachi, Ltd., 2011
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#define unlikely(cond) (cond)
+#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
+
+#include <asm/insn.h>
+#include <inat.c>
+#include <insn.c>
+
+/*
+ * Test of instruction analysis against tampering.
+ * Feed random binary to instruction decoder and ensure not to
+ * access out-of-instruction-buffer.
+ */
+
+#define DEFAULT_MAX_ITER 10000
+#define INSN_NOP 0x90
+
+static const char *prog; /* Program name */
+static int verbose; /* Verbosity */
+static int x86_64; /* x86-64 bit mode flag */
+static unsigned int seed; /* Random seed */
+static unsigned long iter_start; /* Start of iteration number */
+static unsigned long iter_end = DEFAULT_MAX_ITER; /* End of iteration number */
+static FILE *input_file; /* Input file name */
+
+static void usage(const char *err)
+{
+ if (err)
+ fprintf(stderr, "Error: %s\n\n", err);
+ fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
+ fprintf(stderr, "\t-y 64bit mode\n");
+ fprintf(stderr, "\t-n 32bit mode\n");
+ fprintf(stderr, "\t-v Verbosity(-vv dumps any decoded result)\n");
+ fprintf(stderr, "\t-s Give a random seed (and iteration number)\n");
+ fprintf(stderr, "\t-m Give a maximum iteration number\n");
+ fprintf(stderr, "\t-i Give an input file with decoded binary\n");
+ exit(1);
+}
+
+static void dump_field(FILE *fp, const char *name, const char *indent,
+ struct insn_field *field)
+{
+ fprintf(fp, "%s.%s = {\n", indent, name);
+ fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n",
+ indent, field->value, field->bytes[0], field->bytes[1],
+ field->bytes[2], field->bytes[3]);
+ fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent,
+ field->got, field->nbytes);
+}
+
+static void dump_insn(FILE *fp, struct insn *insn)
+{
+ fprintf(fp, "Instruction = {\n");
+ dump_field(fp, "prefixes", "\t", &insn->prefixes);
+ dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix);
+ dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix);
+ dump_field(fp, "opcode", "\t", &insn->opcode);
+ dump_field(fp, "modrm", "\t", &insn->modrm);
+ dump_field(fp, "sib", "\t", &insn->sib);
+ dump_field(fp, "displacement", "\t", &insn->displacement);
+ dump_field(fp, "immediate1", "\t", &insn->immediate1);
+ dump_field(fp, "immediate2", "\t", &insn->immediate2);
+ fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n",
+ insn->attr, insn->opnd_bytes, insn->addr_bytes);
+ fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n",
+ insn->length, insn->x86_64, insn->kaddr);
+}
+
+static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
+ unsigned char *insn_buf, struct insn *insn)
+{
+ int i;
+
+ fprintf(fp, "%s:\n", msg);
+
+ dump_insn(fp, insn);
+
+ fprintf(fp, "You can reproduce this with below command(s);\n");
+
+ /* Input a decoded instruction sequence directly */
+ fprintf(fp, " $ echo ");
+ for (i = 0; i < MAX_INSN_SIZE; i++)
+ fprintf(fp, " %02x", insn_buf[i]);
+ fprintf(fp, " | %s -i -\n", prog);
+
+ if (!input_file) {
+ fprintf(fp, "Or \n");
+ /* Give a seed and iteration number */
+ fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter);
+ }
+}
+
+static void init_random_seed(void)
+{
+ int fd;
+
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd < 0)
+ goto fail;
+
+ if (read(fd, &seed, sizeof(seed)) != sizeof(seed))
+ goto fail;
+
+ close(fd);
+ return;
+fail:
+ usage("Failed to open /dev/urandom");
+}
+
+/* Read given instruction sequence from the input file */
+static int read_next_insn(unsigned char *insn_buf)
+{
+ char buf[256] = "", *tmp;
+ int i;
+
+ tmp = fgets(buf, ARRAY_SIZE(buf), input_file);
+ if (tmp == NULL || feof(input_file))
+ return 0;
+
+ for (i = 0; i < MAX_INSN_SIZE; i++) {
+ insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16);
+ if (*tmp != ' ')
+ break;
+ }
+
+ return i;
+}
+
+static int generate_insn(unsigned char *insn_buf)
+{
+ int i;
+
+ if (input_file)
+ return read_next_insn(insn_buf);
+
+ /* Fills buffer with random binary up to MAX_INSN_SIZE */
+ for (i = 0; i < MAX_INSN_SIZE - 1; i += 2)
+ *(unsigned short *)(&insn_buf[i]) = random() & 0xffff;
+
+ while (i < MAX_INSN_SIZE)
+ insn_buf[i++] = random() & 0xff;
+
+ return i;
+}
+
+static void parse_args(int argc, char **argv)
+{
+ int c;
+ char *tmp = NULL;
+ int set_seed = 0;
+
+ prog = argv[0];
+ while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) {
+ switch (c) {
+ case 'y':
+ x86_64 = 1;
+ break;
+ case 'n':
+ x86_64 = 0;
+ break;
+ case 'v':
+ verbose++;
+ break;
+ case 'i':
+ if (strcmp("-", optarg) == 0)
+ input_file = stdin;
+ else
+ input_file = fopen(optarg, "r");
+ if (!input_file)
+ usage("Failed to open input file");
+ break;
+ case 's':
+ seed = (unsigned int)strtoul(optarg, &tmp, 0);
+ if (*tmp == ',') {
+ optarg = tmp + 1;
+ iter_start = strtoul(optarg, &tmp, 0);
+ }
+ if (*tmp != '\0' || tmp == optarg)
+ usage("Failed to parse seed");
+ set_seed = 1;
+ break;
+ case 'm':
+ iter_end = strtoul(optarg, &tmp, 0);
+ if (*tmp != '\0' || tmp == optarg)
+ usage("Failed to parse max_iter");
+ break;
+ default:
+ usage(NULL);
+ }
+ }
+
+ /* Check errors */
+ if (iter_end < iter_start)
+ usage("Max iteration number must be bigger than iter-num");
+
+ if (set_seed && input_file)
+ usage("Don't use input file (-i) with random seed (-s)");
+
+ /* Initialize random seed */
+ if (!input_file) {
+ if (!set_seed) /* No seed is given */
+ init_random_seed();
+ srand(seed);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ struct insn insn;
+ int insns = 0;
+ int errors = 0;
+ unsigned long i;
+ unsigned char insn_buf[MAX_INSN_SIZE * 2];
+
+ parse_args(argc, argv);
+
+ /* Prepare stop bytes with NOPs */
+ memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
+
+ for (i = 0; i < iter_end; i++) {
+ if (generate_insn(insn_buf) <= 0)
+ break;
+
+ if (i < iter_start) /* Skip to given iteration number */
+ continue;
+
+ /* Decode an instruction */
+ insn_init(&insn, insn_buf, x86_64);
+ insn_get_length(&insn);
+
+ if (insn.next_byte <= insn.kaddr ||
+ insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
+ /* Access out-of-range memory */
+ dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn);
+ errors++;
+ } else if (verbose && !insn_complete(&insn))
+ dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
+ else if (verbose >= 2)
+ dump_insn(stdout, &insn);
+ insns++;
+ }
+
+ fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed);
+
+ return errors ? 1 : 0;
+}
OpenPOWER on IntegriCloud