summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig29
-rw-r--r--arch/alpha/kernel/perf_event.c4
-rw-r--r--arch/alpha/kernel/srmcons.c78
-rw-r--r--arch/arm/include/asm/perf_event.h4
-rw-r--r--arch/arm/kernel/perf_event.c4
-rw-r--r--arch/arm/kernel/process.c4
-rw-r--r--arch/arm/kernel/smp.c7
-rw-r--r--arch/arm/mach-imx/mx31moboard-devboard.c24
-rw-r--r--arch/arm/mach-imx/mx31moboard-marxbot.c24
-rw-r--r--arch/arm/mach-pxa/pxa3xx-ulpi.c20
-rw-r--r--arch/arm/mach-tegra/include/mach/usb_phy.h2
-rw-r--r--arch/arm/mach-tegra/usb_phy.c4
-rw-r--r--arch/arm/plat-mxc/include/mach/mxc_ehci.h2
-rw-r--r--arch/arm/plat-mxc/include/mach/ulpi.h6
-rw-r--r--arch/arm/plat-mxc/ulpi.c8
-rw-r--r--arch/avr32/kernel/process.c4
-rw-r--r--arch/blackfin/kernel/process.c4
-rw-r--r--arch/cris/kernel/process.c4
-rw-r--r--arch/frv/include/asm/perf_event.h2
-rw-r--r--arch/frv/kernel/process.c4
-rw-r--r--arch/h8300/kernel/process.c4
-rw-r--r--arch/hexagon/include/asm/perf_event.h2
-rw-r--r--arch/hexagon/kernel/smp.c2
-rw-r--r--arch/ia64/hp/sim/boot/fw-emu.c17
-rw-r--r--arch/ia64/hp/sim/hpsim_irq.c36
-rw-r--r--arch/ia64/hp/sim/hpsim_setup.c6
-rw-r--r--arch/ia64/hp/sim/simeth.c19
-rw-r--r--arch/ia64/hp/sim/simserial.c705
-rw-r--r--arch/ia64/include/asm/hpsim.h2
-rw-r--r--arch/ia64/include/asm/paravirt.h6
-rw-r--r--arch/ia64/kernel/paravirt.c4
-rw-r--r--arch/ia64/kernel/process.c4
-rw-r--r--arch/m32r/kernel/process.c4
-rw-r--r--arch/m68k/emu/nfcon.c1
-rw-r--r--arch/m68k/kernel/process_mm.c4
-rw-r--r--arch/m68k/kernel/process_no.c4
-rw-r--r--arch/microblaze/kernel/process.c4
-rw-r--r--arch/mips/ath79/dev-usb.c31
-rw-r--r--arch/mips/include/asm/jump_label.h2
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c4
-rw-r--r--arch/mips/kernel/process.c4
-rw-r--r--arch/mn10300/kernel/process.c4
-rw-r--r--arch/parisc/kernel/pdc_cons.c59
-rw-r--r--arch/parisc/kernel/process.c4
-rw-r--r--arch/powerpc/include/asm/jump_label.h2
-rw-r--r--arch/powerpc/include/asm/perf_event_server.h2
-rw-r--r--arch/powerpc/kernel/idle.c8
-rw-r--r--arch/powerpc/kernel/perf_event.c10
-rw-r--r--arch/powerpc/platforms/iseries/setup.c8
-rw-r--r--arch/s390/include/asm/jump_label.h2
-rw-r--r--arch/s390/include/asm/perf_event.h1
-rw-r--r--arch/s390/kernel/irq.c9
-rw-r--r--arch/s390/kernel/process.c4
-rw-r--r--arch/s390/kernel/smp.c6
-rw-r--r--arch/score/kernel/process.c4
-rw-r--r--arch/sh/kernel/idle.c4
-rw-r--r--arch/sh/kernel/perf_event.c4
-rw-r--r--arch/sparc/include/asm/jump_label.h2
-rw-r--r--arch/sparc/kernel/perf_event.c4
-rw-r--r--arch/sparc/kernel/process_32.c8
-rw-r--r--arch/sparc/kernel/process_64.c10
-rw-r--r--arch/tile/kernel/process.c4
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c12
-rw-r--r--arch/x86/crypto/crc32c-intel.c11
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c12
-rw-r--r--arch/x86/include/asm/cpu_device_id.h13
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/hardirq.h1
-rw-r--r--arch/x86/include/asm/inat.h5
-rw-r--r--arch/x86/include/asm/insn.h18
-rw-r--r--arch/x86/include/asm/jump_label.h6
-rw-r--r--arch/x86/include/asm/msr-index.h7
-rw-r--r--arch/x86/include/asm/paravirt.h6
-rw-r--r--arch/x86/include/asm/perf_event.h2
-rw-r--r--arch/x86/include/asm/timer.h8
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/amd.c3
-rw-r--r--arch/x86/kernel/cpu/match.c91
-rw-r--r--arch/x86/kernel/cpu/perf_event.c167
-rw-r--r--arch/x86/kernel/cpu/perf_event.h50
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c141
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c22
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c526
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/irq_32.c11
-rw-r--r--arch/x86/kernel/kprobes-common.h102
-rw-r--r--arch/x86/kernel/kprobes-opt.c512
-rw-r--r--arch/x86/kernel/kprobes.c664
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/microcode_core.c15
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/process.c24
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/smpboot.c13
-rw-r--r--arch/x86/kernel/time.c3
-rw-r--r--arch/x86/kernel/tsc.c3
-rw-r--r--arch/x86/kernel/tsc_sync.c29
-rw-r--r--arch/x86/kvm/mmu_audit.c8
-rw-r--r--arch/x86/lib/inat.c36
-rw-r--r--arch/x86/lib/insn.c13
-rw-r--r--arch/xtensa/kernel/process.c4
-rw-r--r--arch/xtensa/platforms/iss/console.c22
106 files changed, 2193 insertions, 1656 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 4f55c73..5b448a7 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -47,18 +47,29 @@ config KPROBES
If in doubt, say "N".
config JUMP_LABEL
- bool "Optimize trace point call sites"
+ bool "Optimize very unlikely/likely branches"
depends on HAVE_ARCH_JUMP_LABEL
help
+ This option enables a transparent branch optimization that
+ makes certain almost-always-true or almost-always-false branch
+ conditions even cheaper to execute within the kernel.
+
+ Certain performance-sensitive kernel code, such as trace points,
+ scheduler functionality, networking code and KVM have such
+ branches and include support for this optimization technique.
+
If it is detected that the compiler has support for "asm goto",
- the kernel will compile trace point locations with just a
- nop instruction. When trace points are enabled, the nop will
- be converted to a jump to the trace function. This technique
- lowers overhead and stress on the branch prediction of the
- processor.
-
- On i386, options added to the compiler flags may increase
- the size of the kernel slightly.
+ the kernel will compile such branches with just a nop
+ instruction. When the condition flag is toggled to true, the
+ nop will be converted to a jump instruction to execute the
+ conditional block of instructions.
+
+ This technique lowers overhead and stress on the branch prediction
+ of the processor and generally makes the kernel faster. The update
+ of the condition is slower, but those are always very rare.
+
+ ( On 32-bit x86, the necessary options added to the compiler
+ flags may increase the size of the kernel slightly. )
config OPTPROBES
def_bool y
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 8143cd7..0dae252 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event)
{
int err;
+ /* does not support taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index 783f4e5..3ea8094 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -30,10 +30,9 @@ static int srm_is_registered_console = 0;
#define MAX_SRM_CONSOLE_DEVICES 1 /* only support 1 console device */
struct srmcons_private {
- struct tty_struct *tty;
+ struct tty_port port;
struct timer_list timer;
- spinlock_t lock;
-};
+} srmcons_singleton;
typedef union _srmcons_result {
struct {
@@ -68,22 +67,21 @@ static void
srmcons_receive_chars(unsigned long data)
{
struct srmcons_private *srmconsp = (struct srmcons_private *)data;
+ struct tty_port *port = &srmconsp->port;
unsigned long flags;
int incr = 10;
local_irq_save(flags);
if (spin_trylock(&srmcons_callback_lock)) {
- if (!srmcons_do_receive_chars(srmconsp->tty))
+ if (!srmcons_do_receive_chars(port->tty))
incr = 100;
spin_unlock(&srmcons_callback_lock);
}
- spin_lock(&srmconsp->lock);
- if (srmconsp->tty) {
- srmconsp->timer.expires = jiffies + incr;
- add_timer(&srmconsp->timer);
- }
- spin_unlock(&srmconsp->lock);
+ spin_lock(&port->lock);
+ if (port->tty)
+ mod_timer(&srmconsp->timer, jiffies + incr);
+ spin_unlock(&port->lock);
local_irq_restore(flags);
}
@@ -156,56 +154,22 @@ srmcons_chars_in_buffer(struct tty_struct *tty)
}
static int
-srmcons_get_private_struct(struct srmcons_private **ps)
-{
- static struct srmcons_private *srmconsp = NULL;
- static DEFINE_SPINLOCK(srmconsp_lock);
- unsigned long flags;
- int retval = 0;
-
- if (srmconsp == NULL) {
- srmconsp = kmalloc(sizeof(*srmconsp), GFP_KERNEL);
- spin_lock_irqsave(&srmconsp_lock, flags);
-
- if (srmconsp == NULL)
- retval = -ENOMEM;
- else {
- srmconsp->tty = NULL;
- spin_lock_init(&srmconsp->lock);
- init_timer(&srmconsp->timer);
- }
-
- spin_unlock_irqrestore(&srmconsp_lock, flags);
- }
-
- *ps = srmconsp;
- return retval;
-}
-
-static int
srmcons_open(struct tty_struct *tty, struct file *filp)
{
- struct srmcons_private *srmconsp;
+ struct srmcons_private *srmconsp = &srmcons_singleton;
+ struct tty_port *port = &srmconsp->port;
unsigned long flags;
- int retval;
-
- retval = srmcons_get_private_struct(&srmconsp);
- if (retval)
- return retval;
- spin_lock_irqsave(&srmconsp->lock, flags);
+ spin_lock_irqsave(&port->lock, flags);
- if (!srmconsp->tty) {
+ if (!port->tty) {
tty->driver_data = srmconsp;
-
- srmconsp->tty = tty;
- srmconsp->timer.function = srmcons_receive_chars;
- srmconsp->timer.data = (unsigned long)srmconsp;
- srmconsp->timer.expires = jiffies + 10;
- add_timer(&srmconsp->timer);
+ tty->port = port;
+ port->tty = tty; /* XXX proper refcounting */
+ mod_timer(&srmconsp->timer, jiffies + 10);
}
- spin_unlock_irqrestore(&srmconsp->lock, flags);
+ spin_unlock_irqrestore(&port->lock, flags);
return 0;
}
@@ -214,16 +178,17 @@ static void
srmcons_close(struct tty_struct *tty, struct file *filp)
{
struct srmcons_private *srmconsp = tty->driver_data;
+ struct tty_port *port = &srmconsp->port;
unsigned long flags;
- spin_lock_irqsave(&srmconsp->lock, flags);
+ spin_lock_irqsave(&port->lock, flags);
if (tty->count == 1) {
- srmconsp->tty = NULL;
+ port->tty = NULL;
del_timer(&srmconsp->timer);
}
- spin_unlock_irqrestore(&srmconsp->lock, flags);
+ spin_unlock_irqrestore(&port->lock, flags);
}
@@ -240,6 +205,9 @@ static const struct tty_operations srmcons_ops = {
static int __init
srmcons_init(void)
{
+ tty_port_init(&srmcons_singleton.port);
+ setup_timer(&srmcons_singleton.timer, srmcons_receive_chars,
+ (unsigned long)&srmcons_singleton);
if (srm_is_registered_console) {
struct tty_driver *driver;
int err;
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 99cfe36..7523340 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,10 +12,6 @@
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
-/* ARM performance counters start from 1 (in the cp15 accesses) so use the
- * same indexes here for consistency. */
-#define PERF_EVENT_INDEX_OFFSET 1
-
/* ARM perf PMU IDs for use by internal perf clients. */
enum arm_perf_pmu_ids {
ARM_PERF_PMU_ID_XSCALE1 = 0,
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index b2abfa1..8a89d3b 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0;
atomic_t *active_events = &armpmu->active_events;
+ /* does not support taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
if (armpmu->map_event(event) == -ENOENT)
return -ENOENT;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 971d65c..c2ae3cd 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -239,9 +239,7 @@ void cpu_idle(void)
leds_event(led_idle_end);
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index cdeb727..d616ed5 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -295,13 +295,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
*/
percpu_timer_setup();
- while (!cpu_active(cpu))
- cpu_relax();
-
- /*
- * cpu_active bit is set, so it's safe to enalbe interrupts
- * now.
- */
local_irq_enable();
local_fiq_enable();
diff --git a/arch/arm/mach-imx/mx31moboard-devboard.c b/arch/arm/mach-imx/mx31moboard-devboard.c
index 0aa2536..cc285e5 100644
--- a/arch/arm/mach-imx/mx31moboard-devboard.c
+++ b/arch/arm/mach-imx/mx31moboard-devboard.c
@@ -158,7 +158,7 @@ static int devboard_usbh1_hw_init(struct platform_device *pdev)
#define USBH1_VBUSEN_B IOMUX_TO_GPIO(MX31_PIN_NFRE_B)
#define USBH1_MODE IOMUX_TO_GPIO(MX31_PIN_NFALE)
-static int devboard_isp1105_init(struct otg_transceiver *otg)
+static int devboard_isp1105_init(struct usb_phy *otg)
{
int ret = gpio_request(USBH1_MODE, "usbh1-mode");
if (ret)
@@ -177,7 +177,7 @@ static int devboard_isp1105_init(struct otg_transceiver *otg)
}
-static int devboard_isp1105_set_vbus(struct otg_transceiver *otg, bool on)
+static int devboard_isp1105_set_vbus(struct usb_otg *otg, bool on)
{
if (on)
gpio_set_value(USBH1_VBUSEN_B, 0);
@@ -194,18 +194,24 @@ static struct mxc_usbh_platform_data usbh1_pdata __initdata = {
static int __init devboard_usbh1_init(void)
{
- struct otg_transceiver *otg;
+ struct usb_phy *phy;
struct platform_device *pdev;
- otg = kzalloc(sizeof(*otg), GFP_KERNEL);
- if (!otg)
+ phy = kzalloc(sizeof(*phy), GFP_KERNEL);
+ if (!phy)
return -ENOMEM;
- otg->label = "ISP1105";
- otg->init = devboard_isp1105_init;
- otg->set_vbus = devboard_isp1105_set_vbus;
+ phy->otg = kzalloc(sizeof(struct usb_otg), GFP_KERNEL);
+ if (!phy->otg) {
+ kfree(phy);
+ return -ENOMEM;
+ }
+
+ phy->label = "ISP1105";
+ phy->init = devboard_isp1105_init;
+ phy->otg->set_vbus = devboard_isp1105_set_vbus;
- usbh1_pdata.otg = otg;
+ usbh1_pdata.otg = phy;
pdev = imx31_add_mxc_ehci_hs(1, &usbh1_pdata);
if (IS_ERR(pdev))
diff --git a/arch/arm/mach-imx/mx31moboard-marxbot.c b/arch/arm/mach-imx/mx31moboard-marxbot.c
index bb639cb..135c90e 100644
--- a/arch/arm/mach-imx/mx31moboard-marxbot.c
+++ b/arch/arm/mach-imx/mx31moboard-marxbot.c
@@ -272,7 +272,7 @@ static int marxbot_usbh1_hw_init(struct platform_device *pdev)
#define USBH1_VBUSEN_B IOMUX_TO_GPIO(MX31_PIN_NFRE_B)
#define USBH1_MODE IOMUX_TO_GPIO(MX31_PIN_NFALE)
-static int marxbot_isp1105_init(struct otg_transceiver *otg)
+static int marxbot_isp1105_init(struct usb_phy *otg)
{
int ret = gpio_request(USBH1_MODE, "usbh1-mode");
if (ret)
@@ -291,7 +291,7 @@ static int marxbot_isp1105_init(struct otg_transceiver *otg)
}
-static int marxbot_isp1105_set_vbus(struct otg_transceiver *otg, bool on)
+static int marxbot_isp1105_set_vbus(struct usb_otg *otg, bool on)
{
if (on)
gpio_set_value(USBH1_VBUSEN_B, 0);
@@ -308,18 +308,24 @@ static struct mxc_usbh_platform_data usbh1_pdata __initdata = {
static int __init marxbot_usbh1_init(void)
{
- struct otg_transceiver *otg;
+ struct usb_phy *phy;
struct platform_device *pdev;
- otg = kzalloc(sizeof(*otg), GFP_KERNEL);
- if (!otg)
+ phy = kzalloc(sizeof(*phy), GFP_KERNEL);
+ if (!phy)
return -ENOMEM;
- otg->label = "ISP1105";
- otg->init = marxbot_isp1105_init;
- otg->set_vbus = marxbot_isp1105_set_vbus;
+ phy->otg = kzalloc(sizeof(struct usb_otg), GFP_KERNEL);
+ if (!phy->otg) {
+ kfree(phy);
+ return -ENOMEM;
+ }
+
+ phy->label = "ISP1105";
+ phy->init = marxbot_isp1105_init;
+ phy->otg->set_vbus = marxbot_isp1105_set_vbus;
- usbh1_pdata.otg = otg;
+ usbh1_pdata.otg = phy;
pdev = imx31_add_mxc_ehci_hs(1, &usbh1_pdata);
if (IS_ERR(pdev))
diff --git a/arch/arm/mach-pxa/pxa3xx-ulpi.c b/arch/arm/mach-pxa/pxa3xx-ulpi.c
index e28dfb88..5ead6d4 100644
--- a/arch/arm/mach-pxa/pxa3xx-ulpi.c
+++ b/arch/arm/mach-pxa/pxa3xx-ulpi.c
@@ -33,7 +33,7 @@ struct pxa3xx_u2d_ulpi {
struct clk *clk;
void __iomem *mmio_base;
- struct otg_transceiver *otg;
+ struct usb_phy *otg;
unsigned int ulpi_mode;
};
@@ -79,7 +79,7 @@ static int pxa310_ulpi_poll(void)
return -ETIMEDOUT;
}
-static int pxa310_ulpi_read(struct otg_transceiver *otg, u32 reg)
+static int pxa310_ulpi_read(struct usb_phy *otg, u32 reg)
{
int err;
@@ -98,7 +98,7 @@ static int pxa310_ulpi_read(struct otg_transceiver *otg, u32 reg)
return u2d_readl(U2DOTGUCR) & U2DOTGUCR_RDATA;
}
-static int pxa310_ulpi_write(struct otg_transceiver *otg, u32 val, u32 reg)
+static int pxa310_ulpi_write(struct usb_phy *otg, u32 val, u32 reg)
{
if (pxa310_ulpi_get_phymode() != SYNCH) {
pr_warning("%s: PHY is not in SYNCH mode!\n", __func__);
@@ -111,7 +111,7 @@ static int pxa310_ulpi_write(struct otg_transceiver *otg, u32 val, u32 reg)
return pxa310_ulpi_poll();
}
-struct otg_io_access_ops pxa310_ulpi_access_ops = {
+struct usb_phy_io_ops pxa310_ulpi_access_ops = {
.read = pxa310_ulpi_read,
.write = pxa310_ulpi_write,
};
@@ -139,19 +139,19 @@ static int pxa310_start_otg_host_transcvr(struct usb_bus *host)
pxa310_otg_transceiver_rtsm();
- err = otg_init(u2d->otg);
+ err = usb_phy_init(u2d->otg);
if (err) {
pr_err("OTG transceiver init failed");
return err;
}
- err = otg_set_vbus(u2d->otg, 1);
+ err = otg_set_vbus(u2d->otg->otg, 1);
if (err) {
pr_err("OTG transceiver VBUS set failed");
return err;
}
- err = otg_set_host(u2d->otg, host);
+ err = otg_set_host(u2d->otg->otg, host);
if (err)
pr_err("OTG transceiver Host mode set failed");
@@ -189,9 +189,9 @@ static void pxa310_stop_otg_hc(void)
{
pxa310_otg_transceiver_rtsm();
- otg_set_host(u2d->otg, NULL);
- otg_set_vbus(u2d->otg, 0);
- otg_shutdown(u2d->otg);
+ otg_set_host(u2d->otg->otg, NULL);
+ otg_set_vbus(u2d->otg->otg, 0);
+ usb_phy_shutdown(u2d->otg);
}
static void pxa310_u2d_setup_otg_hc(void)
diff --git a/arch/arm/mach-tegra/include/mach/usb_phy.h b/arch/arm/mach-tegra/include/mach/usb_phy.h
index d4b8f9e..de1a0f6 100644
--- a/arch/arm/mach-tegra/include/mach/usb_phy.h
+++ b/arch/arm/mach-tegra/include/mach/usb_phy.h
@@ -58,7 +58,7 @@ struct tegra_usb_phy {
struct clk *pad_clk;
enum tegra_usb_phy_mode mode;
void *config;
- struct otg_transceiver *ulpi;
+ struct usb_phy *ulpi;
};
struct tegra_usb_phy *tegra_usb_phy_open(int instance, void __iomem *regs,
diff --git a/arch/arm/mach-tegra/usb_phy.c b/arch/arm/mach-tegra/usb_phy.c
index 37576a7..ad321f9 100644
--- a/arch/arm/mach-tegra/usb_phy.c
+++ b/arch/arm/mach-tegra/usb_phy.c
@@ -608,13 +608,13 @@ static int ulpi_phy_power_on(struct tegra_usb_phy *phy)
writel(val, base + ULPI_TIMING_CTRL_1);
/* Fix VbusInvalid due to floating VBUS */
- ret = otg_io_write(phy->ulpi, 0x40, 0x08);
+ ret = usb_phy_io_write(phy->ulpi, 0x40, 0x08);
if (ret) {
pr_err("%s: ulpi write failed\n", __func__);
return ret;
}
- ret = otg_io_write(phy->ulpi, 0x80, 0x0B);
+ ret = usb_phy_io_write(phy->ulpi, 0x80, 0x0B);
if (ret) {
pr_err("%s: ulpi write failed\n", __func__);
return ret;
diff --git a/arch/arm/plat-mxc/include/mach/mxc_ehci.h b/arch/arm/plat-mxc/include/mach/mxc_ehci.h
index 2c159dc..9ffd1bb 100644
--- a/arch/arm/plat-mxc/include/mach/mxc_ehci.h
+++ b/arch/arm/plat-mxc/include/mach/mxc_ehci.h
@@ -44,7 +44,7 @@ struct mxc_usbh_platform_data {
int (*exit)(struct platform_device *pdev);
unsigned int portsc;
- struct otg_transceiver *otg;
+ struct usb_phy *otg;
};
int mx51_initialize_usb_hw(int port, unsigned int flags);
diff --git a/arch/arm/plat-mxc/include/mach/ulpi.h b/arch/arm/plat-mxc/include/mach/ulpi.h
index f9161c9..42bdaca 100644
--- a/arch/arm/plat-mxc/include/mach/ulpi.h
+++ b/arch/arm/plat-mxc/include/mach/ulpi.h
@@ -2,15 +2,15 @@
#define __MACH_ULPI_H
#ifdef CONFIG_USB_ULPI
-struct otg_transceiver *imx_otg_ulpi_create(unsigned int flags);
+struct usb_phy *imx_otg_ulpi_create(unsigned int flags);
#else
-static inline struct otg_transceiver *imx_otg_ulpi_create(unsigned int flags)
+static inline struct usb_phy *imx_otg_ulpi_create(unsigned int flags)
{
return NULL;
}
#endif
-extern struct otg_io_access_ops mxc_ulpi_access_ops;
+extern struct usb_phy_io_ops mxc_ulpi_access_ops;
#endif /* __MACH_ULPI_H */
diff --git a/arch/arm/plat-mxc/ulpi.c b/arch/arm/plat-mxc/ulpi.c
index 477e45b..d296342 100644
--- a/arch/arm/plat-mxc/ulpi.c
+++ b/arch/arm/plat-mxc/ulpi.c
@@ -58,7 +58,7 @@ static int ulpi_poll(void __iomem *view, u32 bit)
return -ETIMEDOUT;
}
-static int ulpi_read(struct otg_transceiver *otg, u32 reg)
+static int ulpi_read(struct usb_phy *otg, u32 reg)
{
int ret;
void __iomem *view = otg->io_priv;
@@ -84,7 +84,7 @@ static int ulpi_read(struct otg_transceiver *otg, u32 reg)
return (__raw_readl(view) >> ULPIVW_RDATA_SHIFT) & ULPIVW_RDATA_MASK;
}
-static int ulpi_write(struct otg_transceiver *otg, u32 val, u32 reg)
+static int ulpi_write(struct usb_phy *otg, u32 val, u32 reg)
{
int ret;
void __iomem *view = otg->io_priv;
@@ -106,13 +106,13 @@ static int ulpi_write(struct otg_transceiver *otg, u32 val, u32 reg)
return ulpi_poll(view, ULPIVW_RUN);
}
-struct otg_io_access_ops mxc_ulpi_access_ops = {
+struct usb_phy_io_ops mxc_ulpi_access_ops = {
.read = ulpi_read,
.write = ulpi_write,
};
EXPORT_SYMBOL_GPL(mxc_ulpi_access_ops);
-struct otg_transceiver *imx_otg_ulpi_create(unsigned int flags)
+struct usb_phy *imx_otg_ulpi_create(unsigned int flags)
{
return otg_ulpi_create(&mxc_ulpi_access_ops, flags);
}
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index ea33957..92c5af9 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -40,9 +40,7 @@ void cpu_idle(void)
cpu_idle_sleep();
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 8dd0416..a80a643 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -94,9 +94,7 @@ void cpu_idle(void)
idle();
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index aa585e4..d8f50ff 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -115,9 +115,7 @@ void cpu_idle (void)
idle = default_idle;
idle();
}
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h
index a69e015..c52ea55 100644
--- a/arch/frv/include/asm/perf_event.h
+++ b/arch/frv/include/asm/perf_event.h
@@ -12,6 +12,4 @@
#ifndef _ASM_PERF_EVENT_H
#define _ASM_PERF_EVENT_H
-#define PERF_EVENT_INDEX_OFFSET 0
-
#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 3901df1..29cc497 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -92,9 +92,7 @@ void cpu_idle(void)
idle();
}
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 933bd38..1a173b3 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -81,9 +81,7 @@ void cpu_idle(void)
while (1) {
while (!need_resched())
idle();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h
index 6c2910f..8b8526b 100644
--- a/arch/hexagon/include/asm/perf_event.h
+++ b/arch/hexagon/include/asm/perf_event.h
@@ -19,6 +19,4 @@
#ifndef _ASM_PERF_EVENT_H
#define _ASM_PERF_EVENT_H
-#define PERF_EVENT_INDEX_OFFSET 0
-
#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index c871a2c..0123c63 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -179,8 +179,6 @@ void __cpuinit start_secondary(void)
printk(KERN_INFO "%s cpu %d\n", __func__, current_thread_info()->cpu);
set_cpu_online(cpu, true);
- while (!cpumask_test_cpu(cpu, cpu_active_mask))
- cpu_relax();
local_irq_enable();
cpu_idle();
diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c
index bf6d9d8..0216e28 100644
--- a/arch/ia64/hp/sim/boot/fw-emu.c
+++ b/arch/ia64/hp/sim/boot/fw-emu.c
@@ -160,28 +160,19 @@ sal_emulator (long index, unsigned long in1, unsigned long in2,
*/
status = 0;
if (index == SAL_FREQ_BASE) {
- switch (in1) {
- case SAL_FREQ_BASE_PLATFORM:
+ if (in1 == SAL_FREQ_BASE_PLATFORM)
r9 = 200000000;
- break;
-
- case SAL_FREQ_BASE_INTERVAL_TIMER:
+ else if (in1 == SAL_FREQ_BASE_INTERVAL_TIMER) {
/*
* Is this supposed to be the cr.itc frequency
* or something platform specific? The SAL
* doc ain't exactly clear on this...
*/
r9 = 700000000;
- break;
-
- case SAL_FREQ_BASE_REALTIME_CLOCK:
+ } else if (in1 == SAL_FREQ_BASE_REALTIME_CLOCK)
r9 = 1;
- break;
-
- default:
+ else
status = -1;
- break;
- }
} else if (index == SAL_SET_VECTORS) {
;
} else if (index == SAL_GET_STATE_INFO) {
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
index 4bd9a63..0aa70eb 100644
--- a/arch/ia64/hp/sim/hpsim_irq.c
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -10,6 +10,8 @@
#include <linux/sched.h>
#include <linux/irq.h>
+#include "hpsim_ssc.h"
+
static unsigned int
hpsim_irq_startup(struct irq_data *data)
{
@@ -37,15 +39,37 @@ static struct irq_chip irq_type_hp_sim = {
.irq_set_affinity = hpsim_set_affinity_noop,
};
+static void hpsim_irq_set_chip(int irq)
+{
+ struct irq_chip *chip = irq_get_chip(irq);
+
+ if (chip == &no_irq_chip)
+ irq_set_chip(irq, &irq_type_hp_sim);
+}
+
+static void hpsim_connect_irq(int intr, int irq)
+{
+ ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT);
+}
+
+int hpsim_get_irq(int intr)
+{
+ int irq = assign_irq_vector(AUTO_ASSIGN);
+
+ if (irq >= 0) {
+ hpsim_irq_set_chip(irq);
+ irq_set_handler(irq, handle_simple_irq);
+ hpsim_connect_irq(intr, irq);
+ }
+
+ return irq;
+}
+
void __init
hpsim_irq_init (void)
{
int i;
- for_each_active_irq(i) {
- struct irq_chip *chip = irq_get_chip(i);
-
- if (chip == &no_irq_chip)
- irq_set_chip(i, &irq_type_hp_sim);
- }
+ for_each_active_irq(i)
+ hpsim_irq_set_chip(i);
}
diff --git a/arch/ia64/hp/sim/hpsim_setup.c b/arch/ia64/hp/sim/hpsim_setup.c
index f629e90..664a540 100644
--- a/arch/ia64/hp/sim/hpsim_setup.c
+++ b/arch/ia64/hp/sim/hpsim_setup.c
@@ -26,12 +26,6 @@
#include "hpsim_ssc.h"
void
-ia64_ssc_connect_irq (long intr, long irq)
-{
- ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT);
-}
-
-void
ia64_ctl_trace (long on)
{
ia64_ssc(on, 0, 0, 0, SSC_CTL_TRACE);
diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
index 440001e..a63218e 100644
--- a/arch/ia64/hp/sim/simeth.c
+++ b/arch/ia64/hp/sim/simeth.c
@@ -129,17 +129,6 @@ netdev_probe(char *name, unsigned char *ether)
static inline int
-netdev_connect(int irq)
-{
- /* XXX Fix me
- * this does not support multiple cards
- * also no return value
- */
- ia64_ssc_connect_irq(NETWORK_INTR, irq);
- return 0;
-}
-
-static inline int
netdev_attach(int fd, int irq, unsigned int ipaddr)
{
/* this puts the host interface in the right mode (start interrupting) */
@@ -226,15 +215,13 @@ simeth_probe1(void)
return err;
}
- if ((rc = assign_irq_vector(AUTO_ASSIGN)) < 0)
- panic("%s: out of interrupt vectors!\n", __func__);
- dev->irq = rc;
-
/*
* attach the interrupt in the simulator, this does enable interrupts
* until a netdev_attach() is called
*/
- netdev_connect(dev->irq);
+ if ((rc = hpsim_get_irq(NETWORK_INTR)) < 0)
+ panic("%s: out of interrupt vectors!\n", __func__);
+ dev->irq = rc;
printk(KERN_INFO "%s: hosteth=%s simfd=%d, HwAddr=%pm, IRQ %d\n",
dev->name, simeth_device, local->simfd, dev->dev_addr, dev->irq);
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index bff0824..c34785d 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -4,16 +4,11 @@
* This driver is mostly used for bringup purposes and will go away.
* It has a strong dependency on the system console. All outputs
* are rerouted to the same facility as the one used by printk which, in our
- * case means sys_sim.c console (goes via the simulator). The code hereafter
- * is completely leveraged from the serial.c driver.
+ * case means sys_sim.c console (goes via the simulator).
*
* Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
* David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 02/04/00 D. Mosberger Merged in serial.c bug fixes in rs_close().
- * 02/25/00 D. Mosberger Synced up with 2.3.99pre-5 version of serial.c.
- * 07/30/02 D. Mosberger Replace sti()/cli() with explicit spinlocks & local irq masking
*/
#include <linux/init.h>
@@ -27,15 +22,17 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/capability.h>
+#include <linux/circ_buf.h>
#include <linux/console.h>
+#include <linux/irq.h>
#include <linux/module.h>
#include <linux/serial.h>
-#include <linux/serialP.h>
#include <linux/sysrq.h>
+#include <linux/uaccess.h>
+
+#include <asm/hpsim.h>
-#include <asm/irq.h>
-#include <asm/hw_irq.h>
-#include <asm/uaccess.h>
+#include "hpsim_ssc.h"
#undef SIMSERIAL_DEBUG /* define this to get some debug information */
@@ -43,118 +40,44 @@
#define NR_PORTS 1 /* only one port for now */
-#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? IRQF_SHARED : IRQF_DISABLED)
-
-#define SSC_GETCHAR 21
-
-extern long ia64_ssc (long, long, long, long, int);
-extern void ia64_ssc_connect_irq (long intr, long irq);
-
-static char *serial_name = "SimSerial driver";
-static char *serial_version = "0.6";
-
-/*
- * This has been extracted from asm/serial.h. We need one eventually but
- * I don't know exactly what we're going to put in it so just fake one
- * for now.
- */
-#define BASE_BAUD ( 1843200 / 16 )
-
-#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
-
-/*
- * Most of the values here are meaningless to this particular driver.
- * However some values must be preserved for the code (leveraged from serial.c
- * to work correctly).
- * port must not be 0
- * type must not be UNKNOWN
- * So I picked arbitrary (guess from where?) values instead
- */
-static struct serial_state rs_table[NR_PORTS]={
- /* UART CLK PORT IRQ FLAGS */
- { 0, BASE_BAUD, 0x3F8, 0, STD_COM_FLAGS,0,PORT_16550 } /* ttyS0 */
+struct serial_state {
+ struct tty_port port;
+ struct circ_buf xmit;
+ int irq;
+ int x_char;
};
-/*
- * Just for the fun of it !
- */
-static struct serial_uart_config uart_config[] = {
- { "unknown", 1, 0 },
- { "8250", 1, 0 },
- { "16450", 1, 0 },
- { "16550", 1, 0 },
- { "16550A", 16, UART_CLEAR_FIFO | UART_USE_FIFO },
- { "cirrus", 1, 0 },
- { "ST16650", 1, UART_CLEAR_FIFO | UART_STARTECH },
- { "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO |
- UART_STARTECH },
- { "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO},
- { NULL, 0}
-};
+static struct serial_state rs_table[NR_PORTS];
struct tty_driver *hp_simserial_driver;
-static struct async_struct *IRQ_ports[NR_IRQS];
-
static struct console *console;
-static unsigned char *tmp_buf;
-
-extern struct console *console_drivers; /* from kernel/printk.c */
-
-/*
- * ------------------------------------------------------------
- * rs_stop() and rs_start()
- *
- * This routines are called before setting or resetting tty->stopped.
- * They enable or disable transmitter interrupts, as necessary.
- * ------------------------------------------------------------
- */
-static void rs_stop(struct tty_struct *tty)
-{
-#ifdef SIMSERIAL_DEBUG
- printk("rs_stop: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n",
- tty->stopped, tty->hw_stopped, tty->flow_stopped);
-#endif
-
-}
-
-static void rs_start(struct tty_struct *tty)
-{
-#ifdef SIMSERIAL_DEBUG
- printk("rs_start: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n",
- tty->stopped, tty->hw_stopped, tty->flow_stopped);
-#endif
-}
-
-static void receive_chars(struct tty_struct *tty)
+static void receive_chars(struct tty_struct *tty)
{
unsigned char ch;
static unsigned char seen_esc = 0;
while ( (ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR)) ) {
- if ( ch == 27 && seen_esc == 0 ) {
+ if (ch == 27 && seen_esc == 0) {
seen_esc = 1;
continue;
- } else {
- if ( seen_esc==1 && ch == 'O' ) {
- seen_esc = 2;
- continue;
- } else if ( seen_esc == 2 ) {
- if ( ch == 'P' ) /* F1 */
- show_state();
+ } else if (seen_esc == 1 && ch == 'O') {
+ seen_esc = 2;
+ continue;
+ } else if (seen_esc == 2) {
+ if (ch == 'P') /* F1 */
+ show_state();
#ifdef CONFIG_MAGIC_SYSRQ
- if ( ch == 'S' ) { /* F4 */
- do
- ch = ia64_ssc(0, 0, 0, 0,
- SSC_GETCHAR);
- while (!ch);
- handle_sysrq(ch);
- }
-#endif
- seen_esc = 0;
- continue;
+ if (ch == 'S') { /* F4 */
+ do {
+ ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR);
+ } while (!ch);
+ handle_sysrq(ch);
}
+#endif
+ seen_esc = 0;
+ continue;
}
seen_esc = 0;
@@ -169,22 +92,19 @@ static void receive_chars(struct tty_struct *tty)
*/
static irqreturn_t rs_interrupt_single(int irq, void *dev_id)
{
- struct async_struct * info;
+ struct serial_state *info = dev_id;
+ struct tty_struct *tty = tty_port_tty_get(&info->port);
- /*
- * I don't know exactly why they don't use the dev_id opaque data
- * pointer instead of this extra lookup table
- */
- info = IRQ_ports[irq];
- if (!info || !info->tty) {
- printk(KERN_INFO "simrs_interrupt_single: info|tty=0 info=%p problem\n", info);
+ if (!tty) {
+ printk(KERN_INFO "%s: tty=0 problem\n", __func__);
return IRQ_NONE;
}
/*
* pretty simple in our case, because we only get interrupts
* on inbound traffic
*/
- receive_chars(info->tty);
+ receive_chars(tty);
+ tty_kref_put(tty);
return IRQ_HANDLED;
}
@@ -194,17 +114,12 @@ static irqreturn_t rs_interrupt_single(int irq, void *dev_id)
* -------------------------------------------------------------------
*/
-static void do_softint(struct work_struct *private_)
-{
- printk(KERN_ERR "simserial: do_softint called\n");
-}
-
static int rs_put_char(struct tty_struct *tty, unsigned char ch)
{
- struct async_struct *info = (struct async_struct *)tty->driver_data;
+ struct serial_state *info = tty->driver_data;
unsigned long flags;
- if (!tty || !info->xmit.buf)
+ if (!info->xmit.buf)
return 0;
local_irq_save(flags);
@@ -218,12 +133,12 @@ static int rs_put_char(struct tty_struct *tty, unsigned char ch)
return 1;
}
-static void transmit_chars(struct async_struct *info, int *intr_done)
+static void transmit_chars(struct tty_struct *tty, struct serial_state *info,
+ int *intr_done)
{
int count;
unsigned long flags;
-
local_irq_save(flags);
if (info->x_char) {
@@ -231,16 +146,16 @@ static void transmit_chars(struct async_struct *info, int *intr_done)
console->write(console, &c, 1);
- info->state->icount.tx++;
info->x_char = 0;
goto out;
}
- if (info->xmit.head == info->xmit.tail || info->tty->stopped || info->tty->hw_stopped) {
+ if (info->xmit.head == info->xmit.tail || tty->stopped ||
+ tty->hw_stopped) {
#ifdef SIMSERIAL_DEBUG
printk("transmit_chars: head=%d, tail=%d, stopped=%d\n",
- info->xmit.head, info->xmit.tail, info->tty->stopped);
+ info->xmit.head, info->xmit.tail, tty->stopped);
#endif
goto out;
}
@@ -272,24 +187,24 @@ out:
static void rs_flush_chars(struct tty_struct *tty)
{
- struct async_struct *info = (struct async_struct *)tty->driver_data;
+ struct serial_state *info = tty->driver_data;
- if (info->xmit.head == info->xmit.tail || tty->stopped || tty->hw_stopped ||
- !info->xmit.buf)
+ if (info->xmit.head == info->xmit.tail || tty->stopped ||
+ tty->hw_stopped || !info->xmit.buf)
return;
- transmit_chars(info, NULL);
+ transmit_chars(tty, info, NULL);
}
-
static int rs_write(struct tty_struct * tty,
const unsigned char *buf, int count)
{
+ struct serial_state *info = tty->driver_data;
int c, ret = 0;
- struct async_struct *info = (struct async_struct *)tty->driver_data;
unsigned long flags;
- if (!tty || !info->xmit.buf || !tmp_buf) return 0;
+ if (!info->xmit.buf)
+ return 0;
local_irq_save(flags);
while (1) {
@@ -310,30 +225,30 @@ static int rs_write(struct tty_struct * tty,
/*
* Hey, we transmit directly from here in our case
*/
- if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE)
- && !tty->stopped && !tty->hw_stopped) {
- transmit_chars(info, NULL);
- }
+ if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) &&
+ !tty->stopped && !tty->hw_stopped)
+ transmit_chars(tty, info, NULL);
+
return ret;
}
static int rs_write_room(struct tty_struct *tty)
{
- struct async_struct *info = (struct async_struct *)tty->driver_data;
+ struct serial_state *info = tty->driver_data;
return CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
}
static int rs_chars_in_buffer(struct tty_struct *tty)
{
- struct async_struct *info = (struct async_struct *)tty->driver_data;
+ struct serial_state *info = tty->driver_data;
return CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
}
static void rs_flush_buffer(struct tty_struct *tty)
{
- struct async_struct *info = (struct async_struct *)tty->driver_data;
+ struct serial_state *info = tty->driver_data;
unsigned long flags;
local_irq_save(flags);
@@ -349,7 +264,7 @@ static void rs_flush_buffer(struct tty_struct *tty)
*/
static void rs_send_xchar(struct tty_struct *tty, char ch)
{
- struct async_struct *info = (struct async_struct *)tty->driver_data;
+ struct serial_state *info = tty->driver_data;
info->x_char = ch;
if (ch) {
@@ -357,7 +272,7 @@ static void rs_send_xchar(struct tty_struct *tty, char ch)
* I guess we could call console->write() directly but
* let's do that for now.
*/
- transmit_chars(info, NULL);
+ transmit_chars(tty, info, NULL);
}
}
@@ -371,14 +286,15 @@ static void rs_send_xchar(struct tty_struct *tty, char ch)
*/
static void rs_throttle(struct tty_struct * tty)
{
- if (I_IXOFF(tty)) rs_send_xchar(tty, STOP_CHAR(tty));
+ if (I_IXOFF(tty))
+ rs_send_xchar(tty, STOP_CHAR(tty));
printk(KERN_INFO "simrs_throttle called\n");
}
static void rs_unthrottle(struct tty_struct * tty)
{
- struct async_struct *info = (struct async_struct *)tty->driver_data;
+ struct serial_state *info = tty->driver_data;
if (I_IXOFF(tty)) {
if (info->x_char)
@@ -389,7 +305,6 @@ static void rs_unthrottle(struct tty_struct * tty)
printk(KERN_INFO "simrs_unthrottle called\n");
}
-
static int rs_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg)
{
if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) &&
@@ -400,48 +315,21 @@ static int rs_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg)
}
switch (cmd) {
- case TIOCGSERIAL:
- printk(KERN_INFO "simrs_ioctl TIOCGSERIAL called\n");
- return 0;
- case TIOCSSERIAL:
- printk(KERN_INFO "simrs_ioctl TIOCSSERIAL called\n");
- return 0;
- case TIOCSERCONFIG:
- printk(KERN_INFO "rs_ioctl: TIOCSERCONFIG called\n");
- return -EINVAL;
-
- case TIOCSERGETLSR: /* Get line status register */
- printk(KERN_INFO "rs_ioctl: TIOCSERGETLSR called\n");
- return -EINVAL;
-
- case TIOCSERGSTRUCT:
- printk(KERN_INFO "rs_ioctl: TIOCSERGSTRUCT called\n");
-#if 0
- if (copy_to_user((struct async_struct *) arg,
- info, sizeof(struct async_struct)))
- return -EFAULT;
-#endif
- return 0;
-
- /*
- * Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change
- * - mask passed in arg for lines of interest
- * (use |'ed TIOCM_RNG/DSR/CD/CTS for masking)
- * Caller should use TIOCGICOUNT to see which one it was
- */
- case TIOCMIWAIT:
- printk(KERN_INFO "rs_ioctl: TIOCMIWAIT: called\n");
- return 0;
- case TIOCSERGWILD:
- case TIOCSERSWILD:
- /* "setserial -W" is called in Debian boot */
- printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n");
- return 0;
-
- default:
- return -ENOIOCTLCMD;
- }
- return 0;
+ case TIOCGSERIAL:
+ case TIOCSSERIAL:
+ case TIOCSERGSTRUCT:
+ case TIOCMIWAIT:
+ return 0;
+ case TIOCSERCONFIG:
+ case TIOCSERGETLSR: /* Get line status register */
+ return -EINVAL;
+ case TIOCSERGWILD:
+ case TIOCSERSWILD:
+ /* "setserial -W" is called in Debian boot */
+ printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n");
+ return 0;
+ }
+ return -ENOIOCTLCMD;
}
#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
@@ -452,220 +340,50 @@ static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios)
if ((old_termios->c_cflag & CRTSCTS) &&
!(tty->termios->c_cflag & CRTSCTS)) {
tty->hw_stopped = 0;
- rs_start(tty);
}
}
/*
* This routine will shutdown a serial port; interrupts are disabled, and
* DTR is dropped if the hangup on close termio flag is on.
*/
-static void shutdown(struct async_struct * info)
+static void shutdown(struct tty_port *port)
{
- unsigned long flags;
- struct serial_state *state;
- int retval;
-
- if (!(info->flags & ASYNC_INITIALIZED)) return;
-
- state = info->state;
-
-#ifdef SIMSERIAL_DEBUG
- printk("Shutting down serial port %d (irq %d)....", info->line,
- state->irq);
-#endif
+ struct serial_state *info = container_of(port, struct serial_state,
+ port);
+ unsigned long flags;
local_irq_save(flags);
- {
- /*
- * First unlink the serial port from the IRQ chain...
- */
- if (info->next_port)
- info->next_port->prev_port = info->prev_port;
- if (info->prev_port)
- info->prev_port->next_port = info->next_port;
- else
- IRQ_ports[state->irq] = info->next_port;
-
- /*
- * Free the IRQ, if necessary
- */
- if (state->irq && (!IRQ_ports[state->irq] ||
- !IRQ_ports[state->irq]->next_port)) {
- if (IRQ_ports[state->irq]) {
- free_irq(state->irq, NULL);
- retval = request_irq(state->irq, rs_interrupt_single,
- IRQ_T(info), "serial", NULL);
-
- if (retval)
- printk(KERN_ERR "serial shutdown: request_irq: error %d"
- " Couldn't reacquire IRQ.\n", retval);
- } else
- free_irq(state->irq, NULL);
- }
-
- if (info->xmit.buf) {
- free_page((unsigned long) info->xmit.buf);
- info->xmit.buf = NULL;
- }
-
- if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
+ if (info->irq)
+ free_irq(info->irq, info);
- info->flags &= ~ASYNC_INITIALIZED;
+ if (info->xmit.buf) {
+ free_page((unsigned long) info->xmit.buf);
+ info->xmit.buf = NULL;
}
local_irq_restore(flags);
}
-/*
- * ------------------------------------------------------------
- * rs_close()
- *
- * This routine is called when the serial port gets closed. First, we
- * wait for the last remaining data to be sent. Then, we unlink its
- * async structure from the interrupt chain if necessary, and we free
- * that IRQ if nothing is left in the chain.
- * ------------------------------------------------------------
- */
static void rs_close(struct tty_struct *tty, struct file * filp)
{
- struct async_struct * info = (struct async_struct *)tty->driver_data;
- struct serial_state *state;
- unsigned long flags;
-
- if (!info ) return;
-
- state = info->state;
-
- local_irq_save(flags);
- if (tty_hung_up_p(filp)) {
-#ifdef SIMSERIAL_DEBUG
- printk("rs_close: hung_up\n");
-#endif
- local_irq_restore(flags);
- return;
- }
-#ifdef SIMSERIAL_DEBUG
- printk("rs_close ttys%d, count = %d\n", info->line, state->count);
-#endif
- if ((tty->count == 1) && (state->count != 1)) {
- /*
- * Uh, oh. tty->count is 1, which means that the tty
- * structure will be freed. state->count should always
- * be one in these conditions. If it's greater than
- * one, we've got real problems, since it means the
- * serial port won't be shutdown.
- */
- printk(KERN_ERR "rs_close: bad serial port count; tty->count is 1, "
- "state->count is %d\n", state->count);
- state->count = 1;
- }
- if (--state->count < 0) {
- printk(KERN_ERR "rs_close: bad serial port count for ttys%d: %d\n",
- info->line, state->count);
- state->count = 0;
- }
- if (state->count) {
- local_irq_restore(flags);
- return;
- }
- info->flags |= ASYNC_CLOSING;
- local_irq_restore(flags);
+ struct serial_state *info = tty->driver_data;
- /*
- * Now we wait for the transmit buffer to clear; and we notify
- * the line discipline to only process XON/XOFF characters.
- */
- shutdown(info);
- rs_flush_buffer(tty);
- tty_ldisc_flush(tty);
- info->event = 0;
- info->tty = NULL;
- if (info->blocked_open) {
- if (info->close_delay)
- schedule_timeout_interruptible(info->close_delay);
- wake_up_interruptible(&info->open_wait);
- }
- info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
- wake_up_interruptible(&info->close_wait);
+ tty_port_close(&info->port, tty, filp);
}
-/*
- * rs_wait_until_sent() --- wait until the transmitter is empty
- */
-static void rs_wait_until_sent(struct tty_struct *tty, int timeout)
-{
-}
-
-
-/*
- * rs_hangup() --- called by tty_hangup() when a hangup is signaled.
- */
static void rs_hangup(struct tty_struct *tty)
{
- struct async_struct * info = (struct async_struct *)tty->driver_data;
- struct serial_state *state = info->state;
-
-#ifdef SIMSERIAL_DEBUG
- printk("rs_hangup: called\n");
-#endif
-
- state = info->state;
+ struct serial_state *info = tty->driver_data;
rs_flush_buffer(tty);
- if (info->flags & ASYNC_CLOSING)
- return;
- shutdown(info);
-
- info->event = 0;
- state->count = 0;
- info->flags &= ~ASYNC_NORMAL_ACTIVE;
- info->tty = NULL;
- wake_up_interruptible(&info->open_wait);
+ tty_port_hangup(&info->port);
}
-
-static int get_async_struct(int line, struct async_struct **ret_info)
+static int activate(struct tty_port *port, struct tty_struct *tty)
{
- struct async_struct *info;
- struct serial_state *sstate;
-
- sstate = rs_table + line;
- sstate->count++;
- if (sstate->info) {
- *ret_info = sstate->info;
- return 0;
- }
- info = kzalloc(sizeof(struct async_struct), GFP_KERNEL);
- if (!info) {
- sstate->count--;
- return -ENOMEM;
- }
- init_waitqueue_head(&info->open_wait);
- init_waitqueue_head(&info->close_wait);
- init_waitqueue_head(&info->delta_msr_wait);
- info->magic = SERIAL_MAGIC;
- info->port = sstate->port;
- info->flags = sstate->flags;
- info->xmit_fifo_size = sstate->xmit_fifo_size;
- info->line = line;
- INIT_WORK(&info->work, do_softint);
- info->state = sstate;
- if (sstate->info) {
- kfree(info);
- *ret_info = sstate->info;
- return 0;
- }
- *ret_info = sstate->info = info;
- return 0;
-}
-
-static int
-startup(struct async_struct *info)
-{
- unsigned long flags;
- int retval=0;
- irq_handler_t handler;
- struct serial_state *state= info->state;
- unsigned long page;
+ struct serial_state *state = container_of(port, struct serial_state,
+ port);
+ unsigned long flags, page;
+ int retval = 0;
page = get_zeroed_page(GFP_KERNEL);
if (!page)
@@ -673,86 +391,31 @@ startup(struct async_struct *info)
local_irq_save(flags);
- if (info->flags & ASYNC_INITIALIZED) {
- free_page(page);
- goto errout;
- }
-
- if (!state->port || !state->type) {
- if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
- free_page(page);
- goto errout;
- }
- if (info->xmit.buf)
+ if (state->xmit.buf)
free_page(page);
else
- info->xmit.buf = (unsigned char *) page;
-
-#ifdef SIMSERIAL_DEBUG
- printk("startup: ttys%d (irq %d)...", info->line, state->irq);
-#endif
+ state->xmit.buf = (unsigned char *) page;
- /*
- * Allocate the IRQ if necessary
- */
- if (state->irq && (!IRQ_ports[state->irq] ||
- !IRQ_ports[state->irq]->next_port)) {
- if (IRQ_ports[state->irq]) {
- retval = -EBUSY;
- goto errout;
- } else
- handler = rs_interrupt_single;
-
- retval = request_irq(state->irq, handler, IRQ_T(info), "simserial", NULL);
- if (retval) {
- if (capable(CAP_SYS_ADMIN)) {
- if (info->tty)
- set_bit(TTY_IO_ERROR,
- &info->tty->flags);
- retval = 0;
- }
+ if (state->irq) {
+ retval = request_irq(state->irq, rs_interrupt_single, 0,
+ "simserial", state);
+ if (retval)
goto errout;
- }
}
- /*
- * Insert serial port into IRQ chain.
- */
- info->prev_port = NULL;
- info->next_port = IRQ_ports[state->irq];
- if (info->next_port)
- info->next_port->prev_port = info;
- IRQ_ports[state->irq] = info;
-
- if (info->tty) clear_bit(TTY_IO_ERROR, &info->tty->flags);
-
- info->xmit.head = info->xmit.tail = 0;
-
-#if 0
- /*
- * Set up serial timers...
- */
- timer_table[RS_TIMER].expires = jiffies + 2*HZ/100;
- timer_active |= 1 << RS_TIMER;
-#endif
+ state->xmit.head = state->xmit.tail = 0;
/*
* Set up the tty->alt_speed kludge
*/
- if (info->tty) {
- if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
- info->tty->alt_speed = 57600;
- if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
- info->tty->alt_speed = 115200;
- if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
- info->tty->alt_speed = 230400;
- if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
- info->tty->alt_speed = 460800;
- }
-
- info->flags |= ASYNC_INITIALIZED;
- local_irq_restore(flags);
- return 0;
+ if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
+ tty->alt_speed = 57600;
+ if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
+ tty->alt_speed = 115200;
+ if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
+ tty->alt_speed = 230400;
+ if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
+ tty->alt_speed = 460800;
errout:
local_irq_restore(flags);
@@ -768,56 +431,11 @@ errout:
*/
static int rs_open(struct tty_struct *tty, struct file * filp)
{
- struct async_struct *info;
- int retval, line;
- unsigned long page;
+ struct serial_state *info = rs_table + tty->index;
+ struct tty_port *port = &info->port;
- line = tty->index;
- if ((line < 0) || (line >= NR_PORTS))
- return -ENODEV;
- retval = get_async_struct(line, &info);
- if (retval)
- return retval;
tty->driver_data = info;
- info->tty = tty;
-
-#ifdef SIMSERIAL_DEBUG
- printk("rs_open %s, count = %d\n", tty->name, info->state->count);
-#endif
- info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
-
- if (!tmp_buf) {
- page = get_zeroed_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
- if (tmp_buf)
- free_page(page);
- else
- tmp_buf = (unsigned char *) page;
- }
-
- /*
- * If the port is the middle of closing, bail out now
- */
- if (tty_hung_up_p(filp) ||
- (info->flags & ASYNC_CLOSING)) {
- if (info->flags & ASYNC_CLOSING)
- interruptible_sleep_on(&info->close_wait);
-#ifdef SERIAL_DO_RESTART
- return ((info->flags & ASYNC_HUP_NOTIFY) ?
- -EAGAIN : -ERESTARTSYS);
-#else
- return -EAGAIN;
-#endif
- }
-
- /*
- * Start up serial port
- */
- retval = startup(info);
- if (retval) {
- return retval;
- }
+ tty->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
/*
* figure out which console to use (should be one already)
@@ -828,30 +446,21 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
console = console->next;
}
-#ifdef SIMSERIAL_DEBUG
- printk("rs_open ttys%d successful\n", info->line);
-#endif
- return 0;
+ return tty_port_open(port, tty, filp);
}
/*
* /proc fs routines....
*/
-static inline void line_info(struct seq_file *m, struct serial_state *state)
-{
- seq_printf(m, "%d: uart:%s port:%lX irq:%d\n",
- state->line, uart_config[state->type].name,
- state->port, state->irq);
-}
-
static int rs_proc_show(struct seq_file *m, void *v)
{
int i;
- seq_printf(m, "simserinfo:1.0 driver:%s\n", serial_version);
+ seq_printf(m, "simserinfo:1.0\n");
for (i = 0; i < NR_PORTS; i++)
- line_info(m, &rs_table[i]);
+ seq_printf(m, "%d: uart:16550 port:3F8 irq:%d\n",
+ i, rs_table[i].irq);
return 0;
}
@@ -868,25 +477,6 @@ static const struct file_operations rs_proc_fops = {
.release = single_release,
};
-/*
- * ---------------------------------------------------------------------
- * rs_init() and friends
- *
- * rs_init() is called at boot-time to initialize the serial driver.
- * ---------------------------------------------------------------------
- */
-
-/*
- * This routine prints out the appropriate serial driver version
- * number, and identifies which options were configured into this
- * driver.
- */
-static inline void show_serial_version(void)
-{
- printk(KERN_INFO "%s version %s with", serial_name, serial_version);
- printk(KERN_INFO " no serial options enabled\n");
-}
-
static const struct tty_operations hp_ops = {
.open = rs_open,
.close = rs_close,
@@ -901,34 +491,31 @@ static const struct tty_operations hp_ops = {
.unthrottle = rs_unthrottle,
.send_xchar = rs_send_xchar,
.set_termios = rs_set_termios,
- .stop = rs_stop,
- .start = rs_start,
.hangup = rs_hangup,
- .wait_until_sent = rs_wait_until_sent,
.proc_fops = &rs_proc_fops,
};
-/*
- * The serial driver boot-time initialization code!
- */
-static int __init
-simrs_init (void)
+static const struct tty_port_operations hp_port_ops = {
+ .activate = activate,
+ .shutdown = shutdown,
+};
+
+static int __init simrs_init(void)
{
- int i, rc;
- struct serial_state *state;
+ struct serial_state *state;
+ int retval;
if (!ia64_platform_is("hpsim"))
return -ENODEV;
- hp_simserial_driver = alloc_tty_driver(1);
+ hp_simserial_driver = alloc_tty_driver(NR_PORTS);
if (!hp_simserial_driver)
return -ENOMEM;
- show_serial_version();
+ printk(KERN_INFO "SimSerial driver with no serial options enabled\n");
/* Initialize the tty_driver structure */
- hp_simserial_driver->owner = THIS_MODULE;
hp_simserial_driver->driver_name = "simserial";
hp_simserial_driver->name = "ttyS";
hp_simserial_driver->major = TTY_MAJOR;
@@ -941,31 +528,33 @@ simrs_init (void)
hp_simserial_driver->flags = TTY_DRIVER_REAL_RAW;
tty_set_operations(hp_simserial_driver, &hp_ops);
- /*
- * Let's have a little bit of fun !
- */
- for (i = 0, state = rs_table; i < NR_PORTS; i++,state++) {
+ state = rs_table;
+ tty_port_init(&state->port);
+ state->port.ops = &hp_port_ops;
+ state->port.close_delay = 0; /* XXX really 0? */
- if (state->type == PORT_UNKNOWN) continue;
+ retval = hpsim_get_irq(KEYBOARD_INTR);
+ if (retval < 0) {
+ printk(KERN_ERR "%s: out of interrupt vectors!\n",
+ __func__);
+ goto err_free_tty;
+ }
- if (!state->irq) {
- if ((rc = assign_irq_vector(AUTO_ASSIGN)) < 0)
- panic("%s: out of interrupt vectors!\n",
- __func__);
- state->irq = rc;
- ia64_ssc_connect_irq(KEYBOARD_INTR, state->irq);
- }
+ state->irq = retval;
- printk(KERN_INFO "ttyS%d at 0x%04lx (irq = %d) is a %s\n",
- state->line,
- state->port, state->irq,
- uart_config[state->type].name);
- }
+ /* the port is imaginary */
+ printk(KERN_INFO "ttyS0 at 0x03f8 (irq = %d) is a 16550\n", state->irq);
- if (tty_register_driver(hp_simserial_driver))
- panic("Couldn't register simserial driver\n");
+ retval = tty_register_driver(hp_simserial_driver);
+ if (retval) {
+ printk(KERN_ERR "Couldn't register simserial driver\n");
+ goto err_free_tty;
+ }
return 0;
+err_free_tty:
+ put_tty_driver(hp_simserial_driver);
+ return retval;
}
#ifndef MODULE
diff --git a/arch/ia64/include/asm/hpsim.h b/arch/ia64/include/asm/hpsim.h
index 892ab19..0fe5022 100644
--- a/arch/ia64/include/asm/hpsim.h
+++ b/arch/ia64/include/asm/hpsim.h
@@ -10,7 +10,7 @@ int simcons_register(void);
struct tty_driver;
extern struct tty_driver *hp_simserial_driver;
-void ia64_ssc_connect_irq(long intr, long irq);
+extern int hpsim_get_irq(int intr);
void ia64_ctl_trace(long on);
#endif
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
index 32551d3..b149b88 100644
--- a/arch/ia64/include/asm/paravirt.h
+++ b/arch/ia64/include/asm/paravirt.h
@@ -281,9 +281,9 @@ paravirt_init_missing_ticks_accounting(int cpu)
pv_time_ops.init_missing_ticks_accounting(cpu);
}
-struct jump_label_key;
-extern struct jump_label_key paravirt_steal_enabled;
-extern struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
static inline int
paravirt_do_steal_accounting(unsigned long *new_itm)
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index 1008682..1b22f6de 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -634,8 +634,8 @@ struct pv_irq_ops pv_irq_ops = {
* pv_time_ops
* time operations
*/
-struct jump_label_key paravirt_steal_enabled;
-struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
static int
ia64_native_do_steal_accounting(unsigned long *new_itm)
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 6d33c5c..9dc52b6 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -330,9 +330,7 @@ cpu_idle (void)
normal_xtp();
#endif
}
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
check_pgt_cache();
if (cpu_is_offline(cpu))
play_dead();
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index 422bea9..3a4a32b 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -90,9 +90,7 @@ void cpu_idle (void)
idle();
}
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/m68k/emu/nfcon.c b/arch/m68k/emu/nfcon.c
index ab20dc0..8db25e8 100644
--- a/arch/m68k/emu/nfcon.c
+++ b/arch/m68k/emu/nfcon.c
@@ -127,7 +127,6 @@ static int __init nfcon_init(void)
if (!nfcon_tty_driver)
return -ENOMEM;
- nfcon_tty_driver->owner = THIS_MODULE;
nfcon_tty_driver->driver_name = "nfcon";
nfcon_tty_driver->name = "nfcon";
nfcon_tty_driver->type = TTY_DRIVER_TYPE_SYSTEM;
diff --git a/arch/m68k/kernel/process_mm.c b/arch/m68k/kernel/process_mm.c
index 099283e..fe4186b 100644
--- a/arch/m68k/kernel/process_mm.c
+++ b/arch/m68k/kernel/process_mm.c
@@ -78,9 +78,7 @@ void cpu_idle(void)
while (1) {
while (!need_resched())
idle();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/m68k/kernel/process_no.c b/arch/m68k/kernel/process_no.c
index 5e1078c..f7fe6c3 100644
--- a/arch/m68k/kernel/process_no.c
+++ b/arch/m68k/kernel/process_no.c
@@ -73,9 +73,7 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
idle();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 7dcb5bf..9155f7d 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -110,9 +110,7 @@ void cpu_idle(void)
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
check_pgt_cache();
}
}
diff --git a/arch/mips/ath79/dev-usb.c b/arch/mips/ath79/dev-usb.c
index 002d6d2..36e9570 100644
--- a/arch/mips/ath79/dev-usb.c
+++ b/arch/mips/ath79/dev-usb.c
@@ -17,6 +17,8 @@
#include <linux/irq.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
+#include <linux/usb/ehci_pdriver.h>
+#include <linux/usb/ohci_pdriver.h>
#include <asm/mach-ath79/ath79.h>
#include <asm/mach-ath79/ar71xx_regs.h>
@@ -36,14 +38,19 @@ static struct resource ath79_ohci_resources[] = {
};
static u64 ath79_ohci_dmamask = DMA_BIT_MASK(32);
+
+static struct usb_ohci_pdata ath79_ohci_pdata = {
+};
+
static struct platform_device ath79_ohci_device = {
- .name = "ath79-ohci",
+ .name = "ohci-platform",
.id = -1,
.resource = ath79_ohci_resources,
.num_resources = ARRAY_SIZE(ath79_ohci_resources),
.dev = {
.dma_mask = &ath79_ohci_dmamask,
.coherent_dma_mask = DMA_BIT_MASK(32),
+ .platform_data = &ath79_ohci_pdata,
},
};
@@ -60,8 +67,20 @@ static struct resource ath79_ehci_resources[] = {
};
static u64 ath79_ehci_dmamask = DMA_BIT_MASK(32);
+
+static struct usb_ehci_pdata ath79_ehci_pdata_v1 = {
+ .has_synopsys_hc_bug = 1,
+ .port_power_off = 1,
+};
+
+static struct usb_ehci_pdata ath79_ehci_pdata_v2 = {
+ .caps_offset = 0x100,
+ .has_tt = 1,
+ .port_power_off = 1,
+};
+
static struct platform_device ath79_ehci_device = {
- .name = "ath79-ehci",
+ .name = "ehci-platform",
.id = -1,
.resource = ath79_ehci_resources,
.num_resources = ARRAY_SIZE(ath79_ehci_resources),
@@ -101,7 +120,7 @@ static void __init ath79_usb_setup(void)
ath79_ehci_resources[0].start = AR71XX_EHCI_BASE;
ath79_ehci_resources[0].end = AR71XX_EHCI_BASE + AR71XX_EHCI_SIZE - 1;
- ath79_ehci_device.name = "ar71xx-ehci";
+ ath79_ehci_device.dev.platform_data = &ath79_ehci_pdata_v1;
platform_device_register(&ath79_ehci_device);
}
@@ -142,7 +161,7 @@ static void __init ar724x_usb_setup(void)
ath79_ehci_resources[0].start = AR724X_EHCI_BASE;
ath79_ehci_resources[0].end = AR724X_EHCI_BASE + AR724X_EHCI_SIZE - 1;
- ath79_ehci_device.name = "ar724x-ehci";
+ ath79_ehci_device.dev.platform_data = &ath79_ehci_pdata_v2;
platform_device_register(&ath79_ehci_device);
}
@@ -159,7 +178,7 @@ static void __init ar913x_usb_setup(void)
ath79_ehci_resources[0].start = AR913X_EHCI_BASE;
ath79_ehci_resources[0].end = AR913X_EHCI_BASE + AR913X_EHCI_SIZE - 1;
- ath79_ehci_device.name = "ar913x-ehci";
+ ath79_ehci_device.dev.platform_data = &ath79_ehci_pdata_v2;
platform_device_register(&ath79_ehci_device);
}
@@ -176,7 +195,7 @@ static void __init ar933x_usb_setup(void)
ath79_ehci_resources[0].start = AR933X_EHCI_BASE;
ath79_ehci_resources[0].end = AR933X_EHCI_BASE + AR933X_EHCI_SIZE - 1;
- ath79_ehci_device.name = "ar933x-ehci";
+ ath79_ehci_device.dev.platform_data = &ath79_ehci_pdata_v2;
platform_device_register(&ath79_ehci_device);
}
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 1881b31..4d6d77e 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -20,7 +20,7 @@
#define WORD_INSN ".word"
#endif
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
{
asm goto("1:\tnop\n\t"
"nop\n\t"
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index e3b897a..811084f 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event)
{
int err = 0;
+ /* does not support taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 7955409..61f1cb4 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -80,9 +80,7 @@ void __noreturn cpu_idle(void)
#endif
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index 28eec31..cac401d 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -123,9 +123,7 @@ void cpu_idle(void)
idle();
}
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c
index fc770be..4f00459 100644
--- a/arch/parisc/kernel/pdc_cons.c
+++ b/arch/parisc/kernel/pdc_cons.c
@@ -90,11 +90,13 @@ static int pdc_console_setup(struct console *co, char *options)
#define PDC_CONS_POLL_DELAY (30 * HZ / 1000)
-static struct timer_list pdc_console_timer;
+static void pdc_console_poll(unsigned long unused);
+static DEFINE_TIMER(pdc_console_timer, pdc_console_poll, 0, 0);
+static struct tty_port tty_port;
static int pdc_console_tty_open(struct tty_struct *tty, struct file *filp)
{
-
+ tty_port_tty_set(&tty_port, tty);
mod_timer(&pdc_console_timer, jiffies + PDC_CONS_POLL_DELAY);
return 0;
@@ -102,8 +104,10 @@ static int pdc_console_tty_open(struct tty_struct *tty, struct file *filp)
static void pdc_console_tty_close(struct tty_struct *tty, struct file *filp)
{
- if (!tty->count)
- del_timer(&pdc_console_timer);
+ if (!tty->count) {
+ del_timer_sync(&pdc_console_timer);
+ tty_port_tty_set(&tty_port, NULL);
+ }
}
static int pdc_console_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
@@ -122,8 +126,6 @@ static int pdc_console_tty_chars_in_buffer(struct tty_struct *tty)
return 0; /* no buffer */
}
-static struct tty_driver *pdc_console_tty_driver;
-
static const struct tty_operations pdc_console_tty_ops = {
.open = pdc_console_tty_open,
.close = pdc_console_tty_close,
@@ -134,10 +136,8 @@ static const struct tty_operations pdc_console_tty_ops = {
static void pdc_console_poll(unsigned long unused)
{
-
int data, count = 0;
-
- struct tty_struct *tty = pdc_console_tty_driver->ttys[0];
+ struct tty_struct *tty = tty_port_tty_get(&tty_port);
if (!tty)
return;
@@ -153,15 +153,17 @@ static void pdc_console_poll(unsigned long unused)
if (count)
tty_flip_buffer_push(tty);
- if (tty->count && (pdc_cons.flags & CON_ENABLED))
+ tty_kref_put(tty);
+
+ if (pdc_cons.flags & CON_ENABLED)
mod_timer(&pdc_console_timer, jiffies + PDC_CONS_POLL_DELAY);
}
+static struct tty_driver *pdc_console_tty_driver;
+
static int __init pdc_console_tty_driver_init(void)
{
-
int err;
- struct tty_driver *drv;
/* Check if the console driver is still registered.
* It is unregistered if the pdc console was not selected as the
@@ -183,32 +185,29 @@ static int __init pdc_console_tty_driver_init(void)
printk(KERN_INFO "The PDC console driver is still registered, removing CON_BOOT flag\n");
pdc_cons.flags &= ~CON_BOOT;
- drv = alloc_tty_driver(1);
+ tty_port_init(&tty_port);
- if (!drv)
- return -ENOMEM;
+ pdc_console_tty_driver = alloc_tty_driver(1);
- drv->driver_name = "pdc_cons";
- drv->name = "ttyB";
- drv->major = MUX_MAJOR;
- drv->minor_start = 0;
- drv->type = TTY_DRIVER_TYPE_SYSTEM;
- drv->init_termios = tty_std_termios;
- drv->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS;
- tty_set_operations(drv, &pdc_console_tty_ops);
+ if (!pdc_console_tty_driver)
+ return -ENOMEM;
- err = tty_register_driver(drv);
+ pdc_console_tty_driver->driver_name = "pdc_cons";
+ pdc_console_tty_driver->name = "ttyB";
+ pdc_console_tty_driver->major = MUX_MAJOR;
+ pdc_console_tty_driver->minor_start = 0;
+ pdc_console_tty_driver->type = TTY_DRIVER_TYPE_SYSTEM;
+ pdc_console_tty_driver->init_termios = tty_std_termios;
+ pdc_console_tty_driver->flags = TTY_DRIVER_REAL_RAW |
+ TTY_DRIVER_RESET_TERMIOS;
+ tty_set_operations(pdc_console_tty_driver, &pdc_console_tty_ops);
+
+ err = tty_register_driver(pdc_console_tty_driver);
if (err) {
printk(KERN_ERR "Unable to register the PDC console TTY driver\n");
return err;
}
- pdc_console_tty_driver = drv;
-
- /* No need to initialize the pdc_console_timer if tty isn't allocated */
- init_timer(&pdc_console_timer);
- pdc_console_timer.function = pdc_console_poll;
-
return 0;
}
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 62c60b8..d4b94b3 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -71,9 +71,7 @@ void cpu_idle(void)
while (1) {
while (!need_resched())
barrier();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
check_pgt_cache();
}
}
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 938986e..ae098c4 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -17,7 +17,7 @@
#define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG)
#define JUMP_LABEL_NOP_SIZE 4
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
{
asm goto("1:\n\t"
"nop\n\t"
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8f1df12..1a8093f 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -61,8 +61,6 @@ struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-#define PERF_EVENT_INDEX_OFFSET 1
-
/*
* Only override the default definitions in include/linux/perf_event.h
* if we have hardware PMU support.
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 0a48bf5..c97fc60 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -101,11 +101,11 @@ void cpu_idle(void)
ppc64_runlatch_on();
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- if (cpu_should_die())
+ if (cpu_should_die()) {
+ sched_preempt_enable_no_resched();
cpu_die();
- schedule();
- preempt_disable();
+ }
+ schedule_preempt_disabled();
}
}
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 64483fd..c2e27ed 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event)
if (!ppmu)
return -ENOENT;
+ /* does not support taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
ev = event->attr.config;
@@ -1193,6 +1197,11 @@ static int power_pmu_event_init(struct perf_event *event)
return err;
}
+static int power_pmu_event_idx(struct perf_event *event)
+{
+ return event->hw.idx;
+}
+
struct pmu power_pmu = {
.pmu_enable = power_pmu_enable,
.pmu_disable = power_pmu_disable,
@@ -1205,6 +1214,7 @@ struct pmu power_pmu = {
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
+ .event_idx = power_pmu_event_idx,
};
/*
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 8fc6258..a5fbf4c 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -584,9 +584,7 @@ static void iseries_shared_idle(void)
if (hvlpevent_is_pending())
process_iSeries_events();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
@@ -615,9 +613,7 @@ static void iseries_dedicated_idle(void)
ppc64_runlatch_on();
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 95a6cf2..6c32190 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -13,7 +13,7 @@
#define ASM_ALIGN ".balign 4"
#endif
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
{
asm goto("0: brcl 0,0\n"
".pushsection __jump_table, \"aw\"\n"
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index a75f168..4eb444e 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -6,4 +6,3 @@
/* Empty, just to avoid compiling error */
-#define PERF_EVENT_INDEX_OFFSET 0
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index b9a7fdd..e30b2df 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -165,13 +165,6 @@ static inline int ext_hash(u16 code)
return (code + (code >> 9)) & 0xff;
}
-static void ext_int_hash_update(struct rcu_head *head)
-{
- struct ext_int_info *p = container_of(head, struct ext_int_info, rcu);
-
- kfree(p);
-}
-
int register_external_interrupt(u16 code, ext_int_handler_t handler)
{
struct ext_int_info *p;
@@ -202,7 +195,7 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
list_for_each_entry_rcu(p, &ext_int_hash[index], entry)
if (p->code == code && p->handler == handler) {
list_del_rcu(&p->entry);
- call_rcu(&p->rcu, ext_int_hash_update);
+ kfree_rcu(p, rcu);
}
spin_unlock_irqrestore(&ext_int_hash_lock, flags);
return 0;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index e795933..7618085 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -97,9 +97,7 @@ void cpu_idle(void)
tick_nohz_idle_exit();
if (test_thread_flag(TIF_MCCK_PENDING))
s390_handle_mcck();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 2398ce6..b0e28c4 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -550,12 +550,6 @@ int __cpuinit start_secondary(void *cpuvoid)
S390_lowcore.restart_psw.addr =
PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler;
__ctl_set_bit(0, 28); /* Enable lowcore protection */
- /*
- * Wait until the cpu which brought this one up marked it
- * active before enabling interrupts.
- */
- while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
- cpu_relax();
local_irq_enable();
/* cpu_idle will call schedule for us */
cpu_idle();
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index 25d0803..2707023 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -53,9 +53,7 @@ void __noreturn cpu_idle(void)
while (!need_resched())
barrier();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index 406508d..7e48928 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -114,9 +114,7 @@ void cpu_idle(void)
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 10b14e3..068b8a2 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event)
{
int err;
+ /* does not support taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HW_CACHE:
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index fc73a82..5080d16 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -7,7 +7,7 @@
#define JUMP_LABEL_NOP_SIZE 4
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
{
asm goto("1:\n\t"
"nop\n\t"
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 614da62..8e16a4a 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event)
if (atomic_read(&nmi_active) < 0)
return -ENODEV;
+ /* does not support taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
switch (attr->type) {
case PERF_TYPE_HARDWARE:
if (attr->config >= sparc_pmu->max_events)
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index f793742..935fdbc 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -113,9 +113,7 @@ void cpu_idle(void)
while (!need_resched())
cpu_relax();
}
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
check_pgt_cache();
}
}
@@ -138,9 +136,7 @@ void cpu_idle(void)
while (!need_resched())
cpu_relax();
}
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
check_pgt_cache();
}
}
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 39d8b05..06b5b5f 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -104,15 +104,13 @@ void cpu_idle(void)
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
-
#ifdef CONFIG_HOTPLUG_CPU
- if (cpu_is_offline(cpu))
+ if (cpu_is_offline(cpu)) {
+ sched_preempt_enable_no_resched();
cpu_play_dead();
+ }
#endif
-
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 4c1ac6e..6ae495e 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -108,9 +108,7 @@ void cpu_idle(void)
}
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e1..d523a6c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -82,6 +82,7 @@ config X86
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
+ select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS)
@@ -179,6 +180,9 @@ config ARCH_HAS_DEFAULT_IDLE
config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
+config ARCH_HAS_CPU_AUTOPROBE
+ def_bool y
+
config HAVE_SETUP_PER_CPU_AREA
def_bool y
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 545d0ce..b3350bd 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -28,6 +28,7 @@
#include <crypto/aes.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
+#include <asm/cpu_device_id.h>
#include <asm/i387.h>
#include <asm/aes.h>
#include <crypto/scatterwalk.h>
@@ -1253,14 +1254,19 @@ static struct crypto_alg __rfc4106_alg = {
};
#endif
+
+static const struct x86_cpu_id aesni_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_AES),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+
static int __init aesni_init(void)
{
int err;
- if (!cpu_has_aes) {
- printk(KERN_INFO "Intel AES-NI instructions are not detected.\n");
+ if (!x86_match_cpu(aesni_cpu_id))
return -ENODEV;
- }
if ((err = crypto_fpu_init()))
goto fpu_err;
diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel.c
index b9d0026..493f959 100644
--- a/arch/x86/crypto/crc32c-intel.c
+++ b/arch/x86/crypto/crc32c-intel.c
@@ -31,6 +31,7 @@
#include <crypto/internal/hash.h>
#include <asm/cpufeature.h>
+#include <asm/cpu_device_id.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
@@ -173,13 +174,17 @@ static struct shash_alg alg = {
}
};
+static const struct x86_cpu_id crc32c_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
static int __init crc32c_intel_mod_init(void)
{
- if (cpu_has_xmm4_2)
- return crypto_register_shash(&alg);
- else
+ if (!x86_match_cpu(crc32c_cpu_id))
return -ENODEV;
+ return crypto_register_shash(&alg);
}
static void __exit crc32c_intel_mod_fini(void)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 976aa64..b4bf0a6 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -20,6 +20,7 @@
#include <crypto/gf128mul.h>
#include <crypto/internal/hash.h>
#include <asm/i387.h>
+#include <asm/cpu_device_id.h>
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
@@ -294,15 +295,18 @@ static struct ahash_alg ghash_async_alg = {
},
};
+static const struct x86_cpu_id pcmul_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
+
static int __init ghash_pclmulqdqni_mod_init(void)
{
int err;
- if (!cpu_has_pclmulqdq) {
- printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not"
- " detected.\n");
+ if (!x86_match_cpu(pcmul_cpu_id))
return -ENODEV;
- }
err = crypto_register_shash(&ghash_alg);
if (err)
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
new file mode 100644
index 0000000..ff501e5
--- /dev/null
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -0,0 +1,13 @@
+#ifndef _CPU_DEVICE_ID
+#define _CPU_DEVICE_ID 1
+
+/*
+ * Declare drivers belonging to specific x86 CPUs
+ * Similar in spirit to pci_device_id and related PCI functions
+ */
+
+#include <linux/mod_devicetable.h>
+
+extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
+
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 8d67d42..dcb839e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -177,6 +177,7 @@
#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
+#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index da0b3ca..382f75d 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -7,7 +7,6 @@
typedef struct {
unsigned int __softirq_pending;
unsigned int __nmi_count; /* arch dependent */
- unsigned int irq0_irqs;
#ifdef CONFIG_X86_LOCAL_APIC
unsigned int apic_timer_irqs; /* arch dependent */
unsigned int irq_spurious_count;
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 205b063..74a2e31 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -97,11 +97,12 @@
/* Attribute search APIs */
extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
- insn_byte_t last_pfx,
+ int lpfx_id,
insn_attr_t esc_attr);
extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
- insn_byte_t last_pfx,
+ int lpfx_id,
insn_attr_t esc_attr);
extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
insn_byte_t vex_m,
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 74df3f1..48eb30a 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -96,12 +96,6 @@ struct insn {
#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
-/* The last prefix is needed for two-byte and three-byte opcodes */
-static inline insn_byte_t insn_last_prefix(struct insn *insn)
-{
- return insn->prefixes.bytes[3];
-}
-
extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
extern void insn_get_prefixes(struct insn *insn);
extern void insn_get_opcode(struct insn *insn);
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
return X86_VEX_P(insn->vex_prefix.bytes[2]);
}
+/* Get the last prefix id from last prefix or VEX prefix */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+ if (insn_is_avx(insn))
+ return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+
+ if (insn->prefixes.bytes[3])
+ return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+ return 0;
+}
+
/* Offset of each field from kaddr */
static inline int insn_offset_rex_prefix(struct insn *insn)
{
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a32b18c..3a16c14 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -9,12 +9,12 @@
#define JUMP_LABEL_NOP_SIZE 5
-#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
{
asm goto("1:"
- JUMP_LABEL_INITIAL_NOP
+ STATIC_KEY_INITIAL_NOP
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a6962d9..ccb8059 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,13 @@
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
+#define MSR_LBR_SELECT 0x000001c8
+#define MSR_LBR_TOS 0x000001c9
+#define MSR_LBR_NHM_FROM 0x00000680
+#define MSR_LBR_NHM_TO 0x000006c0
+#define MSR_LBR_CORE_FROM 0x00000040
+#define MSR_LBR_CORE_TO 0x00000060
+
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a7d2db9..c0180fd 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -230,9 +230,9 @@ static inline unsigned long long paravirt_sched_clock(void)
return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}
-struct jump_label_key;
-extern struct jump_label_key paravirt_steal_enabled;
-extern struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
static inline u64 paravirt_steal_clock(int cpu)
{
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 461ce43..e8fb2c7 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void);
#ifdef CONFIG_PERF_EVENTS
extern void perf_events_lapic_init(void);
-#define PERF_EVENT_INDEX_OFFSET 0
-
/*
* Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
* This flag is otherwise unused and ABI specified to be 0, so nobody should
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 431793e..34baa0e 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -57,14 +57,10 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
- unsigned long long quot;
- unsigned long long rem;
int cpu = smp_processor_id();
unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
- quot = (cyc >> CYC2NS_SCALE_FACTOR);
- rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);
- ns += quot * per_cpu(cyc2ns, cpu) +
- ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
+ ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
+ (1UL << CYC2NS_SCALE_FACTOR));
return ns;
}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5369059..532d2e0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 25f24dc..6ab6aa2 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -16,6 +16,7 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y += proc.o capflags.o powerflags.o common.o
obj-y += vmware.o hypervisor.o sched.o mshyperv.o
obj-y += rdrand.o
+obj-y += match.o
obj-$(CONFIG_X86_32) += bugs.o
obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f4773f4..0a44b90 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <linux/io.h>
+#include <linux/sched.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
if (c->x86_power & (1 << 8)) {
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ if (!check_tsc_unstable())
+ sched_clock_stable = 1;
}
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
new file mode 100644
index 0000000..5502b28
--- /dev/null
+++ b/arch/x86/kernel/cpu/match.c
@@ -0,0 +1,91 @@
+#include <asm/cpu_device_id.h>
+#include <asm/processor.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+/**
+ * x86_match_cpu - match current CPU again an array of x86_cpu_ids
+ * @match: Pointer to array of x86_cpu_ids. Last entry terminated with
+ * {}.
+ *
+ * Return the entry if the current CPU matches the entries in the
+ * passed x86_cpu_id match table. Otherwise NULL. The match table
+ * contains vendor (X86_VENDOR_*), family, model and feature bits or
+ * respective wildcard entries.
+ *
+ * A typical table entry would be to match a specific CPU
+ * { X86_VENDOR_INTEL, 6, 0x12 }
+ * or to match a specific CPU feature
+ * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
+ *
+ * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
+ * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
+ *
+ * Arrays used to match for this should also be declared using
+ * MODULE_DEVICE_TABLE(x86_cpu, ...)
+ *
+ * This always matches against the boot cpu, assuming models and features are
+ * consistent over all CPUs.
+ */
+const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
+{
+ const struct x86_cpu_id *m;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ for (m = match; m->vendor | m->family | m->model | m->feature; m++) {
+ if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor)
+ continue;
+ if (m->family != X86_FAMILY_ANY && c->x86 != m->family)
+ continue;
+ if (m->model != X86_MODEL_ANY && c->x86_model != m->model)
+ continue;
+ if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
+ continue;
+ return m;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(x86_match_cpu);
+
+ssize_t arch_print_cpu_modalias(struct device *dev,
+ struct device_attribute *attr,
+ char *bufptr)
+{
+ int size = PAGE_SIZE;
+ int i, n;
+ char *buf = bufptr;
+
+ n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:"
+ "model:%04X:feature:",
+ boot_cpu_data.x86_vendor,
+ boot_cpu_data.x86,
+ boot_cpu_data.x86_model);
+ size -= n;
+ buf += n;
+ size -= 1;
+ for (i = 0; i < NCAPINTS*32; i++) {
+ if (boot_cpu_has(i)) {
+ n = snprintf(buf, size, ",%04X", i);
+ if (n >= size) {
+ WARN(1, "x86 features overflow page\n");
+ break;
+ }
+ size -= n;
+ buf += n;
+ }
+ }
+ *buf++ = '\n';
+ return buf - bufptr;
+}
+
+int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (buf) {
+ arch_print_cpu_modalias(NULL, NULL, buf);
+ add_uevent_var(env, "MODALIAS=%s", buf);
+ kfree(buf);
+ }
+ return 0;
+}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5adce10..0a18d16 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
+#include <linux/device.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -31,6 +32,7 @@
#include <asm/compat.h>
#include <asm/smp.h>
#include <asm/alternative.h>
+#include <asm/timer.h>
#include "perf_event.h"
@@ -351,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event)
return 0;
}
+/*
+ * check that branch_sample_type is compatible with
+ * settings needed for precise_ip > 1 which implies
+ * using the LBR to capture ALL taken branches at the
+ * priv levels of the measurement
+ */
+static inline int precise_br_compat(struct perf_event *event)
+{
+ u64 m = event->attr.branch_sample_type;
+ u64 b = 0;
+
+ /* must capture all branches */
+ if (!(m & PERF_SAMPLE_BRANCH_ANY))
+ return 0;
+
+ m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_user)
+ b |= PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_kernel)
+ b |= PERF_SAMPLE_BRANCH_KERNEL;
+
+ /*
+ * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
+ */
+
+ return m == b;
+}
+
int x86_pmu_hw_config(struct perf_event *event)
{
if (event->attr.precise_ip) {
@@ -367,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
+ /*
+ * check that PEBS LBR correction does not conflict with
+ * whatever the user is asking with attr->branch_sample_type
+ */
+ if (event->attr.precise_ip > 1) {
+ u64 *br_type = &event->attr.branch_sample_type;
+
+ if (has_branch_stack(event)) {
+ if (!precise_br_compat(event))
+ return -EOPNOTSUPP;
+
+ /* branch_sample_type is compatible */
+
+ } else {
+ /*
+ * user did not specify branch_sample_type
+ *
+ * For PEBS fixups, we capture all
+ * the branches at the priv level of the
+ * event.
+ */
+ *br_type = PERF_SAMPLE_BRANCH_ANY;
+
+ if (!event->attr.exclude_user)
+ *br_type |= PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_kernel)
+ *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
+ }
+ }
}
/*
@@ -424,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ /* mark not used */
+ event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
return x86_pmu.hw_config(event);
}
@@ -1210,6 +1276,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
break;
case CPU_STARTING:
+ if (x86_pmu.attr_rdpmc)
+ set_in_cr4(X86_CR4_PCE);
if (x86_pmu.cpu_starting)
x86_pmu.cpu_starting(cpu);
break;
@@ -1319,6 +1387,8 @@ static int __init init_hw_perf_events(void)
}
}
+ x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1542,23 +1612,106 @@ static int x86_pmu_event_init(struct perf_event *event)
return err;
}
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+ int idx = event->hw.idx;
+
+ if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
+ idx -= X86_PMC_IDX_FIXED;
+ idx |= 1 << 30;
+ }
+
+ return idx + 1;
+}
+
+static ssize_t get_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
+}
+
+static void change_rdpmc(void *info)
+{
+ bool enable = !!(unsigned long)info;
+
+ if (enable)
+ set_in_cr4(X86_CR4_PCE);
+ else
+ clear_in_cr4(X86_CR4_PCE);
+}
+
+static ssize_t set_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long val = simple_strtoul(buf, NULL, 0);
+
+ if (!!val != !!x86_pmu.attr_rdpmc) {
+ x86_pmu.attr_rdpmc = !!val;
+ smp_call_function(change_rdpmc, (void *)val, 1);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+
+static struct attribute *x86_pmu_attrs[] = {
+ &dev_attr_rdpmc.attr,
+ NULL,
+};
+
+static struct attribute_group x86_pmu_attr_group = {
+ .attrs = x86_pmu_attrs,
+};
+
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+ &x86_pmu_attr_group,
+ NULL,
+};
+
+static void x86_pmu_flush_branch_stack(void)
+{
+ if (x86_pmu.flush_branch_stack)
+ x86_pmu.flush_branch_stack();
+}
+
static struct pmu pmu = {
- .pmu_enable = x86_pmu_enable,
- .pmu_disable = x86_pmu_disable,
+ .pmu_enable = x86_pmu_enable,
+ .pmu_disable = x86_pmu_disable,
+
+ .attr_groups = x86_pmu_attr_groups,
.event_init = x86_pmu_event_init,
- .add = x86_pmu_add,
- .del = x86_pmu_del,
- .start = x86_pmu_start,
- .stop = x86_pmu_stop,
- .read = x86_pmu_read,
+ .add = x86_pmu_add,
+ .del = x86_pmu_del,
+ .start = x86_pmu_start,
+ .stop = x86_pmu_stop,
+ .read = x86_pmu_read,
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
+
+ .event_idx = x86_pmu_event_idx,
+ .flush_branch_stack = x86_pmu_flush_branch_stack,
};
+void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ return;
+
+ if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+ return;
+
+ userpg->time_mult = this_cpu_read(cyc2ns);
+ userpg->time_shift = CYC2NS_SCALE_FACTOR;
+ userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+}
+
/*
* callchain support
*/
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index c30c807..8484e77 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
+ EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_MAX /* number of entries needed */
};
@@ -130,6 +131,8 @@ struct cpu_hw_events {
void *lbr_context;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
+ struct er_account *lbr_sel;
+ u64 br_sel;
/*
* Intel host/guest exclude bits
@@ -268,6 +271,29 @@ struct x86_pmu_quirk {
void (*func)(void);
};
+union x86_pmu_config {
+ struct {
+ u64 event:8,
+ umask:8,
+ usr:1,
+ os:1,
+ edge:1,
+ pc:1,
+ interrupt:1,
+ __reserved1:1,
+ en:1,
+ inv:1,
+ cmask:8,
+ event2:4,
+ __reserved2:4,
+ go:1,
+ ho:1;
+ } bits;
+ u64 value;
+};
+
+#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
+
/*
* struct x86_pmu - generic x86 pmu
*/
@@ -309,10 +335,19 @@ struct x86_pmu {
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
+ /*
+ * sysfs attrs
+ */
+ int attr_rdpmc;
+
+ /*
+ * CPU Hotplug hooks
+ */
int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
void (*cpu_dead)(int cpu);
+ void (*flush_branch_stack)(void);
/*
* Intel Arch Perfmon v2+
@@ -334,6 +369,8 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
+ u64 lbr_sel_mask; /* LBR_SELECT valid bits */
+ const int *lbr_sel_map; /* lbr_select mappings */
/*
* Extra registers for events
@@ -447,6 +484,15 @@ extern struct event_constraint emptyconstraint;
extern struct event_constraint unconstrained;
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+ return ip > PAGE_OFFSET;
+#else
+ return (long)ip < 0;
+#endif
+}
+
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
@@ -527,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void);
void intel_pmu_lbr_init_atom(void);
+void intel_pmu_lbr_init_snb(void);
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
int p4_pmu_init(void);
int p6_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67250a5..dd002fa 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 61d4f79..6a84e7f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
},
};
+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
+{
+ /* user explicitly requested branch sampling */
+ if (has_branch_stack(event))
+ return true;
+
+ /* implicit branch sampling to correct PEBS skid */
+ if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+ return true;
+
+ return false;
+}
+
static void intel_pmu_disable_all(void)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+ /*
+ * must disable before any actual event
+ * because any event may be combined with LBR
+ */
+ if (intel_pmu_needs_lbr_smpl(event))
+ intel_pmu_lbr_disable(event);
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
@@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
intel_pmu_enable_bts(hwc->config);
return;
}
+ /*
+ * must enabled before any actual event
+ * because any event may be combined with LBR
+ */
+ if (intel_pmu_needs_lbr_smpl(event))
+ intel_pmu_lbr_enable(event);
if (event->attr.exclude_host)
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1058,6 +1084,9 @@ again:
data.period = event->hw.last_period;
+ if (has_branch_stack(event))
+ data.br_stack = &cpuc->lbr_stack;
+
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
@@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
*/
static struct event_constraint *
__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
+ struct perf_event *event,
+ struct hw_perf_event_extra *reg)
{
struct event_constraint *c = &emptyconstraint;
- struct hw_perf_event_extra *reg = &event->hw.extra_reg;
struct er_account *era;
unsigned long flags;
int orig_idx = reg->idx;
/* already allocated shared msr */
if (reg->alloc)
- return &unconstrained;
+ return NULL; /* call x86_get_event_constraint() */
again:
era = &cpuc->shared_regs->regs[reg->idx];
@@ -1157,14 +1186,10 @@ again:
reg->alloc = 1;
/*
- * All events using extra_reg are unconstrained.
- * Avoids calling x86_get_event_constraints()
- *
- * Must revisit if extra_reg controlling events
- * ever have constraints. Worst case we go through
- * the regular event constraint table.
+ * need to call x86_get_event_constraint()
+ * to check if associated event has constraints
*/
- c = &unconstrained;
+ c = NULL;
} else if (intel_try_alt_er(event, orig_idx)) {
raw_spin_unlock_irqrestore(&era->lock, flags);
goto again;
@@ -1201,11 +1226,23 @@ static struct event_constraint *
intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
- struct event_constraint *c = NULL;
-
- if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
- c = __intel_shared_reg_get_constraints(cpuc, event);
-
+ struct event_constraint *c = NULL, *d;
+ struct hw_perf_event_extra *xreg, *breg;
+
+ xreg = &event->hw.extra_reg;
+ if (xreg->idx != EXTRA_REG_NONE) {
+ c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+ if (c == &emptyconstraint)
+ return c;
+ }
+ breg = &event->hw.branch_reg;
+ if (breg->idx != EXTRA_REG_NONE) {
+ d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+ if (d == &emptyconstraint) {
+ __intel_shared_reg_put_constraints(cpuc, xreg);
+ c = d;
+ }
+ }
return c;
}
@@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
reg = &event->hw.extra_reg;
if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg);
+
+ reg = &event->hw.branch_reg;
+ if (reg->idx != EXTRA_REG_NONE)
+ __intel_shared_reg_put_constraints(cpuc, reg);
}
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1288,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
*
* Thereby we gain a PEBS capable cycle counter.
*/
- u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
+ u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
+
alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
event->hw.config = alt_config;
}
+ if (intel_pmu_needs_lbr_smpl(event)) {
+ ret = intel_pmu_setup_lbr_filter(event);
+ if (ret)
+ return ret;
+ }
+
if (event->attr.type != PERF_TYPE_RAW)
return 0;
@@ -1432,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- if (!x86_pmu.extra_regs)
+ if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
return NOTIFY_OK;
cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1454,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu)
*/
intel_pmu_lbr_reset();
- if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
+ cpuc->lbr_sel = NULL;
+
+ if (!cpuc->shared_regs)
return;
- for_each_cpu(i, topology_thread_cpumask(cpu)) {
- struct intel_shared_regs *pc;
+ if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
+ for_each_cpu(i, topology_thread_cpumask(cpu)) {
+ struct intel_shared_regs *pc;
- pc = per_cpu(cpu_hw_events, i).shared_regs;
- if (pc && pc->core_id == core_id) {
- cpuc->kfree_on_online = cpuc->shared_regs;
- cpuc->shared_regs = pc;
- break;
+ pc = per_cpu(cpu_hw_events, i).shared_regs;
+ if (pc && pc->core_id == core_id) {
+ cpuc->kfree_on_online = cpuc->shared_regs;
+ cpuc->shared_regs = pc;
+ break;
+ }
}
+ cpuc->shared_regs->core_id = core_id;
+ cpuc->shared_regs->refcnt++;
}
- cpuc->shared_regs->core_id = core_id;
- cpuc->shared_regs->refcnt++;
+ if (x86_pmu.lbr_sel_map)
+ cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
}
static void intel_pmu_cpu_dying(int cpu)
@@ -1487,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu)
fini_debug_store_on_cpu(cpu);
}
+static void intel_pmu_flush_branch_stack(void)
+{
+ /*
+ * Intel LBR does not tag entries with the
+ * PID of the current task, then we need to
+ * flush it on ctxsw
+ * For now, we simply reset it
+ */
+ if (x86_pmu.lbr_nr)
+ intel_pmu_lbr_reset();
+}
+
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -1514,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
.guest_get_msrs = intel_guest_get_msrs,
+ .flush_branch_stack = intel_pmu_flush_branch_stack,
};
static __init void intel_clovertown_quirk(void)
@@ -1690,9 +1757,11 @@ __init int intel_pmu_init(void)
x86_pmu.extra_regs = intel_nehalem_extra_regs;
/* UOPS_ISSUED.STALLED_CYCLES */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
x86_add_quirk(intel_nehalem_quirk);
@@ -1727,9 +1796,11 @@ __init int intel_pmu_init(void)
x86_pmu.er_flags |= ERF_HAS_RSP_1;
/* UOPS_ISSUED.STALLED_CYCLES */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
pr_cont("Westmere events, ");
break;
@@ -1740,7 +1811,7 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
- intel_pmu_lbr_init_nhm();
+ intel_pmu_lbr_init_snb();
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
@@ -1750,9 +1821,11 @@ __init int intel_pmu_init(void)
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
pr_cont("SandyBridge events, ");
break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index d6bd49f..7f64df1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -3,6 +3,7 @@
#include <linux/slab.h>
#include <asm/perf_event.h>
+#include <asm/insn.h>
#include "perf_event.h"
@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
cpuc->pebs_enabled |= 1ULL << hwc->idx;
-
- if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
- intel_pmu_lbr_enable(event);
}
void intel_pmu_pebs_disable(struct perf_event *event)
@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
- if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
- intel_pmu_lbr_disable(event);
}
void intel_pmu_pebs_enable_all(void)
@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
-#include <asm/insn.h>
-
-static inline bool kernel_ip(unsigned long ip)
-{
-#ifdef CONFIG_X86_32
- return ip > PAGE_OFFSET;
-#else
- return (long)ip < 0;
-#endif
-}
-
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* both formats and we don't use the other fields in this
* routine.
*/
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct pebs_record_core *pebs = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
else
regs.flags &= ~PERF_EFLAGS_EXACT;
+ if (has_branch_stack(event))
+ data.br_stack = &cpuc->lbr_stack;
+
if (perf_event_overflow(event, &data, &regs))
x86_pmu_stop(event, 0);
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 47a7e63..520b426 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -3,6 +3,7 @@
#include <asm/perf_event.h>
#include <asm/msr.h>
+#include <asm/insn.h>
#include "perf_event.h"
@@ -14,6 +15,100 @@ enum {
};
/*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
+#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT 2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT 5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
+#define LBR_FAR_BIT 8 /* do not capture far branches */
+
+#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
+#define LBR_USER (1 << LBR_USER_BIT)
+#define LBR_JCC (1 << LBR_JCC_BIT)
+#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN (1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
+#define LBR_FAR (1 << LBR_FAR_BIT)
+
+#define LBR_PLM (LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP -1 /* LBR filter not supported */
+#define LBR_IGN 0 /* ignored */
+
+#define LBR_ANY \
+ (LBR_JCC |\
+ LBR_REL_CALL |\
+ LBR_IND_CALL |\
+ LBR_RETURN |\
+ LBR_REL_JMP |\
+ LBR_IND_JMP |\
+ LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
+
+#define for_each_branch_sample_type(x) \
+ for ((x) = PERF_SAMPLE_BRANCH_USER; \
+ (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
+/*
+ * x86control flow change classification
+ * x86control flow changes include branches, interrupts, traps, faults
+ */
+enum {
+ X86_BR_NONE = 0, /* unknown */
+
+ X86_BR_USER = 1 << 0, /* branch target is user */
+ X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
+
+ X86_BR_CALL = 1 << 2, /* call */
+ X86_BR_RET = 1 << 3, /* return */
+ X86_BR_SYSCALL = 1 << 4, /* syscall */
+ X86_BR_SYSRET = 1 << 5, /* syscall return */
+ X86_BR_INT = 1 << 6, /* sw interrupt */
+ X86_BR_IRET = 1 << 7, /* return from interrupt */
+ X86_BR_JCC = 1 << 8, /* conditional */
+ X86_BR_JMP = 1 << 9, /* jump */
+ X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
+ X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+
+#define X86_BR_ANY \
+ (X86_BR_CALL |\
+ X86_BR_RET |\
+ X86_BR_SYSCALL |\
+ X86_BR_SYSRET |\
+ X86_BR_INT |\
+ X86_BR_IRET |\
+ X86_BR_JCC |\
+ X86_BR_JMP |\
+ X86_BR_IRQ |\
+ X86_BR_IND_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL \
+ (X86_BR_CALL |\
+ X86_BR_IND_CALL |\
+ X86_BR_SYSCALL |\
+ X86_BR_IRQ |\
+ X86_BR_INT)
+
+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+
+/*
* We only support LBR implementations that have FREEZE_LBRS_ON_PMI
* otherwise it becomes near impossible to get a reliable stack.
*/
@@ -21,6 +116,10 @@ enum {
static void __intel_pmu_lbr_enable(void)
{
u64 debugctl;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+ if (cpuc->lbr_sel)
+ wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
@@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event)
* Reset the LBR stack if we changed task context to
* avoid data leaks.
*/
-
if (event->ctx->task && cpuc->lbr_context != event->ctx) {
intel_pmu_lbr_reset();
cpuc->lbr_context = event->ctx;
}
+ cpuc->br_sel = event->hw.branch_reg.reg;
cpuc->lbr_users++;
}
@@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
- if (cpuc->enabled && !cpuc->lbr_users)
+ if (cpuc->enabled && !cpuc->lbr_users) {
__intel_pmu_lbr_disable();
+ /* avoid stale pointer */
+ cpuc->lbr_context = NULL;
+ }
}
void intel_pmu_lbr_enable_all(void)
@@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void)
__intel_pmu_lbr_disable();
}
+/*
+ * TOS = most recently recorded branch
+ */
static inline u64 intel_pmu_lbr_tos(void)
{
u64 tos;
@@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
- cpuc->lbr_entries[i].from = msr_lastbranch.from;
- cpuc->lbr_entries[i].to = msr_lastbranch.to;
- cpuc->lbr_entries[i].flags = 0;
+ cpuc->lbr_entries[i].from = msr_lastbranch.from;
+ cpuc->lbr_entries[i].to = msr_lastbranch.to;
+ cpuc->lbr_entries[i].mispred = 0;
+ cpuc->lbr_entries[i].predicted = 0;
+ cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
}
-#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
-
/*
* Due to lack of segmentation in Linux the effective address (offset)
* is the same as the linear address, allowing us to merge the LIP and EIP
@@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
unsigned long lbr_idx = (tos - i) & mask;
- u64 from, to, flags = 0;
+ u64 from, to, mis = 0, pred = 0;
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
- flags = !!(from & LBR_FROM_FLAG_MISPRED);
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
from = (u64)((((s64)from) << 1) >> 1);
}
- cpuc->lbr_entries[i].from = from;
- cpuc->lbr_entries[i].to = to;
- cpuc->lbr_entries[i].flags = flags;
+ cpuc->lbr_entries[i].from = from;
+ cpuc->lbr_entries[i].to = to;
+ cpuc->lbr_entries[i].mispred = mis;
+ cpuc->lbr_entries[i].predicted = pred;
+ cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
}
@@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void)
intel_pmu_lbr_read_32(cpuc);
else
intel_pmu_lbr_read_64(cpuc);
+
+ intel_pmu_lbr_filter(cpuc);
+}
+
+/*
+ * SW filter is used:
+ * - in case there is no HW filter
+ * - in case the HW filter has errata or limitations
+ */
+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+{
+ u64 br_type = event->attr.branch_sample_type;
+ int mask = 0;
+
+ if (br_type & PERF_SAMPLE_BRANCH_USER)
+ mask |= X86_BR_USER;
+
+ if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+ mask |= X86_BR_KERNEL;
+
+ /* we ignore BRANCH_HV here */
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY)
+ mask |= X86_BR_ANY;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+ mask |= X86_BR_ANY_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+ if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ mask |= X86_BR_IND_CALL;
+ /*
+ * stash actual user request into reg, it may
+ * be used by fixup code for some CPU
+ */
+ event->hw.branch_reg.reg = mask;
+}
+
+/*
+ * setup the HW LBR filter
+ * Used only when available, may not be enough to disambiguate
+ * all branches, may need the help of the SW filter
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg;
+ u64 br_type = event->attr.branch_sample_type;
+ u64 mask = 0, m;
+ u64 v;
+
+ for_each_branch_sample_type(m) {
+ if (!(br_type & m))
+ continue;
+
+ v = x86_pmu.lbr_sel_map[m];
+ if (v == LBR_NOT_SUPP)
+ return -EOPNOTSUPP;
+
+ if (v != LBR_IGN)
+ mask |= v;
+ }
+ reg = &event->hw.branch_reg;
+ reg->idx = EXTRA_REG_LBR;
+
+ /* LBR_SELECT operates in suppress mode so invert mask */
+ reg->config = ~mask & x86_pmu.lbr_sel_mask;
+
+ return 0;
+}
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+ int ret = 0;
+
+ /*
+ * no LBR on this PMU
+ */
+ if (!x86_pmu.lbr_nr)
+ return -EOPNOTSUPP;
+
+ /*
+ * setup SW LBR filter
+ */
+ intel_pmu_setup_sw_lbr_filter(event);
+
+ /*
+ * setup HW LBR filter, if any
+ */
+ if (x86_pmu.lbr_sel_map)
+ ret = intel_pmu_setup_hw_lbr_filter(event);
+
+ return ret;
}
+/*
+ * return the type of control flow change at address "from"
+ * intruction is not necessarily a branch (in case of interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ */
+static int branch_type(unsigned long from, unsigned long to)
+{
+ struct insn insn;
+ void *addr;
+ int bytes, size = MAX_INSN_SIZE;
+ int ret = X86_BR_NONE;
+ int ext, to_plm, from_plm;
+ u8 buf[MAX_INSN_SIZE];
+ int is64 = 0;
+
+ to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+ from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+ /*
+ * maybe zero if lbr did not fill up after a reset by the time
+ * we get a PMU interrupt
+ */
+ if (from == 0 || to == 0)
+ return X86_BR_NONE;
+
+ if (from_plm == X86_BR_USER) {
+ /*
+ * can happen if measuring at the user level only
+ * and we interrupt in a kernel thread, e.g., idle.
+ */
+ if (!current->mm)
+ return X86_BR_NONE;
+
+ /* may fail if text not present */
+ bytes = copy_from_user_nmi(buf, (void __user *)from, size);
+ if (bytes != size)
+ return X86_BR_NONE;
+
+ addr = buf;
+ } else
+ addr = (void *)from;
+
+ /*
+ * decoder needs to know the ABI especially
+ * on 64-bit systems running 32-bit apps
+ */
+#ifdef CONFIG_X86_64
+ is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+#endif
+ insn_init(&insn, addr, is64);
+ insn_get_opcode(&insn);
+
+ switch (insn.opcode.bytes[0]) {
+ case 0xf:
+ switch (insn.opcode.bytes[1]) {
+ case 0x05: /* syscall */
+ case 0x34: /* sysenter */
+ ret = X86_BR_SYSCALL;
+ break;
+ case 0x07: /* sysret */
+ case 0x35: /* sysexit */
+ ret = X86_BR_SYSRET;
+ break;
+ case 0x80 ... 0x8f: /* conditional */
+ ret = X86_BR_JCC;
+ break;
+ default:
+ ret = X86_BR_NONE;
+ }
+ break;
+ case 0x70 ... 0x7f: /* conditional */
+ ret = X86_BR_JCC;
+ break;
+ case 0xc2: /* near ret */
+ case 0xc3: /* near ret */
+ case 0xca: /* far ret */
+ case 0xcb: /* far ret */
+ ret = X86_BR_RET;
+ break;
+ case 0xcf: /* iret */
+ ret = X86_BR_IRET;
+ break;
+ case 0xcc ... 0xce: /* int */
+ ret = X86_BR_INT;
+ break;
+ case 0xe8: /* call near rel */
+ case 0x9a: /* call far absolute */
+ ret = X86_BR_CALL;
+ break;
+ case 0xe0 ... 0xe3: /* loop jmp */
+ ret = X86_BR_JCC;
+ break;
+ case 0xe9 ... 0xeb: /* jmp */
+ ret = X86_BR_JMP;
+ break;
+ case 0xff: /* call near absolute, call far absolute ind */
+ insn_get_modrm(&insn);
+ ext = (insn.modrm.bytes[0] >> 3) & 0x7;
+ switch (ext) {
+ case 2: /* near ind call */
+ case 3: /* far ind call */
+ ret = X86_BR_IND_CALL;
+ break;
+ case 4:
+ case 5:
+ ret = X86_BR_JMP;
+ break;
+ }
+ break;
+ default:
+ ret = X86_BR_NONE;
+ }
+ /*
+ * interrupts, traps, faults (and thus ring transition) may
+ * occur on any instructions. Thus, to classify them correctly,
+ * we need to first look at the from and to priv levels. If they
+ * are different and to is in the kernel, then it indicates
+ * a ring transition. If the from instruction is not a ring
+ * transition instr (syscall, systenter, int), then it means
+ * it was a irq, trap or fault.
+ *
+ * we have no way of detecting kernel to kernel faults.
+ */
+ if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+ && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+ ret = X86_BR_IRQ;
+
+ /*
+ * branch priv level determined by target as
+ * is done by HW when LBR_SELECT is implemented
+ */
+ if (ret != X86_BR_NONE)
+ ret |= to_plm;
+
+ return ret;
+}
+
+/*
+ * implement actual branch filter based on user demand.
+ * Hardware may not exactly satisfy that request, thus
+ * we need to inspect opcodes. Mismatched branches are
+ * discarded. Therefore, the number of branches returned
+ * in PERF_SAMPLE_BRANCH_STACK sample may vary.
+ */
+static void
+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+{
+ u64 from, to;
+ int br_sel = cpuc->br_sel;
+ int i, j, type;
+ bool compress = false;
+
+ /* if sampling all branches, then nothing to filter */
+ if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+ return;
+
+ for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+
+ from = cpuc->lbr_entries[i].from;
+ to = cpuc->lbr_entries[i].to;
+
+ type = branch_type(from, to);
+
+ /* if type does not correspond, then discard */
+ if (type == X86_BR_NONE || (br_sel & type) != type) {
+ cpuc->lbr_entries[i].from = 0;
+ compress = true;
+ }
+ }
+
+ if (!compress)
+ return;
+
+ /* remove all entries with from=0 */
+ for (i = 0; i < cpuc->lbr_stack.nr; ) {
+ if (!cpuc->lbr_entries[i].from) {
+ j = i;
+ while (++j < cpuc->lbr_stack.nr)
+ cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+ cpuc->lbr_stack.nr--;
+ if (!cpuc->lbr_entries[i].from)
+ continue;
+ }
+ i++;
+ }
+}
+
+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+ [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
+ [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
+ [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
+ [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
+ [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
+ | LBR_IND_JMP | LBR_FAR,
+ /*
+ * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+ */
+ [PERF_SAMPLE_BRANCH_ANY_CALL] =
+ LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+ /*
+ * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+ */
+ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+ [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
+ [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
+ [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
+ [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
+ [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL
+ | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL,
+};
+
+/* core */
void intel_pmu_lbr_init_core(void)
{
x86_pmu.lbr_nr = 4;
- x86_pmu.lbr_tos = 0x01c9;
- x86_pmu.lbr_from = 0x40;
- x86_pmu.lbr_to = 0x60;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+
+ /*
+ * SW branch filter usage:
+ * - compensate for lack of HW filter
+ */
+ pr_cont("4-deep LBR, ");
}
+/* nehalem/westmere */
void intel_pmu_lbr_init_nhm(void)
{
x86_pmu.lbr_nr = 16;
- x86_pmu.lbr_tos = 0x01c9;
- x86_pmu.lbr_from = 0x680;
- x86_pmu.lbr_to = 0x6c0;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
+
+ /*
+ * SW branch filter usage:
+ * - workaround LBR_SEL errata (see above)
+ * - support syscall, sysret capture.
+ * That requires LBR_FAR but that means far
+ * jmp need to be filtered out
+ */
+ pr_cont("16-deep LBR, ");
+}
+
+/* sandy bridge */
+void intel_pmu_lbr_init_snb(void)
+{
+ x86_pmu.lbr_nr = 16;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = snb_lbr_sel_map;
+
+ /*
+ * SW branch filter usage:
+ * - support syscall, sysret capture.
+ * That requires LBR_FAR but that means far
+ * jmp need to be filtered out
+ */
+ pr_cont("16-deep LBR, ");
}
+/* atom */
void intel_pmu_lbr_init_atom(void)
{
+ /*
+ * only models starting at stepping 10 seems
+ * to have an operational LBR which can freeze
+ * on PMU interrupt
+ */
+ if (boot_cpu_data.x86_mask < 10) {
+ pr_cont("LBR disabled due to erratum");
+ return;
+ }
+
x86_pmu.lbr_nr = 8;
- x86_pmu.lbr_tos = 0x01c9;
- x86_pmu.lbr_from = 0x40;
- x86_pmu.lbr_to = 0x60;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+
+ /*
+ * SW branch filter usage:
+ * - compensate for lack of HW filter
+ */
+ pr_cont("8-deep LBR, ");
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index c7f64e6..addf9e8 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -40,6 +40,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
+ { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
{ X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
{ X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 40fc861..58b7f27 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -100,13 +100,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
irqctx->tinfo.task = curctx->tinfo.task;
irqctx->tinfo.previous_esp = current_stack_pointer;
- /*
- * Copy the softirq bits in preempt_count so that the
- * softirq checks work in the hardirq context.
- */
- irqctx->tinfo.preempt_count =
- (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
- (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+ /* Copy the preempt_count so that the [soft]irq checks work. */
+ irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp);
@@ -196,7 +191,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
if (unlikely(!desc))
return false;
- if (!execute_on_irq_stack(overflow, desc, irq)) {
+ if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow))
print_stack_overflow();
desc->handle_irq(irq, desc);
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h
new file mode 100644
index 0000000..3230b68
--- /dev/null
+++ b/arch/x86/kernel/kprobes-common.h
@@ -0,0 +1,102 @@
+#ifndef __X86_KERNEL_KPROBES_COMMON_H
+#define __X86_KERNEL_KPROBES_COMMON_H
+
+/* Kprobes and Optprobes common header */
+
+#ifdef CONFIG_X86_64
+#define SAVE_REGS_STRING \
+ /* Skip cs, ip, orig_ax. */ \
+ " subq $24, %rsp\n" \
+ " pushq %rdi\n" \
+ " pushq %rsi\n" \
+ " pushq %rdx\n" \
+ " pushq %rcx\n" \
+ " pushq %rax\n" \
+ " pushq %r8\n" \
+ " pushq %r9\n" \
+ " pushq %r10\n" \
+ " pushq %r11\n" \
+ " pushq %rbx\n" \
+ " pushq %rbp\n" \
+ " pushq %r12\n" \
+ " pushq %r13\n" \
+ " pushq %r14\n" \
+ " pushq %r15\n"
+#define RESTORE_REGS_STRING \
+ " popq %r15\n" \
+ " popq %r14\n" \
+ " popq %r13\n" \
+ " popq %r12\n" \
+ " popq %rbp\n" \
+ " popq %rbx\n" \
+ " popq %r11\n" \
+ " popq %r10\n" \
+ " popq %r9\n" \
+ " popq %r8\n" \
+ " popq %rax\n" \
+ " popq %rcx\n" \
+ " popq %rdx\n" \
+ " popq %rsi\n" \
+ " popq %rdi\n" \
+ /* Skip orig_ax, ip, cs */ \
+ " addq $24, %rsp\n"
+#else
+#define SAVE_REGS_STRING \
+ /* Skip cs, ip, orig_ax and gs. */ \
+ " subl $16, %esp\n" \
+ " pushl %fs\n" \
+ " pushl %es\n" \
+ " pushl %ds\n" \
+ " pushl %eax\n" \
+ " pushl %ebp\n" \
+ " pushl %edi\n" \
+ " pushl %esi\n" \
+ " pushl %edx\n" \
+ " pushl %ecx\n" \
+ " pushl %ebx\n"
+#define RESTORE_REGS_STRING \
+ " popl %ebx\n" \
+ " popl %ecx\n" \
+ " popl %edx\n" \
+ " popl %esi\n" \
+ " popl %edi\n" \
+ " popl %ebp\n" \
+ " popl %eax\n" \
+ /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
+ " addl $24, %esp\n"
+#endif
+
+/* Ensure if the instruction can be boostable */
+extern int can_boost(kprobe_opcode_t *instruction);
+/* Recover instruction if given address is probed */
+extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
+ unsigned long addr);
+/*
+ * Copy an instruction and adjust the displacement if the instruction
+ * uses the %rip-relative addressing mode.
+ */
+extern int __copy_instruction(u8 *dest, u8 *src);
+
+/* Generate a relative-jump/call instruction */
+extern void synthesize_reljump(void *from, void *to);
+extern void synthesize_relcall(void *from, void *to);
+
+#ifdef CONFIG_OPTPROBES
+extern int arch_init_optprobes(void);
+extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
+extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
+#else /* !CONFIG_OPTPROBES */
+static inline int arch_init_optprobes(void)
+{
+ return 0;
+}
+static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+{
+ return 0;
+}
+static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
+{
+ return addr;
+}
+#endif
+#endif
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c
new file mode 100644
index 0000000..c5e410e
--- /dev/null
+++ b/arch/x86/kernel/kprobes-opt.c
@@ -0,0 +1,512 @@
+/*
+ * Kernel Probes Jump Optimization (Optprobes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) Hitachi Ltd., 2012
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/kallsyms.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/alternative.h>
+#include <asm/insn.h>
+#include <asm/debugreg.h>
+
+#include "kprobes-common.h"
+
+unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
+{
+ struct optimized_kprobe *op;
+ struct kprobe *kp;
+ long offs;
+ int i;
+
+ for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
+ kp = get_kprobe((void *)addr - i);
+ /* This function only handles jump-optimized kprobe */
+ if (kp && kprobe_optimized(kp)) {
+ op = container_of(kp, struct optimized_kprobe, kp);
+ /* If op->list is not empty, op is under optimizing */
+ if (list_empty(&op->list))
+ goto found;
+ }
+ }
+
+ return addr;
+found:
+ /*
+ * If the kprobe can be optimized, original bytes which can be
+ * overwritten by jump destination address. In this case, original
+ * bytes must be recovered from op->optinsn.copied_insn buffer.
+ */
+ memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ if (addr == (unsigned long)kp->addr) {
+ buf[0] = kp->opcode;
+ memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ } else {
+ offs = addr - (unsigned long)kp->addr - 1;
+ memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
+ }
+
+ return (unsigned long)buf;
+}
+
+/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
+static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
+{
+#ifdef CONFIG_X86_64
+ *addr++ = 0x48;
+ *addr++ = 0xbf;
+#else
+ *addr++ = 0xb8;
+#endif
+ *(unsigned long *)addr = val;
+}
+
+static void __used __kprobes kprobes_optinsn_template_holder(void)
+{
+ asm volatile (
+ ".global optprobe_template_entry\n"
+ "optprobe_template_entry:\n"
+#ifdef CONFIG_X86_64
+ /* We don't bother saving the ss register */
+ " pushq %rsp\n"
+ " pushfq\n"
+ SAVE_REGS_STRING
+ " movq %rsp, %rsi\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val:\n"
+ ASM_NOP5
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call:\n"
+ ASM_NOP5
+ /* Move flags to rsp */
+ " movq 144(%rsp), %rdx\n"
+ " movq %rdx, 152(%rsp)\n"
+ RESTORE_REGS_STRING
+ /* Skip flags entry */
+ " addq $8, %rsp\n"
+ " popfq\n"
+#else /* CONFIG_X86_32 */
+ " pushf\n"
+ SAVE_REGS_STRING
+ " movl %esp, %edx\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val:\n"
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call:\n"
+ ASM_NOP5
+ RESTORE_REGS_STRING
+ " addl $4, %esp\n" /* skip cs */
+ " popf\n"
+#endif
+ ".global optprobe_template_end\n"
+ "optprobe_template_end:\n");
+}
+
+#define TMPL_MOVE_IDX \
+ ((long)&optprobe_template_val - (long)&optprobe_template_entry)
+#define TMPL_CALL_IDX \
+ ((long)&optprobe_template_call - (long)&optprobe_template_entry)
+#define TMPL_END_IDX \
+ ((long)&optprobe_template_end - (long)&optprobe_template_entry)
+
+#define INT3_SIZE sizeof(kprobe_opcode_t)
+
+/* Optimized kprobe call back function: called from optinsn */
+static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long flags;
+
+ /* This is possible if op is under delayed unoptimizing */
+ if (kprobe_disabled(&op->kp))
+ return;
+
+ local_irq_save(flags);
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(&op->kp);
+ } else {
+ /* Save skipped registers */
+#ifdef CONFIG_X86_64
+ regs->cs = __KERNEL_CS;
+#else
+ regs->cs = __KERNEL_CS | get_kernel_rpl();
+ regs->gs = 0;
+#endif
+ regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+ regs->orig_ax = ~0UL;
+
+ __this_cpu_write(current_kprobe, &op->kp);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ opt_pre_handler(&op->kp, regs);
+ __this_cpu_write(current_kprobe, NULL);
+ }
+ local_irq_restore(flags);
+}
+
+static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+{
+ int len = 0, ret;
+
+ while (len < RELATIVEJUMP_SIZE) {
+ ret = __copy_instruction(dest + len, src + len);
+ if (!ret || !can_boost(dest + len))
+ return -EINVAL;
+ len += ret;
+ }
+ /* Check whether the address range is reserved */
+ if (ftrace_text_reserved(src, src + len - 1) ||
+ alternatives_text_reserved(src, src + len - 1) ||
+ jump_label_text_reserved(src, src + len - 1))
+ return -EBUSY;
+
+ return len;
+}
+
+/* Check whether insn is indirect jump */
+static int __kprobes insn_is_indirect_jump(struct insn *insn)
+{
+ return ((insn->opcode.bytes[0] == 0xff &&
+ (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
+ insn->opcode.bytes[0] == 0xea); /* Segment based jump */
+}
+
+/* Check whether insn jumps into specified address range */
+static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
+{
+ unsigned long target = 0;
+
+ switch (insn->opcode.bytes[0]) {
+ case 0xe0: /* loopne */
+ case 0xe1: /* loope */
+ case 0xe2: /* loop */
+ case 0xe3: /* jcxz */
+ case 0xe9: /* near relative jump */
+ case 0xeb: /* short relative jump */
+ break;
+ case 0x0f:
+ if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
+ break;
+ return 0;
+ default:
+ if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
+ break;
+ return 0;
+ }
+ target = (unsigned long)insn->next_byte + insn->immediate.value;
+
+ return (start <= target && target <= start + len);
+}
+
+/* Decode whole function to ensure any instructions don't jump into target */
+static int __kprobes can_optimize(unsigned long paddr)
+{
+ unsigned long addr, size = 0, offset = 0;
+ struct insn insn;
+ kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+ /* Lookup symbol including addr */
+ if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
+ return 0;
+
+ /*
+ * Do not optimize in the entry code due to the unstable
+ * stack handling.
+ */
+ if ((paddr >= (unsigned long)__entry_text_start) &&
+ (paddr < (unsigned long)__entry_text_end))
+ return 0;
+
+ /* Check there is enough space for a relative jump. */
+ if (size - offset < RELATIVEJUMP_SIZE)
+ return 0;
+
+ /* Decode instructions */
+ addr = paddr - offset;
+ while (addr < paddr - offset + size) { /* Decode until function end */
+ if (search_exception_tables(addr))
+ /*
+ * Since some fixup code will jumps into this function,
+ * we can't optimize kprobe in this function.
+ */
+ return 0;
+ kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr));
+ insn_get_length(&insn);
+ /* Another subsystem puts a breakpoint */
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+ return 0;
+ /* Recover address */
+ insn.kaddr = (void *)addr;
+ insn.next_byte = (void *)(addr + insn.length);
+ /* Check any instructions don't jump into target */
+ if (insn_is_indirect_jump(&insn) ||
+ insn_jump_into_range(&insn, paddr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE))
+ return 0;
+ addr += insn.length;
+ }
+
+ return 1;
+}
+
+/* Check optimized_kprobe can actually be optimized. */
+int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+ int i;
+ struct kprobe *p;
+
+ for (i = 1; i < op->optinsn.size; i++) {
+ p = get_kprobe(op->kp.addr + i);
+ if (p && !kprobe_disabled(p))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+/* Check the addr is within the optimized instructions. */
+int __kprobes
+arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
+{
+ return ((unsigned long)op->kp.addr <= addr &&
+ (unsigned long)op->kp.addr + op->optinsn.size > addr);
+}
+
+/* Free optimized instruction slot */
+static __kprobes
+void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+ if (op->optinsn.insn) {
+ free_optinsn_slot(op->optinsn.insn, dirty);
+ op->optinsn.insn = NULL;
+ op->optinsn.size = 0;
+ }
+}
+
+void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+ __arch_remove_optimized_kprobe(op, 1);
+}
+
+/*
+ * Copy replacing target instructions
+ * Target instructions MUST be relocatable (checked inside)
+ * This is called when new aggr(opt)probe is allocated or reused.
+ */
+int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+{
+ u8 *buf;
+ int ret;
+ long rel;
+
+ if (!can_optimize((unsigned long)op->kp.addr))
+ return -EILSEQ;
+
+ op->optinsn.insn = get_optinsn_slot();
+ if (!op->optinsn.insn)
+ return -ENOMEM;
+
+ /*
+ * Verify if the address gap is in 2GB range, because this uses
+ * a relative jump.
+ */
+ rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+ if (abs(rel) > 0x7fffffff)
+ return -ERANGE;
+
+ buf = (u8 *)op->optinsn.insn;
+
+ /* Copy instructions into the out-of-line buffer */
+ ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
+ if (ret < 0) {
+ __arch_remove_optimized_kprobe(op, 0);
+ return ret;
+ }
+ op->optinsn.size = ret;
+
+ /* Copy arch-dep-instance from template */
+ memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+
+ /* Set probe information */
+ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
+
+ /* Set probe function call */
+ synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+
+ /* Set returning jmp instruction at the tail of out-of-line buffer */
+ synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
+ (u8 *)op->kp.addr + op->optinsn.size);
+
+ flush_icache_range((unsigned long) buf,
+ (unsigned long) buf + TMPL_END_IDX +
+ op->optinsn.size + RELATIVEJUMP_SIZE);
+ return 0;
+}
+
+#define MAX_OPTIMIZE_PROBES 256
+static struct text_poke_param *jump_poke_params;
+static struct jump_poke_buffer {
+ u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
+{
+ s32 rel = (s32)((long)op->optinsn.insn -
+ ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
+ /* Backup instructions which will be replaced by jump address */
+ memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE);
+
+ insn_buf[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&insn_buf[1]) = rel;
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * Caller must call with locking kprobe_mutex and text_mutex.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ WARN_ON(kprobe_disabled(&op->kp));
+ /* Setup param */
+ setup_optimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_del_init(&op->list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }
+
+ /*
+ * text_poke_smp doesn't support NMI/MCE code modifying.
+ * However, since kprobes itself also doesn't support NMI/MCE
+ * code probing, it's not a problem.
+ */
+ text_poke_smp_batch(jump_poke_params, c);
+}
+
+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
+{
+ /* Set int3 to first byte for kprobes */
+ insn_buf[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * Caller must call with locking kprobe_mutex.
+ */
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+ struct list_head *done_list)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ /* Setup param */
+ setup_unoptimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_move(&op->list, done_list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }
+
+ /*
+ * text_poke_smp doesn't support NMI/MCE code modifying.
+ * However, since kprobes itself also doesn't support NMI/MCE
+ * code probing, it's not a problem.
+ */
+ text_poke_smp_batch(jump_poke_params, c);
+}
+
+/* Replace a relative jump with a breakpoint (int3). */
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+ u8 buf[RELATIVEJUMP_SIZE];
+
+ /* Set int3 to first byte for kprobes */
+ buf[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
+}
+
+int __kprobes
+setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+{
+ struct optimized_kprobe *op;
+
+ if (p->flags & KPROBE_FLAG_OPTIMIZED) {
+ /* This kprobe is really able to run optimized path. */
+ op = container_of(p, struct optimized_kprobe, kp);
+ /* Detour through copied instructions */
+ regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
+ if (!reenter)
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ }
+ return 0;
+}
+
+int __kprobes arch_init_optprobes(void)
+{
+ /* Allocate code buffer and parameter array */
+ jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_bufs)
+ return -ENOMEM;
+
+ jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_params) {
+ kfree(jump_poke_bufs);
+ jump_poke_bufs = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7da647d..e213fc8 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -30,16 +30,15 @@
* <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
* <prasanna@in.ibm.com> added function-return probes.
* 2005-May Rusty Lynch <rusty.lynch@intel.com>
- * Added function return probes functionality
+ * Added function return probes functionality
* 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
- * kprobe-booster and kretprobe-booster for i386.
+ * kprobe-booster and kretprobe-booster for i386.
* 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
- * and kretprobe-booster for x86-64
+ * and kretprobe-booster for x86-64
* 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
- * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
- * unified x86 kprobes code.
+ * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
+ * unified x86 kprobes code.
*/
-
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
@@ -59,6 +58,8 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
+#include "kprobes-common.h"
+
void jprobe_return_end(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
doesn't switch kernel stack.*/
{NULL, NULL} /* Terminator */
};
+
const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
@@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
}
/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-static void __kprobes synthesize_reljump(void *from, void *to)
+void __kprobes synthesize_reljump(void *from, void *to)
{
__synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
}
+/* Insert a call instruction at address 'from', which calls address 'to'.*/
+void __kprobes synthesize_relcall(void *from, void *to)
+{
+ __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
+}
+
/*
* Skip the prefixes of the instruction.
*/
@@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
* Returns non-zero if opcode is boostable.
* RIP relative instructions are adjusted at copying time in 64 bits mode
*/
-static int __kprobes can_boost(kprobe_opcode_t *opcodes)
+int __kprobes can_boost(kprobe_opcode_t *opcodes)
{
kprobe_opcode_t opcode;
kprobe_opcode_t *orig_opcodes = opcodes;
@@ -207,13 +215,15 @@ retry:
}
}
-/* Recover the probed instruction at addr for further analysis. */
-static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+static unsigned long
+__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
struct kprobe *kp;
+
kp = get_kprobe((void *)addr);
+ /* There is no probe, return original address */
if (!kp)
- return -EINVAL;
+ return addr;
/*
* Basically, kp->ainsn.insn has an original instruction.
@@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
*/
memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
buf[0] = kp->opcode;
- return 0;
+ return (unsigned long)buf;
+}
+
+/*
+ * Recover the probed instruction at addr for further analysis.
+ * Caller must lock kprobes by kprobe_mutex, or disable preemption
+ * for preventing to release referencing kprobes.
+ */
+unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+ unsigned long __addr;
+
+ __addr = __recover_optprobed_insn(buf, addr);
+ if (__addr != addr)
+ return __addr;
+
+ return __recover_probed_insn(buf, addr);
}
/* Check if paddr is at an instruction boundary */
static int __kprobes can_probe(unsigned long paddr)
{
- int ret;
- unsigned long addr, offset = 0;
+ unsigned long addr, __addr, offset = 0;
struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];
@@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr)
/* Decode instructions */
addr = paddr - offset;
while (addr < paddr) {
- kernel_insn_init(&insn, (void *)addr);
- insn_get_opcode(&insn);
-
/*
* Check if the instruction has been modified by another
* kprobe, in which case we replace the breakpoint by the
* original instruction in our buffer.
+ * Also, jump optimization will change the breakpoint to
+ * relative-jump. Since the relative-jump itself is
+ * normally used, we just go through if there is no kprobe.
*/
- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
- ret = recover_probed_instruction(buf, addr);
- if (ret)
- /*
- * Another debugging subsystem might insert
- * this breakpoint. In that case, we can't
- * recover it.
- */
- return 0;
- kernel_insn_init(&insn, buf);
- }
+ __addr = recover_probed_instruction(buf, addr);
+ kernel_insn_init(&insn, (void *)__addr);
insn_get_length(&insn);
+
+ /*
+ * Another debugging subsystem might insert this breakpoint.
+ * In that case, we can't recover it.
+ */
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+ return 0;
addr += insn.length;
}
@@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
* If not, return null.
* Only applicable to 64-bit x86.
*/
-static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
+int __kprobes __copy_instruction(u8 *dest, u8 *src)
{
struct insn insn;
- int ret;
kprobe_opcode_t buf[MAX_INSN_SIZE];
- kernel_insn_init(&insn, src);
- if (recover) {
- insn_get_opcode(&insn);
- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
- ret = recover_probed_instruction(buf,
- (unsigned long)src);
- if (ret)
- return 0;
- kernel_insn_init(&insn, buf);
- }
- }
+ kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src));
insn_get_length(&insn);
+ /* Another subsystem puts a breakpoint, failed to recover */
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+ return 0;
memcpy(dest, insn.kaddr, insn.length);
#ifdef CONFIG_X86_64
@@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
* extension of the original signed 32-bit displacement would
* have given.
*/
- newdisp = (u8 *) src + (s64) insn.displacement.value -
- (u8 *) dest;
+ newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
disp = (u8 *) dest + insn_offset_displacement(&insn);
*(s32 *) disp = (s32) newdisp;
@@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
static void __kprobes arch_copy_kprobe(struct kprobe *p)
{
+ /* Copy an instruction with recovering if other optprobe modifies it.*/
+ __copy_instruction(p->ainsn.insn, p->addr);
+
/*
- * Copy an instruction without recovering int3, because it will be
- * put by another subsystem.
+ * __copy_instruction can modify the displacement of the instruction,
+ * but it doesn't affect boostable check.
*/
- __copy_instruction(p->ainsn.insn, p->addr, 0);
-
- if (can_boost(p->addr))
+ if (can_boost(p->ainsn.insn))
p->ainsn.boostable = 0;
else
p->ainsn.boostable = -1;
- p->opcode = *p->addr;
+ /* Also, displacement change doesn't affect the first byte */
+ p->opcode = p->ainsn.insn[0];
}
int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -442,8 +458,8 @@ static void __kprobes restore_btf(void)
}
}
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
- struct pt_regs *regs)
+void __kprobes
+arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
{
unsigned long *sara = stack_addr(regs);
@@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
*sara = (unsigned long) &kretprobe_trampoline;
}
-#ifdef CONFIG_OPTPROBES
-static int __kprobes setup_detour_execution(struct kprobe *p,
- struct pt_regs *regs,
- int reenter);
-#else
-#define setup_detour_execution(p, regs, reenter) (0)
-#endif
-
-static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb, int reenter)
+static void __kprobes
+setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter)
{
if (setup_detour_execution(p, regs, reenter))
return;
@@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
* within the handler. We save the original kprobes variables and just single
* step on the instruction of the new probe without calling any user handlers.
*/
-static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
+static int __kprobes
+reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
@@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
return 0;
}
-#ifdef CONFIG_X86_64
-#define SAVE_REGS_STRING \
- /* Skip cs, ip, orig_ax. */ \
- " subq $24, %rsp\n" \
- " pushq %rdi\n" \
- " pushq %rsi\n" \
- " pushq %rdx\n" \
- " pushq %rcx\n" \
- " pushq %rax\n" \
- " pushq %r8\n" \
- " pushq %r9\n" \
- " pushq %r10\n" \
- " pushq %r11\n" \
- " pushq %rbx\n" \
- " pushq %rbp\n" \
- " pushq %r12\n" \
- " pushq %r13\n" \
- " pushq %r14\n" \
- " pushq %r15\n"
-#define RESTORE_REGS_STRING \
- " popq %r15\n" \
- " popq %r14\n" \
- " popq %r13\n" \
- " popq %r12\n" \
- " popq %rbp\n" \
- " popq %rbx\n" \
- " popq %r11\n" \
- " popq %r10\n" \
- " popq %r9\n" \
- " popq %r8\n" \
- " popq %rax\n" \
- " popq %rcx\n" \
- " popq %rdx\n" \
- " popq %rsi\n" \
- " popq %rdi\n" \
- /* Skip orig_ax, ip, cs */ \
- " addq $24, %rsp\n"
-#else
-#define SAVE_REGS_STRING \
- /* Skip cs, ip, orig_ax and gs. */ \
- " subl $16, %esp\n" \
- " pushl %fs\n" \
- " pushl %es\n" \
- " pushl %ds\n" \
- " pushl %eax\n" \
- " pushl %ebp\n" \
- " pushl %edi\n" \
- " pushl %esi\n" \
- " pushl %edx\n" \
- " pushl %ecx\n" \
- " pushl %ebx\n"
-#define RESTORE_REGS_STRING \
- " popl %ebx\n" \
- " popl %ecx\n" \
- " popl %edx\n" \
- " popl %esi\n" \
- " popl %edi\n" \
- " popl %ebp\n" \
- " popl %eax\n" \
- /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
- " addl $24, %esp\n"
-#endif
-
/*
* When a retprobed function returns, this code saves registers and
* calls trampoline_handler() runs, which calls the kretprobe's handler.
@@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
* jump instruction after the copied instruction, that jumps to the next
* instruction after the probepoint.
*/
-static void __kprobes resume_execution(struct kprobe *p,
- struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+static void __kprobes
+resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
unsigned long *tos = stack_addr(regs);
unsigned long copy_ip = (unsigned long)p->ainsn.insn;
@@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
/*
* Wrapper routine for handling exceptions.
*/
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
+int __kprobes
+kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data)
{
struct die_args *args = data;
int ret = NOTIFY_DONE;
@@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-
-#ifdef CONFIG_OPTPROBES
-
-/* Insert a call instruction at address 'from', which calls address 'to'.*/
-static void __kprobes synthesize_relcall(void *from, void *to)
-{
- __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
-}
-
-/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
-static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
- unsigned long val)
-{
-#ifdef CONFIG_X86_64
- *addr++ = 0x48;
- *addr++ = 0xbf;
-#else
- *addr++ = 0xb8;
-#endif
- *(unsigned long *)addr = val;
-}
-
-static void __used __kprobes kprobes_optinsn_template_holder(void)
-{
- asm volatile (
- ".global optprobe_template_entry\n"
- "optprobe_template_entry: \n"
-#ifdef CONFIG_X86_64
- /* We don't bother saving the ss register */
- " pushq %rsp\n"
- " pushfq\n"
- SAVE_REGS_STRING
- " movq %rsp, %rsi\n"
- ".global optprobe_template_val\n"
- "optprobe_template_val: \n"
- ASM_NOP5
- ASM_NOP5
- ".global optprobe_template_call\n"
- "optprobe_template_call: \n"
- ASM_NOP5
- /* Move flags to rsp */
- " movq 144(%rsp), %rdx\n"
- " movq %rdx, 152(%rsp)\n"
- RESTORE_REGS_STRING
- /* Skip flags entry */
- " addq $8, %rsp\n"
- " popfq\n"
-#else /* CONFIG_X86_32 */
- " pushf\n"
- SAVE_REGS_STRING
- " movl %esp, %edx\n"
- ".global optprobe_template_val\n"
- "optprobe_template_val: \n"
- ASM_NOP5
- ".global optprobe_template_call\n"
- "optprobe_template_call: \n"
- ASM_NOP5
- RESTORE_REGS_STRING
- " addl $4, %esp\n" /* skip cs */
- " popf\n"
-#endif
- ".global optprobe_template_end\n"
- "optprobe_template_end: \n");
-}
-
-#define TMPL_MOVE_IDX \
- ((long)&optprobe_template_val - (long)&optprobe_template_entry)
-#define TMPL_CALL_IDX \
- ((long)&optprobe_template_call - (long)&optprobe_template_entry)
-#define TMPL_END_IDX \
- ((long)&optprobe_template_end - (long)&optprobe_template_entry)
-
-#define INT3_SIZE sizeof(kprobe_opcode_t)
-
-/* Optimized kprobe call back function: called from optinsn */
-static void __kprobes optimized_callback(struct optimized_kprobe *op,
- struct pt_regs *regs)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long flags;
-
- /* This is possible if op is under delayed unoptimizing */
- if (kprobe_disabled(&op->kp))
- return;
-
- local_irq_save(flags);
- if (kprobe_running()) {
- kprobes_inc_nmissed_count(&op->kp);
- } else {
- /* Save skipped registers */
-#ifdef CONFIG_X86_64
- regs->cs = __KERNEL_CS;
-#else
- regs->cs = __KERNEL_CS | get_kernel_rpl();
- regs->gs = 0;
-#endif
- regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
- regs->orig_ax = ~0UL;
-
- __this_cpu_write(current_kprobe, &op->kp);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- opt_pre_handler(&op->kp, regs);
- __this_cpu_write(current_kprobe, NULL);
- }
- local_irq_restore(flags);
-}
-
-static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
-{
- int len = 0, ret;
-
- while (len < RELATIVEJUMP_SIZE) {
- ret = __copy_instruction(dest + len, src + len, 1);
- if (!ret || !can_boost(dest + len))
- return -EINVAL;
- len += ret;
- }
- /* Check whether the address range is reserved */
- if (ftrace_text_reserved(src, src + len - 1) ||
- alternatives_text_reserved(src, src + len - 1) ||
- jump_label_text_reserved(src, src + len - 1))
- return -EBUSY;
-
- return len;
-}
-
-/* Check whether insn is indirect jump */
-static int __kprobes insn_is_indirect_jump(struct insn *insn)
-{
- return ((insn->opcode.bytes[0] == 0xff &&
- (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
- insn->opcode.bytes[0] == 0xea); /* Segment based jump */
-}
-
-/* Check whether insn jumps into specified address range */
-static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
-{
- unsigned long target = 0;
-
- switch (insn->opcode.bytes[0]) {
- case 0xe0: /* loopne */
- case 0xe1: /* loope */
- case 0xe2: /* loop */
- case 0xe3: /* jcxz */
- case 0xe9: /* near relative jump */
- case 0xeb: /* short relative jump */
- break;
- case 0x0f:
- if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
- break;
- return 0;
- default:
- if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
- break;
- return 0;
- }
- target = (unsigned long)insn->next_byte + insn->immediate.value;
-
- return (start <= target && target <= start + len);
-}
-
-/* Decode whole function to ensure any instructions don't jump into target */
-static int __kprobes can_optimize(unsigned long paddr)
-{
- int ret;
- unsigned long addr, size = 0, offset = 0;
- struct insn insn;
- kprobe_opcode_t buf[MAX_INSN_SIZE];
-
- /* Lookup symbol including addr */
- if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
- return 0;
-
- /*
- * Do not optimize in the entry code due to the unstable
- * stack handling.
- */
- if ((paddr >= (unsigned long )__entry_text_start) &&
- (paddr < (unsigned long )__entry_text_end))
- return 0;
-
- /* Check there is enough space for a relative jump. */
- if (size - offset < RELATIVEJUMP_SIZE)
- return 0;
-
- /* Decode instructions */
- addr = paddr - offset;
- while (addr < paddr - offset + size) { /* Decode until function end */
- if (search_exception_tables(addr))
- /*
- * Since some fixup code will jumps into this function,
- * we can't optimize kprobe in this function.
- */
- return 0;
- kernel_insn_init(&insn, (void *)addr);
- insn_get_opcode(&insn);
- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
- ret = recover_probed_instruction(buf, addr);
- if (ret)
- return 0;
- kernel_insn_init(&insn, buf);
- }
- insn_get_length(&insn);
- /* Recover address */
- insn.kaddr = (void *)addr;
- insn.next_byte = (void *)(addr + insn.length);
- /* Check any instructions don't jump into target */
- if (insn_is_indirect_jump(&insn) ||
- insn_jump_into_range(&insn, paddr + INT3_SIZE,
- RELATIVE_ADDR_SIZE))
- return 0;
- addr += insn.length;
- }
-
- return 1;
-}
-
-/* Check optimized_kprobe can actually be optimized. */
-int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
-{
- int i;
- struct kprobe *p;
-
- for (i = 1; i < op->optinsn.size; i++) {
- p = get_kprobe(op->kp.addr + i);
- if (p && !kprobe_disabled(p))
- return -EEXIST;
- }
-
- return 0;
-}
-
-/* Check the addr is within the optimized instructions. */
-int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
- unsigned long addr)
-{
- return ((unsigned long)op->kp.addr <= addr &&
- (unsigned long)op->kp.addr + op->optinsn.size > addr);
-}
-
-/* Free optimized instruction slot */
-static __kprobes
-void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
-{
- if (op->optinsn.insn) {
- free_optinsn_slot(op->optinsn.insn, dirty);
- op->optinsn.insn = NULL;
- op->optinsn.size = 0;
- }
-}
-
-void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
-{
- __arch_remove_optimized_kprobe(op, 1);
-}
-
-/*
- * Copy replacing target instructions
- * Target instructions MUST be relocatable (checked inside)
- */
-int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
-{
- u8 *buf;
- int ret;
- long rel;
-
- if (!can_optimize((unsigned long)op->kp.addr))
- return -EILSEQ;
-
- op->optinsn.insn = get_optinsn_slot();
- if (!op->optinsn.insn)
- return -ENOMEM;
-
- /*
- * Verify if the address gap is in 2GB range, because this uses
- * a relative jump.
- */
- rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
- if (abs(rel) > 0x7fffffff)
- return -ERANGE;
-
- buf = (u8 *)op->optinsn.insn;
-
- /* Copy instructions into the out-of-line buffer */
- ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
- if (ret < 0) {
- __arch_remove_optimized_kprobe(op, 0);
- return ret;
- }
- op->optinsn.size = ret;
-
- /* Copy arch-dep-instance from template */
- memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
-
- /* Set probe information */
- synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
-
- /* Set probe function call */
- synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
-
- /* Set returning jmp instruction at the tail of out-of-line buffer */
- synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
- (u8 *)op->kp.addr + op->optinsn.size);
-
- flush_icache_range((unsigned long) buf,
- (unsigned long) buf + TMPL_END_IDX +
- op->optinsn.size + RELATIVEJUMP_SIZE);
- return 0;
-}
-
-#define MAX_OPTIMIZE_PROBES 256
-static struct text_poke_param *jump_poke_params;
-static struct jump_poke_buffer {
- u8 buf[RELATIVEJUMP_SIZE];
-} *jump_poke_bufs;
-
-static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
- u8 *insn_buf,
- struct optimized_kprobe *op)
-{
- s32 rel = (s32)((long)op->optinsn.insn -
- ((long)op->kp.addr + RELATIVEJUMP_SIZE));
-
- /* Backup instructions which will be replaced by jump address */
- memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
- RELATIVE_ADDR_SIZE);
-
- insn_buf[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&insn_buf[1]) = rel;
-
- tprm->addr = op->kp.addr;
- tprm->opcode = insn_buf;
- tprm->len = RELATIVEJUMP_SIZE;
-}
-
-/*
- * Replace breakpoints (int3) with relative jumps.
- * Caller must call with locking kprobe_mutex and text_mutex.
- */
-void __kprobes arch_optimize_kprobes(struct list_head *oplist)
-{
- struct optimized_kprobe *op, *tmp;
- int c = 0;
-
- list_for_each_entry_safe(op, tmp, oplist, list) {
- WARN_ON(kprobe_disabled(&op->kp));
- /* Setup param */
- setup_optimize_kprobe(&jump_poke_params[c],
- jump_poke_bufs[c].buf, op);
- list_del_init(&op->list);
- if (++c >= MAX_OPTIMIZE_PROBES)
- break;
- }
-
- /*
- * text_poke_smp doesn't support NMI/MCE code modifying.
- * However, since kprobes itself also doesn't support NMI/MCE
- * code probing, it's not a problem.
- */
- text_poke_smp_batch(jump_poke_params, c);
-}
-
-static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
- u8 *insn_buf,
- struct optimized_kprobe *op)
-{
- /* Set int3 to first byte for kprobes */
- insn_buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-
- tprm->addr = op->kp.addr;
- tprm->opcode = insn_buf;
- tprm->len = RELATIVEJUMP_SIZE;
-}
-
-/*
- * Recover original instructions and breakpoints from relative jumps.
- * Caller must call with locking kprobe_mutex.
- */
-extern void arch_unoptimize_kprobes(struct list_head *oplist,
- struct list_head *done_list)
-{
- struct optimized_kprobe *op, *tmp;
- int c = 0;
-
- list_for_each_entry_safe(op, tmp, oplist, list) {
- /* Setup param */
- setup_unoptimize_kprobe(&jump_poke_params[c],
- jump_poke_bufs[c].buf, op);
- list_move(&op->list, done_list);
- if (++c >= MAX_OPTIMIZE_PROBES)
- break;
- }
-
- /*
- * text_poke_smp doesn't support NMI/MCE code modifying.
- * However, since kprobes itself also doesn't support NMI/MCE
- * code probing, it's not a problem.
- */
- text_poke_smp_batch(jump_poke_params, c);
-}
-
-/* Replace a relative jump with a breakpoint (int3). */
-void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
-{
- u8 buf[RELATIVEJUMP_SIZE];
-
- /* Set int3 to first byte for kprobes */
- buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
- text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
-}
-
-static int __kprobes setup_detour_execution(struct kprobe *p,
- struct pt_regs *regs,
- int reenter)
-{
- struct optimized_kprobe *op;
-
- if (p->flags & KPROBE_FLAG_OPTIMIZED) {
- /* This kprobe is really able to run optimized path. */
- op = container_of(p, struct optimized_kprobe, kp);
- /* Detour through copied instructions */
- regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
- if (!reenter)
- reset_current_kprobe();
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-
-static int __kprobes init_poke_params(void)
-{
- /* Allocate code buffer and parameter array */
- jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
- MAX_OPTIMIZE_PROBES, GFP_KERNEL);
- if (!jump_poke_bufs)
- return -ENOMEM;
-
- jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
- MAX_OPTIMIZE_PROBES, GFP_KERNEL);
- if (!jump_poke_params) {
- kfree(jump_poke_bufs);
- jump_poke_bufs = NULL;
- return -ENOMEM;
- }
-
- return 0;
-}
-#else /* !CONFIG_OPTPROBES */
-static int __kprobes init_poke_params(void)
-{
- return 0;
-}
-#endif
-
int __init arch_init_kprobes(void)
{
- return init_poke_params();
+ return arch_init_optprobes();
}
int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f0c6fd6..694d801 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -438,9 +438,9 @@ void __init kvm_guest_init(void)
static __init int activate_jump_labels(void)
{
if (has_steal_clock) {
- jump_label_inc(&paravirt_steal_enabled);
+ static_key_slow_inc(&paravirt_steal_enabled);
if (steal_acc)
- jump_label_inc(&paravirt_steal_rq_enabled);
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
}
return 0;
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fda91c3..87a0f86 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -86,6 +86,7 @@
#include <asm/microcode.h>
#include <asm/processor.h>
+#include <asm/cpu_device_id.h>
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -504,6 +505,20 @@ static struct notifier_block __refdata mc_cpu_notifier = {
.notifier_call = mc_cpu_callback,
};
+#ifdef MODULE
+/* Autoload on Intel and AMD systems */
+static const struct x86_cpu_id microcode_id[] = {
+#ifdef CONFIG_MICROCODE_INTEL
+ { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
+#endif
+#ifdef CONFIG_MICROCODE_AMD
+ { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, },
+#endif
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, microcode_id);
+#endif
+
static int __init microcode_init(void)
{
struct cpuinfo_x86 *c = &cpu_data(0);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index d90272e..ada2f99 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr)
__native_flush_tlb_single(addr);
}
-struct jump_label_key paravirt_steal_enabled;
-struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
static u64 native_steal_clock(int cpu)
{
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 15763af..44eefde 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -377,8 +377,8 @@ static inline int hlt_use_halt(void)
void default_idle(void)
{
if (hlt_use_halt()) {
- trace_power_start(POWER_CSTATE, 1, smp_processor_id());
- trace_cpu_idle(1, smp_processor_id());
+ trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
+ trace_cpu_idle_rcuidle(1, smp_processor_id());
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@@ -391,8 +391,8 @@ void default_idle(void)
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
- trace_power_end(smp_processor_id());
- trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ trace_power_end_rcuidle(smp_processor_id());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
} else {
local_irq_enable();
/* loop is done by the caller */
@@ -450,8 +450,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
static void mwait_idle(void)
{
if (!need_resched()) {
- trace_power_start(POWER_CSTATE, 1, smp_processor_id());
- trace_cpu_idle(1, smp_processor_id());
+ trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
+ trace_cpu_idle_rcuidle(1, smp_processor_id());
if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -461,8 +461,8 @@ static void mwait_idle(void)
__sti_mwait(0, 0);
else
local_irq_enable();
- trace_power_end(smp_processor_id());
- trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ trace_power_end_rcuidle(smp_processor_id());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
} else
local_irq_enable();
}
@@ -474,13 +474,13 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
- trace_power_start(POWER_CSTATE, 0, smp_processor_id());
- trace_cpu_idle(0, smp_processor_id());
+ trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id());
+ trace_cpu_idle_rcuidle(0, smp_processor_id());
local_irq_enable();
while (!need_resched())
cpu_relax();
- trace_power_end(smp_processor_id());
- trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ trace_power_end_rcuidle(smp_processor_id());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c08d1ff..49888fe 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -119,9 +119,7 @@ void cpu_idle(void)
}
rcu_idle_exit();
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index cfa5c90..e34257c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -156,9 +156,7 @@ void cpu_idle(void)
}
tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 66d250c..58f7816 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -291,19 +291,6 @@ notrace static void __cpuinit start_secondary(void *unused)
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
x86_platform.nmi_init();
- /*
- * Wait until the cpu which brought this one up marked it
- * online before enabling interrupts. If we don't do that then
- * we can end up waking up the softirq thread before this cpu
- * reached the active state, which makes the scheduler unhappy
- * and schedule the softirq thread on the wrong cpu. This is
- * only observable with forced threaded interrupts, but in
- * theory it could also happen w/o them. It's just way harder
- * to achieve.
- */
- while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
- cpu_relax();
-
/* enable local interrupts */
local_irq_enable();
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index dd5fbf4..c6eba2b 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -57,9 +57,6 @@ EXPORT_SYMBOL(profile_pc);
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
- /* Keep nmi watchdog up to date */
- inc_irq_stat(irq0_irqs);
-
global_clock_event->event_handler(global_clock_event);
/* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..183c592 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -620,7 +620,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
if (cpu_khz) {
*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
- *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
+ *offset = ns_now - mult_frac(tsc_now, *scale,
+ (1UL << CYC2NS_SCALE_FACTOR));
}
sched_clock_idle_wakeup_event(0);
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9eba29b..fc25e60 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -42,7 +42,7 @@ static __cpuinitdata int nr_warps;
/*
* TSC-warp measurement loop running on both CPUs:
*/
-static __cpuinit void check_tsc_warp(void)
+static __cpuinit void check_tsc_warp(unsigned int timeout)
{
cycles_t start, now, prev, end;
int i;
@@ -51,9 +51,9 @@ static __cpuinit void check_tsc_warp(void)
start = get_cycles();
rdtsc_barrier();
/*
- * The measurement runs for 20 msecs:
+ * The measurement runs for 'timeout' msecs:
*/
- end = start + tsc_khz * 20ULL;
+ end = start + (cycles_t) tsc_khz * timeout;
now = start;
for (i = 0; ; i++) {
@@ -99,6 +99,25 @@ static __cpuinit void check_tsc_warp(void)
}
/*
+ * If the target CPU coming online doesn't have any of its core-siblings
+ * online, a timeout of 20msec will be used for the TSC-warp measurement
+ * loop. Otherwise a smaller timeout of 2msec will be used, as we have some
+ * information about this socket already (and this information grows as we
+ * have more and more logical-siblings in that socket).
+ *
+ * Ideally we should be able to skip the TSC sync check on the other
+ * core-siblings, if the first logical CPU in a socket passed the sync test.
+ * But as the TSC is per-logical CPU and can potentially be modified wrongly
+ * by the bios, TSC sync test for smaller duration should be able
+ * to catch such errors. Also this will catch the condition where all the
+ * cores in the socket doesn't get reset at the same time.
+ */
+static inline unsigned int loop_timeout(int cpu)
+{
+ return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20;
+}
+
+/*
* Source CPU calls into this - it waits for the freshly booted
* target CPU to arrive and then starts the measurement:
*/
@@ -135,7 +154,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
*/
atomic_inc(&start_count);
- check_tsc_warp();
+ check_tsc_warp(loop_timeout(cpu));
while (atomic_read(&stop_count) != cpus-1)
cpu_relax();
@@ -183,7 +202,7 @@ void __cpuinit check_tsc_sync_target(void)
while (atomic_read(&start_count) != cpus)
cpu_relax();
- check_tsc_warp();
+ check_tsc_warp(loop_timeout(smp_processor_id()));
/*
* Ok, we are done:
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index fe15dcc..ea7b4fd 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
}
static bool mmu_audit;
-static struct jump_label_key mmu_audit_key;
+static struct static_key mmu_audit_key;
static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
@@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
- if (static_branch((&mmu_audit_key)))
+ if (static_key_false((&mmu_audit_key)))
__kvm_mmu_audit(vcpu, point);
}
@@ -259,7 +259,7 @@ static void mmu_audit_enable(void)
if (mmu_audit)
return;
- jump_label_inc(&mmu_audit_key);
+ static_key_slow_inc(&mmu_audit_key);
mmu_audit = true;
}
@@ -268,7 +268,7 @@ static void mmu_audit_disable(void)
if (!mmu_audit)
return;
- jump_label_dec(&mmu_audit_key);
+ static_key_slow_dec(&mmu_audit_key);
mmu_audit = false;
}
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index 88ad5fb..c1f01a8 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
return inat_primary_table[opcode];
}
-insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx,
+int inat_get_last_prefix_id(insn_byte_t last_pfx)
+{
+ insn_attr_t lpfx_attr;
+
+ lpfx_attr = inat_get_opcode_attribute(last_pfx);
+ return inat_last_prefix_id(lpfx_attr);
+}
+
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
insn_attr_t esc_attr)
{
const insn_attr_t *table;
- insn_attr_t lpfx_attr;
- int n, m = 0;
+ int n;
n = inat_escape_id(esc_attr);
- if (last_pfx) {
- lpfx_attr = inat_get_opcode_attribute(last_pfx);
- m = inat_last_prefix_id(lpfx_attr);
- }
+
table = inat_escape_tables[n][0];
if (!table)
return 0;
- if (inat_has_variant(table[opcode]) && m) {
- table = inat_escape_tables[n][m];
+ if (inat_has_variant(table[opcode]) && lpfx_id) {
+ table = inat_escape_tables[n][lpfx_id];
if (!table)
return 0;
}
return table[opcode];
}
-insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx,
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
insn_attr_t grp_attr)
{
const insn_attr_t *table;
- insn_attr_t lpfx_attr;
- int n, m = 0;
+ int n;
n = inat_group_id(grp_attr);
- if (last_pfx) {
- lpfx_attr = inat_get_opcode_attribute(last_pfx);
- m = inat_last_prefix_id(lpfx_attr);
- }
+
table = inat_group_tables[n][0];
if (!table)
return inat_group_common_attribute(grp_attr);
- if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) {
- table = inat_group_tables[n][m];
+ if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
+ table = inat_group_tables[n][lpfx_id];
if (!table)
return inat_group_common_attribute(grp_attr);
}
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 5a1f9f3..25feb1a 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -185,7 +185,8 @@ err_out:
void insn_get_opcode(struct insn *insn)
{
struct insn_field *opcode = &insn->opcode;
- insn_byte_t op, pfx;
+ insn_byte_t op;
+ int pfx_id;
if (opcode->got)
return;
if (!insn->prefixes.got)
@@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn)
/* Get escaped opcode */
op = get_next(insn_byte_t, insn);
opcode->bytes[opcode->nbytes++] = op;
- pfx = insn_last_prefix(insn);
- insn->attr = inat_get_escape_attribute(op, pfx, insn->attr);
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
}
if (inat_must_vex(insn->attr))
insn->attr = 0; /* This instruction is bad */
@@ -235,7 +236,7 @@ err_out:
void insn_get_modrm(struct insn *insn)
{
struct insn_field *modrm = &insn->modrm;
- insn_byte_t pfx, mod;
+ insn_byte_t pfx_id, mod;
if (modrm->got)
return;
if (!insn->opcode.got)
@@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn)
modrm->value = mod;
modrm->nbytes = 1;
if (inat_is_group(insn->attr)) {
- pfx = insn_last_prefix(insn);
- insn->attr = inat_get_group_attribute(mod, pfx,
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_group_attribute(mod, pfx_id,
insn->attr);
if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
insn->attr = 0; /* This is bad */
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 47041e7..2c90047 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -113,9 +113,7 @@ void cpu_idle(void)
while (1) {
while (!need_resched())
platform_idle();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ schedule_preempt_disabled();
}
}
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 2c723e8..f9726f6 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -19,7 +19,6 @@
#include <linux/param.h>
#include <linux/seq_file.h>
#include <linux/serial.h>
-#include <linux/serialP.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
@@ -37,6 +36,7 @@
#define SERIAL_TIMER_VALUE (20 * HZ)
static struct tty_driver *serial_driver;
+static struct tty_port serial_port;
static struct timer_list serial_timer;
static DEFINE_SPINLOCK(timer_lock);
@@ -68,17 +68,10 @@ static void rs_poll(unsigned long);
static int rs_open(struct tty_struct *tty, struct file * filp)
{
- int line = tty->index;
-
- if ((line < 0) || (line >= SERIAL_MAX_NUM_LINES))
- return -ENODEV;
-
+ tty->port = &serial_port;
spin_lock(&timer_lock);
-
if (tty->count == 1) {
- init_timer(&serial_timer);
- serial_timer.data = (unsigned long) tty;
- serial_timer.function = rs_poll;
+ setup_timer(&serial_timer, rs_poll, (unsigned long)tty);
mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
}
spin_unlock(&timer_lock);
@@ -99,10 +92,10 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
*/
static void rs_close(struct tty_struct *tty, struct file * filp)
{
- spin_lock(&timer_lock);
+ spin_lock_bh(&timer_lock);
if (tty->count == 1)
del_timer_sync(&serial_timer);
- spin_unlock(&timer_lock);
+ spin_unlock_bh(&timer_lock);
}
@@ -210,13 +203,14 @@ static const struct tty_operations serial_ops = {
int __init rs_init(void)
{
- serial_driver = alloc_tty_driver(1);
+ tty_port_init(&serial_port);
+
+ serial_driver = alloc_tty_driver(SERIAL_MAX_NUM_LINES);
printk ("%s %s\n", serial_name, serial_version);
/* Initialize the tty_driver structure */
- serial_driver->owner = THIS_MODULE;
serial_driver->driver_name = "iss_serial";
serial_driver->name = "ttyS";
serial_driver->major = TTY_MAJOR;
OpenPOWER on IntegriCloud