Diffstat (limited to 'arch')
106 files changed, 2193 insertions, 1656 deletions
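The headline change in the first hunk below is the rewritten JUMP_LABEL Kconfig help text, and several later hunks (ia64 paravirt, plus the mips, powerpc and s390 jump_label.h headers) rename struct jump_label_key to struct static_key to match. As a minimal sketch of the branch-patching pattern that help text describes, assuming the v3.3-era static_key API these renames converge on (my_feature_key and do_slow_feature are illustrative placeholders, not names from this diff):

#include <linux/jump_label.h>

/* Almost-always-false condition: with "asm goto" support the fast
 * path compiles to a single nop. */
static struct static_key my_feature_key = STATIC_KEY_INIT_FALSE;

static void do_slow_feature(void)
{
	/* rare work lives here */
}

static void hot_path(void)
{
	if (static_key_false(&my_feature_key)) {
		/* reached only after the nop is patched into a jump */
		do_slow_feature();
	}
	/* common case falls straight through the nop */
}

static void feature_set(bool on)
{
	/* the slow, rare update side: patches every inlined site at runtime */
	if (on)
		static_key_slow_inc(&my_feature_key);
	else
		static_key_slow_dec(&my_feature_key);
}

The inc/dec calls are the "update of the condition" that the help text calls slower; they rewrite the nop/jump at each site, which is why the API is reserved for almost-always-true or almost-always-false branches.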
diff --git a/arch/Kconfig b/arch/Kconfig index 4f55c73..5b448a7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -47,18 +47,29 @@ config KPROBES If in doubt, say "N". config JUMP_LABEL - bool "Optimize trace point call sites" + bool "Optimize very unlikely/likely branches" depends on HAVE_ARCH_JUMP_LABEL help + This option enables a transparent branch optimization that + makes certain almost-always-true or almost-always-false branch + conditions even cheaper to execute within the kernel. + + Certain performance-sensitive kernel code, such as trace points, + scheduler functionality, networking code and KVM have such + branches and include support for this optimization technique. + If it is detected that the compiler has support for "asm goto", - the kernel will compile trace point locations with just a - nop instruction. When trace points are enabled, the nop will - be converted to a jump to the trace function. This technique - lowers overhead and stress on the branch prediction of the - processor. - - On i386, options added to the compiler flags may increase - the size of the kernel slightly. + the kernel will compile such branches with just a nop + instruction. When the condition flag is toggled to true, the + nop will be converted to a jump instruction to execute the + conditional block of instructions. + + This technique lowers overhead and stress on the branch prediction + of the processor and generally makes the kernel faster. The update + of the condition is slower, but those are always very rare. + + ( On 32-bit x86, the necessary options added to the compiler + flags may increase the size of the kernel slightly. ) config OPTPROBES def_bool y diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 8143cd7..0dae252 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event) { int err; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + switch (event->attr.type) { case PERF_TYPE_RAW: case PERF_TYPE_HARDWARE: diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 783f4e5..3ea8094 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -30,10 +30,9 @@ static int srm_is_registered_console = 0; #define MAX_SRM_CONSOLE_DEVICES 1 /* only support 1 console device */ struct srmcons_private { - struct tty_struct *tty; + struct tty_port port; struct timer_list timer; - spinlock_t lock; -}; +} srmcons_singleton; typedef union _srmcons_result { struct { @@ -68,22 +67,21 @@ static void srmcons_receive_chars(unsigned long data) { struct srmcons_private *srmconsp = (struct srmcons_private *)data; + struct tty_port *port = &srmconsp->port; unsigned long flags; int incr = 10; local_irq_save(flags); if (spin_trylock(&srmcons_callback_lock)) { - if (!srmcons_do_receive_chars(srmconsp->tty)) + if (!srmcons_do_receive_chars(port->tty)) incr = 100; spin_unlock(&srmcons_callback_lock); } - spin_lock(&srmconsp->lock); - if (srmconsp->tty) { - srmconsp->timer.expires = jiffies + incr; - add_timer(&srmconsp->timer); - } - spin_unlock(&srmconsp->lock); + spin_lock(&port->lock); + if (port->tty) + mod_timer(&srmconsp->timer, jiffies + incr); + spin_unlock(&port->lock); local_irq_restore(flags); } @@ -156,56 +154,22 @@ srmcons_chars_in_buffer(struct tty_struct *tty) } static int -srmcons_get_private_struct(struct srmcons_private **ps) -{ - static struct srmcons_private *srmconsp = NULL; - static 
DEFINE_SPINLOCK(srmconsp_lock); - unsigned long flags; - int retval = 0; - - if (srmconsp == NULL) { - srmconsp = kmalloc(sizeof(*srmconsp), GFP_KERNEL); - spin_lock_irqsave(&srmconsp_lock, flags); - - if (srmconsp == NULL) - retval = -ENOMEM; - else { - srmconsp->tty = NULL; - spin_lock_init(&srmconsp->lock); - init_timer(&srmconsp->timer); - } - - spin_unlock_irqrestore(&srmconsp_lock, flags); - } - - *ps = srmconsp; - return retval; -} - -static int srmcons_open(struct tty_struct *tty, struct file *filp) { - struct srmcons_private *srmconsp; + struct srmcons_private *srmconsp = &srmcons_singleton; + struct tty_port *port = &srmconsp->port; unsigned long flags; - int retval; - - retval = srmcons_get_private_struct(&srmconsp); - if (retval) - return retval; - spin_lock_irqsave(&srmconsp->lock, flags); + spin_lock_irqsave(&port->lock, flags); - if (!srmconsp->tty) { + if (!port->tty) { tty->driver_data = srmconsp; - - srmconsp->tty = tty; - srmconsp->timer.function = srmcons_receive_chars; - srmconsp->timer.data = (unsigned long)srmconsp; - srmconsp->timer.expires = jiffies + 10; - add_timer(&srmconsp->timer); + tty->port = port; + port->tty = tty; /* XXX proper refcounting */ + mod_timer(&srmconsp->timer, jiffies + 10); } - spin_unlock_irqrestore(&srmconsp->lock, flags); + spin_unlock_irqrestore(&port->lock, flags); return 0; } @@ -214,16 +178,17 @@ static void srmcons_close(struct tty_struct *tty, struct file *filp) { struct srmcons_private *srmconsp = tty->driver_data; + struct tty_port *port = &srmconsp->port; unsigned long flags; - spin_lock_irqsave(&srmconsp->lock, flags); + spin_lock_irqsave(&port->lock, flags); if (tty->count == 1) { - srmconsp->tty = NULL; + port->tty = NULL; del_timer(&srmconsp->timer); } - spin_unlock_irqrestore(&srmconsp->lock, flags); + spin_unlock_irqrestore(&port->lock, flags); } @@ -240,6 +205,9 @@ static const struct tty_operations srmcons_ops = { static int __init srmcons_init(void) { + tty_port_init(&srmcons_singleton.port); + setup_timer(&srmcons_singleton.timer, srmcons_receive_chars, + (unsigned long)&srmcons_singleton); if (srm_is_registered_console) { struct tty_driver *driver; int err; diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 99cfe36..7523340 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -12,10 +12,6 @@ #ifndef __ARM_PERF_EVENT_H__ #define __ARM_PERF_EVENT_H__ -/* ARM performance counters start from 1 (in the cp15 accesses) so use the - * same indexes here for consistency. */ -#define PERF_EVENT_INDEX_OFFSET 1 - /* ARM perf PMU IDs for use by internal perf clients. 
*/ enum arm_perf_pmu_ids { ARM_PERF_PMU_ID_XSCALE1 = 0, diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index b2abfa1..8a89d3b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event) int err = 0; atomic_t *active_events = &armpmu->active_events; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + if (armpmu->map_event(event) == -ENOENT) return -ENOENT; diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 971d65c..c2ae3cd 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -239,9 +239,7 @@ void cpu_idle(void) leds_event(led_idle_end); rcu_idle_exit(); tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cdeb727..d616ed5 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -295,13 +295,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) */ percpu_timer_setup(); - while (!cpu_active(cpu)) - cpu_relax(); - - /* - * cpu_active bit is set, so it's safe to enalbe interrupts - * now. - */ local_irq_enable(); local_fiq_enable(); diff --git a/arch/arm/mach-imx/mx31moboard-devboard.c b/arch/arm/mach-imx/mx31moboard-devboard.c index 0aa2536..cc285e5 100644 --- a/arch/arm/mach-imx/mx31moboard-devboard.c +++ b/arch/arm/mach-imx/mx31moboard-devboard.c @@ -158,7 +158,7 @@ static int devboard_usbh1_hw_init(struct platform_device *pdev) #define USBH1_VBUSEN_B IOMUX_TO_GPIO(MX31_PIN_NFRE_B) #define USBH1_MODE IOMUX_TO_GPIO(MX31_PIN_NFALE) -static int devboard_isp1105_init(struct otg_transceiver *otg) +static int devboard_isp1105_init(struct usb_phy *otg) { int ret = gpio_request(USBH1_MODE, "usbh1-mode"); if (ret) @@ -177,7 +177,7 @@ static int devboard_isp1105_init(struct otg_transceiver *otg) } -static int devboard_isp1105_set_vbus(struct otg_transceiver *otg, bool on) +static int devboard_isp1105_set_vbus(struct usb_otg *otg, bool on) { if (on) gpio_set_value(USBH1_VBUSEN_B, 0); @@ -194,18 +194,24 @@ static struct mxc_usbh_platform_data usbh1_pdata __initdata = { static int __init devboard_usbh1_init(void) { - struct otg_transceiver *otg; + struct usb_phy *phy; struct platform_device *pdev; - otg = kzalloc(sizeof(*otg), GFP_KERNEL); - if (!otg) + phy = kzalloc(sizeof(*phy), GFP_KERNEL); + if (!phy) return -ENOMEM; - otg->label = "ISP1105"; - otg->init = devboard_isp1105_init; - otg->set_vbus = devboard_isp1105_set_vbus; + phy->otg = kzalloc(sizeof(struct usb_otg), GFP_KERNEL); + if (!phy->otg) { + kfree(phy); + return -ENOMEM; + } + + phy->label = "ISP1105"; + phy->init = devboard_isp1105_init; + phy->otg->set_vbus = devboard_isp1105_set_vbus; - usbh1_pdata.otg = otg; + usbh1_pdata.otg = phy; pdev = imx31_add_mxc_ehci_hs(1, &usbh1_pdata); if (IS_ERR(pdev)) diff --git a/arch/arm/mach-imx/mx31moboard-marxbot.c b/arch/arm/mach-imx/mx31moboard-marxbot.c index bb639cb..135c90e 100644 --- a/arch/arm/mach-imx/mx31moboard-marxbot.c +++ b/arch/arm/mach-imx/mx31moboard-marxbot.c @@ -272,7 +272,7 @@ static int marxbot_usbh1_hw_init(struct platform_device *pdev) #define USBH1_VBUSEN_B IOMUX_TO_GPIO(MX31_PIN_NFRE_B) #define USBH1_MODE IOMUX_TO_GPIO(MX31_PIN_NFALE) -static int marxbot_isp1105_init(struct otg_transceiver *otg) +static int marxbot_isp1105_init(struct usb_phy *otg) { int ret = gpio_request(USBH1_MODE, "usbh1-mode"); if (ret) @@ -291,7 +291,7 
@@ static int marxbot_isp1105_init(struct otg_transceiver *otg) } -static int marxbot_isp1105_set_vbus(struct otg_transceiver *otg, bool on) +static int marxbot_isp1105_set_vbus(struct usb_otg *otg, bool on) { if (on) gpio_set_value(USBH1_VBUSEN_B, 0); @@ -308,18 +308,24 @@ static struct mxc_usbh_platform_data usbh1_pdata __initdata = { static int __init marxbot_usbh1_init(void) { - struct otg_transceiver *otg; + struct usb_phy *phy; struct platform_device *pdev; - otg = kzalloc(sizeof(*otg), GFP_KERNEL); - if (!otg) + phy = kzalloc(sizeof(*phy), GFP_KERNEL); + if (!phy) return -ENOMEM; - otg->label = "ISP1105"; - otg->init = marxbot_isp1105_init; - otg->set_vbus = marxbot_isp1105_set_vbus; + phy->otg = kzalloc(sizeof(struct usb_otg), GFP_KERNEL); + if (!phy->otg) { + kfree(phy); + return -ENOMEM; + } + + phy->label = "ISP1105"; + phy->init = marxbot_isp1105_init; + phy->otg->set_vbus = marxbot_isp1105_set_vbus; - usbh1_pdata.otg = otg; + usbh1_pdata.otg = phy; pdev = imx31_add_mxc_ehci_hs(1, &usbh1_pdata); if (IS_ERR(pdev)) diff --git a/arch/arm/mach-pxa/pxa3xx-ulpi.c b/arch/arm/mach-pxa/pxa3xx-ulpi.c index e28dfb88..5ead6d4 100644 --- a/arch/arm/mach-pxa/pxa3xx-ulpi.c +++ b/arch/arm/mach-pxa/pxa3xx-ulpi.c @@ -33,7 +33,7 @@ struct pxa3xx_u2d_ulpi { struct clk *clk; void __iomem *mmio_base; - struct otg_transceiver *otg; + struct usb_phy *otg; unsigned int ulpi_mode; }; @@ -79,7 +79,7 @@ static int pxa310_ulpi_poll(void) return -ETIMEDOUT; } -static int pxa310_ulpi_read(struct otg_transceiver *otg, u32 reg) +static int pxa310_ulpi_read(struct usb_phy *otg, u32 reg) { int err; @@ -98,7 +98,7 @@ static int pxa310_ulpi_read(struct otg_transceiver *otg, u32 reg) return u2d_readl(U2DOTGUCR) & U2DOTGUCR_RDATA; } -static int pxa310_ulpi_write(struct otg_transceiver *otg, u32 val, u32 reg) +static int pxa310_ulpi_write(struct usb_phy *otg, u32 val, u32 reg) { if (pxa310_ulpi_get_phymode() != SYNCH) { pr_warning("%s: PHY is not in SYNCH mode!\n", __func__); @@ -111,7 +111,7 @@ static int pxa310_ulpi_write(struct otg_transceiver *otg, u32 val, u32 reg) return pxa310_ulpi_poll(); } -struct otg_io_access_ops pxa310_ulpi_access_ops = { +struct usb_phy_io_ops pxa310_ulpi_access_ops = { .read = pxa310_ulpi_read, .write = pxa310_ulpi_write, }; @@ -139,19 +139,19 @@ static int pxa310_start_otg_host_transcvr(struct usb_bus *host) pxa310_otg_transceiver_rtsm(); - err = otg_init(u2d->otg); + err = usb_phy_init(u2d->otg); if (err) { pr_err("OTG transceiver init failed"); return err; } - err = otg_set_vbus(u2d->otg, 1); + err = otg_set_vbus(u2d->otg->otg, 1); if (err) { pr_err("OTG transceiver VBUS set failed"); return err; } - err = otg_set_host(u2d->otg, host); + err = otg_set_host(u2d->otg->otg, host); if (err) pr_err("OTG transceiver Host mode set failed"); @@ -189,9 +189,9 @@ static void pxa310_stop_otg_hc(void) { pxa310_otg_transceiver_rtsm(); - otg_set_host(u2d->otg, NULL); - otg_set_vbus(u2d->otg, 0); - otg_shutdown(u2d->otg); + otg_set_host(u2d->otg->otg, NULL); + otg_set_vbus(u2d->otg->otg, 0); + usb_phy_shutdown(u2d->otg); } static void pxa310_u2d_setup_otg_hc(void) diff --git a/arch/arm/mach-tegra/include/mach/usb_phy.h b/arch/arm/mach-tegra/include/mach/usb_phy.h index d4b8f9e..de1a0f6 100644 --- a/arch/arm/mach-tegra/include/mach/usb_phy.h +++ b/arch/arm/mach-tegra/include/mach/usb_phy.h @@ -58,7 +58,7 @@ struct tegra_usb_phy { struct clk *pad_clk; enum tegra_usb_phy_mode mode; void *config; - struct otg_transceiver *ulpi; + struct usb_phy *ulpi; }; struct tegra_usb_phy *tegra_usb_phy_open(int 
instance, void __iomem *regs, diff --git a/arch/arm/mach-tegra/usb_phy.c b/arch/arm/mach-tegra/usb_phy.c index 37576a7..ad321f9 100644 --- a/arch/arm/mach-tegra/usb_phy.c +++ b/arch/arm/mach-tegra/usb_phy.c @@ -608,13 +608,13 @@ static int ulpi_phy_power_on(struct tegra_usb_phy *phy) writel(val, base + ULPI_TIMING_CTRL_1); /* Fix VbusInvalid due to floating VBUS */ - ret = otg_io_write(phy->ulpi, 0x40, 0x08); + ret = usb_phy_io_write(phy->ulpi, 0x40, 0x08); if (ret) { pr_err("%s: ulpi write failed\n", __func__); return ret; } - ret = otg_io_write(phy->ulpi, 0x80, 0x0B); + ret = usb_phy_io_write(phy->ulpi, 0x80, 0x0B); if (ret) { pr_err("%s: ulpi write failed\n", __func__); return ret; diff --git a/arch/arm/plat-mxc/include/mach/mxc_ehci.h b/arch/arm/plat-mxc/include/mach/mxc_ehci.h index 2c159dc..9ffd1bb 100644 --- a/arch/arm/plat-mxc/include/mach/mxc_ehci.h +++ b/arch/arm/plat-mxc/include/mach/mxc_ehci.h @@ -44,7 +44,7 @@ struct mxc_usbh_platform_data { int (*exit)(struct platform_device *pdev); unsigned int portsc; - struct otg_transceiver *otg; + struct usb_phy *otg; }; int mx51_initialize_usb_hw(int port, unsigned int flags); diff --git a/arch/arm/plat-mxc/include/mach/ulpi.h b/arch/arm/plat-mxc/include/mach/ulpi.h index f9161c9..42bdaca 100644 --- a/arch/arm/plat-mxc/include/mach/ulpi.h +++ b/arch/arm/plat-mxc/include/mach/ulpi.h @@ -2,15 +2,15 @@ #define __MACH_ULPI_H #ifdef CONFIG_USB_ULPI -struct otg_transceiver *imx_otg_ulpi_create(unsigned int flags); +struct usb_phy *imx_otg_ulpi_create(unsigned int flags); #else -static inline struct otg_transceiver *imx_otg_ulpi_create(unsigned int flags) +static inline struct usb_phy *imx_otg_ulpi_create(unsigned int flags) { return NULL; } #endif -extern struct otg_io_access_ops mxc_ulpi_access_ops; +extern struct usb_phy_io_ops mxc_ulpi_access_ops; #endif /* __MACH_ULPI_H */ diff --git a/arch/arm/plat-mxc/ulpi.c b/arch/arm/plat-mxc/ulpi.c index 477e45b..d296342 100644 --- a/arch/arm/plat-mxc/ulpi.c +++ b/arch/arm/plat-mxc/ulpi.c @@ -58,7 +58,7 @@ static int ulpi_poll(void __iomem *view, u32 bit) return -ETIMEDOUT; } -static int ulpi_read(struct otg_transceiver *otg, u32 reg) +static int ulpi_read(struct usb_phy *otg, u32 reg) { int ret; void __iomem *view = otg->io_priv; @@ -84,7 +84,7 @@ static int ulpi_read(struct otg_transceiver *otg, u32 reg) return (__raw_readl(view) >> ULPIVW_RDATA_SHIFT) & ULPIVW_RDATA_MASK; } -static int ulpi_write(struct otg_transceiver *otg, u32 val, u32 reg) +static int ulpi_write(struct usb_phy *otg, u32 val, u32 reg) { int ret; void __iomem *view = otg->io_priv; @@ -106,13 +106,13 @@ static int ulpi_write(struct otg_transceiver *otg, u32 val, u32 reg) return ulpi_poll(view, ULPIVW_RUN); } -struct otg_io_access_ops mxc_ulpi_access_ops = { +struct usb_phy_io_ops mxc_ulpi_access_ops = { .read = ulpi_read, .write = ulpi_write, }; EXPORT_SYMBOL_GPL(mxc_ulpi_access_ops); -struct otg_transceiver *imx_otg_ulpi_create(unsigned int flags) +struct usb_phy *imx_otg_ulpi_create(unsigned int flags) { return otg_ulpi_create(&mxc_ulpi_access_ops, flags); } diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index ea33957..92c5af9 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -40,9 +40,7 @@ void cpu_idle(void) cpu_idle_sleep(); rcu_idle_exit(); tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 8dd0416..a80a643 100644 --- 
a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -94,9 +94,7 @@ void cpu_idle(void) idle(); rcu_idle_exit(); tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index aa585e4..d8f50ff 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -115,9 +115,7 @@ void cpu_idle (void) idle = default_idle; idle(); } - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h index a69e015..c52ea55 100644 --- a/arch/frv/include/asm/perf_event.h +++ b/arch/frv/include/asm/perf_event.h @@ -12,6 +12,4 @@ #ifndef _ASM_PERF_EVENT_H #define _ASM_PERF_EVENT_H -#define PERF_EVENT_INDEX_OFFSET 0 - #endif /* _ASM_PERF_EVENT_H */ diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 3901df1..29cc497 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -92,9 +92,7 @@ void cpu_idle(void) idle(); } - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 933bd38..1a173b3 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -81,9 +81,7 @@ void cpu_idle(void) while (1) { while (!need_resched()) idle(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h index 6c2910f..8b8526b 100644 --- a/arch/hexagon/include/asm/perf_event.h +++ b/arch/hexagon/include/asm/perf_event.h @@ -19,6 +19,4 @@ #ifndef _ASM_PERF_EVENT_H #define _ASM_PERF_EVENT_H -#define PERF_EVENT_INDEX_OFFSET 0 - #endif /* _ASM_PERF_EVENT_H */ diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index c871a2c..0123c63 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -179,8 +179,6 @@ void __cpuinit start_secondary(void) printk(KERN_INFO "%s cpu %d\n", __func__, current_thread_info()->cpu); set_cpu_online(cpu, true); - while (!cpumask_test_cpu(cpu, cpu_active_mask)) - cpu_relax(); local_irq_enable(); cpu_idle(); diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c index bf6d9d8..0216e28 100644 --- a/arch/ia64/hp/sim/boot/fw-emu.c +++ b/arch/ia64/hp/sim/boot/fw-emu.c @@ -160,28 +160,19 @@ sal_emulator (long index, unsigned long in1, unsigned long in2, */ status = 0; if (index == SAL_FREQ_BASE) { - switch (in1) { - case SAL_FREQ_BASE_PLATFORM: + if (in1 == SAL_FREQ_BASE_PLATFORM) r9 = 200000000; - break; - - case SAL_FREQ_BASE_INTERVAL_TIMER: + else if (in1 == SAL_FREQ_BASE_INTERVAL_TIMER) { /* * Is this supposed to be the cr.itc frequency * or something platform specific? The SAL * doc ain't exactly clear on this... 
*/ r9 = 700000000; - break; - - case SAL_FREQ_BASE_REALTIME_CLOCK: + } else if (in1 == SAL_FREQ_BASE_REALTIME_CLOCK) r9 = 1; - break; - - default: + else status = -1; - break; - } } else if (index == SAL_SET_VECTORS) { ; } else if (index == SAL_GET_STATE_INFO) { diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c index 4bd9a63..0aa70eb 100644 --- a/arch/ia64/hp/sim/hpsim_irq.c +++ b/arch/ia64/hp/sim/hpsim_irq.c @@ -10,6 +10,8 @@ #include <linux/sched.h> #include <linux/irq.h> +#include "hpsim_ssc.h" + static unsigned int hpsim_irq_startup(struct irq_data *data) { @@ -37,15 +39,37 @@ static struct irq_chip irq_type_hp_sim = { .irq_set_affinity = hpsim_set_affinity_noop, }; +static void hpsim_irq_set_chip(int irq) +{ + struct irq_chip *chip = irq_get_chip(irq); + + if (chip == &no_irq_chip) + irq_set_chip(irq, &irq_type_hp_sim); +} + +static void hpsim_connect_irq(int intr, int irq) +{ + ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT); +} + +int hpsim_get_irq(int intr) +{ + int irq = assign_irq_vector(AUTO_ASSIGN); + + if (irq >= 0) { + hpsim_irq_set_chip(irq); + irq_set_handler(irq, handle_simple_irq); + hpsim_connect_irq(intr, irq); + } + + return irq; +} + void __init hpsim_irq_init (void) { int i; - for_each_active_irq(i) { - struct irq_chip *chip = irq_get_chip(i); - - if (chip == &no_irq_chip) - irq_set_chip(i, &irq_type_hp_sim); - } + for_each_active_irq(i) + hpsim_irq_set_chip(i); } diff --git a/arch/ia64/hp/sim/hpsim_setup.c b/arch/ia64/hp/sim/hpsim_setup.c index f629e90..664a540 100644 --- a/arch/ia64/hp/sim/hpsim_setup.c +++ b/arch/ia64/hp/sim/hpsim_setup.c @@ -26,12 +26,6 @@ #include "hpsim_ssc.h" void -ia64_ssc_connect_irq (long intr, long irq) -{ - ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT); -} - -void ia64_ctl_trace (long on) { ia64_ssc(on, 0, 0, 0, SSC_CTL_TRACE); diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c index 440001e..a63218e 100644 --- a/arch/ia64/hp/sim/simeth.c +++ b/arch/ia64/hp/sim/simeth.c @@ -129,17 +129,6 @@ netdev_probe(char *name, unsigned char *ether) static inline int -netdev_connect(int irq) -{ - /* XXX Fix me - * this does not support multiple cards - * also no return value - */ - ia64_ssc_connect_irq(NETWORK_INTR, irq); - return 0; -} - -static inline int netdev_attach(int fd, int irq, unsigned int ipaddr) { /* this puts the host interface in the right mode (start interrupting) */ @@ -226,15 +215,13 @@ simeth_probe1(void) return err; } - if ((rc = assign_irq_vector(AUTO_ASSIGN)) < 0) - panic("%s: out of interrupt vectors!\n", __func__); - dev->irq = rc; - /* * attach the interrupt in the simulator, this does enable interrupts * until a netdev_attach() is called */ - netdev_connect(dev->irq); + if ((rc = hpsim_get_irq(NETWORK_INTR)) < 0) + panic("%s: out of interrupt vectors!\n", __func__); + dev->irq = rc; printk(KERN_INFO "%s: hosteth=%s simfd=%d, HwAddr=%pm, IRQ %d\n", dev->name, simeth_device, local->simfd, dev->dev_addr, dev->irq); diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index bff0824..c34785d 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -4,16 +4,11 @@ * This driver is mostly used for bringup purposes and will go away. * It has a strong dependency on the system console. All outputs * are rerouted to the same facility as the one used by printk which, in our - * case means sys_sim.c console (goes via the simulator). The code hereafter - * is completely leveraged from the serial.c driver. 
+ * case means sys_sim.c console (goes via the simulator). * * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co * Stephane Eranian <eranian@hpl.hp.com> * David Mosberger-Tang <davidm@hpl.hp.com> - * - * 02/04/00 D. Mosberger Merged in serial.c bug fixes in rs_close(). - * 02/25/00 D. Mosberger Synced up with 2.3.99pre-5 version of serial.c. - * 07/30/02 D. Mosberger Replace sti()/cli() with explicit spinlocks & local irq masking */ #include <linux/init.h> @@ -27,15 +22,17 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/capability.h> +#include <linux/circ_buf.h> #include <linux/console.h> +#include <linux/irq.h> #include <linux/module.h> #include <linux/serial.h> -#include <linux/serialP.h> #include <linux/sysrq.h> +#include <linux/uaccess.h> + +#include <asm/hpsim.h> -#include <asm/irq.h> -#include <asm/hw_irq.h> -#include <asm/uaccess.h> +#include "hpsim_ssc.h" #undef SIMSERIAL_DEBUG /* define this to get some debug information */ @@ -43,118 +40,44 @@ #define NR_PORTS 1 /* only one port for now */ -#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? IRQF_SHARED : IRQF_DISABLED) - -#define SSC_GETCHAR 21 - -extern long ia64_ssc (long, long, long, long, int); -extern void ia64_ssc_connect_irq (long intr, long irq); - -static char *serial_name = "SimSerial driver"; -static char *serial_version = "0.6"; - -/* - * This has been extracted from asm/serial.h. We need one eventually but - * I don't know exactly what we're going to put in it so just fake one - * for now. - */ -#define BASE_BAUD ( 1843200 / 16 ) - -#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) - -/* - * Most of the values here are meaningless to this particular driver. - * However some values must be preserved for the code (leveraged from serial.c - * to work correctly). - * port must not be 0 - * type must not be UNKNOWN - * So I picked arbitrary (guess from where?) values instead - */ -static struct serial_state rs_table[NR_PORTS]={ - /* UART CLK PORT IRQ FLAGS */ - { 0, BASE_BAUD, 0x3F8, 0, STD_COM_FLAGS,0,PORT_16550 } /* ttyS0 */ +struct serial_state { + struct tty_port port; + struct circ_buf xmit; + int irq; + int x_char; }; -/* - * Just for the fun of it ! - */ -static struct serial_uart_config uart_config[] = { - { "unknown", 1, 0 }, - { "8250", 1, 0 }, - { "16450", 1, 0 }, - { "16550", 1, 0 }, - { "16550A", 16, UART_CLEAR_FIFO | UART_USE_FIFO }, - { "cirrus", 1, 0 }, - { "ST16650", 1, UART_CLEAR_FIFO | UART_STARTECH }, - { "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO | - UART_STARTECH }, - { "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO}, - { NULL, 0} -}; +static struct serial_state rs_table[NR_PORTS]; struct tty_driver *hp_simserial_driver; -static struct async_struct *IRQ_ports[NR_IRQS]; - static struct console *console; -static unsigned char *tmp_buf; - -extern struct console *console_drivers; /* from kernel/printk.c */ - -/* - * ------------------------------------------------------------ - * rs_stop() and rs_start() - * - * This routines are called before setting or resetting tty->stopped. - * They enable or disable transmitter interrupts, as necessary. 
- * ------------------------------------------------------------ - */ -static void rs_stop(struct tty_struct *tty) -{ -#ifdef SIMSERIAL_DEBUG - printk("rs_stop: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n", - tty->stopped, tty->hw_stopped, tty->flow_stopped); -#endif - -} - -static void rs_start(struct tty_struct *tty) -{ -#ifdef SIMSERIAL_DEBUG - printk("rs_start: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n", - tty->stopped, tty->hw_stopped, tty->flow_stopped); -#endif -} - -static void receive_chars(struct tty_struct *tty) +static void receive_chars(struct tty_struct *tty) { unsigned char ch; static unsigned char seen_esc = 0; while ( (ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR)) ) { - if ( ch == 27 && seen_esc == 0 ) { + if (ch == 27 && seen_esc == 0) { seen_esc = 1; continue; - } else { - if ( seen_esc==1 && ch == 'O' ) { - seen_esc = 2; - continue; - } else if ( seen_esc == 2 ) { - if ( ch == 'P' ) /* F1 */ - show_state(); + } else if (seen_esc == 1 && ch == 'O') { + seen_esc = 2; + continue; + } else if (seen_esc == 2) { + if (ch == 'P') /* F1 */ + show_state(); #ifdef CONFIG_MAGIC_SYSRQ - if ( ch == 'S' ) { /* F4 */ - do - ch = ia64_ssc(0, 0, 0, 0, - SSC_GETCHAR); - while (!ch); - handle_sysrq(ch); - } -#endif - seen_esc = 0; - continue; + if (ch == 'S') { /* F4 */ + do { + ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR); + } while (!ch); + handle_sysrq(ch); } +#endif + seen_esc = 0; + continue; } seen_esc = 0; @@ -169,22 +92,19 @@ static void receive_chars(struct tty_struct *tty) */ static irqreturn_t rs_interrupt_single(int irq, void *dev_id) { - struct async_struct * info; + struct serial_state *info = dev_id; + struct tty_struct *tty = tty_port_tty_get(&info->port); - /* - * I don't know exactly why they don't use the dev_id opaque data - * pointer instead of this extra lookup table - */ - info = IRQ_ports[irq]; - if (!info || !info->tty) { - printk(KERN_INFO "simrs_interrupt_single: info|tty=0 info=%p problem\n", info); + if (!tty) { + printk(KERN_INFO "%s: tty=0 problem\n", __func__); return IRQ_NONE; } /* * pretty simple in our case, because we only get interrupts * on inbound traffic */ - receive_chars(info->tty); + receive_chars(tty); + tty_kref_put(tty); return IRQ_HANDLED; } @@ -194,17 +114,12 @@ static irqreturn_t rs_interrupt_single(int irq, void *dev_id) * ------------------------------------------------------------------- */ -static void do_softint(struct work_struct *private_) -{ - printk(KERN_ERR "simserial: do_softint called\n"); -} - static int rs_put_char(struct tty_struct *tty, unsigned char ch) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct serial_state *info = tty->driver_data; unsigned long flags; - if (!tty || !info->xmit.buf) + if (!info->xmit.buf) return 0; local_irq_save(flags); @@ -218,12 +133,12 @@ static int rs_put_char(struct tty_struct *tty, unsigned char ch) return 1; } -static void transmit_chars(struct async_struct *info, int *intr_done) +static void transmit_chars(struct tty_struct *tty, struct serial_state *info, + int *intr_done) { int count; unsigned long flags; - local_irq_save(flags); if (info->x_char) { @@ -231,16 +146,16 @@ static void transmit_chars(struct async_struct *info, int *intr_done) console->write(console, &c, 1); - info->state->icount.tx++; info->x_char = 0; goto out; } - if (info->xmit.head == info->xmit.tail || info->tty->stopped || info->tty->hw_stopped) { + if (info->xmit.head == info->xmit.tail || tty->stopped || + tty->hw_stopped) { #ifdef SIMSERIAL_DEBUG 
printk("transmit_chars: head=%d, tail=%d, stopped=%d\n", - info->xmit.head, info->xmit.tail, info->tty->stopped); + info->xmit.head, info->xmit.tail, tty->stopped); #endif goto out; } @@ -272,24 +187,24 @@ out: static void rs_flush_chars(struct tty_struct *tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct serial_state *info = tty->driver_data; - if (info->xmit.head == info->xmit.tail || tty->stopped || tty->hw_stopped || - !info->xmit.buf) + if (info->xmit.head == info->xmit.tail || tty->stopped || + tty->hw_stopped || !info->xmit.buf) return; - transmit_chars(info, NULL); + transmit_chars(tty, info, NULL); } - static int rs_write(struct tty_struct * tty, const unsigned char *buf, int count) { + struct serial_state *info = tty->driver_data; int c, ret = 0; - struct async_struct *info = (struct async_struct *)tty->driver_data; unsigned long flags; - if (!tty || !info->xmit.buf || !tmp_buf) return 0; + if (!info->xmit.buf) + return 0; local_irq_save(flags); while (1) { @@ -310,30 +225,30 @@ static int rs_write(struct tty_struct * tty, /* * Hey, we transmit directly from here in our case */ - if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) - && !tty->stopped && !tty->hw_stopped) { - transmit_chars(info, NULL); - } + if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) && + !tty->stopped && !tty->hw_stopped) + transmit_chars(tty, info, NULL); + return ret; } static int rs_write_room(struct tty_struct *tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct serial_state *info = tty->driver_data; return CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); } static int rs_chars_in_buffer(struct tty_struct *tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct serial_state *info = tty->driver_data; return CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); } static void rs_flush_buffer(struct tty_struct *tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct serial_state *info = tty->driver_data; unsigned long flags; local_irq_save(flags); @@ -349,7 +264,7 @@ static void rs_flush_buffer(struct tty_struct *tty) */ static void rs_send_xchar(struct tty_struct *tty, char ch) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct serial_state *info = tty->driver_data; info->x_char = ch; if (ch) { @@ -357,7 +272,7 @@ static void rs_send_xchar(struct tty_struct *tty, char ch) * I guess we could call console->write() directly but * let's do that for now. 
*/ - transmit_chars(info, NULL); + transmit_chars(tty, info, NULL); } } @@ -371,14 +286,15 @@ static void rs_send_xchar(struct tty_struct *tty, char ch) */ static void rs_throttle(struct tty_struct * tty) { - if (I_IXOFF(tty)) rs_send_xchar(tty, STOP_CHAR(tty)); + if (I_IXOFF(tty)) + rs_send_xchar(tty, STOP_CHAR(tty)); printk(KERN_INFO "simrs_throttle called\n"); } static void rs_unthrottle(struct tty_struct * tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct serial_state *info = tty->driver_data; if (I_IXOFF(tty)) { if (info->x_char) @@ -389,7 +305,6 @@ static void rs_unthrottle(struct tty_struct * tty) printk(KERN_INFO "simrs_unthrottle called\n"); } - static int rs_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) && @@ -400,48 +315,21 @@ static int rs_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) } switch (cmd) { - case TIOCGSERIAL: - printk(KERN_INFO "simrs_ioctl TIOCGSERIAL called\n"); - return 0; - case TIOCSSERIAL: - printk(KERN_INFO "simrs_ioctl TIOCSSERIAL called\n"); - return 0; - case TIOCSERCONFIG: - printk(KERN_INFO "rs_ioctl: TIOCSERCONFIG called\n"); - return -EINVAL; - - case TIOCSERGETLSR: /* Get line status register */ - printk(KERN_INFO "rs_ioctl: TIOCSERGETLSR called\n"); - return -EINVAL; - - case TIOCSERGSTRUCT: - printk(KERN_INFO "rs_ioctl: TIOCSERGSTRUCT called\n"); -#if 0 - if (copy_to_user((struct async_struct *) arg, - info, sizeof(struct async_struct))) - return -EFAULT; -#endif - return 0; - - /* - * Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change - * - mask passed in arg for lines of interest - * (use |'ed TIOCM_RNG/DSR/CD/CTS for masking) - * Caller should use TIOCGICOUNT to see which one it was - */ - case TIOCMIWAIT: - printk(KERN_INFO "rs_ioctl: TIOCMIWAIT: called\n"); - return 0; - case TIOCSERGWILD: - case TIOCSERSWILD: - /* "setserial -W" is called in Debian boot */ - printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n"); - return 0; - - default: - return -ENOIOCTLCMD; - } - return 0; + case TIOCGSERIAL: + case TIOCSSERIAL: + case TIOCSERGSTRUCT: + case TIOCMIWAIT: + return 0; + case TIOCSERCONFIG: + case TIOCSERGETLSR: /* Get line status register */ + return -EINVAL; + case TIOCSERGWILD: + case TIOCSERSWILD: + /* "setserial -W" is called in Debian boot */ + printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n"); + return 0; + } + return -ENOIOCTLCMD; } #define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) @@ -452,220 +340,50 @@ static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios) if ((old_termios->c_cflag & CRTSCTS) && !(tty->termios->c_cflag & CRTSCTS)) { tty->hw_stopped = 0; - rs_start(tty); } } /* * This routine will shutdown a serial port; interrupts are disabled, and * DTR is dropped if the hangup on close termio flag is on. */ -static void shutdown(struct async_struct * info) +static void shutdown(struct tty_port *port) { - unsigned long flags; - struct serial_state *state; - int retval; - - if (!(info->flags & ASYNC_INITIALIZED)) return; - - state = info->state; - -#ifdef SIMSERIAL_DEBUG - printk("Shutting down serial port %d (irq %d)....", info->line, - state->irq); -#endif + struct serial_state *info = container_of(port, struct serial_state, + port); + unsigned long flags; local_irq_save(flags); - { - /* - * First unlink the serial port from the IRQ chain... 
- */ - if (info->next_port) - info->next_port->prev_port = info->prev_port; - if (info->prev_port) - info->prev_port->next_port = info->next_port; - else - IRQ_ports[state->irq] = info->next_port; - - /* - * Free the IRQ, if necessary - */ - if (state->irq && (!IRQ_ports[state->irq] || - !IRQ_ports[state->irq]->next_port)) { - if (IRQ_ports[state->irq]) { - free_irq(state->irq, NULL); - retval = request_irq(state->irq, rs_interrupt_single, - IRQ_T(info), "serial", NULL); - - if (retval) - printk(KERN_ERR "serial shutdown: request_irq: error %d" - " Couldn't reacquire IRQ.\n", retval); - } else - free_irq(state->irq, NULL); - } - - if (info->xmit.buf) { - free_page((unsigned long) info->xmit.buf); - info->xmit.buf = NULL; - } - - if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags); + if (info->irq) + free_irq(info->irq, info); - info->flags &= ~ASYNC_INITIALIZED; + if (info->xmit.buf) { + free_page((unsigned long) info->xmit.buf); + info->xmit.buf = NULL; } local_irq_restore(flags); } -/* - * ------------------------------------------------------------ - * rs_close() - * - * This routine is called when the serial port gets closed. First, we - * wait for the last remaining data to be sent. Then, we unlink its - * async structure from the interrupt chain if necessary, and we free - * that IRQ if nothing is left in the chain. - * ------------------------------------------------------------ - */ static void rs_close(struct tty_struct *tty, struct file * filp) { - struct async_struct * info = (struct async_struct *)tty->driver_data; - struct serial_state *state; - unsigned long flags; - - if (!info ) return; - - state = info->state; - - local_irq_save(flags); - if (tty_hung_up_p(filp)) { -#ifdef SIMSERIAL_DEBUG - printk("rs_close: hung_up\n"); -#endif - local_irq_restore(flags); - return; - } -#ifdef SIMSERIAL_DEBUG - printk("rs_close ttys%d, count = %d\n", info->line, state->count); -#endif - if ((tty->count == 1) && (state->count != 1)) { - /* - * Uh, oh. tty->count is 1, which means that the tty - * structure will be freed. state->count should always - * be one in these conditions. If it's greater than - * one, we've got real problems, since it means the - * serial port won't be shutdown. - */ - printk(KERN_ERR "rs_close: bad serial port count; tty->count is 1, " - "state->count is %d\n", state->count); - state->count = 1; - } - if (--state->count < 0) { - printk(KERN_ERR "rs_close: bad serial port count for ttys%d: %d\n", - info->line, state->count); - state->count = 0; - } - if (state->count) { - local_irq_restore(flags); - return; - } - info->flags |= ASYNC_CLOSING; - local_irq_restore(flags); + struct serial_state *info = tty->driver_data; - /* - * Now we wait for the transmit buffer to clear; and we notify - * the line discipline to only process XON/XOFF characters. - */ - shutdown(info); - rs_flush_buffer(tty); - tty_ldisc_flush(tty); - info->event = 0; - info->tty = NULL; - if (info->blocked_open) { - if (info->close_delay) - schedule_timeout_interruptible(info->close_delay); - wake_up_interruptible(&info->open_wait); - } - info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - wake_up_interruptible(&info->close_wait); + tty_port_close(&info->port, tty, filp); } -/* - * rs_wait_until_sent() --- wait until the transmitter is empty - */ -static void rs_wait_until_sent(struct tty_struct *tty, int timeout) -{ -} - - -/* - * rs_hangup() --- called by tty_hangup() when a hangup is signaled. 
- */ static void rs_hangup(struct tty_struct *tty) { - struct async_struct * info = (struct async_struct *)tty->driver_data; - struct serial_state *state = info->state; - -#ifdef SIMSERIAL_DEBUG - printk("rs_hangup: called\n"); -#endif - - state = info->state; + struct serial_state *info = tty->driver_data; rs_flush_buffer(tty); - if (info->flags & ASYNC_CLOSING) - return; - shutdown(info); - - info->event = 0; - state->count = 0; - info->flags &= ~ASYNC_NORMAL_ACTIVE; - info->tty = NULL; - wake_up_interruptible(&info->open_wait); + tty_port_hangup(&info->port); } - -static int get_async_struct(int line, struct async_struct **ret_info) +static int activate(struct tty_port *port, struct tty_struct *tty) { - struct async_struct *info; - struct serial_state *sstate; - - sstate = rs_table + line; - sstate->count++; - if (sstate->info) { - *ret_info = sstate->info; - return 0; - } - info = kzalloc(sizeof(struct async_struct), GFP_KERNEL); - if (!info) { - sstate->count--; - return -ENOMEM; - } - init_waitqueue_head(&info->open_wait); - init_waitqueue_head(&info->close_wait); - init_waitqueue_head(&info->delta_msr_wait); - info->magic = SERIAL_MAGIC; - info->port = sstate->port; - info->flags = sstate->flags; - info->xmit_fifo_size = sstate->xmit_fifo_size; - info->line = line; - INIT_WORK(&info->work, do_softint); - info->state = sstate; - if (sstate->info) { - kfree(info); - *ret_info = sstate->info; - return 0; - } - *ret_info = sstate->info = info; - return 0; -} - -static int -startup(struct async_struct *info) -{ - unsigned long flags; - int retval=0; - irq_handler_t handler; - struct serial_state *state= info->state; - unsigned long page; + struct serial_state *state = container_of(port, struct serial_state, + port); + unsigned long flags, page; + int retval = 0; page = get_zeroed_page(GFP_KERNEL); if (!page) @@ -673,86 +391,31 @@ startup(struct async_struct *info) local_irq_save(flags); - if (info->flags & ASYNC_INITIALIZED) { - free_page(page); - goto errout; - } - - if (!state->port || !state->type) { - if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags); - free_page(page); - goto errout; - } - if (info->xmit.buf) + if (state->xmit.buf) free_page(page); else - info->xmit.buf = (unsigned char *) page; - -#ifdef SIMSERIAL_DEBUG - printk("startup: ttys%d (irq %d)...", info->line, state->irq); -#endif + state->xmit.buf = (unsigned char *) page; - /* - * Allocate the IRQ if necessary - */ - if (state->irq && (!IRQ_ports[state->irq] || - !IRQ_ports[state->irq]->next_port)) { - if (IRQ_ports[state->irq]) { - retval = -EBUSY; - goto errout; - } else - handler = rs_interrupt_single; - - retval = request_irq(state->irq, handler, IRQ_T(info), "simserial", NULL); - if (retval) { - if (capable(CAP_SYS_ADMIN)) { - if (info->tty) - set_bit(TTY_IO_ERROR, - &info->tty->flags); - retval = 0; - } + if (state->irq) { + retval = request_irq(state->irq, rs_interrupt_single, 0, + "simserial", state); + if (retval) goto errout; - } } - /* - * Insert serial port into IRQ chain. - */ - info->prev_port = NULL; - info->next_port = IRQ_ports[state->irq]; - if (info->next_port) - info->next_port->prev_port = info; - IRQ_ports[state->irq] = info; - - if (info->tty) clear_bit(TTY_IO_ERROR, &info->tty->flags); - - info->xmit.head = info->xmit.tail = 0; - -#if 0 - /* - * Set up serial timers... 
- */ - timer_table[RS_TIMER].expires = jiffies + 2*HZ/100; - timer_active |= 1 << RS_TIMER; -#endif + state->xmit.head = state->xmit.tail = 0; /* * Set up the tty->alt_speed kludge */ - if (info->tty) { - if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI) - info->tty->alt_speed = 57600; - if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI) - info->tty->alt_speed = 115200; - if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI) - info->tty->alt_speed = 230400; - if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP) - info->tty->alt_speed = 460800; - } - - info->flags |= ASYNC_INITIALIZED; - local_irq_restore(flags); - return 0; + if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI) + tty->alt_speed = 57600; + if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI) + tty->alt_speed = 115200; + if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI) + tty->alt_speed = 230400; + if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP) + tty->alt_speed = 460800; errout: local_irq_restore(flags); @@ -768,56 +431,11 @@ errout: */ static int rs_open(struct tty_struct *tty, struct file * filp) { - struct async_struct *info; - int retval, line; - unsigned long page; + struct serial_state *info = rs_table + tty->index; + struct tty_port *port = &info->port; - line = tty->index; - if ((line < 0) || (line >= NR_PORTS)) - return -ENODEV; - retval = get_async_struct(line, &info); - if (retval) - return retval; tty->driver_data = info; - info->tty = tty; - -#ifdef SIMSERIAL_DEBUG - printk("rs_open %s, count = %d\n", tty->name, info->state->count); -#endif - info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0; - - if (!tmp_buf) { - page = get_zeroed_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - if (tmp_buf) - free_page(page); - else - tmp_buf = (unsigned char *) page; - } - - /* - * If the port is the middle of closing, bail out now - */ - if (tty_hung_up_p(filp) || - (info->flags & ASYNC_CLOSING)) { - if (info->flags & ASYNC_CLOSING) - interruptible_sleep_on(&info->close_wait); -#ifdef SERIAL_DO_RESTART - return ((info->flags & ASYNC_HUP_NOTIFY) ? - -EAGAIN : -ERESTARTSYS); -#else - return -EAGAIN; -#endif - } - - /* - * Start up serial port - */ - retval = startup(info); - if (retval) { - return retval; - } + tty->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0; /* * figure out which console to use (should be one already) @@ -828,30 +446,21 @@ static int rs_open(struct tty_struct *tty, struct file * filp) console = console->next; } -#ifdef SIMSERIAL_DEBUG - printk("rs_open ttys%d successful\n", info->line); -#endif - return 0; + return tty_port_open(port, tty, filp); } /* * /proc fs routines.... */ -static inline void line_info(struct seq_file *m, struct serial_state *state) -{ - seq_printf(m, "%d: uart:%s port:%lX irq:%d\n", - state->line, uart_config[state->type].name, - state->port, state->irq); -} - static int rs_proc_show(struct seq_file *m, void *v) { int i; - seq_printf(m, "simserinfo:1.0 driver:%s\n", serial_version); + seq_printf(m, "simserinfo:1.0\n"); for (i = 0; i < NR_PORTS; i++) - line_info(m, &rs_table[i]); + seq_printf(m, "%d: uart:16550 port:3F8 irq:%d\n", + i, rs_table[i].irq); return 0; } @@ -868,25 +477,6 @@ static const struct file_operations rs_proc_fops = { .release = single_release, }; -/* - * --------------------------------------------------------------------- - * rs_init() and friends - * - * rs_init() is called at boot-time to initialize the serial driver. 
- * --------------------------------------------------------------------- - */ - -/* - * This routine prints out the appropriate serial driver version - * number, and identifies which options were configured into this - * driver. - */ -static inline void show_serial_version(void) -{ - printk(KERN_INFO "%s version %s with", serial_name, serial_version); - printk(KERN_INFO " no serial options enabled\n"); -} - static const struct tty_operations hp_ops = { .open = rs_open, .close = rs_close, @@ -901,34 +491,31 @@ static const struct tty_operations hp_ops = { .unthrottle = rs_unthrottle, .send_xchar = rs_send_xchar, .set_termios = rs_set_termios, - .stop = rs_stop, - .start = rs_start, .hangup = rs_hangup, - .wait_until_sent = rs_wait_until_sent, .proc_fops = &rs_proc_fops, }; -/* - * The serial driver boot-time initialization code! - */ -static int __init -simrs_init (void) +static const struct tty_port_operations hp_port_ops = { + .activate = activate, + .shutdown = shutdown, +}; + +static int __init simrs_init(void) { - int i, rc; - struct serial_state *state; + struct serial_state *state; + int retval; if (!ia64_platform_is("hpsim")) return -ENODEV; - hp_simserial_driver = alloc_tty_driver(1); + hp_simserial_driver = alloc_tty_driver(NR_PORTS); if (!hp_simserial_driver) return -ENOMEM; - show_serial_version(); + printk(KERN_INFO "SimSerial driver with no serial options enabled\n"); /* Initialize the tty_driver structure */ - hp_simserial_driver->owner = THIS_MODULE; hp_simserial_driver->driver_name = "simserial"; hp_simserial_driver->name = "ttyS"; hp_simserial_driver->major = TTY_MAJOR; @@ -941,31 +528,33 @@ simrs_init (void) hp_simserial_driver->flags = TTY_DRIVER_REAL_RAW; tty_set_operations(hp_simserial_driver, &hp_ops); - /* - * Let's have a little bit of fun ! - */ - for (i = 0, state = rs_table; i < NR_PORTS; i++,state++) { + state = rs_table; + tty_port_init(&state->port); + state->port.ops = &hp_port_ops; + state->port.close_delay = 0; /* XXX really 0? 
*/ - if (state->type == PORT_UNKNOWN) continue; + retval = hpsim_get_irq(KEYBOARD_INTR); + if (retval < 0) { + printk(KERN_ERR "%s: out of interrupt vectors!\n", + __func__); + goto err_free_tty; + } - if (!state->irq) { - if ((rc = assign_irq_vector(AUTO_ASSIGN)) < 0) - panic("%s: out of interrupt vectors!\n", - __func__); - state->irq = rc; - ia64_ssc_connect_irq(KEYBOARD_INTR, state->irq); - } + state->irq = retval; - printk(KERN_INFO "ttyS%d at 0x%04lx (irq = %d) is a %s\n", - state->line, - state->port, state->irq, - uart_config[state->type].name); - } + /* the port is imaginary */ + printk(KERN_INFO "ttyS0 at 0x03f8 (irq = %d) is a 16550\n", state->irq); - if (tty_register_driver(hp_simserial_driver)) - panic("Couldn't register simserial driver\n"); + retval = tty_register_driver(hp_simserial_driver); + if (retval) { + printk(KERN_ERR "Couldn't register simserial driver\n"); + goto err_free_tty; + } return 0; +err_free_tty: + put_tty_driver(hp_simserial_driver); + return retval; } #ifndef MODULE diff --git a/arch/ia64/include/asm/hpsim.h b/arch/ia64/include/asm/hpsim.h index 892ab19..0fe5022 100644 --- a/arch/ia64/include/asm/hpsim.h +++ b/arch/ia64/include/asm/hpsim.h @@ -10,7 +10,7 @@ int simcons_register(void); struct tty_driver; extern struct tty_driver *hp_simserial_driver; -void ia64_ssc_connect_irq(long intr, long irq); +extern int hpsim_get_irq(int intr); void ia64_ctl_trace(long on); #endif diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h index 32551d3..b149b88 100644 --- a/arch/ia64/include/asm/paravirt.h +++ b/arch/ia64/include/asm/paravirt.h @@ -281,9 +281,9 @@ paravirt_init_missing_ticks_accounting(int cpu) pv_time_ops.init_missing_ticks_accounting(cpu); } -struct jump_label_key; -extern struct jump_label_key paravirt_steal_enabled; -extern struct jump_label_key paravirt_steal_rq_enabled; +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; static inline int paravirt_do_steal_accounting(unsigned long *new_itm) diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 1008682..1b22f6de 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -634,8 +634,8 @@ struct pv_irq_ops pv_irq_ops = { * pv_time_ops * time operations */ -struct jump_label_key paravirt_steal_enabled; -struct jump_label_key paravirt_steal_rq_enabled; +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; static int ia64_native_do_steal_accounting(unsigned long *new_itm) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 6d33c5c..9dc52b6 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -330,9 +330,7 @@ cpu_idle (void) normal_xtp(); #endif } - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); check_pgt_cache(); if (cpu_is_offline(cpu)) play_dead(); diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 422bea9..3a4a32b 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -90,9 +90,7 @@ void cpu_idle (void) idle(); } - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/m68k/emu/nfcon.c b/arch/m68k/emu/nfcon.c index ab20dc0..8db25e8 100644 --- a/arch/m68k/emu/nfcon.c +++ b/arch/m68k/emu/nfcon.c @@ -127,7 +127,6 @@ static int __init nfcon_init(void) if (!nfcon_tty_driver) return -ENOMEM; - nfcon_tty_driver->owner = THIS_MODULE; 
nfcon_tty_driver->driver_name = "nfcon"; nfcon_tty_driver->name = "nfcon"; nfcon_tty_driver->type = TTY_DRIVER_TYPE_SYSTEM; diff --git a/arch/m68k/kernel/process_mm.c b/arch/m68k/kernel/process_mm.c index 099283e..fe4186b 100644 --- a/arch/m68k/kernel/process_mm.c +++ b/arch/m68k/kernel/process_mm.c @@ -78,9 +78,7 @@ void cpu_idle(void) while (1) { while (!need_resched()) idle(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/m68k/kernel/process_no.c b/arch/m68k/kernel/process_no.c index 5e1078c..f7fe6c3 100644 --- a/arch/m68k/kernel/process_no.c +++ b/arch/m68k/kernel/process_no.c @@ -73,9 +73,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { idle(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 7dcb5bf..9155f7d 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -110,9 +110,7 @@ void cpu_idle(void) rcu_idle_exit(); tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); check_pgt_cache(); } } diff --git a/arch/mips/ath79/dev-usb.c b/arch/mips/ath79/dev-usb.c index 002d6d2..36e9570 100644 --- a/arch/mips/ath79/dev-usb.c +++ b/arch/mips/ath79/dev-usb.c @@ -17,6 +17,8 @@ #include <linux/irq.h> #include <linux/dma-mapping.h> #include <linux/platform_device.h> +#include <linux/usb/ehci_pdriver.h> +#include <linux/usb/ohci_pdriver.h> #include <asm/mach-ath79/ath79.h> #include <asm/mach-ath79/ar71xx_regs.h> @@ -36,14 +38,19 @@ static struct resource ath79_ohci_resources[] = { }; static u64 ath79_ohci_dmamask = DMA_BIT_MASK(32); + +static struct usb_ohci_pdata ath79_ohci_pdata = { +}; + static struct platform_device ath79_ohci_device = { - .name = "ath79-ohci", + .name = "ohci-platform", .id = -1, .resource = ath79_ohci_resources, .num_resources = ARRAY_SIZE(ath79_ohci_resources), .dev = { .dma_mask = &ath79_ohci_dmamask, .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = &ath79_ohci_pdata, }, }; @@ -60,8 +67,20 @@ static struct resource ath79_ehci_resources[] = { }; static u64 ath79_ehci_dmamask = DMA_BIT_MASK(32); + +static struct usb_ehci_pdata ath79_ehci_pdata_v1 = { + .has_synopsys_hc_bug = 1, + .port_power_off = 1, +}; + +static struct usb_ehci_pdata ath79_ehci_pdata_v2 = { + .caps_offset = 0x100, + .has_tt = 1, + .port_power_off = 1, +}; + static struct platform_device ath79_ehci_device = { - .name = "ath79-ehci", + .name = "ehci-platform", .id = -1, .resource = ath79_ehci_resources, .num_resources = ARRAY_SIZE(ath79_ehci_resources), @@ -101,7 +120,7 @@ static void __init ath79_usb_setup(void) ath79_ehci_resources[0].start = AR71XX_EHCI_BASE; ath79_ehci_resources[0].end = AR71XX_EHCI_BASE + AR71XX_EHCI_SIZE - 1; - ath79_ehci_device.name = "ar71xx-ehci"; + ath79_ehci_device.dev.platform_data = &ath79_ehci_pdata_v1; platform_device_register(&ath79_ehci_device); } @@ -142,7 +161,7 @@ static void __init ar724x_usb_setup(void) ath79_ehci_resources[0].start = AR724X_EHCI_BASE; ath79_ehci_resources[0].end = AR724X_EHCI_BASE + AR724X_EHCI_SIZE - 1; - ath79_ehci_device.name = "ar724x-ehci"; + ath79_ehci_device.dev.platform_data = &ath79_ehci_pdata_v2; platform_device_register(&ath79_ehci_device); } @@ -159,7 +178,7 @@ static void __init ar913x_usb_setup(void) ath79_ehci_resources[0].start = AR913X_EHCI_BASE; ath79_ehci_resources[0].end = 
AR913X_EHCI_BASE + AR913X_EHCI_SIZE - 1; - ath79_ehci_device.name = "ar913x-ehci"; + ath79_ehci_device.dev.platform_data = &ath79_ehci_pdata_v2; platform_device_register(&ath79_ehci_device); } @@ -176,7 +195,7 @@ static void __init ar933x_usb_setup(void) ath79_ehci_resources[0].start = AR933X_EHCI_BASE; ath79_ehci_resources[0].end = AR933X_EHCI_BASE + AR933X_EHCI_SIZE - 1; - ath79_ehci_device.name = "ar933x-ehci"; + ath79_ehci_device.dev.platform_data = &ath79_ehci_pdata_v2; platform_device_register(&ath79_ehci_device); } diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 1881b31..4d6d77e 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -20,7 +20,7 @@ #define WORD_INSN ".word" #endif -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\tnop\n\t" "nop\n\t" diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index e3b897a..811084f 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event) { int err = 0; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + switch (event->attr.type) { case PERF_TYPE_RAW: case PERF_TYPE_HARDWARE: diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 7955409..61f1cb4 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -80,9 +80,7 @@ void __noreturn cpu_idle(void) #endif rcu_idle_exit(); tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 28eec31..cac401d 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -123,9 +123,7 @@ void cpu_idle(void) idle(); } - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c index fc770be..4f00459 100644 --- a/arch/parisc/kernel/pdc_cons.c +++ b/arch/parisc/kernel/pdc_cons.c @@ -90,11 +90,13 @@ static int pdc_console_setup(struct console *co, char *options) #define PDC_CONS_POLL_DELAY (30 * HZ / 1000) -static struct timer_list pdc_console_timer; +static void pdc_console_poll(unsigned long unused); +static DEFINE_TIMER(pdc_console_timer, pdc_console_poll, 0, 0); +static struct tty_port tty_port; static int pdc_console_tty_open(struct tty_struct *tty, struct file *filp) { - + tty_port_tty_set(&tty_port, tty); mod_timer(&pdc_console_timer, jiffies + PDC_CONS_POLL_DELAY); return 0; @@ -102,8 +104,10 @@ static int pdc_console_tty_open(struct tty_struct *tty, struct file *filp) static void pdc_console_tty_close(struct tty_struct *tty, struct file *filp) { - if (!tty->count) - del_timer(&pdc_console_timer); + if (!tty->count) { + del_timer_sync(&pdc_console_timer); + tty_port_tty_set(&tty_port, NULL); + } } static int pdc_console_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) @@ -122,8 +126,6 @@ static int pdc_console_tty_chars_in_buffer(struct tty_struct *tty) return 0; /* no buffer */ } -static struct tty_driver *pdc_console_tty_driver; - static const struct tty_operations pdc_console_tty_ops = { .open = pdc_console_tty_open, .close = pdc_console_tty_close, @@ -134,10 +136,8 @@ static 
const struct tty_operations pdc_console_tty_ops = { static void pdc_console_poll(unsigned long unused) { - int data, count = 0; - - struct tty_struct *tty = pdc_console_tty_driver->ttys[0]; + struct tty_struct *tty = tty_port_tty_get(&tty_port); if (!tty) return; @@ -153,15 +153,17 @@ static void pdc_console_poll(unsigned long unused) if (count) tty_flip_buffer_push(tty); - if (tty->count && (pdc_cons.flags & CON_ENABLED)) + tty_kref_put(tty); + + if (pdc_cons.flags & CON_ENABLED) mod_timer(&pdc_console_timer, jiffies + PDC_CONS_POLL_DELAY); } +static struct tty_driver *pdc_console_tty_driver; + static int __init pdc_console_tty_driver_init(void) { - int err; - struct tty_driver *drv; /* Check if the console driver is still registered. * It is unregistered if the pdc console was not selected as the @@ -183,32 +185,29 @@ static int __init pdc_console_tty_driver_init(void) printk(KERN_INFO "The PDC console driver is still registered, removing CON_BOOT flag\n"); pdc_cons.flags &= ~CON_BOOT; - drv = alloc_tty_driver(1); + tty_port_init(&tty_port); - if (!drv) - return -ENOMEM; + pdc_console_tty_driver = alloc_tty_driver(1); - drv->driver_name = "pdc_cons"; - drv->name = "ttyB"; - drv->major = MUX_MAJOR; - drv->minor_start = 0; - drv->type = TTY_DRIVER_TYPE_SYSTEM; - drv->init_termios = tty_std_termios; - drv->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS; - tty_set_operations(drv, &pdc_console_tty_ops); + if (!pdc_console_tty_driver) + return -ENOMEM; - err = tty_register_driver(drv); + pdc_console_tty_driver->driver_name = "pdc_cons"; + pdc_console_tty_driver->name = "ttyB"; + pdc_console_tty_driver->major = MUX_MAJOR; + pdc_console_tty_driver->minor_start = 0; + pdc_console_tty_driver->type = TTY_DRIVER_TYPE_SYSTEM; + pdc_console_tty_driver->init_termios = tty_std_termios; + pdc_console_tty_driver->flags = TTY_DRIVER_REAL_RAW | + TTY_DRIVER_RESET_TERMIOS; + tty_set_operations(pdc_console_tty_driver, &pdc_console_tty_ops); + + err = tty_register_driver(pdc_console_tty_driver); if (err) { printk(KERN_ERR "Unable to register the PDC console TTY driver\n"); return err; } - pdc_console_tty_driver = drv; - - /* No need to initialize the pdc_console_timer if tty isn't allocated */ - init_timer(&pdc_console_timer); - pdc_console_timer.function = pdc_console_poll; - return 0; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 62c60b8..d4b94b3 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -71,9 +71,7 @@ void cpu_idle(void) while (1) { while (!need_resched()) barrier(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); check_pgt_cache(); } } diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 938986e..ae098c4 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -17,7 +17,7 @@ #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\n\t" "nop\n\t" diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 8f1df12..1a8093f 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -61,8 +61,6 @@ struct pt_regs; extern unsigned long perf_misc_flags(struct pt_regs *regs); extern unsigned long 
perf_instruction_pointer(struct pt_regs *regs); -#define PERF_EVENT_INDEX_OFFSET 1 - /* * Only override the default definitions in include/linux/perf_event.h * if we have hardware PMU support. diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 0a48bf5..c97fc60 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -101,11 +101,11 @@ void cpu_idle(void) ppc64_runlatch_on(); rcu_idle_exit(); tick_nohz_idle_exit(); - preempt_enable_no_resched(); - if (cpu_should_die()) + if (cpu_should_die()) { + sched_preempt_enable_no_resched(); cpu_die(); - schedule(); - preempt_disable(); + } + schedule_preempt_disabled(); } } diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 64483fd..c2e27ed 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event) if (!ppmu) return -ENOENT; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; @@ -1193,6 +1197,11 @@ static int power_pmu_event_init(struct perf_event *event) return err; } +static int power_pmu_event_idx(struct perf_event *event) +{ + return event->hw.idx; +} + struct pmu power_pmu = { .pmu_enable = power_pmu_enable, .pmu_disable = power_pmu_disable, @@ -1205,6 +1214,7 @@ struct pmu power_pmu = { .start_txn = power_pmu_start_txn, .cancel_txn = power_pmu_cancel_txn, .commit_txn = power_pmu_commit_txn, + .event_idx = power_pmu_event_idx, }; /* diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index 8fc6258..a5fbf4c 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -584,9 +584,7 @@ static void iseries_shared_idle(void) if (hvlpevent_is_pending()) process_iSeries_events(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } @@ -615,9 +613,7 @@ static void iseries_dedicated_idle(void) ppc64_runlatch_on(); rcu_idle_exit(); tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 95a6cf2..6c32190 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -13,7 +13,7 @@ #define ASM_ALIGN ".balign 4" #endif -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("0: brcl 0,0\n" ".pushsection __jump_table, \"aw\"\n" diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index a75f168..4eb444e 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -6,4 +6,3 @@ /* Empty, just to avoid compiling error */ -#define PERF_EVENT_INDEX_OFFSET 0 diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index b9a7fdd..e30b2df 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -165,13 +165,6 @@ static inline int ext_hash(u16 code) return (code + (code >> 9)) & 0xff; } -static void ext_int_hash_update(struct rcu_head *head) -{ - struct ext_int_info *p = container_of(head, struct ext_int_info, rcu); - - kfree(p); -} - int register_external_interrupt(u16 code, ext_int_handler_t handler) { struct ext_int_info *p; @@ -202,7 +195,7 @@ int 
unregister_external_interrupt(u16 code, ext_int_handler_t handler) list_for_each_entry_rcu(p, &ext_int_hash[index], entry) if (p->code == code && p->handler == handler) { list_del_rcu(&p->entry); - call_rcu(&p->rcu, ext_int_hash_update); + kfree_rcu(p, rcu); } spin_unlock_irqrestore(&ext_int_hash_lock, flags); return 0; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index e795933..7618085 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -97,9 +97,7 @@ void cpu_idle(void) tick_nohz_idle_exit(); if (test_thread_flag(TIF_MCCK_PENDING)) s390_handle_mcck(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2398ce6..b0e28c4 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -550,12 +550,6 @@ int __cpuinit start_secondary(void *cpuvoid) S390_lowcore.restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; __ctl_set_bit(0, 28); /* Enable lowcore protection */ - /* - * Wait until the cpu which brought this one up marked it - * active before enabling interrupts. - */ - while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask)) - cpu_relax(); local_irq_enable(); /* cpu_idle will call schedule for us */ cpu_idle(); diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index 25d0803..2707023 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -53,9 +53,7 @@ void __noreturn cpu_idle(void) while (!need_resched()) barrier(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 406508d..7e48928 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -114,9 +114,7 @@ void cpu_idle(void) rcu_idle_exit(); tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 10b14e3..068b8a2 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event) { int err; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + switch (event->attr.type) { case PERF_TYPE_RAW: case PERF_TYPE_HW_CACHE: diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index fc73a82..5080d16 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,7 +7,7 @@ #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\n\t" "nop\n\t" diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 614da62..8e16a4a 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event) if (atomic_read(&nmi_active) < 0) return -ENODEV; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + switch (attr->type) { case PERF_TYPE_HARDWARE: if (attr->config >= sparc_pmu->max_events) diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index f793742..935fdbc 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ 
-113,9 +113,7 @@ void cpu_idle(void) while (!need_resched()) cpu_relax(); } - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); check_pgt_cache(); } } @@ -138,9 +136,7 @@ void cpu_idle(void) while (!need_resched()) cpu_relax(); } - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); check_pgt_cache(); } } diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 39d8b05..06b5b5f 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -104,15 +104,13 @@ void cpu_idle(void) rcu_idle_exit(); tick_nohz_idle_exit(); - preempt_enable_no_resched(); - #ifdef CONFIG_HOTPLUG_CPU - if (cpu_is_offline(cpu)) + if (cpu_is_offline(cpu)) { + sched_preempt_enable_no_resched(); cpu_play_dead(); + } #endif - - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 4c1ac6e..6ae495e 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -108,9 +108,7 @@ void cpu_idle(void) } rcu_idle_exit(); tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5bed94e1..d523a6c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -82,6 +82,7 @@ config X86 select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP + select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) @@ -179,6 +180,9 @@ config ARCH_HAS_DEFAULT_IDLE config ARCH_HAS_CACHE_LINE_SIZE def_bool y +config ARCH_HAS_CPU_AUTOPROBE + def_bool y + config HAVE_SETUP_PER_CPU_AREA def_bool y diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 545d0ce..b3350bd 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -28,6 +28,7 @@ #include <crypto/aes.h> #include <crypto/cryptd.h> #include <crypto/ctr.h> +#include <asm/cpu_device_id.h> #include <asm/i387.h> #include <asm/aes.h> #include <crypto/scatterwalk.h> @@ -1253,14 +1254,19 @@ static struct crypto_alg __rfc4106_alg = { }; #endif + +static const struct x86_cpu_id aesni_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_AES), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); + static int __init aesni_init(void) { int err; - if (!cpu_has_aes) { - printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); + if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; - } if ((err = crypto_fpu_init())) goto fpu_err; diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel.c index b9d0026..493f959 100644 --- a/arch/x86/crypto/crc32c-intel.c +++ b/arch/x86/crypto/crc32c-intel.c @@ -31,6 +31,7 @@ #include <crypto/internal/hash.h> #include <asm/cpufeature.h> +#include <asm/cpu_device_id.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 @@ -173,13 +174,17 @@ static struct shash_alg alg = { } }; +static const struct x86_cpu_id crc32c_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_XMM4_2), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id); static int __init crc32c_intel_mod_init(void) { - if (cpu_has_xmm4_2) - return crypto_register_shash(&alg); - else + if (!x86_match_cpu(crc32c_cpu_id)) return -ENODEV; + return crypto_register_shash(&alg); } static void __exit crc32c_intel_mod_fini(void) diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 
976aa64..b4bf0a6 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -20,6 +20,7 @@ #include <crypto/gf128mul.h> #include <crypto/internal/hash.h> #include <asm/i387.h> +#include <asm/cpu_device_id.h> #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 @@ -294,15 +295,18 @@ static struct ahash_alg ghash_async_alg = { }, }; +static const struct x86_cpu_id pcmul_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */ + {} +}; +MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id); + static int __init ghash_pclmulqdqni_mod_init(void) { int err; - if (!cpu_has_pclmulqdq) { - printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not" - " detected.\n"); + if (!x86_match_cpu(pcmul_cpu_id)) return -ENODEV; - } err = crypto_register_shash(&ghash_alg); if (err) diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h new file mode 100644 index 0000000..ff501e5 --- /dev/null +++ b/arch/x86/include/asm/cpu_device_id.h @@ -0,0 +1,13 @@ +#ifndef _CPU_DEVICE_ID +#define _CPU_DEVICE_ID 1 + +/* + * Declare drivers belonging to specific x86 CPUs + * Similar in spirit to pci_device_id and related PCI functions + */ + +#include <linux/mod_devicetable.h> + +extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); + +#endif diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 8d67d42..dcb839e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -177,6 +177,7 @@ #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ #define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ +#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index da0b3ca..382f75d 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -7,7 +7,6 @@ typedef struct { unsigned int __softirq_pending; unsigned int __nmi_count; /* arch dependent */ - unsigned int irq0_irqs; #ifdef CONFIG_X86_LOCAL_APIC unsigned int apic_timer_irqs; /* arch dependent */ unsigned int irq_spurious_count; diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 205b063..74a2e31 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -97,11 +97,12 @@ /* Attribute search APIs */ extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern int inat_get_last_prefix_id(insn_byte_t last_pfx); extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, - insn_byte_t last_pfx, + int lpfx_id, insn_attr_t esc_attr); extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, - insn_byte_t last_pfx, + int lpfx_id, insn_attr_t esc_attr); extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 74df3f1..48eb30a 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -96,12 +96,6 @@ struct insn { #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ -/* The last prefix is needed for two-byte and three-byte opcodes */ -static inline insn_byte_t insn_last_prefix(struct insn *insn) -{ - return insn->prefixes.bytes[3]; -} - extern 
void insn_init(struct insn *insn, const void *kaddr, int x86_64); extern void insn_get_prefixes(struct insn *insn); extern void insn_get_opcode(struct insn *insn); @@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn) return X86_VEX_P(insn->vex_prefix.bytes[2]); } +/* Get the last prefix id from last prefix or VEX prefix */ +static inline int insn_last_prefix_id(struct insn *insn) +{ + if (insn_is_avx(insn)) + return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ + + if (insn->prefixes.bytes[3]) + return inat_get_last_prefix_id(insn->prefixes.bytes[3]); + + return 0; +} + /* Offset of each field from kaddr */ static inline int insn_offset_rex_prefix(struct insn *insn) { diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a32b18c..3a16c14 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -9,12 +9,12 @@ #define JUMP_LABEL_NOP_SIZE 5 -#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" +#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:" - JUMP_LABEL_INITIAL_NOP + STATIC_KEY_INITIAL_NOP ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" _ASM_PTR "1b, %l[l_yes], %c0 \n\t" diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a6962d9..ccb8059 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -56,6 +56,13 @@ #define MSR_OFFCORE_RSP_0 0x000001a6 #define MSR_OFFCORE_RSP_1 0x000001a7 +#define MSR_LBR_SELECT 0x000001c8 +#define MSR_LBR_TOS 0x000001c9 +#define MSR_LBR_NHM_FROM 0x00000680 +#define MSR_LBR_NHM_TO 0x000006c0 +#define MSR_LBR_CORE_FROM 0x00000040 +#define MSR_LBR_CORE_TO 0x00000060 + #define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index a7d2db9..c0180fd 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -230,9 +230,9 @@ static inline unsigned long long paravirt_sched_clock(void) return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); } -struct jump_label_key; -extern struct jump_label_key paravirt_steal_enabled; -extern struct jump_label_key paravirt_steal_rq_enabled; +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; static inline u64 paravirt_steal_clock(int cpu) { diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 461ce43..e8fb2c7 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void); #ifdef CONFIG_PERF_EVENTS extern void perf_events_lapic_init(void); -#define PERF_EVENT_INDEX_OFFSET 0 - /* * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. 
* This flag is otherwise unused and ABI specified to be 0, so nobody should diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 431793e..34baa0e 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -57,14 +57,10 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); static inline unsigned long long __cycles_2_ns(unsigned long long cyc) { - unsigned long long quot; - unsigned long long rem; int cpu = smp_processor_id(); unsigned long long ns = per_cpu(cyc2ns_offset, cpu); - quot = (cyc >> CYC2NS_SCALE_FACTOR); - rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1); - ns += quot * per_cpu(cyc2ns, cpu) + - ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR); + ns += mult_frac(cyc, per_cpu(cyc2ns, cpu), + (1UL << CYC2NS_SCALE_FACTOR)); return ns; } diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5369059..532d2e0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_OPTPROBES) += kprobes-opt.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 25f24dc..6ab6aa2 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -16,6 +16,7 @@ obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += proc.o capflags.o powerflags.o common.o obj-y += vmware.o hypervisor.o sched.o mshyperv.o obj-y += rdrand.o +obj-y += match.o obj-$(CONFIG_X86_32) += bugs.o obj-$(CONFIG_X86_64) += bugs_64.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f4773f4..0a44b90 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -5,6 +5,7 @@ #include <linux/mm.h> #include <linux/io.h> +#include <linux/sched.h> #include <asm/processor.h> #include <asm/apic.h> #include <asm/cpu.h> @@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + if (!check_tsc_unstable()) + sched_clock_stable = 1; } #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c new file mode 100644 index 0000000..5502b28 --- /dev/null +++ b/arch/x86/kernel/cpu/match.c @@ -0,0 +1,91 @@ +#include <asm/cpu_device_id.h> +#include <asm/processor.h> +#include <linux/cpu.h> +#include <linux/module.h> +#include <linux/slab.h> + +/** + * x86_match_cpu - match current CPU against an array of x86_cpu_ids + * @match: Pointer to array of x86_cpu_ids. Last entry terminated with + * {}. + * + * Return the entry if the current CPU matches the entries in the + * passed x86_cpu_id match table. Otherwise NULL. The match table + * contains vendor (X86_VENDOR_*), family, model and feature bits or + * respective wildcard entries. + * + * A typical table entry would be to match a specific CPU + * { X86_VENDOR_INTEL, 6, 0x12 } + * or to match a specific CPU feature + * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) } + * + * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY, + * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) + * + * Arrays used to match for this should also be declared using + * MODULE_DEVICE_TABLE(x86_cpu, ...)
+ * + * This always matches against the boot cpu, assuming models and features are + * consistent over all CPUs. + */ +const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) +{ + const struct x86_cpu_id *m; + struct cpuinfo_x86 *c = &boot_cpu_data; + + for (m = match; m->vendor | m->family | m->model | m->feature; m++) { + if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor) + continue; + if (m->family != X86_FAMILY_ANY && c->x86 != m->family) + continue; + if (m->model != X86_MODEL_ANY && c->x86_model != m->model) + continue; + if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature)) + continue; + return m; + } + return NULL; +} +EXPORT_SYMBOL(x86_match_cpu); + +ssize_t arch_print_cpu_modalias(struct device *dev, + struct device_attribute *attr, + char *bufptr) +{ + int size = PAGE_SIZE; + int i, n; + char *buf = bufptr; + + n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:" + "model:%04X:feature:", + boot_cpu_data.x86_vendor, + boot_cpu_data.x86, + boot_cpu_data.x86_model); + size -= n; + buf += n; + size -= 1; + for (i = 0; i < NCAPINTS*32; i++) { + if (boot_cpu_has(i)) { + n = snprintf(buf, size, ",%04X", i); + if (n >= size) { + WARN(1, "x86 features overflow page\n"); + break; + } + size -= n; + buf += n; + } + } + *buf++ = '\n'; + return buf - bufptr; +} + +int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (buf) { + arch_print_cpu_modalias(NULL, NULL, buf); + add_uevent_var(env, "MODALIAS=%s", buf); + kfree(buf); + } + return 0; +} diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5adce10..0a18d16 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/cpu.h> #include <linux/bitops.h> +#include <linux/device.h> #include <asm/apic.h> #include <asm/stacktrace.h> @@ -31,6 +32,7 @@ #include <asm/compat.h> #include <asm/smp.h> #include <asm/alternative.h> +#include <asm/timer.h> #include "perf_event.h" @@ -351,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event) return 0; } +/* + * check that branch_sample_type is compatible with + * settings needed for precise_ip > 1 which implies + * using the LBR to capture ALL taken branches at the + * priv levels of the measurement + */ +static inline int precise_br_compat(struct perf_event *event) +{ + u64 m = event->attr.branch_sample_type; + u64 b = 0; + + /* must capture all branches */ + if (!(m & PERF_SAMPLE_BRANCH_ANY)) + return 0; + + m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_user) + b |= PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_kernel) + b |= PERF_SAMPLE_BRANCH_KERNEL; + + /* + * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86 + */ + + return m == b; +} + int x86_pmu_hw_config(struct perf_event *event) { if (event->attr.precise_ip) { @@ -367,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip > precise) return -EOPNOTSUPP; + /* + * check that PEBS LBR correction does not conflict with + * whatever the user is asking with attr->branch_sample_type + */ + if (event->attr.precise_ip > 1) { + u64 *br_type = &event->attr.branch_sample_type; + + if (has_branch_stack(event)) { + if (!precise_br_compat(event)) + return -EOPNOTSUPP; + + /* branch_sample_type is compatible */ + + } else { + /* + * user did not specify branch_sample_type + * + * For PEBS fixups, we capture all + * the branches at the priv level of the 
+ * event. + */ + *br_type = PERF_SAMPLE_BRANCH_ANY; + + if (!event->attr.exclude_user) + *br_type |= PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_kernel) + *br_type |= PERF_SAMPLE_BRANCH_KERNEL; + } + } } /* @@ -424,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event) /* mark unused */ event->hw.extra_reg.idx = EXTRA_REG_NONE; + /* mark not used */ + event->hw.extra_reg.idx = EXTRA_REG_NONE; + event->hw.branch_reg.idx = EXTRA_REG_NONE; + return x86_pmu.hw_config(event); } @@ -1210,6 +1276,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) break; case CPU_STARTING: + if (x86_pmu.attr_rdpmc) + set_in_cr4(X86_CR4_PCE); if (x86_pmu.cpu_starting) x86_pmu.cpu_starting(cpu); break; @@ -1319,6 +1387,8 @@ static int __init init_hw_perf_events(void) } } + x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); pr_info("... generic registers: %d\n", x86_pmu.num_counters); @@ -1542,23 +1612,106 @@ static int x86_pmu_event_init(struct perf_event *event) return err; } +static int x86_pmu_event_idx(struct perf_event *event) +{ + int idx = event->hw.idx; + + if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { + idx -= X86_PMC_IDX_FIXED; + idx |= 1 << 30; + } + + return idx + 1; +} + +static ssize_t get_attr_rdpmc(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); +} + +static void change_rdpmc(void *info) +{ + bool enable = !!(unsigned long)info; + + if (enable) + set_in_cr4(X86_CR4_PCE); + else + clear_in_cr4(X86_CR4_PCE); +} + +static ssize_t set_attr_rdpmc(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val = simple_strtoul(buf, NULL, 0); + + if (!!val != !!x86_pmu.attr_rdpmc) { + x86_pmu.attr_rdpmc = !!val; + smp_call_function(change_rdpmc, (void *)val, 1); + } + + return count; +} + +static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); + +static struct attribute *x86_pmu_attrs[] = { + &dev_attr_rdpmc.attr, + NULL, +}; + +static struct attribute_group x86_pmu_attr_group = { + .attrs = x86_pmu_attrs, +}; + +static const struct attribute_group *x86_pmu_attr_groups[] = { + &x86_pmu_attr_group, + NULL, +}; + +static void x86_pmu_flush_branch_stack(void) +{ + if (x86_pmu.flush_branch_stack) + x86_pmu.flush_branch_stack(); +} + static struct pmu pmu = { - .pmu_enable = x86_pmu_enable, - .pmu_disable = x86_pmu_disable, + .pmu_enable = x86_pmu_enable, + .pmu_disable = x86_pmu_disable, + + .attr_groups = x86_pmu_attr_groups, .event_init = x86_pmu_event_init, - .add = x86_pmu_add, - .del = x86_pmu_del, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, + .add = x86_pmu_add, + .del = x86_pmu_del, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, .start_txn = x86_pmu_start_txn, .cancel_txn = x86_pmu_cancel_txn, .commit_txn = x86_pmu_commit_txn, + + .event_idx = x86_pmu_event_idx, + .flush_branch_stack = x86_pmu_flush_branch_stack, }; +void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) +{ + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return; + + if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + return; + + userpg->time_mult = this_cpu_read(cyc2ns); + userpg->time_shift = CYC2NS_SCALE_FACTOR; + userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; +} + /* * callchain support */ diff --git 
a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index c30c807..8484e77 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -33,6 +33,7 @@ enum extra_reg_type { EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ + EXTRA_REG_LBR = 2, /* lbr_select */ EXTRA_REG_MAX /* number of entries needed */ }; @@ -130,6 +131,8 @@ struct cpu_hw_events { void *lbr_context; struct perf_branch_stack lbr_stack; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; + struct er_account *lbr_sel; + u64 br_sel; /* * Intel host/guest exclude bits @@ -268,6 +271,29 @@ struct x86_pmu_quirk { void (*func)(void); }; +union x86_pmu_config { + struct { + u64 event:8, + umask:8, + usr:1, + os:1, + edge:1, + pc:1, + interrupt:1, + __reserved1:1, + en:1, + inv:1, + cmask:8, + event2:4, + __reserved2:4, + go:1, + ho:1; + } bits; + u64 value; +}; + +#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value + /* * struct x86_pmu - generic x86 pmu */ @@ -309,10 +335,19 @@ struct x86_pmu { struct x86_pmu_quirk *quirks; int perfctr_second_write; + /* + * sysfs attrs + */ + int attr_rdpmc; + + /* + * CPU Hotplug hooks + */ int (*cpu_prepare)(int cpu); void (*cpu_starting)(int cpu); void (*cpu_dying)(int cpu); void (*cpu_dead)(int cpu); + void (*flush_branch_stack)(void); /* * Intel Arch Perfmon v2+ @@ -334,6 +369,8 @@ struct x86_pmu { */ unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ int lbr_nr; /* hardware stack size */ + u64 lbr_sel_mask; /* LBR_SELECT valid bits */ + const int *lbr_sel_map; /* lbr_select mappings */ /* * Extra registers for events @@ -447,6 +484,15 @@ extern struct event_constraint emptyconstraint; extern struct event_constraint unconstrained; +static inline bool kernel_ip(unsigned long ip) +{ +#ifdef CONFIG_X86_32 + return ip > PAGE_OFFSET; +#else + return (long)ip < 0; +#endif +} + #ifdef CONFIG_CPU_SUP_AMD int amd_pmu_init(void); @@ -527,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void); void intel_pmu_lbr_init_atom(void); +void intel_pmu_lbr_init_snb(void); + +int intel_pmu_setup_lbr_filter(struct perf_event *event); + int p4_pmu_init(void); int p6_pmu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 67250a5..dd002fa 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (has_branch_stack(event)) + return -EOPNOTSUPP; + if (event->attr.exclude_host && event->attr.exclude_guest) /* * When HO == GO == 1 the hardware treats that as GO == HO == 0 diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 61d4f79..6a84e7f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) +{ + /* user explicitly requested branch sampling */ + if (has_branch_stack(event)) + return true; + + /* implicit branch sampling to correct PEBS skid */ + if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) + return true; + + return false; +} + static void intel_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event) cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); 
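A minimal stand-alone illustration of the X86_CONFIG() bit packing introduced in perf_event.h above, assuming GCC's low-to-high bit-field allocation on x86; the check value 0x1803fb1 is one of the raw stall-cycle encodings this series replaces with named fields further down:

    #include <assert.h>
    #include <stdint.h>

    /* Field layout copied from the union x86_pmu_config above. */
    union pmu_config {
        struct {
            uint64_t event:8, umask:8, usr:1, os:1, edge:1, pc:1,
                     interrupt:1, __reserved1:1, en:1, inv:1, cmask:8;
        } bits;
        uint64_t value;
    };
    #define X86_CONFIG(...) ((union pmu_config){ .bits = { __VA_ARGS__ } }).value

    int main(void)
    {
        /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES, cmask=1, inv=1 */
        assert(X86_CONFIG(.event = 0xb1, .umask = 0x3f,
                          .inv = 1, .cmask = 1) == 0x1803fb1);
        return 0;
    }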
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); + /* + * must be disabled before any actual event + * because any event may be combined with LBR + */ + if (intel_pmu_needs_lbr_smpl(event)) + intel_pmu_lbr_disable(event); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; @@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event) intel_pmu_enable_bts(hwc->config); return; } + /* + * must be enabled before any actual event + * because any event may be combined with LBR + */ + if (intel_pmu_needs_lbr_smpl(event)) + intel_pmu_lbr_enable(event); if (event->attr.exclude_host) cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); @@ -1058,6 +1084,9 @@ again: data.period = event->hw.last_period; + if (has_branch_stack(event)) + data.br_stack = &cpuc->lbr_stack; + if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); } @@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx) */ static struct event_constraint * __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) + struct perf_event *event, + struct hw_perf_event_extra *reg) { struct event_constraint *c = &emptyconstraint; - struct hw_perf_event_extra *reg = &event->hw.extra_reg; struct er_account *era; unsigned long flags; int orig_idx = reg->idx; /* already allocated shared msr */ if (reg->alloc) - return &unconstrained; + return NULL; /* call x86_get_event_constraint() */ again: era = &cpuc->shared_regs->regs[reg->idx]; @@ -1157,14 +1186,10 @@ again: reg->alloc = 1; /* - * All events using extra_reg are unconstrained. - * Avoids calling x86_get_event_constraints() - * - * Must revisit if extra_reg controlling events - * ever have constraints. Worst case we go through - * the regular event constraint table. + * need to call x86_get_event_constraint() + * to check if the associated event has constraints */ - c = &unconstrained; + c = NULL; } else if (intel_try_alt_er(event, orig_idx)) { raw_spin_unlock_irqrestore(&era->lock, flags); goto again; @@ -1201,11 +1226,23 @@ static struct event_constraint * intel_shared_regs_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - struct event_constraint *c = NULL; - - if (event->hw.extra_reg.idx != EXTRA_REG_NONE) - c = __intel_shared_reg_get_constraints(cpuc, event); - + struct event_constraint *c = NULL, *d; + struct hw_perf_event_extra *xreg, *breg; + + xreg = &event->hw.extra_reg; + if (xreg->idx != EXTRA_REG_NONE) { + c = __intel_shared_reg_get_constraints(cpuc, event, xreg); + if (c == &emptyconstraint) + return c; + } + breg = &event->hw.branch_reg; + if (breg->idx != EXTRA_REG_NONE) { + d = __intel_shared_reg_get_constraints(cpuc, event, breg); + if (d == &emptyconstraint) { + __intel_shared_reg_put_constraints(cpuc, xreg); + c = d; + } + } return c; } @@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, reg = &event->hw.extra_reg; if (reg->idx != EXTRA_REG_NONE) __intel_shared_reg_put_constraints(cpuc, reg); + + reg = &event->hw.branch_reg; + if (reg->idx != EXTRA_REG_NONE) + __intel_shared_reg_put_constraints(cpuc, reg); } static void intel_put_event_constraints(struct cpu_hw_events *cpuc, @@ -1288,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event) * * Thereby we gain a PEBS capable cycle counter. 
*/ - u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ + u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); event->hw.config = alt_config; } + if (intel_pmu_needs_lbr_smpl(event)) { + ret = intel_pmu_setup_lbr_filter(event); + if (ret) + return ret; + } + if (event->attr.type != PERF_TYPE_RAW) return 0; @@ -1432,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - if (!x86_pmu.extra_regs) + if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map)) return NOTIFY_OK; cpuc->shared_regs = allocate_shared_regs(cpu); @@ -1454,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu) */ intel_pmu_lbr_reset(); - if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) + cpuc->lbr_sel = NULL; + + if (!cpuc->shared_regs) return; - for_each_cpu(i, topology_thread_cpumask(cpu)) { - struct intel_shared_regs *pc; + if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) { + for_each_cpu(i, topology_thread_cpumask(cpu)) { + struct intel_shared_regs *pc; - pc = per_cpu(cpu_hw_events, i).shared_regs; - if (pc && pc->core_id == core_id) { - cpuc->kfree_on_online = cpuc->shared_regs; - cpuc->shared_regs = pc; - break; + pc = per_cpu(cpu_hw_events, i).shared_regs; + if (pc && pc->core_id == core_id) { + cpuc->kfree_on_online = cpuc->shared_regs; + cpuc->shared_regs = pc; + break; + } } + cpuc->shared_regs->core_id = core_id; + cpuc->shared_regs->refcnt++; } - cpuc->shared_regs->core_id = core_id; - cpuc->shared_regs->refcnt++; + if (x86_pmu.lbr_sel_map) + cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; } static void intel_pmu_cpu_dying(int cpu) @@ -1487,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu) fini_debug_store_on_cpu(cpu); } +static void intel_pmu_flush_branch_stack(void) +{ + /* + * Intel LBR does not tag entries with the + * PID of the current task, then we need to + * flush it on ctxsw + * For now, we simply reset it + */ + if (x86_pmu.lbr_nr) + intel_pmu_lbr_reset(); +} + static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -1514,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, .guest_get_msrs = intel_guest_get_msrs, + .flush_branch_stack = intel_pmu_flush_branch_stack, }; static __init void intel_clovertown_quirk(void) @@ -1690,9 +1757,11 @@ __init int intel_pmu_init(void) x86_pmu.extra_regs = intel_nehalem_extra_regs; /* UOPS_ISSUED.STALLED_CYCLES */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = + X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); x86_add_quirk(intel_nehalem_quirk); @@ -1727,9 +1796,11 @@ __init int intel_pmu_init(void) x86_pmu.er_flags |= ERF_HAS_RSP_1; /* UOPS_ISSUED.STALLED_CYCLES */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; + 
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = + X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); pr_cont("Westmere events, "); break; @@ -1740,7 +1811,7 @@ __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - intel_pmu_lbr_init_nhm(); + intel_pmu_lbr_init_snb(); x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; @@ -1750,9 +1821,11 @@ __init int intel_pmu_init(void) x86_pmu.er_flags |= ERF_NO_HT_SHARING; /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ - intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1; + intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = + X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); pr_cont("SandyBridge events, "); break; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index d6bd49f..7f64df1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -3,6 +3,7 @@ #include <linux/slab.h> #include <asm/perf_event.h> +#include <asm/insn.h> #include "perf_event.h" @@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event) hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; cpuc->pebs_enabled |= 1ULL << hwc->idx; - - if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) - intel_pmu_lbr_enable(event); } void intel_pmu_pebs_disable(struct perf_event *event) @@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); hwc->config |= ARCH_PERFMON_EVENTSEL_INT; - - if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) - intel_pmu_lbr_disable(event); } void intel_pmu_pebs_enable_all(void) @@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void) wrmsrl(MSR_IA32_PEBS_ENABLE, 0); } -#include <asm/insn.h> - -static inline bool kernel_ip(unsigned long ip) -{ -#ifdef CONFIG_X86_32 - return ip > PAGE_OFFSET; -#else - return (long)ip < 0; -#endif -} - static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * both formats and we don't use the other fields in this * routine. 
*/ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct pebs_record_core *pebs = __pebs; struct perf_sample_data data; struct pt_regs regs; @@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, else regs.flags &= ~PERF_EFLAGS_EXACT; + if (has_branch_stack(event)) + data.br_stack = &cpuc->lbr_stack; + if (perf_event_overflow(event, &data, &regs)) x86_pmu_stop(event, 0); } diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 47a7e63..520b426 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -3,6 +3,7 @@ #include <asm/perf_event.h> #include <asm/msr.h> +#include <asm/insn.h> #include "perf_event.h" @@ -14,6 +15,100 @@ enum { }; /* + * Intel LBR_SELECT bits + * Intel Vol3a, April 2011, Section 16.7 Table 16-10 + * + * Hardware branch filter (not available on all CPUs) + */ +#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ +#define LBR_USER_BIT 1 /* do not capture at ring > 0 */ +#define LBR_JCC_BIT 2 /* do not capture conditional branches */ +#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ +#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ +#define LBR_RETURN_BIT 5 /* do not capture near returns */ +#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ +#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ +#define LBR_FAR_BIT 8 /* do not capture far branches */ + +#define LBR_KERNEL (1 << LBR_KERNEL_BIT) +#define LBR_USER (1 << LBR_USER_BIT) +#define LBR_JCC (1 << LBR_JCC_BIT) +#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) +#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) +#define LBR_RETURN (1 << LBR_RETURN_BIT) +#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) +#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) +#define LBR_FAR (1 << LBR_FAR_BIT) + +#define LBR_PLM (LBR_KERNEL | LBR_USER) + +#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */ +#define LBR_NOT_SUPP -1 /* LBR filter not supported */ +#define LBR_IGN 0 /* ignored */ + +#define LBR_ANY \ + (LBR_JCC |\ + LBR_REL_CALL |\ + LBR_IND_CALL |\ + LBR_RETURN |\ + LBR_REL_JMP |\ + LBR_IND_JMP |\ + LBR_FAR) + +#define LBR_FROM_FLAG_MISPRED (1ULL << 63) + +#define for_each_branch_sample_type(x) \ + for ((x) = PERF_SAMPLE_BRANCH_USER; \ + (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1) + +/* + * x86 control flow change classification + * x86 control flow changes include branches, interrupts, traps, faults + */ +enum { + X86_BR_NONE = 0, /* unknown */ + + X86_BR_USER = 1 << 0, /* branch target is user */ + X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ + + X86_BR_CALL = 1 << 2, /* call */ + X86_BR_RET = 1 << 3, /* return */ + X86_BR_SYSCALL = 1 << 4, /* syscall */ + X86_BR_SYSRET = 1 << 5, /* syscall return */ + X86_BR_INT = 1 << 6, /* sw interrupt */ + X86_BR_IRET = 1 << 7, /* return from interrupt */ + X86_BR_JCC = 1 << 8, /* conditional */ + X86_BR_JMP = 1 << 9, /* jump */ + X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ + X86_BR_IND_CALL = 1 << 11,/* indirect calls */ +}; + +#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) + +#define X86_BR_ANY \ + (X86_BR_CALL |\ + X86_BR_RET |\ + X86_BR_SYSCALL |\ + X86_BR_SYSRET |\ + X86_BR_INT |\ + X86_BR_IRET |\ + X86_BR_JCC |\ + X86_BR_JMP |\ + X86_BR_IRQ |\ + X86_BR_IND_CALL) + +#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) + +#define X86_BR_ANY_CALL \ + (X86_BR_CALL |\ + X86_BR_IND_CALL |\ + X86_BR_SYSCALL |\ + X86_BR_IRQ |\ + X86_BR_INT) + +static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); + 
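The defines above drive both filtering paths. LBR_SELECT is a suppress-mask, so a "capture these branch types" request has to be inverted before it reaches the MSR; a small stand-alone sketch of that derivation (constants mirrored from above, helper name invented for illustration — the in-kernel equivalent is intel_pmu_setup_hw_lbr_filter() below):

    #include <stdint.h>
    #include <stdio.h>

    #define LBR_KERNEL   (1 << 0)  /* suppress ring0 capture */
    #define LBR_USER     (1 << 1)  /* suppress ring > 0 capture */
    #define LBR_RETURN   (1 << 5)  /* suppress near returns */
    #define LBR_SEL_MASK 0x1ff     /* valid LBR_SELECT bits */

    /* Collect the bits for the branch types the user wants, then
     * flip them so everything *else* is suppressed. */
    static uint64_t lbr_select_config(uint64_t capture_mask)
    {
        return ~capture_mask & LBR_SEL_MASK;
    }

    int main(void)
    {
        /* capture kernel-level near returns only -> 0x1de */
        printf("LBR_SELECT = %#llx\n", (unsigned long long)
               lbr_select_config(LBR_KERNEL | LBR_RETURN));
        return 0;
    }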
+/* * We only support LBR implementations that have FREEZE_LBRS_ON_PMI * otherwise it becomes near impossible to get a reliable stack. */ @@ -21,6 +116,10 @@ enum { static void __intel_pmu_lbr_enable(void) { u64 debugctl; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->lbr_sel) + wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config); rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); @@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event) * Reset the LBR stack if we changed task context to * avoid data leaks. */ - if (event->ctx->task && cpuc->lbr_context != event->ctx) { intel_pmu_lbr_reset(); cpuc->lbr_context = event->ctx; } + cpuc->br_sel = event->hw.branch_reg.reg; cpuc->lbr_users++; } @@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event) cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); - if (cpuc->enabled && !cpuc->lbr_users) + if (cpuc->enabled && !cpuc->lbr_users) { __intel_pmu_lbr_disable(); + /* avoid stale pointer */ + cpuc->lbr_context = NULL; + } } void intel_pmu_lbr_enable_all(void) @@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void) __intel_pmu_lbr_disable(); } +/* + * TOS = most recently recorded branch + */ static inline u64 intel_pmu_lbr_tos(void) { u64 tos; @@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); - cpuc->lbr_entries[i].from = msr_lastbranch.from; - cpuc->lbr_entries[i].to = msr_lastbranch.to; - cpuc->lbr_entries[i].flags = 0; + cpuc->lbr_entries[i].from = msr_lastbranch.from; + cpuc->lbr_entries[i].to = msr_lastbranch.to; + cpuc->lbr_entries[i].mispred = 0; + cpuc->lbr_entries[i].predicted = 0; + cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; } -#define LBR_FROM_FLAG_MISPRED (1ULL << 63) - /* * Due to lack of segmentation in Linux the effective address (offset) * is the same as the linear address, allowing us to merge the LIP and EIP @@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) for (i = 0; i < x86_pmu.lbr_nr; i++) { unsigned long lbr_idx = (tos - i) & mask; - u64 from, to, flags = 0; + u64 from, to, mis = 0, pred = 0; rdmsrl(x86_pmu.lbr_from + lbr_idx, from); rdmsrl(x86_pmu.lbr_to + lbr_idx, to); if (lbr_format == LBR_FORMAT_EIP_FLAGS) { - flags = !!(from & LBR_FROM_FLAG_MISPRED); + mis = !!(from & LBR_FROM_FLAG_MISPRED); + pred = !mis; from = (u64)((((s64)from) << 1) >> 1); } - cpuc->lbr_entries[i].from = from; - cpuc->lbr_entries[i].to = to; - cpuc->lbr_entries[i].flags = flags; + cpuc->lbr_entries[i].from = from; + cpuc->lbr_entries[i].to = to; + cpuc->lbr_entries[i].mispred = mis; + cpuc->lbr_entries[i].predicted = pred; + cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; } @@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void) intel_pmu_lbr_read_32(cpuc); else intel_pmu_lbr_read_64(cpuc); + + intel_pmu_lbr_filter(cpuc); +} + +/* + * SW filter is used: + * - in case there is no HW filter + * - in case the HW filter has errata or limitations + */ +static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) +{ + u64 br_type = event->attr.branch_sample_type; + int mask = 0; + + if (br_type & PERF_SAMPLE_BRANCH_USER) + mask |= X86_BR_USER; + + if (br_type & PERF_SAMPLE_BRANCH_KERNEL) + mask |= X86_BR_KERNEL; + + /* we ignore BRANCH_HV here */ + + if (br_type & PERF_SAMPLE_BRANCH_ANY) + mask |= X86_BR_ANY; + + if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) + mask |= X86_BR_ANY_CALL; + + if 
(br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; + + if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) + mask |= X86_BR_IND_CALL; + /* + * stash actual user request into reg, it may + * be used by fixup code for some CPU + */ + event->hw.branch_reg.reg = mask; +} + +/* + * setup the HW LBR filter + * Used only when available, may not be enough to disambiguate + * all branches, may need the help of the SW filter + */ +static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) +{ + struct hw_perf_event_extra *reg; + u64 br_type = event->attr.branch_sample_type; + u64 mask = 0, m; + u64 v; + + for_each_branch_sample_type(m) { + if (!(br_type & m)) + continue; + + v = x86_pmu.lbr_sel_map[m]; + if (v == LBR_NOT_SUPP) + return -EOPNOTSUPP; + + if (v != LBR_IGN) + mask |= v; + } + reg = &event->hw.branch_reg; + reg->idx = EXTRA_REG_LBR; + + /* LBR_SELECT operates in suppress mode so invert mask */ + reg->config = ~mask & x86_pmu.lbr_sel_mask; + + return 0; +} + +int intel_pmu_setup_lbr_filter(struct perf_event *event) +{ + int ret = 0; + + /* + * no LBR on this PMU + */ + if (!x86_pmu.lbr_nr) + return -EOPNOTSUPP; + + /* + * setup SW LBR filter + */ + intel_pmu_setup_sw_lbr_filter(event); + + /* + * setup HW LBR filter, if any + */ + if (x86_pmu.lbr_sel_map) + ret = intel_pmu_setup_hw_lbr_filter(event); + + return ret; } +/* + * return the type of control flow change at address "from" + * instruction is not necessarily a branch (in case of interrupt). + * + * The branch type returned also includes the priv level of the + * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). + * + * If a branch type is unknown OR the instruction cannot be + * decoded (e.g., text page not present), then X86_BR_NONE is + * returned. + */ +static int branch_type(unsigned long from, unsigned long to) +{ + struct insn insn; + void *addr; + int bytes, size = MAX_INSN_SIZE; + int ret = X86_BR_NONE; + int ext, to_plm, from_plm; + u8 buf[MAX_INSN_SIZE]; + int is64 = 0; + + to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; + from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER; + + /* + * may be zero if the LBR did not fill up after a reset by the time + * we get a PMU interrupt + */ + if (from == 0 || to == 0) + return X86_BR_NONE; + + if (from_plm == X86_BR_USER) { + /* + * can happen if measuring at the user level only + * and we interrupt in a kernel thread, e.g., idle. + */ + if (!current->mm) + return X86_BR_NONE; + + /* may fail if text not present */ + bytes = copy_from_user_nmi(buf, (void __user *)from, size); + if (bytes != size) + return X86_BR_NONE; + + addr = buf; + } else + addr = (void *)from; + + /* + * decoder needs to know the ABI especially + * on 64-bit systems running 32-bit apps + */ +#ifdef CONFIG_X86_64 + is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); +#endif + insn_init(&insn, addr, is64); + insn_get_opcode(&insn); + + switch (insn.opcode.bytes[0]) { + case 0xf: + switch (insn.opcode.bytes[1]) { + case 0x05: /* syscall */ + case 0x34: /* sysenter */ + ret = X86_BR_SYSCALL; + break; + case 0x07: /* sysret */ + case 0x35: /* sysexit */ + ret = X86_BR_SYSRET; + break; + case 0x80 ... 0x8f: /* conditional */ + ret = X86_BR_JCC; + break; + default: + ret = X86_BR_NONE; + } + break; + case 0x70 ... 
0x7f: /* conditional */ + ret = X86_BR_JCC; + break; + case 0xc2: /* near ret */ + case 0xc3: /* near ret */ + case 0xca: /* far ret */ + case 0xcb: /* far ret */ + ret = X86_BR_RET; + break; + case 0xcf: /* iret */ + ret = X86_BR_IRET; + break; + case 0xcc ... 0xce: /* int */ + ret = X86_BR_INT; + break; + case 0xe8: /* call near rel */ + case 0x9a: /* call far absolute */ + ret = X86_BR_CALL; + break; + case 0xe0 ... 0xe3: /* loop jmp */ + ret = X86_BR_JCC; + break; + case 0xe9 ... 0xeb: /* jmp */ + ret = X86_BR_JMP; + break; + case 0xff: /* call near absolute, call far absolute ind */ + insn_get_modrm(&insn); + ext = (insn.modrm.bytes[0] >> 3) & 0x7; + switch (ext) { + case 2: /* near ind call */ + case 3: /* far ind call */ + ret = X86_BR_IND_CALL; + break; + case 4: + case 5: + ret = X86_BR_JMP; + break; + } + break; + default: + ret = X86_BR_NONE; + } + /* + * interrupts, traps, faults (and thus ring transition) may + * occur on any instruction. Thus, to classify them correctly, + * we need to first look at the from and to priv levels. If they + * are different and to is in the kernel, then it indicates + * a ring transition. If the from instruction is not a ring + * transition instr (syscall, sysenter, int), then it means + * it was an irq, trap or fault. + * + * we have no way of detecting kernel to kernel faults. + */ + if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL + && ret != X86_BR_SYSCALL && ret != X86_BR_INT) + ret = X86_BR_IRQ; + + /* + * branch priv level determined by target as + * is done by HW when LBR_SELECT is implemented + */ + if (ret != X86_BR_NONE) + ret |= to_plm; + + return ret; +} + +/* + * implement actual branch filter based on user demand. + * Hardware may not exactly satisfy that request, thus + * we need to inspect opcodes. Mismatched branches are + * discarded. Therefore, the number of branches returned + * in PERF_SAMPLE_BRANCH_STACK sample may vary. 
+ */ +static void +intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) +{ + u64 from, to; + int br_sel = cpuc->br_sel; + int i, j, type; + bool compress = false; + + /* if sampling all branches, then nothing to filter */ + if ((br_sel & X86_BR_ALL) == X86_BR_ALL) + return; + + for (i = 0; i < cpuc->lbr_stack.nr; i++) { + + from = cpuc->lbr_entries[i].from; + to = cpuc->lbr_entries[i].to; + + type = branch_type(from, to); + + /* if type does not correspond, then discard */ + if (type == X86_BR_NONE || (br_sel & type) != type) { + cpuc->lbr_entries[i].from = 0; + compress = true; + } + } + + if (!compress) + return; + + /* remove all entries with from=0 */ + for (i = 0; i < cpuc->lbr_stack.nr; ) { + if (!cpuc->lbr_entries[i].from) { + j = i; + while (++j < cpuc->lbr_stack.nr) + cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; + cpuc->lbr_stack.nr--; + if (!cpuc->lbr_entries[i].from) + continue; + } + i++; + } +} + +/* + * Map interface branch filters onto LBR filters + */ +static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { + [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP + | LBR_IND_JMP | LBR_FAR, + /* + * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches + */ + [PERF_SAMPLE_BRANCH_ANY_CALL] = + LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, + /* + * NHM/WSM erratum: must include IND_JMP to capture IND_CALL + */ + [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, +}; + +static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { + [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL + | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, +}; + +/* core */ void intel_pmu_lbr_init_core(void) { x86_pmu.lbr_nr = 4; - x86_pmu.lbr_tos = 0x01c9; - x86_pmu.lbr_from = 0x40; - x86_pmu.lbr_to = 0x60; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_CORE_FROM; + x86_pmu.lbr_to = MSR_LBR_CORE_TO; + + /* + * SW branch filter usage: + * - compensate for lack of HW filter + */ + pr_cont("4-deep LBR, "); } +/* nehalem/westmere */ void intel_pmu_lbr_init_nhm(void) { x86_pmu.lbr_nr = 16; - x86_pmu.lbr_tos = 0x01c9; - x86_pmu.lbr_from = 0x680; - x86_pmu.lbr_to = 0x6c0; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = nhm_lbr_sel_map; + + /* + * SW branch filter usage: + * - work around LBR_SEL errata (see above) + * - support syscall, sysret capture. + * That requires LBR_FAR but that means far + * jmps need to be filtered out + */ + pr_cont("16-deep LBR, "); +} + +/* sandy bridge */ +void intel_pmu_lbr_init_snb(void) +{ + x86_pmu.lbr_nr = 16; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = snb_lbr_sel_map; + + /* + * SW branch filter usage: + * - support syscall, sysret capture. 
+/* core */
 void intel_pmu_lbr_init_core(void)
 {
 	x86_pmu.lbr_nr     = 4;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x40;
-	x86_pmu.lbr_to     = 0x60;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	/*
+	 * SW branch filter usage:
+	 * - compensate for lack of HW filter
+	 */
+	pr_cont("4-deep LBR, ");
 }
 
+/* nehalem/westmere */
 void intel_pmu_lbr_init_nhm(void)
 {
 	x86_pmu.lbr_nr     = 16;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x680;
-	x86_pmu.lbr_to     = 0x6c0;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+	/*
+	 * SW branch filter usage:
+	 * - workaround LBR_SEL errata (see above)
+	 * - support syscall, sysret capture.
+	 *   That requires LBR_FAR, but that means far
+	 *   jmps need to be filtered out.
+	 */
+	pr_cont("16-deep LBR, ");
+}
+
+/* sandy bridge */
+void intel_pmu_lbr_init_snb(void)
+{
+	x86_pmu.lbr_nr   = 16;
+	x86_pmu.lbr_tos  = MSR_LBR_TOS;
+	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+	/*
+	 * SW branch filter usage:
+	 * - support syscall, sysret capture.
+	 *   That requires LBR_FAR, but that means far
+	 *   jmps need to be filtered out.
+	 */
+	pr_cont("16-deep LBR, ");
+}
 
+/* atom */
 void intel_pmu_lbr_init_atom(void)
 {
+	/*
+	 * only models starting at stepping 10 seem
+	 * to have an operational LBR which can freeze
+	 * on PMU interrupt
+	 */
+	if (boot_cpu_data.x86_mask < 10) {
+		pr_cont("LBR disabled due to erratum");
+		return;
+	}
+
 	x86_pmu.lbr_nr     = 8;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x40;
-	x86_pmu.lbr_to     = 0x60;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	/*
+	 * SW branch filter usage:
+	 * - compensate for lack of HW filter
+	 */
+	pr_cont("8-deep LBR, ");
 }
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index c7f64e6..addf9e8 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -40,6 +40,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 		{ X86_FEATURE_EPB,		CR_ECX, 3, 0x00000006, 0 },
 		{ X86_FEATURE_XSAVEOPT,		CR_EAX, 0, 0x0000000d, 1 },
 		{ X86_FEATURE_CPB,		CR_EDX, 9, 0x80000007, 0 },
+		{ X86_FEATURE_HW_PSTATE,	CR_EDX, 7, 0x80000007, 0 },
 		{ X86_FEATURE_NPT,		CR_EDX, 0, 0x8000000a, 0 },
 		{ X86_FEATURE_LBRV,		CR_EDX, 1, 0x8000000a, 0 },
 		{ X86_FEATURE_SVML,		CR_EDX, 2, 0x8000000a, 0 },
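
The new scattered-feature entry above means: CPUID leaf 0x80000007, register EDX, bit 7. A userspace cross-check of the same bit, purely illustrative, using GCC's <cpuid.h> helper:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Same leaf/register/bit the X86_FEATURE_HW_PSTATE entry reads. */
	if (!__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx))
		return 1;
	printf("hw_pstate: %s\n", (edx & (1u << 7)) ? "yes" : "no");
	return 0;
}
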
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 40fc861..58b7f27 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -100,13 +100,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
 	irqctx->tinfo.task = curctx->tinfo.task;
 	irqctx->tinfo.previous_esp = current_stack_pointer;
 
-	/*
-	 * Copy the softirq bits in preempt_count so that the
-	 * softirq checks work in the hardirq context.
-	 */
-	irqctx->tinfo.preempt_count =
-		(irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
-		(curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+	/* Copy the preempt_count so that the [soft]irq checks work. */
+	irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
 
 	if (unlikely(overflow))
 		call_on_stack(print_stack_overflow, isp);
@@ -196,7 +191,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
 	if (unlikely(!desc))
 		return false;
 
-	if (!execute_on_irq_stack(overflow, desc, irq)) {
+	if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
 		if (unlikely(overflow))
 			print_stack_overflow();
 		desc->handle_irq(irq, desc);
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h
new file mode 100644
index 0000000..3230b68
--- /dev/null
+++ b/arch/x86/kernel/kprobes-common.h
@@ -0,0 +1,102 @@
+#ifndef __X86_KERNEL_KPROBES_COMMON_H
+#define __X86_KERNEL_KPROBES_COMMON_H
+
+/* Kprobes and Optprobes common header */
+
+#ifdef CONFIG_X86_64
+#define SAVE_REGS_STRING			\
+	/* Skip cs, ip, orig_ax. */		\
+	"	subq $24, %rsp\n"		\
+	"	pushq %rdi\n"			\
+	"	pushq %rsi\n"			\
+	"	pushq %rdx\n"			\
+	"	pushq %rcx\n"			\
+	"	pushq %rax\n"			\
+	"	pushq %r8\n"			\
+	"	pushq %r9\n"			\
+	"	pushq %r10\n"			\
+	"	pushq %r11\n"			\
+	"	pushq %rbx\n"			\
+	"	pushq %rbp\n"			\
+	"	pushq %r12\n"			\
+	"	pushq %r13\n"			\
+	"	pushq %r14\n"			\
+	"	pushq %r15\n"
+#define RESTORE_REGS_STRING			\
+	"	popq %r15\n"			\
+	"	popq %r14\n"			\
+	"	popq %r13\n"			\
+	"	popq %r12\n"			\
+	"	popq %rbp\n"			\
+	"	popq %rbx\n"			\
+	"	popq %r11\n"			\
+	"	popq %r10\n"			\
+	"	popq %r9\n"			\
+	"	popq %r8\n"			\
+	"	popq %rax\n"			\
+	"	popq %rcx\n"			\
+	"	popq %rdx\n"			\
+	"	popq %rsi\n"			\
+	"	popq %rdi\n"			\
+	/* Skip orig_ax, ip, cs */		\
+	"	addq $24, %rsp\n"
+#else
+#define SAVE_REGS_STRING			\
+	/* Skip cs, ip, orig_ax and gs. */	\
+	"	subl $16, %esp\n"		\
+	"	pushl %fs\n"			\
+	"	pushl %es\n"			\
+	"	pushl %ds\n"			\
+	"	pushl %eax\n"			\
+	"	pushl %ebp\n"			\
+	"	pushl %edi\n"			\
+	"	pushl %esi\n"			\
+	"	pushl %edx\n"			\
+	"	pushl %ecx\n"			\
+	"	pushl %ebx\n"
#define RESTORE_REGS_STRING			\
+	"	popl %ebx\n"			\
+	"	popl %ecx\n"			\
+	"	popl %edx\n"			\
+	"	popl %esi\n"			\
+	"	popl %edi\n"			\
+	"	popl %ebp\n"			\
+	"	popl %eax\n"			\
+	/* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here */ \
+	"	addl $24, %esp\n"
+#endif
+
+/* Check whether the instruction can be boosted */
+extern int can_boost(kprobe_opcode_t *instruction);
+/* Recover the instruction if the given address is probed */
+extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
+						unsigned long addr);
+/*
+ * Copy an instruction and adjust the displacement if the instruction
+ * uses the %rip-relative addressing mode.
+ */
+extern int __copy_instruction(u8 *dest, u8 *src);
+
+/* Generate a relative-jump/call instruction */
+extern void synthesize_reljump(void *from, void *to);
+extern void synthesize_relcall(void *from, void *to);
+
+#ifdef CONFIG_OPTPROBES
+extern int arch_init_optprobes(void);
+extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
+extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
+#else	/* !CONFIG_OPTPROBES */
+static inline int arch_init_optprobes(void)
+{
+	return 0;
+}
+static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+{
+	return 0;
+}
+static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
+{
+	return addr;
+}
+#endif
+#endif
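
The byte constants in SAVE_REGS_STRING/RESTORE_REGS_STRING are just the skipped pt_regs slots multiplied by the word size; a hypothetical C11 cross-check of that arithmetic (not kernel code, just the bookkeeping spelled out):

#include <assert.h>

/* x86-64: orig_ax, ip, cs = three 8-byte slots ("subq/addq $24"). */
static_assert(3 * 8 == 24, "64-bit skip");
/* x86-32 entry: cs, ip, orig_ax, gs = four 4-byte slots ("subl $16"). */
static_assert(4 * 4 == 16, "32-bit entry skip");
/* x86-32 exit: ds, es, fs, gs, orig_ax, ip = six slots ("addl $24"). */
static_assert(6 * 4 == 24, "32-bit exit skip");
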
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c
new file mode 100644
index 0000000..c5e410e
--- /dev/null
+++ b/arch/x86/kernel/kprobes-opt.c
@@ -0,0 +1,512 @@
+/*
+ * Kernel Probes Jump Optimization (Optprobes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) Hitachi Ltd., 2012
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/kallsyms.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/alternative.h>
+#include <asm/insn.h>
+#include <asm/debugreg.h>
+
+#include "kprobes-common.h"
+
+unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
+{
+	struct optimized_kprobe *op;
+	struct kprobe *kp;
+	long offs;
+	int i;
+
+	for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
+		kp = get_kprobe((void *)addr - i);
+		/* This function only handles jump-optimized kprobes */
+		if (kp && kprobe_optimized(kp)) {
+			op = container_of(kp, struct optimized_kprobe, kp);
+			/* If op->list is not empty, op is under optimizing */
+			if (list_empty(&op->list))
+				goto found;
+		}
+	}
+
+	return addr;
+found:
+	/*
+	 * If the kprobe has been optimized, the original bytes may have
+	 * been overwritten by the jump destination address. In that case,
+	 * the original bytes must be recovered from the
+	 * op->optinsn.copied_insn buffer.
+	 */
+	memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	if (addr == (unsigned long)kp->addr) {
+		buf[0] = kp->opcode;
+		memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+	} else {
+		offs = addr - (unsigned long)kp->addr - 1;
+		memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
+	}
+
+	return (unsigned long)buf;
+}
+
+/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
+static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
+{
+#ifdef CONFIG_X86_64
+	*addr++ = 0x48;
+	*addr++ = 0xbf;
+#else
+	*addr++ = 0xb8;
+#endif
+	*(unsigned long *)addr = val;
+}
+
+static void __used __kprobes kprobes_optinsn_template_holder(void)
+{
+	asm volatile (
+			".global optprobe_template_entry\n"
+			"optprobe_template_entry:\n"
+#ifdef CONFIG_X86_64
+			/* We don't bother saving the ss register */
+			"	pushq %rsp\n"
+			"	pushfq\n"
+			SAVE_REGS_STRING
+			"	movq %rsp, %rsi\n"
+			".global optprobe_template_val\n"
+			"optprobe_template_val:\n"
+			ASM_NOP5
+			ASM_NOP5
+			".global optprobe_template_call\n"
+			"optprobe_template_call:\n"
+			ASM_NOP5
+			/* Move flags to rsp */
+			"	movq 144(%rsp), %rdx\n"
+			"	movq %rdx, 152(%rsp)\n"
+			RESTORE_REGS_STRING
+			/* Skip flags entry */
+			"	addq $8, %rsp\n"
+			"	popfq\n"
+#else /* CONFIG_X86_32 */
+			"	pushf\n"
+			SAVE_REGS_STRING
+			"	movl %esp, %edx\n"
+			".global optprobe_template_val\n"
+			"optprobe_template_val:\n"
+			ASM_NOP5
+			".global optprobe_template_call\n"
+			"optprobe_template_call:\n"
+			ASM_NOP5
+			RESTORE_REGS_STRING
+			"	addl $4, %esp\n"	/* skip cs */
+			"	popf\n"
+#endif
+			".global optprobe_template_end\n"
+			"optprobe_template_end:\n");
+}
+
+#define TMPL_MOVE_IDX \
+	((long)&optprobe_template_val - (long)&optprobe_template_entry)
+#define TMPL_CALL_IDX \
+	((long)&optprobe_template_call - (long)&optprobe_template_entry)
+#define TMPL_END_IDX \
+	((long)&optprobe_template_end - (long)&optprobe_template_entry)
+
+#define INT3_SIZE sizeof(kprobe_opcode_t)
+
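The TMPL_*_IDX offsets above carve up the out-of-line buffer that arch_prepare_optimized_kprobe() (further down) assembles: template glue first, then the relocated original instructions, then a jump back to the probe site. The 5-byte jump/call used for that glue encodes a signed 32-bit displacement relative to the next instruction; a standalone sketch of the encoding (hypothetical helper, mirroring what __synthesize_relative_insn() does in kprobes.c):

#include <stdint.h>
#include <string.h>

/* Emit "opcode rel32" at 'from', targeting 'to' (0xe9 = jmp, 0xe8 = call).
 * The displacement is relative to the end of the 5-byte instruction,
 * which is why the 2GB range check below uses from + RELATIVEJUMP_SIZE. */
static void emit_rel32(uint8_t *from, const uint8_t *to, uint8_t opcode)
{
	int32_t disp = (int32_t)(to - (from + 5));

	from[0] = opcode;
	memcpy(from + 1, &disp, sizeof(disp));
}
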
+/* Optimized kprobe callback function: called from optinsn */
+static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long flags;
+
+	/* This is possible if op is under delayed unoptimization */
+	if (kprobe_disabled(&op->kp))
+		return;
+
+	local_irq_save(flags);
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(&op->kp);
+	} else {
+		/* Save skipped registers */
+#ifdef CONFIG_X86_64
+		regs->cs = __KERNEL_CS;
+#else
+		regs->cs = __KERNEL_CS | get_kernel_rpl();
+		regs->gs = 0;
+#endif
+		regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+		regs->orig_ax = ~0UL;
+
+		__this_cpu_write(current_kprobe, &op->kp);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		opt_pre_handler(&op->kp, regs);
+		__this_cpu_write(current_kprobe, NULL);
+	}
+	local_irq_restore(flags);
+}
+
+static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+{
+	int len = 0, ret;
+
+	while (len < RELATIVEJUMP_SIZE) {
+		ret = __copy_instruction(dest + len, src + len);
+		if (!ret || !can_boost(dest + len))
+			return -EINVAL;
+		len += ret;
+	}
+	/* Check whether the address range is reserved */
+	if (ftrace_text_reserved(src, src + len - 1) ||
+	    alternatives_text_reserved(src, src + len - 1) ||
+	    jump_label_text_reserved(src, src + len - 1))
+		return -EBUSY;
+
+	return len;
+}
+
+/* Check whether insn is an indirect jump */
+static int __kprobes insn_is_indirect_jump(struct insn *insn)
+{
+	return ((insn->opcode.bytes[0] == 0xff &&
+		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
+		insn->opcode.bytes[0] == 0xea);	/* Segment based jump */
+}
+
+/* Check whether insn jumps into the specified address range */
+static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
+{
+	unsigned long target = 0;
+
+	switch (insn->opcode.bytes[0]) {
+	case 0xe0:	/* loopne */
+	case 0xe1:	/* loope */
+	case 0xe2:	/* loop */
+	case 0xe3:	/* jcxz */
+	case 0xe9:	/* near relative jump */
+	case 0xeb:	/* short relative jump */
+		break;
+	case 0x0f:
+		if ((insn->opcode.bytes[1] & 0xf0) == 0x80)	/* jcc near */
+			break;
+		return 0;
+	default:
+		if ((insn->opcode.bytes[0] & 0xf0) == 0x70)	/* jcc short */
+			break;
+		return 0;
+	}
+	target = (unsigned long)insn->next_byte + insn->immediate.value;
+
+	return (start <= target && target <= start + len);
+}
+
+/* Decode the whole function to ensure no instruction jumps into the target */
+static int __kprobes can_optimize(unsigned long paddr)
+{
+	unsigned long addr, size = 0, offset = 0;
+	struct insn insn;
+	kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+	/* Lookup symbol including addr */
+	if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
+		return 0;
+
+	/*
+	 * Do not optimize in the entry code due to the unstable
+	 * stack handling.
+	 */
+	if ((paddr >= (unsigned long)__entry_text_start) &&
+	    (paddr < (unsigned long)__entry_text_end))
+		return 0;
+
+	/* Check there is enough space for a relative jump. */
+	if (size - offset < RELATIVEJUMP_SIZE)
+		return 0;
+
+	/* Decode instructions */
+	addr = paddr - offset;
+	while (addr < paddr - offset + size) { /* Decode until function end */
+		if (search_exception_tables(addr))
+			/*
+			 * Since some fixup code jumps into this function,
+			 * we can't optimize a kprobe in this function.
+ */ + return 0; + kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); + insn_get_length(&insn); + /* Another subsystem puts a breakpoint */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + return 0; + /* Recover address */ + insn.kaddr = (void *)addr; + insn.next_byte = (void *)(addr + insn.length); + /* Check any instructions don't jump into target */ + if (insn_is_indirect_jump(&insn) || + insn_jump_into_range(&insn, paddr + INT3_SIZE, + RELATIVE_ADDR_SIZE)) + return 0; + addr += insn.length; + } + + return 1; +} + +/* Check optimized_kprobe can actually be optimized. */ +int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) +{ + int i; + struct kprobe *p; + + for (i = 1; i < op->optinsn.size; i++) { + p = get_kprobe(op->kp.addr + i); + if (p && !kprobe_disabled(p)) + return -EEXIST; + } + + return 0; +} + +/* Check the addr is within the optimized instructions. */ +int __kprobes +arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) +{ + return ((unsigned long)op->kp.addr <= addr && + (unsigned long)op->kp.addr + op->optinsn.size > addr); +} + +/* Free optimized instruction slot */ +static __kprobes +void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) +{ + if (op->optinsn.insn) { + free_optinsn_slot(op->optinsn.insn, dirty); + op->optinsn.insn = NULL; + op->optinsn.size = 0; + } +} + +void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) +{ + __arch_remove_optimized_kprobe(op, 1); +} + +/* + * Copy replacing target instructions + * Target instructions MUST be relocatable (checked inside) + * This is called when new aggr(opt)probe is allocated or reused. + */ +int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) +{ + u8 *buf; + int ret; + long rel; + + if (!can_optimize((unsigned long)op->kp.addr)) + return -EILSEQ; + + op->optinsn.insn = get_optinsn_slot(); + if (!op->optinsn.insn) + return -ENOMEM; + + /* + * Verify if the address gap is in 2GB range, because this uses + * a relative jump. 
+ */ + rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; + if (abs(rel) > 0x7fffffff) + return -ERANGE; + + buf = (u8 *)op->optinsn.insn; + + /* Copy instructions into the out-of-line buffer */ + ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); + if (ret < 0) { + __arch_remove_optimized_kprobe(op, 0); + return ret; + } + op->optinsn.size = ret; + + /* Copy arch-dep-instance from template */ + memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); + + /* Set probe information */ + synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); + + /* Set probe function call */ + synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); + + /* Set returning jmp instruction at the tail of out-of-line buffer */ + synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, + (u8 *)op->kp.addr + op->optinsn.size); + + flush_icache_range((unsigned long) buf, + (unsigned long) buf + TMPL_END_IDX + + op->optinsn.size + RELATIVEJUMP_SIZE); + return 0; +} + +#define MAX_OPTIMIZE_PROBES 256 +static struct text_poke_param *jump_poke_params; +static struct jump_poke_buffer { + u8 buf[RELATIVEJUMP_SIZE]; +} *jump_poke_bufs; + +static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, + u8 *insn_buf, + struct optimized_kprobe *op) +{ + s32 rel = (s32)((long)op->optinsn.insn - + ((long)op->kp.addr + RELATIVEJUMP_SIZE)); + + /* Backup instructions which will be replaced by jump address */ + memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, + RELATIVE_ADDR_SIZE); + + insn_buf[0] = RELATIVEJUMP_OPCODE; + *(s32 *)(&insn_buf[1]) = rel; + + tprm->addr = op->kp.addr; + tprm->opcode = insn_buf; + tprm->len = RELATIVEJUMP_SIZE; +} + +/* + * Replace breakpoints (int3) with relative jumps. + * Caller must call with locking kprobe_mutex and text_mutex. + */ +void __kprobes arch_optimize_kprobes(struct list_head *oplist) +{ + struct optimized_kprobe *op, *tmp; + int c = 0; + + list_for_each_entry_safe(op, tmp, oplist, list) { + WARN_ON(kprobe_disabled(&op->kp)); + /* Setup param */ + setup_optimize_kprobe(&jump_poke_params[c], + jump_poke_bufs[c].buf, op); + list_del_init(&op->list); + if (++c >= MAX_OPTIMIZE_PROBES) + break; + } + + /* + * text_poke_smp doesn't support NMI/MCE code modifying. + * However, since kprobes itself also doesn't support NMI/MCE + * code probing, it's not a problem. + */ + text_poke_smp_batch(jump_poke_params, c); +} + +static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, + u8 *insn_buf, + struct optimized_kprobe *op) +{ + /* Set int3 to first byte for kprobes */ + insn_buf[0] = BREAKPOINT_INSTRUCTION; + memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + + tprm->addr = op->kp.addr; + tprm->opcode = insn_buf; + tprm->len = RELATIVEJUMP_SIZE; +} + +/* + * Recover original instructions and breakpoints from relative jumps. + * Caller must call with locking kprobe_mutex. + */ +extern void arch_unoptimize_kprobes(struct list_head *oplist, + struct list_head *done_list) +{ + struct optimized_kprobe *op, *tmp; + int c = 0; + + list_for_each_entry_safe(op, tmp, oplist, list) { + /* Setup param */ + setup_unoptimize_kprobe(&jump_poke_params[c], + jump_poke_bufs[c].buf, op); + list_move(&op->list, done_list); + if (++c >= MAX_OPTIMIZE_PROBES) + break; + } + + /* + * text_poke_smp doesn't support NMI/MCE code modifying. + * However, since kprobes itself also doesn't support NMI/MCE + * code probing, it's not a problem. 
+ */ + text_poke_smp_batch(jump_poke_params, c); +} + +/* Replace a relative jump with a breakpoint (int3). */ +void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) +{ + u8 buf[RELATIVEJUMP_SIZE]; + + /* Set int3 to first byte for kprobes */ + buf[0] = BREAKPOINT_INSTRUCTION; + memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); +} + +int __kprobes +setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) +{ + struct optimized_kprobe *op; + + if (p->flags & KPROBE_FLAG_OPTIMIZED) { + /* This kprobe is really able to run optimized path. */ + op = container_of(p, struct optimized_kprobe, kp); + /* Detour through copied instructions */ + regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; + if (!reenter) + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } + return 0; +} + +int __kprobes arch_init_optprobes(void) +{ + /* Allocate code buffer and parameter array */ + jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * + MAX_OPTIMIZE_PROBES, GFP_KERNEL); + if (!jump_poke_bufs) + return -ENOMEM; + + jump_poke_params = kmalloc(sizeof(struct text_poke_param) * + MAX_OPTIMIZE_PROBES, GFP_KERNEL); + if (!jump_poke_params) { + kfree(jump_poke_bufs); + jump_poke_bufs = NULL; + return -ENOMEM; + } + + return 0; +} diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7da647d..e213fc8 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -30,16 +30,15 @@ * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi * <prasanna@in.ibm.com> added function-return probes. * 2005-May Rusty Lynch <rusty.lynch@intel.com> - * Added function return probes functionality + * Added function return probes functionality * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added - * kprobe-booster and kretprobe-booster for i386. + * kprobe-booster and kretprobe-booster for i386. * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster - * and kretprobe-booster for x86-64 + * and kretprobe-booster for x86-64 * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven - * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> - * unified x86 kprobes code. + * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> + * unified x86 kprobes code. */ - #include <linux/kprobes.h> #include <linux/ptrace.h> #include <linux/string.h> @@ -59,6 +58,8 @@ #include <asm/insn.h> #include <asm/debugreg.h> +#include "kprobes-common.h" + void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { doesn't switch kernel stack.*/ {NULL, NULL} /* Terminator */ }; + const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) @@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) } /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -static void __kprobes synthesize_reljump(void *from, void *to) +void __kprobes synthesize_reljump(void *from, void *to) { __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); } +/* Insert a call instruction at address 'from', which calls address 'to'.*/ +void __kprobes synthesize_relcall(void *from, void *to) +{ + __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); +} + /* * Skip the prefixes of the instruction. 
*/ @@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -static int __kprobes can_boost(kprobe_opcode_t *opcodes) +int __kprobes can_boost(kprobe_opcode_t *opcodes) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; @@ -207,13 +215,15 @@ retry: } } -/* Recover the probed instruction at addr for further analysis. */ -static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) +static unsigned long +__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) { struct kprobe *kp; + kp = get_kprobe((void *)addr); + /* There is no probe, return original address */ if (!kp) - return -EINVAL; + return addr; /* * Basically, kp->ainsn.insn has an original instruction. @@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) */ memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); buf[0] = kp->opcode; - return 0; + return (unsigned long)buf; +} + +/* + * Recover the probed instruction at addr for further analysis. + * Caller must lock kprobes by kprobe_mutex, or disable preemption + * for preventing to release referencing kprobes. + */ +unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) +{ + unsigned long __addr; + + __addr = __recover_optprobed_insn(buf, addr); + if (__addr != addr) + return __addr; + + return __recover_probed_insn(buf, addr); } /* Check if paddr is at an instruction boundary */ static int __kprobes can_probe(unsigned long paddr) { - int ret; - unsigned long addr, offset = 0; + unsigned long addr, __addr, offset = 0; struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; @@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr) /* Decode instructions */ addr = paddr - offset; while (addr < paddr) { - kernel_insn_init(&insn, (void *)addr); - insn_get_opcode(&insn); - /* * Check if the instruction has been modified by another * kprobe, in which case we replace the breakpoint by the * original instruction in our buffer. + * Also, jump optimization will change the breakpoint to + * relative-jump. Since the relative-jump itself is + * normally used, we just go through if there is no kprobe. */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { - ret = recover_probed_instruction(buf, addr); - if (ret) - /* - * Another debugging subsystem might insert - * this breakpoint. In that case, we can't - * recover it. - */ - return 0; - kernel_insn_init(&insn, buf); - } + __addr = recover_probed_instruction(buf, addr); + kernel_insn_init(&insn, (void *)__addr); insn_get_length(&insn); + + /* + * Another debugging subsystem might insert this breakpoint. + * In that case, we can't recover it. + */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + return 0; addr += insn.length; } @@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) * If not, return null. * Only applicable to 64-bit x86. 
*/ -static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) +int __kprobes __copy_instruction(u8 *dest, u8 *src) { struct insn insn; - int ret; kprobe_opcode_t buf[MAX_INSN_SIZE]; - kernel_insn_init(&insn, src); - if (recover) { - insn_get_opcode(&insn); - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { - ret = recover_probed_instruction(buf, - (unsigned long)src); - if (ret) - return 0; - kernel_insn_init(&insn, buf); - } - } + kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); insn_get_length(&insn); + /* Another subsystem puts a breakpoint, failed to recover */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + return 0; memcpy(dest, insn.kaddr, insn.length); #ifdef CONFIG_X86_64 @@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) * extension of the original signed 32-bit displacement would * have given. */ - newdisp = (u8 *) src + (s64) insn.displacement.value - - (u8 *) dest; + newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ disp = (u8 *) dest + insn_offset_displacement(&insn); *(s32 *) disp = (s32) newdisp; @@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) static void __kprobes arch_copy_kprobe(struct kprobe *p) { + /* Copy an instruction with recovering if other optprobe modifies it.*/ + __copy_instruction(p->ainsn.insn, p->addr); + /* - * Copy an instruction without recovering int3, because it will be - * put by another subsystem. + * __copy_instruction can modify the displacement of the instruction, + * but it doesn't affect boostable check. */ - __copy_instruction(p->ainsn.insn, p->addr, 0); - - if (can_boost(p->addr)) + if (can_boost(p->ainsn.insn)) p->ainsn.boostable = 0; else p->ainsn.boostable = -1; - p->opcode = *p->addr; + /* Also, displacement change doesn't affect the first byte */ + p->opcode = p->ainsn.insn[0]; } int __kprobes arch_prepare_kprobe(struct kprobe *p) @@ -442,8 +458,8 @@ static void __kprobes restore_btf(void) } } -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) +void __kprobes +arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { unsigned long *sara = stack_addr(regs); @@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, *sara = (unsigned long) &kretprobe_trampoline; } -#ifdef CONFIG_OPTPROBES -static int __kprobes setup_detour_execution(struct kprobe *p, - struct pt_regs *regs, - int reenter); -#else -#define setup_detour_execution(p, regs, reenter) (0) -#endif - -static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb, int reenter) +static void __kprobes +setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) { if (setup_detour_execution(p, regs, reenter)) return; @@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, * within the handler. We save the original kprobes variables and just single * step on the instruction of the new probe without calling any user handlers. 
*/ -static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +static int __kprobes +reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: @@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) return 0; } -#ifdef CONFIG_X86_64 -#define SAVE_REGS_STRING \ - /* Skip cs, ip, orig_ax. */ \ - " subq $24, %rsp\n" \ - " pushq %rdi\n" \ - " pushq %rsi\n" \ - " pushq %rdx\n" \ - " pushq %rcx\n" \ - " pushq %rax\n" \ - " pushq %r8\n" \ - " pushq %r9\n" \ - " pushq %r10\n" \ - " pushq %r11\n" \ - " pushq %rbx\n" \ - " pushq %rbp\n" \ - " pushq %r12\n" \ - " pushq %r13\n" \ - " pushq %r14\n" \ - " pushq %r15\n" -#define RESTORE_REGS_STRING \ - " popq %r15\n" \ - " popq %r14\n" \ - " popq %r13\n" \ - " popq %r12\n" \ - " popq %rbp\n" \ - " popq %rbx\n" \ - " popq %r11\n" \ - " popq %r10\n" \ - " popq %r9\n" \ - " popq %r8\n" \ - " popq %rax\n" \ - " popq %rcx\n" \ - " popq %rdx\n" \ - " popq %rsi\n" \ - " popq %rdi\n" \ - /* Skip orig_ax, ip, cs */ \ - " addq $24, %rsp\n" -#else -#define SAVE_REGS_STRING \ - /* Skip cs, ip, orig_ax and gs. */ \ - " subl $16, %esp\n" \ - " pushl %fs\n" \ - " pushl %es\n" \ - " pushl %ds\n" \ - " pushl %eax\n" \ - " pushl %ebp\n" \ - " pushl %edi\n" \ - " pushl %esi\n" \ - " pushl %edx\n" \ - " pushl %ecx\n" \ - " pushl %ebx\n" -#define RESTORE_REGS_STRING \ - " popl %ebx\n" \ - " popl %ecx\n" \ - " popl %edx\n" \ - " popl %esi\n" \ - " popl %edi\n" \ - " popl %ebp\n" \ - " popl %eax\n" \ - /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ - " addl $24, %esp\n" -#endif - /* * When a retprobed function returns, this code saves registers and * calls trampoline_handler() runs, which calls the kretprobe's handler. @@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) * jump instruction after the copied instruction, that jumps to the next * instruction after the probepoint. */ -static void __kprobes resume_execution(struct kprobe *p, - struct pt_regs *regs, struct kprobe_ctlblk *kcb) +static void __kprobes +resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { unsigned long *tos = stack_addr(regs); unsigned long copy_ip = (unsigned long)p->ainsn.insn; @@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) /* * Wrapper routine for handling exceptions. */ -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +int __kprobes +kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { struct die_args *args = data; int ret = NOTIFY_DONE; @@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } - -#ifdef CONFIG_OPTPROBES - -/* Insert a call instruction at address 'from', which calls address 'to'.*/ -static void __kprobes synthesize_relcall(void *from, void *to) -{ - __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); -} - -/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). 
*/ -static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, - unsigned long val) -{ -#ifdef CONFIG_X86_64 - *addr++ = 0x48; - *addr++ = 0xbf; -#else - *addr++ = 0xb8; -#endif - *(unsigned long *)addr = val; -} - -static void __used __kprobes kprobes_optinsn_template_holder(void) -{ - asm volatile ( - ".global optprobe_template_entry\n" - "optprobe_template_entry: \n" -#ifdef CONFIG_X86_64 - /* We don't bother saving the ss register */ - " pushq %rsp\n" - " pushfq\n" - SAVE_REGS_STRING - " movq %rsp, %rsi\n" - ".global optprobe_template_val\n" - "optprobe_template_val: \n" - ASM_NOP5 - ASM_NOP5 - ".global optprobe_template_call\n" - "optprobe_template_call: \n" - ASM_NOP5 - /* Move flags to rsp */ - " movq 144(%rsp), %rdx\n" - " movq %rdx, 152(%rsp)\n" - RESTORE_REGS_STRING - /* Skip flags entry */ - " addq $8, %rsp\n" - " popfq\n" -#else /* CONFIG_X86_32 */ - " pushf\n" - SAVE_REGS_STRING - " movl %esp, %edx\n" - ".global optprobe_template_val\n" - "optprobe_template_val: \n" - ASM_NOP5 - ".global optprobe_template_call\n" - "optprobe_template_call: \n" - ASM_NOP5 - RESTORE_REGS_STRING - " addl $4, %esp\n" /* skip cs */ - " popf\n" -#endif - ".global optprobe_template_end\n" - "optprobe_template_end: \n"); -} - -#define TMPL_MOVE_IDX \ - ((long)&optprobe_template_val - (long)&optprobe_template_entry) -#define TMPL_CALL_IDX \ - ((long)&optprobe_template_call - (long)&optprobe_template_entry) -#define TMPL_END_IDX \ - ((long)&optprobe_template_end - (long)&optprobe_template_entry) - -#define INT3_SIZE sizeof(kprobe_opcode_t) - -/* Optimized kprobe call back function: called from optinsn */ -static void __kprobes optimized_callback(struct optimized_kprobe *op, - struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long flags; - - /* This is possible if op is under delayed unoptimizing */ - if (kprobe_disabled(&op->kp)) - return; - - local_irq_save(flags); - if (kprobe_running()) { - kprobes_inc_nmissed_count(&op->kp); - } else { - /* Save skipped registers */ -#ifdef CONFIG_X86_64 - regs->cs = __KERNEL_CS; -#else - regs->cs = __KERNEL_CS | get_kernel_rpl(); - regs->gs = 0; -#endif - regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; - regs->orig_ax = ~0UL; - - __this_cpu_write(current_kprobe, &op->kp); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - opt_pre_handler(&op->kp, regs); - __this_cpu_write(current_kprobe, NULL); - } - local_irq_restore(flags); -} - -static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) -{ - int len = 0, ret; - - while (len < RELATIVEJUMP_SIZE) { - ret = __copy_instruction(dest + len, src + len, 1); - if (!ret || !can_boost(dest + len)) - return -EINVAL; - len += ret; - } - /* Check whether the address range is reserved */ - if (ftrace_text_reserved(src, src + len - 1) || - alternatives_text_reserved(src, src + len - 1) || - jump_label_text_reserved(src, src + len - 1)) - return -EBUSY; - - return len; -} - -/* Check whether insn is indirect jump */ -static int __kprobes insn_is_indirect_jump(struct insn *insn) -{ - return ((insn->opcode.bytes[0] == 0xff && - (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ - insn->opcode.bytes[0] == 0xea); /* Segment based jump */ -} - -/* Check whether insn jumps into specified address range */ -static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) -{ - unsigned long target = 0; - - switch (insn->opcode.bytes[0]) { - case 0xe0: /* loopne */ - case 0xe1: /* loope */ - case 0xe2: /* loop */ - case 0xe3: /* jcxz */ - case 0xe9: /* near relative 
jump */ - case 0xeb: /* short relative jump */ - break; - case 0x0f: - if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ - break; - return 0; - default: - if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ - break; - return 0; - } - target = (unsigned long)insn->next_byte + insn->immediate.value; - - return (start <= target && target <= start + len); -} - -/* Decode whole function to ensure any instructions don't jump into target */ -static int __kprobes can_optimize(unsigned long paddr) -{ - int ret; - unsigned long addr, size = 0, offset = 0; - struct insn insn; - kprobe_opcode_t buf[MAX_INSN_SIZE]; - - /* Lookup symbol including addr */ - if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) - return 0; - - /* - * Do not optimize in the entry code due to the unstable - * stack handling. - */ - if ((paddr >= (unsigned long )__entry_text_start) && - (paddr < (unsigned long )__entry_text_end)) - return 0; - - /* Check there is enough space for a relative jump. */ - if (size - offset < RELATIVEJUMP_SIZE) - return 0; - - /* Decode instructions */ - addr = paddr - offset; - while (addr < paddr - offset + size) { /* Decode until function end */ - if (search_exception_tables(addr)) - /* - * Since some fixup code will jumps into this function, - * we can't optimize kprobe in this function. - */ - return 0; - kernel_insn_init(&insn, (void *)addr); - insn_get_opcode(&insn); - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { - ret = recover_probed_instruction(buf, addr); - if (ret) - return 0; - kernel_insn_init(&insn, buf); - } - insn_get_length(&insn); - /* Recover address */ - insn.kaddr = (void *)addr; - insn.next_byte = (void *)(addr + insn.length); - /* Check any instructions don't jump into target */ - if (insn_is_indirect_jump(&insn) || - insn_jump_into_range(&insn, paddr + INT3_SIZE, - RELATIVE_ADDR_SIZE)) - return 0; - addr += insn.length; - } - - return 1; -} - -/* Check optimized_kprobe can actually be optimized. */ -int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) -{ - int i; - struct kprobe *p; - - for (i = 1; i < op->optinsn.size; i++) { - p = get_kprobe(op->kp.addr + i); - if (p && !kprobe_disabled(p)) - return -EEXIST; - } - - return 0; -} - -/* Check the addr is within the optimized instructions. */ -int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op, - unsigned long addr) -{ - return ((unsigned long)op->kp.addr <= addr && - (unsigned long)op->kp.addr + op->optinsn.size > addr); -} - -/* Free optimized instruction slot */ -static __kprobes -void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) -{ - if (op->optinsn.insn) { - free_optinsn_slot(op->optinsn.insn, dirty); - op->optinsn.insn = NULL; - op->optinsn.size = 0; - } -} - -void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) -{ - __arch_remove_optimized_kprobe(op, 1); -} - -/* - * Copy replacing target instructions - * Target instructions MUST be relocatable (checked inside) - */ -int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) -{ - u8 *buf; - int ret; - long rel; - - if (!can_optimize((unsigned long)op->kp.addr)) - return -EILSEQ; - - op->optinsn.insn = get_optinsn_slot(); - if (!op->optinsn.insn) - return -ENOMEM; - - /* - * Verify if the address gap is in 2GB range, because this uses - * a relative jump. 
- */ - rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; - if (abs(rel) > 0x7fffffff) - return -ERANGE; - - buf = (u8 *)op->optinsn.insn; - - /* Copy instructions into the out-of-line buffer */ - ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); - if (ret < 0) { - __arch_remove_optimized_kprobe(op, 0); - return ret; - } - op->optinsn.size = ret; - - /* Copy arch-dep-instance from template */ - memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); - - /* Set probe information */ - synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); - - /* Set probe function call */ - synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); - - /* Set returning jmp instruction at the tail of out-of-line buffer */ - synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, - (u8 *)op->kp.addr + op->optinsn.size); - - flush_icache_range((unsigned long) buf, - (unsigned long) buf + TMPL_END_IDX + - op->optinsn.size + RELATIVEJUMP_SIZE); - return 0; -} - -#define MAX_OPTIMIZE_PROBES 256 -static struct text_poke_param *jump_poke_params; -static struct jump_poke_buffer { - u8 buf[RELATIVEJUMP_SIZE]; -} *jump_poke_bufs; - -static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) -{ - s32 rel = (s32)((long)op->optinsn.insn - - ((long)op->kp.addr + RELATIVEJUMP_SIZE)); - - /* Backup instructions which will be replaced by jump address */ - memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, - RELATIVE_ADDR_SIZE); - - insn_buf[0] = RELATIVEJUMP_OPCODE; - *(s32 *)(&insn_buf[1]) = rel; - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; -} - -/* - * Replace breakpoints (int3) with relative jumps. - * Caller must call with locking kprobe_mutex and text_mutex. - */ -void __kprobes arch_optimize_kprobes(struct list_head *oplist) -{ - struct optimized_kprobe *op, *tmp; - int c = 0; - - list_for_each_entry_safe(op, tmp, oplist, list) { - WARN_ON(kprobe_disabled(&op->kp)); - /* Setup param */ - setup_optimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); - list_del_init(&op->list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; - } - - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. - */ - text_poke_smp_batch(jump_poke_params, c); -} - -static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) -{ - /* Set int3 to first byte for kprobes */ - insn_buf[0] = BREAKPOINT_INSTRUCTION; - memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; -} - -/* - * Recover original instructions and breakpoints from relative jumps. - * Caller must call with locking kprobe_mutex. - */ -extern void arch_unoptimize_kprobes(struct list_head *oplist, - struct list_head *done_list) -{ - struct optimized_kprobe *op, *tmp; - int c = 0; - - list_for_each_entry_safe(op, tmp, oplist, list) { - /* Setup param */ - setup_unoptimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); - list_move(&op->list, done_list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; - } - - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. 
- */ - text_poke_smp_batch(jump_poke_params, c); -} - -/* Replace a relative jump with a breakpoint (int3). */ -void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) -{ - u8 buf[RELATIVEJUMP_SIZE]; - - /* Set int3 to first byte for kprobes */ - buf[0] = BREAKPOINT_INSTRUCTION; - memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); -} - -static int __kprobes setup_detour_execution(struct kprobe *p, - struct pt_regs *regs, - int reenter) -{ - struct optimized_kprobe *op; - - if (p->flags & KPROBE_FLAG_OPTIMIZED) { - /* This kprobe is really able to run optimized path. */ - op = container_of(p, struct optimized_kprobe, kp); - /* Detour through copied instructions */ - regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; - if (!reenter) - reset_current_kprobe(); - preempt_enable_no_resched(); - return 1; - } - return 0; -} - -static int __kprobes init_poke_params(void) -{ - /* Allocate code buffer and parameter array */ - jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_bufs) - return -ENOMEM; - - jump_poke_params = kmalloc(sizeof(struct text_poke_param) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_params) { - kfree(jump_poke_bufs); - jump_poke_bufs = NULL; - return -ENOMEM; - } - - return 0; -} -#else /* !CONFIG_OPTPROBES */ -static int __kprobes init_poke_params(void) -{ - return 0; -} -#endif - int __init arch_init_kprobes(void) { - return init_poke_params(); + return arch_init_optprobes(); } int __kprobes arch_trampoline_kprobe(struct kprobe *p) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6..694d801 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -438,9 +438,9 @@ void __init kvm_guest_init(void) static __init int activate_jump_labels(void) { if (has_steal_clock) { - jump_label_inc(¶virt_steal_enabled); + static_key_slow_inc(¶virt_steal_enabled); if (steal_acc) - jump_label_inc(¶virt_steal_rq_enabled); + static_key_slow_inc(¶virt_steal_rq_enabled); } return 0; diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index fda91c3..87a0f86 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -86,6 +86,7 @@ #include <asm/microcode.h> #include <asm/processor.h> +#include <asm/cpu_device_id.h> MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); @@ -504,6 +505,20 @@ static struct notifier_block __refdata mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; +#ifdef MODULE +/* Autoload on Intel and AMD systems */ +static const struct x86_cpu_id microcode_id[] = { +#ifdef CONFIG_MICROCODE_INTEL + { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, +#endif +#ifdef CONFIG_MICROCODE_AMD + { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, }, +#endif + {} +}; +MODULE_DEVICE_TABLE(x86cpu, microcode_id); +#endif + static int __init microcode_init(void) { struct cpuinfo_x86 *c = &cpu_data(0); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index d90272e..ada2f99 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr) __native_flush_tlb_single(addr); } -struct jump_label_key paravirt_steal_enabled; -struct jump_label_key paravirt_steal_rq_enabled; +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; static u64 native_steal_clock(int cpu) { 
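
The jump_label_inc()/static_branch() call sites in this hunk (and in kvm/mmu_audit.c further down) move to the renamed static-key interface. A minimal usage sketch of that API with hypothetical names, assuming a kernel providing the static_key primitives this diff switches to:

#include <linux/jump_label.h>
#include <linux/types.h>

static struct static_key my_key;	/* zero-initialized, i.e. false */

static void my_rare_work(void) { }	/* hypothetical slow path */

static inline void my_hot_path(void)
{
	/* Compiles to a NOP until the key is enabled, then to a jump. */
	if (static_key_false(&my_key))
		my_rare_work();
}

static void my_feature_set(bool on)
{
	if (on)
		static_key_slow_inc(&my_key);	/* patches NOP -> jump */
	else
		static_key_slow_dec(&my_key);
}

Enabling is the slow, rare operation (it rewrites code via text patching), which is exactly the trade-off the JUMP_LABEL Kconfig help text describes.
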
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 15763af..44eefde 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -377,8 +377,8 @@ static inline int hlt_use_halt(void) void default_idle(void) { if (hlt_use_halt()) { - trace_power_start(POWER_CSTATE, 1, smp_processor_id()); - trace_cpu_idle(1, smp_processor_id()); + trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); + trace_cpu_idle_rcuidle(1, smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -391,8 +391,8 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); + trace_power_end_rcuidle(smp_processor_id()); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); /* loop is done by the caller */ @@ -450,8 +450,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); static void mwait_idle(void) { if (!need_resched()) { - trace_power_start(POWER_CSTATE, 1, smp_processor_id()); - trace_cpu_idle(1, smp_processor_id()); + trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); + trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -461,8 +461,8 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); + trace_power_end_rcuidle(smp_processor_id()); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else local_irq_enable(); } @@ -474,13 +474,13 @@ static void mwait_idle(void) */ static void poll_idle(void) { - trace_power_start(POWER_CSTATE, 0, smp_processor_id()); - trace_cpu_idle(0, smp_processor_id()); + trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id()); + trace_cpu_idle_rcuidle(0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); + trace_power_end_rcuidle(smp_processor_id()); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index c08d1ff..49888fe 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -119,9 +119,7 @@ void cpu_idle(void) } rcu_idle_exit(); tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index cfa5c90..e34257c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -156,9 +156,7 @@ void cpu_idle(void) } tick_nohz_idle_exit(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 66d250c..58f7816 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -291,19 +291,6 @@ notrace static void __cpuinit start_secondary(void *unused) per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; x86_platform.nmi_init(); - /* - * Wait until the cpu which brought this one up marked it - * online before enabling interrupts. 
If we don't do that then
-	 * we can end up waking up the softirq thread before this cpu
-	 * reached the active state, which makes the scheduler unhappy
-	 * and schedule the softirq thread on the wrong cpu. This is
-	 * only observable with forced threaded interrupts, but in
-	 * theory it could also happen w/o them. It's just way harder
-	 * to achieve.
-	 */
-	while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
-		cpu_relax();
-
 	/* enable local interrupts */
 	local_irq_enable();
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index dd5fbf4..c6eba2b 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -57,9 +57,6 @@ EXPORT_SYMBOL(profile_pc);
  */
 static irqreturn_t timer_interrupt(int irq, void *dev_id)
 {
-	/* Keep nmi watchdog up to date */
-	inc_irq_stat(irq0_irqs);
-
 	global_clock_event->event_handler(global_clock_event);
 
 	/* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..183c592 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -620,7 +620,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
 	if (cpu_khz) {
 		*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
-		*offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
+		*offset = ns_now - mult_frac(tsc_now, *scale,
+					     (1UL << CYC2NS_SCALE_FACTOR));
 	}
 
 	sched_clock_idle_wakeup_event(0);
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9eba29b..fc25e60 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -42,7 +42,7 @@ static __cpuinitdata int nr_warps;
 /*
  * TSC-warp measurement loop running on both CPUs:
  */
-static __cpuinit void check_tsc_warp(void)
+static __cpuinit void check_tsc_warp(unsigned int timeout)
 {
 	cycles_t start, now, prev, end;
 	int i;
@@ -51,9 +51,9 @@ static __cpuinit void check_tsc_warp(void)
 	start = get_cycles();
 	rdtsc_barrier();
 	/*
-	 * The measurement runs for 20 msecs:
+	 * The measurement runs for 'timeout' msecs:
 	 */
-	end = start + tsc_khz * 20ULL;
+	end = start + (cycles_t) tsc_khz * timeout;
 	now = start;
 
 	for (i = 0; ; i++) {
@@ -99,6 +99,25 @@ static __cpuinit void check_tsc_warp(void)
 }
 
 /*
+ * If the target CPU coming online doesn't have any of its core-siblings
+ * online, a timeout of 20 msecs will be used for the TSC-warp measurement
+ * loop. Otherwise a smaller timeout of 2 msecs will be used, as we have
+ * some information about this socket already (and this information grows
+ * as we have more and more logical-siblings in that socket).
+ *
+ * Ideally we should be able to skip the TSC sync check on the other
+ * core-siblings, if the first logical CPU in a socket passed the sync test.
+ * But as the TSC is per-logical CPU and can potentially be modified wrongly
+ * by the BIOS, a TSC sync test of smaller duration should still be able
+ * to catch such errors. Also this will catch the condition where all the
+ * cores in the socket don't get reset at the same time.
+ */
+static inline unsigned int loop_timeout(int cpu)
+{
+	return (cpumask_weight(cpu_core_mask(cpu)) > 1) ?
2 : 20; +} + +/* * Source CPU calls into this - it waits for the freshly booted * target CPU to arrive and then starts the measurement: */ @@ -135,7 +154,7 @@ void __cpuinit check_tsc_sync_source(int cpu) */ atomic_inc(&start_count); - check_tsc_warp(); + check_tsc_warp(loop_timeout(cpu)); while (atomic_read(&stop_count) != cpus-1) cpu_relax(); @@ -183,7 +202,7 @@ void __cpuinit check_tsc_sync_target(void) while (atomic_read(&start_count) != cpus) cpu_relax(); - check_tsc_warp(); + check_tsc_warp(loop_timeout(smp_processor_id())); /* * Ok, we are done: diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index fe15dcc..ea7b4fd 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu) } static bool mmu_audit; -static struct jump_label_key mmu_audit_key; +static struct static_key mmu_audit_key; static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { @@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { - if (static_branch((&mmu_audit_key))) + if (static_key_false((&mmu_audit_key))) __kvm_mmu_audit(vcpu, point); } @@ -259,7 +259,7 @@ static void mmu_audit_enable(void) if (mmu_audit) return; - jump_label_inc(&mmu_audit_key); + static_key_slow_inc(&mmu_audit_key); mmu_audit = true; } @@ -268,7 +268,7 @@ static void mmu_audit_disable(void) if (!mmu_audit) return; - jump_label_dec(&mmu_audit_key); + static_key_slow_dec(&mmu_audit_key); mmu_audit = false; } diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 88ad5fb..c1f01a8 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c @@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) return inat_primary_table[opcode]; } -insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, +int inat_get_last_prefix_id(insn_byte_t last_pfx) +{ + insn_attr_t lpfx_attr; + + lpfx_attr = inat_get_opcode_attribute(last_pfx); + return inat_last_prefix_id(lpfx_attr); +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, insn_attr_t esc_attr) { const insn_attr_t *table; - insn_attr_t lpfx_attr; - int n, m = 0; + int n; n = inat_escape_id(esc_attr); - if (last_pfx) { - lpfx_attr = inat_get_opcode_attribute(last_pfx); - m = inat_last_prefix_id(lpfx_attr); - } + table = inat_escape_tables[n][0]; if (!table) return 0; - if (inat_has_variant(table[opcode]) && m) { - table = inat_escape_tables[n][m]; + if (inat_has_variant(table[opcode]) && lpfx_id) { + table = inat_escape_tables[n][lpfx_id]; if (!table) return 0; } return table[opcode]; } -insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, insn_attr_t grp_attr) { const insn_attr_t *table; - insn_attr_t lpfx_attr; - int n, m = 0; + int n; n = inat_group_id(grp_attr); - if (last_pfx) { - lpfx_attr = inat_get_opcode_attribute(last_pfx); - m = inat_last_prefix_id(lpfx_attr); - } + table = inat_group_tables[n][0]; if (!table) return inat_group_common_attribute(grp_attr); - if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { - table = inat_group_tables[n][m]; + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { + table = inat_group_tables[n][lpfx_id]; if (!table) return inat_group_common_attribute(grp_attr); } diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 5a1f9f3..25feb1a 100644 --- a/arch/x86/lib/insn.c +++ 
b/arch/x86/lib/insn.c @@ -185,7 +185,8 @@ err_out: void insn_get_opcode(struct insn *insn) { struct insn_field *opcode = &insn->opcode; - insn_byte_t op, pfx; + insn_byte_t op; + int pfx_id; if (opcode->got) return; if (!insn->prefixes.got) @@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn) /* Get escaped opcode */ op = get_next(insn_byte_t, insn); opcode->bytes[opcode->nbytes++] = op; - pfx = insn_last_prefix(insn); - insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); } if (inat_must_vex(insn->attr)) insn->attr = 0; /* This instruction is bad */ @@ -235,7 +236,7 @@ err_out: void insn_get_modrm(struct insn *insn) { struct insn_field *modrm = &insn->modrm; - insn_byte_t pfx, mod; + insn_byte_t pfx_id, mod; if (modrm->got) return; if (!insn->opcode.got) @@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn) modrm->value = mod; modrm->nbytes = 1; if (inat_is_group(insn->attr)) { - pfx = insn_last_prefix(insn); - insn->attr = inat_get_group_attribute(mod, pfx, + pfx_id = insn_last_prefix_id(insn); + insn->attr = inat_get_group_attribute(mod, pfx_id, insn->attr); if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) insn->attr = 0; /* This is bad */ diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 47041e7..2c90047 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -113,9 +113,7 @@ void cpu_idle(void) while (1) { while (!need_resched()) platform_idle(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + schedule_preempt_disabled(); } } diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index 2c723e8..f9726f6 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -19,7 +19,6 @@ #include <linux/param.h> #include <linux/seq_file.h> #include <linux/serial.h> -#include <linux/serialP.h> #include <asm/uaccess.h> #include <asm/irq.h> @@ -37,6 +36,7 @@ #define SERIAL_TIMER_VALUE (20 * HZ) static struct tty_driver *serial_driver; +static struct tty_port serial_port; static struct timer_list serial_timer; static DEFINE_SPINLOCK(timer_lock); @@ -68,17 +68,10 @@ static void rs_poll(unsigned long); static int rs_open(struct tty_struct *tty, struct file * filp) { - int line = tty->index; - - if ((line < 0) || (line >= SERIAL_MAX_NUM_LINES)) - return -ENODEV; - + tty->port = &serial_port; spin_lock(&timer_lock); - if (tty->count == 1) { - init_timer(&serial_timer); - serial_timer.data = (unsigned long) tty; - serial_timer.function = rs_poll; + setup_timer(&serial_timer, rs_poll, (unsigned long)tty); mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE); } spin_unlock(&timer_lock); @@ -99,10 +92,10 @@ static int rs_open(struct tty_struct *tty, struct file * filp) */ static void rs_close(struct tty_struct *tty, struct file * filp) { - spin_lock(&timer_lock); + spin_lock_bh(&timer_lock); if (tty->count == 1) del_timer_sync(&serial_timer); - spin_unlock(&timer_lock); + spin_unlock_bh(&timer_lock); } @@ -210,13 +203,14 @@ static const struct tty_operations serial_ops = { int __init rs_init(void) { - serial_driver = alloc_tty_driver(1); + tty_port_init(&serial_port); + + serial_driver = alloc_tty_driver(SERIAL_MAX_NUM_LINES); printk ("%s %s\n", serial_name, serial_version); /* Initialize the tty_driver structure */ - serial_driver->owner = THIS_MODULE; serial_driver->driver_name = "iss_serial"; serial_driver->name = "ttyS"; 
serial_driver->major = TTY_MAJOR;