Diffstat (limited to 'arch')
150 files changed, 2472 insertions, 6131 deletions
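A subtree-limited summary like the one above is what git emits when a diff is restricted to a pathspec. A minimal sketch, assuming placeholder commits OLD and NEW standing in for the merge parents (not taken from this page), is:

    # summary line only, limited to arch/
    git diff --shortstat OLD..NEW -- arch/

    # per-file diffstat table plus the full patch, limited to arch/
    git diff --stat -p OLD..NEW -- arch/

The trailing "-- arch/" pathspec is what produces the "limited to 'arch'" restriction; without it the stat covers the whole tree.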
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 9f53e82..4a4e02d 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -662,6 +662,8 @@ mac: ethernet@4a100000 { compatible = "ti,cpsw"; ti,hwmods = "cpgmac0"; + clocks = <&cpsw_125mhz_gclk>, <&cpsw_cpts_rft_clk>; + clock-names = "fck", "cpts"; cpdma_channels = <8>; ale_entries = <1024>; bd_ram_size = <0x2000>; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index db464d7..49fa596 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -490,6 +490,8 @@ #address-cells = <1>; #size-cells = <1>; ti,hwmods = "cpgmac0"; + clocks = <&cpsw_125mhz_gclk>, <&cpsw_cpts_rft_clk>; + clock-names = "fck", "cpts"; status = "disabled"; cpdma_channels = <8>; ale_entries = <1024>; diff --git a/arch/arm/boot/dts/armada-xp-matrix.dts b/arch/arm/boot/dts/armada-xp-matrix.dts index 25674fe..7e291e2 100644 --- a/arch/arm/boot/dts/armada-xp-matrix.dts +++ b/arch/arm/boot/dts/armada-xp-matrix.dts @@ -57,6 +57,10 @@ ethernet@30000 { status = "okay"; phy-mode = "sgmii"; + fixed-link { + speed = <1000>; + full-duplex; + }; }; pcie-controller { diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index c767687..b03cfe4 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -26,7 +26,7 @@ clock-frequency = <0>; }; - atlclkin3_ck: atlclkin3_ck { + atl_clkin3_ck: atl_clkin3_ck { #clock-cells = <0>; compatible = "fixed-clock"; clock-frequency = <0>; @@ -277,7 +277,7 @@ dpll_mpu_ck: dpll_mpu_ck { #clock-cells = <0>; - compatible = "ti,omap4-dpll-clock"; + compatible = "ti,omap5-mpu-dpll-clock"; clocks = <&sys_clkin1>, <&mpu_dpll_hs_clk_div>; reg = <0x0160>, <0x0164>, <0x016c>, <0x0168>; }; @@ -730,7 +730,7 @@ mcasp1_ahclkr_mux: mcasp1_ahclkr_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <28>; reg = <0x0550>; }; @@ -738,7 +738,7 @@ mcasp1_ahclkx_mux: mcasp1_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x0550>; }; @@ -1639,7 +1639,7 @@ mcasp2_ahclkr_mux: mcasp2_ahclkr_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, 
<&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <28>; reg = <0x1860>; }; @@ -1647,7 +1647,7 @@ mcasp2_ahclkx_mux: mcasp2_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1860>; }; @@ -1663,7 +1663,7 @@ mcasp3_ahclkx_mux: mcasp3_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1868>; }; @@ -1679,7 +1679,7 @@ mcasp4_ahclkx_mux: mcasp4_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1898>; }; @@ -1695,7 +1695,7 @@ mcasp5_ahclkx_mux: mcasp5_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1878>; }; @@ -1711,7 +1711,7 @@ mcasp6_ahclkx_mux: mcasp6_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1904>; }; @@ -1727,7 +1727,7 @@ mcasp7_ahclkx_mux: mcasp7_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - 
clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1908>; }; @@ -1743,7 +1743,7 @@ mcasp8_ahclk_mux: mcasp8_ahclk_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <22>; reg = <0x1890>; }; diff --git a/arch/arm/boot/dts/omap54xx-clocks.dtsi b/arch/arm/boot/dts/omap54xx-clocks.dtsi index aeb142c..e67a23b 100644 --- a/arch/arm/boot/dts/omap54xx-clocks.dtsi +++ b/arch/arm/boot/dts/omap54xx-clocks.dtsi @@ -335,7 +335,7 @@ dpll_mpu_ck: dpll_mpu_ck { #clock-cells = <0>; - compatible = "ti,omap4-dpll-clock"; + compatible = "ti,omap5-mpu-dpll-clock"; clocks = <&sys_clkin>, <&mpu_dpll_hs_clk_div>; reg = <0x0160>, <0x0164>, <0x016c>, <0x0168>; }; diff --git a/arch/arm/boot/dts/vt8500.dtsi b/arch/arm/boot/dts/vt8500.dtsi index 51d0e91..1929ad3 100644 --- a/arch/arm/boot/dts/vt8500.dtsi +++ b/arch/arm/boot/dts/vt8500.dtsi @@ -165,5 +165,11 @@ reg = <0xd8100000 0x10000>; interrupts = <48>; }; + + ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; + }; }; }; diff --git a/arch/arm/boot/dts/wm8650.dtsi b/arch/arm/boot/dts/wm8650.dtsi index 7525982..b1c59a7 100644 --- a/arch/arm/boot/dts/wm8650.dtsi +++ b/arch/arm/boot/dts/wm8650.dtsi @@ -218,5 +218,11 @@ reg = <0xd8100000 0x10000>; interrupts = <48>; }; + + ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; + }; }; }; diff --git a/arch/arm/boot/dts/wm8850.dtsi b/arch/arm/boot/dts/wm8850.dtsi index d98386d..8fbccfbe 100644 --- a/arch/arm/boot/dts/wm8850.dtsi +++ b/arch/arm/boot/dts/wm8850.dtsi @@ -298,5 +298,11 @@ bus-width = <4>; sdon-inverted; }; + + ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; + }; }; }; diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index a6bc431..4238bcb 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -410,7 +410,7 @@ __hw_perf_event_init(struct perf_event *event) */ hwc->config_base |= (unsigned long)mapping; - if (!hwc->sample_period) { + if (!is_sampling_event(event)) { /* * For non-sampling runs, limit the sample_period to half * of the counter width. 
That way, the new counter value diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index a71ae15..af9e35e 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -126,8 +126,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) irqs = min(pmu_device->num_resources, num_possible_cpus()); if (irqs < 1) { - pr_err("no irqs for PMUs defined\n"); - return -ENODEV; + printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + return 0; } irq = platform_get_irq(pmu_device, 0); @@ -191,6 +191,10 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) /* Ensure the PMU has sane values out of reset. */ if (cpu_pmu->reset) on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; } /* diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 3997c41..9d85318 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -26,30 +26,30 @@ #include <asm/topology.h> /* - * cpu power scale management + * cpu capacity scale management */ /* - * cpu power table + * cpu capacity table * This per cpu data structure describes the relative capacity of each core. * On a heteregenous system, cores don't have the same computation capacity - * and we reflect that difference in the cpu_power field so the scheduler can - * take this difference into account during load balance. A per cpu structure - * is preferred because each CPU updates its own cpu_power field during the - * load balance except for idle cores. One idle core is selected to run the - * rebalance_domains for all idle cores and the cpu_power can be updated - * during this sequence. + * and we reflect that difference in the cpu_capacity field so the scheduler + * can take this difference into account during load balance. A per cpu + * structure is preferred because each CPU updates its own cpu_capacity field + * during the load balance except for idle cores. One idle core is selected + * to run the rebalance_domains for all idle cores and the cpu_capacity can be + * updated during this sequence. */ static DEFINE_PER_CPU(unsigned long, cpu_scale); -unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu) { return per_cpu(cpu_scale, cpu); } -static void set_power_scale(unsigned int cpu, unsigned long power) +static void set_capacity_scale(unsigned int cpu, unsigned long capacity) { - per_cpu(cpu_scale, cpu) = power; + per_cpu(cpu_scale, cpu) = capacity; } #ifdef CONFIG_OF @@ -62,11 +62,11 @@ struct cpu_efficiency { * Table of relative efficiency of each processors * The efficiency value must fit in 20bit and the final * cpu_scale value must be in the range - * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * 0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2 * in order to return at most 1 when DIV_ROUND_CLOSEST * is used to compute the capacity of a CPU. * Processors that are not defined in the table, - * use the default SCHED_POWER_SCALE value for cpu_scale. + * use the default SCHED_CAPACITY_SCALE value for cpu_scale. */ static const struct cpu_efficiency table_efficiency[] = { {"arm,cortex-a15", 3891}, @@ -83,9 +83,9 @@ static unsigned long middle_capacity = 1; * Iterate all CPUs' descriptor in DT and compute the efficiency * (as per table_efficiency). 
Also calculate a middle efficiency * as close as possible to (max{eff_i} - min{eff_i}) / 2 - * This is later used to scale the cpu_power field such that an - * 'average' CPU is of middle power. Also see the comments near - * table_efficiency[] and update_cpu_power(). + * This is later used to scale the cpu_capacity field such that an + * 'average' CPU is of middle capacity. Also see the comments near + * table_efficiency[] and update_cpu_capacity(). */ static void __init parse_dt_topology(void) { @@ -141,15 +141,15 @@ static void __init parse_dt_topology(void) * cpu_scale because all CPUs have the same capacity. Otherwise, we * compute a middle_capacity factor that will ensure that the capacity * of an 'average' CPU of the system will be as close as possible to - * SCHED_POWER_SCALE, which is the default value, but with the + * SCHED_CAPACITY_SCALE, which is the default value, but with the * constraint explained near table_efficiency[]. */ if (4*max_capacity < (3*(max_capacity + min_capacity))) middle_capacity = (min_capacity + max_capacity) - >> (SCHED_POWER_SHIFT+1); + >> (SCHED_CAPACITY_SHIFT+1); else middle_capacity = ((max_capacity / 3) - >> (SCHED_POWER_SHIFT-1)) + 1; + >> (SCHED_CAPACITY_SHIFT-1)) + 1; } @@ -158,20 +158,20 @@ static void __init parse_dt_topology(void) * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the * function returns directly for SMP system. */ -static void update_cpu_power(unsigned int cpu) +static void update_cpu_capacity(unsigned int cpu) { if (!cpu_capacity(cpu)) return; - set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity); + set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); - printk(KERN_INFO "CPU%u: update cpu_power %lu\n", - cpu, arch_scale_freq_power(NULL, cpu)); + printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n", + cpu, arch_scale_freq_capacity(NULL, cpu)); } #else static inline void parse_dt_topology(void) {} -static inline void update_cpu_power(unsigned int cpuid) {} +static inline void update_cpu_capacity(unsigned int cpuid) {} #endif /* @@ -267,7 +267,7 @@ void store_cpu_topology(unsigned int cpuid) update_siblings_masks(cpuid); - update_cpu_power(cpuid); + update_cpu_capacity(cpuid); printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, @@ -297,7 +297,7 @@ void __init init_cpu_topology(void) { unsigned int cpu; - /* init core mask and power*/ + /* init core mask and capacity */ for_each_possible_cpu(cpu) { struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]); @@ -307,7 +307,7 @@ void __init init_cpu_topology(void) cpumask_clear(&cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); - set_power_scale(cpu, SCHED_POWER_SCALE); + set_capacity_scale(cpu, SCHED_CAPACITY_SCALE); } smp_wmb(); diff --git a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c index b935ed2..85e0b0c0 100644 --- a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c +++ b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c @@ -208,3 +208,56 @@ void omap2xxx_clkt_vps_late_init(void) clk_put(c); } } + +#ifdef CONFIG_OF +#include <linux/clk-provider.h> +#include <linux/clkdev.h> + +static const struct clk_ops virt_prcm_set_ops = { + .recalc_rate = &omap2_table_mpu_recalc, + .set_rate = &omap2_select_table_rate, + .round_rate = &omap2_round_to_table_rate, +}; + +/** + * omap2xxx_clkt_vps_init - initialize virt_prcm_set clock + * + * Does a manual init for the virtual prcm DVFS clock for OMAP2. 
This + * function is called only from omap2 DT clock init, as the virtual + * node is not modelled in the DT clock data. + */ +void omap2xxx_clkt_vps_init(void) +{ + struct clk_init_data init = { NULL }; + struct clk_hw_omap *hw = NULL; + struct clk *clk; + const char *parent_name = "mpu_ck"; + struct clk_lookup *lookup = NULL; + + omap2xxx_clkt_vps_late_init(); + omap2xxx_clkt_vps_check_bootloader_rates(); + + hw = kzalloc(sizeof(*hw), GFP_KERNEL); + lookup = kzalloc(sizeof(*lookup), GFP_KERNEL); + if (!hw || !lookup) + goto cleanup; + init.name = "virt_prcm_set"; + init.ops = &virt_prcm_set_ops; + init.parent_names = &parent_name; + init.num_parents = 1; + + hw->hw.init = &init; + + clk = clk_register(NULL, &hw->hw); + + lookup->dev_id = NULL; + lookup->con_id = "cpufreq_ck"; + lookup->clk = clk; + + clkdev_add(lookup); + return; +cleanup: + kfree(hw); + kfree(lookup); +} +#endif diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index bda767a..12f54d4 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -178,17 +178,6 @@ struct clksel { const struct clksel_rate *rates; }; -struct clk_hw_omap_ops { - void (*find_idlest)(struct clk_hw_omap *oclk, - void __iomem **idlest_reg, - u8 *idlest_bit, u8 *idlest_val); - void (*find_companion)(struct clk_hw_omap *oclk, - void __iomem **other_reg, - u8 *other_bit); - void (*allow_idle)(struct clk_hw_omap *oclk); - void (*deny_idle)(struct clk_hw_omap *oclk); -}; - unsigned long omap_fixed_divisor_recalc(struct clk_hw *hw, unsigned long parent_rate); @@ -279,8 +268,6 @@ extern const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; extern const struct clk_hw_omap_ops clkhwops_apll54; extern const struct clk_hw_omap_ops clkhwops_apll96; -extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; -extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; /* clksel_rate blocks shared between OMAP44xx and AM33xx */ extern const struct clksel_rate div_1_0_rates[]; diff --git a/arch/arm/mach-omap2/clock2xxx.h b/arch/arm/mach-omap2/clock2xxx.h index 539dc08..45f41a4 100644 --- a/arch/arm/mach-omap2/clock2xxx.h +++ b/arch/arm/mach-omap2/clock2xxx.h @@ -21,10 +21,6 @@ unsigned long omap2xxx_sys_clk_recalc(struct clk_hw *clk, unsigned long parent_rate); unsigned long omap2_osc_clk_recalc(struct clk_hw *clk, unsigned long parent_rate); -unsigned long omap2_dpllcore_recalc(struct clk_hw *hw, - unsigned long parent_rate); -int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate, - unsigned long parent_rate); void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw); unsigned long omap2_clk_apll54_recalc(struct clk_hw *hw, unsigned long parent_rate); diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index fcd8036..6d7ba37 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ b/arch/arm/mach-omap2/dpll3xxx.c @@ -319,6 +319,15 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel) /* Set DPLL multiplier, divider */ v = omap2_clk_readl(clk, dd->mult_div1_reg); + + /* Handle Duty Cycle Correction */ + if (dd->dcc_mask) { + if (dd->last_rounded_rate >= dd->dcc_rate) + v |= dd->dcc_mask; /* Enable DCC */ + else + v &= ~dd->dcc_mask; /* Disable DCC */ + } + v &= ~(dd->mult_mask | dd->div1_mask); v |= dd->last_rounded_m << __ffs(dd->mult_mask); v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask); diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c index 
e4dec9f..9c6029b 100644 --- a/arch/arm/mach-tegra/board-paz00.c +++ b/arch/arm/mach-tegra/board-paz00.c @@ -23,9 +23,7 @@ #include "board.h" static struct rfkill_gpio_platform_data wifi_rfkill_platform_data = { - .name = "wifi_rfkill", - .reset_gpio = 25, /* PD1 */ - .shutdown_gpio = 85, /* PK5 */ + .name = "wifi_rfkill", .type = RFKILL_TYPE_WLAN, }; diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6f879c3..fb5503c 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -136,7 +136,7 @@ static u16 saved_regs(struct jit_ctx *ctx) u16 ret = 0; if ((ctx->skf->len > 1) || - (ctx->skf->insns[0].code == BPF_S_RET_A)) + (ctx->skf->insns[0].code == (BPF_RET | BPF_A))) ret |= 1 << r_A; #ifdef CONFIG_FRAME_POINTER @@ -164,18 +164,10 @@ static inline int mem_words_used(struct jit_ctx *ctx) static inline bool is_load_to_a(u16 inst) { switch (inst) { - case BPF_S_LD_W_LEN: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: - case BPF_S_ANC_CPU: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_QUEUE: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: return true; default: return false; @@ -215,7 +207,7 @@ static void build_prologue(struct jit_ctx *ctx) emit(ARM_MOV_I(r_X, 0), ctx); /* do not leak kernel data to userspace */ - if ((first_inst != BPF_S_RET_K) && !(is_load_to_a(first_inst))) + if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst))) emit(ARM_MOV_I(r_A, 0), ctx); /* stack space for the BPF_MEM words */ @@ -480,36 +472,39 @@ static int build_body(struct jit_ctx *ctx) u32 k; for (i = 0; i < prog->len; i++) { + u16 code; + inst = &(prog->insns[i]); /* K as an immediate value operand */ k = inst->k; + code = bpf_anc_helper(inst); /* compute offsets only in the fake pass */ if (ctx->target == NULL) ctx->offsets[i] = ctx->idx * 4; - switch (inst->code) { - case BPF_S_LD_IMM: + switch (code) { + case BPF_LD | BPF_IMM: emit_mov_i(r_A, k, ctx); break; - case BPF_S_LD_W_LEN: + case BPF_LD | BPF_W | BPF_LEN: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); emit(ARM_LDR_I(r_A, r_skb, offsetof(struct sk_buff, len)), ctx); break; - case BPF_S_LD_MEM: + case BPF_LD | BPF_MEM: /* A = scratch[k] */ ctx->seen |= SEEN_MEM_WORD(k); emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: load_order = 2; goto load; - case BPF_S_LD_H_ABS: + case BPF_LD | BPF_H | BPF_ABS: load_order = 1; goto load; - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_B | BPF_ABS: load_order = 0; load: /* the interpreter will deal with the negative K */ @@ -552,31 +547,31 @@ load_common: emit_err_ret(ARM_COND_NE, ctx); emit(ARM_MOV_R(r_A, ARM_R0), ctx); break; - case BPF_S_LD_W_IND: + case BPF_LD | BPF_W | BPF_IND: load_order = 2; goto load_ind; - case BPF_S_LD_H_IND: + case BPF_LD | BPF_H | BPF_IND: load_order = 1; goto load_ind; - case BPF_S_LD_B_IND: + case BPF_LD | BPF_B | BPF_IND: load_order = 0; load_ind: OP_IMM3(ARM_ADD, r_off, r_X, k, ctx); goto load_common; - case BPF_S_LDX_IMM: + case BPF_LDX | BPF_IMM: ctx->seen |= SEEN_X; emit_mov_i(r_X, k, ctx); break; - case BPF_S_LDX_W_LEN: + case BPF_LDX | BPF_W | BPF_LEN: ctx->seen |= SEEN_X | SEEN_SKB; emit(ARM_LDR_I(r_X, r_skb, offsetof(struct sk_buff, len)), ctx); break; - case BPF_S_LDX_MEM: + case BPF_LDX | BPF_MEM: 
ctx->seen |= SEEN_X | SEEN_MEM_WORD(k); emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: /* x = ((*(frame + k)) & 0xf) << 2; */ ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL; /* the interpreter should deal with the negative K */ @@ -606,113 +601,113 @@ load_ind: emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx); emit(ARM_LSL_I(r_X, r_X, 2), ctx); break; - case BPF_S_ST: + case BPF_ST: ctx->seen |= SEEN_MEM_WORD(k); emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_STX: + case BPF_STX: update_on_xread(ctx); ctx->seen |= SEEN_MEM_WORD(k); emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx); break; - case BPF_S_ALU_ADD_K: + case BPF_ALU | BPF_ADD | BPF_K: /* A += K */ OP_IMM3(ARM_ADD, r_A, r_A, k, ctx); break; - case BPF_S_ALU_ADD_X: + case BPF_ALU | BPF_ADD | BPF_X: update_on_xread(ctx); emit(ARM_ADD_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_SUB_K: + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ OP_IMM3(ARM_SUB, r_A, r_A, k, ctx); break; - case BPF_S_ALU_SUB_X: + case BPF_ALU | BPF_SUB | BPF_X: update_on_xread(ctx); emit(ARM_SUB_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_MUL_K: + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ emit_mov_i(r_scratch, k, ctx); emit(ARM_MUL(r_A, r_A, r_scratch), ctx); break; - case BPF_S_ALU_MUL_X: + case BPF_ALU | BPF_MUL | BPF_X: update_on_xread(ctx); emit(ARM_MUL(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_DIV_K: + case BPF_ALU | BPF_DIV | BPF_K: if (k == 1) break; emit_mov_i(r_scratch, k, ctx); emit_udiv(r_A, r_A, r_scratch, ctx); break; - case BPF_S_ALU_DIV_X: + case BPF_ALU | BPF_DIV | BPF_X: update_on_xread(ctx); emit(ARM_CMP_I(r_X, 0), ctx); emit_err_ret(ARM_COND_EQ, ctx); emit_udiv(r_A, r_A, r_X, ctx); break; - case BPF_S_ALU_OR_K: + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ OP_IMM3(ARM_ORR, r_A, r_A, k, ctx); break; - case BPF_S_ALU_OR_X: + case BPF_ALU | BPF_OR | BPF_X: update_on_xread(ctx); emit(ARM_ORR_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_XOR_K: + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K; */ OP_IMM3(ARM_EOR, r_A, r_A, k, ctx); break; - case BPF_S_ANC_ALU_XOR_X: - case BPF_S_ALU_XOR_X: + case BPF_ANC | SKF_AD_ALU_XOR_X: + case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ update_on_xread(ctx); emit(ARM_EOR_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_AND_K: + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ OP_IMM3(ARM_AND, r_A, r_A, k, ctx); break; - case BPF_S_ALU_AND_X: + case BPF_ALU | BPF_AND | BPF_X: update_on_xread(ctx); emit(ARM_AND_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_LSH_K: + case BPF_ALU | BPF_LSH | BPF_K: if (unlikely(k > 31)) return -1; emit(ARM_LSL_I(r_A, r_A, k), ctx); break; - case BPF_S_ALU_LSH_X: + case BPF_ALU | BPF_LSH | BPF_X: update_on_xread(ctx); emit(ARM_LSL_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_RSH_K: + case BPF_ALU | BPF_RSH | BPF_K: if (unlikely(k > 31)) return -1; emit(ARM_LSR_I(r_A, r_A, k), ctx); break; - case BPF_S_ALU_RSH_X: + case BPF_ALU | BPF_RSH | BPF_X: update_on_xread(ctx); emit(ARM_LSR_R(r_A, r_A, r_X), ctx); break; - case BPF_S_ALU_NEG: + case BPF_ALU | BPF_NEG: /* A = -A */ emit(ARM_RSB_I(r_A, r_A, 0), ctx); break; - case BPF_S_JMP_JA: + case BPF_JMP | BPF_JA: /* pc += K */ emit(ARM_B(b_imm(i + k + 1, ctx)), ctx); break; - case BPF_S_JMP_JEQ_K: + case BPF_JMP | BPF_JEQ | BPF_K: /* pc += (A == K) ? pc->jt : pc->jf */ condt = ARM_COND_EQ; goto cmp_imm; - case BPF_S_JMP_JGT_K: + case BPF_JMP | BPF_JGT | BPF_K: /* pc += (A > K) ? 
pc->jt : pc->jf */ condt = ARM_COND_HI; goto cmp_imm; - case BPF_S_JMP_JGE_K: + case BPF_JMP | BPF_JGE | BPF_K: /* pc += (A >= K) ? pc->jt : pc->jf */ condt = ARM_COND_HS; cmp_imm: @@ -731,22 +726,22 @@ cond_jump: _emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1, ctx)), ctx); break; - case BPF_S_JMP_JEQ_X: + case BPF_JMP | BPF_JEQ | BPF_X: /* pc += (A == X) ? pc->jt : pc->jf */ condt = ARM_COND_EQ; goto cmp_x; - case BPF_S_JMP_JGT_X: + case BPF_JMP | BPF_JGT | BPF_X: /* pc += (A > X) ? pc->jt : pc->jf */ condt = ARM_COND_HI; goto cmp_x; - case BPF_S_JMP_JGE_X: + case BPF_JMP | BPF_JGE | BPF_X: /* pc += (A >= X) ? pc->jt : pc->jf */ condt = ARM_COND_CS; cmp_x: update_on_xread(ctx); emit(ARM_CMP_R(r_A, r_X), ctx); goto cond_jump; - case BPF_S_JMP_JSET_K: + case BPF_JMP | BPF_JSET | BPF_K: /* pc += (A & K) ? pc->jt : pc->jf */ condt = ARM_COND_NE; /* not set iff all zeroes iff Z==1 iff EQ */ @@ -759,16 +754,16 @@ cmp_x: emit(ARM_TST_I(r_A, imm12), ctx); } goto cond_jump; - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_X: /* pc += (A & X) ? pc->jt : pc->jf */ update_on_xread(ctx); condt = ARM_COND_NE; emit(ARM_TST_R(r_A, r_X), ctx); goto cond_jump; - case BPF_S_RET_A: + case BPF_RET | BPF_A: emit(ARM_MOV_R(ARM_R0, r_A), ctx); goto b_epilogue; - case BPF_S_RET_K: + case BPF_RET | BPF_K: if ((k == 0) && (ctx->ret0_fp_idx < 0)) ctx->ret0_fp_idx = i; emit_mov_i(ARM_R0, k, ctx); @@ -776,17 +771,17 @@ b_epilogue: if (i != ctx->skf->len - 1) emit(ARM_B(b_imm(prog->len, ctx)), ctx); break; - case BPF_S_MISC_TAX: + case BPF_MISC | BPF_TAX: /* X = A */ ctx->seen |= SEEN_X; emit(ARM_MOV_R(r_X, r_A), ctx); break; - case BPF_S_MISC_TXA: + case BPF_MISC | BPF_TXA: /* A = X */ update_on_xread(ctx); emit(ARM_MOV_R(r_A, r_X), ctx); break; - case BPF_S_ANC_PROTOCOL: + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol) */ ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, @@ -795,7 +790,7 @@ b_epilogue: emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx); emit_swap16(r_A, r_scratch, ctx); break; - case BPF_S_ANC_CPU: + case BPF_ANC | SKF_AD_CPU: /* r_scratch = current_thread_info() */ OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx); /* A = current_thread_info()->cpu */ @@ -803,7 +798,7 @@ b_epilogue: off = offsetof(struct thread_info, cpu); emit(ARM_LDR_I(r_A, r_scratch, off), ctx); break; - case BPF_S_ANC_IFINDEX: + case BPF_ANC | SKF_AD_IFINDEX: /* A = skb->dev->ifindex */ ctx->seen |= SEEN_SKB; off = offsetof(struct sk_buff, dev); @@ -817,30 +812,30 @@ b_epilogue: off = offsetof(struct net_device, ifindex); emit(ARM_LDR_I(r_A, r_scratch, off), ctx); break; - case BPF_S_ANC_MARK: + case BPF_ANC | SKF_AD_MARK: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); off = offsetof(struct sk_buff, mark); emit(ARM_LDR_I(r_A, r_skb, off), ctx); break; - case BPF_S_ANC_RXHASH: + case BPF_ANC | SKF_AD_RXHASH: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); off = offsetof(struct sk_buff, hash); emit(ARM_LDR_I(r_A, r_skb, off), ctx); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); off = offsetof(struct sk_buff, vlan_tci); emit(ARM_LDRH_I(r_A, r_skb, off), ctx); - if (inst->code == BPF_S_ANC_VLAN_TAG) + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx); else OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx); break; - case 
BPF_S_ANC_QUEUE: + case BPF_ANC | SKF_AD_QUEUE: ctx->seen |= SEEN_SKB; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig index 1759fad..e66ba31 100644 --- a/arch/blackfin/configs/BF526-EZBRD_defconfig +++ b/arch/blackfin/configs/BF526-EZBRD_defconfig @@ -53,7 +53,6 @@ CONFIG_IP_PNP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -63,6 +62,7 @@ CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=m +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set diff --git a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig index 3577296..0207c58 100644 --- a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig +++ b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig @@ -58,7 +58,6 @@ CONFIG_BFIN_SIR0=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=m CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=m CONFIG_MTD_RAM=y @@ -66,6 +65,7 @@ CONFIG_MTD_ROM=m CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=m +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig index 2e73a5d..99c131b 100644 --- a/arch/blackfin/configs/BF527-EZKIT_defconfig +++ b/arch/blackfin/configs/BF527-EZKIT_defconfig @@ -57,7 +57,6 @@ CONFIG_BFIN_SIR0=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=m CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=m CONFIG_MTD_RAM=y @@ -65,6 +64,7 @@ CONFIG_MTD_ROM=m CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=m +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig index f0a2ddf..38cb17d 100644 --- a/arch/blackfin/configs/BF548-EZKIT_defconfig +++ b/arch/blackfin/configs/BF548-EZKIT_defconfig @@ -64,7 +64,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -75,6 +74,7 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_BF5XX=y # CONFIG_MTD_NAND_BF5XX_HWECC is not set +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig index 4ca39ab..a7e9bfd 100644 --- a/arch/blackfin/configs/BF609-EZKIT_defconfig +++ b/arch/blackfin/configs/BF609-EZKIT_defconfig @@ -57,7 +57,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -65,6 +64,7 @@ CONFIG_MTD_CFI_STAA=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_M25P80=y +CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y diff --git a/arch/blackfin/configs/BlackStamp_defconfig b/arch/blackfin/configs/BlackStamp_defconfig index 3853c47..f4a9200 100644 --- a/arch/blackfin/configs/BlackStamp_defconfig +++ b/arch/blackfin/configs/BlackStamp_defconfig @@ -45,7 +45,6 @@ 
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=m CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=m CONFIG_MTD_CFI_AMDSTD=m @@ -53,7 +52,7 @@ CONFIG_MTD_RAM=y CONFIG_MTD_ROM=m CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_M25P80=y -# CONFIG_M25PXX_USE_FAST_READ is not set +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/blackfin/configs/H8606_defconfig b/arch/blackfin/configs/H8606_defconfig index f754e49..0ff97d8 100644 --- a/arch/blackfin/configs/H8606_defconfig +++ b/arch/blackfin/configs/H8606_defconfig @@ -36,13 +36,12 @@ CONFIG_IRTTY_SIR=m # CONFIG_WIRELESS is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_ROM=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_M25P80=y -# CONFIG_M25PXX_USE_FAST_READ is not set +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_MISC_DEVICES=y CONFIG_EEPROM_AT25=y diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h index 8d1e4c2..40e9c2b 100644 --- a/arch/blackfin/include/asm/dma.h +++ b/arch/blackfin/include/asm/dma.h @@ -316,6 +316,8 @@ static inline void disable_dma(unsigned int channel) } static inline void enable_dma(unsigned int channel) { + dma_ch[channel].regs->curr_x_count = 0; + dma_ch[channel].regs->curr_y_count = 0; dma_ch[channel].regs->cfg |= DMAEN; } int set_dma_callback(unsigned int channel, irq_handler_t callback, void *data); diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c index d098929..6f4bac9 100644 --- a/arch/blackfin/mach-bf533/boards/stamp.c +++ b/arch/blackfin/mach-bf533/boards/stamp.c @@ -17,6 +17,7 @@ #if IS_ENABLED(CONFIG_USB_ISP1362_HCD) #include <linux/usb/isp1362.h> #endif +#include <linux/gpio.h> #include <linux/irq.h> #include <linux/i2c.h> #include <asm/dma.h> diff --git a/arch/mips/bcm47xx/sprom.c b/arch/mips/bcm47xx/sprom.c index a8b5408..da4cdb1 100644 --- a/arch/mips/bcm47xx/sprom.c +++ b/arch/mips/bcm47xx/sprom.c @@ -168,6 +168,7 @@ static void nvram_read_alpha2(const char *prefix, const char *name, static void bcm47xx_fill_sprom_r1234589(struct ssb_sprom *sprom, const char *prefix, bool fallback) { + nvram_read_u16(prefix, NULL, "devid", &sprom->dev_id, 0, fallback); nvram_read_u8(prefix, NULL, "ledbh0", &sprom->gpio0, 0xff, fallback); nvram_read_u8(prefix, NULL, "ledbh1", &sprom->gpio1, 0xff, fallback); nvram_read_u8(prefix, NULL, "ledbh2", &sprom->gpio2, 0xff, fallback); diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 21c9f30..790352f 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -235,11 +235,6 @@ config PPC_EARLY_DEBUG_USBGECKO Select this to enable early debugging for Nintendo GameCube/Wii consoles via an external USB Gecko adapter. 
-config PPC_EARLY_DEBUG_WSP - bool "Early debugging via WSP's internal UART" - depends on PPC_WSP - select PPC_UDBG_16550 - config PPC_EARLY_DEBUG_PS3GELIC bool "Early debugging through the PS3 Ethernet port" depends on PPC_PS3 diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 426dce7..ccc25ed 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -333,8 +333,8 @@ $(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz $(obj)/zImage.initrd.%: vmlinux $(wrapperbits) $(call if_changed,wrap,$*,,,$(obj)/ramdisk.image.gz) -$(obj)/zImage.%: vmlinux $(wrapperbits) - $(call if_changed,wrap,$*) +$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits) + $(call if_changed,wrap,$(subst $(obj)/zImage.,,$@)) # dtbImage% - a dtbImage is a zImage with an embedded device tree blob $(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig deleted file mode 100644 index 4f35fc4..0000000 --- a/arch/powerpc/configs/chroma_defconfig +++ /dev/null @@ -1,307 +0,0 @@ -CONFIG_PPC64=y -CONFIG_PPC_BOOK3E_64=y -# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set -CONFIG_SMP=y -CONFIG_NR_CPUS=256 -CONFIG_EXPERIMENTAL=y -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_AUDIT=y -CONFIG_AUDITSYSCALL=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=19 -CONFIG_CGROUPS=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEMCG=y -CONFIG_CGROUP_MEMCG_SWAP=y -CONFIG_NAMESPACES=y -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_INITRAMFS_COMPRESSION_GZIP=y -CONFIG_KALLSYMS_ALL=y -CONFIG_EMBEDDED=y -CONFIG_PERF_EVENTS=y -CONFIG_PROFILING=y -CONFIG_OPROFILE=y -CONFIG_KPROBES=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_SCOM_DEBUGFS=y -CONFIG_PPC_A2_DD2=y -CONFIG_KVM_GUEST=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_HZ_100=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_BINFMT_MISC=y -CONFIG_NUMA=y -# CONFIG_MIGRATION is not set -CONFIG_PPC_64K_PAGES=y -CONFIG_SCHED_SMT=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" -# CONFIG_SECCOMP is not set -CONFIG_PCIEPORTBUS=y -# CONFIG_PCIEASPM is not set -CONFIG_PCI_MSI=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=m -CONFIG_XFRM_SUB_POLICY=y -CONFIG_XFRM_STATISTICS=y -CONFIG_NET_KEY=m -CONFIG_NET_KEY_MIGRATE=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_NET_IPIP=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_IPV6=y -CONFIG_IPV6_PRIVACY=y -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_ROUTE_INFO=y -CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_INET6_AH=y -CONFIG_INET6_ESP=y -CONFIG_INET6_IPCOMP=y -CONFIG_IPV6_MIP6=y -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=y -CONFIG_IPV6_TUNNEL=y -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_PIMSM_V2=y -CONFIG_NETFILTER=y -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_UDPLITE=m 
-CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_NET_TCPPROBE=y -# CONFIG_WIRELESS is not set -CONFIG_NET_9P=y -CONFIG_NET_9P_DEBUG=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_DEVTMPFS=y -CONFIG_MTD=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_LE_BYTE_SWAP=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_CFI_STAA=y -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=y -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=65536 -CONFIG_CDROM_PKTCDVD=y -CONFIG_MISC_DEVICES=y -CONFIG_BLK_DEV_SD=y -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_SPI_ATTRS=y -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SRP_ATTRS=y -CONFIG_ATA=y -CONFIG_SATA_AHCI=y -CONFIG_SATA_SIL24=y -CONFIG_SATA_MV=y -CONFIG_SATA_SIL=y -CONFIG_PATA_CMD64X=y -CONFIG_PATA_MARVELL=y -CONFIG_PATA_SIL680=y -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=y -CONFIG_BLK_DEV_DM=y -CONFIG_DM_CRYPT=y -CONFIG_DM_SNAPSHOT=y -CONFIG_DM_MIRROR=y -CONFIG_DM_ZERO=y -CONFIG_DM_UEVENT=y -CONFIG_NETDEVICES=y -CONFIG_TUN=y -CONFIG_E1000E=y -CONFIG_TIGON3=y -# CONFIG_WLAN is not set -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_HW_RANDOM=y -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=1024 -# CONFIG_HWMON is not set -# CONFIG_VGA_ARB is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_DS1511=y -CONFIG_RTC_DRV_DS1553=y -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y 
-CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=y -# CONFIG_DNOTIFY is not set -CONFIG_FUSE_FS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_CONFIGFS_FS=m -CONFIG_CRAMFS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_NFS_V4_1=y -CONFIG_ROOT_NFS=y -CONFIG_CIFS=y -CONFIG_CIFS_WEAK_PW_HASH=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=y -CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_CCITT=m -CONFIG_CRC_T10DIF=y -CONFIG_LIBCRC32C=m -CONFIG_PRINTK_TIME=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_STRIP_ASM_SYMS=y -CONFIG_DETECT_HUNG_TASK=y -# CONFIG_SCHED_DEBUG is not set -CONFIG_DEBUG_INFO=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_PPC_EMULATED_STATS=y -CONFIG_XMON=y -CONFIG_XMON_DEFAULT=y -CONFIG_IRQ_DOMAIN_DEBUG=y -CONFIG_PPC_EARLY_DEBUG=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_LZO=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_VIRTUALIZATION=y diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index f42e9ba..7c8608b 100644 --- a/arch/powerpc/include/asm/cpm2.h +++ b/arch/powerpc/include/asm/cpm2.h @@ -489,7 +489,6 @@ typedef struct scc_trans { #define FCC_GFMR_TCI ((uint)0x20000000) #define FCC_GFMR_TRX ((uint)0x10000000) #define FCC_GFMR_TTX ((uint)0x08000000) -#define FCC_GFMR_TTX ((uint)0x08000000) #define FCC_GFMR_CDP ((uint)0x04000000) #define FCC_GFMR_CTSP ((uint)0x02000000) #define FCC_GFMR_CDS ((uint)0x01000000) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index b76f58c..fab7743 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -254,6 +254,7 @@ void *eeh_pe_traverse(struct eeh_pe *root, void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag); void eeh_pe_restore_bars(struct eeh_pe *pe); +const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); void *eeh_dev_init(struct device_node *dn, void *data); diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index 89d5670..1e551a2 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h @@ -33,7 +33,7 @@ struct eeh_event { int eeh_event_init(void); int eeh_send_failure_event(struct eeh_pe *pe); -void eeh_remove_event(struct eeh_pe *pe); +void eeh_remove_event(struct eeh_pe *pe, bool force); void eeh_handle_event(struct eeh_pe *pe); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 901dac6..d0918e0 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -223,10 +223,6 @@ typedef struct { unsigned int id; unsigned int active; unsigned long vdso_base; -#ifdef CONFIG_PPC_ICSWX - struct spinlock 
*cop_lockp; /* guard cop related stuff */ - unsigned long acop; /* mask of enabled coprocessor types */ -#endif /* CONFIG_PPC_ICSWX */ #ifdef CONFIG_PPC_MM_SLICES u64 low_slices_psize; /* SLB page size encodings */ u64 high_slices_psize; /* 4 bits per slice for now */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index cb15cbb..4600188 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -599,9 +599,9 @@ enum { }; struct OpalIoPhbErrorCommon { - uint32_t version; - uint32_t ioType; - uint32_t len; + __be32 version; + __be32 ioType; + __be32 len; }; struct OpalIoP7IOCPhbErrorData { @@ -666,64 +666,64 @@ struct OpalIoP7IOCPhbErrorData { struct OpalIoPhb3ErrorData { struct OpalIoPhbErrorCommon common; - uint32_t brdgCtl; + __be32 brdgCtl; /* PHB3 UTL regs */ - uint32_t portStatusReg; - uint32_t rootCmplxStatus; - uint32_t busAgentStatus; + __be32 portStatusReg; + __be32 rootCmplxStatus; + __be32 busAgentStatus; /* PHB3 cfg regs */ - uint32_t deviceStatus; - uint32_t slotStatus; - uint32_t linkStatus; - uint32_t devCmdStatus; - uint32_t devSecStatus; + __be32 deviceStatus; + __be32 slotStatus; + __be32 linkStatus; + __be32 devCmdStatus; + __be32 devSecStatus; /* cfg AER regs */ - uint32_t rootErrorStatus; - uint32_t uncorrErrorStatus; - uint32_t corrErrorStatus; - uint32_t tlpHdr1; - uint32_t tlpHdr2; - uint32_t tlpHdr3; - uint32_t tlpHdr4; - uint32_t sourceId; + __be32 rootErrorStatus; + __be32 uncorrErrorStatus; + __be32 corrErrorStatus; + __be32 tlpHdr1; + __be32 tlpHdr2; + __be32 tlpHdr3; + __be32 tlpHdr4; + __be32 sourceId; - uint32_t rsv3; + __be32 rsv3; /* Record data about the call to allocate a buffer */ - uint64_t errorClass; - uint64_t correlator; + __be64 errorClass; + __be64 correlator; - uint64_t nFir; /* 000 */ - uint64_t nFirMask; /* 003 */ - uint64_t nFirWOF; /* 008 */ + __be64 nFir; /* 000 */ + __be64 nFirMask; /* 003 */ + __be64 nFirWOF; /* 008 */ /* PHB3 MMIO Error Regs */ - uint64_t phbPlssr; /* 120 */ - uint64_t phbCsr; /* 110 */ - uint64_t lemFir; /* C00 */ - uint64_t lemErrorMask; /* C18 */ - uint64_t lemWOF; /* C40 */ - uint64_t phbErrorStatus; /* C80 */ - uint64_t phbFirstErrorStatus; /* C88 */ - uint64_t phbErrorLog0; /* CC0 */ - uint64_t phbErrorLog1; /* CC8 */ - uint64_t mmioErrorStatus; /* D00 */ - uint64_t mmioFirstErrorStatus; /* D08 */ - uint64_t mmioErrorLog0; /* D40 */ - uint64_t mmioErrorLog1; /* D48 */ - uint64_t dma0ErrorStatus; /* D80 */ - uint64_t dma0FirstErrorStatus; /* D88 */ - uint64_t dma0ErrorLog0; /* DC0 */ - uint64_t dma0ErrorLog1; /* DC8 */ - uint64_t dma1ErrorStatus; /* E00 */ - uint64_t dma1FirstErrorStatus; /* E08 */ - uint64_t dma1ErrorLog0; /* E40 */ - uint64_t dma1ErrorLog1; /* E48 */ - uint64_t pestA[OPAL_PHB3_NUM_PEST_REGS]; - uint64_t pestB[OPAL_PHB3_NUM_PEST_REGS]; + __be64 phbPlssr; /* 120 */ + __be64 phbCsr; /* 110 */ + __be64 lemFir; /* C00 */ + __be64 lemErrorMask; /* C18 */ + __be64 lemWOF; /* C40 */ + __be64 phbErrorStatus; /* C80 */ + __be64 phbFirstErrorStatus; /* C88 */ + __be64 phbErrorLog0; /* CC0 */ + __be64 phbErrorLog1; /* CC8 */ + __be64 mmioErrorStatus; /* D00 */ + __be64 mmioFirstErrorStatus; /* D08 */ + __be64 mmioErrorLog0; /* D40 */ + __be64 mmioErrorLog1; /* D48 */ + __be64 dma0ErrorStatus; /* D80 */ + __be64 dma0FirstErrorStatus; /* D88 */ + __be64 dma0ErrorLog0; /* DC0 */ + __be64 dma0ErrorLog1; /* DC8 */ + __be64 dma1ErrorStatus; /* E00 */ + __be64 dma1FirstErrorStatus; /* E08 */ + __be64 dma1ErrorLog0; /* E40 */ + __be64 dma1ErrorLog1; /* E48 */ 
+ __be64 pestA[OPAL_PHB3_NUM_PEST_REGS]; + __be64 pestB[OPAL_PHB3_NUM_PEST_REGS]; }; enum { @@ -851,8 +851,8 @@ int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t erro int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action); int64_t opal_get_epow_status(__be64 *status); int64_t opal_set_system_attention_led(uint8_t led_action); -int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe, - uint16_t *pci_error_type, uint16_t *severity); +int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe, + __be16 *pci_error_type, __be16 *severity); int64_t opal_pci_poll(uint64_t phb_id); int64_t opal_return_cpu(void); int64_t opal_reinit_cpus(uint64_t flags); diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h index 3d52a11..3ba9c6f 100644 --- a/arch/powerpc/include/asm/reg_a2.h +++ b/arch/powerpc/include/asm/reg_a2.h @@ -110,15 +110,6 @@ #define TLB1_UR ASM_CONST(0x0000000000000002) #define TLB1_SR ASM_CONST(0x0000000000000001) -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP -#define WSP_UART_PHYS 0xffc000c000 -/* This needs to be careful chosen to hit a !0 congruence class - * in the TLB since we bolt it in way 3, which is already occupied - * by our linear mapping primary bolted entry in CC 0. - */ -#define WSP_UART_VIRT 0xf000000000001000 -#endif - /* A2 erativax attributes definitions */ #define ERATIVAX_RS_IS_ALL 0x000 #define ERATIVAX_RS_IS_TID 0x040 diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 0e83e7d..58abeda 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -16,13 +16,15 @@ struct thread_struct; extern struct task_struct *_switch(struct thread_struct *prev, struct thread_struct *next); #ifdef CONFIG_PPC_BOOK3S_64 -static inline void save_tar(struct thread_struct *prev) +static inline void save_early_sprs(struct thread_struct *prev) { if (cpu_has_feature(CPU_FTR_ARCH_207S)) prev->tar = mfspr(SPRN_TAR); + if (cpu_has_feature(CPU_FTR_DSCR)) + prev->dscr = mfspr(SPRN_DSCR); } #else -static inline void save_tar(struct thread_struct *prev) {} +static inline void save_early_sprs(struct thread_struct *prev) {} #endif extern void enable_kernel_fp(void); @@ -84,6 +86,8 @@ static inline void clear_task_ebb(struct task_struct *t) { #ifdef CONFIG_PPC_BOOK3S_64 /* EBB perf events are not inherited, so clear all EBB state. */ + t->thread.ebbrr = 0; + t->thread.ebbhr = 0; t->thread.bescr = 0; t->thread.mmcr2 = 0; t->thread.mmcr0 = 0; diff --git a/arch/powerpc/include/asm/wsp.h b/arch/powerpc/include/asm/wsp.h deleted file mode 100644 index c7dc830..0000000 --- a/arch/powerpc/include/asm/wsp.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright 2011 Michael Ellerman, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#ifndef __ASM_POWERPC_WSP_H -#define __ASM_POWERPC_WSP_H - -extern int wsp_get_chip_id(struct device_node *dn); - -#endif /* __ASM_POWERPC_WSP_H */ diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 5b76579..de2c0e4 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -41,5 +41,6 @@ #define PPC_FEATURE2_EBB 0x10000000 #define PPC_FEATURE2_ISEL 0x08000000 #define PPC_FEATURE2_TAR 0x04000000 +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fab19ec..670c312 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -43,7 +43,6 @@ obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o -obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S deleted file mode 100644 index 61f079e..0000000 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ /dev/null @@ -1,120 +0,0 @@ -/* - * A2 specific assembly support code - * - * Copyright 2009 Ben Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/asm-offsets.h> -#include <asm/ppc_asm.h> -#include <asm/ppc-opcode.h> -#include <asm/processor.h> -#include <asm/reg_a2.h> -#include <asm/reg.h> -#include <asm/thread_info.h> - -/* - * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity. - * This also prevents external LPID accesses but that isn't a problem when not a - * guest. Under PV, this setting will be ignored and MMUCR will return the right - * number of PID bits we can use. - */ -#define MMUCR1_EXTEND_PID \ - (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \ - MMUCR1_DTTID | MMUCR1_DCCD) - -/* - * Use extended PIDs if enabled. - * Don't clear the ERATs on context sync events and enable I & D LRU. - * Enable ERAT back invalidate when tlbwe overwrites an entry. - */ -#define INITIAL_MMUCR1 \ - (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \ - MMUCR1_DRRE | MMUCR1_TLBWE_BINV) - -_GLOBAL(__setup_cpu_a2) - /* Some of these are actually thread local and some are - * core local but doing it always won't hurt - */ - -#ifdef CONFIG_PPC_ICSWX - /* Make sure ACOP starts out as zero */ - li r3,0 - mtspr SPRN_ACOP,r3 - - /* Skip the following if we are in Guest mode */ - mfmsr r3 - andis. r0,r3,MSR_GS@h - bne _icswx_skip_guest - - /* Enable icswx instruction */ - mfspr r3,SPRN_A2_CCR2 - ori r3,r3,A2_CCR2_ENABLE_ICSWX - mtspr SPRN_A2_CCR2,r3 - - /* Unmask all CTs in HACOP */ - li r3,-1 - mtspr SPRN_HACOP,r3 -_icswx_skip_guest: -#endif /* CONFIG_PPC_ICSWX */ - - /* Enable doorbell */ - mfspr r3,SPRN_A2_CCR2 - oris r3,r3,A2_CCR2_ENABLE_PC@h - mtspr SPRN_A2_CCR2,r3 - isync - - /* Setup CCR0 to disable power saving for now as it's busted - * in the current implementations. Setup CCR1 to wake on - * interrupts normally (we write the default value but who - * knows what FW may have clobbered...) 
- */ - li r3,0 - mtspr SPRN_A2_CCR0, r3 - LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f) - mtspr SPRN_A2_CCR1, r3 - - /* Initialise MMUCR1 */ - lis r3,INITIAL_MMUCR1@h - ori r3,r3,INITIAL_MMUCR1@l - mtspr SPRN_MMUCR1,r3 - - /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ - LOAD_REG_IMMEDIATE(r3, 0x000a7531) - mtspr SPRN_MMUCR2,r3 - - /* Set MMUCR3 to write all thids bit to the TLB */ - LOAD_REG_IMMEDIATE(r3, 0x0000000f) - mtspr SPRN_MMUCR3,r3 - - /* Don't do ERAT stuff if running guest mode */ - mfmsr r3 - andis. r0,r3,MSR_GS@h - bne 1f - - /* Now set the I-ERAT watermark to 15 */ - lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h - mtspr SPRN_MMUCR0, r4 - li r4,A2_IERAT_SIZE-1 - PPC_ERATWE(R4,R4,3) - - /* Now set the D-ERAT watermark to 31 */ - lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h - mtspr SPRN_MMUCR0, r4 - li r4,A2_DERAT_SIZE-1 - PPC_ERATWE(R4,R4,3) - - /* And invalidate the beast just in case. That won't get rid of - * a bolted entry though it will be in LRU and so will go away eventually - * but let's not bother for now - */ - PPC_ERATILX(0,0,R0) -1: - blr - -_GLOBAL(__restore_cpu_a2) - b __setup_cpu_a2 diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 1557e7c..4673353 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -56,6 +56,7 @@ _GLOBAL(__setup_cpu_power8) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH bl __init_LPCR bl __init_HFSCR bl __init_tlb_power8 @@ -74,6 +75,7 @@ _GLOBAL(__restore_cpu_power8) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH bl __init_LPCR bl __init_HFSCR bl __init_tlb_power8 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c1faade..965291b 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -109,7 +109,8 @@ extern void __restore_cpu_e6500(void); PPC_FEATURE_PSERIES_PERFMON_COMPAT) #define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \ - PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR) + PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ + PPC_FEATURE2_VEC_CRYPTO) #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_HAS_ALTIVEC_COMP) @@ -2148,44 +2149,6 @@ static struct cpu_spec __initdata cpu_specs[] = { } #endif /* CONFIG_PPC32 */ #endif /* CONFIG_E500 */ - -#ifdef CONFIG_PPC_A2 - { /* Standard A2 (>= DD2) + FPU core */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00480000, - .cpu_name = "A2 (>= DD2)", - .cpu_features = CPU_FTRS_A2, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTRS_A2, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 0, - .cpu_setup = __setup_cpu_a2, - .cpu_restore = __restore_cpu_a2, - .machine_check = machine_check_generic, - .platform = "ppca2", - }, - { /* This is a default entry to get going, to be replaced by - * a real one at some stage - */ -#define CPU_FTRS_BASE_BOOK3E (CPU_FTR_USE_TB | \ - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "Book3E", - .cpu_features = CPU_FTRS_BASE_BOOK3E, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | - MMU_FTR_USE_TLBIVAX_BCAST | - MMU_FTR_LOCK_BCAST_INVAL, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 0, - .machine_check = machine_check_generic, - .platform = "power6", - }, -#endif /* CONFIG_PPC_A2 */ }; static struct cpu_spec the_cpu_spec; 
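
[Editor's note, not part of the patch above] The uapi cputable.h and cputable.c hunks above add PPC_FEATURE2_VEC_CRYPTO (0x02000000) to the POWER8 user feature mask, which the kernel exports to userspace through the AT_HWCAP2 auxiliary vector entry. A minimal userspace sketch of how a program could probe the new bit, assuming glibc's getauxval(); the AT_HWCAP2 tag value (26) and the feature bit are duplicated here only for illustration:

#include <stdio.h>
#include <sys/auxv.h>

#ifndef AT_HWCAP2
#define AT_HWCAP2 26				/* standard auxv tag, guarded for older headers */
#endif
#define PPC_FEATURE2_VEC_CRYPTO	0x02000000	/* matches the uapi value added above */

int main(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	printf("POWER8 vector crypto: %s\n",
	       (hwcap2 & PPC_FEATURE2_VEC_CRYPTO) ? "advertised" : "not advertised");
	return 0;
}
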
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 7051ea3..86e2570 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -330,8 +330,8 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); eeh_serialize_unlock(flags); - pr_err("EEH: PHB#%x failure detected\n", - phb_pe->phb->global_number); + pr_err("EEH: PHB#%x failure detected, location: %s\n", + phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); dump_stack(); eeh_send_failure_event(phb_pe); @@ -358,10 +358,11 @@ out: int eeh_dev_check_failure(struct eeh_dev *edev) { int ret; + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); unsigned long flags; struct device_node *dn; struct pci_dev *dev; - struct eeh_pe *pe; + struct eeh_pe *pe, *parent_pe, *phb_pe; int rc = 0; const char *location; @@ -439,14 +440,34 @@ int eeh_dev_check_failure(struct eeh_dev *edev) */ if ((ret < 0) || (ret == EEH_STATE_NOT_SUPPORT) || - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + ((ret & active_flags) == active_flags)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; goto dn_unlock; } + /* + * It should be corner case that the parent PE has been + * put into frozen state as well. We should take care + * that at first. + */ + parent_pe = pe->parent; + while (parent_pe) { + /* Hit the ceiling ? */ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + ret = eeh_ops->get_state(parent_pe, NULL); + if (ret > 0 && + (ret & active_flags) != active_flags) + pe = parent_pe; + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + eeh_stats.slot_resets++; /* Avoid repeated reports of this failure, including problems @@ -460,8 +481,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * a stack trace will help the device-driver authors figure * out what happened. So print that out. */ - pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", - pe->addr, pe->phb->global_number); + phb_pe = eeh_phb_pe_get(pe->phb); + pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", + pe->phb->global_number, pe->addr); + pr_err("EEH: PE location: %s, PHB location: %s\n", + eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); dump_stack(); eeh_send_failure_event(pe); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7100a5b..420da61 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -447,8 +447,9 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) * PE reset (for 3 times), we try to clear the frozen state * for 3 times as well. */ -static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { + struct eeh_pe *pe = (struct eeh_pe *)data; int i, rc; for (i = 0; i < 3; i++) { @@ -461,13 +462,24 @@ static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) } /* The PE has been isolated, clear it */ - if (rc) + if (rc) { pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", __func__, pe->phb->global_number, pe->addr, rc); - else + return (void *)pe; + } + + return NULL; +} + +static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +{ + void *rc; + + rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL); + if (!rc) eeh_pe_state_clear(pe, EEH_PE_ISOLATED); - return rc; + return rc ? 
-EIO : 0; } /** @@ -758,7 +770,7 @@ static void eeh_handle_special_event(void) eeh_serialize_lock(&flags); /* Purge all events */ - eeh_remove_event(NULL); + eeh_remove_event(NULL, true); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); @@ -777,7 +789,7 @@ static void eeh_handle_special_event(void) eeh_serialize_lock(&flags); /* Purge all events of the PHB */ - eeh_remove_event(pe); + eeh_remove_event(pe, true); if (rc == EEH_NEXT_ERR_DEAD_PHB) eeh_pe_state_mark(pe, EEH_PE_ISOLATED); diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index 72d748b..4eefb6e 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -152,24 +152,33 @@ int eeh_send_failure_event(struct eeh_pe *pe) /** * eeh_remove_event - Remove EEH event from the queue * @pe: Event binding to the PE + * @force: Event will be removed unconditionally * * On PowerNV platform, we might have subsequent coming events * is part of the former one. For that case, those subsequent * coming events are totally duplicated and unnecessary, thus * they should be removed. */ -void eeh_remove_event(struct eeh_pe *pe) +void eeh_remove_event(struct eeh_pe *pe, bool force) { unsigned long flags; struct eeh_event *event, *tmp; + /* + * If we have NULL PE passed in, we have dead IOC + * or we're sure we can report all existing errors + * by the caller. + * + * With "force", the event with associated PE that + * have been isolated, the event won't be removed + * to avoid event lost. + */ spin_lock_irqsave(&eeh_eventlist_lock, flags); list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) { - /* - * If we don't have valid PE passed in, that means - * we already have event corresponding to dead IOC - * and all events should be purged. - */ + if (!force && event->pe && + (event->pe->state & EEH_PE_ISOLATED)) + continue; + if (!pe) { list_del(&event->list); kfree(event); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 995c2a2..fbd01eb 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -792,6 +792,66 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) } /** + * eeh_pe_loc_get - Retrieve location code binding to the given PE + * @pe: EEH PE + * + * Retrieve the location code of the given PE. If the primary PE bus + * is root bus, we will grab location code from PHB device tree node + * or root port. Otherwise, the upstream bridge's device tree node + * of the primary PE bus will be checked for the location code. + */ +const char *eeh_pe_loc_get(struct eeh_pe *pe) +{ + struct pci_controller *hose; + struct pci_bus *bus = eeh_pe_bus_get(pe); + struct pci_dev *pdev; + struct device_node *dn; + const char *loc; + + if (!bus) + return "N/A"; + + /* PHB PE or root PE ? 
*/ + if (pci_is_root_bus(bus)) { + hose = pci_bus_to_host(bus); + loc = of_get_property(hose->dn, + "ibm,loc-code", NULL); + if (loc) + return loc; + loc = of_get_property(hose->dn, + "ibm,io-base-loc-code", NULL); + if (loc) + return loc; + + pdev = pci_get_slot(bus, 0x0); + } else { + pdev = bus->self; + } + + if (!pdev) { + loc = "N/A"; + goto out; + } + + dn = pci_device_to_OF_node(pdev); + if (!dn) { + loc = "N/A"; + goto out; + } + + loc = of_get_property(dn, "ibm,loc-code", NULL); + if (!loc) + loc = of_get_property(dn, "ibm,slot-location-code", NULL); + if (!loc) + loc = "N/A"; + +out: + if (pci_is_root_bus(bus) && pdev) + pci_dev_put(pdev); + return loc; +} + +/** * eeh_pe_bus_get - Retrieve PCI bus according to the given PE * @pe: EEH PE * diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 911d453..6528c5e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -428,12 +428,6 @@ BEGIN_FTR_SECTION std r24,THREAD_VRSAVE(r3) END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_PPC64 -BEGIN_FTR_SECTION - mfspr r25,SPRN_DSCR - std r25,THREAD_DSCR(r3) -END_FTR_SECTION_IFSET(CPU_FTR_DSCR) -#endif and. r0,r0,r22 beq+ 1f andc r22,r22,r0 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 771b4e9..bb9cac6 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1467,22 +1467,6 @@ a2_tlbinit_after_linear_map: .globl a2_tlbinit_after_iprot_flush a2_tlbinit_after_iprot_flush: -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP - /* Now establish early debug mappings if applicable */ - /* Restore the MAS0 we used for linear mapping load */ - mtspr SPRN_MAS0,r11 - - lis r3,(MAS1_VALID | MAS1_IPROT)@h - ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT) - mtspr SPRN_MAS1,r3 - LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G) - mtspr SPRN_MAS2,r3 - LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW) - mtspr SPRN_MAS7_MAS3,r3 - /* re-use the MAS8 value from the linear mapping */ - tlbwe -#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ - PPC_TLBILX(0,0,R0) sync isync diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 20f11eb..a7d36b1 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -439,9 +439,9 @@ BEGIN_FTR_SECTION * R9 = CR * Original R9 to R13 is saved on PACA_EXMC * - * Switch to mc_emergency stack and handle re-entrancy (though we - * currently don't test for overflow). Save MCE registers srr1, - * srr0, dar and dsisr and then set ME=1 + * Switch to mc_emergency stack and handle re-entrancy (we limit + * the nested MCE upto level 4 to avoid stack overflow). + * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1 * * We use paca->in_mce to check whether this is the first entry or * nested machine check. We increment paca->in_mce to track nested @@ -464,6 +464,9 @@ BEGIN_FTR_SECTION 0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ addi r10,r10,1 /* increment paca->in_mce */ sth r10,PACA_IN_MCE(r13) + /* Limit nested MCE to level 4 to avoid stack overflow */ + cmpwi r10,4 + bgt 2f /* Check if we hit limit of 4 */ std r11,GPR1(r1) /* Save r1 on the stack. 
*/ std r11,0(r1) /* make stack chain pointer */ mfspr r11,SPRN_SRR0 /* Save SRR0 */ @@ -482,10 +485,23 @@ BEGIN_FTR_SECTION ori r11,r11,MSR_RI /* turn on RI bit */ ld r12,PACAKBASE(r13) /* get high part of &label */ LOAD_HANDLER(r12, machine_check_handle_early) - mtspr SPRN_SRR0,r12 +1: mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r11 rfid b . /* prevent speculative execution */ +2: + /* Stack overflow. Stay on emergency stack and panic. + * Keep the ME bit off while panic-ing, so that if we hit + * another machine check we checkstop. + */ + addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */ + ld r11,PACAKMSR(r13) + ld r12,PACAKBASE(r13) + LOAD_HANDLER(r12, unrecover_mce) + li r10,MSR_ME + andc r11,r11,r10 /* Turn off MSR_ME */ + b 1b + b . /* prevent speculative execution */ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) machine_check_pSeries: @@ -1389,6 +1405,7 @@ machine_check_handle_early: bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early + std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) #ifdef CONFIG_PPC_P7_NAP /* @@ -1443,11 +1460,33 @@ machine_check_handle_early: */ andi. r11,r12,MSR_RI bne 2f -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b 1b +1: mfspr r11,SPRN_SRR0 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10,unrecover_mce) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + /* + * We are going down. But there are chances that we might get hit by + * another MCE during panic path and we may run into unstable state + * with no way out. Hence, turn ME bit off while going down, so that + * when another MCE is hit during panic path, system will checkstop + * and hypervisor will get restarted cleanly by SP. + */ + li r3,MSR_ME + andc r10,r10,r3 /* Turn off MSR_ME */ + mtspr SPRN_SRR1,r10 + rfid + b . 2: /* + * Check if we have successfully handled/recovered from error, if not + * then stay on emergency stack and panic. + */ + ld r3,RESULT(r1) /* Load result */ + cmpdi r3,0 /* see if we handled MCE successfully */ + + beq 1b /* if !handled then panic */ + /* * Return from MC interrupt. * Queue up the MCE event so that we can log it later, while * returning from kernel or opal call. @@ -1460,6 +1499,17 @@ machine_check_handle_early: MACHINE_CHECK_HANDLER_WINDUP b machine_check_pSeries +unrecover_mce: + /* Invoke machine_check_exception to print MCE event and panic. */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_exception + /* + * We will not reach here. Even if we did, there is no way out. Call + * unrecoverable_exception and die. + */ +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b 1b /* * r13 points to the PACA, r9 contains the saved CR, * r12 contain the saved SRR1, SRR0 is still ready for return diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 67ee0d6..7d7d863 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -930,25 +930,6 @@ initial_mmu: tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ -#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE) - - /* Load a TLB entry for the UART, so that ppc4xx_progress() can use - * the UARTs nice and early. We use a 4k real==virtual mapping. 
*/ - - lis r3,SERIAL_DEBUG_IO_BASE@h - ori r3,r3,SERIAL_DEBUG_IO_BASE@l - mr r4,r3 - clrrwi r4,r4,12 - ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G) - - clrrwi r3,r3,12 - ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K)) - - li r0,0 /* TLB slot 0 */ - tlbwe r4,r0,TLB_DATA - tlbwe r3,r0,TLB_TAG -#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */ - isync /* Establish the exception vector base diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8a1edbe..be99774 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -755,15 +755,15 @@ struct task_struct *__switch_to(struct task_struct *prev, WARN_ON(!irqs_disabled()); - /* Back up the TAR across context switches. + /* Back up the TAR and DSCR across context switches. * Note that the TAR is not available for use in the kernel. (To * provide this, the TAR should be backed up/restored on exception * entry/exit instead, and be in pt_regs. FIXME, this should be in * pt_regs anyway (for debug).) - * Save the TAR here before we do treclaim/trecheckpoint as these - * will change the TAR. + * Save the TAR and DSCR here before we do treclaim/trecheckpoint as + * these will change them. */ - save_tar(&prev->thread); + save_early_sprs(&prev->thread); __switch_to_tm(prev); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index d4d4183..e239df3 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -471,7 +471,7 @@ void __init smp_setup_cpu_maps(void) for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, be32_to_cpu(intserv[j])); - set_cpu_present(cpu, true); + set_cpu_present(cpu, of_device_is_available(dn)); set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); set_cpu_possible(cpu, true); cpu++; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 7753af2..51a3ff7 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -749,7 +749,7 @@ int setup_profiling_timer(unsigned int multiplier) /* cpumask of CPUs with asymetric SMT dependancy */ static const int powerpc_smt_flags(void) { - int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES; + int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 7e711bd..9fff9cd 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -551,7 +551,7 @@ void timer_interrupt(struct pt_regs * regs) may_hard_irq_enable(); -#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) +#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1bd7ca2..239f1cd 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -295,6 +295,8 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; + __get_cpu_var(irq_stat).mce_exceptions++; + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index a158375..b7aa072 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -62,8 +62,6 @@ void __init udbg_early_init(void) udbg_init_cpm(); #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) udbg_init_usbgecko(); -#elif 
defined(CONFIG_PPC_EARLY_DEBUG_WSP) - udbg_init_wsp(); #elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS) /* In memory console */ udbg_init_memcons(); diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 75702e2..6e7c492 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -296,14 +296,3 @@ void __init udbg_init_40x_realmode(void) } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ - - -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP - -void __init udbg_init_wsp(void) -{ - udbg_uart_init_mmio((void *)WSP_UART_VIRT, 1); - udbg_uart_setup(57600, 50000000); -} - -#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 768a9f9..3a5c568 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -113,10 +113,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) * We assume that if the condition is recovered then linux host * will have generated an error log event that we will pick * up and log later. - * Don't release mce event now. In case if condition is not - * recovered we do guest exit and go back to linux host machine - * check handler. Hence we need make sure that current mce event - * is available for linux host to consume. + * Don't release mce event now. We will queue up the event so that + * we can log the MCE event info on host console. */ if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE)) goto out; @@ -128,11 +126,12 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) out: /* - * If we have handled the error, then release the mce event because - * we will be delivering machine check to guest. + * We are now going enter guest either through machine check + * interrupt (for unhandled errors) or will continue from + * current HSRR0 (for handled errors) in guest. Hence + * queue up the event so that we can log it from host console later. */ - if (handled) - release_mce_event(); + machine_check_queue_event(); return handled; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 77356fd..868347e 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2257,15 +2257,28 @@ machine_check_realmode: mr r3, r9 /* get vcpu pointer */ bl kvmppc_realmode_machine_check nop - cmpdi r3, 0 /* continue exiting from guest? */ + cmpdi r3, 0 /* Did we handle MCE ? */ ld r9, HSTATE_KVM_VCPU(r13) li r12, BOOK3S_INTERRUPT_MACHINE_CHECK - beq mc_cont + /* + * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through + * machine check interrupt (set HSRR0 to 0x200). And for handled + * errors (no-fatal), just go back to guest execution with current + * HSRR0 instead of exiting guest. This new approach will inject + * machine check to guest for fatal error causing guest to crash. + * + * The old code used to return to host for unhandled errors which + * was causing guest to hang with soft lockups inside guest and + * makes it difficult to recover guest instance. + */ + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + bne 2f /* Continue guest execution. */ /* If not, deliver a machine check. SRR0/1 are already set */ li r10, BOOK3S_INTERRUPT_MACHINE_CHECK ld r11, VCPU_MSR(r9) bl kvmppc_msr_interrupt - b fast_interrupt_c_return +2: b fast_interrupt_c_return /* * Check the reason we woke from nap, and take appropriate action. 
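
[Editor's note, not part of the patch] The bpf_jit_comp.c hunks that follow drop the kernel-internal BPF_S_* enum and switch directly on raw classic-BPF opcodes composed from class | size | mode, as returned by bpf_anc_helper(). For reference, a small userspace sketch that composes opcodes the same way and attaches the resulting filter with SO_ATTACH_FILTER; the raw-socket setup is illustrative only, and the SO_ATTACH_FILTER fallback value (26) is guarded for older libc headers:

#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>		/* htons */
#include <linux/filter.h>	/* struct sock_filter/sock_fprog, BPF_* macros */
#include <linux/if_ether.h>	/* ETH_P_ALL, ETH_P_IP */

#ifndef SO_ATTACH_FILTER
#define SO_ATTACH_FILTER 26	/* standard value, guarded for older headers */
#endif

int main(void)
{
	/* Opcodes built exactly as in the JIT switch below: class | size | mode. */
	struct sock_filter code[] = {
		BPF_STMT(BPF_LD  | BPF_H | BPF_ABS, 12),		/* A = ethertype */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_IP, 0, 1),	/* IPv4 ? */
		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),			/* accept */
		BPF_STMT(BPF_RET | BPF_K, 0),				/* drop */
	};
	struct sock_fprog prog = { .len = 4, .filter = code };
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));	/* needs CAP_NET_RAW */

	if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
				 &prog, sizeof(prog)) < 0)
		perror("attach classic BPF filter");
	return 0;
}
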
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c0511c2..412dd46 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1470,7 +1470,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) regs->gpr[rd] = byterev_4(val); goto ldst_done; -#ifdef CONFIG_PPC_CPU +#ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S index e76eba7..8f87d92 100644 --- a/arch/powerpc/net/bpf_jit_64.S +++ b/arch/powerpc/net/bpf_jit_64.S @@ -78,7 +78,7 @@ sk_load_byte_positive_offset: blr /* - * BPF_S_LDX_B_MSH: ldxb 4*([offset]&0xf) + * BPF_LDX | BPF_B | BPF_MSH: ldxb 4*([offset]&0xf) * r_addr is the offset value */ .globl sk_load_byte_msh diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 808ce1c..6dcdade 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -79,19 +79,11 @@ static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image, } switch (filter[0].code) { - case BPF_S_RET_K: - case BPF_S_LD_W_LEN: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_CPU: - case BPF_S_ANC_QUEUE: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: + case BPF_RET | BPF_K: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: /* first instruction sets A register (or is RET 'constant') */ break; default: @@ -144,6 +136,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, for (i = 0; i < flen; i++) { unsigned int K = filter[i].k; + u16 code = bpf_anc_helper(&filter[i]); /* * addrs[] maps a BPF bytecode address into a real offset from @@ -151,35 +144,35 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, */ addrs[i] = ctx->idx * 4; - switch (filter[i].code) { + switch (code) { /*** ALU ops ***/ - case BPF_S_ALU_ADD_X: /* A += X; */ + case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ ctx->seen |= SEEN_XREG; PPC_ADD(r_A, r_A, r_X); break; - case BPF_S_ALU_ADD_K: /* A += K; */ + case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ if (!K) break; PPC_ADDI(r_A, r_A, IMM_L(K)); if (K >= 32768) PPC_ADDIS(r_A, r_A, IMM_HA(K)); break; - case BPF_S_ALU_SUB_X: /* A -= X; */ + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ ctx->seen |= SEEN_XREG; PPC_SUB(r_A, r_A, r_X); break; - case BPF_S_ALU_SUB_K: /* A -= K */ + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ if (!K) break; PPC_ADDI(r_A, r_A, IMM_L(-K)); if (K >= 32768) PPC_ADDIS(r_A, r_A, IMM_HA(-K)); break; - case BPF_S_ALU_MUL_X: /* A *= X; */ + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ ctx->seen |= SEEN_XREG; PPC_MUL(r_A, r_A, r_X); break; - case BPF_S_ALU_MUL_K: /* A *= K */ + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ if (K < 32768) PPC_MULI(r_A, r_A, K); else { @@ -187,7 +180,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_A, r_A, r_scratch1); } break; - case BPF_S_ALU_MOD_X: /* A %= X; */ + case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ ctx->seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); if (ctx->pc_ret0 != -1) { @@ -201,13 +194,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_MUL(r_scratch1, r_X, r_scratch1); PPC_SUB(r_A, r_A, r_scratch1); break; - case BPF_S_ALU_MOD_K: /* A %= K; */ + case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ PPC_LI32(r_scratch2, 
K); PPC_DIVWU(r_scratch1, r_A, r_scratch2); PPC_MUL(r_scratch1, r_scratch2, r_scratch1); PPC_SUB(r_A, r_A, r_scratch1); break; - case BPF_S_ALU_DIV_X: /* A /= X; */ + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ ctx->seen |= SEEN_XREG; PPC_CMPWI(r_X, 0); if (ctx->pc_ret0 != -1) { @@ -223,17 +216,17 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, } PPC_DIVWU(r_A, r_A, r_X); break; - case BPF_S_ALU_DIV_K: /* A /= K */ + case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ if (K == 1) break; PPC_LI32(r_scratch1, K); PPC_DIVWU(r_A, r_A, r_scratch1); break; - case BPF_S_ALU_AND_X: + case BPF_ALU | BPF_AND | BPF_X: ctx->seen |= SEEN_XREG; PPC_AND(r_A, r_A, r_X); break; - case BPF_S_ALU_AND_K: + case BPF_ALU | BPF_AND | BPF_K: if (!IMM_H(K)) PPC_ANDI(r_A, r_A, K); else { @@ -241,51 +234,51 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_AND(r_A, r_A, r_scratch1); } break; - case BPF_S_ALU_OR_X: + case BPF_ALU | BPF_OR | BPF_X: ctx->seen |= SEEN_XREG; PPC_OR(r_A, r_A, r_X); break; - case BPF_S_ALU_OR_K: + case BPF_ALU | BPF_OR | BPF_K: if (IMM_L(K)) PPC_ORI(r_A, r_A, IMM_L(K)); if (K >= 65536) PPC_ORIS(r_A, r_A, IMM_H(K)); break; - case BPF_S_ANC_ALU_XOR_X: - case BPF_S_ALU_XOR_X: /* A ^= X */ + case BPF_ANC | SKF_AD_ALU_XOR_X: + case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ ctx->seen |= SEEN_XREG; PPC_XOR(r_A, r_A, r_X); break; - case BPF_S_ALU_XOR_K: /* A ^= K */ + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ if (IMM_L(K)) PPC_XORI(r_A, r_A, IMM_L(K)); if (K >= 65536) PPC_XORIS(r_A, r_A, IMM_H(K)); break; - case BPF_S_ALU_LSH_X: /* A <<= X; */ + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ ctx->seen |= SEEN_XREG; PPC_SLW(r_A, r_A, r_X); break; - case BPF_S_ALU_LSH_K: + case BPF_ALU | BPF_LSH | BPF_K: if (K == 0) break; else PPC_SLWI(r_A, r_A, K); break; - case BPF_S_ALU_RSH_X: /* A >>= X; */ + case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ ctx->seen |= SEEN_XREG; PPC_SRW(r_A, r_A, r_X); break; - case BPF_S_ALU_RSH_K: /* A >>= K; */ + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ if (K == 0) break; else PPC_SRWI(r_A, r_A, K); break; - case BPF_S_ALU_NEG: + case BPF_ALU | BPF_NEG: PPC_NEG(r_A, r_A); break; - case BPF_S_RET_K: + case BPF_RET | BPF_K: PPC_LI32(r_ret, K); if (!K) { if (ctx->pc_ret0 == -1) @@ -312,7 +305,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_BLR(); } break; - case BPF_S_RET_A: + case BPF_RET | BPF_A: PPC_MR(r_ret, r_A); if (i != flen - 1) { if (ctx->seen) @@ -321,53 +314,53 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_BLR(); } break; - case BPF_S_MISC_TAX: /* X = A */ + case BPF_MISC | BPF_TAX: /* X = A */ PPC_MR(r_X, r_A); break; - case BPF_S_MISC_TXA: /* A = X */ + case BPF_MISC | BPF_TXA: /* A = X */ ctx->seen |= SEEN_XREG; PPC_MR(r_A, r_X); break; /*** Constant loads/M[] access ***/ - case BPF_S_LD_IMM: /* A = K */ + case BPF_LD | BPF_IMM: /* A = K */ PPC_LI32(r_A, K); break; - case BPF_S_LDX_IMM: /* X = K */ + case BPF_LDX | BPF_IMM: /* X = K */ PPC_LI32(r_X, K); break; - case BPF_S_LD_MEM: /* A = mem[K] */ + case BPF_LD | BPF_MEM: /* A = mem[K] */ PPC_MR(r_A, r_M + (K & 0xf)); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_LDX_MEM: /* X = mem[K] */ + case BPF_LDX | BPF_MEM: /* X = mem[K] */ PPC_MR(r_X, r_M + (K & 0xf)); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_ST: /* mem[K] = A */ + case BPF_ST: /* mem[K] = A */ PPC_MR(r_M + (K & 0xf), r_A); ctx->seen |= SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_STX: /* mem[K] = X */ + case BPF_STX: /* mem[K] = X 
*/ PPC_MR(r_M + (K & 0xf), r_X); ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf)); break; - case BPF_S_LD_W_LEN: /* A = skb->len; */ + case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); break; - case BPF_S_LDX_W_LEN: /* X = skb->len; */ + case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); break; /*** Ancillary info loads ***/ - case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff, protocol)); break; - case BPF_S_ANC_IFINDEX: + case BPF_ANC | SKF_AD_IFINDEX: PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff, dev)); PPC_CMPDI(r_scratch1, 0); @@ -384,33 +377,33 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_LWZ_OFFS(r_A, r_scratch1, offsetof(struct net_device, ifindex)); break; - case BPF_S_ANC_MARK: + case BPF_ANC | SKF_AD_MARK: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, mark)); break; - case BPF_S_ANC_RXHASH: + case BPF_ANC | SKF_AD_RXHASH: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, hash)); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, vlan_tci)); - if (filter[i].code == BPF_S_ANC_VLAN_TAG) + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) PPC_ANDI(r_A, r_A, VLAN_VID_MASK); else PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT); break; - case BPF_S_ANC_QUEUE: + case BPF_ANC | SKF_AD_QUEUE: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, queue_mapping)); break; - case BPF_S_ANC_CPU: + case BPF_ANC | SKF_AD_CPU: #ifdef CONFIG_SMP /* * PACA ptr is r13: @@ -426,13 +419,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, break; /*** Absolute loads from packet header/data ***/ - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, sk_load_word); goto common_load; - case BPF_S_LD_H_ABS: + case BPF_LD | BPF_H | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, sk_load_half); goto common_load; - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_B | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, sk_load_byte); common_load: /* Load from [K]. 
*/ @@ -449,13 +442,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, break; /*** Indirect loads from packet header/data ***/ - case BPF_S_LD_W_IND: + case BPF_LD | BPF_W | BPF_IND: func = sk_load_word; goto common_load_ind; - case BPF_S_LD_H_IND: + case BPF_LD | BPF_H | BPF_IND: func = sk_load_half; goto common_load_ind; - case BPF_S_LD_B_IND: + case BPF_LD | BPF_B | BPF_IND: func = sk_load_byte; common_load_ind: /* @@ -473,31 +466,31 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_BCC(COND_LT, exit_addr); break; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); goto common_load; break; /*** Jump and branches ***/ - case BPF_S_JMP_JA: + case BPF_JMP | BPF_JA: if (K != 0) PPC_JMP(addrs[i + 1 + K]); break; - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGT_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: true_cond = COND_GT; goto cond_branch; - case BPF_S_JMP_JGE_K: - case BPF_S_JMP_JGE_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: true_cond = COND_GE; goto cond_branch; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JEQ_X: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: true_cond = COND_EQ; goto cond_branch; - case BPF_S_JMP_JSET_K: - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: true_cond = COND_NE; /* Fall through */ cond_branch: @@ -508,20 +501,20 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, break; } - switch (filter[i].code) { - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JEQ_X: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: ctx->seen |= SEEN_XREG; PPC_CMPLW(r_A, r_X); break; - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_X: ctx->seen |= SEEN_XREG; PPC_AND_DOT(r_scratch1, r_A, r_X); break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGE_K: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: if (K < 32768) PPC_CMPLWI(r_A, K); else { @@ -529,7 +522,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image, PPC_CMPLW(r_A, r_scratch1); } break; - case BPF_S_JMP_JSET_K: + case BPF_JMP | BPF_JSET | BPF_K: if (K < 32768) /* PPC_ANDI is /only/ dot-form */ PPC_ANDI(r_scratch1, r_A, K); diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index bf9c6d4..391b3f6 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -19,7 +19,6 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig" source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" -source "arch/powerpc/platforms/wsp/Kconfig" config KVM_GUEST bool "KVM Guest support" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 43b65ad..a41bd02 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -148,10 +148,6 @@ config POWER4 depends on PPC64 && PPC_BOOK3S def_bool y -config PPC_A2 - bool - depends on PPC_BOOK3E_64 - config TUNE_CELL bool "Optimize for Cell Broadband Engine" depends on PPC64 && PPC_BOOK3S @@ -280,7 +276,7 @@ config VSX config PPC_ICSWX bool "Support for PowerPC icswx coprocessor instruction" - depends on POWER4 || PPC_A2 + depends on POWER4 default n ---help--- diff --git a/arch/powerpc/platforms/Makefile 
b/arch/powerpc/platforms/Makefile index 879b4a4..469ef17 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -22,4 +22,3 @@ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ obj-$(CONFIG_AMIGAONE) += amigaone/ -obj-$(CONFIG_PPC_WSP) += wsp/ diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 0ba3c95..bcfd6f0 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -35,7 +35,6 @@ #define SPUFS_PS_MAP_SIZE 0x20000 #define SPUFS_MFC_MAP_SIZE 0x1000 #define SPUFS_CNTL_MAP_SIZE 0x1000 -#define SPUFS_CNTL_MAP_SIZE 0x1000 #define SPUFS_SIGNAL_MAP_SIZE PAGE_SIZE #define SPUFS_MSS_MAP_SIZE 0x1000 diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index c252ee9..45a8ed0 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -17,6 +17,7 @@ config PPC_POWERNV select CPU_FREQ_GOV_USERSPACE select CPU_FREQ_GOV_ONDEMAND select CPU_FREQ_GOV_CONSERVATIVE + select PPC_DOORBELL default y config PPC_POWERNV_RTAS diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 4ad0d34..d55891f 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,9 +1,9 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o -obj-y += opal-msglog.o subcore.o subcore-asm.o +obj-y += opal-msglog.o -obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 753f08e..8ad0c5b 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -267,7 +267,7 @@ static int ioda_eeh_get_state(struct eeh_pe *pe) { s64 ret = 0; u8 fstate; - u16 pcierr; + __be16 pcierr; u32 pe_no; int result; struct pci_controller *hose = pe->phb; @@ -316,7 +316,7 @@ static int ioda_eeh_get_state(struct eeh_pe *pe) result = 0; result &= ~EEH_STATE_RESET_ACTIVE; - if (pcierr != OPAL_EEH_PHB_ERROR) { + if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { result |= EEH_STATE_MMIO_ACTIVE; result |= EEH_STATE_DMA_ACTIVE; result |= EEH_STATE_MMIO_ENABLED; @@ -705,18 +705,19 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) { struct pci_controller *hose; struct pnv_phb *phb; - struct eeh_pe *phb_pe; - u64 frozen_pe_no; - u16 err_type, severity; + struct eeh_pe *phb_pe, *parent_pe; + __be64 frozen_pe_no; + __be16 err_type, severity; + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); long rc; - int ret = EEH_NEXT_ERR_NONE; + int state, ret = EEH_NEXT_ERR_NONE; /* * While running here, it's safe to purge the event queue. * And we should keep the cached OPAL notifier event sychronized * between the kernel and firmware. 
*/ - eeh_remove_event(NULL); + eeh_remove_event(NULL, false); opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); list_for_each_entry(hose, &hose_list, list_node) { @@ -742,8 +743,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) } /* If the PHB doesn't have error, stop processing */ - if (err_type == OPAL_EEH_NO_ERROR || - severity == OPAL_EEH_SEV_NO_ERROR) { + if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR || + be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) { pr_devel("%s: No error found on PHB#%x\n", __func__, hose->global_number); continue; @@ -755,14 +756,14 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) * specific PHB. */ pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", - __func__, err_type, severity, - frozen_pe_no, hose->global_number); - switch (err_type) { + __func__, be16_to_cpu(err_type), be16_to_cpu(severity), + be64_to_cpu(frozen_pe_no), hose->global_number); + switch (be16_to_cpu(err_type)) { case OPAL_EEH_IOC_ERROR: - if (severity == OPAL_EEH_SEV_IOC_DEAD) { + if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) { pr_err("EEH: dead IOC detected\n"); ret = EEH_NEXT_ERR_DEAD_IOC; - } else if (severity == OPAL_EEH_SEV_INF) { + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { pr_info("EEH: IOC informative error " "detected\n"); ioda_eeh_hub_diag(hose); @@ -771,20 +772,26 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) break; case OPAL_EEH_PHB_ERROR: - if (severity == OPAL_EEH_SEV_PHB_DEAD) { + if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { *pe = phb_pe; - pr_err("EEH: dead PHB#%x detected\n", - hose->global_number); + pr_err("EEH: dead PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_DEAD_PHB; - } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { + } else if (be16_to_cpu(severity) == + OPAL_EEH_SEV_PHB_FENCED) { *pe = phb_pe; - pr_err("EEH: fenced PHB#%x detected\n", - hose->global_number); + pr_err("EEH: Fenced PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_FENCED_PHB; - } else if (severity == OPAL_EEH_SEV_INF) { + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { pr_info("EEH: PHB#%x informative error " - "detected\n", - hose->global_number); + "detected, location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); ioda_eeh_phb_diag(hose); ret = EEH_NEXT_ERR_NONE; } @@ -792,34 +799,33 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) break; case OPAL_EEH_PE_ERROR: /* - * If we can't find the corresponding PE, the - * PEEV / PEST would be messy. So we force an - * fenced PHB so that it can be recovered. - * - * If the PE has been marked as isolated, that - * should have been removed permanently or in - * progress with recovery. We needn't report - * it again. + * If we can't find the corresponding PE, we + * just try to unfreeze. 
*/ - if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) { - *pe = phb_pe; - pr_err("EEH: Escalated fenced PHB#%x " - "detected for PE#%llx\n", - hose->global_number, - frozen_pe_no); - ret = EEH_NEXT_ERR_FENCED_PHB; + if (ioda_eeh_get_pe(hose, + be64_to_cpu(frozen_pe_no), pe)) { + /* Try best to clear it */ + pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", + hose->global_number, frozen_pe_no); + pr_info("EEH: PHB location: %s\n", + eeh_pe_loc_get(phb_pe)); + opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + ret = EEH_NEXT_ERR_NONE; } else if ((*pe)->state & EEH_PE_ISOLATED) { ret = EEH_NEXT_ERR_NONE; } else { pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", (*pe)->addr, (*pe)->phb->global_number); + pr_err("EEH: PE location: %s, PHB location: %s\n", + eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_FROZEN_PE; } break; default: pr_warn("%s: Unexpected error type %d\n", - __func__, err_type); + __func__, be16_to_cpu(err_type)); } /* @@ -837,6 +843,31 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) } /* + * We probably have the frozen parent PE out there and + * we need have to handle frozen parent PE firstly. + */ + if (ret == EEH_NEXT_ERR_FROZEN_PE) { + parent_pe = (*pe)->parent; + while (parent_pe) { + /* Hit the ceiling ? */ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + state = ioda_eeh_get_state(parent_pe); + if (state > 0 && + (state & active_flags) != active_flags) + *pe = parent_pe; + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + + /* We possibly migrate to another PE */ + eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + } + + /* * If we have no errors on the specific PHB or only * informative error there, we continue poking it. * Otherwise, we need actions to be taken by upper diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 1bb25b9..44ed78a 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -37,7 +37,8 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, { struct memcons *mc = bin_attr->private; const char *conbuf; - size_t ret, first_read = 0; + ssize_t ret; + size_t first_read = 0; uint32_t out_pos, avail; if (!mc) @@ -69,6 +70,9 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, to += first_read; count -= first_read; pos -= avail; + + if (count <= 0) + goto out; } /* Sanity check. The firmware should not do this to us. 
*/ diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index d202f9b..9d1acf2 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -260,10 +260,10 @@ void __init opal_sys_param_init(void) attr[i].kobj_attr.attr.mode = S_IRUGO; break; case OPAL_SYSPARAM_WRITE: - attr[i].kobj_attr.attr.mode = S_IWUGO; + attr[i].kobj_attr.attr.mode = S_IWUSR; break; case OPAL_SYSPARAM_RW: - attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUGO; + attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR; break; default: break; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index eefbfcc..f91a4e5 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -206,72 +206,91 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose, data = (struct OpalIoPhb3ErrorData*)common; pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n", - hose->global_number, common->version); + hose->global_number, be32_to_cpu(common->version)); if (data->brdgCtl) pr_info("brdgCtl: %08x\n", - data->brdgCtl); + be32_to_cpu(data->brdgCtl)); if (data->portStatusReg || data->rootCmplxStatus || data->busAgentStatus) pr_info("UtlSts: %08x %08x %08x\n", - data->portStatusReg, data->rootCmplxStatus, - data->busAgentStatus); + be32_to_cpu(data->portStatusReg), + be32_to_cpu(data->rootCmplxStatus), + be32_to_cpu(data->busAgentStatus)); if (data->deviceStatus || data->slotStatus || data->linkStatus || data->devCmdStatus || data->devSecStatus) pr_info("RootSts: %08x %08x %08x %08x %08x\n", - data->deviceStatus, data->slotStatus, - data->linkStatus, data->devCmdStatus, - data->devSecStatus); + be32_to_cpu(data->deviceStatus), + be32_to_cpu(data->slotStatus), + be32_to_cpu(data->linkStatus), + be32_to_cpu(data->devCmdStatus), + be32_to_cpu(data->devSecStatus)); if (data->rootErrorStatus || data->uncorrErrorStatus || data->corrErrorStatus) pr_info("RootErrSts: %08x %08x %08x\n", - data->rootErrorStatus, data->uncorrErrorStatus, - data->corrErrorStatus); + be32_to_cpu(data->rootErrorStatus), + be32_to_cpu(data->uncorrErrorStatus), + be32_to_cpu(data->corrErrorStatus)); if (data->tlpHdr1 || data->tlpHdr2 || data->tlpHdr3 || data->tlpHdr4) pr_info("RootErrLog: %08x %08x %08x %08x\n", - data->tlpHdr1, data->tlpHdr2, - data->tlpHdr3, data->tlpHdr4); + be32_to_cpu(data->tlpHdr1), + be32_to_cpu(data->tlpHdr2), + be32_to_cpu(data->tlpHdr3), + be32_to_cpu(data->tlpHdr4)); if (data->sourceId || data->errorClass || data->correlator) pr_info("RootErrLog1: %08x %016llx %016llx\n", - data->sourceId, data->errorClass, - data->correlator); + be32_to_cpu(data->sourceId), + be64_to_cpu(data->errorClass), + be64_to_cpu(data->correlator)); if (data->nFir) pr_info("nFir: %016llx %016llx %016llx\n", - data->nFir, data->nFirMask, - data->nFirWOF); + be64_to_cpu(data->nFir), + be64_to_cpu(data->nFirMask), + be64_to_cpu(data->nFirWOF)); if (data->phbPlssr || data->phbCsr) pr_info("PhbSts: %016llx %016llx\n", - data->phbPlssr, data->phbCsr); + be64_to_cpu(data->phbPlssr), + be64_to_cpu(data->phbCsr)); if (data->lemFir) pr_info("Lem: %016llx %016llx %016llx\n", - data->lemFir, data->lemErrorMask, - data->lemWOF); + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrorMask), + be64_to_cpu(data->lemWOF)); if (data->phbErrorStatus) pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", - data->phbErrorStatus, data->phbFirstErrorStatus, - data->phbErrorLog0, data->phbErrorLog1); + 
be64_to_cpu(data->phbErrorStatus), + be64_to_cpu(data->phbFirstErrorStatus), + be64_to_cpu(data->phbErrorLog0), + be64_to_cpu(data->phbErrorLog1)); if (data->mmioErrorStatus) pr_info("OutErr: %016llx %016llx %016llx %016llx\n", - data->mmioErrorStatus, data->mmioFirstErrorStatus, - data->mmioErrorLog0, data->mmioErrorLog1); + be64_to_cpu(data->mmioErrorStatus), + be64_to_cpu(data->mmioFirstErrorStatus), + be64_to_cpu(data->mmioErrorLog0), + be64_to_cpu(data->mmioErrorLog1)); if (data->dma0ErrorStatus) pr_info("InAErr: %016llx %016llx %016llx %016llx\n", - data->dma0ErrorStatus, data->dma0FirstErrorStatus, - data->dma0ErrorLog0, data->dma0ErrorLog1); + be64_to_cpu(data->dma0ErrorStatus), + be64_to_cpu(data->dma0FirstErrorStatus), + be64_to_cpu(data->dma0ErrorLog0), + be64_to_cpu(data->dma0ErrorLog1)); if (data->dma1ErrorStatus) pr_info("InBErr: %016llx %016llx %016llx %016llx\n", - data->dma1ErrorStatus, data->dma1FirstErrorStatus, - data->dma1ErrorLog0, data->dma1ErrorLog1); + be64_to_cpu(data->dma1ErrorStatus), + be64_to_cpu(data->dma1FirstErrorStatus), + be64_to_cpu(data->dma1ErrorLog0), + be64_to_cpu(data->dma1ErrorLog1)); for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) { - if ((data->pestA[i] >> 63) == 0 && - (data->pestB[i] >> 63) == 0) + if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 && + (be64_to_cpu(data->pestB[i]) >> 63) == 0) continue; pr_info("PE[%3d] A/B: %016llx %016llx\n", - i, data->pestA[i], data->pestB[i]); + i, be64_to_cpu(data->pestA[i]), + be64_to_cpu(data->pestB[i])); } } @@ -284,7 +303,7 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, return; common = (struct OpalIoPhbErrorCommon *)log_buff; - switch (common->ioType) { + switch (be32_to_cpu(common->ioType)) { case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: pnv_pci_dump_p7ioc_diag_data(hose, common); break; @@ -293,7 +312,7 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, break; default: pr_warn("%s: Unrecognized ioType %d\n", - __func__, common->ioType); + __func__, be32_to_cpu(common->ioType)); } } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 8c16a5f..d9b88fa 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -35,11 +35,14 @@ #include <asm/rtas.h> #include <asm/opal.h> #include <asm/kexec.h> +#include <asm/smp.h> #include "powernv.h" static void __init pnv_setup_arch(void) { + set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 0062a43..5fcfcf4 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -32,6 +32,7 @@ #include <asm/opal.h> #include <asm/runlatch.h> #include <asm/code-patching.h> +#include <asm/dbell.h> #include "powernv.h" @@ -46,6 +47,11 @@ static void pnv_smp_setup_cpu(int cpu) { if (cpu != boot_cpuid) xics_setup_cpu(); + +#ifdef CONFIG_PPC_DOORBELL + if (cpu_has_feature(CPU_FTR_DBELL)) + doorbell_setup_this_cpu(); +#endif } int pnv_smp_kick_cpu(int nr) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 2cb8b77..756b482 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -21,6 +21,7 @@ config PPC_PSERIES select HAVE_CONTEXT_TRACKING select HOTPLUG_CPU if SMP select ARCH_RANDOM + select PPC_DOORBELL default y config PPC_SPLPAR diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig deleted file 
mode 100644 index 422a175..0000000 --- a/arch/powerpc/platforms/wsp/Kconfig +++ /dev/null @@ -1,30 +0,0 @@ -config PPC_WSP - bool - select PPC_A2 - select GENERIC_TBSYNC - select PPC_ICSWX - select PPC_SCOM - select PPC_XICS - select PPC_ICP_NATIVE - select PCI - select PPC_IO_WORKAROUNDS if PCI - select PPC_INDIRECT_PIO if PCI - default n - -menu "WSP platform selection" - depends on PPC_BOOK3E_64 - -config PPC_PSR2 - bool "PowerEN System Reference Platform 2" - select EPAPR_BOOT - select PPC_WSP - default y - -config PPC_CHROMA - bool "PowerEN PCIe Chroma Card" - select EPAPR_BOOT - select PPC_WSP - select OF_DYNAMIC - default y - -endmenu diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile deleted file mode 100644 index 162fc60..0000000 --- a/arch/powerpc/platforms/wsp/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -ccflags-y += $(NO_MINIMAL_TOC) - -obj-y += setup.o ics.o wsp.o -obj-$(CONFIG_PPC_PSR2) += psr2.o -obj-$(CONFIG_PPC_CHROMA) += chroma.o h8.o -obj-$(CONFIG_PPC_WSP) += opb_pic.o -obj-$(CONFIG_PPC_WSP) += scom_wsp.o -obj-$(CONFIG_SMP) += smp.o scom_smp.o -obj-$(CONFIG_PCI) += wsp_pci.o -obj-$(CONFIG_PCI_MSI) += msi.o diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c deleted file mode 100644 index aaa46b3..0000000 --- a/arch/powerpc/platforms/wsp/chroma.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/of.h> -#include <linux/smp.h> -#include <linux/time.h> -#include <linux/of_fdt.h> - -#include <asm/machdep.h> -#include <asm/udbg.h> - -#include "ics.h" -#include "wsp.h" - -void __init chroma_setup_arch(void) -{ - wsp_setup_arch(); - wsp_setup_h8(); - -} - -static int __init chroma_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "ibm,wsp-chroma")) - return 0; - - return 1; -} - -define_machine(chroma_md) { - .name = "Chroma PCIe", - .probe = chroma_probe, - .setup_arch = chroma_setup_arch, - .restart = wsp_h8_restart, - .power_off = wsp_h8_power_off, - .halt = wsp_halt, - .calibrate_decr = generic_calibrate_decr, - .init_IRQ = wsp_setup_irq, - .progress = udbg_progress, - .power_save = book3e_idle, -}; - -machine_arch_initcall(chroma_md, wsp_probe_devices); diff --git a/arch/powerpc/platforms/wsp/h8.c b/arch/powerpc/platforms/wsp/h8.c deleted file mode 100644 index a3c87f3..0000000 --- a/arch/powerpc/platforms/wsp/h8.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/io.h> -#include <linux/of_address.h> - -#include "wsp.h" - -/* - * The UART connection to the H8 is over ttyS1 which is just a 16550. - * We assume that FW has it setup right and no one messes with it. 
- */ - - -static u8 __iomem *h8; - -#define RBR 0 /* Receiver Buffer Register */ -#define THR 0 /* Transmitter Holding Register */ -#define LSR 5 /* Line Status Register */ -#define LSR_DR 0x01 /* LSR value for Data-Ready */ -#define LSR_THRE 0x20 /* LSR value for Transmitter-Holding-Register-Empty */ -static void wsp_h8_putc(int c) -{ - u8 lsr; - - do { - lsr = readb(h8 + LSR); - } while ((lsr & LSR_THRE) != LSR_THRE); - writeb(c, h8 + THR); -} - -static int wsp_h8_getc(void) -{ - u8 lsr; - - do { - lsr = readb(h8 + LSR); - } while ((lsr & LSR_DR) != LSR_DR); - - return readb(h8 + RBR); -} - -static void wsp_h8_puts(const char *s, int sz) -{ - int i; - - for (i = 0; i < sz; i++) { - wsp_h8_putc(s[i]); - - /* no flow control so wait for echo */ - wsp_h8_getc(); - } - wsp_h8_putc('\r'); - wsp_h8_putc('\n'); -} - -static void wsp_h8_terminal_cmd(const char *cmd, int sz) -{ - hard_irq_disable(); - wsp_h8_puts(cmd, sz); - /* should never return, but just in case */ - for (;;) - continue; -} - - -void wsp_h8_restart(char *cmd) -{ - static const char restart[] = "warm-reset"; - - (void)cmd; - wsp_h8_terminal_cmd(restart, sizeof(restart) - 1); -} - -void wsp_h8_power_off(void) -{ - static const char off[] = "power-off"; - - wsp_h8_terminal_cmd(off, sizeof(off) - 1); -} - -static void __iomem *wsp_h8_getaddr(void) -{ - struct device_node *aliases; - struct device_node *uart; - struct property *path; - void __iomem *va = NULL; - - /* - * there is nothing in the devtree to tell us which is mapped - * to the H8, but se know it is the second serial port. - */ - - aliases = of_find_node_by_path("/aliases"); - if (aliases == NULL) - return NULL; - - path = of_find_property(aliases, "serial1", NULL); - if (path == NULL) - goto out; - - uart = of_find_node_by_path(path->value); - if (uart == NULL) - goto out; - - va = of_iomap(uart, 0); - - /* remove it so no one messes with it */ - of_detach_node(uart); - of_node_put(uart); - -out: - of_node_put(aliases); - - return va; -} - -void __init wsp_setup_h8(void) -{ - h8 = wsp_h8_getaddr(); - - /* Devtree change? lets hard map it anyway */ - if (h8 == NULL) { - pr_warn("UART to H8 could not be found"); - h8 = ioremap(0xffc0008000ULL, 0x100); - } -} diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c deleted file mode 100644 index 9cd92e6..0000000 --- a/arch/powerpc/platforms/wsp/ics.c +++ /dev/null @@ -1,762 +0,0 @@ -/* - * Copyright 2008-2011 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include <linux/cpu.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/msi.h> -#include <linux/of.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/xics.h> - -#include "wsp.h" -#include "ics.h" - - -/* WSP ICS */ - -struct wsp_ics { - struct ics ics; - struct device_node *dn; - void __iomem *regs; - spinlock_t lock; - unsigned long *bitmap; - u32 chip_id; - u32 lsi_base; - u32 lsi_count; - u64 hwirq_start; - u64 count; -#ifdef CONFIG_SMP - int *hwirq_cpu_map; -#endif -}; - -#define to_wsp_ics(ics) container_of(ics, struct wsp_ics, ics) - -#define INT_SRC_LAYER_BUID_REG(base) ((base) + 0x00) -#define IODA_TBL_ADDR_REG(base) ((base) + 0x18) -#define IODA_TBL_DATA_REG(base) ((base) + 0x20) -#define XIVE_UPDATE_REG(base) ((base) + 0x28) -#define ICS_INT_CAPS_REG(base) ((base) + 0x30) - -#define TBL_AUTO_INCREMENT ((1UL << 63) | (1UL << 15)) -#define TBL_SELECT_XIST (1UL << 48) -#define TBL_SELECT_XIVT (1UL << 49) - -#define IODA_IRQ(irq) ((irq) & (0x7FFULL)) /* HRM 5.1.3.4 */ - -#define XIST_REQUIRED 0x8 -#define XIST_REJECTED 0x4 -#define XIST_PRESENTED 0x2 -#define XIST_PENDING 0x1 - -#define XIVE_SERVER_SHIFT 42 -#define XIVE_SERVER_MASK 0xFFFFULL -#define XIVE_PRIORITY_MASK 0xFFULL -#define XIVE_PRIORITY_SHIFT 32 -#define XIVE_WRITE_ENABLE (1ULL << 63) - -/* - * The docs refer to a 6 bit field called ChipID, which consists of a - * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero - * so we ignore it, and every where we use "chip id" in this code we - * mean the NodeID. - */ -#define WSP_ICS_CHIP_SHIFT 17 - - -static struct wsp_ics *ics_list; -static int num_ics; - -/* ICS Source controller accessors */ - -static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq) -{ - unsigned long flags; - u64 xive; - - spin_lock_irqsave(&ics->lock, flags); - out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq)); - xive = in_be64(IODA_TBL_DATA_REG(ics->regs)); - spin_unlock_irqrestore(&ics->lock, flags); - - return xive; -} - -static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive) -{ - xive &= ~XIVE_ADDR_MASK; - xive |= (irq & XIVE_ADDR_MASK); - xive |= XIVE_WRITE_ENABLE; - - out_be64(XIVE_UPDATE_REG(ics->regs), xive); -} - -static u64 xive_set_server(u64 xive, unsigned int server) -{ - u64 mask = ~(XIVE_SERVER_MASK << XIVE_SERVER_SHIFT); - - xive &= mask; - xive |= (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT; - - return xive; -} - -static u64 xive_set_priority(u64 xive, unsigned int priority) -{ - u64 mask = ~(XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT); - - xive &= mask; - xive |= (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT; - - return xive; -} - - -#ifdef CONFIG_SMP -/* Find logical CPUs within mask on a given chip and store result in ret */ -void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret) -{ - int cpu, chip; - struct device_node *cpu_dn, *dn; - const u32 *prop; - - cpumask_clear(ret); - for_each_cpu(cpu, mask) { - cpu_dn = of_get_cpu_node(cpu, NULL); - if (!cpu_dn) - continue; - - prop = of_get_property(cpu_dn, "at-node", NULL); - if (!prop) { - of_node_put(cpu_dn); - continue; - } - - dn = of_find_node_by_phandle(*prop); - of_node_put(cpu_dn); - - chip = wsp_get_chip_id(dn); - if (chip == chip_id) - cpumask_set_cpu(cpu, ret); - - of_node_put(dn); - } 
-} - -/* Store a suitable CPU to handle a hwirq in the ics->hwirq_cpu_map cache */ -static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq, - const cpumask_t *affinity) -{ - cpumask_var_t avail, newmask; - int ret = -ENOMEM, cpu, cpu_rover = 0, target; - int index = hwirq - ics->hwirq_start; - unsigned int nodeid; - - BUG_ON(index < 0 || index >= ics->count); - - if (!ics->hwirq_cpu_map) - return -ENOMEM; - - if (!distribute_irqs) { - ics->hwirq_cpu_map[hwirq - ics->hwirq_start] = xics_default_server; - return 0; - } - - /* Allocate needed CPU masks */ - if (!alloc_cpumask_var(&avail, GFP_KERNEL)) - goto ret; - if (!alloc_cpumask_var(&newmask, GFP_KERNEL)) - goto freeavail; - - /* Find PBus attached to the source of this IRQ */ - nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */ - - /* Find CPUs that could handle this IRQ */ - if (affinity) - cpumask_and(avail, cpu_online_mask, affinity); - else - cpumask_copy(avail, cpu_online_mask); - - /* Narrow selection down to logical CPUs on the same chip */ - cpus_on_chip(nodeid, avail, newmask); - - /* Ensure we haven't narrowed it down to 0 */ - if (unlikely(cpumask_empty(newmask))) { - if (unlikely(cpumask_empty(avail))) { - ret = -1; - goto out; - } - cpumask_copy(newmask, avail); - } - - /* Choose a CPU out of those we narrowed it down to in round robin */ - target = hwirq % cpumask_weight(newmask); - for_each_cpu(cpu, newmask) { - if (cpu_rover++ >= target) { - ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu); - ret = 0; - goto out; - } - } - - /* Shouldn't happen */ - WARN_ON(1); - -out: - free_cpumask_var(newmask); -freeavail: - free_cpumask_var(avail); -ret: - if (ret < 0) { - ics->hwirq_cpu_map[index] = cpumask_first(cpu_online_mask); - pr_warning("Error, falling hwirq 0x%x routing back to CPU %i\n", - hwirq, ics->hwirq_cpu_map[index]); - } - return ret; -} - -static void alloc_irq_map(struct wsp_ics *ics) -{ - int i; - - ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL); - if (!ics->hwirq_cpu_map) { - pr_warning("Allocate hwirq_cpu_map failed, " - "IRQ balancing disabled\n"); - return; - } - - for (i=0; i < ics->count; i++) - ics->hwirq_cpu_map[i] = xics_default_server; -} - -static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq) -{ - int index = hwirq - ics->hwirq_start; - - BUG_ON(index < 0 || index >= ics->count); - - if (!ics->hwirq_cpu_map) - return xics_default_server; - - return ics->hwirq_cpu_map[index]; -} -#else /* !CONFIG_SMP */ -static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq, - const cpumask_t *affinity) -{ - return 0; -} - -static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq) -{ - return xics_default_server; -} - -static void alloc_irq_map(struct wsp_ics *ics) { } -#endif - -static void wsp_chip_unmask_irq(struct irq_data *d) -{ - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct wsp_ics *ics; - int server; - u64 xive; - - if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) - return; - - ics = d->chip_data; - if (WARN_ON(!ics)) - return; - - server = get_irq_server(ics, hw_irq); - - xive = wsp_ics_get_xive(ics, hw_irq); - xive = xive_set_server(xive, server); - xive = xive_set_priority(xive, DEFAULT_PRIORITY); - wsp_ics_set_xive(ics, hw_irq, xive); -} - -static unsigned int wsp_chip_startup(struct irq_data *d) -{ - /* unmask it */ - wsp_chip_unmask_irq(d); - return 0; -} - -static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics) -{ - u64 xive; - - if (hw_irq == XICS_IPI) - return; - - if 
(WARN_ON(!ics)) - return; - xive = wsp_ics_get_xive(ics, hw_irq); - xive = xive_set_server(xive, xics_default_server); - xive = xive_set_priority(xive, LOWEST_PRIORITY); - wsp_ics_set_xive(ics, hw_irq, xive); -} - -static void wsp_chip_mask_irq(struct irq_data *d) -{ - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct wsp_ics *ics = d->chip_data; - - if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) - return; - - wsp_mask_real_irq(hw_irq, ics); -} - -static int wsp_chip_set_affinity(struct irq_data *d, - const struct cpumask *cpumask, bool force) -{ - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct wsp_ics *ics; - int ret; - u64 xive; - - if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) - return -1; - - ics = d->chip_data; - if (WARN_ON(!ics)) - return -1; - xive = wsp_ics_get_xive(ics, hw_irq); - - /* - * For the moment only implement delivery to all cpus or one cpu. - * Get current irq_server for the given irq - */ - ret = cache_hwirq_map(ics, hw_irq, cpumask); - if (ret == -1) { - char cpulist[128]; - cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); - pr_warning("%s: No online cpus in the mask %s for irq %d\n", - __func__, cpulist, d->irq); - return -1; - } else if (ret == -ENOMEM) { - pr_warning("%s: Out of memory\n", __func__); - return -1; - } - - xive = xive_set_server(xive, get_irq_server(ics, hw_irq)); - wsp_ics_set_xive(ics, hw_irq, xive); - - return IRQ_SET_MASK_OK; -} - -static struct irq_chip wsp_irq_chip = { - .name = "WSP ICS", - .irq_startup = wsp_chip_startup, - .irq_mask = wsp_chip_mask_irq, - .irq_unmask = wsp_chip_unmask_irq, - .irq_set_affinity = wsp_chip_set_affinity -}; - -static int wsp_ics_host_match(struct ics *ics, struct device_node *dn) -{ - /* All ICSs in the system implement a global irq number space, - * so match against them all. 
*/ - return of_device_is_compatible(dn, "ibm,ppc-xics"); -} - -static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq) -{ - if (hwirq >= wsp_ics->hwirq_start && - hwirq < wsp_ics->hwirq_start + wsp_ics->count) - return 1; - - return 0; -} - -static int wsp_ics_map(struct ics *ics, unsigned int virq) -{ - struct wsp_ics *wsp_ics = to_wsp_ics(ics); - unsigned int hw_irq = virq_to_hw(virq); - unsigned long flags; - - if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) - return -ENOENT; - - irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq); - - irq_set_chip_data(virq, wsp_ics); - - spin_lock_irqsave(&wsp_ics->lock, flags); - bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0); - spin_unlock_irqrestore(&wsp_ics->lock, flags); - - return 0; -} - -static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq) -{ - struct wsp_ics *wsp_ics = to_wsp_ics(ics); - - if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) - return; - - pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq); - wsp_mask_real_irq(hw_irq, wsp_ics); -} - -static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq) -{ - struct wsp_ics *wsp_ics = to_wsp_ics(ics); - - if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) - return -ENOENT; - - return get_irq_server(wsp_ics, hw_irq); -} - -/* HW Number allocation API */ - -static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn) -{ - struct device_node *iparent; - int i; - - iparent = of_irq_find_parent(dn); - if (!iparent) { - pr_err("wsp_ics: Failed to find interrupt parent!\n"); - return NULL; - } - - for(i = 0; i < num_ics; i++) { - if(ics_list[i].dn == iparent) - break; - } - - if (i >= num_ics) { - pr_err("wsp_ics: Unable to find parent bitmap!\n"); - return NULL; - } - - return &ics_list[i]; -} - -int wsp_ics_alloc_irq(struct device_node *dn, int num) -{ - struct wsp_ics *ics; - int order, offset; - - ics = wsp_ics_find_dn_ics(dn); - if (!ics) - return -ENODEV; - - /* Fast, but overly strict if num isn't a power of two */ - order = get_count_order(num); - - spin_lock_irq(&ics->lock); - offset = bitmap_find_free_region(ics->bitmap, ics->count, order); - spin_unlock_irq(&ics->lock); - - if (offset < 0) - return offset; - - return offset + ics->hwirq_start; -} - -void wsp_ics_free_irq(struct device_node *dn, unsigned int irq) -{ - struct wsp_ics *ics; - - ics = wsp_ics_find_dn_ics(dn); - if (WARN_ON(!ics)) - return; - - spin_lock_irq(&ics->lock); - bitmap_release_region(ics->bitmap, irq, 0); - spin_unlock_irq(&ics->lock); -} - -/* Initialisation */ - -static int __init wsp_ics_bitmap_setup(struct wsp_ics *ics, - struct device_node *dn) -{ - int len, i, j, size; - u32 start, count; - const u32 *p; - - size = BITS_TO_LONGS(ics->count) * sizeof(long); - ics->bitmap = kzalloc(size, GFP_KERNEL); - if (!ics->bitmap) { - pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n"); - return -ENOMEM; - } - - spin_lock_init(&ics->lock); - - p = of_get_property(dn, "available-ranges", &len); - if (!p || !len) { - /* FIXME this should be a WARN() once mambo is updated */ - pr_err("wsp_ics: No available-ranges defined for %s\n", - dn->full_name); - return 0; - } - - if (len % (2 * sizeof(u32)) != 0) { - /* FIXME this should be a WARN() once mambo is updated */ - pr_err("wsp_ics: Invalid available-ranges for %s\n", - dn->full_name); - return 0; - } - - bitmap_fill(ics->bitmap, ics->count); - - for (i = 0; i < len / sizeof(u32); i += 2) { - start = of_read_number(p + i, 1); - count = of_read_number(p + i + 1, 1); - - 
pr_devel("%s: start: %d count: %d\n", __func__, start, count); - - if ((start + count) > (ics->hwirq_start + ics->count) || - start < ics->hwirq_start) { - pr_err("wsp_ics: Invalid range! -> %d to %d\n", - start, start + count); - break; - } - - for (j = 0; j < count; j++) - bitmap_release_region(ics->bitmap, - (start + j) - ics->hwirq_start, 0); - } - - /* Ensure LSIs are not available for allocation */ - bitmap_allocate_region(ics->bitmap, ics->lsi_base, - get_count_order(ics->lsi_count)); - - return 0; -} - -static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn) -{ - u32 lsi_buid, msi_buid, msi_base, msi_count; - void __iomem *regs; - const u32 *p; - int rc, len, i; - u64 caps, buid; - - p = of_get_property(dn, "interrupt-ranges", &len); - if (!p || len < (2 * sizeof(u32))) { - pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n", - dn->full_name); - return -ENOENT; - } - - if (len > (2 * sizeof(u32))) { - pr_err("wsp_ics: Multiple ics ranges not supported.\n"); - return -EINVAL; - } - - regs = of_iomap(dn, 0); - if (!regs) { - pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name); - return -ENXIO; - } - - ics->hwirq_start = of_read_number(p, 1); - ics->count = of_read_number(p + 1, 1); - ics->regs = regs; - - ics->chip_id = wsp_get_chip_id(dn); - if (WARN_ON(ics->chip_id < 0)) - ics->chip_id = 0; - - /* Get some informations about the critter */ - caps = in_be64(ICS_INT_CAPS_REG(ics->regs)); - buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs)); - ics->lsi_count = caps >> 56; - msi_count = (caps >> 44) & 0x7ff; - - /* Note: LSI BUID is 9 bits, but really only 3 are BUID and the - * rest is mixed in the interrupt number. We store the whole - * thing though - */ - lsi_buid = (buid >> 48) & 0x1ff; - ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5; - msi_buid = (buid >> 37) & 0x7; - msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11; - - pr_info("wsp_ics: Found %s\n", dn->full_name); - pr_info("wsp_ics: irq range : 0x%06llx..0x%06llx\n", - ics->hwirq_start, ics->hwirq_start + ics->count - 1); - pr_info("wsp_ics: %4d LSIs : 0x%06x..0x%06x\n", - ics->lsi_count, ics->lsi_base, - ics->lsi_base + ics->lsi_count - 1); - pr_info("wsp_ics: %4d MSIs : 0x%06x..0x%06x\n", - msi_count, msi_base, - msi_base + msi_count - 1); - - /* Let's check the HW config is sane */ - if (ics->lsi_base < ics->hwirq_start || - (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count)) - pr_warning("wsp_ics: WARNING ! LSIs out of interrupt-ranges !\n"); - if (msi_base < ics->hwirq_start || - (msi_base + msi_count) > (ics->hwirq_start + ics->count)) - pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n"); - - /* We don't check for overlap between LSI and MSI, which will happen - * if we use the same BUID, I'm not sure yet how legit that is. - */ - - rc = wsp_ics_bitmap_setup(ics, dn); - if (rc) { - iounmap(regs); - return rc; - } - - ics->dn = of_node_get(dn); - alloc_irq_map(ics); - - for(i = 0; i < ics->count; i++) - wsp_mask_real_irq(ics->hwirq_start + i, ics); - - ics->ics.map = wsp_ics_map; - ics->ics.mask_unknown = wsp_ics_mask_unknown; - ics->ics.get_server = wsp_ics_get_server; - ics->ics.host_match = wsp_ics_host_match; - - xics_register_ics(&ics->ics); - - return 0; -} - -static void __init wsp_ics_set_default_server(void) -{ - struct device_node *np; - u32 hwid; - - /* Find the server number for the boot cpu. 
*/ - np = of_get_cpu_node(boot_cpuid, NULL); - BUG_ON(!np); - - hwid = get_hard_smp_processor_id(boot_cpuid); - - pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name); - xics_default_server = hwid; - - of_node_put(np); -} - -static int __init wsp_ics_init(void) -{ - struct device_node *dn; - struct wsp_ics *ics; - int rc, found; - - wsp_ics_set_default_server(); - - found = 0; - for_each_compatible_node(dn, NULL, "ibm,ppc-xics") - found++; - - if (found == 0) { - pr_err("wsp_ics: No ICS's found!\n"); - return -ENODEV; - } - - ics_list = kmalloc(sizeof(*ics) * found, GFP_KERNEL); - if (!ics_list) { - pr_err("wsp_ics: No memory for structs.\n"); - return -ENOMEM; - } - - num_ics = 0; - ics = ics_list; - for_each_compatible_node(dn, NULL, "ibm,wsp-xics") { - rc = wsp_ics_setup(ics, dn); - if (rc == 0) { - ics++; - num_ics++; - } - } - - if (found != num_ics) { - pr_err("wsp_ics: Failed setting up %d ICS's\n", - found - num_ics); - return -1; - } - - return 0; -} - -void __init wsp_init_irq(void) -{ - wsp_ics_init(); - xics_init(); - - /* We need to patch our irq chip's EOI to point to the right ICP */ - wsp_irq_chip.irq_eoi = icp_ops->eoi; -} - -#ifdef CONFIG_PCI_MSI -static void wsp_ics_msi_unmask_irq(struct irq_data *d) -{ - wsp_chip_unmask_irq(d); - unmask_msi_irq(d); -} - -static unsigned int wsp_ics_msi_startup(struct irq_data *d) -{ - wsp_ics_msi_unmask_irq(d); - return 0; -} - -static void wsp_ics_msi_mask_irq(struct irq_data *d) -{ - mask_msi_irq(d); - wsp_chip_mask_irq(d); -} - -/* - * we do it this way because we reassinge default EOI handling in - * irq_init() above - */ -static void wsp_ics_eoi(struct irq_data *data) -{ - wsp_irq_chip.irq_eoi(data); -} - -static struct irq_chip wsp_ics_msi = { - .name = "WSP ICS MSI", - .irq_startup = wsp_ics_msi_startup, - .irq_mask = wsp_ics_msi_mask_irq, - .irq_unmask = wsp_ics_msi_unmask_irq, - .irq_eoi = wsp_ics_eoi, - .irq_set_affinity = wsp_chip_set_affinity -}; - -void wsp_ics_set_msi_chip(unsigned int irq) -{ - irq_set_chip(irq, &wsp_ics_msi); -} - -void wsp_ics_set_std_chip(unsigned int irq) -{ - irq_set_chip(irq, &wsp_irq_chip); -} -#endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h deleted file mode 100644 index 07b644e..0000000 --- a/arch/powerpc/platforms/wsp/ics.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2009 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef __ICS_H -#define __ICS_H - -#define XIVE_ADDR_MASK 0x7FFULL - -extern void wsp_init_irq(void); - -extern int wsp_ics_alloc_irq(struct device_node *dn, int num); -extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq); - -#ifdef CONFIG_PCI_MSI -extern void wsp_ics_set_msi_chip(unsigned int irq); -extern void wsp_ics_set_std_chip(unsigned int irq); -#endif /* CONFIG_PCI_MSI */ - -#endif /* __ICS_H */ diff --git a/arch/powerpc/platforms/wsp/msi.c b/arch/powerpc/platforms/wsp/msi.c deleted file mode 100644 index 380882f..0000000 --- a/arch/powerpc/platforms/wsp/msi.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2011 Michael Ellerman, IBM Corp. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/msi.h> -#include <linux/irq.h> -#include <linux/interrupt.h> - -#include "msi.h" -#include "ics.h" -#include "wsp_pci.h" - -/* Magic addresses for 32 & 64-bit MSIs with hardcoded MVE 0 */ -#define MSI_ADDR_32 0xFFFF0000ul -#define MSI_ADDR_64 0x1000000000000000ul - -int wsp_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - struct pci_controller *phb; - struct msi_desc *entry; - struct msi_msg msg; - unsigned int virq; - int hwirq; - - phb = pci_bus_to_host(dev->bus); - if (!phb) - return -ENOENT; - - entry = list_first_entry(&dev->msi_list, struct msi_desc, list); - if (entry->msi_attrib.is_64) { - msg.address_lo = 0; - msg.address_hi = MSI_ADDR_64 >> 32; - } else { - msg.address_lo = MSI_ADDR_32; - msg.address_hi = 0; - } - - list_for_each_entry(entry, &dev->msi_list, list) { - hwirq = wsp_ics_alloc_irq(phb->dn, 1); - if (hwirq < 0) { - dev_warn(&dev->dev, "wsp_msi: hwirq alloc failed!\n"); - return hwirq; - } - - virq = irq_create_mapping(NULL, hwirq); - if (virq == NO_IRQ) { - dev_warn(&dev->dev, "wsp_msi: virq alloc failed!\n"); - return -1; - } - - dev_dbg(&dev->dev, "wsp_msi: allocated irq %#x/%#x\n", - hwirq, virq); - - wsp_ics_set_msi_chip(virq); - irq_set_msi_desc(virq, entry); - msg.data = hwirq & XIVE_ADDR_MASK; - write_msi_msg(virq, &msg); - } - - return 0; -} - -void wsp_teardown_msi_irqs(struct pci_dev *dev) -{ - struct pci_controller *phb; - struct msi_desc *entry; - int hwirq; - - phb = pci_bus_to_host(dev->bus); - - dev_dbg(&dev->dev, "wsp_msi: tearing down msi irqs\n"); - - list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq == NO_IRQ) - continue; - - irq_set_msi_desc(entry->irq, NULL); - wsp_ics_set_std_chip(entry->irq); - - hwirq = virq_to_hw(entry->irq); - /* In this order to avoid racing with irq_create_mapping() */ - irq_dispose_mapping(entry->irq); - wsp_ics_free_irq(phb->dn, hwirq); - } -} - -void wsp_setup_phb_msi(struct pci_controller *phb) -{ - /* Create a single MVE at offset 0 that matches everything */ - out_be64(phb->cfg_data + PCIE_REG_IODA_ADDR, PCIE_REG_IODA_AD_TBL_MVT); - out_be64(phb->cfg_data + PCIE_REG_IODA_DATA0, 1ull << 63); - - ppc_md.setup_msi_irqs = wsp_setup_msi_irqs; - ppc_md.teardown_msi_irqs = wsp_teardown_msi_irqs; -} diff --git a/arch/powerpc/platforms/wsp/msi.h b/arch/powerpc/platforms/wsp/msi.h deleted file mode 100644 index 0ab27b7..0000000 --- a/arch/powerpc/platforms/wsp/msi.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright 2011 Michael Ellerman, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef __WSP_MSI_H -#define __WSP_MSI_H - -#ifdef CONFIG_PCI_MSI -extern void wsp_setup_phb_msi(struct pci_controller *phb); -#else -static inline void wsp_setup_phb_msi(struct pci_controller *phb) { } -#endif - -#endif /* __WSP_MSI_H */ diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c deleted file mode 100644 index 3f67298..0000000 --- a/arch/powerpc/platforms/wsp/opb_pic.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - * IBM Onboard Peripheral Bus Interrupt Controller - * - * Copyright 2010 Jack Miller, IBM Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/of.h> -#include <linux/slab.h> -#include <linux/time.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> - -#include <asm/reg_a2.h> -#include <asm/irq.h> - -#define OPB_NR_IRQS 32 - -#define OPB_MLSASIER 0x04 /* MLS Accumulated Status IER */ -#define OPB_MLSIR 0x50 /* MLS Interrupt Register */ -#define OPB_MLSIER 0x54 /* MLS Interrupt Enable Register */ -#define OPB_MLSIPR 0x58 /* MLS Interrupt Polarity Register */ -#define OPB_MLSIIR 0x5c /* MLS Interrupt Inputs Register */ - -static int opb_index = 0; - -struct opb_pic { - struct irq_domain *host; - void *regs; - int index; - spinlock_t lock; -}; - -static u32 opb_in(struct opb_pic *opb, int offset) -{ - return in_be32(opb->regs + offset); -} - -static void opb_out(struct opb_pic *opb, int offset, u32 val) -{ - out_be32(opb->regs + offset, val); -} - -static void opb_unmask_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 ier, bitset; - - opb = d->chip_data; - bitset = (1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - ier = opb_in(opb, OPB_MLSIER); - opb_out(opb, OPB_MLSIER, ier | bitset); - ier = opb_in(opb, OPB_MLSIER); - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static void opb_mask_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 ier, mask; - - opb = d->chip_data; - mask = ~(1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - ier = opb_in(opb, OPB_MLSIER); - opb_out(opb, OPB_MLSIER, ier & mask); - ier = opb_in(opb, OPB_MLSIER); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static void opb_ack_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 bitset; - - opb = d->chip_data; - bitset = (1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - opb_out(opb, OPB_MLSIR, bitset); - opb_in(opb, OPB_MLSIR); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static void opb_mask_ack_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 bitset; - u32 ier, ir; - - opb = d->chip_data; - bitset = (1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - ier = opb_in(opb, OPB_MLSIER); - opb_out(opb, OPB_MLSIER, ier & ~bitset); - ier = opb_in(opb, OPB_MLSIER); // Flush posted writes - - opb_out(opb, OPB_MLSIR, bitset); - ir = opb_in(opb, OPB_MLSIR); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static int opb_set_irq_type(struct irq_data *d, unsigned int flow) -{ - struct opb_pic *opb; - unsigned long flags; - int invert, ipr, mask, 
bit; - - opb = d->chip_data; - - /* The only information we're interested in in the type is whether it's - * a high or low trigger. For high triggered interrupts, the polarity - * set for it in the MLS Interrupt Polarity Register is 0, for low - * interrupts it's 1 so that the proper input in the MLS Interrupt Input - * Register is interrupted as asserting the interrupt. */ - - switch (flow) { - case IRQ_TYPE_NONE: - opb_mask_irq(d); - return 0; - - case IRQ_TYPE_LEVEL_HIGH: - invert = 0; - break; - - case IRQ_TYPE_LEVEL_LOW: - invert = 1; - break; - - default: - return -EINVAL; - } - - bit = (1 << (31 - irqd_to_hwirq(d))); - mask = ~bit; - - spin_lock_irqsave(&opb->lock, flags); - - ipr = opb_in(opb, OPB_MLSIPR); - ipr = (ipr & mask) | (invert ? bit : 0); - opb_out(opb, OPB_MLSIPR, ipr); - ipr = opb_in(opb, OPB_MLSIPR); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); - - /* Record the type in the interrupt descriptor */ - irqd_set_trigger_type(d, flow); - - return 0; -} - -static struct irq_chip opb_irq_chip = { - .name = "OPB", - .irq_mask = opb_mask_irq, - .irq_unmask = opb_unmask_irq, - .irq_mask_ack = opb_mask_ack_irq, - .irq_ack = opb_ack_irq, - .irq_set_type = opb_set_irq_type -}; - -static int opb_host_map(struct irq_domain *host, unsigned int virq, - irq_hw_number_t hwirq) -{ - struct opb_pic *opb; - - opb = host->host_data; - - /* Most of the important stuff is handled by the generic host code, like - * the lookup, so just attach some info to the virtual irq */ - - irq_set_chip_data(virq, opb); - irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq); - irq_set_irq_type(virq, IRQ_TYPE_NONE); - - return 0; -} - -static const struct irq_domain_ops opb_host_ops = { - .map = opb_host_map, - .xlate = irq_domain_xlate_twocell, -}; - -irqreturn_t opb_irq_handler(int irq, void *private) -{ - struct opb_pic *opb; - u32 ir, src, subvirq; - - opb = (struct opb_pic *) private; - - /* Read the OPB MLS Interrupt Register for - * asserted interrupts */ - ir = opb_in(opb, OPB_MLSIR); - if (!ir) - return IRQ_NONE; - - do { - /* Get 1 - 32 source, *NOT* bit */ - src = 32 - ffs(ir); - - /* Translate from the OPB's conception of interrupt number to - * Linux's virtual IRQ */ - - subvirq = irq_linear_revmap(opb->host, src); - - generic_handle_irq(subvirq); - } while ((ir = opb_in(opb, OPB_MLSIR))); - - return IRQ_HANDLED; -} - -struct opb_pic *opb_pic_init_one(struct device_node *dn) -{ - struct opb_pic *opb; - struct resource res; - - if (of_address_to_resource(dn, 0, &res)) { - printk(KERN_ERR "opb: Couldn't translate resource\n"); - return NULL; - } - - opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL); - if (!opb) { - printk(KERN_ERR "opb: Failed to allocate opb struct!\n"); - return NULL; - } - - /* Get access to the OPB MMIO registers */ - opb->regs = ioremap(res.start + 0x10000, 0x1000); - if (!opb->regs) { - printk(KERN_ERR "opb: Failed to allocate register space!\n"); - goto free_opb; - } - - /* Allocate an irq domain so that Linux knows that despite only - * having one interrupt to issue, we're the controller for multiple - * hardware IRQs, so later we can lookup their virtual IRQs. 
*/ - - opb->host = irq_domain_add_linear(dn, OPB_NR_IRQS, &opb_host_ops, opb); - if (!opb->host) { - printk(KERN_ERR "opb: Failed to allocate IRQ host!\n"); - goto free_regs; - } - - opb->index = opb_index++; - spin_lock_init(&opb->lock); - - /* Disable all interrupts by default */ - opb_out(opb, OPB_MLSASIER, 0); - opb_out(opb, OPB_MLSIER, 0); - - /* ACK any interrupts left by FW */ - opb_out(opb, OPB_MLSIR, 0xFFFFFFFF); - - return opb; - -free_regs: - iounmap(opb->regs); -free_opb: - kfree(opb); - return NULL; -} - -void __init opb_pic_init(void) -{ - struct device_node *dn; - struct opb_pic *opb; - int virq; - int rc; - - /* Call init_one for each OPB device */ - for_each_compatible_node(dn, NULL, "ibm,opb") { - - /* Fill in an OPB struct */ - opb = opb_pic_init_one(dn); - if (!opb) { - printk(KERN_WARNING "opb: Failed to init node, skipped!\n"); - continue; - } - - /* Map / get opb's hardware virtual irq */ - virq = irq_of_parse_and_map(dn, 0); - if (virq <= 0) { - printk("opb: irq_op_parse_and_map failed!\n"); - continue; - } - - /* Attach opb interrupt handler to new virtual IRQ */ - rc = request_irq(virq, opb_irq_handler, IRQF_NO_THREAD, - "OPB LS Cascade", opb); - if (rc) { - printk("opb: request_irq failed: %d\n", rc); - continue; - } - - printk("OPB%d init with %d IRQs at %p\n", opb->index, - OPB_NR_IRQS, opb->regs); - } -} diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c deleted file mode 100644 index a87b414..0000000 --- a/arch/powerpc/platforms/wsp/psr2.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/of.h> -#include <linux/smp.h> -#include <linux/time.h> -#include <linux/of_fdt.h> - -#include <asm/machdep.h> -#include <asm/udbg.h> - -#include "ics.h" -#include "wsp.h" - - -static void psr2_spin(void) -{ - hard_irq_disable(); - for (;;) - continue; -} - -static void psr2_restart(char *cmd) -{ - psr2_spin(); -} - -static int __init psr2_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "ibm,wsp-chroma")) { - /* chroma systems also claim they are psr2s */ - return 0; - } - - if (!of_flat_dt_is_compatible(root, "ibm,psr2")) - return 0; - - return 1; -} - -define_machine(psr2_md) { - .name = "PSR2 A2", - .probe = psr2_probe, - .setup_arch = wsp_setup_arch, - .restart = psr2_restart, - .power_off = psr2_spin, - .halt = psr2_spin, - .calibrate_decr = generic_calibrate_decr, - .init_IRQ = wsp_setup_irq, - .progress = udbg_progress, - .power_save = book3e_idle, -}; - -machine_arch_initcall(psr2_md, wsp_probe_devices); diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c deleted file mode 100644 index 8c79ce0..0000000 --- a/arch/powerpc/platforms/wsp/scom_smp.c +++ /dev/null @@ -1,435 +0,0 @@ -/* - * SCOM support for A2 platforms - * - * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson, - * Michael Ellerman, IBM Corp. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/cpumask.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/spinlock.h> -#include <linux/types.h> - -#include <asm/cputhreads.h> -#include <asm/reg_a2.h> -#include <asm/scom.h> -#include <asm/udbg.h> -#include <asm/code-patching.h> - -#include "wsp.h" - -#define SCOM_RAMC 0x2a /* Ram Command */ -#define SCOM_RAMC_TGT1_EXT 0x80000000 -#define SCOM_RAMC_SRC1_EXT 0x40000000 -#define SCOM_RAMC_SRC2_EXT 0x20000000 -#define SCOM_RAMC_SRC3_EXT 0x10000000 -#define SCOM_RAMC_ENABLE 0x00080000 -#define SCOM_RAMC_THREADSEL 0x00060000 -#define SCOM_RAMC_EXECUTE 0x00010000 -#define SCOM_RAMC_MSR_OVERRIDE 0x00008000 -#define SCOM_RAMC_MSR_PR 0x00004000 -#define SCOM_RAMC_MSR_GS 0x00002000 -#define SCOM_RAMC_FORCE 0x00001000 -#define SCOM_RAMC_FLUSH 0x00000800 -#define SCOM_RAMC_INTERRUPT 0x00000004 -#define SCOM_RAMC_ERROR 0x00000002 -#define SCOM_RAMC_DONE 0x00000001 -#define SCOM_RAMI 0x29 /* Ram Instruction */ -#define SCOM_RAMIC 0x28 /* Ram Instruction and Command */ -#define SCOM_RAMIC_INSN 0xffffffff00000000 -#define SCOM_RAMD 0x2d /* Ram Data */ -#define SCOM_RAMDH 0x2e /* Ram Data High */ -#define SCOM_RAMDL 0x2f /* Ram Data Low */ -#define SCOM_PCCR0 0x33 /* PC Configuration Register 0 */ -#define SCOM_PCCR0_ENABLE_DEBUG 0x80000000 -#define SCOM_PCCR0_ENABLE_RAM 0x40000000 -#define SCOM_THRCTL 0x30 /* Thread Control and Status */ -#define SCOM_THRCTL_T0_STOP 0x80000000 -#define SCOM_THRCTL_T1_STOP 0x40000000 -#define SCOM_THRCTL_T2_STOP 0x20000000 -#define SCOM_THRCTL_T3_STOP 0x10000000 -#define SCOM_THRCTL_T0_STEP 0x08000000 -#define SCOM_THRCTL_T1_STEP 0x04000000 -#define SCOM_THRCTL_T2_STEP 0x02000000 -#define SCOM_THRCTL_T3_STEP 0x01000000 -#define SCOM_THRCTL_T0_RUN 0x00800000 -#define SCOM_THRCTL_T1_RUN 0x00400000 -#define SCOM_THRCTL_T2_RUN 0x00200000 -#define SCOM_THRCTL_T3_RUN 0x00100000 -#define SCOM_THRCTL_T0_PM 0x00080000 -#define SCOM_THRCTL_T1_PM 0x00040000 -#define SCOM_THRCTL_T2_PM 0x00020000 -#define SCOM_THRCTL_T3_PM 0x00010000 -#define SCOM_THRCTL_T0_UDE 0x00008000 -#define SCOM_THRCTL_T1_UDE 0x00004000 -#define SCOM_THRCTL_T2_UDE 0x00002000 -#define SCOM_THRCTL_T3_UDE 0x00001000 -#define SCOM_THRCTL_ASYNC_DIS 0x00000800 -#define SCOM_THRCTL_TB_DIS 0x00000400 -#define SCOM_THRCTL_DEC_DIS 0x00000200 -#define SCOM_THRCTL_AND 0x31 /* Thread Control and Status */ -#define SCOM_THRCTL_OR 0x32 /* Thread Control and Status */ - - -static DEFINE_PER_CPU(scom_map_t, scom_ptrs); - -static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread) -{ - scom_map_t scom = per_cpu(scom_ptrs, cpu); - int tcpu; - - if (scom_map_ok(scom)) { - *first_thread = 0; - return scom; - } - - *first_thread = 1; - - scom = scom_map_device(np, 0); - - for (tcpu = cpu_first_thread_sibling(cpu); - tcpu <= cpu_last_thread_sibling(cpu); tcpu++) - per_cpu(scom_ptrs, tcpu) = scom; - - /* Hack: for the boot core, this will actually get called on - * the second thread up, not the first so our test above will - * set first_thread incorrectly. 
*/ - if (cpu_first_thread_sibling(cpu) == 0) - *first_thread = 0; - - return scom; -} - -static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask) -{ - u64 cmd, mask, val; - int n = 0; - - cmd = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28) - | ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE; - mask = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR; - - scom_write(scom, SCOM_RAMIC, cmd); - - for (;;) { - if (scom_read(scom, SCOM_RAMC, &val) != 0) { - pr_err("SCOM error on instruction 0x%08x, thread %d\n", - insn, thread); - return -1; - } - if (val & mask) - break; - pr_devel("Waiting on RAMC = 0x%llx\n", val); - if (++n == 3) { - pr_err("RAMC timeout on instruction 0x%08x, thread %d\n", - insn, thread); - return -1; - } - } - - if (val & SCOM_RAMC_INTERRUPT) { - pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n", - insn, thread); - return -SCOM_RAMC_INTERRUPT; - } - - if (val & SCOM_RAMC_ERROR) { - pr_err("RAMC error on instruction 0x%08x, thread %d\n", - insn, thread); - return -SCOM_RAMC_ERROR; - } - - return 0; -} - -static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt, - u64 *out_gpr) -{ - int rc; - - /* or rN, rN, rN */ - u32 insn = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11); - rc = a2_scom_ram(scom, thread, insn, alt ? 0xf : 0x0); - if (rc) - return rc; - - return scom_read(scom, SCOM_RAMD, out_gpr); -} - -static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr) -{ - int rc, sprhi, sprlo; - u32 insn; - - sprhi = spr >> 5; - sprlo = spr & 0x1f; - insn = 0x7c2002a6 | (sprlo << 16) | (sprhi << 11); /* mfspr r1,spr */ - - if (spr == 0x0ff0) - insn = 0x7c2000a6; /* mfmsr r1 */ - - rc = a2_scom_ram(scom, thread, insn, 0xf); - if (rc) - return rc; - return a2_scom_getgpr(scom, thread, 1, 1, out_spr); -} - -static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr, - int alt, u64 val) -{ - u32 lis = 0x3c000000 | (gpr << 21); - u32 li = 0x38000000 | (gpr << 21); - u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16); - u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16); - u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16); - u32 highest = val >> 48; - u32 higher = (val >> 32) & 0xffff; - u32 high = (val >> 16) & 0xffff; - u32 low = val & 0xffff; - int lext = alt ? 0x8 : 0x0; - int oext = alt ? 
0xf : 0x0; - int rc = 0; - - if (highest) - rc |= a2_scom_ram(scom, thread, lis | highest, lext); - - if (higher) { - if (highest) - rc |= a2_scom_ram(scom, thread, oris | higher, oext); - else - rc |= a2_scom_ram(scom, thread, li | higher, lext); - } - - if (highest || higher) - rc |= a2_scom_ram(scom, thread, rldicr32, oext); - - if (high) { - if (highest || higher) - rc |= a2_scom_ram(scom, thread, oris | high, oext); - else - rc |= a2_scom_ram(scom, thread, lis | high, lext); - } - - if (highest || higher || high) - rc |= a2_scom_ram(scom, thread, ori | low, oext); - else - rc |= a2_scom_ram(scom, thread, li | low, lext); - - return rc; -} - -static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val) -{ - int sprhi = spr >> 5; - int sprlo = spr & 0x1f; - /* mtspr spr, r1 */ - u32 insn = 0x7c2003a6 | (sprlo << 16) | (sprhi << 11); - - if (spr == 0x0ff0) - insn = 0x7c200124; /* mtmsr r1 */ - - if (a2_scom_setgpr(scom, thread, 1, 1, val)) - return -1; - - return a2_scom_ram(scom, thread, insn, 0xf); -} - -static int a2_scom_initial_tlb(scom_map_t scom, int thread) -{ - extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[]; - extern u32 a2_tlbinit_after_iprot_flush[]; - extern u32 a2_tlbinit_after_linear_map[]; - u32 assoc, entries, i; - u64 epn, tlbcfg; - u32 *p; - int rc; - - /* Invalidate all entries (including iprot) */ - - rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg); - if (rc) - goto scom_fail; - entries = tlbcfg & TLBnCFG_N_ENTRY; - assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24; - epn = 0; - - /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ - a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531); - /* Set MMUCR3 to write all thids bit to the TLB */ - a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f); - - /* Set MAS1 for 1G page size, and MAS2 to our initial EPN */ - a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB)); - a2_scom_setspr(scom, thread, SPRN_MAS2, epn); - for (i = 0; i < entries; i++) { - - a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc)); - - /* tlbwe */ - rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0); - if (rc) - goto scom_fail; - - /* Next entry is new address? */ - if((i + 1) % assoc == 0) { - epn += (1 << 30); - a2_scom_setspr(scom, thread, SPRN_MAS2, epn); - } - } - - /* Setup args for linear mapping */ - rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0)); - if (rc) - goto scom_fail; - - /* Linear mapping */ - for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) { - rc = a2_scom_ram(scom, thread, *p, 0); - if (rc) - goto scom_fail; - } - - /* - * For the boot thread, between the linear mapping and the debug - * mappings there is a loop to flush iprot mappings. Ramming doesn't do - * branches, but the secondary threads don't need to be nearly as smart - * (i.e. we don't need to worry about invalidating the mapping we're - * standing on). - */ - - /* Debug mappings. 
Expects r11 = MAS0 from linear map (set above) */ - for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) { - rc = a2_scom_ram(scom, thread, *p, 0); - if (rc) - goto scom_fail; - } - -scom_fail: - if (rc) - pr_err("Setting up initial TLB failed, err %d\n", rc); - - if (rc == -SCOM_RAMC_INTERRUPT) { - /* Interrupt, dump some status */ - int rc[10]; - u64 iar, srr0, srr1, esr, mas0, mas1, mas2, mas7_3, mas8, ccr2; - rc[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar); - rc[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0); - rc[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1); - rc[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr); - rc[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0); - rc[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1); - rc[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2); - rc[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3); - rc[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8); - rc[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2); - pr_err(" -> retreived IAR =0x%llx (err %d)\n", iar, rc[0]); - pr_err(" retreived SRR0=0x%llx (err %d)\n", srr0, rc[1]); - pr_err(" retreived SRR1=0x%llx (err %d)\n", srr1, rc[2]); - pr_err(" retreived ESR =0x%llx (err %d)\n", esr, rc[3]); - pr_err(" retreived MAS0=0x%llx (err %d)\n", mas0, rc[4]); - pr_err(" retreived MAS1=0x%llx (err %d)\n", mas1, rc[5]); - pr_err(" retreived MAS2=0x%llx (err %d)\n", mas2, rc[6]); - pr_err(" retreived MS73=0x%llx (err %d)\n", mas7_3, rc[7]); - pr_err(" retreived MAS8=0x%llx (err %d)\n", mas8, rc[8]); - pr_err(" retreived CCR2=0x%llx (err %d)\n", ccr2, rc[9]); - } - - return rc; -} - -int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, struct device_node *np) -{ - u64 init_iar, init_msr, init_ccr2; - unsigned long start_here; - int rc, core_setup; - scom_map_t scom; - u64 pccr0; - - scom = get_scom(lcpu, np, &core_setup); - if (!scom) { - printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu); - return -1; - } - - pr_devel("Bringing up CPU%d using SCOM...\n", lcpu); - - if (scom_read(scom, SCOM_PCCR0, &pccr0) != 0) { - printk(KERN_ERR "XSCOM failure readng PCCR0 on CPU%d\n", lcpu); - return -1; - } - scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG | - SCOM_PCCR0_ENABLE_RAM); - - /* Stop the thead with THRCTL. If we are setting up the TLB we stop all - * threads. We also disable asynchronous interrupts while RAMing. - */ - if (core_setup) - scom_write(scom, SCOM_THRCTL_OR, - SCOM_THRCTL_T0_STOP | - SCOM_THRCTL_T1_STOP | - SCOM_THRCTL_T2_STOP | - SCOM_THRCTL_T3_STOP | - SCOM_THRCTL_ASYNC_DIS); - else - scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx); - - /* Flush its pipeline just in case */ - scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) | - SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE); - - a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar); - a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr); - a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2); - - /* Set MSR to MSR_CM (0x0ff0 is magic value for MSR_CM) */ - rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM); - if (rc) { - pr_err("Failed to set MSR ! err %d\n", rc); - return rc; - } - - /* RAM in an sync/isync for the sake of it */ - a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0); - a2_scom_ram(scom, thr_idx, 0x4c00012c, 0); - - if (core_setup) { - pr_devel("CPU%d is first thread in core, initializing TLB...\n", - lcpu); - rc = a2_scom_initial_tlb(scom, thr_idx); - if (rc) - goto fail; - } - - start_here = ppc_function_entry(core_setup ? 
generic_secondary_smp_init - : generic_secondary_thread_init); - pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here); - - rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here); - rc |= a2_scom_setgpr(scom, thr_idx, 3, 0, - get_hard_smp_processor_id(lcpu)); - /* - * Tell book3e_secondary_core_init not to set up the TLB, we've - * already done that. - */ - rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1); - - rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx); - - scom_write(scom, SCOM_RAMC, 0); - scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx)); - scom_write(scom, SCOM_PCCR0, pccr0); -fail: - pr_devel(" SCOM initialization %s\n", rc ? "failed" : "succeeded"); - if (rc) { - pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n", - init_iar, init_msr, init_ccr2); - } - - return rc; -} diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c deleted file mode 100644 index 6538b4d..0000000 --- a/arch/powerpc/platforms/wsp/scom_wsp.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * SCOM backend for WSP - * - * Copyright 2010 Benjamin Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/cpumask.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <linux/of_address.h> - -#include <asm/cputhreads.h> -#include <asm/reg_a2.h> -#include <asm/scom.h> -#include <asm/udbg.h> - -#include "wsp.h" - - -static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count) -{ - struct resource r; - u64 xscom_addr; - - if (!of_get_property(dev, "scom-controller", NULL)) { - pr_err("%s: device %s is not a SCOM controller\n", - __func__, dev->full_name); - return SCOM_MAP_INVALID; - } - - if (of_address_to_resource(dev, 0, &r)) { - pr_debug("Failed to find SCOM controller address\n"); - return 0; - } - - /* Transform the SCOM address into an XSCOM offset */ - xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3); - - return (scom_map_t)ioremap(r.start + xscom_addr, count << 3); -} - -static void wsp_scom_unmap(scom_map_t map) -{ - iounmap((void *)map); -} - -static int wsp_scom_read(scom_map_t map, u64 reg, u64 *value) -{ - u64 __iomem *addr = (u64 __iomem *)map; - - *value = in_be64(addr + reg); - - return 0; -} - -static int wsp_scom_write(scom_map_t map, u64 reg, u64 value) -{ - u64 __iomem *addr = (u64 __iomem *)map; - - out_be64(addr + reg, value); - - return 0; -} - -static const struct scom_controller wsp_scom_controller = { - .map = wsp_scom_map, - .unmap = wsp_scom_unmap, - .read = wsp_scom_read, - .write = wsp_scom_write -}; - -void scom_init_wsp(void) -{ - scom_init(&wsp_scom_controller); -} diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c deleted file mode 100644 index 11ac2f0..0000000 --- a/arch/powerpc/platforms/wsp/setup.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2010 Michael Ellerman, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include <linux/kernel.h> -#include <linux/of_platform.h> - -#include "wsp.h" - -/* - * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property. - * Won't work for nodes that are not a descendant of a wsp node. - */ -int wsp_get_chip_id(struct device_node *dn) -{ - const u32 *p; - int rc; - - /* Start looking at the specified node, not its parent */ - dn = of_node_get(dn); - while (dn && !(p = of_get_property(dn, "ibm,wsp-chip-id", NULL))) - dn = of_get_next_parent(dn); - - if (!dn) - return -1; - - rc = *p; - of_node_put(dn); - - return rc; -} diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c deleted file mode 100644 index 332a18b..0000000 --- a/arch/powerpc/platforms/wsp/smp.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * SMP Support for A2 platforms - * - * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include <linux/cpumask.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/smp.h> - -#include <asm/dbell.h> -#include <asm/machdep.h> -#include <asm/xics.h> - -#include "ics.h" -#include "wsp.h" - -static void smp_a2_setup_cpu(int cpu) -{ - doorbell_setup_this_cpu(); - - if (cpu != boot_cpuid) - xics_setup_cpu(); -} - -int smp_a2_kick_cpu(int nr) -{ - const char *enable_method; - struct device_node *np; - int thr_idx; - - if (nr < 0 || nr >= NR_CPUS) - return -ENOENT; - - np = of_get_cpu_node(nr, &thr_idx); - if (!np) - return -ENODEV; - - enable_method = of_get_property(np, "enable-method", NULL); - pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method); - - if (!enable_method) { - printk(KERN_ERR "CPU%d has no enable-method\n", nr); - return -ENOENT; - } else if (strcmp(enable_method, "ibm,a2-scom") == 0) { - if (a2_scom_startup_cpu(nr, thr_idx, np)) - return -1; - } else { - printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n", - nr, enable_method); - return -EINVAL; - } - - /* - * The processor is currently spinning, waiting for the - * cpu_start field to become non-zero After we set cpu_start, - * the processor will continue on to secondary_start - */ - paca[nr].cpu_start = 1; - - return 0; -} - -static int __init smp_a2_probe(void) -{ - return num_possible_cpus(); -} - -static struct smp_ops_t a2_smp_ops = { - .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ - .cause_ipi = doorbell_cause_ipi, - .probe = smp_a2_probe, - .kick_cpu = smp_a2_kick_cpu, - .setup_cpu = smp_a2_setup_cpu, -}; - -void __init a2_setup_smp(void) -{ - smp_ops = &a2_smp_ops; -} diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c deleted file mode 100644 index 58cd1f0..0000000 --- a/arch/powerpc/platforms/wsp/wsp.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/of_device.h> -#include <linux/smp.h> -#include <linux/delay.h> -#include <linux/time.h> -#include <linux/of_address.h> - -#include <asm/scom.h> - -#include "wsp.h" -#include "ics.h" - -#define WSP_SOC_COMPATIBLE "ibm,wsp-soc" -#define PBIC_COMPATIBLE "ibm,wsp-pbic" -#define COPRO_COMPATIBLE "ibm,wsp-coprocessor" - -static int __init wsp_probe_buses(void) -{ - static __initdata struct of_device_id bus_ids[] = { - /* - * every node in between needs to be here or you won't - * find it - */ - { .compatible = WSP_SOC_COMPATIBLE, }, - { .compatible = PBIC_COMPATIBLE, }, - { .compatible = COPRO_COMPATIBLE, }, - {}, - }; - of_platform_bus_probe(NULL, bus_ids, NULL); - - return 0; -} - -void __init wsp_setup_arch(void) -{ - /* init to some ~sane value until calibrate_delay() runs */ - loops_per_jiffy = 50000000; - - scom_init_wsp(); - - /* Setup SMP callback */ -#ifdef CONFIG_SMP - a2_setup_smp(); -#endif -#ifdef CONFIG_PCI - wsp_setup_pci(); -#endif -} - -void __init wsp_setup_irq(void) -{ - wsp_init_irq(); - opb_pic_init(); -} - - -int __init wsp_probe_devices(void) -{ - struct device_node *np; - - /* Our RTC is a ds1500. It seems to be programatically compatible - * with the ds1511 for which we have a driver so let's use that - */ - np = of_find_compatible_node(NULL, NULL, "dallas,ds1500"); - if (np != NULL) { - struct resource res; - if (of_address_to_resource(np, 0, &res) == 0) - platform_device_register_simple("ds1511", 0, &res, 1); - } - - wsp_probe_buses(); - - return 0; -} - -void wsp_halt(void) -{ - u64 val; - scom_map_t m; - struct device_node *dn; - struct device_node *mine; - struct device_node *me; - int rc; - - me = of_get_cpu_node(smp_processor_id(), NULL); - mine = scom_find_parent(me); - - /* This will halt all the A2s but not power off the chip */ - for_each_node_with_property(dn, "scom-controller") { - if (dn == mine) - continue; - m = scom_map(dn, 0, 1); - - /* read-modify-write it so the HW probe does not get - * confused */ - rc = scom_read(m, 0, &val); - if (rc == 0) - scom_write(m, 0, val | 1); - scom_unmap(m); - } - m = scom_map(mine, 0, 1); - rc = scom_read(m, 0, &val); - if (rc == 0) - scom_write(m, 0, val | 1); - /* should never return */ - scom_unmap(m); -} diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h deleted file mode 100644 index a563a8a..0000000 --- a/arch/powerpc/platforms/wsp/wsp.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef __WSP_H -#define __WSP_H - -#include <asm/wsp.h> - -/* Devtree compatible strings for major devices */ -#define PCIE_COMPATIBLE "ibm,wsp-pciex" - -extern void wsp_setup_arch(void); -extern void wsp_setup_irq(void); -extern int wsp_probe_devices(void); -extern void wsp_halt(void); - -extern void wsp_setup_pci(void); -extern void scom_init_wsp(void); - -extern void a2_setup_smp(void); -extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, - struct device_node *np); -extern int smp_a2_kick_cpu(int nr); - -extern void opb_pic_init(void); - -/* chroma specific managment */ -extern void wsp_h8_restart(char *cmd); -extern void wsp_h8_power_off(void); -extern void __init wsp_setup_h8(void); - -#endif /* __WSP_H */ diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c deleted file mode 100644 index 9a15e5b..0000000 --- a/arch/powerpc/platforms/wsp/wsp_pci.c +++ /dev/null @@ -1,1134 +0,0 @@ -/* - * Copyright 2010 Ben Herrenschmidt, IBM Corporation - * - * This program is free software; you can redistribute 
it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define DEBUG - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/irq.h> -#include <linux/interrupt.h> -#include <linux/debugfs.h> - -#include <asm/sections.h> -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#include <asm/machdep.h> -#include <asm/ppc-pci.h> -#include <asm/iommu.h> -#include <asm/io-workarounds.h> -#include <asm/debug.h> - -#include "wsp.h" -#include "wsp_pci.h" -#include "msi.h" - - -/* Max number of TVTs for one table. Only 32-bit tables can use - * multiple TVTs and so the max currently supported is thus 8 - * since only 2G of DMA space is supported - */ -#define MAX_TABLE_TVT_COUNT 8 - -struct wsp_dma_table { - struct list_head link; - struct iommu_table table; - struct wsp_phb *phb; - struct page *tces[MAX_TABLE_TVT_COUNT]; -}; - -/* We support DMA regions from 0...2G in 32bit space (no support for - * 64-bit DMA just yet). Each device gets a separate TCE table (TVT - * entry) with validation enabled (though not supported by SimiCS - * just yet). - * - * To simplify things, we divide this 2G space into N regions based - * on the constant below which could be turned into a tunable eventually - * - * We then assign dynamically those regions to devices as they show up. - * - * We use a bitmap as an allocator for these. - * - * Tables are allocated/created dynamically as devices are discovered, - * multiple TVT entries are used if needed - * - * When 64-bit DMA support is added we should simply use a separate set - * of larger regions (the HW supports 64 TVT entries). We can - * additionally create a bypass region in 64-bit space for performances - * though that would have a cost in term of security. - * - * If you set NUM_DMA32_REGIONS to 1, then a single table is shared - * for all devices and bus/dev/fn validation is disabled - * - * Note that a DMA32 region cannot be smaller than 256M so the max - * supported here for now is 8. We don't yet support sharing regions - * between multiple devices so the max number of devices supported - * is MAX_TABLE_TVT_COUNT. - */ -#define NUM_DMA32_REGIONS 1 - -struct wsp_phb { - struct pci_controller *hose; - - /* Lock controlling access to the list of dma tables. - * It does -not- protect against dma_* operations on - * those tables, those should be stopped before an entry - * is removed from the list. - * - * The lock is also used for error handling operations - */ - spinlock_t lock; - struct list_head dma_tables; - unsigned long dma32_map; - unsigned long dma32_base; - unsigned int dma32_num_regions; - unsigned long dma32_region_size; - - /* Debugfs stuff */ - struct dentry *ddir; - - struct list_head all; -}; -static LIST_HEAD(wsp_phbs); - -//#define cfg_debug(fmt...) pr_debug(fmt) -#define cfg_debug(fmt...) 
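
The wsp_pci.c header comment above describes how the PHB's 32-bit DMA space is carved into N fixed-size regions (256M minimum, at most 8 in the 2G window) that are handed out to devices from a bitmap. Below is a minimal, self-contained sketch of that allocation pattern; the window size, region count, and helper names are illustrative assumptions for this note only, not the kernel's API or the deleted driver's exact structures.

#include <stdint.h>
#include <stdio.h>

#define DMA32_WINDOW	0x80000000ULL			/* 2G of 32-bit DMA space */
#define NUM_REGIONS	8				/* 256M per region, the max the comment allows */
#define REGION_SIZE	(DMA32_WINDOW / NUM_REGIONS)

static uint32_t region_map;				/* one bit per region */

/* Claim the first free region; returns its index or -1 if all are in use. */
static int region_alloc(void)
{
	for (int i = 0; i < NUM_REGIONS; i++) {
		if (!(region_map & (1u << i))) {
			region_map |= 1u << i;
			return i;
		}
	}
	return -1;
}

/* Release a previously claimed region. */
static void region_free(int region)
{
	region_map &= ~(1u << region);
}

int main(void)
{
	int a = region_alloc();
	int b = region_alloc();

	/* Each region corresponds to a fixed bus-address range in the window. */
	printf("region %d -> bus 0x%llx, region %d -> bus 0x%llx\n",
	       a, (unsigned long long)(a * REGION_SIZE),
	       b, (unsigned long long)(b * REGION_SIZE));

	region_free(a);
	region_free(b);
	return 0;
}

In the deleted driver the same idea is expressed with the dma32_map/dma32_num_regions fields of struct wsp_phb and a per-region TCE table; the sketch only shows the bitmap bookkeeping that the comment describes.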
- - -static int wsp_pcie_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - int suboff; - u64 addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = PCIE_REG_CA_ENABLE | - ((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT | - ((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT | - ((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT; - suboff = offset & 3; - - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - - switch (len) { - case 1: - addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - *val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA) - >> (suboff << 3)) & 0xff; - cfg_debug("read 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, *val); - break; - case 2: - addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - *val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA) - >> (suboff << 3)) & 0xffff; - cfg_debug("read 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, *val); - break; - default: - addr |= 0xful << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - *val = in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA); - cfg_debug("read 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, *val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int wsp_pcie_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - int suboff; - u64 addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = PCIE_REG_CA_ENABLE | - ((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT | - ((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT | - ((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT; - suboff = offset & 3; - - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. 
- */ - switch (len) { - case 1: - addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT; - val <<= suboff << 3; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); - cfg_debug("write 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, val); - break; - case 2: - addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT; - val <<= suboff << 3; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); - cfg_debug("write 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, val); - break; - default: - addr |= 0xful << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); - cfg_debug("write 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops wsp_pcie_pci_ops = -{ - .read = wsp_pcie_read_config, - .write = wsp_pcie_write_config, -}; - -#define TCE_SHIFT 12 -#define TCE_PAGE_SIZE (1 << TCE_SHIFT) -#define TCE_PCI_WRITE 0x2 /* write from PCI allowed */ -#define TCE_PCI_READ 0x1 /* read from PCI allowed */ -#define TCE_RPN_MASK 0x3fffffffffful /* 42-bit RPN (4K pages) */ -#define TCE_RPN_SHIFT 12 - -//#define dma_debug(fmt...) pr_debug(fmt) -#define dma_debug(fmt...) - -static int tce_build_wsp(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) -{ - struct wsp_dma_table *ptbl = container_of(tbl, - struct wsp_dma_table, - table); - u64 proto_tce; - u64 *tcep; - u64 rpn; - - proto_tce = TCE_PCI_READ; -#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - proto_tce |= TCE_PCI_WRITE; -#else - if (direction != DMA_TO_DEVICE) - proto_tce |= TCE_PCI_WRITE; -#endif - - /* XXX Make this faster by factoring out the page address for - * within a TCE table - */ - while (npages--) { - /* We don't use it->base as the table can be scattered */ - tcep = (u64 *)page_address(ptbl->tces[index >> 16]); - tcep += (index & 0xffff); - - /* can't move this out since we might cross LMB boundary */ - rpn = __pa(uaddr) >> TCE_SHIFT; - *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; - - dma_debug("[DMA] TCE %p set to 0x%016llx (dma addr: 0x%lx)\n", - tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT_4K); - - uaddr += TCE_PAGE_SIZE; - index++; - } - return 0; -} - -static void tce_free_wsp(struct iommu_table *tbl, long index, long npages) -{ - struct wsp_dma_table *ptbl = container_of(tbl, - struct wsp_dma_table, - table); -#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - struct pci_controller *hose = ptbl->phb->hose; -#endif - u64 *tcep; - - /* XXX Make this faster by factoring out the page address for - * within a TCE table. 
Also use line-kill option to kill multiple - * TCEs at once - */ - while (npages--) { - /* We don't use it->base as the table can be scattered */ - tcep = (u64 *)page_address(ptbl->tces[index >> 16]); - tcep += (index & 0xffff); - dma_debug("[DMA] TCE %p cleared\n", tcep); - *tcep = 0; -#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - /* Don't write there since it would pollute other MMIO accesses */ - out_be64(hose->cfg_data + PCIE_REG_TCE_KILL, - PCIE_REG_TCEKILL_SINGLE | PCIE_REG_TCEKILL_PS_4K | - (__pa(tcep) & PCIE_REG_TCEKILL_ADDR_MASK)); -#endif - index++; - } -} - -static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb, - unsigned int region, - struct pci_dev *validate) -{ - struct pci_controller *hose = phb->hose; - unsigned long size = phb->dma32_region_size; - unsigned long addr = phb->dma32_region_size * region + phb->dma32_base; - struct wsp_dma_table *tbl; - int tvts_per_table, i, tvt, nid; - unsigned long flags; - - nid = of_node_to_nid(phb->hose->dn); - - /* Calculate how many TVTs are needed */ - tvts_per_table = size / 0x10000000; - if (tvts_per_table == 0) - tvts_per_table = 1; - - /* Calculate the base TVT index. We know all tables have the same - * size so we just do a simple multiply here - */ - tvt = region * tvts_per_table; - - pr_debug(" Region : %d\n", region); - pr_debug(" DMA range : 0x%08lx..0x%08lx\n", addr, addr + size - 1); - pr_debug(" Number of TVTs : %d\n", tvts_per_table); - pr_debug(" Base TVT : %d\n", tvt); - pr_debug(" Node : %d\n", nid); - - tbl = kzalloc_node(sizeof(struct wsp_dma_table), GFP_KERNEL, nid); - if (!tbl) - return ERR_PTR(-ENOMEM); - tbl->phb = phb; - - /* Create as many TVTs as needed, each represents 256M at most */ - for (i = 0; i < tvts_per_table; i++) { - u64 tvt_data1, tvt_data0; - - /* Allocate table. We use a 4K TCE size for now always so - * one table is always 8 * (258M / 4K) == 512K - */ - tbl->tces[i] = alloc_pages_node(nid, GFP_KERNEL, get_order(0x80000)); - if (tbl->tces[i] == NULL) - goto fail; - memset(page_address(tbl->tces[i]), 0, 0x80000); - - pr_debug(" TCE table %d at : %p\n", i, page_address(tbl->tces[i])); - - /* Table size. We currently set it to be the whole 256M region */ - tvt_data0 = 2ull << IODA_TVT0_TCE_TABLE_SIZE_SHIFT; - /* IO page size set to 4K */ - tvt_data1 = 1ull << IODA_TVT1_IO_PAGE_SIZE_SHIFT; - /* Shift in the address */ - tvt_data0 |= __pa(page_address(tbl->tces[i])) << IODA_TVT0_TTA_SHIFT; - - /* Validation stuff. 
We only validate fully bus/dev/fn for now - * one day maybe we can group devices but that isn't the case - * at the moment - */ - if (validate) { - tvt_data0 |= IODA_TVT0_BUSNUM_VALID_MASK; - tvt_data0 |= validate->bus->number; - tvt_data1 |= IODA_TVT1_DEVNUM_VALID; - tvt_data1 |= ((u64)PCI_SLOT(validate->devfn)) - << IODA_TVT1_DEVNUM_VALUE_SHIFT; - tvt_data1 |= IODA_TVT1_FUNCNUM_VALID; - tvt_data1 |= ((u64)PCI_FUNC(validate->devfn)) - << IODA_TVT1_FUNCNUM_VALUE_SHIFT; - } - - /* XX PE number is always 0 for now */ - - /* Program the values using the PHB lock */ - spin_lock_irqsave(&phb->lock, flags); - out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR, - (tvt + i) | PCIE_REG_IODA_AD_TBL_TVT); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, tvt_data1); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, tvt_data0); - spin_unlock_irqrestore(&phb->lock, flags); - } - - /* Init bits and pieces */ - tbl->table.it_blocksize = 16; - tbl->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; - tbl->table.it_offset = addr >> tbl->table.it_page_shift; - tbl->table.it_size = size >> tbl->table.it_page_shift; - - /* - * It's already blank but we clear it anyway. - * Consider an aditiona interface that makes cleaing optional - */ - iommu_init_table(&tbl->table, nid); - - list_add(&tbl->link, &phb->dma_tables); - return tbl; - - fail: - pr_debug(" Failed to allocate a 256M TCE table !\n"); - for (i = 0; i < tvts_per_table; i++) - if (tbl->tces[i]) - __free_pages(tbl->tces[i], get_order(0x80000)); - kfree(tbl); - return ERR_PTR(-ENOMEM); -} - -static void wsp_pci_dma_dev_setup(struct pci_dev *pdev) -{ - struct dev_archdata *archdata = &pdev->dev.archdata; - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct wsp_phb *phb = hose->private_data; - struct wsp_dma_table *table = NULL; - unsigned long flags; - int i; - - /* Don't assign an iommu table to a bridge */ - if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - return; - - pr_debug("%s: Setting up DMA...\n", pci_name(pdev)); - - spin_lock_irqsave(&phb->lock, flags); - - /* If only one region, check if it already exist */ - if (phb->dma32_num_regions == 1) { - spin_unlock_irqrestore(&phb->lock, flags); - if (list_empty(&phb->dma_tables)) - table = wsp_pci_create_dma32_table(phb, 0, NULL); - else - table = list_first_entry(&phb->dma_tables, - struct wsp_dma_table, - link); - } else { - /* else find a free region */ - for (i = 0; i < phb->dma32_num_regions && !table; i++) { - if (__test_and_set_bit(i, &phb->dma32_map)) - continue; - spin_unlock_irqrestore(&phb->lock, flags); - table = wsp_pci_create_dma32_table(phb, i, pdev); - } - } - - /* Check if we got an error */ - if (IS_ERR(table)) { - pr_err("%s: Failed to create DMA table, err %ld !\n", - pci_name(pdev), PTR_ERR(table)); - return; - } - - /* Or a valid table */ - if (table) { - pr_info("%s: Setup iommu: 32-bit DMA region 0x%08lx..0x%08lx\n", - pci_name(pdev), - table->table.it_offset << IOMMU_PAGE_SHIFT_4K, - (table->table.it_offset << IOMMU_PAGE_SHIFT_4K) - + phb->dma32_region_size - 1); - archdata->dma_data.iommu_table_base = &table->table; - return; - } - - /* Or no room */ - spin_unlock_irqrestore(&phb->lock, flags); - pr_err("%s: Out of DMA space !\n", pci_name(pdev)); -} - -static void __init wsp_pcie_configure_hw(struct pci_controller *hose) -{ - u64 val; - int i; - -#define DUMP_REG(x) \ - pr_debug("%-30s : 0x%016llx\n", #x, in_be64(hose->cfg_data + x)) - - /* - * Some WSP variants has a bogus class code by default in the PCI-E - * root complex's built-in P2P bridge - */ - val = 
in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1); - pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", val); - out_be64(hose->cfg_data + PCIE_REG_SYS_CFG1, - (val & ~PCIE_REG_SYS_CFG1_CLASS_CODE) | (PCI_CLASS_BRIDGE_PCI << 8)); - pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1)); - -#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - /* XXX Disable TCE caching, it doesn't work on DD1 */ - out_be64(hose->cfg_data + 0xe50, - in_be64(hose->cfg_data + 0xe50) | (3ull << 62)); - printk("PCI-E DEBUG CONTROL 5 = 0x%llx\n", in_be64(hose->cfg_data + 0xe50)); -#endif - - /* Configure M32A and IO. IO is hard wired to be 1M for now */ - out_be64(hose->cfg_data + PCIE_REG_IO_BASE_ADDR, hose->io_base_phys); - out_be64(hose->cfg_data + PCIE_REG_IO_BASE_MASK, - (~(hose->io_resource.end - hose->io_resource.start)) & - 0x3fffffff000ul); - out_be64(hose->cfg_data + PCIE_REG_IO_START_ADDR, 0 | 1); - - out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_ADDR, - hose->mem_resources[0].start); - printk("Want to write to M32A_BASE_MASK : 0x%llx\n", - (~(hose->mem_resources[0].end - - hose->mem_resources[0].start)) & 0x3ffffff0000ul); - out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_MASK, - (~(hose->mem_resources[0].end - - hose->mem_resources[0].start)) & 0x3ffffff0000ul); - out_be64(hose->cfg_data + PCIE_REG_M32A_START_ADDR, - (hose->mem_resources[0].start - hose->mem_offset[0]) | 1); - - /* Clear all TVT entries - * - * XX Might get TVT count from device-tree - */ - for (i = 0; i < IODA_TVT_COUNT; i++) { - out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR, - PCIE_REG_IODA_AD_TBL_TVT | i); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, 0); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, 0); - } - - /* Kill the TCE cache */ - out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG, - in_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG) | - PCIE_REG_PHBC_64B_TCE_EN); - - /* Enable 32 & 64-bit MSIs, IO space and M32A */ - val = PCIE_REG_PHBC_32BIT_MSI_EN | - PCIE_REG_PHBC_IO_EN | - PCIE_REG_PHBC_64BIT_MSI_EN | - PCIE_REG_PHBC_M32A_EN; - if (iommu_is_off) - val |= PCIE_REG_PHBC_DMA_XLATE_BYPASS; - pr_debug("Will write config: 0x%llx\n", val); - out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG, val); - - /* Enable error reporting */ - out_be64(hose->cfg_data + 0xe00, - in_be64(hose->cfg_data + 0xe00) | 0x0008000000000000ull); - - /* Mask an error that's generated when doing config space probe - * - * XXX Maybe we should only mask it around config space cycles... that or - * ignore it when we know we had a config space cycle recently ? - */ - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS_MASK, 0x8000000000000000ull); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS_MASK, 0x8000000000000000ull); - - /* Enable UTL errors, for now, all of them got to UTL irq 1 - * - * We similarily mask one UTL error caused apparently during normal - * probing. 
We also mask the link up error - */ - out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_ERR_SEV, 0); - out_be64(hose->cfg_data + PCIE_UTL_RC_ERR_SEVERITY, 0); - out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_ERROR_SEV, 0); - out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_IRQ_EN, 0xffffffff00000000ull); - out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_IRQ_EN, 0xff5fffff00000000ull); - out_be64(hose->cfg_data + PCIE_UTL_EP_ERR_IRQ_EN, 0xffffffff00000000ull); - - DUMP_REG(PCIE_REG_IO_BASE_ADDR); - DUMP_REG(PCIE_REG_IO_BASE_MASK); - DUMP_REG(PCIE_REG_IO_START_ADDR); - DUMP_REG(PCIE_REG_M32A_BASE_ADDR); - DUMP_REG(PCIE_REG_M32A_BASE_MASK); - DUMP_REG(PCIE_REG_M32A_START_ADDR); - DUMP_REG(PCIE_REG_M32B_BASE_ADDR); - DUMP_REG(PCIE_REG_M32B_BASE_MASK); - DUMP_REG(PCIE_REG_M32B_START_ADDR); - DUMP_REG(PCIE_REG_M64_BASE_ADDR); - DUMP_REG(PCIE_REG_M64_BASE_MASK); - DUMP_REG(PCIE_REG_M64_START_ADDR); - DUMP_REG(PCIE_REG_PHB_CONFIG); -} - -static void wsp_pci_wait_io_idle(struct wsp_phb *phb, unsigned long port) -{ - u64 val; - int i; - - for (i = 0; i < 10000; i++) { - val = in_be64(phb->hose->cfg_data + 0xe08); - if ((val & 0x1900000000000000ull) == 0x0100000000000000ull) - return; - udelay(1); - } - pr_warning("PCI IO timeout on domain %d port 0x%lx\n", - phb->hose->global_number, port); -} - -#define DEF_PCI_AC_RET_pio(name, ret, at, al, aa) \ -static ret wsp_pci_##name at \ -{ \ - struct iowa_bus *bus; \ - struct wsp_phb *phb; \ - unsigned long flags; \ - ret rval; \ - bus = iowa_pio_find_bus(aa); \ - WARN_ON(!bus); \ - phb = bus->private; \ - spin_lock_irqsave(&phb->lock, flags); \ - wsp_pci_wait_io_idle(phb, aa); \ - rval = __do_##name al; \ - spin_unlock_irqrestore(&phb->lock, flags); \ - return rval; \ -} - -#define DEF_PCI_AC_NORET_pio(name, at, al, aa) \ -static void wsp_pci_##name at \ -{ \ - struct iowa_bus *bus; \ - struct wsp_phb *phb; \ - unsigned long flags; \ - bus = iowa_pio_find_bus(aa); \ - WARN_ON(!bus); \ - phb = bus->private; \ - spin_lock_irqsave(&phb->lock, flags); \ - wsp_pci_wait_io_idle(phb, aa); \ - __do_##name al; \ - spin_unlock_irqrestore(&phb->lock, flags); \ -} - -#define DEF_PCI_AC_RET_mem(name, ret, at, al, aa) -#define DEF_PCI_AC_NORET_mem(name, at, al, aa) - -#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ - DEF_PCI_AC_RET_##space(name, ret, at, al, aa) - -#define DEF_PCI_AC_NORET(name, at, al, space, aa) \ - DEF_PCI_AC_NORET_##space(name, at, al, aa) \ - - -#include <asm/io-defs.h> - -#undef DEF_PCI_AC_RET -#undef DEF_PCI_AC_NORET - -static struct ppc_pci_io wsp_pci_iops = { - .inb = wsp_pci_inb, - .inw = wsp_pci_inw, - .inl = wsp_pci_inl, - .outb = wsp_pci_outb, - .outw = wsp_pci_outw, - .outl = wsp_pci_outl, - .insb = wsp_pci_insb, - .insw = wsp_pci_insw, - .insl = wsp_pci_insl, - .outsb = wsp_pci_outsb, - .outsw = wsp_pci_outsw, - .outsl = wsp_pci_outsl, -}; - -static int __init wsp_setup_one_phb(struct device_node *np) -{ - struct pci_controller *hose; - struct wsp_phb *phb; - - pr_info("PCI: Setting up PCIe host bridge 0x%s\n", np->full_name); - - phb = zalloc_maybe_bootmem(sizeof(struct wsp_phb), GFP_KERNEL); - if (!phb) - return -ENOMEM; - hose = pcibios_alloc_controller(np); - if (!hose) { - /* Can't really free the phb */ - return -ENOMEM; - } - hose->private_data = phb; - phb->hose = hose; - - INIT_LIST_HEAD(&phb->dma_tables); - spin_lock_init(&phb->lock); - - /* XXX Use bus-range property ? 
*/ - hose->first_busno = 0; - hose->last_busno = 0xff; - - /* We use cfg_data as the address for the whole bridge MMIO space - */ - hose->cfg_data = of_iomap(hose->dn, 0); - - pr_debug("PCIe registers mapped at 0x%p\n", hose->cfg_data); - - /* Get the ranges of the device-tree */ - pci_process_bridge_OF_ranges(hose, np, 0); - - /* XXX Force re-assigning of everything for now */ - pci_add_flags(PCI_REASSIGN_ALL_BUS | PCI_REASSIGN_ALL_RSRC | - PCI_ENABLE_PROC_DOMAINS); - - /* Calculate how the TCE space is divided */ - phb->dma32_base = 0; - phb->dma32_num_regions = NUM_DMA32_REGIONS; - if (phb->dma32_num_regions > MAX_TABLE_TVT_COUNT) { - pr_warning("IOMMU: Clamped to %d DMA32 regions\n", - MAX_TABLE_TVT_COUNT); - phb->dma32_num_regions = MAX_TABLE_TVT_COUNT; - } - phb->dma32_region_size = 0x80000000 / phb->dma32_num_regions; - - BUG_ON(!is_power_of_2(phb->dma32_region_size)); - - /* Setup config ops */ - hose->ops = &wsp_pcie_pci_ops; - - /* Configure the HW */ - wsp_pcie_configure_hw(hose); - - /* Instanciate IO workarounds */ - iowa_register_bus(hose, &wsp_pci_iops, NULL, phb); -#ifdef CONFIG_PCI_MSI - wsp_setup_phb_msi(hose); -#endif - - /* Add to global list */ - list_add(&phb->all, &wsp_phbs); - - return 0; -} - -void __init wsp_setup_pci(void) -{ - struct device_node *np; - int rc; - - /* Find host bridges */ - for_each_compatible_node(np, "pciex", PCIE_COMPATIBLE) { - rc = wsp_setup_one_phb(np); - if (rc) - pr_err("Failed to setup PCIe bridge %s, rc=%d\n", - np->full_name, rc); - } - - /* Establish device-tree linkage */ - pci_devs_phb_init(); - - /* Set DMA ops to use TCEs */ - if (iommu_is_off) { - pr_info("PCI-E: Disabled TCEs, using direct DMA\n"); - set_pci_dma_ops(&dma_direct_ops); - } else { - ppc_md.pci_dma_dev_setup = wsp_pci_dma_dev_setup; - ppc_md.tce_build = tce_build_wsp; - ppc_md.tce_free = tce_free_wsp; - set_pci_dma_ops(&dma_iommu_ops); - } -} - -#define err_debug(fmt...) pr_debug(fmt) -//#define err_debug(fmt...) 
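
The next removed helper, wsp_pci_get_err_irq_no_dt(), recovers an error interrupt when the PHB node carries no usable interrupt property: it samples one cell from the child P2P bridge's interrupt-map, uses only the low nibble to tell PHB 0 (hw irqs 0..4) from PHB 1 (5..9), substitutes the per-PHB error line (4 or 9), and keeps the upper bits. A standalone sketch of that mapping, with an illustrative helper name:

/* Sketch only: reproduces the nibble mapping done by the removed
 * wsp_pci_get_err_irq_no_dt(); the helper name is illustrative. */
#include <stdint.h>

static uint32_t wsp_err_hwirq(uint32_t sample)
{
	uint32_t hw = sample & 0xf;

	hw = (hw < 5) ? 4 : 9;		/* 0..4 -> PHB0 error line, 5..9 -> PHB1 */

	return hw | (sample & ~0xfu);	/* preserve the bits above the nibble */
}

In the removed code the resulting hardware number is then handed to irq_create_mapping() against the default interrupt host.
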
- -static int __init wsp_pci_get_err_irq_no_dt(struct device_node *np) -{ - const u32 *prop; - int hw_irq; - - /* Ok, no interrupts property, let's try to find our child P2P */ - np = of_get_next_child(np, NULL); - if (np == NULL) - return 0; - - /* Grab it's interrupt map */ - prop = of_get_property(np, "interrupt-map", NULL); - if (prop == NULL) - return 0; - - /* Grab one of the interrupts in there, keep the low 4 bits */ - hw_irq = prop[5] & 0xf; - - /* 0..4 for PHB 0 and 5..9 for PHB 1 */ - if (hw_irq < 5) - hw_irq = 4; - else - hw_irq = 9; - hw_irq |= prop[5] & ~0xf; - - err_debug("PCI: Using 0x%x as error IRQ for %s\n", - hw_irq, np->parent->full_name); - return irq_create_mapping(NULL, hw_irq); -} - -static const struct { - u32 offset; - const char *name; -} wsp_pci_regs[] = { -#define DREG(x) { PCIE_REG_##x, #x } -#define DUTL(x) { PCIE_UTL_##x, "UTL_" #x } - /* Architected registers except CONFIG_ and IODA - * to avoid side effects - */ - DREG(DMA_CHAN_STATUS), - DREG(CPU_LOADSTORE_STATUS), - DREG(LOCK0), - DREG(LOCK1), - DREG(PHB_CONFIG), - DREG(IO_BASE_ADDR), - DREG(IO_BASE_MASK), - DREG(IO_START_ADDR), - DREG(M32A_BASE_ADDR), - DREG(M32A_BASE_MASK), - DREG(M32A_START_ADDR), - DREG(M32B_BASE_ADDR), - DREG(M32B_BASE_MASK), - DREG(M32B_START_ADDR), - DREG(M64_BASE_ADDR), - DREG(M64_BASE_MASK), - DREG(M64_START_ADDR), - DREG(TCE_KILL), - DREG(LOCK2), - DREG(PHB_GEN_CAP), - DREG(PHB_TCE_CAP), - DREG(PHB_IRQ_CAP), - DREG(PHB_EEH_CAP), - DREG(PAPR_ERR_INJ_CONTROL), - DREG(PAPR_ERR_INJ_ADDR), - DREG(PAPR_ERR_INJ_MASK), - - /* UTL core regs */ - DUTL(SYS_BUS_CONTROL), - DUTL(STATUS), - DUTL(SYS_BUS_AGENT_STATUS), - DUTL(SYS_BUS_AGENT_ERR_SEV), - DUTL(SYS_BUS_AGENT_IRQ_EN), - DUTL(SYS_BUS_BURST_SZ_CONF), - DUTL(REVISION_ID), - DUTL(OUT_POST_HDR_BUF_ALLOC), - DUTL(OUT_POST_DAT_BUF_ALLOC), - DUTL(IN_POST_HDR_BUF_ALLOC), - DUTL(IN_POST_DAT_BUF_ALLOC), - DUTL(OUT_NP_BUF_ALLOC), - DUTL(IN_NP_BUF_ALLOC), - DUTL(PCIE_TAGS_ALLOC), - DUTL(GBIF_READ_TAGS_ALLOC), - - DUTL(PCIE_PORT_CONTROL), - DUTL(PCIE_PORT_STATUS), - DUTL(PCIE_PORT_ERROR_SEV), - DUTL(PCIE_PORT_IRQ_EN), - DUTL(RC_STATUS), - DUTL(RC_ERR_SEVERITY), - DUTL(RC_IRQ_EN), - DUTL(EP_STATUS), - DUTL(EP_ERR_SEVERITY), - DUTL(EP_ERR_IRQ_EN), - DUTL(PCI_PM_CTRL1), - DUTL(PCI_PM_CTRL2), - - /* PCIe stack regs */ - DREG(SYSTEM_CONFIG1), - DREG(SYSTEM_CONFIG2), - DREG(EP_SYSTEM_CONFIG), - DREG(EP_FLR), - DREG(EP_BAR_CONFIG), - DREG(LINK_CONFIG), - DREG(PM_CONFIG), - DREG(DLP_CONTROL), - DREG(DLP_STATUS), - DREG(ERR_REPORT_CONTROL), - DREG(SLOT_CONTROL1), - DREG(SLOT_CONTROL2), - DREG(UTL_CONFIG), - DREG(BUFFERS_CONFIG), - DREG(ERROR_INJECT), - DREG(SRIOV_CONFIG), - DREG(PF0_SRIOV_STATUS), - DREG(PF1_SRIOV_STATUS), - DREG(PORT_NUMBER), - DREG(POR_SYSTEM_CONFIG), - - /* Internal logic regs */ - DREG(PHB_VERSION), - DREG(RESET), - DREG(PHB_CONTROL), - DREG(PHB_TIMEOUT_CONTROL1), - DREG(PHB_QUIESCE_DMA), - DREG(PHB_DMA_READ_TAG_ACTV), - DREG(PHB_TCE_READ_TAG_ACTV), - - /* FIR registers */ - DREG(LEM_FIR_ACCUM), - DREG(LEM_FIR_AND_MASK), - DREG(LEM_FIR_OR_MASK), - DREG(LEM_ACTION0), - DREG(LEM_ACTION1), - DREG(LEM_ERROR_MASK), - DREG(LEM_ERROR_AND_MASK), - DREG(LEM_ERROR_OR_MASK), - - /* Error traps registers */ - DREG(PHB_ERR_STATUS), - DREG(PHB_ERR_STATUS), - DREG(PHB_ERR1_STATUS), - DREG(PHB_ERR_INJECT), - DREG(PHB_ERR_LEM_ENABLE), - DREG(PHB_ERR_IRQ_ENABLE), - DREG(PHB_ERR_FREEZE_ENABLE), - DREG(PHB_ERR_SIDE_ENABLE), - DREG(PHB_ERR_LOG_0), - DREG(PHB_ERR_LOG_1), - DREG(PHB_ERR_STATUS_MASK), - DREG(PHB_ERR1_STATUS_MASK), - DREG(MMIO_ERR_STATUS), 
- DREG(MMIO_ERR1_STATUS), - DREG(MMIO_ERR_INJECT), - DREG(MMIO_ERR_LEM_ENABLE), - DREG(MMIO_ERR_IRQ_ENABLE), - DREG(MMIO_ERR_FREEZE_ENABLE), - DREG(MMIO_ERR_SIDE_ENABLE), - DREG(MMIO_ERR_LOG_0), - DREG(MMIO_ERR_LOG_1), - DREG(MMIO_ERR_STATUS_MASK), - DREG(MMIO_ERR1_STATUS_MASK), - DREG(DMA_ERR_STATUS), - DREG(DMA_ERR1_STATUS), - DREG(DMA_ERR_INJECT), - DREG(DMA_ERR_LEM_ENABLE), - DREG(DMA_ERR_IRQ_ENABLE), - DREG(DMA_ERR_FREEZE_ENABLE), - DREG(DMA_ERR_SIDE_ENABLE), - DREG(DMA_ERR_LOG_0), - DREG(DMA_ERR_LOG_1), - DREG(DMA_ERR_STATUS_MASK), - DREG(DMA_ERR1_STATUS_MASK), - - /* Debug and Trace registers */ - DREG(PHB_DEBUG_CONTROL0), - DREG(PHB_DEBUG_STATUS0), - DREG(PHB_DEBUG_CONTROL1), - DREG(PHB_DEBUG_STATUS1), - DREG(PHB_DEBUG_CONTROL2), - DREG(PHB_DEBUG_STATUS2), - DREG(PHB_DEBUG_CONTROL3), - DREG(PHB_DEBUG_STATUS3), - DREG(PHB_DEBUG_CONTROL4), - DREG(PHB_DEBUG_STATUS4), - DREG(PHB_DEBUG_CONTROL5), - DREG(PHB_DEBUG_STATUS5), - - /* Don't seem to exist ... - DREG(PHB_DEBUG_CONTROL6), - DREG(PHB_DEBUG_STATUS6), - */ -}; - -static int wsp_pci_regs_show(struct seq_file *m, void *private) -{ - struct wsp_phb *phb = m->private; - struct pci_controller *hose = phb->hose; - int i; - - for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) { - /* Skip write-only regs */ - if (wsp_pci_regs[i].offset == 0xc08 || - wsp_pci_regs[i].offset == 0xc10 || - wsp_pci_regs[i].offset == 0xc38 || - wsp_pci_regs[i].offset == 0xc40) - continue; - seq_printf(m, "0x%03x: 0x%016llx %s\n", - wsp_pci_regs[i].offset, - in_be64(hose->cfg_data + wsp_pci_regs[i].offset), - wsp_pci_regs[i].name); - } - return 0; -} - -static int wsp_pci_regs_open(struct inode *inode, struct file *file) -{ - return single_open(file, wsp_pci_regs_show, inode->i_private); -} - -static const struct file_operations wsp_pci_regs_fops = { - .open = wsp_pci_regs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int wsp_pci_reg_set(void *data, u64 val) -{ - out_be64((void __iomem *)data, val); - return 0; -} - -static int wsp_pci_reg_get(void *data, u64 *val) -{ - *val = in_be64((void __iomem *)data); - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(wsp_pci_reg_fops, wsp_pci_reg_get, wsp_pci_reg_set, "0x%llx\n"); - -static irqreturn_t wsp_pci_err_irq(int irq, void *dev_id) -{ - struct wsp_phb *phb = dev_id; - struct pci_controller *hose = phb->hose; - irqreturn_t handled = IRQ_NONE; - struct wsp_pcie_err_log_data ed; - - pr_err("PCI: Error interrupt on %s (PHB %d)\n", - hose->dn->full_name, hose->global_number); - again: - memset(&ed, 0, sizeof(ed)); - - /* Read and clear UTL errors */ - ed.utl_sys_err = in_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS); - if (ed.utl_sys_err) - out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS, ed.utl_sys_err); - ed.utl_port_err = in_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS); - if (ed.utl_port_err) - out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS, ed.utl_port_err); - ed.utl_rc_err = in_be64(hose->cfg_data + PCIE_UTL_RC_STATUS); - if (ed.utl_rc_err) - out_be64(hose->cfg_data + PCIE_UTL_RC_STATUS, ed.utl_rc_err); - - /* Read and clear main trap errors */ - ed.phb_err = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS); - if (ed.phb_err) { - ed.phb_err1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS); - ed.phb_log0 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_0); - ed.phb_log1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_1); - out_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS, 0); - out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS, 0); - } - ed.mmio_err = 
in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS); - if (ed.mmio_err) { - ed.mmio_err1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS); - ed.mmio_log0 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_0); - ed.mmio_log1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_1); - out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS, 0); - out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS, 0); - } - ed.dma_err = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS); - if (ed.dma_err) { - ed.dma_err1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS); - ed.dma_log0 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_0); - ed.dma_log1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_1); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS, 0); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS, 0); - } - - /* Now print things out */ - if (ed.phb_err) { - pr_err(" PHB Error Status : 0x%016llx\n", ed.phb_err); - pr_err(" PHB First Error Status: 0x%016llx\n", ed.phb_err1); - pr_err(" PHB Error Log 0 : 0x%016llx\n", ed.phb_log0); - pr_err(" PHB Error Log 1 : 0x%016llx\n", ed.phb_log1); - } - if (ed.mmio_err) { - pr_err(" MMIO Error Status : 0x%016llx\n", ed.mmio_err); - pr_err(" MMIO First Error Status: 0x%016llx\n", ed.mmio_err1); - pr_err(" MMIO Error Log 0 : 0x%016llx\n", ed.mmio_log0); - pr_err(" MMIO Error Log 1 : 0x%016llx\n", ed.mmio_log1); - } - if (ed.dma_err) { - pr_err(" DMA Error Status : 0x%016llx\n", ed.dma_err); - pr_err(" DMA First Error Status: 0x%016llx\n", ed.dma_err1); - pr_err(" DMA Error Log 0 : 0x%016llx\n", ed.dma_log0); - pr_err(" DMA Error Log 1 : 0x%016llx\n", ed.dma_log1); - } - if (ed.utl_sys_err) - pr_err(" UTL Sys Error Status : 0x%016llx\n", ed.utl_sys_err); - if (ed.utl_port_err) - pr_err(" UTL Port Error Status : 0x%016llx\n", ed.utl_port_err); - if (ed.utl_rc_err) - pr_err(" UTL RC Error Status : 0x%016llx\n", ed.utl_rc_err); - - /* Interrupts are caused by the error traps. 
If we had any error there - * we loop again in case the UTL buffered some new stuff between - * going there and going to the traps - */ - if (ed.dma_err || ed.mmio_err || ed.phb_err) { - handled = IRQ_HANDLED; - goto again; - } - return handled; -} - -static void __init wsp_setup_pci_err_reporting(struct wsp_phb *phb) -{ - struct pci_controller *hose = phb->hose; - int err_irq, i, rc; - char fname[16]; - - /* Create a debugfs file for that PHB */ - sprintf(fname, "phb%d", phb->hose->global_number); - phb->ddir = debugfs_create_dir(fname, powerpc_debugfs_root); - - /* Some useful debug output */ - if (phb->ddir) { - struct dentry *d = debugfs_create_dir("regs", phb->ddir); - char tmp[64]; - - for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) { - sprintf(tmp, "%03x_%s", wsp_pci_regs[i].offset, - wsp_pci_regs[i].name); - debugfs_create_file(tmp, 0600, d, - hose->cfg_data + wsp_pci_regs[i].offset, - &wsp_pci_reg_fops); - } - debugfs_create_file("all_regs", 0600, phb->ddir, phb, &wsp_pci_regs_fops); - } - - /* Find the IRQ number for that PHB */ - err_irq = irq_of_parse_and_map(hose->dn, 0); - if (err_irq == 0) - /* XXX Error IRQ lacking from device-tree */ - err_irq = wsp_pci_get_err_irq_no_dt(hose->dn); - if (err_irq == 0) { - pr_err("PCI: Failed to fetch error interrupt for %s\n", - hose->dn->full_name); - return; - } - /* Request it */ - rc = request_irq(err_irq, wsp_pci_err_irq, 0, "wsp_pci error", phb); - if (rc) { - pr_err("PCI: Failed to request interrupt for %s\n", - hose->dn->full_name); - } - /* Enable interrupts for all errors for now */ - out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_IRQ_ENABLE, 0xffffffffffffffffull); - out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_IRQ_ENABLE, 0xffffffffffffffffull); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_IRQ_ENABLE, 0xffffffffffffffffull); -} - -/* - * This is called later to hookup with the error interrupt - */ -static int __init wsp_setup_pci_late(void) -{ - struct wsp_phb *phb; - - list_for_each_entry(phb, &wsp_phbs, all) - wsp_setup_pci_err_reporting(phb); - - return 0; -} -arch_initcall(wsp_setup_pci_late); diff --git a/arch/powerpc/platforms/wsp/wsp_pci.h b/arch/powerpc/platforms/wsp/wsp_pci.h deleted file mode 100644 index 52e9bd9..0000000 --- a/arch/powerpc/platforms/wsp/wsp_pci.h +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Copyright 2010 Ben Herrenschmidt, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef __WSP_PCI_H -#define __WSP_PCI_H - -/* Architected registers */ -#define PCIE_REG_DMA_CHAN_STATUS 0x110 -#define PCIE_REG_CPU_LOADSTORE_STATUS 0x120 - -#define PCIE_REG_CONFIG_DATA 0x130 -#define PCIE_REG_LOCK0 0x138 -#define PCIE_REG_CONFIG_ADDRESS 0x140 -#define PCIE_REG_CA_ENABLE 0x8000000000000000ull -#define PCIE_REG_CA_BUS_MASK 0x0ff0000000000000ull -#define PCIE_REG_CA_BUS_SHIFT (20+32) -#define PCIE_REG_CA_DEV_MASK 0x000f800000000000ull -#define PCIE_REG_CA_DEV_SHIFT (15+32) -#define PCIE_REG_CA_FUNC_MASK 0x0000700000000000ull -#define PCIE_REG_CA_FUNC_SHIFT (12+32) -#define PCIE_REG_CA_REG_MASK 0x00000fff00000000ull -#define PCIE_REG_CA_REG_SHIFT ( 0+32) -#define PCIE_REG_CA_BE_MASK 0x00000000f0000000ull -#define PCIE_REG_CA_BE_SHIFT ( 28) -#define PCIE_REG_LOCK1 0x148 - -#define PCIE_REG_PHB_CONFIG 0x160 -#define PCIE_REG_PHBC_64B_TCE_EN 0x2000000000000000ull -#define PCIE_REG_PHBC_MMIO_DMA_FREEZE_EN 0x1000000000000000ull -#define PCIE_REG_PHBC_32BIT_MSI_EN 0x0080000000000000ull -#define PCIE_REG_PHBC_M64_EN 0x0040000000000000ull -#define PCIE_REG_PHBC_IO_EN 0x0008000000000000ull -#define PCIE_REG_PHBC_64BIT_MSI_EN 0x0002000000000000ull -#define PCIE_REG_PHBC_M32A_EN 0x0000800000000000ull -#define PCIE_REG_PHBC_M32B_EN 0x0000400000000000ull -#define PCIE_REG_PHBC_MSI_PE_VALIDATE 0x0000200000000000ull -#define PCIE_REG_PHBC_DMA_XLATE_BYPASS 0x0000100000000000ull - -#define PCIE_REG_IO_BASE_ADDR 0x170 -#define PCIE_REG_IO_BASE_MASK 0x178 -#define PCIE_REG_IO_START_ADDR 0x180 - -#define PCIE_REG_M32A_BASE_ADDR 0x190 -#define PCIE_REG_M32A_BASE_MASK 0x198 -#define PCIE_REG_M32A_START_ADDR 0x1a0 - -#define PCIE_REG_M32B_BASE_ADDR 0x1b0 -#define PCIE_REG_M32B_BASE_MASK 0x1b8 -#define PCIE_REG_M32B_START_ADDR 0x1c0 - -#define PCIE_REG_M64_BASE_ADDR 0x1e0 -#define PCIE_REG_M64_BASE_MASK 0x1e8 -#define PCIE_REG_M64_START_ADDR 0x1f0 - -#define PCIE_REG_TCE_KILL 0x210 -#define PCIE_REG_TCEKILL_SINGLE 0x8000000000000000ull -#define PCIE_REG_TCEKILL_ADDR_MASK 0x000003fffffffff8ull -#define PCIE_REG_TCEKILL_PS_4K 0 -#define PCIE_REG_TCEKILL_PS_64K 1 -#define PCIE_REG_TCEKILL_PS_16M 2 -#define PCIE_REG_TCEKILL_PS_16G 3 - -#define PCIE_REG_IODA_ADDR 0x220 -#define PCIE_REG_IODA_AD_AUTOINC 0x8000000000000000ull -#define PCIE_REG_IODA_AD_TBL_MVT 0x0005000000000000ull -#define PCIE_REG_IODA_AD_TBL_PELT 0x0006000000000000ull -#define PCIE_REG_IODA_AD_TBL_PESTA 0x0007000000000000ull -#define PCIE_REG_IODA_AD_TBL_PESTB 0x0008000000000000ull -#define PCIE_REG_IODA_AD_TBL_TVT 0x0009000000000000ull -#define PCIE_REG_IODA_AD_TBL_TCE 0x000a000000000000ull -#define PCIE_REG_IODA_DATA0 0x228 -#define PCIE_REG_IODA_DATA1 0x230 - -#define PCIE_REG_LOCK2 0x240 - -#define PCIE_REG_PHB_GEN_CAP 0x250 -#define PCIE_REG_PHB_TCE_CAP 0x258 -#define PCIE_REG_PHB_IRQ_CAP 0x260 -#define PCIE_REG_PHB_EEH_CAP 0x268 - -#define PCIE_REG_PAPR_ERR_INJ_CONTROL 0x2b0 -#define PCIE_REG_PAPR_ERR_INJ_ADDR 0x2b8 -#define PCIE_REG_PAPR_ERR_INJ_MASK 0x2c0 - - -#define PCIE_REG_SYS_CFG1 0x600 -#define PCIE_REG_SYS_CFG1_CLASS_CODE 0x0000000000ffffffull - -#define IODA_TVT0_TTA_MASK 0x000fffffffff0000ull -#define IODA_TVT0_TTA_SHIFT 4 -#define IODA_TVT0_BUSNUM_VALID_MASK 0x000000000000e000ull -#define IODA_TVT0_TCE_TABLE_SIZE_MASK 0x0000000000001f00ull -#define IODA_TVT0_TCE_TABLE_SIZE_SHIFT 8 -#define IODA_TVT0_BUSNUM_VALUE_MASK 0x00000000000000ffull -#define IODA_TVT0_BUSNUM_VALID_SHIFT 0 -#define IODA_TVT1_DEVNUM_VALID 0x2000000000000000ull -#define IODA_TVT1_DEVNUM_VALUE_MASK 0x1f00000000000000ull -#define 
IODA_TVT1_DEVNUM_VALUE_SHIFT 56 -#define IODA_TVT1_FUNCNUM_VALID 0x0008000000000000ull -#define IODA_TVT1_FUNCNUM_VALUE_MASK 0x0007000000000000ull -#define IODA_TVT1_FUNCNUM_VALUE_SHIFT 48 -#define IODA_TVT1_IO_PAGE_SIZE_MASK 0x00001f0000000000ull -#define IODA_TVT1_IO_PAGE_SIZE_SHIFT 40 -#define IODA_TVT1_PE_NUMBER_MASK 0x000000000000003full -#define IODA_TVT1_PE_NUMBER_SHIFT 0 - -#define IODA_TVT_COUNT 64 - -/* UTL Core registers */ -#define PCIE_UTL_SYS_BUS_CONTROL 0x400 -#define PCIE_UTL_STATUS 0x408 -#define PCIE_UTL_SYS_BUS_AGENT_STATUS 0x410 -#define PCIE_UTL_SYS_BUS_AGENT_ERR_SEV 0x418 -#define PCIE_UTL_SYS_BUS_AGENT_IRQ_EN 0x420 -#define PCIE_UTL_SYS_BUS_BURST_SZ_CONF 0x440 -#define PCIE_UTL_REVISION_ID 0x448 - -#define PCIE_UTL_OUT_POST_HDR_BUF_ALLOC 0x4c0 -#define PCIE_UTL_OUT_POST_DAT_BUF_ALLOC 0x4d0 -#define PCIE_UTL_IN_POST_HDR_BUF_ALLOC 0x4e0 -#define PCIE_UTL_IN_POST_DAT_BUF_ALLOC 0x4f0 -#define PCIE_UTL_OUT_NP_BUF_ALLOC 0x500 -#define PCIE_UTL_IN_NP_BUF_ALLOC 0x510 -#define PCIE_UTL_PCIE_TAGS_ALLOC 0x520 -#define PCIE_UTL_GBIF_READ_TAGS_ALLOC 0x530 - -#define PCIE_UTL_PCIE_PORT_CONTROL 0x540 -#define PCIE_UTL_PCIE_PORT_STATUS 0x548 -#define PCIE_UTL_PCIE_PORT_ERROR_SEV 0x550 -#define PCIE_UTL_PCIE_PORT_IRQ_EN 0x558 -#define PCIE_UTL_RC_STATUS 0x560 -#define PCIE_UTL_RC_ERR_SEVERITY 0x568 -#define PCIE_UTL_RC_IRQ_EN 0x570 -#define PCIE_UTL_EP_STATUS 0x578 -#define PCIE_UTL_EP_ERR_SEVERITY 0x580 -#define PCIE_UTL_EP_ERR_IRQ_EN 0x588 - -#define PCIE_UTL_PCI_PM_CTRL1 0x590 -#define PCIE_UTL_PCI_PM_CTRL2 0x598 - -/* PCIe stack registers */ -#define PCIE_REG_SYSTEM_CONFIG1 0x600 -#define PCIE_REG_SYSTEM_CONFIG2 0x608 -#define PCIE_REG_EP_SYSTEM_CONFIG 0x618 -#define PCIE_REG_EP_FLR 0x620 -#define PCIE_REG_EP_BAR_CONFIG 0x628 -#define PCIE_REG_LINK_CONFIG 0x630 -#define PCIE_REG_PM_CONFIG 0x640 -#define PCIE_REG_DLP_CONTROL 0x650 -#define PCIE_REG_DLP_STATUS 0x658 -#define PCIE_REG_ERR_REPORT_CONTROL 0x660 -#define PCIE_REG_SLOT_CONTROL1 0x670 -#define PCIE_REG_SLOT_CONTROL2 0x678 -#define PCIE_REG_UTL_CONFIG 0x680 -#define PCIE_REG_BUFFERS_CONFIG 0x690 -#define PCIE_REG_ERROR_INJECT 0x698 -#define PCIE_REG_SRIOV_CONFIG 0x6a0 -#define PCIE_REG_PF0_SRIOV_STATUS 0x6a8 -#define PCIE_REG_PF1_SRIOV_STATUS 0x6b0 -#define PCIE_REG_PORT_NUMBER 0x700 -#define PCIE_REG_POR_SYSTEM_CONFIG 0x708 - -/* PHB internal logic registers */ -#define PCIE_REG_PHB_VERSION 0x800 -#define PCIE_REG_RESET 0x808 -#define PCIE_REG_PHB_CONTROL 0x810 -#define PCIE_REG_PHB_TIMEOUT_CONTROL1 0x878 -#define PCIE_REG_PHB_QUIESCE_DMA 0x888 -#define PCIE_REG_PHB_DMA_READ_TAG_ACTV 0x900 -#define PCIE_REG_PHB_TCE_READ_TAG_ACTV 0x908 - -/* FIR registers */ -#define PCIE_REG_LEM_FIR_ACCUM 0xc00 -#define PCIE_REG_LEM_FIR_AND_MASK 0xc08 -#define PCIE_REG_LEM_FIR_OR_MASK 0xc10 -#define PCIE_REG_LEM_ACTION0 0xc18 -#define PCIE_REG_LEM_ACTION1 0xc20 -#define PCIE_REG_LEM_ERROR_MASK 0xc30 -#define PCIE_REG_LEM_ERROR_AND_MASK 0xc38 -#define PCIE_REG_LEM_ERROR_OR_MASK 0xc40 - -/* PHB Error registers */ -#define PCIE_REG_PHB_ERR_STATUS 0xc80 -#define PCIE_REG_PHB_ERR1_STATUS 0xc88 -#define PCIE_REG_PHB_ERR_INJECT 0xc90 -#define PCIE_REG_PHB_ERR_LEM_ENABLE 0xc98 -#define PCIE_REG_PHB_ERR_IRQ_ENABLE 0xca0 -#define PCIE_REG_PHB_ERR_FREEZE_ENABLE 0xca8 -#define PCIE_REG_PHB_ERR_SIDE_ENABLE 0xcb8 -#define PCIE_REG_PHB_ERR_LOG_0 0xcc0 -#define PCIE_REG_PHB_ERR_LOG_1 0xcc8 -#define PCIE_REG_PHB_ERR_STATUS_MASK 0xcd0 -#define PCIE_REG_PHB_ERR1_STATUS_MASK 0xcd8 - -#define PCIE_REG_MMIO_ERR_STATUS 0xd00 -#define PCIE_REG_MMIO_ERR1_STATUS 
0xd08 -#define PCIE_REG_MMIO_ERR_INJECT 0xd10 -#define PCIE_REG_MMIO_ERR_LEM_ENABLE 0xd18 -#define PCIE_REG_MMIO_ERR_IRQ_ENABLE 0xd20 -#define PCIE_REG_MMIO_ERR_FREEZE_ENABLE 0xd28 -#define PCIE_REG_MMIO_ERR_SIDE_ENABLE 0xd38 -#define PCIE_REG_MMIO_ERR_LOG_0 0xd40 -#define PCIE_REG_MMIO_ERR_LOG_1 0xd48 -#define PCIE_REG_MMIO_ERR_STATUS_MASK 0xd50 -#define PCIE_REG_MMIO_ERR1_STATUS_MASK 0xd58 - -#define PCIE_REG_DMA_ERR_STATUS 0xd80 -#define PCIE_REG_DMA_ERR1_STATUS 0xd88 -#define PCIE_REG_DMA_ERR_INJECT 0xd90 -#define PCIE_REG_DMA_ERR_LEM_ENABLE 0xd98 -#define PCIE_REG_DMA_ERR_IRQ_ENABLE 0xda0 -#define PCIE_REG_DMA_ERR_FREEZE_ENABLE 0xda8 -#define PCIE_REG_DMA_ERR_SIDE_ENABLE 0xdb8 -#define PCIE_REG_DMA_ERR_LOG_0 0xdc0 -#define PCIE_REG_DMA_ERR_LOG_1 0xdc8 -#define PCIE_REG_DMA_ERR_STATUS_MASK 0xdd0 -#define PCIE_REG_DMA_ERR1_STATUS_MASK 0xdd8 - -/* Shortcuts for access to the above using the PHB definitions - * with an offset - */ -#define PCIE_REG_ERR_PHB_OFFSET 0x0 -#define PCIE_REG_ERR_MMIO_OFFSET 0x80 -#define PCIE_REG_ERR_DMA_OFFSET 0x100 - -/* Debug and Trace registers */ -#define PCIE_REG_PHB_DEBUG_CONTROL0 0xe00 -#define PCIE_REG_PHB_DEBUG_STATUS0 0xe08 -#define PCIE_REG_PHB_DEBUG_CONTROL1 0xe10 -#define PCIE_REG_PHB_DEBUG_STATUS1 0xe18 -#define PCIE_REG_PHB_DEBUG_CONTROL2 0xe20 -#define PCIE_REG_PHB_DEBUG_STATUS2 0xe28 -#define PCIE_REG_PHB_DEBUG_CONTROL3 0xe30 -#define PCIE_REG_PHB_DEBUG_STATUS3 0xe38 -#define PCIE_REG_PHB_DEBUG_CONTROL4 0xe40 -#define PCIE_REG_PHB_DEBUG_STATUS4 0xe48 -#define PCIE_REG_PHB_DEBUG_CONTROL5 0xe50 -#define PCIE_REG_PHB_DEBUG_STATUS5 0xe58 -#define PCIE_REG_PHB_DEBUG_CONTROL6 0xe60 -#define PCIE_REG_PHB_DEBUG_STATUS6 0xe68 - -/* Definition for PCIe errors */ -struct wsp_pcie_err_log_data { - __u64 phb_err; - __u64 phb_err1; - __u64 phb_log0; - __u64 phb_log1; - __u64 mmio_err; - __u64 mmio_err1; - __u64 mmio_log0; - __u64 mmio_log1; - __u64 dma_err; - __u64 dma_err1; - __u64 dma_log0; - __u64 dma_log1; - __u64 utl_sys_err; - __u64 utl_port_err; - __u64 utl_rc_err; - __u64 unused; -}; - -#endif /* __WSP_PCI_H */ diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 228cf91..ffd1169 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -25,7 +25,6 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/phy.h> -#include <linux/phy_fixed.h> #include <linux/spi/spi.h> #include <linux/fsl_devices.h> #include <linux/fs_enet_pd.h> @@ -178,37 +177,6 @@ u32 get_baudrate(void) EXPORT_SYMBOL(get_baudrate); #endif /* CONFIG_CPM2 */ -#ifdef CONFIG_FIXED_PHY -static int __init of_add_fixed_phys(void) -{ - int ret; - struct device_node *np; - u32 *fixed_link; - struct fixed_phy_status status = {}; - - for_each_node_by_name(np, "ethernet") { - fixed_link = (u32 *)of_get_property(np, "fixed-link", NULL); - if (!fixed_link) - continue; - - status.link = 1; - status.duplex = fixed_link[1]; - status.speed = fixed_link[2]; - status.pause = fixed_link[3]; - status.asym_pause = fixed_link[4]; - - ret = fixed_phy_add(PHY_POLL, fixed_link[0], &status); - if (ret) { - of_node_put(np); - return ret; - } - } - - return 0; -} -arch_initcall(of_add_fixed_phys); -#endif /* CONFIG_FIXED_PHY */ - #if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx) static __be32 __iomem *rstcr; diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 9dee470..de8d948 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -26,6 +26,7 @@ 
#include <asm/errno.h> #include <asm/xics.h> #include <asm/kvm_ppc.h> +#include <asm/dbell.h> struct icp_ipl { union { @@ -145,7 +146,13 @@ static unsigned int icp_native_get_irq(void) static void icp_native_cause_ipi(int cpu, unsigned long data) { kvmppc_set_host_ipi(cpu, 1); - icp_native_set_qirr(cpu, IPI_PRIORITY); +#ifdef CONFIG_PPC_DOORBELL + if (cpu_has_feature(CPU_FTR_DBELL) && + (cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id())))) + doorbell_cause_ipi(cpu, data); + else +#endif + icp_native_set_qirr(cpu, IPI_PRIORITY); } void xics_wake_cpu(int cpu) diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c index bce3dcf..c987486 100644 --- a/arch/powerpc/xmon/nonstdio.c +++ b/arch/powerpc/xmon/nonstdio.c @@ -122,7 +122,7 @@ void xmon_printf(const char *format, ...) if (n && rc == 0) { /* No udbg hooks, fallback to printk() - dangerous */ - printk(xmon_outbuf); + printk("%s", xmon_outbuf); } } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e9f8fa9..a2cbd87 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -269,27 +269,17 @@ static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) EMIT4(0xa7c80000); /* Clear A if the first register does not set it. */ switch (filter[0].code) { - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: - case BPF_S_LD_W_LEN: - case BPF_S_LD_W_IND: - case BPF_S_LD_H_IND: - case BPF_S_LD_B_IND: - case BPF_S_LD_IMM: - case BPF_S_LD_MEM: - case BPF_S_MISC_TXA: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_PKTTYPE: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_QUEUE: - case BPF_S_ANC_HATYPE: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_CPU: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_RET_K: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_IND: + case BPF_LD | BPF_H | BPF_IND: + case BPF_LD | BPF_B | BPF_IND: + case BPF_LD | BPF_IMM: + case BPF_LD | BPF_MEM: + case BPF_MISC | BPF_TXA: + case BPF_RET | BPF_K: /* first instruction sets A register */ break; default: /* A = 0 */ @@ -304,15 +294,18 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, unsigned int K; int offset; unsigned int mask; + u16 code; K = filter->k; - switch (filter->code) { - case BPF_S_ALU_ADD_X: /* A += X */ + code = bpf_anc_helper(filter); + + switch (code) { + case BPF_ALU | BPF_ADD | BPF_X: /* A += X */ jit->seen |= SEEN_XREG; /* ar %r5,%r12 */ EMIT2(0x1a5c); break; - case BPF_S_ALU_ADD_K: /* A += K */ + case BPF_ALU | BPF_ADD | BPF_K: /* A += K */ if (!K) break; if (K <= 16383) @@ -325,12 +318,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* a %r5,<d(K)>(%r13) */ EMIT4_DISP(0x5a50d000, EMIT_CONST(K)); break; - case BPF_S_ALU_SUB_X: /* A -= X */ + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */ jit->seen |= SEEN_XREG; /* sr %r5,%r12 */ EMIT2(0x1b5c); break; - case BPF_S_ALU_SUB_K: /* A -= K */ + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ if (!K) break; if (K <= 16384) @@ -343,12 +336,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* s %r5,<d(K)>(%r13) */ EMIT4_DISP(0x5b50d000, EMIT_CONST(K)); break; - case BPF_S_ALU_MUL_X: /* A *= X */ + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */ jit->seen |= SEEN_XREG; /* msr %r5,%r12 */ EMIT4(0xb252005c); break; - case BPF_S_ALU_MUL_K: /* A *= K */ + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K 
*/ if (K <= 16383) /* mhi %r5,K */ EMIT4_IMM(0xa75c0000, K); @@ -359,7 +352,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* ms %r5,<d(K)>(%r13) */ EMIT4_DISP(0x7150d000, EMIT_CONST(K)); break; - case BPF_S_ALU_DIV_X: /* A /= X */ + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */ jit->seen |= SEEN_XREG | SEEN_RET0; /* ltr %r12,%r12 */ EMIT2(0x12cc); @@ -370,7 +363,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* dlr %r4,%r12 */ EMIT4(0xb997004c); break; - case BPF_S_ALU_DIV_K: /* A /= K */ + case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ if (K == 1) break; /* lhi %r4,0 */ @@ -378,7 +371,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* dl %r4,<d(K)>(%r13) */ EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); break; - case BPF_S_ALU_MOD_X: /* A %= X */ + case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */ jit->seen |= SEEN_XREG | SEEN_RET0; /* ltr %r12,%r12 */ EMIT2(0x12cc); @@ -391,7 +384,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* lr %r5,%r4 */ EMIT2(0x1854); break; - case BPF_S_ALU_MOD_K: /* A %= K */ + case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */ if (K == 1) { /* lhi %r5,0 */ EMIT4(0xa7580000); @@ -404,12 +397,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* lr %r5,%r4 */ EMIT2(0x1854); break; - case BPF_S_ALU_AND_X: /* A &= X */ + case BPF_ALU | BPF_AND | BPF_X: /* A &= X */ jit->seen |= SEEN_XREG; /* nr %r5,%r12 */ EMIT2(0x145c); break; - case BPF_S_ALU_AND_K: /* A &= K */ + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ if (test_facility(21)) /* nilf %r5,<K> */ EMIT6_IMM(0xc05b0000, K); @@ -417,12 +410,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* n %r5,<d(K)>(%r13) */ EMIT4_DISP(0x5450d000, EMIT_CONST(K)); break; - case BPF_S_ALU_OR_X: /* A |= X */ + case BPF_ALU | BPF_OR | BPF_X: /* A |= X */ jit->seen |= SEEN_XREG; /* or %r5,%r12 */ EMIT2(0x165c); break; - case BPF_S_ALU_OR_K: /* A |= K */ + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ if (test_facility(21)) /* oilf %r5,<K> */ EMIT6_IMM(0xc05d0000, K); @@ -430,55 +423,55 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, /* o %r5,<d(K)>(%r13) */ EMIT4_DISP(0x5650d000, EMIT_CONST(K)); break; - case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ - case BPF_S_ALU_XOR_X: + case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */ + case BPF_ALU | BPF_XOR | BPF_X: jit->seen |= SEEN_XREG; /* xr %r5,%r12 */ EMIT2(0x175c); break; - case BPF_S_ALU_XOR_K: /* A ^= K */ + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ if (!K) break; /* x %r5,<d(K)>(%r13) */ EMIT4_DISP(0x5750d000, EMIT_CONST(K)); break; - case BPF_S_ALU_LSH_X: /* A <<= X; */ + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ jit->seen |= SEEN_XREG; /* sll %r5,0(%r12) */ EMIT4(0x8950c000); break; - case BPF_S_ALU_LSH_K: /* A <<= K */ + case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */ if (K == 0) break; /* sll %r5,K */ EMIT4_DISP(0x89500000, K); break; - case BPF_S_ALU_RSH_X: /* A >>= X; */ + case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ jit->seen |= SEEN_XREG; /* srl %r5,0(%r12) */ EMIT4(0x8850c000); break; - case BPF_S_ALU_RSH_K: /* A >>= K; */ + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ if (K == 0) break; /* srl %r5,K */ EMIT4_DISP(0x88500000, K); break; - case BPF_S_ALU_NEG: /* A = -A */ + case BPF_ALU | BPF_NEG: /* A = -A */ /* lnr %r5,%r5 */ EMIT2(0x1155); break; - case BPF_S_JMP_JA: /* ip += K */ + case BPF_JMP | BPF_JA: /* ip += K */ offset = addrs[i + K] + jit->start - jit->prg; 
EMIT4_PCREL(0xa7f40000, offset); break; - case BPF_S_JMP_JGT_K: /* ip += (A > K) ? jt : jf */ + case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */ mask = 0x200000; /* jh */ goto kbranch; - case BPF_S_JMP_JGE_K: /* ip += (A >= K) ? jt : jf */ + case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */ mask = 0xa00000; /* jhe */ goto kbranch; - case BPF_S_JMP_JEQ_K: /* ip += (A == K) ? jt : jf */ + case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */ mask = 0x800000; /* je */ kbranch: /* Emit compare if the branch targets are different */ if (filter->jt != filter->jf) { @@ -511,7 +504,7 @@ branch: if (filter->jt == filter->jf) { EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset); } break; - case BPF_S_JMP_JSET_K: /* ip += (A & K) ? jt : jf */ + case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */ mask = 0x700000; /* jnz */ /* Emit test if the branch targets are different */ if (filter->jt != filter->jf) { @@ -525,13 +518,13 @@ branch: if (filter->jt == filter->jf) { EMIT4_IMM(0xa7510000, K); } goto branch; - case BPF_S_JMP_JGT_X: /* ip += (A > X) ? jt : jf */ + case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */ mask = 0x200000; /* jh */ goto xbranch; - case BPF_S_JMP_JGE_X: /* ip += (A >= X) ? jt : jf */ + case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */ mask = 0xa00000; /* jhe */ goto xbranch; - case BPF_S_JMP_JEQ_X: /* ip += (A == X) ? jt : jf */ + case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */ mask = 0x800000; /* je */ xbranch: /* Emit compare if the branch targets are different */ if (filter->jt != filter->jf) { @@ -540,7 +533,7 @@ xbranch: /* Emit compare if the branch targets are different */ EMIT2(0x195c); } goto branch; - case BPF_S_JMP_JSET_X: /* ip += (A & X) ? jt : jf */ + case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? 
jt : jf */ mask = 0x700000; /* jnz */ /* Emit test if the branch targets are different */ if (filter->jt != filter->jf) { @@ -551,15 +544,15 @@ xbranch: /* Emit compare if the branch targets are different */ EMIT2(0x144c); } goto branch; - case BPF_S_LD_W_ABS: /* A = *(u32 *) (skb->data+K) */ + case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD; offset = jit->off_load_word; goto load_abs; - case BPF_S_LD_H_ABS: /* A = *(u16 *) (skb->data+K) */ + case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF; offset = jit->off_load_half; goto load_abs; - case BPF_S_LD_B_ABS: /* A = *(u8 *) (skb->data+K) */ + case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE; offset = jit->off_load_byte; load_abs: if ((int) K < 0) @@ -573,19 +566,19 @@ call_fn: /* lg %r1,<d(function)>(%r13) */ /* jnz <ret0> */ EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg)); break; - case BPF_S_LD_W_IND: /* A = *(u32 *) (skb->data+K+X) */ + case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD; offset = jit->off_load_iword; goto call_fn; - case BPF_S_LD_H_IND: /* A = *(u16 *) (skb->data+K+X) */ + case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF; offset = jit->off_load_ihalf; goto call_fn; - case BPF_S_LD_B_IND: /* A = *(u8 *) (skb->data+K+X) */ + case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */ jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE; offset = jit->off_load_ibyte; goto call_fn; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */ jit->seen |= SEEN_RET0; if ((int) K < 0) { @@ -596,17 +589,17 @@ call_fn: /* lg %r1,<d(function)>(%r13) */ jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH; offset = jit->off_load_bmsh; goto call_fn; - case BPF_S_LD_W_LEN: /* A = skb->len; */ + case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); /* l %r5,<d(len)>(%r2) */ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len)); break; - case BPF_S_LDX_W_LEN: /* X = skb->len; */ + case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ jit->seen |= SEEN_XREG; /* l %r12,<d(len)>(%r2) */ EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len)); break; - case BPF_S_LD_IMM: /* A = K */ + case BPF_LD | BPF_IMM: /* A = K */ if (K <= 16383) /* lhi %r5,K */ EMIT4_IMM(0xa7580000, K); @@ -617,7 +610,7 @@ call_fn: /* lg %r1,<d(function)>(%r13) */ /* l %r5,<d(K)>(%r13) */ EMIT4_DISP(0x5850d000, EMIT_CONST(K)); break; - case BPF_S_LDX_IMM: /* X = K */ + case BPF_LDX | BPF_IMM: /* X = K */ jit->seen |= SEEN_XREG; if (K <= 16383) /* lhi %r12,<K> */ @@ -629,29 +622,29 @@ call_fn: /* lg %r1,<d(function)>(%r13) */ /* l %r12,<d(K)>(%r13) */ EMIT4_DISP(0x58c0d000, EMIT_CONST(K)); break; - case BPF_S_LD_MEM: /* A = mem[K] */ + case BPF_LD | BPF_MEM: /* A = mem[K] */ jit->seen |= SEEN_MEM; /* l %r5,<K>(%r15) */ EMIT4_DISP(0x5850f000, (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); break; - case BPF_S_LDX_MEM: /* X = mem[K] */ + case BPF_LDX | BPF_MEM: /* X = mem[K] */ jit->seen |= SEEN_XREG | SEEN_MEM; /* l %r12,<K>(%r15) */ EMIT4_DISP(0x58c0f000, (jit->seen & SEEN_DATAREF) ? 
160 + K*4 : K*4); break; - case BPF_S_MISC_TAX: /* X = A */ + case BPF_MISC | BPF_TAX: /* X = A */ jit->seen |= SEEN_XREG; /* lr %r12,%r5 */ EMIT2(0x18c5); break; - case BPF_S_MISC_TXA: /* A = X */ + case BPF_MISC | BPF_TXA: /* A = X */ jit->seen |= SEEN_XREG; /* lr %r5,%r12 */ EMIT2(0x185c); break; - case BPF_S_RET_K: + case BPF_RET | BPF_K: if (K == 0) { jit->seen |= SEEN_RET0; if (last) @@ -671,33 +664,33 @@ call_fn: /* lg %r1,<d(function)>(%r13) */ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); } break; - case BPF_S_RET_A: + case BPF_RET | BPF_A: /* llgfr %r2,%r5 */ EMIT4(0xb9160025); /* j <exit> */ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); break; - case BPF_S_ST: /* mem[K] = A */ + case BPF_ST: /* mem[K] = A */ jit->seen |= SEEN_MEM; /* st %r5,<K>(%r15) */ EMIT4_DISP(0x5050f000, (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); break; - case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ + case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ jit->seen |= SEEN_XREG | SEEN_MEM; /* st %r12,<K>(%r15) */ EMIT4_DISP(0x50c0f000, (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); break; - case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); /* lhi %r5,0 */ EMIT4(0xa7580000); /* icm %r5,3,<d(protocol)>(%r2) */ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol)); break; - case BPF_S_ANC_IFINDEX: /* if (!skb->dev) return 0; - * A = skb->dev->ifindex */ + case BPF_ANC | SKF_AD_IFINDEX: /* if (!skb->dev) return 0; + * A = skb->dev->ifindex */ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); jit->seen |= SEEN_RET0; /* lg %r1,<d(dev)>(%r2) */ @@ -709,20 +702,20 @@ call_fn: /* lg %r1,<d(function)>(%r13) */ /* l %r5,<d(ifindex)>(%r1) */ EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex)); break; - case BPF_S_ANC_MARK: /* A = skb->mark */ + case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); /* l %r5,<d(mark)>(%r2) */ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark)); break; - case BPF_S_ANC_QUEUE: /* A = skb->queue_mapping */ + case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); /* lhi %r5,0 */ EMIT4(0xa7580000); /* icm %r5,3,<d(queue_mapping)>(%r2) */ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping)); break; - case BPF_S_ANC_HATYPE: /* if (!skb->dev) return 0; - * A = skb->dev->type */ + case BPF_ANC | SKF_AD_HATYPE: /* if (!skb->dev) return 0; + * A = skb->dev->type */ BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); jit->seen |= SEEN_RET0; /* lg %r1,<d(dev)>(%r2) */ @@ -736,20 +729,20 @@ call_fn: /* lg %r1,<d(function)>(%r13) */ /* icm %r5,3,<d(type)>(%r1) */ EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); break; - case BPF_S_ANC_RXHASH: /* A = skb->hash */ + case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); /* l %r5,<d(hash)>(%r2) */ EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash)); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); /* lhi %r5,0 */ EMIT4(0xa7580000); /* icm %r5,3,<d(vlan_tci)>(%r2) */ EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci)); - if (filter->code == BPF_S_ANC_VLAN_TAG) { + if (code == (BPF_ANC | 
SKF_AD_VLAN_TAG)) { /* nill %r5,0xefff */ EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT); } else { @@ -759,7 +752,7 @@ call_fn: /* lg %r1,<d(function)>(%r13) */ EMIT4_DISP(0x88500000, 12); } break; - case BPF_S_ANC_PKTTYPE: + case BPF_ANC | SKF_AD_PKTTYPE: if (pkt_type_offset < 0) goto out; /* lhi %r5,0 */ @@ -769,7 +762,7 @@ call_fn: /* lg %r1,<d(function)>(%r13) */ /* srl %r5,5 */ EMIT4_DISP(0x88500000, 5); break; - case BPF_S_ANC_CPU: /* A = smp_processor_id() */ + case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */ #ifdef CONFIG_SMP /* l %r5,<d(cpu_nr)> */ EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr)); diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h index bdbda14..04471dc 100644 --- a/arch/sparc/include/asm/checksum_32.h +++ b/arch/sparc/include/asm/checksum_32.h @@ -238,4 +238,16 @@ static inline __sum16 ip_compute_csum(const void *buff, int len) return csum_fold(csum_partial(buff, len, 0)); } +#define HAVE_ARCH_CSUM_ADD +static inline __wsum csum_add(__wsum csum, __wsum addend) +{ + __asm__ __volatile__( + "addcc %0, %1, %0\n" + "addx %0, %%g0, %0" + : "=r" (csum) + : "r" (addend), "0" (csum)); + + return csum; +} + #endif /* !(__SPARC_CHECKSUM_H) */ diff --git a/arch/sparc/include/asm/checksum_64.h b/arch/sparc/include/asm/checksum_64.h index 019b961..2ff81ae 100644 --- a/arch/sparc/include/asm/checksum_64.h +++ b/arch/sparc/include/asm/checksum_64.h @@ -164,4 +164,16 @@ static inline __sum16 ip_compute_csum(const void *buff, int len) return csum_fold(csum_partial(buff, len, 0)); } +#define HAVE_ARCH_CSUM_ADD +static inline __wsum csum_add(__wsum csum, __wsum addend) +{ + __asm__ __volatile__( + "addcc %0, %1, %0\n" + "addx %0, %%g0, %0" + : "=r" (csum) + : "r" (addend), "0" (csum)); + + return csum; +} + #endif /* !(__SPARC64_CHECKSUM_H) */ diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index a82c6b2..892a102 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -83,9 +83,9 @@ static void bpf_flush_icache(void *start_, void *end_) #define BNE (F2(0, 2) | CONDNE) #ifdef CONFIG_SPARC64 -#define BNE_PTR (F2(0, 1) | CONDNE | (2 << 20)) +#define BE_PTR (F2(0, 1) | CONDE | (2 << 20)) #else -#define BNE_PTR BNE +#define BE_PTR BE #endif #define SETHI(K, REG) \ @@ -415,20 +415,11 @@ void bpf_jit_compile(struct sk_filter *fp) emit_reg_move(O7, r_saved_O7); switch (filter[0].code) { - case BPF_S_RET_K: - case BPF_S_LD_W_LEN: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_PKTTYPE: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_CPU: - case BPF_S_ANC_QUEUE: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: + case BPF_RET | BPF_K: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: /* The first instruction sets the A register (or is * a "RET 'constant'") */ @@ -445,59 +436,60 @@ void bpf_jit_compile(struct sk_filter *fp) unsigned int t_offset; unsigned int f_offset; u32 t_op, f_op; + u16 code = bpf_anc_helper(&filter[i]); int ilen; - switch (filter[i].code) { - case BPF_S_ALU_ADD_X: /* A += X; */ + switch (code) { + case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */ emit_alu_X(ADD); break; - case BPF_S_ALU_ADD_K: /* A += K; */ + case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */ emit_alu_K(ADD, K); break; - case BPF_S_ALU_SUB_X: /* A -= X; */ + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */ 
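		/*
		 * For reference (illustrative note, not part of this patch): the
		 * BPF_S_* enum values the JITs used to switch on are gone; each
		 * case now matches the raw classic-BPF opcode, built by OR-ing
		 * the class, operation and source fields.  "A -= X", for
		 * instance, is
		 *
		 *	BPF_ALU | BPF_SUB | BPF_X  ==  0x04 | 0x10 | 0x08  ==  0x1c
		 *
		 * bpf_anc_helper() returns either this raw opcode or
		 * BPF_ANC | SKF_AD_xxx for the ancillary loads, which is why
		 * both forms appear as case labels in this switch.
		 */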
emit_alu_X(SUB); break; - case BPF_S_ALU_SUB_K: /* A -= K */ + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ emit_alu_K(SUB, K); break; - case BPF_S_ALU_AND_X: /* A &= X */ + case BPF_ALU | BPF_AND | BPF_X: /* A &= X */ emit_alu_X(AND); break; - case BPF_S_ALU_AND_K: /* A &= K */ + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ emit_alu_K(AND, K); break; - case BPF_S_ALU_OR_X: /* A |= X */ + case BPF_ALU | BPF_OR | BPF_X: /* A |= X */ emit_alu_X(OR); break; - case BPF_S_ALU_OR_K: /* A |= K */ + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ emit_alu_K(OR, K); break; - case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ - case BPF_S_ALU_XOR_X: + case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */ + case BPF_ALU | BPF_XOR | BPF_X: emit_alu_X(XOR); break; - case BPF_S_ALU_XOR_K: /* A ^= K */ + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ emit_alu_K(XOR, K); break; - case BPF_S_ALU_LSH_X: /* A <<= X */ + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X */ emit_alu_X(SLL); break; - case BPF_S_ALU_LSH_K: /* A <<= K */ + case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */ emit_alu_K(SLL, K); break; - case BPF_S_ALU_RSH_X: /* A >>= X */ + case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X */ emit_alu_X(SRL); break; - case BPF_S_ALU_RSH_K: /* A >>= K */ + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K */ emit_alu_K(SRL, K); break; - case BPF_S_ALU_MUL_X: /* A *= X; */ + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ emit_alu_X(MUL); break; - case BPF_S_ALU_MUL_K: /* A *= K */ + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ emit_alu_K(MUL, K); break; - case BPF_S_ALU_DIV_K: /* A /= K with K != 0*/ + case BPF_ALU | BPF_DIV | BPF_K: /* A /= K with K != 0*/ if (K == 1) break; emit_write_y(G0); @@ -512,7 +504,7 @@ void bpf_jit_compile(struct sk_filter *fp) #endif emit_alu_K(DIV, K); break; - case BPF_S_ALU_DIV_X: /* A /= X; */ + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */ emit_cmpi(r_X, 0); if (pc_ret0 > 0) { t_offset = addrs[pc_ret0 - 1]; @@ -544,10 +536,10 @@ void bpf_jit_compile(struct sk_filter *fp) #endif emit_alu_X(DIV); break; - case BPF_S_ALU_NEG: + case BPF_ALU | BPF_NEG: emit_neg(); break; - case BPF_S_RET_K: + case BPF_RET | BPF_K: if (!K) { if (pc_ret0 == -1) pc_ret0 = i; @@ -556,7 +548,7 @@ void bpf_jit_compile(struct sk_filter *fp) emit_loadimm(K, r_A); } /* Fallthrough */ - case BPF_S_RET_A: + case BPF_RET | BPF_A: if (seen_or_pass0) { if (i != flen - 1) { emit_jump(cleanup_addr); @@ -573,18 +565,18 @@ void bpf_jit_compile(struct sk_filter *fp) emit_jmpl(r_saved_O7, 8, G0); emit_reg_move(r_A, O0); /* delay slot */ break; - case BPF_S_MISC_TAX: + case BPF_MISC | BPF_TAX: seen |= SEEN_XREG; emit_reg_move(r_A, r_X); break; - case BPF_S_MISC_TXA: + case BPF_MISC | BPF_TXA: seen |= SEEN_XREG; emit_reg_move(r_X, r_A); break; - case BPF_S_ANC_CPU: + case BPF_ANC | SKF_AD_CPU: emit_load_cpu(r_A); break; - case BPF_S_ANC_PROTOCOL: + case BPF_ANC | SKF_AD_PROTOCOL: emit_skb_load16(protocol, r_A); break; #if 0 @@ -592,38 +584,38 @@ void bpf_jit_compile(struct sk_filter *fp) * a bit field even though we very much * know what we are doing here. 
*/ - case BPF_S_ANC_PKTTYPE: + case BPF_ANC | SKF_AD_PKTTYPE: __emit_skb_load8(pkt_type, r_A); emit_alu_K(SRL, 5); break; #endif - case BPF_S_ANC_IFINDEX: + case BPF_ANC | SKF_AD_IFINDEX: emit_skb_loadptr(dev, r_A); emit_cmpi(r_A, 0); - emit_branch(BNE_PTR, cleanup_addr + 4); + emit_branch(BE_PTR, cleanup_addr + 4); emit_nop(); emit_load32(r_A, struct net_device, ifindex, r_A); break; - case BPF_S_ANC_MARK: + case BPF_ANC | SKF_AD_MARK: emit_skb_load32(mark, r_A); break; - case BPF_S_ANC_QUEUE: + case BPF_ANC | SKF_AD_QUEUE: emit_skb_load16(queue_mapping, r_A); break; - case BPF_S_ANC_HATYPE: + case BPF_ANC | SKF_AD_HATYPE: emit_skb_loadptr(dev, r_A); emit_cmpi(r_A, 0); - emit_branch(BNE_PTR, cleanup_addr + 4); + emit_branch(BE_PTR, cleanup_addr + 4); emit_nop(); emit_load16(r_A, struct net_device, type, r_A); break; - case BPF_S_ANC_RXHASH: + case BPF_ANC | SKF_AD_RXHASH: emit_skb_load32(hash, r_A); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: emit_skb_load16(vlan_tci, r_A); - if (filter[i].code == BPF_S_ANC_VLAN_TAG) { + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { emit_andi(r_A, VLAN_VID_MASK, r_A); } else { emit_loadimm(VLAN_TAG_PRESENT, r_TMP); @@ -631,44 +623,44 @@ void bpf_jit_compile(struct sk_filter *fp) } break; - case BPF_S_LD_IMM: + case BPF_LD | BPF_IMM: emit_loadimm(K, r_A); break; - case BPF_S_LDX_IMM: + case BPF_LDX | BPF_IMM: emit_loadimm(K, r_X); break; - case BPF_S_LD_MEM: + case BPF_LD | BPF_MEM: emit_ldmem(K * 4, r_A); break; - case BPF_S_LDX_MEM: + case BPF_LDX | BPF_MEM: emit_ldmem(K * 4, r_X); break; - case BPF_S_ST: + case BPF_ST: emit_stmem(K * 4, r_A); break; - case BPF_S_STX: + case BPF_STX: emit_stmem(K * 4, r_X); break; #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? 
func##_negative_offset : func) : func##_positive_offset) - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word); common_load: seen |= SEEN_DATAREF; emit_loadimm(K, r_OFF); emit_call(func); break; - case BPF_S_LD_H_ABS: + case BPF_LD | BPF_H | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half); goto common_load; - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_B | BPF_ABS: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte); goto common_load; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh); goto common_load; - case BPF_S_LD_W_IND: + case BPF_LD | BPF_W | BPF_IND: func = bpf_jit_load_word; common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; if (K) { @@ -683,13 +675,13 @@ common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; } emit_call(func); break; - case BPF_S_LD_H_IND: + case BPF_LD | BPF_H | BPF_IND: func = bpf_jit_load_half; goto common_load_ind; - case BPF_S_LD_B_IND: + case BPF_LD | BPF_B | BPF_IND: func = bpf_jit_load_byte; goto common_load_ind; - case BPF_S_JMP_JA: + case BPF_JMP | BPF_JA: emit_jump(addrs[i + K]); emit_nop(); break; @@ -700,14 +692,14 @@ common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; f_op = FOP; \ goto cond_branch - COND_SEL(BPF_S_JMP_JGT_K, BGU, BLEU); - COND_SEL(BPF_S_JMP_JGE_K, BGEU, BLU); - COND_SEL(BPF_S_JMP_JEQ_K, BE, BNE); - COND_SEL(BPF_S_JMP_JSET_K, BNE, BE); - COND_SEL(BPF_S_JMP_JGT_X, BGU, BLEU); - COND_SEL(BPF_S_JMP_JGE_X, BGEU, BLU); - COND_SEL(BPF_S_JMP_JEQ_X, BE, BNE); - COND_SEL(BPF_S_JMP_JSET_X, BNE, BE); + COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU); + COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU); + COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE); + COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE); + COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU); + COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU); + COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE); + COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE); cond_branch: f_offset = addrs[i + filter[i].jf]; t_offset = addrs[i + filter[i].jt]; @@ -719,20 +711,20 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; break; } - switch (filter[i].code) { - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JEQ_X: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: seen |= SEEN_XREG; emit_cmp(r_A, r_X); break; - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_X: seen |= SEEN_XREG; emit_btst(r_A, r_X); break; - case BPF_S_JMP_JEQ_K: - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGE_K: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: if (is_simm13(K)) { emit_cmpi(r_A, K); } else { @@ -740,7 +732,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; emit_cmp(r_A, r_TMP); } break; - case BPF_S_JMP_JSET_K: + case BPF_JMP | BPF_JSET | BPF_K: if (is_simm13(K)) { emit_btsti(r_A, K); } else { diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index d767ff9..48e4fd0 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -94,7 +94,7 @@ register unsigned long stack_pointer __asm__("sp"); /* Sit on a nap instruction until interrupted. */ extern void smp_nap(void); -/* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */ +/* Enable interrupts racelessly and nap forever: helper for arch_cpu_idle(). 
*/ extern void _cpu_idle(void); #else /* __ASSEMBLY__ */ diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 74c9172..112abab 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -228,13 +228,10 @@ early_param("isolnodes", setup_isolnodes); #if defined(CONFIG_PCI) && !defined(__tilegx__) static int __init setup_pci_reserve(char* str) { - unsigned long mb; - - if (str == NULL || strict_strtoul(str, 0, &mb) != 0 || - mb > 3 * 1024) + if (str == NULL || kstrtouint(str, 0, &pci_reserve_mb) != 0 || + pci_reserve_mb > 3 * 1024) return -EINVAL; - pci_reserve_mb = mb; pr_info("Reserving %dMB for PCIE root complex mappings\n", pci_reserve_mb); return 0; @@ -691,7 +688,7 @@ static void __init setup_bootmem_allocator(void) /* Reserve any memory excluded by "memmap" arguments. */ for (i = 0; i < memmap_nr; ++i) { struct memmap_entry *m = &memmap_map[i]; - reserve_bootmem(m->addr, m->size, 0); + reserve_bootmem(m->addr, m->size, BOOTMEM_DEFAULT); } #ifdef CONFIG_BLK_DEV_INITRD @@ -715,7 +712,8 @@ static void __init setup_bootmem_allocator(void) #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) - reserve_bootmem(crashk_res.start, resource_size(&crashk_res), 0); + reserve_bootmem(crashk_res.start, resource_size(&crashk_res), + BOOTMEM_DEFAULT); #endif } diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 2d1dbf3..d1d026f 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -321,14 +321,13 @@ int show_unhandled_signals = 1; static int __init crashinfo(char *str) { - unsigned long val; const char *word; if (*str == '\0') - val = 2; - else if (*str != '=' || strict_strtoul(++str, 0, &val) != 0) + show_unhandled_signals = 2; + else if (*str != '=' || kstrtoint(++str, 0, &show_unhandled_signals) != 0) return 0; - show_unhandled_signals = val; + switch (show_unhandled_signals) { case 0: word = "No"; diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 6b603d5..f3ceb63 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -42,10 +42,9 @@ static int __init setup_unaligned_fixup(char *str) * will still parse the instruction, then fire a SIGBUS with * the correct address from inside the single_step code. */ - long val; - if (strict_strtol(str, 0, &val) != 0) + if (kstrtoint(str, 0, &unaligned_fixup) != 0) return 0; - unaligned_fixup = val; + pr_info("Fixups for unaligned data accesses are %s\n", unaligned_fixup >= 0 ? (unaligned_fixup ? "enabled" : "disabled") : diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index b030b4e..c02ea2a 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -182,18 +182,7 @@ static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra, int i; uint64_t reg; uint64_t reg_map = 0, alias_reg_map = 0, map; - bool alias; - - *ra = -1; - *rb = -1; - - if (rd) - *rd = -1; - - *clob1 = -1; - *clob2 = -1; - *clob3 = -1; - alias = false; + bool alias = false; /* * Parse fault bundle, find potential used registers and mark @@ -569,7 +558,7 @@ void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle, tilegx_bundle_bits bundle_2 = 0; /* If bundle_2_enable = false, bundle_2 is fnop/nop operation. 
*/ bool bundle_2_enable = true; - uint64_t ra, rb, rd = -1, clob1, clob2, clob3; + uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1; /* * Indicate if the unalign access * instruction's registers hit with diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 0fa1acf..bfb3127 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -273,9 +273,9 @@ static pgprot_t __init init_pgprot(ulong address) /* * Otherwise we just hand out consecutive cpus. To avoid * requiring this function to hold state, we just walk forward from - * _sdata by PAGE_SIZE, skipping the readonly and init data, to reach - * the requested address, while walking cpu home around kdata_mask. - * This is typically no more than a dozen or so iterations. + * __end_rodata by PAGE_SIZE, skipping the readonly and init data, to + * reach the requested address, while walking cpu home around + * kdata_mask. This is typically no more than a dozen or so iterations. */ page = (((ulong)__end_rodata) + PAGE_SIZE - 1) & PAGE_MASK; BUG_ON(address < page || address >= (ulong)_end); @@ -912,7 +912,7 @@ static long __write_once initfree = 1; static int __init set_initfree(char *str) { long val; - if (strict_strtol(str, 0, &val) == 0) { + if (kstrtol(str, 0, &val) == 0) { initfree = val; pr_info("initfree: %s free init pages\n", initfree ? "will" : "won't"); diff --git a/arch/um/Makefile b/arch/um/Makefile index 36e658a..e4b1a96 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -111,8 +111,7 @@ endef KBUILD_KCONFIG := $(HOST_DIR)/um/Kconfig archheaders: - $(Q)$(MAKE) -C '$(srctree)' KBUILD_SRC= \ - ARCH=$(HEADER_ARCH) O='$(objtree)' archheaders + $(Q)$(MAKE) KBUILD_SRC= ARCH=$(HEADER_ARCH) archheaders archprepare: include/generated/user_constants.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b660088..fcefdda 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -121,6 +121,7 @@ config X86 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_QUEUE_RWLOCK select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION select OLD_SIGACTION if X86_32 select COMPAT_OLD_SIGACTION if IA32_EMULATION diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 4582e8e..7730c1c 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -57,6 +57,12 @@ .long (from) - . ; \ .long (to) - . + 0x7ffffff0 ; \ .popsection + +# define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist","aw" ; \ + _ASM_ALIGN ; \ + _ASM_PTR (entry); \ + .popsection #else # define _ASM_EXTABLE(from,to) \ " .pushsection \"__ex_table\",\"a\"\n" \ @@ -71,6 +77,7 @@ " .long (" #from ") - .\n" \ " .long (" #to ") - . 
+ 0x7ffffff0\n" \ " .popsection\n" +/* For C file, we already have NOKPROBE_SYMBOL macro */ #endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h index e6fd8a0..cd00e17 100644 --- a/arch/x86/include/asm/checksum_64.h +++ b/arch/x86/include/asm/checksum_64.h @@ -184,8 +184,15 @@ static inline unsigned add32_with_carry(unsigned a, unsigned b) asm("addl %2,%0\n\t" "adcl $0,%0" : "=r" (a) - : "0" (a), "r" (b)); + : "0" (a), "rm" (b)); return a; } +#define HAVE_ARCH_CSUM_ADD +static inline __wsum csum_add(__wsum csum, __wsum addend) +{ + return (__force __wsum)add32_with_carry((__force unsigned)csum, + (__force unsigned)addend); +} + #endif /* _ASM_X86_CHECKSUM_64_H */ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 9454c16..53cdfb2 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -116,4 +116,6 @@ struct kprobe_ctlblk { extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); +extern int kprobe_int3_handler(struct pt_regs *regs); +extern int kprobe_debug_handler(struct pt_regs *regs); #endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h new file mode 100644 index 0000000..70f46f0 --- /dev/null +++ b/arch/x86/include/asm/qrwlock.h @@ -0,0 +1,17 @@ +#ifndef _ASM_X86_QRWLOCK_H +#define _ASM_X86_QRWLOCK_H + +#include <asm-generic/qrwlock_types.h> + +#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) +#define queue_write_unlock queue_write_unlock +static inline void queue_write_unlock(struct qrwlock *lock) +{ + barrier(); + ACCESS_ONCE(*(u8 *)&lock->cnts) = 0; +} +#endif + +#include <asm-generic/qrwlock.h> + +#endif /* _ASM_X86_QRWLOCK_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 0f62f54..54f1c80 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -187,6 +187,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) cpu_relax(); } +#ifndef CONFIG_QUEUE_RWLOCK /* * Read-write spinlocks, allowing multiple readers * but only one writer. 
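The csum_add() hunks above (the sparc and x86 checksum headers) define HAVE_ARCH_CSUM_ADD so that the generic helper is skipped in favour of an add-with-carry instruction sequence. For context, the portable fallback in include/net/checksum.h is roughly the end-around-carry add sketched below; treat the exact spelling as an illustrative sketch rather than a verbatim copy:

#ifndef HAVE_ARCH_CSUM_ADD
static inline __wsum csum_add(__wsum csum, __wsum addend)
{
	u32 res = (__force u32)csum;

	res += (__force u32)addend;
	/* if the add wrapped, fold the carry back in (end-around carry) */
	return (__force __wsum)(res + (res < (__force u32)addend));
}
#endif

An architecture override such as the sparc addcc/addx pair or the x86 addl/adcl pair computes the same result in two instructions, without the explicit compare.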
@@ -269,6 +270,9 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0" : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory"); } +#else +#include <asm/qrwlock.h> +#endif /* CONFIG_QUEUE_RWLOCK */ #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 4f1bea1..73c4c00 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -34,6 +34,10 @@ typedef struct arch_spinlock { #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } +#ifdef CONFIG_QUEUE_RWLOCK +#include <asm-generic/qrwlock_types.h> +#else #include <asm/rwlock.h> +#endif #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 8ba1884..bc8352e 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -68,7 +68,7 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long); dotraplinkage void do_stack_segment(struct pt_regs *, long); #ifdef CONFIG_X86_64 dotraplinkage void do_double_fault(struct pt_regs *, long); -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *); +asmlinkage struct pt_regs *sync_regs(struct pt_regs *); #endif dotraplinkage void do_general_protection(struct pt_regs *, long); dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); @@ -103,7 +103,6 @@ static inline int get_si_code(unsigned long condition) extern int panic_on_unrecovered_nmi; -void math_error(struct pt_regs *, int, int); void math_emulate(struct math_emu_info *); #ifndef CONFIG_X86_32 asmlinkage void smp_thermal_interrupt(void); diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 93bee7b..74f4c2f 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -41,18 +41,18 @@ struct arch_uprobe { u8 ixol[MAX_UINSN_BYTES]; }; - u16 fixups; const struct uprobe_xol_ops *ops; union { -#ifdef CONFIG_X86_64 - unsigned long rip_rela_target_address; -#endif struct { s32 offs; u8 ilen; u8 opc1; - } branch; + } branch; + struct { + u8 fixups; + u8 ilen; + } defparam; }; }; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index df94598..703130f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -5,7 +5,6 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/stringify.h> -#include <linux/kprobes.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/memory.h> @@ -551,7 +550,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, * * Note: Must be called under text_mutex. 
*/ -void *__kprobes text_poke(void *addr, const void *opcode, size_t len) +void *text_poke(void *addr, const void *opcode, size_t len) { unsigned long flags; char *vaddr; diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index eab6704..c3fcb5d 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -60,7 +60,7 @@ void arch_trigger_all_cpu_backtrace(void) smp_mb__after_atomic(); } -static int __kprobes +static int arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { int cpu; @@ -80,6 +80,7 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) return NMI_DONE; } +NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler); static int __init register_trigger_all_cpu_backtrace(void) { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9d0a979..81e08ef 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2297,7 +2297,7 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, int err; if (!config_enabled(CONFIG_SMP)) - return -1; + return -EPERM; if (!cpumask_intersects(mask, cpu_online_mask)) return -EINVAL; @@ -2328,7 +2328,7 @@ int native_ioapic_set_affinity(struct irq_data *data, int ret; if (!config_enabled(CONFIG_SMP)) - return -1; + return -EPERM; raw_spin_lock_irqsave(&ioapic_lock, flags); ret = __ioapic_set_affinity(data, mask, &dest); @@ -3001,9 +3001,11 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) struct irq_cfg *cfg = data->chip_data; struct msi_msg msg; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; __get_cached_msi_msg(data->msi_desc, &msg); @@ -3100,9 +3102,11 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_cfg *cfg = data->chip_data; unsigned int dest, irq = data->irq; struct msi_msg msg; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; dmar_msi_read(irq, &msg); @@ -3149,9 +3153,11 @@ static int hpet_msi_set_affinity(struct irq_data *data, struct irq_cfg *cfg = data->chip_data; struct msi_msg msg; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; hpet_msi_read(data->handler_data, &msg); @@ -3218,9 +3224,11 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { struct irq_cfg *cfg = data->chip_data; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; target_ht_irq(data->irq, dest, cfg->vector); return IRQ_SET_MASK_OK_NOCOPY; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2cbbf88..ef1b93f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -8,6 +8,7 @@ #include <linux/delay.h> #include <linux/sched.h> #include <linux/init.h> +#include <linux/kprobes.h> #include <linux/kgdb.h> #include <linux/smp.h> #include <linux/io.h> @@ -1193,6 +1194,7 @@ int is_debug_stack(unsigned long addr) (addr <= __get_cpu_var(debug_stack_addr) && addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); } +NOKPROBE_SYMBOL(is_debug_stack); DEFINE_PER_CPU(u32, debug_idt_ctr); @@ -1201,6 +1203,7 @@ void 
debug_stack_set_zero(void) this_cpu_inc(debug_idt_ctr); load_current_idt(); } +NOKPROBE_SYMBOL(debug_stack_set_zero); void debug_stack_reset(void) { @@ -1209,6 +1212,7 @@ void debug_stack_reset(void) if (this_cpu_dec_return(debug_idt_ctr) == 0) load_current_idt(); } +NOKPROBE_SYMBOL(debug_stack_reset); #else /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 76f98fe..a450373 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -132,15 +132,6 @@ static void __init ms_hyperv_init_platform(void) lapic_timer_frequency = hv_lapic_frequency; printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n", lapic_timer_frequency); - - /* - * On Hyper-V, when we are booting off an EFI firmware stack, - * we do not have many legacy devices including PIC, PIT etc. - */ - if (efi_enabled(EFI_BOOT)) { - printk(KERN_INFO "HyperV: Using null_legacy_pic\n"); - legacy_pic = &null_legacy_pic; - } } #endif diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 89f3b7c..2bdfbff 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -303,15 +303,6 @@ int x86_setup_perfctr(struct perf_event *event) hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); - } else { - /* - * If we have a PMU initialized but no APIC - * interrupts, we cannot sample hardware - * events (user-space has to fall back and - * sample via a hrtimer based software event): - */ - if (!x86_pmu.apic) - return -EOPNOTSUPP; } if (attr->type == PERF_TYPE_RAW) @@ -1293,7 +1284,7 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } -static int __kprobes +static int perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { u64 start_clock; @@ -1311,6 +1302,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) return ret; } +NOKPROBE_SYMBOL(perf_event_nmi_handler); struct event_constraint emptyconstraint; struct event_constraint unconstrained; @@ -1366,6 +1358,15 @@ static void __init pmu_check_apic(void) x86_pmu.apic = 0; pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); pr_info("no hardware sampling interrupt available.\n"); + + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * events (user-space has to fall back and + * sample via a hrtimer based software event): + */ + pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + } static struct attribute_group x86_pmu_format_group = { diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 4c36bbe..cbb1be3e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -593,7 +593,7 @@ out: return 1; } -static int __kprobes +static int perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) { int handled = 0; @@ -606,6 +606,7 @@ perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) return handled; } +NOKPROBE_SYMBOL(perf_ibs_nmi_handler); static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d82d155..9dd2459 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -384,6 +384,9 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & PERF_SAMPLE_BRANCH_NO_TX) mask |= 
X86_BR_NO_TX; + if (br_type & PERF_SAMPLE_BRANCH_COND) + mask |= X86_BR_JCC; + /* * stash actual user request into reg, it may * be used by fixup code for some CPU @@ -678,6 +681,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { * NHM/WSM erratum: must include IND_JMP to capture IND_CALL */ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, }; static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { @@ -689,6 +693,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR, [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, }; /* core */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index d9c12d3..b74ebc7 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -200,7 +200,7 @@ static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; -unsigned __kprobes long oops_begin(void) +unsigned long oops_begin(void) { int cpu; unsigned long flags; @@ -223,8 +223,9 @@ unsigned __kprobes long oops_begin(void) return flags; } EXPORT_SYMBOL_GPL(oops_begin); +NOKPROBE_SYMBOL(oops_begin); -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { if (regs && kexec_should_crash(current)) crash_kexec(regs); @@ -247,8 +248,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) panic("Fatal exception"); do_exit(signr); } +NOKPROBE_SYMBOL(oops_end); -int __kprobes __die(const char *str, struct pt_regs *regs, long err) +int __die(const char *str, struct pt_regs *regs, long err) { #ifdef CONFIG_X86_32 unsigned short ss; @@ -291,6 +293,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) #endif return 0; } +NOKPROBE_SYMBOL(__die); /* * This is gone through when something in the kernel has done something bad diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 6cda0ba..2e1a685 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -419,7 +419,7 @@ static size_t __init gen6_stolen_size(int num, int slot, int func) return gmch_ctrl << 25; /* 32 MB units */ } -static size_t gen8_stolen_size(int num, int slot, int func) +static size_t __init gen8_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; @@ -429,48 +429,73 @@ static size_t gen8_stolen_size(int num, int slot, int func) return gmch_ctrl << 25; /* 32 MB units */ } +static size_t __init chv_stolen_size(int num, int slot, int func) +{ + u16 gmch_ctrl; + + gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); + gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; + gmch_ctrl &= SNB_GMCH_GMS_MASK; + + /* + * 0x0 to 0x10: 32MB increments starting at 0MB + * 0x11 to 0x16: 4MB increments starting at 8MB + * 0x17 to 0x1d: 4MB increments start at 36MB + */ + if (gmch_ctrl < 0x11) + return gmch_ctrl << 25; + else if (gmch_ctrl < 0x17) + return (gmch_ctrl - 0x11 + 2) << 22; + else + return (gmch_ctrl - 0x17 + 9) << 22; +} struct intel_stolen_funcs { size_t (*size)(int num, int slot, int func); u32 (*base)(int num, int slot, int func, size_t size); }; -static const struct intel_stolen_funcs i830_stolen_funcs = { +static const struct intel_stolen_funcs i830_stolen_funcs __initconst = { .base = i830_stolen_base, .size = i830_stolen_size, }; -static const struct intel_stolen_funcs i845_stolen_funcs 
= { +static const struct intel_stolen_funcs i845_stolen_funcs __initconst = { .base = i845_stolen_base, .size = i830_stolen_size, }; -static const struct intel_stolen_funcs i85x_stolen_funcs = { +static const struct intel_stolen_funcs i85x_stolen_funcs __initconst = { .base = i85x_stolen_base, .size = gen3_stolen_size, }; -static const struct intel_stolen_funcs i865_stolen_funcs = { +static const struct intel_stolen_funcs i865_stolen_funcs __initconst = { .base = i865_stolen_base, .size = gen3_stolen_size, }; -static const struct intel_stolen_funcs gen3_stolen_funcs = { +static const struct intel_stolen_funcs gen3_stolen_funcs __initconst = { .base = intel_stolen_base, .size = gen3_stolen_size, }; -static const struct intel_stolen_funcs gen6_stolen_funcs = { +static const struct intel_stolen_funcs gen6_stolen_funcs __initconst = { .base = intel_stolen_base, .size = gen6_stolen_size, }; -static const struct intel_stolen_funcs gen8_stolen_funcs = { +static const struct intel_stolen_funcs gen8_stolen_funcs __initconst = { .base = intel_stolen_base, .size = gen8_stolen_size, }; -static struct pci_device_id intel_stolen_ids[] __initdata = { +static const struct intel_stolen_funcs chv_stolen_funcs __initconst = { + .base = intel_stolen_base, + .size = chv_stolen_size, +}; + +static const struct pci_device_id intel_stolen_ids[] __initconst = { INTEL_I830_IDS(&i830_stolen_funcs), INTEL_I845G_IDS(&i845_stolen_funcs), INTEL_I85X_IDS(&i85x_stolen_funcs), @@ -496,7 +521,8 @@ static struct pci_device_id intel_stolen_ids[] __initdata = { INTEL_HSW_D_IDS(&gen6_stolen_funcs), INTEL_HSW_M_IDS(&gen6_stolen_funcs), INTEL_BDW_M_IDS(&gen8_stolen_funcs), - INTEL_BDW_D_IDS(&gen8_stolen_funcs) + INTEL_BDW_D_IDS(&gen8_stolen_funcs), + INTEL_CHV_IDS(&chv_stolen_funcs), }; static void __init intel_graphics_stolen(int num, int slot, int func) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 98313ff..f0da82b 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -315,10 +315,6 @@ ENTRY(ret_from_kernel_thread) ENDPROC(ret_from_kernel_thread) /* - * Interrupt exit functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" -/* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to * go as quickly as possible which is why some of this is @@ -372,10 +368,6 @@ need_resched: END(resume_kernel) #endif CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. 
*/ @@ -495,10 +487,6 @@ sysexit_audit: PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) -/* - * syscall stub including irq exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway @@ -690,10 +678,6 @@ syscall_badsys: jmp resume_userspace END(syscall_badsys) CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection .macro FIXUP_ESPFIX_STACK /* @@ -784,10 +768,6 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC -/* - * Irq entries should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ @@ -964,10 +944,6 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC END(spurious_interrupt_bug) -/* - * End of kprobes section - */ - .popsection #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter @@ -1242,11 +1218,6 @@ return_to_handler: jmp *%ecx #endif -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - #ifdef CONFIG_TRACING ENTRY(trace_page_fault) RING0_EC_FRAME @@ -1460,7 +1431,3 @@ ENTRY(async_page_fault) END(async_page_fault) #endif -/* - * End of kprobes section - */ - .popsection diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 48a2644..b25ca96 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -284,8 +284,6 @@ ENDPROC(native_usergs_sysret64) TRACE_IRQS_OFF .endm -/* save complete stack frame */ - .pushsection .kprobes.text, "ax" ENTRY(save_paranoid) XCPT_FRAME 1 RDI+8 cld @@ -314,7 +312,6 @@ ENTRY(save_paranoid) 1: ret CFI_ENDPROC END(save_paranoid) - .popsection /* * A newly forked process directly context switches into this address. @@ -772,10 +769,6 @@ END(interrupt) call \func .endm -/* - * Interrupt entry/exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -983,11 +976,6 @@ END(__do_double_fault) #endif /* - * End of kprobes section - */ - .popsection - -/* * APIC interrupts. 
*/ .macro apicinterrupt3 num sym do_sym @@ -1321,11 +1309,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ hyperv_callback_vector hyperv_vector_handler #endif /* CONFIG_HYPERV */ -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1 @@ -1742,7 +1725,3 @@ ENTRY(ignore_sysret) CFI_ENDPROC END(ignore_sysret) -/* - * End of kprobes section - */ - .popsection diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a67b47c..5f9cf20 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -32,7 +32,6 @@ #include <linux/irqflags.h> #include <linux/notifier.h> #include <linux/kallsyms.h> -#include <linux/kprobes.h> #include <linux/percpu.h> #include <linux/kdebug.h> #include <linux/kernel.h> @@ -424,7 +423,7 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore); * NOTIFY_STOP returned for all other cases * */ -static int __kprobes hw_breakpoint_handler(struct die_args *args) +static int hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct perf_event *bp; @@ -511,7 +510,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) /* * Handle debug exception notifications. */ -int __kprobes hw_breakpoint_exceptions_notify( +int hw_breakpoint_exceptions_notify( struct notifier_block *unused, unsigned long val, void *data) { if (val != DIE_DEBUG) diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 2e977b5..8af81710 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -299,13 +299,31 @@ static void unmask_8259A(void) static void init_8259A(int auto_eoi) { unsigned long flags; + unsigned char probe_val = ~(1 << PIC_CASCADE_IR); + unsigned char new_val; i8259A_auto_eoi = auto_eoi; raw_spin_lock_irqsave(&i8259A_lock, flags); - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + /* + * Check to see if we have a PIC. + * Mask all except the cascade and read + * back the value we just wrote. If we don't + * have a PIC, we will read 0xff as opposed to the + * value we wrote. + */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + outb(probe_val, PIC_MASTER_IMR); + new_val = inb(PIC_MASTER_IMR); + if (new_val != probe_val) { + printk(KERN_INFO "Using NULL legacy PIC\n"); + legacy_pic = &null_legacy_pic; + raw_spin_unlock_irqrestore(&i8259A_lock, flags); + return; + } + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ /* * outb_pic - this has to work on a wide range of PC hardware. diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 11ccfb0..922d285 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -365,6 +365,7 @@ void fixup_irqs(void) struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; + int ret; for_each_irq_desc(irq, desc) { int break_affinity = 0; @@ -403,10 +404,14 @@ void fixup_irqs(void) if (!irqd_can_move_in_process_context(data) && chip->irq_mask) chip->irq_mask(data); - if (chip->irq_set_affinity) - chip->irq_set_affinity(data, affinity, true); - else if (!(warned++)) - set_affinity = 0; + if (chip->irq_set_affinity) { + ret = chip->irq_set_affinity(data, affinity, true); + if (ret == -ENOSPC) + pr_crit("IRQ %d set affinity failed because there are no available vectors. 
The device assigned to this IRQ is unstable.\n", irq); + } else { + if (!(warned++)) + set_affinity = 0; + } /* * We unmask if the irq was not marked masked by the diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 61b17dc..7596df6 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -112,7 +112,8 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); -static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) +static nokprobe_inline void +__synthesize_relative_insn(void *from, void *to, u8 op) { struct __arch_relative_insn { u8 op; @@ -125,21 +126,23 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) } /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -void __kprobes synthesize_reljump(void *from, void *to) +void synthesize_reljump(void *from, void *to) { __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); } +NOKPROBE_SYMBOL(synthesize_reljump); /* Insert a call instruction at address 'from', which calls address 'to'.*/ -void __kprobes synthesize_relcall(void *from, void *to) +void synthesize_relcall(void *from, void *to) { __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); } +NOKPROBE_SYMBOL(synthesize_relcall); /* * Skip the prefixes of the instruction. */ -static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) +static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn) { insn_attr_t attr; @@ -154,12 +157,13 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) #endif return insn; } +NOKPROBE_SYMBOL(skip_prefixes); /* * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int __kprobes can_boost(kprobe_opcode_t *opcodes) +int can_boost(kprobe_opcode_t *opcodes) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; @@ -260,7 +264,7 @@ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long add } /* Check if paddr is at an instruction boundary */ -static int __kprobes can_probe(unsigned long paddr) +static int can_probe(unsigned long paddr) { unsigned long addr, __addr, offset = 0; struct insn insn; @@ -299,7 +303,7 @@ static int __kprobes can_probe(unsigned long paddr) /* * Returns non-zero if opcode modifies the interrupt flag. */ -static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) +static int is_IF_modifier(kprobe_opcode_t *insn) { /* Skip prefixes */ insn = skip_prefixes(insn); @@ -322,7 +326,7 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) * If not, return null. * Only applicable to 64-bit x86. 
*/ -int __kprobes __copy_instruction(u8 *dest, u8 *src) +int __copy_instruction(u8 *dest, u8 *src) { struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; @@ -365,7 +369,7 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src) return insn.length; } -static int __kprobes arch_copy_kprobe(struct kprobe *p) +static int arch_copy_kprobe(struct kprobe *p) { int ret; @@ -392,7 +396,7 @@ static int __kprobes arch_copy_kprobe(struct kprobe *p) return 0; } -int __kprobes arch_prepare_kprobe(struct kprobe *p) +int arch_prepare_kprobe(struct kprobe *p) { if (alternatives_text_reserved(p->addr, p->addr)) return -EINVAL; @@ -407,17 +411,17 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return arch_copy_kprobe(p); } -void __kprobes arch_arm_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); } -void __kprobes arch_disarm_kprobe(struct kprobe *p) +void arch_disarm_kprobe(struct kprobe *p) { text_poke(p->addr, &p->opcode, 1); } -void __kprobes arch_remove_kprobe(struct kprobe *p) +void arch_remove_kprobe(struct kprobe *p) { if (p->ainsn.insn) { free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); @@ -425,7 +429,8 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) } } -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void +save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); kcb->prev_kprobe.status = kcb->kprobe_status; @@ -433,7 +438,8 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags; } -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void +restore_previous_kprobe(struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; @@ -441,8 +447,9 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; } -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +static nokprobe_inline void +set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_flags = kcb->kprobe_old_flags @@ -451,7 +458,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; } -static void __kprobes clear_btf(void) +static nokprobe_inline void clear_btf(void) { if (test_thread_flag(TIF_BLOCKSTEP)) { unsigned long debugctl = get_debugctlmsr(); @@ -461,7 +468,7 @@ static void __kprobes clear_btf(void) } } -static void __kprobes restore_btf(void) +static nokprobe_inline void restore_btf(void) { if (test_thread_flag(TIF_BLOCKSTEP)) { unsigned long debugctl = get_debugctlmsr(); @@ -471,8 +478,7 @@ static void __kprobes restore_btf(void) } } -void __kprobes -arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) +void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { unsigned long *sara = stack_addr(regs); @@ -481,9 +487,10 @@ arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; } +NOKPROBE_SYMBOL(arch_prepare_kretprobe); -static void __kprobes -setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk 
*kcb, int reenter) +static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) { if (setup_detour_execution(p, regs, reenter)) return; @@ -519,22 +526,24 @@ setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k else regs->ip = (unsigned long)p->ainsn.insn; } +NOKPROBE_SYMBOL(setup_singlestep); /* * We have reentered the kprobe_handler(), since another probe was hit while * within the handler. We save the original kprobes variables and just single * step on the instruction of the new probe without calling any user handlers. */ -static int __kprobes -reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) +static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SS: kprobes_inc_nmissed_count(p); setup_singlestep(p, regs, kcb, 1); break; - case KPROBE_HIT_SS: + case KPROBE_REENTER: /* A probe has been hit in the codepath leading up to, or just * after, single-stepping of a probed instruction. This entire * codepath should strictly reside in .kprobes.text section. @@ -553,12 +562,13 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +NOKPROBE_SYMBOL(reenter_kprobe); /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. */ -static int __kprobes kprobe_handler(struct pt_regs *regs) +int kprobe_int3_handler(struct pt_regs *regs) { kprobe_opcode_t *addr; struct kprobe *p; @@ -621,12 +631,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) preempt_enable_no_resched(); return 0; } +NOKPROBE_SYMBOL(kprobe_int3_handler); /* * When a retprobed function returns, this code saves registers and * calls trampoline_handler() runs, which calls the kretprobe's handler. */ -static void __used __kprobes kretprobe_trampoline_holder(void) +static void __used kretprobe_trampoline_holder(void) { asm volatile ( ".global kretprobe_trampoline\n" @@ -657,11 +668,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void) #endif " ret\n"); } +NOKPROBE_SYMBOL(kretprobe_trampoline_holder); +NOKPROBE_SYMBOL(kretprobe_trampoline); /* * Called from kretprobe_trampoline */ -__visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) +__visible __used void *trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; @@ -747,6 +760,7 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) } return (void *)orig_ret_address; } +NOKPROBE_SYMBOL(trampoline_handler); /* * Called after single-stepping. p->addr is the address of the @@ -775,8 +789,8 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) * jump instruction after the copied instruction, that jumps to the next * instruction after the probepoint. 
*/ -static void __kprobes -resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) +static void resume_execution(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { unsigned long *tos = stack_addr(regs); unsigned long copy_ip = (unsigned long)p->ainsn.insn; @@ -851,12 +865,13 @@ resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k no_change: restore_btf(); } +NOKPROBE_SYMBOL(resume_execution); /* * Interrupts are disabled on entry as trap1 is an interrupt gate and they * remain disabled throughout this function. */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) +int kprobe_debug_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -891,8 +906,9 @@ out: return 1; } +NOKPROBE_SYMBOL(kprobe_debug_handler); -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -949,12 +965,13 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) return 0; } +NOKPROBE_SYMBOL(kprobe_fault_handler); /* * Wrapper routine for handling exceptions. */ -int __kprobes -kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) +int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, + void *data) { struct die_args *args = data; int ret = NOTIFY_DONE; @@ -962,22 +979,7 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d if (args->regs && user_mode_vm(args->regs)) return ret; - switch (val) { - case DIE_INT3: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) { - /* - * Reset the BS bit in dr6 (pointed by args->err) to - * denote completion of processing - */ - (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; - ret = NOTIFY_STOP; - } - break; - case DIE_GPF: + if (val == DIE_GPF) { /* * To be potentially processing a kprobe fault and to * trust the result from kprobe_running(), we have @@ -986,14 +988,12 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d if (!preemptible() && kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; - break; - default: - break; } return ret; } +NOKPROBE_SYMBOL(kprobe_exceptions_notify); -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; @@ -1017,8 +1017,9 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->ip = (unsigned long)(jp->entry); return 1; } +NOKPROBE_SYMBOL(setjmp_pre_handler); -void __kprobes jprobe_return(void) +void jprobe_return(void) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -1034,8 +1035,10 @@ void __kprobes jprobe_return(void) " nop \n"::"b" (kcb->jprobe_saved_sp):"memory"); } +NOKPROBE_SYMBOL(jprobe_return); +NOKPROBE_SYMBOL(jprobe_return_end); -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); u8 *addr = (u8 *) (regs->ip - 1); @@ -1063,13 +1066,22 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } 
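The kprobes hunks in this series replace the __kprobes function attribute (which placed whole functions in the .kprobes.text section) with NOKPROBE_SYMBOL(), which instead records the symbol's address in the _kprobe_blacklist section consulted at probe-registration time, and use nokprobe_inline for small helpers that must be inlined into their blacklisted callers. A minimal sketch of the mechanism follows, assuming the macro defined in include/linux/kprobes.h; the spelling and the handler name below are illustrative, not quoted from the tree:

/* Illustrative sketch only -- see include/linux/kprobes.h for the real macro. */
#define __NOKPROBE_SYMBOL(fname)					\
static unsigned long __used						\
	__attribute__((__section__("_kprobe_blacklist")))		\
	_kbl_addr_##fname = (unsigned long)fname;
#define NOKPROBE_SYMBOL(fname)	__NOKPROBE_SYMBOL(fname)

/* Usage: the function stays in normal .text; only its address is blacklisted. */
static int my_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	return 0;	/* hypothetical handler body */
}
NOKPROBE_SYMBOL(my_nmi_handler);

Unlike __kprobes, this leaves the function in its normal section, so the text layout is unchanged while the address is still rejected by the kprobes core.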
+NOKPROBE_SYMBOL(longjmp_break_handler); + +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + return (addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)__entry_text_start && + addr < (unsigned long)__entry_text_end); +} int __init arch_init_kprobes(void) { return 0; } -int __kprobes arch_trampoline_kprobe(struct kprobe *p) +int arch_trampoline_kprobe(struct kprobe *p) { return 0; } diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 23ef5c5..717b02a 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -25,8 +25,9 @@ #include "common.h" -static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +static nokprobe_inline +int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { /* * Emulate singlestep (and also recover regs->ip) @@ -41,18 +42,19 @@ static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, return 1; } -int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { if (kprobe_ftrace(p)) return __skip_singlestep(p, regs, kcb); else return 0; } +NOKPROBE_SYMBOL(skip_singlestep); /* Ftrace callback handler for kprobes */ -void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe *p; struct kprobe_ctlblk *kcb; @@ -84,8 +86,9 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, end: local_irq_restore(flags); } +NOKPROBE_SYMBOL(kprobe_ftrace_handler); -int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +int arch_prepare_kprobe_ftrace(struct kprobe *p) { p->ainsn.insn = NULL; p->ainsn.boostable = -1; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 898160b..f304773 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -77,7 +77,7 @@ found: } /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). 
*/ -static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) +static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) { #ifdef CONFIG_X86_64 *addr++ = 0x48; @@ -138,7 +138,8 @@ asm ( #define INT3_SIZE sizeof(kprobe_opcode_t) /* Optimized kprobe call back function: called from optinsn */ -static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) +static void +optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long flags; @@ -168,8 +169,9 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_ } local_irq_restore(flags); } +NOKPROBE_SYMBOL(optimized_callback); -static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) +static int copy_optimized_instructions(u8 *dest, u8 *src) { int len = 0, ret; @@ -189,7 +191,7 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) } /* Check whether insn is indirect jump */ -static int __kprobes insn_is_indirect_jump(struct insn *insn) +static int insn_is_indirect_jump(struct insn *insn) { return ((insn->opcode.bytes[0] == 0xff && (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ @@ -224,7 +226,7 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) } /* Decode whole function to ensure any instructions don't jump into target */ -static int __kprobes can_optimize(unsigned long paddr) +static int can_optimize(unsigned long paddr) { unsigned long addr, size = 0, offset = 0; struct insn insn; @@ -275,7 +277,7 @@ static int __kprobes can_optimize(unsigned long paddr) } /* Check optimized_kprobe can actually be optimized. */ -int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) +int arch_check_optimized_kprobe(struct optimized_kprobe *op) { int i; struct kprobe *p; @@ -290,15 +292,15 @@ int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) } /* Check the addr is within the optimized instructions. */ -int __kprobes -arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) +int arch_within_optimized_kprobe(struct optimized_kprobe *op, + unsigned long addr) { return ((unsigned long)op->kp.addr <= addr && (unsigned long)op->kp.addr + op->optinsn.size > addr); } /* Free optimized instruction slot */ -static __kprobes +static void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) { if (op->optinsn.insn) { @@ -308,7 +310,7 @@ void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) } } -void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) +void arch_remove_optimized_kprobe(struct optimized_kprobe *op) { __arch_remove_optimized_kprobe(op, 1); } @@ -318,7 +320,7 @@ void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) * Target instructions MUST be relocatable (checked inside) * This is called when new aggr(opt)probe is allocated or reused. */ -int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) +int arch_prepare_optimized_kprobe(struct optimized_kprobe *op) { u8 *buf; int ret; @@ -372,7 +374,7 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) * Replace breakpoints (int3) with relative jumps. * Caller must call with locking kprobe_mutex and text_mutex. 
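The conversion pattern repeated across core.c, ftrace.c and opt.c is mechanical: the __kprobes attribute (which used to force the function into the protected .kprobes.text section) is dropped from the definition, and the symbol is instead added to the kprobe blacklist with a NOKPROBE_SYMBOL() marker placed after the function body; small static helpers become nokprobe_inline so their code lands inside already-blacklisted callers. A minimal sketch of the resulting style, with an illustrative function name:

	#include <linux/kprobes.h>

	/* runs on the int3/debug exception path, so it must never be probed */
	static int example_break_handler(struct pt_regs *regs)
	{
		return 0;
	}
	NOKPROBE_SYMBOL(example_break_handler);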
*/ -void __kprobes arch_optimize_kprobes(struct list_head *oplist) +void arch_optimize_kprobes(struct list_head *oplist) { struct optimized_kprobe *op, *tmp; u8 insn_buf[RELATIVEJUMP_SIZE]; @@ -398,7 +400,7 @@ void __kprobes arch_optimize_kprobes(struct list_head *oplist) } /* Replace a relative jump with a breakpoint (int3). */ -void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) +void arch_unoptimize_kprobe(struct optimized_kprobe *op) { u8 insn_buf[RELATIVEJUMP_SIZE]; @@ -424,8 +426,7 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist, } } -int __kprobes -setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) +int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) { struct optimized_kprobe *op; @@ -441,3 +442,4 @@ setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) } return 0; } +NOKPROBE_SYMBOL(setup_detour_execution); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7e97371..3dd8e2c 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -251,8 +251,9 @@ u32 kvm_read_and_reset_pf_reason(void) return reason; } EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); +NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason); -dotraplinkage void __kprobes +dotraplinkage void do_async_page_fault(struct pt_regs *regs, unsigned long error_code) { enum ctx_state prev_state; @@ -276,6 +277,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) break; } } +NOKPROBE_SYMBOL(do_async_page_fault); static void __init paravirt_ops_setup(void) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index b4872b9..c3e985d 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w) a->handler, whole_msecs, decimal_msecs); } -static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) +static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *a; @@ -146,6 +146,7 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 /* return total number of NMI events handled */ return handled; } +NOKPROBE_SYMBOL(nmi_handle); int __register_nmi_handler(unsigned int type, struct nmiaction *action) { @@ -208,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name) } EXPORT_SYMBOL_GPL(unregister_nmi_handler); -static __kprobes void +static void pci_serr_error(unsigned char reason, struct pt_regs *regs) { /* check to see if anyone registered against these types of errors */ @@ -238,8 +239,9 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; outb(reason, NMI_REASON_PORT); } +NOKPROBE_SYMBOL(pci_serr_error); -static __kprobes void +static void io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; @@ -269,8 +271,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs) reason &= ~NMI_REASON_CLEAR_IOCHK; outb(reason, NMI_REASON_PORT); } +NOKPROBE_SYMBOL(io_check_error); -static __kprobes void +static void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { int handled; @@ -298,11 +301,12 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) pr_emerg("Dazed and confused, but trying to continue\n"); } +NOKPROBE_SYMBOL(unknown_nmi_error); static DEFINE_PER_CPU(bool, swallow_nmi); static DEFINE_PER_CPU(unsigned long, last_nmi_rip); -static __kprobes void 
default_do_nmi(struct pt_regs *regs) +static void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int handled; @@ -401,6 +405,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) else unknown_nmi_error(reason, regs); } +NOKPROBE_SYMBOL(default_do_nmi); /* * NMIs can hit breakpoints which will cause it to lose its @@ -520,7 +525,7 @@ static inline void nmi_nesting_postprocess(void) } #endif -dotraplinkage notrace __kprobes void +dotraplinkage notrace void do_nmi(struct pt_regs *regs, long error_code) { nmi_nesting_preprocess(regs); @@ -537,6 +542,7 @@ do_nmi(struct pt_regs *regs, long error_code) /* On i386, may loop back to preprocess */ nmi_nesting_postprocess(); } +NOKPROBE_SYMBOL(do_nmi); void stop_nmi(void) { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1b10af8..548d25f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -23,6 +23,7 @@ #include <linux/efi.h> #include <linux/bcd.h> #include <linux/highmem.h> +#include <linux/kprobes.h> #include <asm/bug.h> #include <asm/paravirt.h> @@ -389,6 +390,11 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .end_context_switch = paravirt_nop, }; +/* At this point, native_get/set_debugreg has real function entries */ +NOKPROBE_SYMBOL(native_get_debugreg); +NOKPROBE_SYMBOL(native_set_debugreg); +NOKPROBE_SYMBOL(native_load_idt); + struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC .startup_ipi_hook = paravirt_nop, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 898d077..ca5b02d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -413,12 +413,11 @@ void set_personality_ia32(bool x32) set_thread_flag(TIF_ADDR32); /* Mark the associated mm as containing 32-bit tasks. 
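In the set_personality_ia32() hunk that follows, mm->context.ia32_compat stops being a bare flag and records which personality was chosen, TIF_IA32 or TIF_X32. The uprobes changes later in this diff rely on that distinction to keep decoding x32 tasks as 64-bit. A simplified sketch of the kind of test this enables (the helper name here is made up; the real one appears in the uprobes.c hunk further down):

	static inline bool mm_is_64bit(struct mm_struct *mm)
	{
		/* an x32 mm (ia32_compat == TIF_X32) still decodes as 64-bit */
		return !IS_ENABLED(CONFIG_IA32_EMULATION) ||
		       mm->context.ia32_compat != TIF_IA32;
	}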
*/ - if (current->mm) - current->mm->context.ia32_compat = 1; - if (x32) { clear_thread_flag(TIF_IA32); set_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_X32; current->personality &= ~READ_IMPLIES_EXEC; /* is_compat_task() uses the presence of the x32 syscall bit flag to determine compat status */ @@ -426,6 +425,8 @@ void set_personality_ia32(bool x32) } else { set_thread_flag(TIF_IA32); clear_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_IA32; current->personality |= force_personality32; /* Prepare the first "return" to user space */ current_thread_info()->status |= TS_COMPAT; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f73b5d4..c6eb418 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -23,6 +23,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/ptrace.h> +#include <linux/uprobes.h> #include <linux/string.h> #include <linux/delay.h> #include <linux/errno.h> @@ -106,7 +107,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_count_dec(); } -static int __kprobes +static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, struct pt_regs *regs, long error_code) { @@ -136,7 +137,38 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, return -1; } -static void __kprobes +static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr, + siginfo_t *info) +{ + unsigned long siaddr; + int sicode; + + switch (trapnr) { + default: + return SEND_SIG_PRIV; + + case X86_TRAP_DE: + sicode = FPE_INTDIV; + siaddr = uprobe_get_trap_addr(regs); + break; + case X86_TRAP_UD: + sicode = ILL_ILLOPN; + siaddr = uprobe_get_trap_addr(regs); + break; + case X86_TRAP_AC: + sicode = BUS_ADRALN; + siaddr = 0; + break; + } + + info->si_signo = signr; + info->si_errno = 0; + info->si_code = sicode; + info->si_addr = (void __user *)siaddr; + return info; +} + +static void do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, siginfo_t *info) { @@ -168,60 +200,43 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, } #endif - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); + force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); } +NOKPROBE_SYMBOL(do_trap); -#define DO_ERROR(trapnr, signr, str, name) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - enum ctx_state prev_state; \ - \ - prev_state = exception_enter(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, \ - trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(prev_state); \ - return; \ - } \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, NULL); \ - exception_exit(prev_state); \ +static void do_error_trap(struct pt_regs *regs, long error_code, char *str, + unsigned long trapnr, int signr) +{ + enum ctx_state prev_state = exception_enter(); + siginfo_t info; + + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != + NOTIFY_STOP) { + conditional_sti(regs); + do_trap(trapnr, signr, str, regs, error_code, + fill_trap_info(regs, signr, trapnr, &info)); + } + + exception_exit(prev_state); } -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +#define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - siginfo_t info; \ - enum ctx_state prev_state; \ - \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - 
info.si_addr = (void __user *)siaddr; \ - prev_state = exception_enter(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, \ - trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(prev_state); \ - return; \ - } \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, &info); \ - exception_exit(prev_state); \ + do_error_trap(regs, error_code, str, trapnr, signr); \ } -DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip ) -DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow ) -DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds ) -DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip ) -DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun ) -DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS ) -DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present ) +DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error) +DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) +DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) +DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) +DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) +DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) #ifdef CONFIG_X86_32 -DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment ) +DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) #endif -DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0 ) +DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) #ifdef CONFIG_X86_64 /* Runs on IST stack */ @@ -263,7 +278,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) } #endif -dotraplinkage void __kprobes +dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; @@ -305,13 +320,14 @@ do_general_protection(struct pt_regs *regs, long error_code) pr_cont("\n"); } - force_sig(SIGSEGV, tsk); + force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_general_protection); /* May run on IST stack. */ -dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) +dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { enum ctx_state prev_state; @@ -327,13 +343,18 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co if (poke_int3_handler(regs)) return; - prev_state = exception_enter(); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) goto exit; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ +#ifdef CONFIG_KPROBES + if (kprobe_int3_handler(regs)) + return; +#endif + prev_state = exception_enter(); + if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) goto exit; @@ -350,6 +371,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_int3); #ifdef CONFIG_X86_64 /* @@ -357,7 +379,7 @@ exit: * for scheduling or signal handling. 
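The DO_ERROR()/DO_ERROR_INFO() pair above collapses into a single DO_ERROR(): the per-trap siginfo details move into fill_trap_info(), which hands back SEND_SIG_PRIV for traps that used to pass a NULL siginfo, and the shared notify_die()/conditional_sti() boilerplate moves into do_error_trap(). Each generated handler therefore reduces to a one-line forwarder; the invalid-opcode entry, for example, expands to roughly:

	dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code)
	{
		do_error_trap(regs, error_code, "invalid opcode", X86_TRAP_UD, SIGILL);
	}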
The actual stack switch is done in * entry.S */ -asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; /* Did already sync */ @@ -376,6 +398,7 @@ asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) *regs = *eregs; return regs; } +NOKPROBE_SYMBOL(sync_regs); #endif /* @@ -402,7 +425,7 @@ asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) * * May run on IST stack. */ -dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) +dotraplinkage void do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; enum ctx_state prev_state; @@ -410,8 +433,6 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; - prev_state = exception_enter(); - get_debugreg(dr6, 6); /* Filter out all the reserved bits which are preset to 1 */ @@ -440,6 +461,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; +#ifdef CONFIG_KPROBES + if (kprobe_debug_handler(regs)) + goto exit; +#endif + prev_state = exception_enter(); + if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code, SIGTRAP) == NOTIFY_STOP) goto exit; @@ -482,13 +509,14 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_debug); /* * Note that we play around with the 'TS' bit in an attempt to get * the correct behaviour even in the presence of the asynchronous * IRQ13 behaviour */ -void math_error(struct pt_regs *regs, int error_code, int trapnr) +static void math_error(struct pt_regs *regs, int error_code, int trapnr) { struct task_struct *task = current; siginfo_t info; @@ -518,7 +546,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) task->thread.error_code = error_code; info.si_signo = SIGFPE; info.si_errno = 0; - info.si_addr = (void __user *)regs->ip; + info.si_addr = (void __user *)uprobe_get_trap_addr(regs); if (trapnr == X86_TRAP_MF) { unsigned short cwd, swd; /* @@ -645,7 +673,7 @@ void math_state_restore(void) */ if (unlikely(restore_fpu_checking(tsk))) { drop_init_fpu(tsk); - force_sig(SIGSEGV, tsk); + force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); return; } @@ -653,7 +681,7 @@ void math_state_restore(void) } EXPORT_SYMBOL_GPL(math_state_restore); -dotraplinkage void __kprobes +dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { enum ctx_state prev_state; @@ -679,6 +707,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) #endif exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_device_not_available); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index ace2291..5d1cbfe 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -32,20 +32,20 @@ /* Post-execution fixups. 
*/ -/* No fixup needed */ -#define UPROBE_FIX_NONE 0x0 - /* Adjust IP back to vicinity of actual insn */ -#define UPROBE_FIX_IP 0x1 +#define UPROBE_FIX_IP 0x01 /* Adjust the return address of a call insn */ -#define UPROBE_FIX_CALL 0x2 +#define UPROBE_FIX_CALL 0x02 /* Instruction will modify TF, don't change it */ -#define UPROBE_FIX_SETF 0x4 +#define UPROBE_FIX_SETF 0x04 -#define UPROBE_FIX_RIP_AX 0x8000 -#define UPROBE_FIX_RIP_CX 0x4000 +#define UPROBE_FIX_RIP_SI 0x08 +#define UPROBE_FIX_RIP_DI 0x10 +#define UPROBE_FIX_RIP_BX 0x20 +#define UPROBE_FIX_RIP_MASK \ + (UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX) #define UPROBE_TRAP_NR UINT_MAX @@ -67,6 +67,7 @@ * to keep gcc from statically optimizing it out, as variable_test_bit makes * some versions of gcc to think only *(unsigned long*) is used. */ +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static volatile u32 good_insns_32[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ---------------------------------------------- */ @@ -89,33 +90,12 @@ static volatile u32 good_insns_32[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; +#else +#define good_insns_32 NULL +#endif -/* Using this for both 64-bit and 32-bit apps */ -static volatile u32 good_2byte_insns[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ---------------------------------------------- */ - W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ - W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ - W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ - W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ - W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ - W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ - W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ - W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ - W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ - W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ - /* ---------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; - -#ifdef CONFIG_X86_64 /* Good-instruction tables for 64-bit apps */ +#if defined(CONFIG_X86_64) static volatile u32 good_insns_64[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ---------------------------------------------- */ @@ -138,7 +118,33 @@ static volatile u32 good_insns_64[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; +#else +#define good_insns_64 NULL #endif + +/* Using this for both 64-bit and 32-bit apps */ +static volatile u32 good_2byte_insns[256 / 32] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ---------------------------------------------- */ + W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ + W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ + W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ + W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ + 
W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ + W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ + W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ + W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ + W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ + W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ + W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ + W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ + W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ + W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ + W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ + W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ + /* ---------------------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; #undef W /* @@ -209,16 +215,25 @@ static bool is_prefix_bad(struct insn *insn) return false; } -static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) +static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64) { - insn_init(insn, auprobe->insn, false); + u32 volatile *good_insns; + + insn_init(insn, auprobe->insn, x86_64); + /* has the side-effect of processing the entire instruction */ + insn_get_length(insn); + if (WARN_ON_ONCE(!insn_complete(insn))) + return -ENOEXEC; - /* Skip good instruction prefixes; reject "bad" ones. */ - insn_get_opcode(insn); if (is_prefix_bad(insn)) return -ENOTSUPP; - if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) + if (x86_64) + good_insns = good_insns_64; + else + good_insns = good_insns_32; + + if (test_bit(OPCODE1(insn), (unsigned long *)good_insns)) return 0; if (insn->opcode.nbytes == 2) { @@ -230,14 +245,18 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) } #ifdef CONFIG_X86_64 +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return !config_enabled(CONFIG_IA32_EMULATION) || + !(mm->context.ia32_compat == TIF_IA32); +} /* * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. Otherwise, rewrite the instruction so that it accesses * its memory operand indirectly through a scratch register. Set - * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address - * accordingly. (The contents of the scratch register will be saved - * before we single-step the modified instruction, and restored - * afterward.) + * defparam->fixups accordingly. (The contents of the scratch register + * will be saved before we single-step the modified instruction, + * and restored afterward). * * We do this because a rip-relative instruction can access only a * relatively small area (+/- 2 GB from the instruction), and the XOL @@ -248,164 +267,192 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) * * Some useful facts about rip-relative instructions: * - * - There's always a modrm byte. + * - There's always a modrm byte with bit layout "00 reg 101". * - There's never a SIB byte. * - The displacement is always 4 bytes. + * - REX.B=1 bit in REX prefix, which normally extends r/m field, + * has no effect on rip-relative mode. It doesn't make modrm byte + * with r/m=101 refer to register 1101 = R13. 
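A concrete instance of those facts, spelled out at the byte level (displacement value illustrative):

	/*
	 *   8b 05 44 33 22 11        mov 0x11223344(%rip),%eax
	 *      ^  ^^^^^^^^^^^-- displacement, always 4 bytes, no SIB byte
	 *      modrm = 0x05 = "00 000 101": mod=00, reg=000 (eax), r/m=101 (rip)
	 *   41 8b 05 44 33 22 11     still mov 0x11223344(%rip),%eax
	 *      the REX.B prefix is ignored here; r/m=101 does not become r13
	 */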
*/ -static void -handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) +static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { u8 *cursor; u8 reg; + u8 reg2; if (!insn_rip_relative(insn)) return; /* - * insn_rip_relative() would have decoded rex_prefix, modrm. + * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm. * Clear REX.b bit (extension of MODRM.rm field): - * we want to encode rax/rcx, not r8/r9. + * we want to encode low numbered reg, not r8+. */ if (insn->rex_prefix.nbytes) { cursor = auprobe->insn + insn_offset_rex_prefix(insn); - *cursor &= 0xfe; /* Clearing REX.B bit */ + /* REX byte has 0100wrxb layout, clearing REX.b bit */ + *cursor &= 0xfe; + } + /* + * Similar treatment for VEX3 prefix. + * TODO: add XOP/EVEX treatment when insn decoder supports them + */ + if (insn->vex_prefix.nbytes == 3) { + /* + * vex2: c5 rvvvvLpp (has no b bit) + * vex3/xop: c4/8f rxbmmmmm wvvvvLpp + * evex: 62 rxbR00mm wvvvv1pp zllBVaaa + * (evex will need setting of both b and x since + * in non-sib encoding evex.x is 4th bit of MODRM.rm) + * Setting VEX3.b (setting because it has inverted meaning): + */ + cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1; + *cursor |= 0x20; } /* + * Convert from rip-relative addressing to register-relative addressing + * via a scratch register. + * + * This is tricky since there are insns with modrm byte + * which also use registers not encoded in modrm byte: + * [i]div/[i]mul: implicitly use dx:ax + * shift ops: implicitly use cx + * cmpxchg: implicitly uses ax + * cmpxchg8/16b: implicitly uses dx:ax and bx:cx + * Encoding: 0f c7/1 modrm + * The code below thinks that reg=1 (cx), chooses si as scratch. + * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m. + * First appeared in Haswell (BMI2 insn). It is vex-encoded. + * Example where none of bx,cx,dx can be used as scratch reg: + * c4 e2 63 f6 0d disp32 mulx disp32(%rip),%ebx,%ecx + * [v]pcmpistri: implicitly uses cx, xmm0 + * [v]pcmpistrm: implicitly uses xmm0 + * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0 + * [v]pcmpestrm: implicitly uses ax, dx, xmm0 + * Evil SSE4.2 string comparison ops from hell. + * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination. + * Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm. + * Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi). + * AMD says it has no 3-operand form (vex.vvvv must be 1111) + * and that it can have only register operands, not mem + * (its modrm byte must have mode=11). + * If these restrictions will ever be lifted, + * we'll need code to prevent selection of di as scratch reg! + * + * Summary: I don't know any insns with modrm byte which + * use SI register implicitly. DI register is used only + * by one insn (maskmovq) and BX register is used + * only by one too (cmpxchg8b). + * BP is stack-segment based (may be a problem?). + * AX, DX, CX are off-limits (many implicit users). + * SP is unusable (it's stack pointer - think about "pop mem"; + * also, rsp+disp32 needs sib encoding -> insn length change). + */ + + reg = MODRM_REG(insn); /* Fetch modrm.reg */ + reg2 = 0xff; /* Fetch vex.vvvv */ + if (insn->vex_prefix.nbytes == 2) + reg2 = insn->vex_prefix.bytes[1]; + else if (insn->vex_prefix.nbytes == 3) + reg2 = insn->vex_prefix.bytes[2]; + /* + * TODO: add XOP, EXEV vvvv reading. + * + * vex.vvvv field is in bits 6-3, bits are inverted. + * But in 32-bit mode, high-order bit may be ignored. + * Therefore, let's consider only 3 low-order bits. 
+ */ + reg2 = ((reg2 >> 3) & 0x7) ^ 0x7; + /* + * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15. + * + * Choose scratch reg. Order is important: must not select bx + * if we can use si (cmpxchg8b case!) + */ + if (reg != 6 && reg2 != 6) { + reg2 = 6; + auprobe->defparam.fixups |= UPROBE_FIX_RIP_SI; + } else if (reg != 7 && reg2 != 7) { + reg2 = 7; + auprobe->defparam.fixups |= UPROBE_FIX_RIP_DI; + /* TODO (paranoia): force maskmovq to not use di */ + } else { + reg2 = 3; + auprobe->defparam.fixups |= UPROBE_FIX_RIP_BX; + } + /* * Point cursor at the modrm byte. The next 4 bytes are the * displacement. Beyond the displacement, for some instructions, * is the immediate operand. */ cursor = auprobe->insn + insn_offset_modrm(insn); - insn_get_length(insn); - /* - * Convert from rip-relative addressing to indirect addressing - * via a scratch register. Change the r/m field from 0x5 (%rip) - * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field. + * Change modrm from "00 reg 101" to "10 reg reg2". Example: + * 89 05 disp32 mov %eax,disp32(%rip) becomes + * 89 86 disp32 mov %eax,disp32(%rsi) */ - reg = MODRM_REG(insn); - if (reg == 0) { - /* - * The register operand (if any) is either the A register - * (%rax, %eax, etc.) or (if the 0x4 bit is set in the - * REX prefix) %r8. In any case, we know the C register - * is NOT the register operand, so we use %rcx (register - * #1) for the scratch register. - */ - auprobe->fixups = UPROBE_FIX_RIP_CX; - /* Change modrm from 00 000 101 to 00 000 001. */ - *cursor = 0x1; - } else { - /* Use %rax (register #0) for the scratch register. */ - auprobe->fixups = UPROBE_FIX_RIP_AX; - /* Change modrm from 00 xxx 101 to 00 xxx 000 */ - *cursor = (reg << 3); - } - - /* Target address = address of next instruction + (signed) offset */ - auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value; + *cursor = 0x80 | (reg << 3) | reg2; +} - /* Displacement field is gone; slide immediate field (if any) over. */ - if (insn->immediate.nbytes) { - cursor++; - memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); - } +static inline unsigned long * +scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_SI) + return ®s->si; + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_DI) + return ®s->di; + return ®s->bx; } /* * If we're emulating a rip-relative instruction, save the contents * of the scratch register and store the target address in that register. 
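Continuing the "89 05 disp32" to "89 86 disp32" example from the comment above into the run-time half (addresses illustrative):

	/*
	 * The analysis pass picks %rsi because neither modrm.reg nor vex.vvvv
	 * refers to it, so defparam.fixups gains UPROBE_FIX_RIP_SI and the
	 * recorded insn length is 6 (opcode + modrm + 4-byte displacement).
	 * For a probe at vaddr V, riprel_pre_xol() saves %rsi and loads it
	 * with V + 6, the address of the next original instruction, so the
	 * rewritten disp32(%rsi) resolves to exactly what disp32(%rip) would
	 * have resolved to; riprel_post_xol() puts the saved %rsi back.
	 */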
*/ -static void -pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) -{ - if (auprobe->fixups & UPROBE_FIX_RIP_AX) { - autask->saved_scratch_register = regs->ax; - regs->ax = current->utask->vaddr; - regs->ax += auprobe->rip_rela_target_address; - } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { - autask->saved_scratch_register = regs->cx; - regs->cx = current->utask->vaddr; - regs->cx += auprobe->rip_rela_target_address; - } -} - -static void -handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { - struct arch_uprobe_task *autask; - - autask = ¤t->utask->autask; - if (auprobe->fixups & UPROBE_FIX_RIP_AX) - regs->ax = autask->saved_scratch_register; - else - regs->cx = autask->saved_scratch_register; + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) { + struct uprobe_task *utask = current->utask; + unsigned long *sr = scratch_reg(auprobe, regs); - /* - * The original instruction includes a displacement, and so - * is 4 bytes longer than what we've just single-stepped. - * Caller may need to apply other fixups to handle stuff - * like "jmpq *...(%rip)" and "callq *...(%rip)". - */ - if (correction) - *correction += 4; + utask->autask.saved_scratch_register = *sr; + *sr = utask->vaddr + auprobe->defparam.ilen; } } -static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - insn_init(insn, auprobe->insn, true); - - /* Skip good instruction prefixes; reject "bad" ones. */ - insn_get_opcode(insn); - if (is_prefix_bad(insn)) - return -ENOTSUPP; + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) { + struct uprobe_task *utask = current->utask; + unsigned long *sr = scratch_reg(auprobe, regs); - if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) - return 0; - - if (insn->opcode.nbytes == 2) { - if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) - return 0; + *sr = utask->autask.saved_scratch_register; } - return -ENOTSUPP; } - -static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) +#else /* 32-bit: */ +static inline bool is_64bit_mm(struct mm_struct *mm) { - if (mm->context.ia32_compat) - return validate_insn_32bits(auprobe, insn); - return validate_insn_64bits(auprobe, insn); + return false; } -#else /* 32-bit: */ /* * No RIP-relative addressing on 32-bit */ -static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) +static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { } -static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { } -static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, - long *correction) +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { } - -static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) -{ - return validate_insn_32bits(auprobe, insn); -} #endif /* CONFIG_X86_64 */ struct uprobe_xol_ops { bool (*emulate)(struct arch_uprobe *, struct pt_regs *); int (*pre_xol)(struct arch_uprobe *, struct pt_regs *); int (*post_xol)(struct arch_uprobe *, struct pt_regs *); + void 
(*abort)(struct arch_uprobe *, struct pt_regs *); }; static inline int sizeof_long(void) @@ -415,50 +462,67 @@ static inline int sizeof_long(void) static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - pre_xol_rip_insn(auprobe, regs, ¤t->utask->autask); + riprel_pre_xol(auprobe, regs); return 0; } -/* - * Adjust the return address pushed by a call insn executed out of line. - */ -static int adjust_ret_addr(unsigned long sp, long correction) +static int push_ret_address(struct pt_regs *regs, unsigned long ip) { - int rasize = sizeof_long(); - long ra; - - if (copy_from_user(&ra, (void __user *)sp, rasize)) - return -EFAULT; + unsigned long new_sp = regs->sp - sizeof_long(); - ra += correction; - if (copy_to_user((void __user *)sp, &ra, rasize)) + if (copy_to_user((void __user *)new_sp, &ip, sizeof_long())) return -EFAULT; + regs->sp = new_sp; return 0; } +/* + * We have to fix things up as follows: + * + * Typically, the new ip is relative to the copied instruction. We need + * to make it relative to the original instruction (FIX_IP). Exceptions + * are return instructions and absolute or indirect jump or call instructions. + * + * If the single-stepped instruction was a call, the return address that + * is atop the stack is the address following the copied instruction. We + * need to make it the address following the original instruction (FIX_CALL). + * + * If the original instruction was a rip-relative instruction such as + * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent + * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)". + * We need to restore the contents of the scratch register + * (FIX_RIP_reg). + */ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - long correction = (long)(utask->vaddr - utask->xol_vaddr); - handle_riprel_post_xol(auprobe, regs, &correction); - if (auprobe->fixups & UPROBE_FIX_IP) + riprel_post_xol(auprobe, regs); + if (auprobe->defparam.fixups & UPROBE_FIX_IP) { + long correction = utask->vaddr - utask->xol_vaddr; regs->ip += correction; - - if (auprobe->fixups & UPROBE_FIX_CALL) { - if (adjust_ret_addr(regs->sp, correction)) { - regs->sp += sizeof_long(); + } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) { + regs->sp += sizeof_long(); /* Pop incorrect return address */ + if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen)) return -ERESTART; - } } + /* popf; tell the caller to not touch TF */ + if (auprobe->defparam.fixups & UPROBE_FIX_SETF) + utask->autask.saved_tf = true; return 0; } +static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + riprel_post_xol(auprobe, regs); +} + static struct uprobe_xol_ops default_xol_ops = { .pre_xol = default_pre_xol_op, .post_xol = default_post_xol_op, + .abort = default_abort_op, }; static bool branch_is_call(struct arch_uprobe *auprobe) @@ -520,7 +584,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) unsigned long offs = (long)auprobe->branch.offs; if (branch_is_call(auprobe)) { - unsigned long new_sp = regs->sp - sizeof_long(); /* * If it fails we execute this (mangled, see the comment in * branch_clear_offset) insn out-of-line. In the likely case @@ -530,9 +593,8 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) * * But there is corner case, see the comment in ->post_xol(). 
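For the UPROBE_FIX_CALL case in default_post_xol_op() above, the fixup is easiest to see with concrete numbers (illustrative):

	/*
	 * A "call foo" of length 5 probed at vaddr V is single-stepped from
	 * the XOL slot at X, so the CPU pushed X + 5 as the return address.
	 * post_xol pops that bogus value (regs->sp += sizeof_long()) and
	 * push_ret_address() writes V + 5 in its place, so foo returns to
	 * the instruction following the original call site.
	 */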
*/ - if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) + if (push_ret_address(regs, new_ip)) return false; - regs->sp = new_sp; } else if (!check_jmp_cond(auprobe, regs)) { offs = 0; } @@ -583,11 +645,7 @@ static struct uprobe_xol_ops branch_xol_ops = { static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); - - /* has the side-effect of processing the entire instruction */ - insn_get_length(insn); - if (WARN_ON_ONCE(!insn_complete(insn))) - return -ENOEXEC; + int i; switch (opc1) { case 0xeb: /* jmp 8 */ @@ -612,6 +670,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) return -ENOSYS; } + /* + * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported. + * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. + * No one uses these insns, reject any branch insns with such prefix. + */ + for (i = 0; i < insn->prefixes.nbytes; i++) { + if (insn->prefixes.bytes[i] == 0x66) + return -ENOTSUPP; + } + auprobe->branch.opc1 = opc1; auprobe->branch.ilen = insn->length; auprobe->branch.offs = insn->immediate.value; @@ -630,10 +698,10 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) { struct insn insn; - bool fix_ip = true, fix_call = false; + u8 fix_ip_or_call = UPROBE_FIX_IP; int ret; - ret = validate_insn_bits(auprobe, mm, &insn); + ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm)); if (ret) return ret; @@ -642,44 +710,39 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, return ret; /* - * Figure out which fixups arch_uprobe_post_xol() will need to perform, - * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups - * is either zero or it reflects rip-related fixups. + * Figure out which fixups default_post_xol_op() will need to perform, + * and annotate defparam->fixups accordingly. 
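In the switch that follows, the 0xff opcode group is disambiguated by the modrm reg field; two quick instances:

	/*
	 *   ff d0   call *%rax   modrm.reg == 2  ->  fix_ip_or_call = UPROBE_FIX_CALL
	 *   ff e0   jmp  *%rax   modrm.reg == 4  ->  fix_ip_or_call = 0 (ip already correct)
	 */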
*/ switch (OPCODE1(&insn)) { case 0x9d: /* popf */ - auprobe->fixups |= UPROBE_FIX_SETF; + auprobe->defparam.fixups |= UPROBE_FIX_SETF; break; case 0xc3: /* ret or lret -- ip is correct */ case 0xcb: case 0xc2: case 0xca: - fix_ip = false; + case 0xea: /* jmp absolute -- ip is correct */ + fix_ip_or_call = 0; break; case 0x9a: /* call absolute - Fix return addr, not ip */ - fix_call = true; - fix_ip = false; - break; - case 0xea: /* jmp absolute -- ip is correct */ - fix_ip = false; + fix_ip_or_call = UPROBE_FIX_CALL; break; case 0xff: - insn_get_modrm(&insn); switch (MODRM_REG(&insn)) { case 2: case 3: /* call or lcall, indirect */ - fix_call = true; + fix_ip_or_call = UPROBE_FIX_CALL; + break; case 4: case 5: /* jmp or ljmp, indirect */ - fix_ip = false; + fix_ip_or_call = 0; + break; } /* fall through */ default: - handle_riprel_insn(auprobe, &insn); + riprel_analyze(auprobe, &insn); } - if (fix_ip) - auprobe->fixups |= UPROBE_FIX_IP; - if (fix_call) - auprobe->fixups |= UPROBE_FIX_CALL; + auprobe->defparam.ilen = insn.length; + auprobe->defparam.fixups |= fix_ip_or_call; auprobe->ops = &default_xol_ops; return 0; @@ -694,6 +757,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; + if (auprobe->ops->pre_xol) { + int err = auprobe->ops->pre_xol(auprobe, regs); + if (err) + return err; + } + regs->ip = utask->xol_vaddr; utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; @@ -703,8 +772,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) set_task_blockstep(current, false); - if (auprobe->ops->pre_xol) - return auprobe->ops->pre_xol(auprobe, regs); return 0; } @@ -732,56 +799,42 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) * single-step, we single-stepped a copy of the instruction. * * This function prepares to resume execution after the single-step. - * We have to fix things up as follows: - * - * Typically, the new ip is relative to the copied instruction. We need - * to make it relative to the original instruction (FIX_IP). Exceptions - * are return instructions and absolute or indirect jump or call instructions. - * - * If the single-stepped instruction was a call, the return address that - * is atop the stack is the address following the copied instruction. We - * need to make it the address following the original instruction (FIX_CALL). - * - * If the original instruction was a rip-relative instruction such as - * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent - * instruction using a scratch register -- e.g., "movl %edx,(%rax)". - * We need to restore the contents of the scratch register and adjust - * the ip, keeping in mind that the instruction we executed is 4 bytes - * shorter than the original instruction (since we squeezed out the offset - * field). (FIX_RIP_AX or FIX_RIP_CX) */ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; + bool send_sigtrap = utask->autask.saved_tf; + int err = 0; WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); + current->thread.trap_nr = utask->autask.saved_trap_nr; if (auprobe->ops->post_xol) { - int err = auprobe->ops->post_xol(auprobe, regs); + err = auprobe->ops->post_xol(auprobe, regs); if (err) { - arch_uprobe_abort_xol(auprobe, regs); /* - * Restart the probed insn. ->post_xol() must ensure - * this is really possible if it returns -ERESTART. 
+ * Restore ->ip for restart or post mortem analysis. + * ->post_xol() must not return -ERESTART unless this + * is really possible. */ + regs->ip = utask->vaddr; if (err == -ERESTART) - return 0; - return err; + err = 0; + send_sigtrap = false; } } - - current->thread.trap_nr = utask->autask.saved_trap_nr; /* * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP * so we can get an extra SIGTRAP if we do not clear TF. We need * to examine the opcode to make it right. */ - if (utask->autask.saved_tf) + if (send_sigtrap) send_sig(SIGTRAP, current, 0); - else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + + if (!utask->autask.saved_tf) regs->flags &= ~X86_EFLAGS_TF; - return 0; + return err; } /* callback routine for handling exceptions. */ @@ -815,18 +868,18 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, /* * This function gets called when XOL instruction either gets trapped or - * the thread has a fatal signal, or if arch_uprobe_post_xol() failed. - * Reset the instruction pointer to its probed address for the potential - * restart or for post mortem analysis. + * the thread has a fatal signal. Reset the instruction pointer to its + * probed address for the potential restart or for post mortem analysis. */ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - current->thread.trap_nr = utask->autask.saved_trap_nr; - handle_riprel_post_xol(auprobe, regs, NULL); - instruction_pointer_set(regs, utask->vaddr); + if (auprobe->ops->abort) + auprobe->ops->abort(auprobe, regs); + current->thread.trap_nr = utask->autask.saved_trap_nr; + regs->ip = utask->vaddr; /* clear TF if it was set by us in arch_uprobe_pre_xol() */ if (!utask->autask.saved_tf) regs->flags &= ~X86_EFLAGS_TF; diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 2930ae0..28f85c91 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -4,8 +4,8 @@ * (inspired by Andi Kleen's thunk_64.S) * Subject to the GNU public license, v.2. No warranty of any kind. */ - #include <linux/linkage.h> + #include <asm/asm.h> #ifdef CONFIG_TRACE_IRQFLAGS /* put return address in eax (arg1) */ @@ -22,6 +22,7 @@ popl %ecx popl %eax ret + _ASM_NOKPROBE(\name) .endm thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index a63efd6..92d9fea 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/calling.h> +#include <asm/asm.h> /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func, put_ret_addr_in_rdi=0 @@ -25,6 +26,7 @@ call \func jmp restore CFI_ENDPROC + _ASM_NOKPROBE(\name) .endm #ifdef CONFIG_TRACE_IRQFLAGS @@ -43,3 +45,4 @@ restore: RESTORE_ARGS ret CFI_ENDPROC + _ASM_NOKPROBE(restore) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 858b47b..36642793 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -8,7 +8,7 @@ #include <linux/kdebug.h> /* oops_begin/end, ... */ #include <linux/module.h> /* search_exception_table */ #include <linux/bootmem.h> /* max_low_pfn */ -#include <linux/kprobes.h> /* __kprobes, ... */ +#include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... 
*/ #include <linux/perf_event.h> /* perf_sw_event */ #include <linux/hugetlb.h> /* hstate_index_to_shift */ @@ -46,7 +46,7 @@ enum x86_pf_error_code { * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ -static inline int __kprobes +static nokprobe_inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) @@ -55,7 +55,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static inline int __kprobes kprobes_fault(struct pt_regs *regs) +static nokprobe_inline int kprobes_fault(struct pt_regs *regs) { int ret = 0; @@ -262,7 +262,7 @@ void vmalloc_sync_all(void) * * Handle a fault on the vmalloc or module mapping area */ -static noinline __kprobes int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; @@ -292,6 +292,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) return 0; } +NOKPROBE_SYMBOL(vmalloc_fault); /* * Did it hit the DOS screen memory VA from vm86 mode? @@ -359,7 +360,7 @@ void vmalloc_sync_all(void) * * This assumes no large pages in there. */ -static noinline __kprobes int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; @@ -426,6 +427,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) return 0; } +NOKPROBE_SYMBOL(vmalloc_fault); #ifdef CONFIG_CPU_SUP_AMD static const char errata93_warning[] = @@ -928,7 +930,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline __kprobes int +static noinline int spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; @@ -976,6 +978,7 @@ spurious_fault(unsigned long error_code, unsigned long address) return ret; } +NOKPROBE_SYMBOL(spurious_fault); int show_unhandled_signals = 1; @@ -1031,7 +1034,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) * {,trace_}do_page_fault() have notrace on. Having this an actual function * guarantees there's a function trace entry. 
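The fault path switches its small static helpers from __kprobes to nokprobe_inline because a probe anywhere on this path would fault again from inside the probe handler and recurse before kprobes_fault() could resolve it. nokprobe_inline forces the helper to be inlined into its (blacklisted) caller when kprobes are enabled; it is defined in <linux/kprobes.h> roughly as:

	#ifdef CONFIG_KPROBES
	# define nokprobe_inline	__always_inline
	#else
	# define nokprobe_inline	inline
	#endif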
*/ -static void __kprobes noinline +static noinline void __do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { @@ -1254,8 +1257,9 @@ good_area: up_read(&mm->mmap_sem); } +NOKPROBE_SYMBOL(__do_page_fault); -dotraplinkage void __kprobes notrace +dotraplinkage void notrace do_page_fault(struct pt_regs *regs, unsigned long error_code) { unsigned long address = read_cr2(); /* Get the faulting address */ @@ -1273,10 +1277,12 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) __do_page_fault(regs, error_code, address); exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_page_fault); #ifdef CONFIG_TRACING -static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, - unsigned long error_code) +static nokprobe_inline void +trace_page_fault_entries(unsigned long address, struct pt_regs *regs, + unsigned long error_code) { if (user_mode(regs)) trace_page_fault_user(address, regs, error_code); @@ -1284,7 +1290,7 @@ static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs trace_page_fault_kernel(address, regs, error_code); } -dotraplinkage void __kprobes notrace +dotraplinkage void notrace trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) { /* @@ -1301,4 +1307,5 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) __do_page_fault(regs, error_code, address); exception_exit(prev_state); } +NOKPROBE_SYMBOL(trace_do_page_fault); #endif /* CONFIG_TRACING */ diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 0149575..6440221 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -12,13 +12,16 @@ /* * Calling convention : - * rdi : skb pointer + * rbx : skb pointer (callee saved) * esi : offset of byte(s) to fetch in skb (can be scratched) - * r8 : copy of skb->data + * r10 : copy of skb->data * r9d : hlen = skb->len - skb->data_len */ -#define SKBDATA %r8 +#define SKBDATA %r10 #define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */ +#define MAX_BPF_STACK (512 /* from filter.h */ + \ + 32 /* space for rbx,r13,r14,r15 */ + \ + 8 /* space for skb_copy_bits */) sk_load_word: .globl sk_load_word @@ -68,53 +71,31 @@ sk_load_byte_positive_offset: movzbl (SKBDATA,%rsi),%eax ret -/** - * sk_load_byte_msh - BPF_S_LDX_B_MSH helper - * - * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) - * Must preserve A accumulator (%eax) - * Inputs : %esi is the offset value - */ -sk_load_byte_msh: - .globl sk_load_byte_msh - test %esi,%esi - js bpf_slow_path_byte_msh_neg - -sk_load_byte_msh_positive_offset: - .globl sk_load_byte_msh_positive_offset - cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ - jle bpf_slow_path_byte_msh - movzbl (SKBDATA,%rsi),%ebx - and $15,%bl - shl $2,%bl - ret - /* rsi contains offset and can be scratched */ #define bpf_slow_path_common(LEN) \ - push %rdi; /* save skb */ \ + mov %rbx, %rdi; /* arg1 == skb */ \ push %r9; \ push SKBDATA; \ /* rsi already has offset */ \ mov $LEN,%ecx; /* len */ \ - lea -12(%rbp),%rdx; \ + lea - MAX_BPF_STACK + 32(%rbp),%rdx; \ call skb_copy_bits; \ test %eax,%eax; \ pop SKBDATA; \ - pop %r9; \ - pop %rdi + pop %r9; bpf_slow_path_word: bpf_slow_path_common(4) js bpf_error - mov -12(%rbp),%eax + mov - MAX_BPF_STACK + 32(%rbp),%eax bswap %eax ret bpf_slow_path_half: bpf_slow_path_common(2) js bpf_error - mov -12(%rbp),%ax + mov - MAX_BPF_STACK + 32(%rbp),%ax rol $8,%ax movzwl %ax,%eax ret @@ -122,21 +103,11 @@ bpf_slow_path_half: bpf_slow_path_byte: bpf_slow_path_common(1) js bpf_error - 
movzbl -12(%rbp),%eax - ret - -bpf_slow_path_byte_msh: - xchg %eax,%ebx /* dont lose A , X is about to be scratched */ - bpf_slow_path_common(1) - js bpf_error - movzbl -12(%rbp),%eax - and $15,%al - shl $2,%al - xchg %eax,%ebx + movzbl - MAX_BPF_STACK + 32(%rbp),%eax ret #define sk_negative_common(SIZE) \ - push %rdi; /* save skb */ \ + mov %rbx, %rdi; /* arg1 == skb */ \ push %r9; \ push SKBDATA; \ /* rsi already has offset */ \ @@ -145,10 +116,8 @@ bpf_slow_path_byte_msh: test %rax,%rax; \ pop SKBDATA; \ pop %r9; \ - pop %rdi; \ jz bpf_error - bpf_slow_path_word_neg: cmp SKF_MAX_NEG_OFF, %esi /* test range */ jl bpf_error /* offset lower -> error */ @@ -179,22 +148,12 @@ sk_load_byte_negative_offset: movzbl (%rax), %eax ret -bpf_slow_path_byte_msh_neg: - cmp SKF_MAX_NEG_OFF, %esi - jl bpf_error -sk_load_byte_msh_negative_offset: - .globl sk_load_byte_msh_negative_offset - xchg %eax,%ebx /* dont lose A , X is about to be scratched */ - sk_negative_common(1) - movzbl (%rax),%eax - and $15,%al - shl $2,%al - xchg %eax,%ebx - ret - bpf_error: # force a return 0 from jit handler - xor %eax,%eax - mov -8(%rbp),%rbx + xor %eax,%eax + mov - MAX_BPF_STACK(%rbp),%rbx + mov - MAX_BPF_STACK + 8(%rbp),%r13 + mov - MAX_BPF_STACK + 16(%rbp),%r14 + mov - MAX_BPF_STACK + 24(%rbp),%r15 leaveq ret diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 6d5663a..99bef86 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1,6 +1,7 @@ /* bpf_jit_comp.c : BPF JIT compiler * * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) + * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -14,28 +15,16 @@ #include <linux/if_vlan.h> #include <linux/random.h> -/* - * Conventions : - * EAX : BPF A accumulator - * EBX : BPF X accumulator - * RDI : pointer to skb (first argument given to JIT function) - * RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) - * ECX,EDX,ESI : scratch registers - * r9d : skb->len - skb->data_len (headlen) - * r8 : skb->data - * -8(RBP) : saved RBX value - * -16(RBP)..-80(RBP) : BPF_MEMWORDS values - */ int bpf_jit_enable __read_mostly; /* * assembly code in arch/x86/net/bpf_jit.S */ -extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; -extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[]; +extern u8 sk_load_byte_positive_offset[]; extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[]; -extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[]; +extern u8 sk_load_byte_negative_offset[]; static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -56,30 +45,44 @@ static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) -#define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0) - -#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */ -#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */ +#define EMIT1_off32(b1, off) \ + do {EMIT1(b1); EMIT(off, 4); } while (0) +#define EMIT2_off32(b1, b2, off) \ + do {EMIT2(b1, b2); EMIT(off, 4); } 
while (0) +#define EMIT3_off32(b1, b2, b3, off) \ + do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) +#define EMIT4_off32(b1, b2, b3, b4, off) \ + do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) static inline bool is_imm8(int value) { return value <= 127 && value >= -128; } -static inline bool is_near(int offset) +static inline bool is_simm32(s64 value) { - return offset <= 127 && offset >= -128; + return value == (s64) (s32) value; } -#define EMIT_JMP(offset) \ -do { \ - if (offset) { \ - if (is_near(offset)) \ - EMIT2(0xeb, offset); /* jmp .+off8 */ \ - else \ - EMIT1_off32(0xe9, offset); /* jmp .+off32 */ \ - } \ -} while (0) +/* mov dst, src */ +#define EMIT_mov(DST, SRC) \ + do {if (DST != SRC) \ + EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ + } while (0) + +static int bpf_size_to_x86_bytes(int bpf_size) +{ + if (bpf_size == BPF_W) + return 4; + else if (bpf_size == BPF_H) + return 2; + else if (bpf_size == BPF_B) + return 1; + else if (bpf_size == BPF_DW) + return 4; /* imm32 */ + else + return 0; +} /* list of x86 cond jumps opcodes (. + s8) * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) @@ -90,27 +93,8 @@ do { \ #define X86_JNE 0x75 #define X86_JBE 0x76 #define X86_JA 0x77 - -#define EMIT_COND_JMP(op, offset) \ -do { \ - if (is_near(offset)) \ - EMIT2(op, offset); /* jxx .+off8 */ \ - else { \ - EMIT2(0x0f, op + 0x10); \ - EMIT(offset, 4); /* jxx .+off32 */ \ - } \ -} while (0) - -#define COND_SEL(CODE, TOP, FOP) \ - case CODE: \ - t_op = TOP; \ - f_op = FOP; \ - goto cond_branch - - -#define SEEN_DATAREF 1 /* might call external helpers */ -#define SEEN_XREG 2 /* ebx is used */ -#define SEEN_MEM 4 /* use mem[] for temporary storage */ +#define X86_JGE 0x7D +#define X86_JG 0x7F static inline void bpf_flush_icache(void *start, void *end) { @@ -125,26 +109,6 @@ static inline void bpf_flush_icache(void *start, void *end) #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) -/* Helper to find the offset of pkt_type in sk_buff - * We want to make sure its still a 3bit field starting at a byte boundary. - */ -#define PKT_TYPE_MAX 7 -static int pkt_type_offset(void) -{ - struct sk_buff skb_probe = { - .pkt_type = ~0, - }; - char *ct = (char *)&skb_probe; - unsigned int off; - - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (ct[off] == PKT_TYPE_MAX) - return off; - } - pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n"); - return -1; -} - struct bpf_binary_header { unsigned int pages; /* Note : for security reasons, bpf code will follow a randomly @@ -178,583 +142,771 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, return header; } -void bpf_jit_compile(struct sk_filter *fp) +/* pick a register outside of BPF range for JIT internal work */ +#define AUX_REG (MAX_BPF_REG + 1) + +/* the following table maps BPF registers to x64 registers. 
+ * x64 register r12 is unused, since if used as base address register + * in load/store instructions, it always needs an extra byte of encoding + */ +static const int reg2hex[] = { + [BPF_REG_0] = 0, /* rax */ + [BPF_REG_1] = 7, /* rdi */ + [BPF_REG_2] = 6, /* rsi */ + [BPF_REG_3] = 2, /* rdx */ + [BPF_REG_4] = 1, /* rcx */ + [BPF_REG_5] = 0, /* r8 */ + [BPF_REG_6] = 3, /* rbx callee saved */ + [BPF_REG_7] = 5, /* r13 callee saved */ + [BPF_REG_8] = 6, /* r14 callee saved */ + [BPF_REG_9] = 7, /* r15 callee saved */ + [BPF_REG_FP] = 5, /* rbp readonly */ + [AUX_REG] = 3, /* r11 temp register */ +}; + +/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 + * which need extra byte of encoding. + * rax,rcx,...,rbp have simpler encoding + */ +static inline bool is_ereg(u32 reg) { - u8 temp[64]; - u8 *prog; - unsigned int proglen, oldproglen = 0; - int ilen, i; - int t_offset, f_offset; - u8 t_op, f_op, seen = 0, pass; - u8 *image = NULL; - struct bpf_binary_header *header = NULL; - u8 *func; - int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */ - unsigned int cleanup_addr; /* epilogue code offset */ - unsigned int *addrs; - const struct sock_filter *filter = fp->insns; - int flen = fp->len; + if (reg == BPF_REG_5 || reg == AUX_REG || + (reg >= BPF_REG_7 && reg <= BPF_REG_9)) + return true; + else + return false; +} - if (!bpf_jit_enable) - return; +/* add modifiers if 'reg' maps to x64 registers r8..r15 */ +static inline u8 add_1mod(u8 byte, u32 reg) +{ + if (is_ereg(reg)) + byte |= 1; + return byte; +} - addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) - return; +static inline u8 add_2mod(u8 byte, u32 r1, u32 r2) +{ + if (is_ereg(r1)) + byte |= 1; + if (is_ereg(r2)) + byte |= 4; + return byte; +} - /* Before first pass, make a rough estimation of addrs[] - * each bpf instruction is translated to less than 64 bytes +/* encode 'dst_reg' register into x64 opcode 'byte' */ +static inline u8 add_1reg(u8 byte, u32 dst_reg) +{ + return byte + reg2hex[dst_reg]; +} + +/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ +static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) +{ + return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); +} + +struct jit_context { + unsigned int cleanup_addr; /* epilogue code offset */ + bool seen_ld_abs; +}; + +static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, + int oldproglen, struct jit_context *ctx) +{ + struct sock_filter_int *insn = bpf_prog->insnsi; + int insn_cnt = bpf_prog->len; + u8 temp[64]; + int i; + int proglen = 0; + u8 *prog = temp; + int stacksize = MAX_BPF_STACK + + 32 /* space for rbx, r13, r14, r15 */ + + 8 /* space for skb_copy_bits() buffer */; + + EMIT1(0x55); /* push rbp */ + EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ + + /* sub rsp, stacksize */ + EMIT3_off32(0x48, 0x81, 0xEC, stacksize); + + /* all classic BPF filters use R6(rbx) save it */ + + /* mov qword ptr [rbp-X],rbx */ + EMIT3_off32(0x48, 0x89, 0x9D, -stacksize); + + /* sk_convert_filter() maps classic BPF register X to R7 and uses R8 + * as temporary, so all tcpdump filters need to spill/fill R7(r13) and + * R8(r14). R9(r15) spill could be made conditional, but there is only + * one 'bpf_error' return path out of helper functions inside bpf_jit.S + * The overhead of extra spill is negligible for any filter other + * than synthetic ones. Therefore not worth adding complexity. 
*/ - for (proglen = 0, i = 0; i < flen; i++) { - proglen += 64; - addrs[i] = proglen; + + /* mov qword ptr [rbp-X],r13 */ + EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8); + /* mov qword ptr [rbp-X],r14 */ + EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16); + /* mov qword ptr [rbp-X],r15 */ + EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24); + + /* clear A and X registers */ + EMIT2(0x31, 0xc0); /* xor eax, eax */ + EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */ + + if (ctx->seen_ld_abs) { + /* r9d : skb->len - skb->data_len (headlen) + * r10 : skb->data + */ + if (is_imm8(offsetof(struct sk_buff, len))) + /* mov %r9d, off8(%rdi) */ + EMIT4(0x44, 0x8b, 0x4f, + offsetof(struct sk_buff, len)); + else + /* mov %r9d, off32(%rdi) */ + EMIT3_off32(0x44, 0x8b, 0x8f, + offsetof(struct sk_buff, len)); + + if (is_imm8(offsetof(struct sk_buff, data_len))) + /* sub %r9d, off8(%rdi) */ + EMIT4(0x44, 0x2b, 0x4f, + offsetof(struct sk_buff, data_len)); + else + EMIT3_off32(0x44, 0x2b, 0x8f, + offsetof(struct sk_buff, data_len)); + + if (is_imm8(offsetof(struct sk_buff, data))) + /* mov %r10, off8(%rdi) */ + EMIT4(0x4c, 0x8b, 0x57, + offsetof(struct sk_buff, data)); + else + /* mov %r10, off32(%rdi) */ + EMIT3_off32(0x4c, 0x8b, 0x97, + offsetof(struct sk_buff, data)); } - cleanup_addr = proglen; /* epilogue address */ - for (pass = 0; pass < 10; pass++) { - u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen; - /* no prologue/epilogue for trivial filters (RET something) */ - proglen = 0; - prog = temp; + for (i = 0; i < insn_cnt; i++, insn++) { + const s32 imm32 = insn->imm; + u32 dst_reg = insn->dst_reg; + u32 src_reg = insn->src_reg; + u8 b1 = 0, b2 = 0, b3 = 0; + s64 jmp_offset; + u8 jmp_cond; + int ilen; + u8 *func; + + switch (insn->code) { + /* ALU */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + switch (BPF_OP(insn->code)) { + case BPF_ADD: b2 = 0x01; break; + case BPF_SUB: b2 = 0x29; break; + case BPF_AND: b2 = 0x21; break; + case BPF_OR: b2 = 0x09; break; + case BPF_XOR: b2 = 0x31; break; + } + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_2mod(0x48, dst_reg, src_reg)); + else if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); + break; - if (seen_or_pass0) { - EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ - EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ - /* note : must save %rbx in case bpf_error is hit */ - if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF)) - EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ - if (seen_or_pass0 & SEEN_XREG) - CLEAR_X(); /* make sure we dont leek kernel memory */ - - /* - * If this filter needs to access skb data, - * loads r9 and r8 with : - * r9 = skb->len - skb->data_len - * r8 = skb->data + /* mov dst, src */ + case BPF_ALU64 | BPF_MOV | BPF_X: + EMIT_mov(dst_reg, src_reg); + break; + + /* mov32 dst, src */ + case BPF_ALU | BPF_MOV | BPF_X: + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg)); + break; + + /* neg dst */ + case BPF_ALU | BPF_NEG: + case BPF_ALU64 | BPF_NEG: + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, dst_reg)); + else if 
(is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); + EMIT2(0xF7, add_1reg(0xD8, dst_reg)); + break; + + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); + + switch (BPF_OP(insn->code)) { + case BPF_ADD: b3 = 0xC0; break; + case BPF_SUB: b3 = 0xE8; break; + case BPF_AND: b3 = 0xE0; break; + case BPF_OR: b3 = 0xC8; break; + case BPF_XOR: b3 = 0xF0; break; + } + + if (is_imm8(imm32)) + EMIT3(0x83, add_1reg(b3, dst_reg), imm32); + else + EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32); + break; + + case BPF_ALU64 | BPF_MOV | BPF_K: + /* optimization: if imm32 is positive, + * use 'mov eax, imm32' (which zero-extends imm32) + * to save 2 bytes */ - if (seen_or_pass0 & SEEN_DATAREF) { - if (offsetof(struct sk_buff, len) <= 127) - /* mov off8(%rdi),%r9d */ - EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); - else { - /* mov off32(%rdi),%r9d */ - EMIT3(0x44, 0x8b, 0x8f); - EMIT(offsetof(struct sk_buff, len), 4); - } - if (is_imm8(offsetof(struct sk_buff, data_len))) - /* sub off8(%rdi),%r9d */ - EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len)); - else { - EMIT3(0x44, 0x2b, 0x8f); - EMIT(offsetof(struct sk_buff, data_len), 4); - } + if (imm32 < 0) { + /* 'mov rax, imm32' sign extends imm32 */ + b1 = add_1mod(0x48, dst_reg); + b2 = 0xC7; + b3 = 0xC0; + EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32); + break; + } - if (is_imm8(offsetof(struct sk_buff, data))) - /* mov off8(%rdi),%r8 */ - EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data)); - else { - /* mov off32(%rdi),%r8 */ - EMIT3(0x4c, 0x8b, 0x87); - EMIT(offsetof(struct sk_buff, data), 4); - } + case BPF_ALU | BPF_MOV | BPF_K: + /* mov %eax, imm32 */ + if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); + EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); + break; + + /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */ + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + EMIT1(0x50); /* push rax */ + EMIT1(0x52); /* push rdx */ + + if (BPF_SRC(insn->code) == BPF_X) + /* mov r11, src_reg */ + EMIT_mov(AUX_REG, src_reg); + else + /* mov r11, imm32 */ + EMIT3_off32(0x49, 0xC7, 0xC3, imm32); + + /* mov rax, dst_reg */ + EMIT_mov(BPF_REG_0, dst_reg); + + /* xor edx, edx + * equivalent to 'xor rdx, rdx', but one byte less + */ + EMIT2(0x31, 0xd2); + + if (BPF_SRC(insn->code) == BPF_X) { + /* if (src_reg == 0) return 0 */ + + /* cmp r11, 0 */ + EMIT4(0x49, 0x83, 0xFB, 0x00); + + /* jne .+9 (skip over pop, pop, xor and jmp) */ + EMIT2(X86_JNE, 1 + 1 + 2 + 5); + EMIT1(0x5A); /* pop rdx */ + EMIT1(0x58); /* pop rax */ + EMIT2(0x31, 0xc0); /* xor eax, eax */ + + /* jmp cleanup_addr + * addrs[i] - 11, because there are 11 bytes + * after this insn: div, mov, pop, pop, mov + */ + jmp_offset = ctx->cleanup_addr - (addrs[i] - 11); + EMIT1_off32(0xE9, jmp_offset); } - } - switch (filter[0].code) { - case BPF_S_RET_K: - case BPF_S_LD_W_LEN: - case BPF_S_ANC_PROTOCOL: - case 
BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_CPU: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_QUEUE: - case BPF_S_ANC_PKTTYPE: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: - /* first instruction sets A register (or is RET 'constant') */ + if (BPF_CLASS(insn->code) == BPF_ALU64) + /* div r11 */ + EMIT3(0x49, 0xF7, 0xF3); + else + /* div r11d */ + EMIT3(0x41, 0xF7, 0xF3); + + if (BPF_OP(insn->code) == BPF_MOD) + /* mov r11, rdx */ + EMIT3(0x49, 0x89, 0xD3); + else + /* mov r11, rax */ + EMIT3(0x49, 0x89, 0xC3); + + EMIT1(0x5A); /* pop rdx */ + EMIT1(0x58); /* pop rax */ + + /* mov dst_reg, r11 */ + EMIT_mov(dst_reg, AUX_REG); break; - default: - /* make sure we dont leak kernel information to user */ - CLEAR_A(); /* A = 0 */ - } - for (i = 0; i < flen; i++) { - unsigned int K = filter[i].k; + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_X: + EMIT1(0x50); /* push rax */ + EMIT1(0x52); /* push rdx */ + + /* mov r11, dst_reg */ + EMIT_mov(AUX_REG, dst_reg); + + if (BPF_SRC(insn->code) == BPF_X) + /* mov rax, src_reg */ + EMIT_mov(BPF_REG_0, src_reg); + else + /* mov rax, imm32 */ + EMIT3_off32(0x48, 0xC7, 0xC0, imm32); + + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, AUX_REG)); + else if (is_ereg(AUX_REG)) + EMIT1(add_1mod(0x40, AUX_REG)); + /* mul(q) r11 */ + EMIT2(0xF7, add_1reg(0xE0, AUX_REG)); + + /* mov r11, rax */ + EMIT_mov(AUX_REG, BPF_REG_0); + + EMIT1(0x5A); /* pop rdx */ + EMIT1(0x58); /* pop rax */ + + /* mov dst_reg, r11 */ + EMIT_mov(dst_reg, AUX_REG); + break; - switch (filter[i].code) { - case BPF_S_ALU_ADD_X: /* A += X; */ - seen |= SEEN_XREG; - EMIT2(0x01, 0xd8); /* add %ebx,%eax */ - break; - case BPF_S_ALU_ADD_K: /* A += K; */ - if (!K) - break; - if (is_imm8(K)) - EMIT3(0x83, 0xc0, K); /* add imm8,%eax */ - else - EMIT1_off32(0x05, K); /* add imm32,%eax */ - break; - case BPF_S_ALU_SUB_X: /* A -= X; */ - seen |= SEEN_XREG; - EMIT2(0x29, 0xd8); /* sub %ebx,%eax */ - break; - case BPF_S_ALU_SUB_K: /* A -= K */ - if (!K) - break; - if (is_imm8(K)) - EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */ - else - EMIT1_off32(0x2d, K); /* sub imm32,%eax */ - break; - case BPF_S_ALU_MUL_X: /* A *= X; */ - seen |= SEEN_XREG; - EMIT3(0x0f, 0xaf, 0xc3); /* imul %ebx,%eax */ - break; - case BPF_S_ALU_MUL_K: /* A *= K */ - if (is_imm8(K)) - EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */ - else { - EMIT2(0x69, 0xc0); /* imul imm32,%eax */ - EMIT(K, 4); - } - break; - case BPF_S_ALU_DIV_X: /* A /= X; */ - seen |= SEEN_XREG; - EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ - if (pc_ret0 > 0) { - /* addrs[pc_ret0 - 1] is start address of target - * (addrs[i] - 4) is the address following this jmp - * ("xor %edx,%edx; div %ebx" being 4 bytes long) - */ - EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] - - (addrs[i] - 4)); - } else { - EMIT_COND_JMP(X86_JNE, 2 + 5); - CLEAR_A(); - EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ - } - EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ - break; - case BPF_S_ALU_MOD_X: /* A %= X; */ - seen |= SEEN_XREG; - EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ - if (pc_ret0 > 0) { - /* addrs[pc_ret0 - 1] is start address of target - * (addrs[i] - 6) is the address following this jmp - * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long) - */ - EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] - - (addrs[i] - 6)); - } else { - EMIT_COND_JMP(X86_JNE, 2 + 
5); - CLEAR_A(); - EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */ - } - EMIT2(0x31, 0xd2); /* xor %edx,%edx */ - EMIT2(0xf7, 0xf3); /* div %ebx */ - EMIT2(0x89, 0xd0); /* mov %edx,%eax */ - break; - case BPF_S_ALU_MOD_K: /* A %= K; */ - if (K == 1) { - CLEAR_A(); - break; - } - EMIT2(0x31, 0xd2); /* xor %edx,%edx */ - EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ - EMIT2(0xf7, 0xf1); /* div %ecx */ - EMIT2(0x89, 0xd0); /* mov %edx,%eax */ - break; - case BPF_S_ALU_DIV_K: /* A /= K */ - if (K == 1) - break; - EMIT2(0x31, 0xd2); /* xor %edx,%edx */ - EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ - EMIT2(0xf7, 0xf1); /* div %ecx */ - break; - case BPF_S_ALU_AND_X: - seen |= SEEN_XREG; - EMIT2(0x21, 0xd8); /* and %ebx,%eax */ - break; - case BPF_S_ALU_AND_K: - if (K >= 0xFFFFFF00) { - EMIT2(0x24, K & 0xFF); /* and imm8,%al */ - } else if (K >= 0xFFFF0000) { - EMIT2(0x66, 0x25); /* and imm16,%ax */ - EMIT(K, 2); - } else { - EMIT1_off32(0x25, K); /* and imm32,%eax */ - } - break; - case BPF_S_ALU_OR_X: - seen |= SEEN_XREG; - EMIT2(0x09, 0xd8); /* or %ebx,%eax */ - break; - case BPF_S_ALU_OR_K: - if (is_imm8(K)) - EMIT3(0x83, 0xc8, K); /* or imm8,%eax */ - else - EMIT1_off32(0x0d, K); /* or imm32,%eax */ - break; - case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ - case BPF_S_ALU_XOR_X: - seen |= SEEN_XREG; - EMIT2(0x31, 0xd8); /* xor %ebx,%eax */ - break; - case BPF_S_ALU_XOR_K: /* A ^= K; */ - if (K == 0) - break; - if (is_imm8(K)) - EMIT3(0x83, 0xf0, K); /* xor imm8,%eax */ - else - EMIT1_off32(0x35, K); /* xor imm32,%eax */ - break; - case BPF_S_ALU_LSH_X: /* A <<= X; */ - seen |= SEEN_XREG; - EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ - break; - case BPF_S_ALU_LSH_K: - if (K == 0) - break; - else if (K == 1) - EMIT2(0xd1, 0xe0); /* shl %eax */ - else - EMIT3(0xc1, 0xe0, K); - break; - case BPF_S_ALU_RSH_X: /* A >>= X; */ - seen |= SEEN_XREG; - EMIT4(0x89, 0xd9, 0xd3, 0xe8); /* mov %ebx,%ecx; shr %cl,%eax */ - break; - case BPF_S_ALU_RSH_K: /* A >>= K; */ - if (K == 0) - break; - else if (K == 1) - EMIT2(0xd1, 0xe8); /* shr %eax */ - else - EMIT3(0xc1, 0xe8, K); - break; - case BPF_S_ALU_NEG: - EMIT2(0xf7, 0xd8); /* neg %eax */ - break; - case BPF_S_RET_K: - if (!K) { - if (pc_ret0 == -1) - pc_ret0 = i; - CLEAR_A(); - } else { - EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ - } - /* fallinto */ - case BPF_S_RET_A: - if (seen_or_pass0) { - if (i != flen - 1) { - EMIT_JMP(cleanup_addr - addrs[i]); - break; - } - if (seen_or_pass0 & SEEN_XREG) - EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ - EMIT1(0xc9); /* leaveq */ - } - EMIT1(0xc3); /* ret */ - break; - case BPF_S_MISC_TAX: /* X = A */ - seen |= SEEN_XREG; - EMIT2(0x89, 0xc3); /* mov %eax,%ebx */ - break; - case BPF_S_MISC_TXA: /* A = X */ - seen |= SEEN_XREG; - EMIT2(0x89, 0xd8); /* mov %ebx,%eax */ - break; - case BPF_S_LD_IMM: /* A = K */ - if (!K) - CLEAR_A(); - else - EMIT1_off32(0xb8, K); /* mov $imm32,%eax */ - break; - case BPF_S_LDX_IMM: /* X = K */ - seen |= SEEN_XREG; - if (!K) - CLEAR_X(); + /* shifts */ + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: + case BPF_ALU64 | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); + + switch (BPF_OP(insn->code)) { + case BPF_LSH: b3 = 0xE0; break; + case BPF_RSH: b3 = 0xE8; break; + case BPF_ARSH: b3 = 0xF8; break; + } + 
EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); + break; + + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm32) { + case 16: + /* emit 'ror %ax, 8' to swap lower 2 bytes */ + EMIT1(0x66); + if (is_ereg(dst_reg)) + EMIT1(0x41); + EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); + break; + case 32: + /* emit 'bswap eax' to swap lower 4 bytes */ + if (is_ereg(dst_reg)) + EMIT2(0x41, 0x0F); else - EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */ - break; - case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */ - seen |= SEEN_MEM; - EMIT3(0x8b, 0x45, 0xf0 - K*4); - break; - case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */ - seen |= SEEN_XREG | SEEN_MEM; - EMIT3(0x8b, 0x5d, 0xf0 - K*4); - break; - case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */ - seen |= SEEN_MEM; - EMIT3(0x89, 0x45, 0xf0 - K*4); - break; - case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ - seen |= SEEN_XREG | SEEN_MEM; - EMIT3(0x89, 0x5d, 0xf0 - K*4); - break; - case BPF_S_LD_W_LEN: /* A = skb->len; */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); - if (is_imm8(offsetof(struct sk_buff, len))) - /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len)); - else { - EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, len), 4); - } - break; - case BPF_S_LDX_W_LEN: /* X = skb->len; */ - seen |= SEEN_XREG; - if (is_imm8(offsetof(struct sk_buff, len))) - /* mov off8(%rdi),%ebx */ - EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len)); - else { - EMIT2(0x8b, 0x9f); - EMIT(offsetof(struct sk_buff, len), 4); - } - break; - case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - if (is_imm8(offsetof(struct sk_buff, protocol))) { - /* movzwl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol)); - } else { - EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ - EMIT(offsetof(struct sk_buff, protocol), 4); - } - EMIT2(0x86, 0xc4); /* ntohs() : xchg %al,%ah */ - break; - case BPF_S_ANC_IFINDEX: - if (is_imm8(offsetof(struct sk_buff, dev))) { - /* movq off8(%rdi),%rax */ - EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev)); - } else { - EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */ - EMIT(offsetof(struct sk_buff, dev), 4); - } - EMIT3(0x48, 0x85, 0xc0); /* test %rax,%rax */ - EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6)); - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - EMIT2(0x8b, 0x80); /* mov off32(%rax),%eax */ - EMIT(offsetof(struct net_device, ifindex), 4); - break; - case BPF_S_ANC_MARK: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - if (is_imm8(offsetof(struct sk_buff, mark))) { - /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark)); - } else { - EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, mark), 4); - } - break; - case BPF_S_ANC_RXHASH: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - if (is_imm8(offsetof(struct sk_buff, hash))) { - /* mov off8(%rdi),%eax */ - EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash)); - } else { - EMIT2(0x8b, 0x87); - EMIT(offsetof(struct sk_buff, hash), 4); - } - break; - case BPF_S_ANC_QUEUE: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - if (is_imm8(offsetof(struct sk_buff, queue_mapping))) { - /* movzwl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping)); - } else { - EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ - EMIT(offsetof(struct sk_buff, queue_mapping), 4); - } - break; - case BPF_S_ANC_CPU: -#ifdef CONFIG_SMP - 
EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */ - EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */ -#else - CLEAR_A(); -#endif + EMIT1(0x0F); + EMIT1(add_1reg(0xC8, dst_reg)); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - if (is_imm8(offsetof(struct sk_buff, vlan_tci))) { - /* movzwl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci)); - } else { - EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ - EMIT(offsetof(struct sk_buff, vlan_tci), 4); - } - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); - if (filter[i].code == BPF_S_ANC_VLAN_TAG) { - EMIT3(0x80, 0xe4, 0xef); /* and $0xef,%ah */ - } else { - EMIT3(0xc1, 0xe8, 0x0c); /* shr $0xc,%eax */ - EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */ - } - break; - case BPF_S_ANC_PKTTYPE: - { - int off = pkt_type_offset(); - - if (off < 0) - goto out; - if (is_imm8(off)) { - /* movzbl off8(%rdi),%eax */ - EMIT4(0x0f, 0xb6, 0x47, off); - } else { - /* movbl off32(%rdi),%eax */ - EMIT3(0x0f, 0xb6, 0x87); - EMIT(off, 4); - } - EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and $0x7,%eax */ + case 64: + /* emit 'bswap rax' to swap 8 bytes */ + EMIT3(add_1mod(0x48, dst_reg), 0x0F, + add_1reg(0xC8, dst_reg)); break; } - case BPF_S_LD_W_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_word); -common_load: seen |= SEEN_DATAREF; - t_offset = func - (image + addrs[i]); - EMIT1_off32(0xbe, K); /* mov imm32,%esi */ - EMIT1_off32(0xe8, t_offset); /* call */ - break; - case BPF_S_LD_H_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_half); - goto common_load; - case BPF_S_LD_B_ABS: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte); - goto common_load; - case BPF_S_LDX_B_MSH: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); - seen |= SEEN_DATAREF | SEEN_XREG; - t_offset = func - (image + addrs[i]); - EMIT1_off32(0xbe, K); /* mov imm32,%esi */ - EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ - break; - case BPF_S_LD_W_IND: - func = sk_load_word; -common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; - t_offset = func - (image + addrs[i]); - if (K) { - if (is_imm8(K)) { - EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */ - } else { - EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */ - EMIT(K, 4); - } - } else { - EMIT2(0x89,0xde); /* mov %ebx,%esi */ - } - EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ - break; - case BPF_S_LD_H_IND: - func = sk_load_half; - goto common_load_ind; - case BPF_S_LD_B_IND: - func = sk_load_byte; - goto common_load_ind; - case BPF_S_JMP_JA: - t_offset = addrs[i + K] - addrs[i]; - EMIT_JMP(t_offset); - break; - COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE); - COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB); - COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE); - COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE); - COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE); - COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB); - COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE); - COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE); - -cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; - t_offset = addrs[i + filter[i].jt] - addrs[i]; - - /* same targets, can avoid doing the test :) */ - if (filter[i].jt == filter[i].jf) { - EMIT_JMP(t_offset); - break; - } + break; + + case BPF_ALU | BPF_END | BPF_FROM_LE: + break; + + /* ST: *(u8*)(dst_reg + off) = imm */ + case BPF_ST | BPF_MEM | BPF_B: + if (is_ereg(dst_reg)) + EMIT2(0x41, 0xC6); + else + EMIT1(0xC6); + goto st; + case BPF_ST | BPF_MEM | BPF_H: + if (is_ereg(dst_reg)) + EMIT3(0x66, 0x41, 0xC7); + else + EMIT2(0x66, 
0xC7); + goto st; + case BPF_ST | BPF_MEM | BPF_W: + if (is_ereg(dst_reg)) + EMIT2(0x41, 0xC7); + else + EMIT1(0xC7); + goto st; + case BPF_ST | BPF_MEM | BPF_DW: + EMIT2(add_1mod(0x48, dst_reg), 0xC7); + +st: if (is_imm8(insn->off)) + EMIT2(add_1reg(0x40, dst_reg), insn->off); + else + EMIT1_off32(add_1reg(0x80, dst_reg), insn->off); + + EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); + break; + + /* STX: *(u8*)(dst_reg + off) = src_reg */ + case BPF_STX | BPF_MEM | BPF_B: + /* emit 'mov byte ptr [rax + off], al' */ + if (is_ereg(dst_reg) || is_ereg(src_reg) || + /* have to add extra byte for x86 SIL, DIL regs */ + src_reg == BPF_REG_1 || src_reg == BPF_REG_2) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); + else + EMIT1(0x88); + goto stx; + case BPF_STX | BPF_MEM | BPF_H: + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); + else + EMIT2(0x66, 0x89); + goto stx; + case BPF_STX | BPF_MEM | BPF_W: + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89); + else + EMIT1(0x89); + goto stx; + case BPF_STX | BPF_MEM | BPF_DW: + EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); +stx: if (is_imm8(insn->off)) + EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); + else + EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), + insn->off); + break; + + /* LDX: dst_reg = *(u8*)(src_reg + off) */ + case BPF_LDX | BPF_MEM | BPF_B: + /* emit 'movzx rax, byte ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); + goto ldx; + case BPF_LDX | BPF_MEM | BPF_H: + /* emit 'movzx rax, word ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); + goto ldx; + case BPF_LDX | BPF_MEM | BPF_W: + /* emit 'mov eax, dword ptr [rax+0x14]' */ + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); + else + EMIT1(0x8B); + goto ldx; + case BPF_LDX | BPF_MEM | BPF_DW: + /* emit 'mov rax, qword ptr [rax+0x14]' */ + EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); +ldx: /* if insn->off == 0 we can save one extra byte, but + * special case of x86 r13 which always needs an offset + * is not worth the hassle + */ + if (is_imm8(insn->off)) + EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off); + else + EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), + insn->off); + break; + + /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ + case BPF_STX | BPF_XADD | BPF_W: + /* emit 'lock add dword ptr [rax + off], eax' */ + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); + else + EMIT2(0xF0, 0x01); + goto xadd; + case BPF_STX | BPF_XADD | BPF_DW: + EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01); +xadd: if (is_imm8(insn->off)) + EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); + else + EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), + insn->off); + break; + + /* call */ + case BPF_JMP | BPF_CALL: + func = (u8 *) __bpf_call_base + imm32; + jmp_offset = func - (image + addrs[i]); + if (ctx->seen_ld_abs) { + EMIT2(0x41, 0x52); /* push %r10 */ + EMIT2(0x41, 0x51); /* push %r9 */ + /* need to adjust jmp offset, since + * pop %r9, pop %r10 take 4 bytes after call insn + */ + jmp_offset += 4; + } + if (!imm32 || !is_simm32(jmp_offset)) { + pr_err("unsupported bpf func %d addr %p image %p\n", + imm32, func, image); + return -EINVAL; + } + EMIT1_off32(0xE8, jmp_offset); + if (ctx->seen_ld_abs) { + EMIT2(0x41, 0x59); /* pop %r9 */ + EMIT2(0x41, 0x5A); /* pop %r10 */ + } + break; + + /* cond jump */ + case BPF_JMP | BPF_JEQ | BPF_X: + case 
BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + /* cmp dst_reg, src_reg */ + EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39, + add_2reg(0xC0, dst_reg, src_reg)); + goto emit_cond_jmp; + + case BPF_JMP | BPF_JSET | BPF_X: + /* test dst_reg, src_reg */ + EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85, + add_2reg(0xC0, dst_reg, src_reg)); + goto emit_cond_jmp; + + case BPF_JMP | BPF_JSET | BPF_K: + /* test dst_reg, imm32 */ + EMIT1(add_1mod(0x48, dst_reg)); + EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); + goto emit_cond_jmp; + + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + /* cmp dst_reg, imm8/32 */ + EMIT1(add_1mod(0x48, dst_reg)); + + if (is_imm8(imm32)) + EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); + else + EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); + +emit_cond_jmp: /* convert BPF opcode to x86 */ + switch (BPF_OP(insn->code)) { + case BPF_JEQ: + jmp_cond = X86_JE; + break; + case BPF_JSET: + case BPF_JNE: + jmp_cond = X86_JNE; + break; + case BPF_JGT: + /* GT is unsigned '>', JA in x86 */ + jmp_cond = X86_JA; + break; + case BPF_JGE: + /* GE is unsigned '>=', JAE in x86 */ + jmp_cond = X86_JAE; + break; + case BPF_JSGT: + /* signed '>', GT in x86 */ + jmp_cond = X86_JG; + break; + case BPF_JSGE: + /* signed '>=', GE in x86 */ + jmp_cond = X86_JGE; + break; + default: /* to silence gcc warning */ + return -EFAULT; + } + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (is_imm8(jmp_offset)) { + EMIT2(jmp_cond, jmp_offset); + } else if (is_simm32(jmp_offset)) { + EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); + } else { + pr_err("cond_jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } + + break; - switch (filter[i].code) { - case BPF_S_JMP_JGT_X: - case BPF_S_JMP_JGE_X: - case BPF_S_JMP_JEQ_X: - seen |= SEEN_XREG; - EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */ - break; - case BPF_S_JMP_JSET_X: - seen |= SEEN_XREG; - EMIT2(0x85, 0xd8); /* test %ebx,%eax */ - break; - case BPF_S_JMP_JEQ_K: - if (K == 0) { - EMIT2(0x85, 0xc0); /* test %eax,%eax */ - break; - } - case BPF_S_JMP_JGT_K: - case BPF_S_JMP_JGE_K: - if (K <= 127) - EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */ + case BPF_JMP | BPF_JA: + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (!jmp_offset) + /* optimize out nop jumps */ + break; +emit_jmp: + if (is_imm8(jmp_offset)) { + EMIT2(0xEB, jmp_offset); + } else if (is_simm32(jmp_offset)) { + EMIT1_off32(0xE9, jmp_offset); + } else { + pr_err("jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } + break; + + case BPF_LD | BPF_IND | BPF_W: + func = sk_load_word; + goto common_load; + case BPF_LD | BPF_ABS | BPF_W: + func = CHOOSE_LOAD_FUNC(imm32, sk_load_word); +common_load: ctx->seen_ld_abs = true; + jmp_offset = func - (image + addrs[i]); + if (!func || !is_simm32(jmp_offset)) { + pr_err("unsupported bpf func %d addr %p image %p\n", + imm32, func, image); + return -EINVAL; + } + if (BPF_MODE(insn->code) == BPF_ABS) { + /* mov %esi, imm32 */ + EMIT1_off32(0xBE, imm32); + } else { + /* mov %rsi, src_reg */ + EMIT_mov(BPF_REG_2, src_reg); + if (imm32) { + if (is_imm8(imm32)) + /* add %esi, imm8 */ + EMIT3(0x83, 0xC6, imm32); else - EMIT1_off32(0x3d, K); /* cmp imm32,%eax */ - break; - case BPF_S_JMP_JSET_K: - if (K <= 0xFF) - EMIT2(0xa8, K); /* test imm8,%al */ - else if (!(K & 0xFFFF00FF)) - 
EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */ - else if (K <= 0xFFFF) { - EMIT2(0x66, 0xa9); /* test imm16,%ax */ - EMIT(K, 2); - } else { - EMIT1_off32(0xa9, K); /* test imm32,%eax */ - } - break; + /* add %esi, imm32 */ + EMIT2_off32(0x81, 0xC6, imm32); } - if (filter[i].jt != 0) { - if (filter[i].jf && f_offset) - t_offset += is_near(f_offset) ? 2 : 5; - EMIT_COND_JMP(t_op, t_offset); - if (filter[i].jf) - EMIT_JMP(f_offset); - break; - } - EMIT_COND_JMP(f_op, f_offset); - break; - default: - /* hmm, too complex filter, give up with jit compiler */ - goto out; } - ilen = prog - temp; - if (image) { - if (unlikely(proglen + ilen > oldproglen)) { - pr_err("bpb_jit_compile fatal error\n"); - kfree(addrs); - module_free(NULL, header); - return; - } - memcpy(image + proglen, temp, ilen); + /* skb pointer is in R6 (%rbx), it will be copied into + * %rdi if skb_copy_bits() call is necessary. + * sk_load_* helpers also use %r10 and %r9d. + * See bpf_jit.S + */ + EMIT1_off32(0xE8, jmp_offset); /* call */ + break; + + case BPF_LD | BPF_IND | BPF_H: + func = sk_load_half; + goto common_load; + case BPF_LD | BPF_ABS | BPF_H: + func = CHOOSE_LOAD_FUNC(imm32, sk_load_half); + goto common_load; + case BPF_LD | BPF_IND | BPF_B: + func = sk_load_byte; + goto common_load; + case BPF_LD | BPF_ABS | BPF_B: + func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte); + goto common_load; + + case BPF_JMP | BPF_EXIT: + if (i != insn_cnt - 1) { + jmp_offset = ctx->cleanup_addr - addrs[i]; + goto emit_jmp; } - proglen += ilen; - addrs[i] = proglen; - prog = temp; + /* update cleanup_addr */ + ctx->cleanup_addr = proglen; + /* mov rbx, qword ptr [rbp-X] */ + EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize); + /* mov r13, qword ptr [rbp-X] */ + EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8); + /* mov r14, qword ptr [rbp-X] */ + EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16); + /* mov r15, qword ptr [rbp-X] */ + EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24); + + EMIT1(0xC9); /* leave */ + EMIT1(0xC3); /* ret */ + break; + + default: + /* By design x64 JIT should support all BPF instructions + * This error will be seen if new instruction was added + * to interpreter, but not to JIT + * or if there is junk in sk_filter + */ + pr_err("bpf_jit: unknown opcode %02x\n", insn->code); + return -EINVAL; } - /* last bpf instruction is always a RET : - * use it to give the cleanup instruction(s) addr - */ - cleanup_addr = proglen - 1; /* ret */ - if (seen_or_pass0) - cleanup_addr -= 1; /* leaveq */ - if (seen_or_pass0 & SEEN_XREG) - cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ + ilen = prog - temp; + if (image) { + if (unlikely(proglen + ilen > oldproglen)) { + pr_err("bpf_jit_compile fatal error\n"); + return -EFAULT; + } + memcpy(image + proglen, temp, ilen); + } + proglen += ilen; + addrs[i] = proglen; + prog = temp; + } + return proglen; +} + +void bpf_jit_compile(struct sk_filter *prog) +{ +} + +void bpf_int_jit_compile(struct sk_filter *prog) +{ + struct bpf_binary_header *header = NULL; + int proglen, oldproglen = 0; + struct jit_context ctx = {}; + u8 *image = NULL; + int *addrs; + int pass; + int i; + + if (!bpf_jit_enable) + return; + + if (!prog || !prog->len) + return; + + addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL); + if (!addrs) + return; + + /* Before first pass, make a rough estimation of addrs[] + * each bpf instruction is translated to less than 64 bytes + */ + for (proglen = 0, i = 0; i < prog->len; i++) { + proglen += 64; + addrs[i] = proglen; + } + ctx.cleanup_addr = proglen; + + for (pass = 0; pass < 10; pass++) 
{ + proglen = do_jit(prog, addrs, image, oldproglen, &ctx); + if (proglen <= 0) { + image = NULL; + if (header) + module_free(NULL, header); + goto out; + } if (image) { if (proglen != oldproglen) - pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n", proglen, oldproglen); + pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", + proglen, oldproglen); break; } if (proglen == oldproglen) { @@ -766,17 +918,16 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; } if (bpf_jit_enable > 1) - bpf_jit_dump(flen, proglen, pass, image); + bpf_jit_dump(prog->len, proglen, 0, image); if (image) { bpf_flush_icache(header, image + proglen); set_memory_ro((unsigned long)header, header->pages); - fp->bpf_func = (void *)image; - fp->jited = 1; + prog->bpf_func = (void *)image; + prog->jited = 1; } out: kfree(addrs); - return; } static void bpf_jit_free_deferred(struct work_struct *work) diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 9769df0..3c0809a 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -9,18 +9,9 @@ VDSOX32-$(CONFIG_X86_X32_ABI) := y VDSO32-$(CONFIG_X86_32) := y VDSO32-$(CONFIG_COMPAT) := y -vdso-install-$(VDSO64-y) += vdso.so -vdso-install-$(VDSOX32-y) += vdsox32.so -vdso-install-$(VDSO32-y) += $(vdso32-images) - - # files to link into the vdso -vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o - -vobjs-$(VDSOX32-y) += $(vobjx32s-compat) - -# Filter out x32 objects. -vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y)) +vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o +vobjs-nox32 := vdso-fakesections.o # files to link into kernel obj-y += vma.o @@ -34,7 +25,7 @@ vdso_img-$(VDSO32-y) += 32-sysenter obj-$(VDSO32-y) += vdso32-setup.o -vobjs := $(foreach F,$(vobj64s),$(obj)/$F) +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) $(obj)/vdso.o: $(obj)/vdso.so @@ -104,7 +95,13 @@ VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \ -Wl,-z,max-page-size=4096 \ -Wl,-z,common-page-size=4096 -vobjx32s-y := $(vobj64s:.o=-x32.o) +# 64-bit objects to re-brand as x32 +vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y)) + +# x32-rebranded versions +vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o) + +# same thing, but in the output directory vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F) # Convert 64bit object file to x32 for x32 vDSO. @@ -176,15 +173,20 @@ VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ GCOV_PROFILE := n # -# Install the unstripped copy of vdso*.so listed in $(vdso-install-y). +# Install the unstripped copies of vdso*.so. 
# -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ -$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE +quiet_cmd_vdso_install = INSTALL $(@:install_%=%) + cmd_vdso_install = cp $< $(MODLIB)/vdso/$(@:install_%=%) + +vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%) + +$(MODLIB)/vdso: FORCE @mkdir -p $(MODLIB)/vdso + +$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE $(call cmd,vdso_install) -PHONY += vdso_install $(vdso-install-y) -vdso_install: $(vdso-install-y) +PHONY += vdso_install $(vdso_img_insttargets) +vdso_install: $(vdso_img_insttargets) FORCE clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c new file mode 100644 index 0000000..cb8a8d7 --- /dev/null +++ b/arch/x86/vdso/vdso-fakesections.c @@ -0,0 +1,32 @@ +/* + * Copyright 2014 Andy Lutomirski + * Subject to the GNU Public License, v.2 + * + * Hack to keep broken Go programs working. + * + * The Go runtime had a couple of bugs: it would read the section table to try + * to figure out how many dynamic symbols there were (it shouldn't have looked + * at the section table at all) and, if there were no SHT_SYNDYM section table + * entry, it would use an uninitialized value for the number of symbols. As a + * workaround, we supply a minimal section table. vdso2c will adjust the + * in-memory image so that "vdso_fake_sections" becomes the section table. + * + * The bug was introduced by: + * https://code.google.com/p/go/source/detail?r=56ea40aac72b (2012-08-31) + * and is being addressed in the Go runtime in this issue: + * https://code.google.com/p/go/issues/detail?id=8197 + */ + +#ifndef __x86_64__ +#error This hack is specific to the 64-bit vDSO +#endif + +#include <linux/elf.h> + +extern const __visible struct elf64_shdr vdso_fake_sections[]; +const __visible struct elf64_shdr vdso_fake_sections[] = { + { + .sh_type = SHT_DYNSYM, + .sh_entsize = sizeof(Elf64_Sym), + } +}; diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c index 450ac6e..7a6bf50 100644 --- a/arch/x86/vdso/vdso2c.c +++ b/arch/x86/vdso/vdso2c.c @@ -54,7 +54,7 @@ static void fail(const char *format, ...) } /* - * Evil macros to do a little-endian read. + * Evil macros for little-endian reads and writes */ #define GLE(x, bits, ifnot) \ __builtin_choose_expr( \ @@ -62,11 +62,24 @@ static void fail(const char *format, ...) 
(__typeof__(*(x)))get_unaligned_le##bits(x), ifnot) extern void bad_get_le(void); -#define LAST_LE(x) \ +#define LAST_GLE(x) \ __builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le()) #define GET_LE(x) \ - GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_LE(x)))) + GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x)))) + +#define PLE(x, val, bits, ifnot) \ + __builtin_choose_expr( \ + (sizeof(*(x)) == bits/8), \ + put_unaligned_le##bits((val), (x)), ifnot) + +extern void bad_put_le(void); +#define LAST_PLE(x, val) \ + __builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le()) + +#define PUT_LE(x, val) \ + PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val)))) + #define NSYMS (sizeof(required_syms) / sizeof(required_syms[0])) diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h index 8a07463..c6eefaf 100644 --- a/arch/x86/vdso/vdso2c.h +++ b/arch/x86/vdso/vdso2c.h @@ -18,6 +18,8 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) const char *secstrings; uint64_t syms[NSYMS] = {}; + uint64_t fake_sections_value = 0, fake_sections_size = 0; + Elf_Phdr *pt = (Elf_Phdr *)(addr + GET_LE(&hdr->e_phoff)); /* Walk the segment table. */ @@ -84,6 +86,7 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) GET_LE(&symtab_hdr->sh_entsize) * i; const char *name = addr + GET_LE(&strtab_hdr->sh_offset) + GET_LE(&sym->st_name); + for (k = 0; k < NSYMS; k++) { if (!strcmp(name, required_syms[k])) { if (syms[k]) { @@ -93,6 +96,13 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) syms[k] = GET_LE(&sym->st_value); } } + + if (!strcmp(name, "vdso_fake_sections")) { + if (fake_sections_value) + fail("duplicate vdso_fake_sections\n"); + fake_sections_value = GET_LE(&sym->st_value); + fake_sections_size = GET_LE(&sym->st_size); + } } /* Validate mapping addresses. */ @@ -112,11 +122,14 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) if (syms[sym_end_mapping] % 4096) fail("end_mapping must be a multiple of 4096\n"); - /* Remove sections. */ - hdr->e_shoff = 0; - hdr->e_shentsize = 0; - hdr->e_shnum = 0; - hdr->e_shstrndx = htole16(SHN_UNDEF); + /* Remove sections or use fakes */ + if (fake_sections_size % sizeof(Elf_Shdr)) + fail("vdso_fake_sections size is not a multiple of %ld\n", + (long)sizeof(Elf_Shdr)); + PUT_LE(&hdr->e_shoff, fake_sections_value); + PUT_LE(&hdr->e_shentsize, fake_sections_value ? sizeof(Elf_Shdr) : 0); + PUT_LE(&hdr->e_shnum, fake_sections_size / sizeof(Elf_Shdr)); + PUT_LE(&hdr->e_shstrndx, SHN_UNDEF); if (!name) { fwrite(addr, load_size, 1, outfile); |
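For readers unfamiliar with the x86-64 instruction encoding used by the new eBPF JIT in the bpf_jit_comp.c hunk above, here is a minimal user-space sketch of its REX/ModRM helpers (reg2hex, is_ereg, add_2mod, add_2reg). The helper names and the register mapping are copied from the patch; the enum constants and the main() driver are illustrative stand-ins for the BPF_REG_* values in filter.h, not kernel code, so treat this as a sketch rather than the actual implementation.

/*
 * Sketch of the encoding helpers introduced in bpf_jit_comp.c.
 * R0..R9, RFP, RAUX are stand-ins for BPF_REG_0..BPF_REG_9, BPF_REG_FP
 * and the JIT's AUX_REG; everything else mirrors the patch.
 */
#include <stdio.h>

enum { R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, RFP, RAUX, NREGS };

/* low 3 bits of the x86-64 register number, as in the patch's reg2hex[] */
static const int reg2hex[NREGS] = {
	[R0] = 0,  /* rax */  [R1] = 7,  /* rdi */  [R2] = 6,   /* rsi */
	[R3] = 2,  /* rdx */  [R4] = 1,  /* rcx */  [R5] = 0,   /* r8  */
	[R6] = 3,  /* rbx */  [R7] = 5,  /* r13 */  [R8] = 6,   /* r14 */
	[R9] = 7,  /* r15 */  [RFP] = 5, /* rbp */  [RAUX] = 3, /* r11 */
};

/* true for BPF registers mapped to x86-64 r8..r15, which need a REX bit */
static int is_ereg(int reg)
{
	return reg == R5 || reg == RAUX || (reg >= R7 && reg <= R9);
}

static unsigned char add_2mod(unsigned char byte, int r1, int r2)
{
	if (is_ereg(r1)) byte |= 1;	/* REX.B extends ModRM.rm  (dst) */
	if (is_ereg(r2)) byte |= 4;	/* REX.R extends ModRM.reg (src) */
	return byte;
}

static unsigned char add_2reg(unsigned char byte, int dst, int src)
{
	return byte + reg2hex[dst] + (reg2hex[src] << 3);
}

int main(void)
{
	/* 64-bit "mov dst, src", as emitted by the patch's EMIT_mov() macro */
	int dst = R7, src = R1;	/* r13 <- rdi */

	printf("%02x %02x %02x\n",
	       add_2mod(0x48, dst, src),	/* REX.W, plus REX.B for r13 */
	       0x89,				/* opcode: mov r/m64, r64 */
	       add_2reg(0xC0, dst, src));	/* ModRM: reg=rdi, rm=r13 */
	return 0;
}

Compiled and run, this prints "49 89 fd", i.e. mov %rdi,%r13 in AT&T syntax: the REX prefix picks up the W bit for a 64-bit operation and the B bit because r13 is one of the extended registers, which is exactly the distinction the is_ereg() test in the patch exists to make.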