diff options
Diffstat (limited to 'arch')
34 files changed, 463 insertions, 186 deletions
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 15051e9..b054c5c 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -127,7 +127,7 @@ int smp_num_siblings = 1; volatile int ia64_cpu_to_sapicid[NR_CPUS]; EXPORT_SYMBOL(ia64_cpu_to_sapicid); -static volatile cpumask_t cpu_callin_map; +static cpumask_t cpu_callin_map; struct smp_boot_data smp_boot_data __initdata; @@ -477,6 +477,7 @@ do_boot_cpu (int sapicid, int cpu, struct task_struct *idle) for (timeout = 0; timeout < 100000; timeout++) { if (cpumask_test_cpu(cpu, &cpu_callin_map)) break; /* It has booted */ + barrier(); /* Make sure we re-read cpu_callin_map */ udelay(100); } Dprintk("\n"); diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index a73c93c..7fc8397 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -225,7 +225,7 @@ void __init plat_time_init(void) ddr_clk_rate = ath79_get_sys_clk_rate("ddr"); ref_clk_rate = ath79_get_sys_clk_rate("ref"); - pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz", + pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz\n", cpu_clk_rate / 1000000, (cpu_clk_rate / 1000) % 1000, ddr_clk_rate / 1000000, (ddr_clk_rate / 1000) % 1000, ahb_clk_rate / 1000000, (ahb_clk_rate / 1000) % 1000, diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index e36515d..209e5b7 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -74,13 +74,12 @@ static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c) { unsigned long sr, mask, fcsr, fcsr0, fcsr1; + fcsr = c->fpu_csr31; mask = FPU_CSR_ALL_X | FPU_CSR_ALL_E | FPU_CSR_ALL_S | FPU_CSR_RM; sr = read_c0_status(); __enable_fpu(FPU_AS_IS); - fcsr = read_32bit_cp1_register(CP1_STATUS); - fcsr0 = fcsr & mask; write_32bit_cp1_register(CP1_STATUS, fcsr0); fcsr0 = read_32bit_cp1_register(CP1_STATUS); diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 51f57d8..3c8a18a 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -109,7 +109,7 @@ void __init init_IRQ(void) #endif } -#ifdef DEBUG_STACKOVERFLOW +#ifdef CONFIG_DEBUG_STACKOVERFLOW static inline void check_stack_overflow(void) { unsigned long sp; diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 4b50c57..d5fa3ea 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -2409,7 +2409,7 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, if (vcpu->mmio_needed == 2) *gpr = *(int16_t *) run->mmio.data; else - *gpr = *(int16_t *) run->mmio.data; + *gpr = *(uint16_t *)run->mmio.data; break; case 1: diff --git a/arch/mips/loongson/loongson-3/smp.c b/arch/mips/loongson/loongson-3/smp.c index e3c68b5..509877c 100644 --- a/arch/mips/loongson/loongson-3/smp.c +++ b/arch/mips/loongson/loongson-3/smp.c @@ -272,7 +272,7 @@ void loongson3_ipi_interrupt(struct pt_regs *regs) if (action & SMP_ASK_C0COUNT) { BUG_ON(cpu != 0); c0count = read_c0_count(); - for (i = 1; i < loongson_sysconf.nr_cpus; i++) + for (i = 1; i < num_possible_cpus(); i++) per_cpu(core0_c0count, i) = c0count; } } diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 0dbb65a..2e03ab1 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1372,7 +1372,7 @@ static int probe_scache(void) scache_size = addr; c->scache.linesz = 16 << ((config & R4K_CONF_SB) >> 22); c->scache.ways = 1; - c->dcache.waybit = 0; /* does not matter */ + c->scache.waybit = 0; /* does not matter */ return 1; } diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 5d61393..e23fdf2 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -681,11 +681,7 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx) sp_off += config_enabled(CONFIG_64BIT) ? (ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE; - /* - * Subtract the bytes for the last registers since we only care about - * the location on the stack pointer. - */ - return sp_off - RSIZE; + return sp_off; } static void build_prologue(struct jit_ctx *ctx) diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index e20b02e..e10d10b 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -41,7 +41,7 @@ static irqreturn_t ill_acc_irq_handler(int irq, void *_priv) addr, (type >> ILL_ACC_OFF_S) & ILL_ACC_OFF_M, type & ILL_ACC_LEN_M); - rt_memc_w32(REG_ILL_ACC_TYPE, REG_ILL_ACC_TYPE); + rt_memc_w32(ILL_INT_STATUS, REG_ILL_ACC_TYPE); return IRQ_HANDLED; } diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index a6e424d..a6cfdab 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -24,7 +24,8 @@ typedef struct { unsigned int icache_line_size; unsigned int ecache_size; unsigned int ecache_line_size; - int core_id; + unsigned short sock_id; + unsigned short core_id; int proc_id; } cpuinfo_sparc; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index dc165eb..2a52c91 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -308,12 +308,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) " sllx %1, 32, %1\n" " or %0, %1, %0\n" " .previous\n" + " .section .sun_m7_2insn_patch, \"ax\"\n" + " .word 661b\n" + " sethi %%uhi(%4), %1\n" + " sethi %%hi(%4), %0\n" + " .word 662b\n" + " or %1, %%ulo(%4), %1\n" + " or %0, %%lo(%4), %0\n" + " .word 663b\n" + " sllx %1, 32, %1\n" + " or %0, %1, %0\n" + " .previous\n" : "=r" (mask), "=r" (tmp) : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U), "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | + _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V), + "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | + _PAGE_CP_4V | _PAGE_E_4V | _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V)); return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); @@ -342,9 +356,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) " andn %0, %4, %0\n" " or %0, %5, %0\n" " .previous\n" + " .section .sun_m7_2insn_patch, \"ax\"\n" + " .word 661b\n" + " andn %0, %6, %0\n" + " or %0, %5, %0\n" + " .previous\n" : "=r" (val) : "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U), - "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V)); + "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V), + "i" (_PAGE_CP_4V)); return __pgprot(val); } diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index ed8f071..d1761df 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -40,11 +40,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #ifdef CONFIG_SMP #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) #define topology_core_id(cpu) (cpu_data(cpu).core_id) -#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #endif /* CONFIG_SMP */ extern cpumask_t cpu_core_map[NR_CPUS]; +extern cpumask_t cpu_core_sib_map[NR_CPUS]; static inline const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_core_map[cpu]; diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h index 6fd4436..ec9c04d 100644 --- a/arch/sparc/include/asm/trap_block.h +++ b/arch/sparc/include/asm/trap_block.h @@ -79,6 +79,8 @@ struct sun4v_2insn_patch_entry { }; extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, __sun4v_2insn_patch_end; +extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch, + __sun_m7_2insn_patch_end; #endif /* !(__ASSEMBLY__) */ diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index 07cc49e5..0f67942 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -69,6 +69,8 @@ void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, struct sun4v_1insn_patch_entry *); void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, struct sun4v_2insn_patch_entry *); +void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *, + struct sun4v_2insn_patch_entry *); extern unsigned int dcache_parity_tl1_occurred; extern unsigned int icache_parity_tl1_occurred; diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index 94e392b..814fb17 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -723,7 +723,6 @@ static int grpci2_of_probe(struct platform_device *ofdev) err = -ENOMEM; goto err1; } - memset(grpci2priv, 0, sizeof(*grpci2priv)); priv->regs = regs; priv->irq = ofdev->archdata.irqs[0]; /* BASE IRQ */ priv->irq_mode = (capability & STS_IRQMODE) >> STS_IRQMODE_BIT; diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 26c80e1..6f80936 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -614,45 +614,68 @@ static void fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp) } } -static void mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id) +static void find_back_node_value(struct mdesc_handle *hp, u64 node, + char *srch_val, + void (*func)(struct mdesc_handle *, u64, int), + u64 val, int depth) { - u64 a; - - mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { - u64 t = mdesc_arc_target(hp, a); - const char *name; - const u64 *id; + u64 arc; - name = mdesc_node_name(hp, t); - if (!strcmp(name, "cpu")) { - id = mdesc_get_property(hp, t, "id", NULL); - if (*id < NR_CPUS) - cpu_data(*id).core_id = core_id; - } else { - u64 j; + /* Since we have an estimate of recursion depth, do a sanity check. */ + if (depth == 0) + return; - mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) { - u64 n = mdesc_arc_target(hp, j); - const char *n_name; + mdesc_for_each_arc(arc, hp, node, MDESC_ARC_TYPE_BACK) { + u64 n = mdesc_arc_target(hp, arc); + const char *name = mdesc_node_name(hp, n); - n_name = mdesc_node_name(hp, n); - if (strcmp(n_name, "cpu")) - continue; + if (!strcmp(srch_val, name)) + (*func)(hp, n, val); - id = mdesc_get_property(hp, n, "id", NULL); - if (*id < NR_CPUS) - cpu_data(*id).core_id = core_id; - } - } + find_back_node_value(hp, n, srch_val, func, val, depth-1); } } +static void __mark_core_id(struct mdesc_handle *hp, u64 node, + int core_id) +{ + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + if (*id < num_possible_cpus()) + cpu_data(*id).core_id = core_id; +} + +static void __mark_sock_id(struct mdesc_handle *hp, u64 node, + int sock_id) +{ + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + if (*id < num_possible_cpus()) + cpu_data(*id).sock_id = sock_id; +} + +static void mark_core_ids(struct mdesc_handle *hp, u64 mp, + int core_id) +{ + find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10); +} + +static void mark_sock_ids(struct mdesc_handle *hp, u64 mp, + int sock_id) +{ + find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10); +} + static void set_core_ids(struct mdesc_handle *hp) { int idx; u64 mp; idx = 1; + + /* Identify unique cores by looking for cpus backpointed to by + * level 1 instruction caches. + */ mdesc_for_each_node_by_name(hp, mp, "cache") { const u64 *level; const char *type; @@ -667,11 +690,72 @@ static void set_core_ids(struct mdesc_handle *hp) continue; mark_core_ids(hp, mp, idx); + idx++; + } +} + +static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) +{ + u64 mp; + int idx = 1; + int fnd = 0; + + /* Identify unique sockets by looking for cpus backpointed to by + * shared level n caches. + */ + mdesc_for_each_node_by_name(hp, mp, "cache") { + const u64 *cur_lvl; + + cur_lvl = mdesc_get_property(hp, mp, "level", NULL); + if (*cur_lvl != level) + continue; + + mark_sock_ids(hp, mp, idx); + idx++; + fnd = 1; + } + return fnd; +} + +static void set_sock_ids_by_socket(struct mdesc_handle *hp, u64 mp) +{ + int idx = 1; + mdesc_for_each_node_by_name(hp, mp, "socket") { + u64 a; + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 t = mdesc_arc_target(hp, a); + const char *name; + const u64 *id; + + name = mdesc_node_name(hp, t); + if (strcmp(name, "cpu")) + continue; + + id = mdesc_get_property(hp, t, "id", NULL); + if (*id < num_possible_cpus()) + cpu_data(*id).sock_id = idx; + } idx++; } } +static void set_sock_ids(struct mdesc_handle *hp) +{ + u64 mp; + + /* If machine description exposes sockets data use it. + * Otherwise fallback to use shared L3 or L2 caches. + */ + mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets"); + if (mp != MDESC_NODE_NULL) + return set_sock_ids_by_socket(hp, mp); + + if (!set_sock_ids_by_cache(hp, 3)) + set_sock_ids_by_cache(hp, 2); +} + static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) { u64 a; @@ -707,7 +791,6 @@ static void __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name) continue; mark_proc_ids(hp, mp, idx); - idx++; } } @@ -900,6 +983,7 @@ void mdesc_fill_in_cpu_data(cpumask_t *mask) set_core_ids(hp); set_proc_ids(hp); + set_sock_ids(hp); mdesc_release(hp); diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 6f7251f..c928bc6 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -1002,6 +1002,38 @@ static int __init pcibios_init(void) subsys_initcall(pcibios_init); #ifdef CONFIG_SYSFS + +#define SLOT_NAME_SIZE 11 /* Max decimal digits + null in u32 */ + +static void pcie_bus_slot_names(struct pci_bus *pbus) +{ + struct pci_dev *pdev; + struct pci_bus *bus; + + list_for_each_entry(pdev, &pbus->devices, bus_list) { + char name[SLOT_NAME_SIZE]; + struct pci_slot *pci_slot; + const u32 *slot_num; + int len; + + slot_num = of_get_property(pdev->dev.of_node, + "physical-slot#", &len); + + if (slot_num == NULL || len != 4) + continue; + + snprintf(name, sizeof(name), "%u", slot_num[0]); + pci_slot = pci_create_slot(pbus, slot_num[0], name, NULL); + + if (IS_ERR(pci_slot)) + pr_err("PCI: pci_create_slot returned %ld.\n", + PTR_ERR(pci_slot)); + } + + list_for_each_entry(bus, &pbus->children, node) + pcie_bus_slot_names(bus); +} + static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus) { const struct pci_slot_names { @@ -1053,18 +1085,29 @@ static int __init of_pci_slot_init(void) while ((pbus = pci_find_next_bus(pbus)) != NULL) { struct device_node *node; + struct pci_dev *pdev; + + pdev = list_first_entry(&pbus->devices, struct pci_dev, + bus_list); - if (pbus->self) { - /* PCI->PCI bridge */ - node = pbus->self->dev.of_node; + if (pdev && pci_is_pcie(pdev)) { + pcie_bus_slot_names(pbus); } else { - struct pci_pbm_info *pbm = pbus->sysdata; - /* Host PCI controller */ - node = pbm->op->dev.of_node; - } + if (pbus->self) { + + /* PCI->PCI bridge */ + node = pbus->self->dev.of_node; + + } else { + struct pci_pbm_info *pbm = pbus->sysdata; - pci_bus_slot_names(node, pbus); + /* Host PCI controller */ + node = pbm->op->dev.of_node; + } + + pci_bus_slot_names(node, pbus); + } } return 0; diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index c38d19f..f7b2617 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -255,6 +255,24 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start, } } +void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *start, + struct sun4v_2insn_patch_entry *end) +{ + while (start < end) { + unsigned long addr = start->addr; + + *(unsigned int *) (addr + 0) = start->insns[0]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = start->insns[1]; + wmb(); + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + start++; + } +} + static void __init sun4v_patch(void) { extern void sun4v_hvapi_init(void); @@ -267,6 +285,9 @@ static void __init sun4v_patch(void) sun4v_patch_2insn_range(&__sun4v_2insn_patch, &__sun4v_2insn_patch_end); + if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7) + sun_m7_patch_2insn_range(&__sun_m7_2insn_patch, + &__sun_m7_2insn_patch_end); sun4v_hvapi_init(); } diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 61139d9..19cd08d 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -60,8 +60,12 @@ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; +cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS-1] = CPU_MASK_NONE }; + EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); +EXPORT_SYMBOL(cpu_core_sib_map); static cpumask_t smp_commenced_mask; @@ -1243,6 +1247,15 @@ void smp_fill_in_sib_core_maps(void) } } + for_each_present_cpu(i) { + unsigned int j; + + for_each_present_cpu(j) { + if (cpu_data(i).sock_id == cpu_data(j).sock_id) + cpumask_set_cpu(j, &cpu_core_sib_map[i]); + } + } + for_each_present_cpu(i) { unsigned int j; diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 0924305..f1a2f68 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -138,6 +138,11 @@ SECTIONS *(.pause_3insn_patch) __pause_3insn_patch_end = .; } + .sun_m7_2insn_patch : { + __sun_m7_2insn_patch = .; + *(.sun_m7_2insn_patch) + __sun_m7_2insn_patch_end = .; + } PERCPU_SECTION(SMP_CACHE_BYTES) . = ALIGN(PAGE_SIZE); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 4ca0d6b..559cb744 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -54,6 +54,7 @@ #include "init_64.h" unsigned long kern_linear_pte_xor[4] __read_mostly; +static unsigned long page_cache4v_flag; /* A bitmap, two bits for every 256MB of physical memory. These two * bits determine what page size we use for kernel linear @@ -1909,11 +1910,24 @@ static void __init sun4u_linear_pte_xor_finalize(void) static void __init sun4v_linear_pte_xor_finalize(void) { + unsigned long pagecv_flag; + + /* Bit 9 of TTE is no longer CV bit on M7 processor and it instead + * enables MCD error. Do not set bit 9 on M7 processor. + */ + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + pagecv_flag = 0x00; + break; + default: + pagecv_flag = _PAGE_CV_4V; + break; + } #ifndef CONFIG_DEBUG_PAGEALLOC if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[1] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; @@ -1922,7 +1936,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[2] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[2] = kern_linear_pte_xor[1]; @@ -1931,7 +1945,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ PAGE_OFFSET; - kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | + kern_linear_pte_xor[3] |= (_PAGE_CP_4V | pagecv_flag | _PAGE_P_4V | _PAGE_W_4V); } else { kern_linear_pte_xor[3] = kern_linear_pte_xor[2]; @@ -1958,6 +1972,13 @@ static phys_addr_t __init available_memory(void) return available; } +#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) +#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) +#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) +#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) +#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) +#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) + /* We need to exclude reserved regions. This exclusion will include * vmlinux and initrd. To be more precise the initrd size could be used to * compute a new lower limit because it is freed later during initialization. @@ -2034,6 +2055,25 @@ void __init paging_init(void) memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); #endif + /* TTE.cv bit on sparc v9 occupies the same position as TTE.mcde + * bit on M7 processor. This is a conflicting usage of the same + * bit. Enabling TTE.cv on M7 would turn on Memory Corruption + * Detection error on all pages and this will lead to problems + * later. Kernel does not run with MCD enabled and hence rest + * of the required steps to fully configure memory corruption + * detection are not taken. We need to ensure TTE.mcde is not + * set on M7 processor. Compute the value of cacheability + * flag for use later taking this into consideration. + */ + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + page_cache4v_flag = _PAGE_CP_4V; + break; + default: + page_cache4v_flag = _PAGE_CACHE_4V; + break; + } + if (tlb_type == hypervisor) sun4v_pgprot_init(); else @@ -2274,13 +2314,6 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) -#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) -#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) -#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) -#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) -#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) - pgprot_t PAGE_KERNEL __read_mostly; EXPORT_SYMBOL(PAGE_KERNEL); @@ -2312,8 +2345,7 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, _PAGE_P_4U | _PAGE_W_4U); if (tlb_type == hypervisor) pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V | - _PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + page_cache4v_flag | _PAGE_P_4V | _PAGE_W_4V); pte_base |= _PAGE_PMD_HUGE; @@ -2450,14 +2482,14 @@ static void __init sun4v_pgprot_init(void) int i; PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | - _PAGE_CACHE_4V | _PAGE_P_4V | + page_cache4v_flag | _PAGE_P_4V | __ACCESS_BITS_4V | __DIRTY_BITS_4V | _PAGE_EXEC_4V); PAGE_KERNEL_LOCKED = PAGE_KERNEL; _PAGE_IE = _PAGE_IE_4V; _PAGE_E = _PAGE_E_4V; - _PAGE_CACHE = _PAGE_CACHE_4V; + _PAGE_CACHE = page_cache4v_flag; #ifdef CONFIG_DEBUG_PAGEALLOC kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; @@ -2465,8 +2497,8 @@ static void __init sun4v_pgprot_init(void) kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ PAGE_OFFSET; #endif - kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + kern_linear_pte_xor[0] |= (page_cache4v_flag | _PAGE_P_4V | + _PAGE_W_4V); for (i = 1; i < 4; i++) kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; @@ -2479,12 +2511,12 @@ static void __init sun4v_pgprot_init(void) _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | _PAGE_SZ64K_4V | _PAGE_SZ8K_4V); - page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V; - page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | page_cache4v_flag; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V); - page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_EXEC_4V); - page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag | __ACCESS_BITS_4V | _PAGE_EXEC_4V); page_exec_bit = _PAGE_EXEC_4V; @@ -2542,7 +2574,7 @@ static unsigned long kern_large_tte(unsigned long paddr) _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U); if (tlb_type == hypervisor) val = (_PAGE_VALID | _PAGE_SZ4MB_4V | - _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | + page_cache4v_flag | _PAGE_P_4V | _PAGE_EXEC_4V | _PAGE_W_4V); return val | paddr; diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 89dd0d7..805d25c 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -2,15 +2,14 @@ #define BOOT_COMPRESSED_MISC_H /* - * we have to be careful, because no indirections are allowed here, and - * paravirt_ops is a kind of one. As it will only run in baremetal anyway, - * we just keep it from happening + * Special hack: we have to be careful, because no indirections are allowed here, + * and paravirt_ops is a kind of one. As it will only run in baremetal anyway, + * we just keep it from happening. (This list needs to be extended when new + * paravirt and debugging variants are added.) */ #undef CONFIG_PARAVIRT +#undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_KASAN -#ifdef CONFIG_X86_32 -#define _ASM_X86_DESC_H 1 -#endif #include <linux/linkage.h> #include <linux/screen_info.h> diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 19507ff..5fabf13 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -107,7 +107,7 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) static inline int user_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 - return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL; + return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; #else return !!(regs->cs & 3); #endif diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 5a9856e..7d5a192 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -231,11 +231,21 @@ #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) #ifdef __KERNEL__ + +/* + * early_idt_handler_array is an array of entry points referenced in the + * early IDT. For simplicity, it's a real array with one entry point + * every nine bytes. That leaves room for an optional 'push $0' if the + * vector has no error code (two bytes), a 'push $vector_number' (two + * bytes), and a jump to the common entry code (up to five bytes). + */ +#define EARLY_IDT_HANDLER_SIZE 9 + #ifndef __ASSEMBLY__ -extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; +extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; #ifdef CONFIG_TRACING -# define trace_early_idt_handlers early_idt_handlers +# define trace_early_idt_handler_array early_idt_handler_array #endif /* diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 87848eb..4f7001f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -190,6 +190,7 @@ static bool check_hw_exists(void) u64 val, val_fail, val_new= ~0; int i, reg, reg_fail, ret = 0; int bios_fail = 0; + int reg_safe = -1; /* * Check to see if the BIOS enabled any of the counters, if so @@ -204,6 +205,8 @@ static bool check_hw_exists(void) bios_fail = 1; val_fail = val; reg_fail = reg; + } else { + reg_safe = i; } } @@ -222,11 +225,22 @@ static bool check_hw_exists(void) } /* + * If all the counters are enabled, the below test will always + * fail. The tools will also become useless in this scenario. + * Just fail and disable the hardware counters. + */ + + if (reg_safe == -1) { + reg = reg_safe; + goto msr_fail; + } + + /* * Read the current value, change it and read it back to see if it * matches, this is needed to detect certain hardware emulators * (qemu/kvm) that don't trap on the MSR access and always return 0s. */ - reg = x86_pmu_event_addr(0); + reg = x86_pmu_event_addr(reg_safe); if (rdmsrl_safe(reg, &val)) goto msr_fail; val ^= 0xffffUL; @@ -611,6 +625,7 @@ struct sched_state { int event; /* event index */ int counter; /* counter index */ int unassigned; /* number of events to be assigned left */ + int nr_gp; /* number of GP counters used */ unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; }; @@ -620,27 +635,29 @@ struct sched_state { struct perf_sched { int max_weight; int max_events; - struct perf_event **events; - struct sched_state state; + int max_gp; int saved_states; + struct event_constraint **constraints; + struct sched_state state; struct sched_state saved[SCHED_STATES_MAX]; }; /* * Initialize interator that runs through all events and counters. */ -static void perf_sched_init(struct perf_sched *sched, struct perf_event **events, - int num, int wmin, int wmax) +static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints, + int num, int wmin, int wmax, int gpmax) { int idx; memset(sched, 0, sizeof(*sched)); sched->max_events = num; sched->max_weight = wmax; - sched->events = events; + sched->max_gp = gpmax; + sched->constraints = constraints; for (idx = 0; idx < num; idx++) { - if (events[idx]->hw.constraint->weight == wmin) + if (constraints[idx]->weight == wmin) break; } @@ -687,7 +704,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) if (sched->state.event >= sched->max_events) return false; - c = sched->events[sched->state.event]->hw.constraint; + c = sched->constraints[sched->state.event]; /* Prefer fixed purpose counters */ if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { idx = INTEL_PMC_IDX_FIXED; @@ -696,11 +713,16 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) goto done; } } + /* Grab the first unused counter starting with idx */ idx = sched->state.counter; for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { - if (!__test_and_set_bit(idx, sched->state.used)) + if (!__test_and_set_bit(idx, sched->state.used)) { + if (sched->state.nr_gp++ >= sched->max_gp) + return false; + goto done; + } } return false; @@ -745,7 +767,7 @@ static bool perf_sched_next_event(struct perf_sched *sched) if (sched->state.weight > sched->max_weight) return false; } - c = sched->events[sched->state.event]->hw.constraint; + c = sched->constraints[sched->state.event]; } while (c->weight != sched->state.weight); sched->state.counter = 0; /* start with first counter */ @@ -756,12 +778,12 @@ static bool perf_sched_next_event(struct perf_sched *sched) /* * Assign a counter for each event. */ -int perf_assign_events(struct perf_event **events, int n, - int wmin, int wmax, int *assign) +int perf_assign_events(struct event_constraint **constraints, int n, + int wmin, int wmax, int gpmax, int *assign) { struct perf_sched sched; - perf_sched_init(&sched, events, n, wmin, wmax); + perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax); do { if (!perf_sched_find_counter(&sched)) @@ -788,9 +810,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) x86_pmu.start_scheduling(cpuc); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { - hwc = &cpuc->event_list[i]->hw; + cpuc->event_constraint[i] = NULL; c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); - hwc->constraint = c; + cpuc->event_constraint[i] = c; wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); @@ -801,7 +823,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) */ for (i = 0; i < n; i++) { hwc = &cpuc->event_list[i]->hw; - c = hwc->constraint; + c = cpuc->event_constraint[i]; /* never assigned */ if (hwc->idx == -1) @@ -821,9 +843,26 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) } /* slow path */ - if (i != n) - unsched = perf_assign_events(cpuc->event_list, n, wmin, - wmax, assign); + if (i != n) { + int gpmax = x86_pmu.num_counters; + + /* + * Do not allow scheduling of more than half the available + * generic counters. + * + * This helps avoid counter starvation of sibling thread by + * ensuring at most half the counters cannot be in exclusive + * mode. There is no designated counters for the limits. Any + * N/2 counters can be used. This helps with events with + * specific counter constraints. + */ + if (is_ht_workaround_enabled() && !cpuc->is_fake && + READ_ONCE(cpuc->excl_cntrs->exclusive_present)) + gpmax /= 2; + + unsched = perf_assign_events(cpuc->event_constraint, n, wmin, + wmax, gpmax, assign); + } /* * In case of success (unsched = 0), mark events as committed, @@ -840,7 +879,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) e = cpuc->event_list[i]; e->hw.flags |= PERF_X86_EVENT_COMMITTED; if (x86_pmu.commit_scheduling) - x86_pmu.commit_scheduling(cpuc, e, assign[i]); + x86_pmu.commit_scheduling(cpuc, i, assign[i]); } } @@ -1292,8 +1331,10 @@ static void x86_pmu_del(struct perf_event *event, int flags) x86_pmu.put_event_constraints(cpuc, event); /* Delete the array entry. */ - while (++i < cpuc->n_events) + while (++i < cpuc->n_events) { cpuc->event_list[i-1] = cpuc->event_list[i]; + cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + } --cpuc->n_events; perf_event_update_userpage(event); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6ac5cb7..ef78516 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -74,6 +74,7 @@ struct event_constraint { #define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */ #define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */ #define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ +#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */ struct amd_nb { @@ -134,8 +135,6 @@ enum intel_excl_state_type { struct intel_excl_states { enum intel_excl_state_type init_state[X86_PMC_IDX_MAX]; enum intel_excl_state_type state[X86_PMC_IDX_MAX]; - int num_alloc_cntrs;/* #counters allocated */ - int max_alloc_cntrs;/* max #counters allowed */ bool sched_started; /* true if scheduling has started */ }; @@ -144,6 +143,11 @@ struct intel_excl_cntrs { struct intel_excl_states states[2]; + union { + u16 has_exclusive[2]; + u32 exclusive_present; + }; + int refcnt; /* per-core: #HT threads */ unsigned core_id; /* per-core: core id */ }; @@ -172,7 +176,11 @@ struct cpu_hw_events { added in the current transaction */ int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ u64 tags[X86_PMC_IDX_MAX]; + struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ + struct event_constraint *event_constraint[X86_PMC_IDX_MAX]; + + int n_excl; /* the number of exclusive events */ unsigned int group_flag; int is_fake; @@ -519,9 +527,7 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); - void (*commit_scheduling)(struct cpu_hw_events *cpuc, - struct perf_event *event, - int cntr); + void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr); void (*start_scheduling)(struct cpu_hw_events *cpuc); @@ -717,8 +723,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, void x86_pmu_enable_all(int added); -int perf_assign_events(struct perf_event **events, int n, - int wmin, int wmax, int *assign); +int perf_assign_events(struct event_constraint **constraints, int n, + int wmin, int wmax, int gpmax, int *assign); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); void x86_pmu_stop(struct perf_event *event, int flags); @@ -929,4 +935,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu) return NULL; } +static inline int is_ht_workaround_enabled(void) +{ + return 0; +} #endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3998131..a1e35c9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1923,7 +1923,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc) xl = &excl_cntrs->states[tid]; xl->sched_started = true; - xl->num_alloc_cntrs = 0; /* * lock shared state until we are done scheduling * in stop_event_scheduling() @@ -2000,6 +1999,11 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, * across HT threads */ is_excl = c->flags & PERF_X86_EVENT_EXCL; + if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { + event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; + if (!cpuc->n_excl++) + WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); + } /* * xl = state of current HT @@ -2008,18 +2012,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, xl = &excl_cntrs->states[tid]; xlo = &excl_cntrs->states[o_tid]; - /* - * do not allow scheduling of more than max_alloc_cntrs - * which is set to half the available generic counters. - * this helps avoid counter starvation of sibling thread - * by ensuring at most half the counters cannot be in - * exclusive mode. There is not designated counters for the - * limits. Any N/2 counters can be used. This helps with - * events with specifix counter constraints - */ - if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs) - return &emptyconstraint; - cx = c; /* @@ -2106,7 +2098,7 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = event->hw.constraint; + struct event_constraint *c1 = cpuc->event_constraint[idx]; struct event_constraint *c2; /* @@ -2150,6 +2142,11 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, xl = &excl_cntrs->states[tid]; xlo = &excl_cntrs->states[o_tid]; + if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { + hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; + if (!--cpuc->n_excl) + WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); + } /* * put_constraint may be called from x86_schedule_events() @@ -2188,8 +2185,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, static void intel_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - struct event_constraint *c = event->hw.constraint; - intel_put_shared_regs_event_constraints(cpuc, event); /* @@ -2197,19 +2192,14 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc, * all events are subject to and must call the * put_excl_constraints() routine */ - if (c && cpuc->excl_cntrs) + if (cpuc->excl_cntrs) intel_put_excl_constraints(cpuc, event); - - /* cleanup dynamic constraint */ - if (c && (c->flags & PERF_X86_EVENT_DYNAMIC)) - event->hw.constraint = NULL; } -static void intel_commit_scheduling(struct cpu_hw_events *cpuc, - struct perf_event *event, int cntr) +static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) { struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; - struct event_constraint *c = event->hw.constraint; + struct event_constraint *c = cpuc->event_constraint[idx]; struct intel_excl_states *xlo, *xl; int tid = cpuc->excl_thread_id; int o_tid = 1 - tid; @@ -2639,8 +2629,6 @@ static void intel_pmu_cpu_starting(int cpu) cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { - int h = x86_pmu.num_counters >> 1; - for_each_cpu(i, topology_thread_cpumask(cpu)) { struct intel_excl_cntrs *c; @@ -2654,11 +2642,6 @@ static void intel_pmu_cpu_starting(int cpu) } cpuc->excl_cntrs->core_id = core_id; cpuc->excl_cntrs->refcnt++; - /* - * set hard limit to half the number of generic counters - */ - cpuc->excl_cntrs->states[0].max_alloc_cntrs = h; - cpuc->excl_cntrs->states[1].max_alloc_cntrs = h; } } diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 813f75d..7f73b35 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -706,9 +706,9 @@ void intel_pmu_pebs_disable(struct perf_event *event) cpuc->pebs_enabled &= ~(1ULL << hwc->idx); - if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT) + if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); - else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST) + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) cpuc->pebs_enabled &= ~(1ULL << 63); if (cpuc->enabled) diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index ffe666c..123ff1b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -151,7 +151,7 @@ static int __init pt_pmu_hw_init(void) de_attr->attr.attr.name = pt_caps[i].name; - sysfs_attr_init(&de_attrs->attr.attr); + sysfs_attr_init(&de_attr->attr.attr); de_attr->attr.attr.mode = S_IRUGO; de_attr->attr.show = pt_cap_show; @@ -615,7 +615,8 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, struct perf_output_handle *handle) { - unsigned long idx, npages, end; + unsigned long head = local64_read(&buf->head); + unsigned long idx, npages, wakeup; if (buf->snapshot) return 0; @@ -634,17 +635,26 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, buf->topa_index[buf->stop_pos]->stop = 0; buf->topa_index[buf->intr_pos]->intr = 0; - if (pt_cap_get(PT_CAP_topa_multiple_entries)) { - npages = (handle->size + 1) >> PAGE_SHIFT; - end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages; - /*if (end > handle->wakeup >> PAGE_SHIFT) - end = handle->wakeup >> PAGE_SHIFT;*/ - idx = end & (buf->nr_pages - 1); - buf->stop_pos = idx; - idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1; - idx &= buf->nr_pages - 1; - buf->intr_pos = idx; - } + /* how many pages till the STOP marker */ + npages = handle->size >> PAGE_SHIFT; + + /* if it's on a page boundary, fill up one more page */ + if (!offset_in_page(head + handle->size + 1)) + npages++; + + idx = (head >> PAGE_SHIFT) + npages; + idx &= buf->nr_pages - 1; + buf->stop_pos = idx; + + wakeup = handle->wakeup >> PAGE_SHIFT; + + /* in the worst case, wake up the consumer one page before hard stop */ + idx = (head >> PAGE_SHIFT) + npages - 1; + if (idx > wakeup) + idx = wakeup; + + idx &= buf->nr_pages - 1; + buf->intr_pos = idx; buf->topa_index[buf->stop_pos]->stop = 1; buf->topa_index[buf->intr_pos]->intr = 1; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index c635b8b..dd319e5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -365,9 +365,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { - hwc = &box->event_list[i]->hw; c = uncore_get_event_constraint(box, box->event_list[i]); - hwc->constraint = c; + box->event_constraint[i] = c; wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); } @@ -375,7 +374,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int /* fastpath, try to reuse previous register */ for (i = 0; i < n; i++) { hwc = &box->event_list[i]->hw; - c = hwc->constraint; + c = box->event_constraint[i]; /* never assigned */ if (hwc->idx == -1) @@ -395,8 +394,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int } /* slow path */ if (i != n) - ret = perf_assign_events(box->event_list, n, - wmin, wmax, assign); + ret = perf_assign_events(box->event_constraint, n, + wmin, wmax, n, assign); if (!assign || ret) { for (i = 0; i < n; i++) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 6c8c1e7..f789ec9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -97,6 +97,7 @@ struct intel_uncore_box { atomic_t refcnt; struct perf_event *events[UNCORE_PMC_IDX_MAX]; struct perf_event *event_list[UNCORE_PMC_IDX_MAX]; + struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX]; unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; u64 tags[UNCORE_PMC_IDX_MAX]; struct pci_dev *pci_dev; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2b55ee6..5a46681 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -167,7 +167,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) clear_bss(); for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) - set_intr_gate(i, early_idt_handlers[i]); + set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); copy_bootdata(__va(real_mode_data)); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d031bad..53eeb22 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -478,21 +478,22 @@ is486: __INIT setup_once: /* - * Set up a idt with 256 entries pointing to ignore_int, - * interrupt gates. It doesn't actually load idt - that needs - * to be done on each CPU. Interrupts are enabled elsewhere, - * when we can be relatively sure everything is ok. + * Set up a idt with 256 interrupt gates that push zero if there + * is no error code and then jump to early_idt_handler_common. + * It doesn't actually load the idt - that needs to be done on + * each CPU. Interrupts are enabled elsewhere, when we can be + * relatively sure everything is ok. */ movl $idt_table,%edi - movl $early_idt_handlers,%eax + movl $early_idt_handler_array,%eax movl $NUM_EXCEPTION_VECTORS,%ecx 1: movl %eax,(%edi) movl %eax,4(%edi) /* interrupt gate, dpl=0, present */ movl $(0x8E000000 + __KERNEL_CS),2(%edi) - addl $9,%eax + addl $EARLY_IDT_HANDLER_SIZE,%eax addl $8,%edi loop 1b @@ -524,26 +525,28 @@ setup_once: andl $0,setup_once_ref /* Once is enough, thanks */ ret -ENTRY(early_idt_handlers) +ENTRY(early_idt_handler_array) # 36(%esp) %eflags # 32(%esp) %cs # 28(%esp) %eip # 24(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .if (EXCEPTION_ERRCODE_MASK >> i) & 1 - ASM_NOP2 - .else + .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 pushl $0 # Dummy error code, to make stack frame uniform .endif pushl $i # 20(%esp) Vector number - jmp early_idt_handler + jmp early_idt_handler_common i = i + 1 + .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr -ENDPROC(early_idt_handlers) +ENDPROC(early_idt_handler_array) - /* This is global to keep gas from relaxing the jumps */ -ENTRY(early_idt_handler) +early_idt_handler_common: + /* + * The stack is the hardware frame, an error code or zero, and the + * vector number. + */ cld cmpl $2,(%esp) # X86_TRAP_NMI @@ -603,7 +606,7 @@ ex_entry: is_nmi: addl $8,%esp /* drop vector number and error code */ iret -ENDPROC(early_idt_handler) +ENDPROC(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ ALIGN diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ae6588b..df7e780 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -321,26 +321,28 @@ bad_address: jmp bad_address __INIT - .globl early_idt_handlers -early_idt_handlers: +ENTRY(early_idt_handler_array) # 104(%rsp) %rflags # 96(%rsp) %cs # 88(%rsp) %rip # 80(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .if (EXCEPTION_ERRCODE_MASK >> i) & 1 - ASM_NOP2 - .else + .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 pushq $0 # Dummy error code, to make stack frame uniform .endif pushq $i # 72(%rsp) Vector number - jmp early_idt_handler + jmp early_idt_handler_common i = i + 1 + .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr +ENDPROC(early_idt_handler_array) -/* This is global to keep gas from relaxing the jumps */ -ENTRY(early_idt_handler) +early_idt_handler_common: + /* + * The stack is the hardware frame, an error code or zero, and the + * vector number. + */ cld cmpl $2,(%rsp) # X86_TRAP_NMI @@ -412,7 +414,7 @@ ENTRY(early_idt_handler) is_nmi: addq $16,%rsp # drop vector number and error code INTERRUPT_RETURN -ENDPROC(early_idt_handler) +ENDPROC(early_idt_handler_common) __INITDATA |