diff options
author | David S. Miller <davem@davemloft.net> | 2012-04-12 19:41:23 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-04-12 19:41:23 -0400 |
commit | 011e3c63251be832d23df9f0697626ab7b354d02 (patch) | |
tree | 2cad5b58c274c93ae49d9b58fb15d784d4dfd78f /arch | |
parent | c1412fce7eccae62b4de22494f6ab3ff8a90c0c6 (diff) | |
parent | ecca5c3acc0d0933d89abc44e60afb0cc8170e35 (diff) | |
download | op-kernel-dev-011e3c63251be832d23df9f0697626ab7b354d02.zip op-kernel-dev-011e3c63251be832d23df9f0697626ab7b354d02.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Diffstat (limited to 'arch')
88 files changed, 832 insertions, 714 deletions
diff --git a/arch/c6x/include/asm/irq.h b/arch/c6x/include/asm/irq.h index f13b78d..ab4577f 100644 --- a/arch/c6x/include/asm/irq.h +++ b/arch/c6x/include/asm/irq.h @@ -42,10 +42,6 @@ /* This number is used when no interrupt has been assigned */ #define NO_IRQ 0 -struct irq_data; -extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d); -extern irq_hw_number_t virq_to_hw(unsigned int virq); - extern void __init init_pic_c64xplus(void); extern void init_IRQ(void); diff --git a/arch/c6x/kernel/irq.c b/arch/c6x/kernel/irq.c index 65b8ddf..c90fb5e 100644 --- a/arch/c6x/kernel/irq.c +++ b/arch/c6x/kernel/irq.c @@ -130,16 +130,3 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); return 0; } - -irq_hw_number_t irqd_to_hwirq(struct irq_data *d) -{ - return d->hwirq; -} -EXPORT_SYMBOL_GPL(irqd_to_hwirq); - -irq_hw_number_t virq_to_hw(unsigned int virq) -{ - struct irq_data *irq_data = irq_get_irq_data(virq); - return WARN_ON(!irq_data) ? 0 : irq_data->hwirq; -} -EXPORT_SYMBOL_GPL(virq_to_hw); diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index cf417e51..e648af9 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -33,8 +33,6 @@ extern atomic_t ppc_n_lost_interrupts; /* Same thing, used by the generic IRQ code */ #define NR_IRQS_LEGACY NUM_ISA_INTERRUPTS -struct irq_data; -extern irq_hw_number_t irqd_to_hwirq(struct irq_data *d); extern irq_hw_number_t virq_to_hw(unsigned int virq); /** diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3e57a00..ba3aeb4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -206,40 +206,43 @@ reenable_mmu: /* re-enable mmu so we can */ andi. r10,r10,MSR_EE /* Did EE change? */ beq 1f - /* Save handler and return address into the 2 unused words - * of the STACK_FRAME_OVERHEAD (sneak sneak sneak). Everything - * else can be recovered from the pt_regs except r3 which for - * normal interrupts has been set to pt_regs and for syscalls - * is an argument, so we temporarily use ORIG_GPR3 to save it - */ - stw r9,8(r1) - stw r11,12(r1) - stw r3,ORIG_GPR3(r1) /* * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. * If from user mode there is only one stack frame on the stack, and * accessing CALLER_ADDR1 will cause oops. So we need create a dummy * stack frame to make trace_hardirqs_off happy. + * + * This is handy because we also need to save a bunch of GPRs, + * r3 can be different from GPR3(r1) at this point, r9 and r11 + * contains the old MSR and handler address respectively, + * r4 & r5 can contain page fault arguments that need to be passed + * along as well. r12, CCR, CTR, XER etc... are left clobbered as + * they aren't useful past this point (aren't syscall arguments), + * the rest is restored from the exception frame. */ + stwu r1,-32(r1) + stw r9,8(r1) + stw r11,12(r1) + stw r3,16(r1) + stw r4,20(r1) + stw r5,24(r1) andi. r12,r12,MSR_PR - beq 11f - stwu r1,-16(r1) + b 11f bl trace_hardirqs_off - addi r1,r1,16 b 12f - 11: bl trace_hardirqs_off 12: + lwz r5,24(r1) + lwz r4,20(r1) + lwz r3,16(r1) + lwz r11,12(r1) + lwz r9,8(r1) + addi r1,r1,32 lwz r0,GPR0(r1) - lwz r3,ORIG_GPR3(r1) - lwz r4,GPR4(r1) - lwz r5,GPR5(r1) lwz r6,GPR6(r1) lwz r7,GPR7(r1) lwz r8,GPR8(r1) - lwz r9,8(r1) - lwz r11,12(r1) 1: mtctr r11 mtlr r9 bctr /* jump to handler */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 243dbab..5ec1b23 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -560,12 +560,6 @@ void do_softirq(void) local_irq_restore(flags); } -irq_hw_number_t irqd_to_hwirq(struct irq_data *d) -{ - return d->hwirq; -} -EXPORT_SYMBOL_GPL(irqd_to_hwirq); - irq_hw_number_t virq_to_hw(unsigned int virq) { struct irq_data *irq_data = irq_get_irq_data(virq); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f88698c..4937c96 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1235,7 +1235,7 @@ void __ppc64_runlatch_on(void) ctrl |= CTRL_RUNLATCH; mtspr(SPRN_CTRLT, ctrl); - ti->local_flags |= TLF_RUNLATCH; + ti->local_flags |= _TLF_RUNLATCH; } /* Called with hard IRQs off */ @@ -1244,7 +1244,7 @@ void __ppc64_runlatch_off(void) struct thread_info *ti = current_thread_info(); unsigned long ctrl; - ti->local_flags &= ~TLF_RUNLATCH; + ti->local_flags &= ~_TLF_RUNLATCH; ctrl = mfspr(SPRN_CTRLF); ctrl &= ~CTRL_RUNLATCH; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index bed1279..e1b60f5 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -173,9 +173,9 @@ static void __init kvm_linear_init_one(ulong size, int count, int type) static struct kvmppc_linear_info *kvm_alloc_linear(int type) { - struct kvmppc_linear_info *ri; + struct kvmppc_linear_info *ri, *ret; - ri = NULL; + ret = NULL; spin_lock(&linear_lock); list_for_each_entry(ri, &free_linears, list) { if (ri->type != type) @@ -183,11 +183,12 @@ static struct kvmppc_linear_info *kvm_alloc_linear(int type) list_del(&ri->list); atomic_inc(&ri->use_count); + memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT); + ret = ri; break; } spin_unlock(&linear_lock); - memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT); - return ri; + return ret; } static void kvm_release_linear(struct kvmppc_linear_info *ri) diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 3f7b674..d3fb4df 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -46,8 +46,10 @@ _GLOBAL(__kvmppc_vcore_entry) /* Save host state to the stack */ stdu r1, -SWITCH_FRAME_SIZE(r1) - /* Save non-volatile registers (r14 - r31) */ + /* Save non-volatile registers (r14 - r31) and CR */ SAVE_NVGPRS(r1) + mfcr r3 + std r3, _CCR(r1) /* Save host DSCR */ BEGIN_FTR_SECTION @@ -157,8 +159,10 @@ kvmppc_handler_highmem: * R13 = PACA */ - /* Restore non-volatile host registers (r14 - r31) */ + /* Restore non-volatile host registers (r14 - r31) and CR */ REST_NVGPRS(r1) + ld r4, _CCR(r1) + mtcr r4 addi r1, r1, SWITCH_FRAME_SIZE ld r0, PPC_LR_STKOFF(r1) diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index 0a8515a..3e35383 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -84,6 +84,10 @@ kvm_start_entry: /* Save non-volatile registers (r14 - r31) */ SAVE_NVGPRS(r1) + /* Save CR */ + mfcr r14 + stw r14, _CCR(r1) + /* Save LR */ PPC_STL r0, _LINK(r1) @@ -165,6 +169,9 @@ kvm_exit_loop: PPC_LL r4, _LINK(r1) mtlr r4 + lwz r14, _CCR(r1) + mtcr r14 + /* Restore non-volatile host registers (r14 - r31) */ REST_NVGPRS(r1) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 642d885..7759053 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -777,6 +777,7 @@ program_interrupt: } } + preempt_disable(); if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if * we aren't already exiting to userspace for some other @@ -798,8 +799,6 @@ program_interrupt: run->exit_reason = KVM_EXIT_INTR; r = -EINTR; } else { - preempt_disable(); - /* In case an interrupt came in that was triggered * from userspace (like DEC), we need to check what * to inject now! */ @@ -881,7 +880,8 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) switch (reg->id) { case KVM_REG_PPC_HIOR: - r = put_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); + r = copy_to_user((u64 __user *)(long)reg->addr, + &to_book3s(vcpu)->hior, sizeof(u64)); break; default: break; @@ -896,7 +896,8 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) switch (reg->id) { case KVM_REG_PPC_HIOR: - r = get_user(to_book3s(vcpu)->hior, (u64 __user *)reg->addr); + r = copy_from_user(&to_book3s(vcpu)->hior, + (u64 __user *)(long)reg->addr, sizeof(u64)); if (!r) to_book3s(vcpu)->hior_explicit = true; break; diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 10d8ef6..c8c4b87 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -34,7 +34,8 @@ /* r2 is special: it holds 'current', and it made nonvolatile in the * kernel with the -ffixed-r2 gcc option. */ #define HOST_R2 12 -#define HOST_NV_GPRS 16 +#define HOST_CR 16 +#define HOST_NV_GPRS 20 #define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4)) #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4) #define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */ @@ -296,8 +297,10 @@ heavyweight_exit: /* Return to kvm_vcpu_run(). */ lwz r4, HOST_STACK_LR(r1) + lwz r5, HOST_CR(r1) addi r1, r1, HOST_STACK_SIZE mtlr r4 + mtcr r5 /* r3 still contains the return code from kvmppc_handle_exit(). */ blr @@ -314,6 +317,8 @@ _GLOBAL(__kvmppc_vcpu_run) stw r3, HOST_RUN(r1) mflr r3 stw r3, HOST_STACK_LR(r1) + mfcr r5 + stw r5, HOST_CR(r1) /* Save host non-volatile register state to stack. */ stw r14, HOST_NV_GPR(r14)(r1) diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index db360fc..d09f3e8 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -392,7 +392,7 @@ static int axon_msi_probe(struct platform_device *device) } memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES); - msic->irq_domain = irq_domain_add_nomap(dn, &msic_host_ops, msic); + msic->irq_domain = irq_domain_add_nomap(dn, 0, &msic_host_ops, msic); if (!msic->irq_domain) { printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %s\n", dn->full_name); diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index e5c3a2c..f9a48af 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -239,7 +239,7 @@ void __init beatic_init_IRQ(void) ppc_md.get_irq = beatic_get_irq; /* Allocate an irq host */ - beatic_host = irq_domain_add_nomap(NULL, &beatic_pic_host_ops, NULL); + beatic_host = irq_domain_add_nomap(NULL, 0, &beatic_pic_host_ops, NULL); BUG_ON(beatic_host == NULL); irq_set_default_host(beatic_host); } diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index a81e5a8..b4ddaa3 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -192,7 +192,7 @@ static int psurge_secondary_ipi_init(void) { int rc = -ENOMEM; - psurge_host = irq_domain_add_nomap(NULL, &psurge_host_ops, NULL); + psurge_host = irq_domain_add_nomap(NULL, 0, &psurge_host_ops, NULL); if (psurge_host) psurge_secondary_virq = irq_create_direct_mapping(psurge_host); diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 2a4ff86..5f3b232 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -753,9 +753,8 @@ void __init ps3_init_IRQ(void) unsigned cpu; struct irq_domain *host; - host = irq_domain_add_nomap(NULL, &ps3_host_ops, NULL); + host = irq_domain_add_nomap(NULL, PS3_PLUG_MAX + 1, &ps3_host_ops, NULL); irq_set_default_host(host); - irq_set_virq_count(PS3_PLUG_MAX + 1); for_each_possible_cpu(cpu) { struct ps3_private *pd = &per_cpu(ps3_private, cpu); diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index c1d5a82..5f2bb42 100644 --- a/arch/sh/Kconfig.debug +++ b/arch/sh/Kconfig.debug @@ -61,6 +61,7 @@ config DUMP_CODE config DWARF_UNWINDER bool "Enable the DWARF unwinder for stacktraces" select FRAME_POINTER + depends on SUPERH32 default n help Enabling this option will make stacktraces more accurate, at diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c index d879848..d0d6221 100644 --- a/arch/sh/boards/board-sh7785lcr.c +++ b/arch/sh/boards/board-sh7785lcr.c @@ -28,6 +28,7 @@ #include <cpu/sh7785.h> #include <asm/heartbeat.h> #include <asm/clock.h> +#include <asm/bl_bit.h> /* * NOTE: This board has 2 physical memory maps. diff --git a/arch/sh/boards/mach-hp6xx/pm.c b/arch/sh/boards/mach-hp6xx/pm.c index adc9b4b..8b50cf7 100644 --- a/arch/sh/boards/mach-hp6xx/pm.c +++ b/arch/sh/boards/mach-hp6xx/pm.c @@ -14,6 +14,7 @@ #include <linux/gfp.h> #include <asm/io.h> #include <asm/hd64461.h> +#include <asm/bl_bit.h> #include <mach/hp6xx.h> #include <cpu/dac.h> #include <asm/freq.h> diff --git a/arch/sh/drivers/dma/dma-sysfs.c b/arch/sh/drivers/dma/dma-sysfs.c index b1cb271..67ee956 100644 --- a/arch/sh/drivers/dma/dma-sysfs.c +++ b/arch/sh/drivers/dma/dma-sysfs.c @@ -54,7 +54,7 @@ static int __init dma_subsys_init(void) if (unlikely(ret)) return ret; - return device_create_file(dma_subsys.dev_root, &dev_attr_devices.attr); + return device_create_file(dma_subsys.dev_root, &dev_attr_devices); } postcore_initcall(dma_subsys_init); diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index 7f1b70c..f8f7af5 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -2,6 +2,7 @@ #include <linux/slab.h> #include <asm/processor.h> #include <asm/fpu.h> +#include <asm/traps.h> int init_fpu(struct task_struct *tsk) { diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index 488d24e..98bbaa4 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -14,6 +14,7 @@ #include <asm/processor.h> #include <asm/io.h> #include <asm/fpu.h> +#include <asm/traps.h> /* The PR (precision) bit in the FP Status Register must be clear when * an frchg instruction is executed, otherwise the instruction is undefined. diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index e74cd6c..69ab4d3 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -16,6 +16,7 @@ #include <cpu/fpu.h> #include <asm/processor.h> #include <asm/fpu.h> +#include <asm/traps.h> /* The PR (precision) bit in the FP Status Register must be clear when * an frchg instruction is executed, otherwise the instruction is undefined. diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c index 5853989..04ab5ae 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7757.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7757.c @@ -113,7 +113,7 @@ static struct clk_lookup lookups[] = { CLKDEV_CON_ID("cpu_clk", &div4_clks[DIV4_I]), /* MSTP32 clocks */ - CLKDEV_CON_ID("sdhi0", &mstp_clks[MSTP004]), + CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[MSTP004]), CLKDEV_CON_ID("riic0", &mstp_clks[MSTP000]), CLKDEV_CON_ID("riic1", &mstp_clks[MSTP000]), CLKDEV_CON_ID("riic2", &mstp_clks[MSTP000]), diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index a6f95ae..08d27fa 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -16,6 +16,7 @@ #include <asm/suspend.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> +#include <asm/bl_bit.h> /* * Notifier lists for pre/post sleep notification diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 64852ec..ee226e2 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -17,8 +17,8 @@ #include <linux/irqflags.h> #include <linux/smp.h> #include <linux/cpuidle.h> -#include <asm/pgalloc.h> #include <linux/atomic.h> +#include <asm/pgalloc.h> #include <asm/smp.h> #include <asm/bl_bit.h> diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index efb6d39..b117781 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -14,6 +14,7 @@ #include <linux/irq.h> #include <linux/io.h> #include <asm/cacheflush.h> +#include <asm/traps.h> /* Macros for single step instruction identification */ #define OPCODE_BT(op) (((op) & 0xff00) == 0x8900) diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index f72e3a9..94273aa 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -26,6 +26,7 @@ #include <asm/mmu_context.h> #include <asm/fpu.h> #include <asm/syscalls.h> +#include <asm/switch_to.h> void show_regs(struct pt_regs * regs) { diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index a17a14d..eaebdf6 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -27,6 +27,7 @@ #include <asm/smp.h> #include <asm/cacheflush.h> #include <asm/sections.h> +#include <asm/setup.h> int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ diff --git a/arch/sh/kernel/vsyscall/vsyscall-sigreturn.S b/arch/sh/kernel/vsyscall/vsyscall-sigreturn.S index 555a64f..23af175 100644 --- a/arch/sh/kernel/vsyscall/vsyscall-sigreturn.S +++ b/arch/sh/kernel/vsyscall/vsyscall-sigreturn.S @@ -34,6 +34,41 @@ __kernel_rt_sigreturn: 1: .short __NR_rt_sigreturn .LEND_rt_sigreturn: .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn + .previous .section .eh_frame,"a",@progbits +.LCIE1: + .ualong .LCIE1_end - .LCIE1_start +.LCIE1_start: + .ualong 0 /* CIE ID */ + .byte 0x1 /* Version number */ + .string "zRS" /* NUL-terminated augmentation string */ + .uleb128 0x1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 0x11 /* Return address register column */ + .uleb128 0x1 /* Augmentation length and data */ + .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */ + .byte 0xc, 0xf, 0x0 /* DW_CFA_def_cfa: r15 ofs 0 */ + + .align 2 +.LCIE1_end: + + .ualong .LFDE0_end-.LFDE0_start /* Length FDE0 */ +.LFDE0_start: + .ualong .LFDE0_start-.LCIE1 /* CIE pointer */ + .ualong .LSTART_sigreturn-. /* PC-relative start address */ + .ualong .LEND_sigreturn-.LSTART_sigreturn + .uleb128 0 /* Augmentation */ + .align 2 +.LFDE0_end: + + .ualong .LFDE1_end-.LFDE1_start /* Length FDE1 */ +.LFDE1_start: + .ualong .LFDE1_start-.LCIE1 /* CIE pointer */ + .ualong .LSTART_rt_sigreturn-. /* PC-relative start address */ + .ualong .LEND_rt_sigreturn-.LSTART_rt_sigreturn + .uleb128 0 /* Augmentation */ + .align 2 +.LFDE1_end: + .previous diff --git a/arch/sh/kernel/vsyscall/vsyscall-trapa.S b/arch/sh/kernel/vsyscall/vsyscall-trapa.S index 3e70f85..0eb74d0 100644 --- a/arch/sh/kernel/vsyscall/vsyscall-trapa.S +++ b/arch/sh/kernel/vsyscall/vsyscall-trapa.S @@ -3,37 +3,34 @@ .type __kernel_vsyscall,@function __kernel_vsyscall: .LSTART_vsyscall: - /* XXX: We'll have to do something here once we opt to use the vDSO - * page for something other than the signal trampoline.. as well as - * fill out .eh_frame -- PFM. */ + trapa #0x10 + nop .LEND_vsyscall: .size __kernel_vsyscall,.-.LSTART_vsyscall + .previous .section .eh_frame,"a",@progbits - .previous .LCIE: .ualong .LCIE_end - .LCIE_start .LCIE_start: .ualong 0 /* CIE ID */ .byte 0x1 /* Version number */ - .string "zRS" /* NUL-terminated augmentation string */ + .string "zR" /* NUL-terminated augmentation string */ .uleb128 0x1 /* Code alignment factor */ .sleb128 -4 /* Data alignment factor */ .byte 0x11 /* Return address register column */ - /* Augmentation length and data (none) */ - .byte 0xc /* DW_CFA_def_cfa */ - .uleb128 0xf /* r15 */ - .uleb128 0x0 /* offset 0 */ - + .uleb128 0x1 /* Augmentation length and data */ + .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */ + .byte 0xc,0xf,0x0 /* DW_CFA_def_cfa: r15 ofs 0 */ .align 2 .LCIE_end: .ualong .LFDE_end-.LFDE_start /* Length FDE */ .LFDE_start: - .ualong .LCIE /* CIE pointer */ - .ualong .LSTART_vsyscall-. /* start address */ + .ualong .LFDE_start-.LCIE /* CIE pointer */ + .ualong .LSTART_vsyscall-. /* PC-relative start address */ .ualong .LEND_vsyscall-.LSTART_vsyscall - .uleb128 0 + .uleb128 0 /* Augmentation */ .align 2 .LFDE_end: .previous diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index 112fea1..0e52928 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -18,6 +18,7 @@ #include <linux/highmem.h> #include <asm/pgtable.h> #include <asm/mmu_context.h> +#include <asm/cache_insns.h> #include <asm/cacheflush.h> /* diff --git a/arch/sh/mm/flush-sh4.c b/arch/sh/mm/flush-sh4.c index 75a17f5..0b85dd9 100644 --- a/arch/sh/mm/flush-sh4.c +++ b/arch/sh/mm/flush-sh4.c @@ -1,5 +1,6 @@ #include <linux/mm.h> #include <asm/mmu_context.h> +#include <asm/cache_insns.h> #include <asm/cacheflush.h> #include <asm/traps.h> diff --git a/arch/sh/mm/sram.c b/arch/sh/mm/sram.c index bc156ec..2d8fa71 100644 --- a/arch/sh/mm/sram.c +++ b/arch/sh/mm/sram.c @@ -9,6 +9,7 @@ */ #include <linux/init.h> #include <linux/kernel.h> +#include <linux/errno.h> #include <asm/sram.h> /* diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index aba6b95..19f5605 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -45,7 +45,6 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info) void __devinit pcibios_fixup_bus(struct pci_bus *pbus) { - struct leon_pci_info *info = pbus->sysdata; struct pci_dev *dev; int i, has_io, has_mem; u16 cmd; @@ -111,18 +110,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } -struct device_node *pci_device_to_OF_node(struct pci_dev *pdev) -{ - /* - * Currently the OpenBoot nodes are not connected with the PCI device, - * this is because the LEON PROM does not create PCI nodes. Eventually - * this will change and the same approach as pcic.c can be used to - * match PROM nodes with pci devices. - */ - return NULL; -} -EXPORT_SYMBOL(pci_device_to_OF_node); - void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) { #ifdef CONFIG_PCI_DEBUG diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 7705c67..df3155a 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -225,6 +225,8 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, unsigned long g2; int from_user = !(regs->psr & PSR_PS); int fault, code; + unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | + (write ? FAULT_FLAG_WRITE : 0)); if(text_fault) address = regs->pc; @@ -251,6 +253,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); +retry: down_read(&mm->mmap_sem); /* @@ -289,7 +292,11 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -297,13 +304,29 @@ good_area: goto do_sigbus; BUG(); } - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); + + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + current->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, + 1, regs, address); + } else { + current->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, + 1, regs, address); + } + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + + /* No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } } + up_read(&mm->mmap_sem); return; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 504c062..1fe0429 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -279,6 +279,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) unsigned int insn = 0; int si_code, fault_code, fault; unsigned long address, mm_rss; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; fault_code = get_thread_fault_code(); @@ -333,6 +334,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) insn = get_fault_insn(regs, insn); goto handle_kernel_fault; } + +retry: down_read(&mm->mmap_sem); } @@ -423,7 +426,12 @@ good_area: goto bad_area; } - fault = handle_mm_fault(mm, vma, address, (fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0); + flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -431,12 +439,27 @@ good_area: goto do_sigbus; BUG(); } - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); + + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + current->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, + 1, regs, address); + } else { + current->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, + 1, regs, address); + } + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + + /* No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } } up_read(&mm->mmap_sem); diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 11270ca..96033e2 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -12,7 +12,7 @@ config TILE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW select SYS_HYPERVISOR - select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386 + select ARCH_HAVE_NMI_SAFE_CMPXCHG # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT @@ -69,6 +69,9 @@ config ARCH_PHYS_ADDR_T_64BIT config ARCH_DMA_ADDR_T_64BIT def_bool y +config NEED_DMA_MAP_STATE + def_bool y + config LOCKDEP_SUPPORT def_bool y @@ -118,7 +121,7 @@ config 64BIT config ARCH_DEFCONFIG string - default "arch/tile/configs/tile_defconfig" if !TILEGX + default "arch/tile/configs/tilepro_defconfig" if !TILEGX default "arch/tile/configs/tilegx_defconfig" if TILEGX source "init/Kconfig" @@ -240,6 +243,7 @@ endchoice config PAGE_OFFSET hex + depends on !64BIT default 0xF0000000 if VMSPLIT_3_75G default 0xE0000000 if VMSPLIT_3_5G default 0xB0000000 if VMSPLIT_2_75G diff --git a/arch/tile/Makefile b/arch/tile/Makefile index 17acce7..9520bc5 100644 --- a/arch/tile/Makefile +++ b/arch/tile/Makefile @@ -30,7 +30,8 @@ ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"") KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS) endif -LIBGCC_PATH := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) +LIBGCC_PATH := \ + $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name) # Provide the path to use for "make defconfig". KBUILD_DEFCONFIG := $(ARCH)_defconfig @@ -53,8 +54,6 @@ libs-y += $(LIBGCC_PATH) # See arch/tile/Kbuild for content of core part of the kernel core-y += arch/tile/ -core-$(CONFIG_KVM) += arch/tile/kvm/ - ifdef TILERA_ROOT INSTALL_PATH ?= $(TILERA_ROOT)/tile/boot endif diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h index f548efe..d6ba449 100644 --- a/arch/tile/include/arch/spr_def.h +++ b/arch/tile/include/arch/spr_def.h @@ -60,8 +60,8 @@ _concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,) #define SPR_IPI_EVENT_RESET_K \ _concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,) -#define SPR_IPI_MASK_SET_K \ - _concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_EVENT_SET_K \ + _concat4(SPR_IPI_EVENT_SET_, CONFIG_KERNEL_PL,,) #define INT_IPI_K \ _concat4(INT_IPI_, CONFIG_KERNEL_PL,,) diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index bb696da..f246142 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -17,6 +17,8 @@ #ifndef _ASM_TILE_ATOMIC_H #define _ASM_TILE_ATOMIC_H +#include <asm/cmpxchg.h> + #ifndef __ASSEMBLY__ #include <linux/compiler.h> @@ -121,54 +123,6 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) -/* Nonexistent functions intended to cause link errors. */ -extern unsigned long __xchg_called_with_bad_pointer(void); -extern unsigned long __cmpxchg_called_with_bad_pointer(void); - -#define xchg(ptr, x) \ - ({ \ - typeof(*(ptr)) __x; \ - switch (sizeof(*(ptr))) { \ - case 4: \ - __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \ - (atomic_t *)(ptr), \ - (u32)(typeof((x)-(x)))(x)); \ - break; \ - case 8: \ - __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \ - (atomic64_t *)(ptr), \ - (u64)(typeof((x)-(x)))(x)); \ - break; \ - default: \ - __xchg_called_with_bad_pointer(); \ - } \ - __x; \ - }) - -#define cmpxchg(ptr, o, n) \ - ({ \ - typeof(*(ptr)) __x; \ - switch (sizeof(*(ptr))) { \ - case 4: \ - __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \ - (atomic_t *)(ptr), \ - (u32)(typeof((o)-(o)))(o), \ - (u32)(typeof((n)-(n)))(n)); \ - break; \ - case 8: \ - __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \ - (atomic64_t *)(ptr), \ - (u64)(typeof((o)-(o)))(o), \ - (u64)(typeof((n)-(n)))(n)); \ - break; \ - default: \ - __cmpxchg_called_with_bad_pointer(); \ - } \ - __x; \ - }) - -#define tas(ptr) (xchg((ptr), 1)) - #endif /* __ASSEMBLY__ */ #ifndef __tilegx__ diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index 466dc4a..54d1da8 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -200,7 +200,7 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) * @u: ...unless v is equal to u. * * Atomically adds @a to @v, so long as @v was not already @u. - * Returns the old value of @v. + * Returns non-zero if @v was not @u, and zero otherwise. */ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u) { diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h index 58d021a..60b87ee 100644 --- a/arch/tile/include/asm/bitops_64.h +++ b/arch/tile/include/asm/bitops_64.h @@ -38,10 +38,10 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) static inline void change_bit(unsigned nr, volatile unsigned long *addr) { - unsigned long old, mask = (1UL << (nr % BITS_PER_LONG)); - long guess, oldval; + unsigned long mask = (1UL << (nr % BITS_PER_LONG)); + unsigned long guess, oldval; addr += nr / BITS_PER_LONG; - old = *addr; + oldval = *addr; do { guess = oldval; oldval = atomic64_cmpxchg((atomic64_t *)addr, @@ -85,7 +85,7 @@ static inline int test_and_change_bit(unsigned nr, volatile unsigned long *addr) { unsigned long mask = (1UL << (nr % BITS_PER_LONG)); - long guess, oldval = *addr; + unsigned long guess, oldval; addr += nr / BITS_PER_LONG; oldval = *addr; do { diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h new file mode 100644 index 0000000..276f067 --- /dev/null +++ b/arch/tile/include/asm/cmpxchg.h @@ -0,0 +1,73 @@ +/* + * cmpxchg.h -- forked from asm/atomic.h with this copyright: + * + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + */ + +#ifndef _ASM_TILE_CMPXCHG_H +#define _ASM_TILE_CMPXCHG_H + +#ifndef __ASSEMBLY__ + +/* Nonexistent functions intended to cause link errors. */ +extern unsigned long __xchg_called_with_bad_pointer(void); +extern unsigned long __cmpxchg_called_with_bad_pointer(void); + +#define xchg(ptr, x) \ + ({ \ + typeof(*(ptr)) __x; \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \ + (atomic_t *)(ptr), \ + (u32)(typeof((x)-(x)))(x)); \ + break; \ + case 8: \ + __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \ + (atomic64_t *)(ptr), \ + (u64)(typeof((x)-(x)))(x)); \ + break; \ + default: \ + __xchg_called_with_bad_pointer(); \ + } \ + __x; \ + }) + +#define cmpxchg(ptr, o, n) \ + ({ \ + typeof(*(ptr)) __x; \ + switch (sizeof(*(ptr))) { \ + case 4: \ + __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \ + (atomic_t *)(ptr), \ + (u32)(typeof((o)-(o)))(o), \ + (u32)(typeof((n)-(n)))(n)); \ + break; \ + case 8: \ + __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \ + (atomic64_t *)(ptr), \ + (u64)(typeof((o)-(o)))(o), \ + (u64)(typeof((n)-(n)))(n)); \ + break; \ + default: \ + __cmpxchg_called_with_bad_pointer(); \ + } \ + __x; \ + }) + +#define tas(ptr) (xchg((ptr), 1)) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_TILE_CMPXCHG_H */ diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h index f80f8ce..33cff9a 100644 --- a/arch/tile/include/asm/irq.h +++ b/arch/tile/include/asm/irq.h @@ -21,7 +21,7 @@ #define NR_IRQS 32 /* IRQ numbers used for linux IPIs. */ -#define IRQ_RESCHEDULE 1 +#define IRQ_RESCHEDULE 0 #define irq_canonicalize(irq) (irq) diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h index 72be590..5f8b6a0 100644 --- a/arch/tile/include/asm/spinlock_64.h +++ b/arch/tile/include/asm/spinlock_64.h @@ -137,7 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) static inline void arch_write_unlock(arch_rwlock_t *rw) { __insn_mf(); - rw->lock = 0; + __insn_exch4(&rw->lock, 0); /* Avoid waiting in the write buffer. */ } static inline int arch_read_trylock(arch_rwlock_t *rw) diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h index 4d97a2d..0e9d382a 100644 --- a/arch/tile/include/asm/stack.h +++ b/arch/tile/include/asm/stack.h @@ -25,7 +25,6 @@ struct KBacktraceIterator { BacktraceIterator it; struct task_struct *task; /* task we are backtracing */ - pte_t *pgtable; /* page table for user space access */ int end; /* iteration complete. */ int new_context; /* new context is starting */ int profile; /* profiling, so stop on async intrpt */ diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h index 5f20f92..e28c3df4 100644 --- a/arch/tile/include/asm/traps.h +++ b/arch/tile/include/asm/traps.h @@ -64,7 +64,11 @@ void do_breakpoint(struct pt_regs *, int fault_num); #ifdef __tilegx__ +/* kernel/single_step.c */ void gx_singlestep_handle(struct pt_regs *, int fault_num); + +/* kernel/intvec_64.S */ +void fill_ra_stack(void); #endif -#endif /* _ASM_TILE_SYSCALLS_H */ +#endif /* _ASM_TILE_TRAPS_H */ diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 431e9ae6..ec91568 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -85,6 +85,7 @@ STD_ENTRY(cpu_idle_on_new_stack) /* Loop forever on a nap during SMP boot. */ STD_ENTRY(smp_nap) nap + nop /* avoid provoking the icache prefetch with a jump */ j smp_nap /* we are not architecturally guaranteed not to exit nap */ jrp lr /* clue in the backtracer */ STD_ENDPROC(smp_nap) @@ -105,5 +106,6 @@ STD_ENTRY(_cpu_idle) .global _cpu_idle_nap _cpu_idle_nap: nap + nop /* avoid provoking the icache prefetch with a jump */ jrp lr STD_ENDPROC(_cpu_idle) diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index aecc8ed..5d56a1e 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -799,6 +799,10 @@ handle_interrupt: * This routine takes a boolean in r30 indicating if this is an NMI. * If so, we also expect a boolean in r31 indicating whether to * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. */ STD_ENTRY(interrupt_return) /* If we're resuming to kernel space, don't check thread flags. */ @@ -1237,7 +1241,10 @@ handle_syscall: bzt r30, 1f jal do_syscall_trace FEEDBACK_REENTER(handle_syscall) -1: j .Lresume_userspace /* jump into middle of interrupt_return */ +1: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Linvalid_syscall: /* Report an invalid syscall back to the user program */ @@ -1246,7 +1253,10 @@ handle_syscall: movei r28, -ENOSYS } sw r29, r28 - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(handle_syscall) /* Return the address for oprofile to suppress in backtraces. */ @@ -1262,7 +1272,10 @@ STD_ENTRY(ret_from_fork) jal sim_notify_fork jal schedule_tail FEEDBACK_REENTER(ret_from_fork) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(ret_from_fork) /* @@ -1376,7 +1389,10 @@ handle_ill: jal send_sigtrap /* issue a SIGTRAP */ FEEDBACK_REENTER(handle_ill) - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Ldispatch_normal_ill: { diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 79c93e1..49d9d66 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -22,6 +22,7 @@ #include <asm/irqflags.h> #include <asm/asm-offsets.h> #include <asm/types.h> +#include <asm/signal.h> #include <hv/hypervisor.h> #include <arch/abi.h> #include <arch/interrupts.h> @@ -605,6 +606,10 @@ handle_interrupt: * This routine takes a boolean in r30 indicating if this is an NMI. * If so, we also expect a boolean in r31 indicating whether to * re-enable the oprofile interrupts. + * + * Note that .Lresume_userspace is jumped to directly in several + * places, and we need to make sure r30 is set correctly in those + * callers as well. */ STD_ENTRY(interrupt_return) /* If we're resuming to kernel space, don't check thread flags. */ @@ -1039,11 +1044,28 @@ handle_syscall: /* Do syscall trace again, if requested. */ ld r30, r31 - andi r30, r30, _TIF_SYSCALL_TRACE - beqzt r30, 1f + andi r0, r30, _TIF_SYSCALL_TRACE + { + andi r0, r30, _TIF_SINGLESTEP + beqzt r0, 1f + } jal do_syscall_trace FEEDBACK_REENTER(handle_syscall) -1: j .Lresume_userspace /* jump into middle of interrupt_return */ + andi r0, r30, _TIF_SINGLESTEP + +1: beqzt r0, 2f + + /* Single stepping -- notify ptrace. */ + { + movei r0, SIGTRAP + jal ptrace_notify + } + FEEDBACK_REENTER(handle_syscall) + +2: { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } .Lcompat_syscall: /* @@ -1077,7 +1099,10 @@ handle_syscall: movei r28, -ENOSYS } st r29, r28 - j .Lresume_userspace /* jump into middle of interrupt_return */ + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(handle_syscall) /* Return the address for oprofile to suppress in backtraces. */ @@ -1093,7 +1118,10 @@ STD_ENTRY(ret_from_fork) jal sim_notify_fork jal schedule_tail FEEDBACK_REENTER(ret_from_fork) - j .Lresume_userspace + { + movei r30, 0 /* not an NMI */ + j .Lresume_userspace /* jump into middle of interrupt_return */ + } STD_ENDPROC(ret_from_fork) /* Various stub interrupt handlers and syscall handlers */ @@ -1156,6 +1184,18 @@ int_unalign: push_extra_callee_saves r0 j do_trap +/* Fill the return address stack with nonzero entries. */ +STD_ENTRY(fill_ra_stack) + { + move r0, lr + jal 1f + } +1: jal 2f +2: jal 3f +3: jal 4f +4: jrp r0 + STD_ENDPROC(fill_ra_stack) + /* Include .intrpt1 array of interrupt vectors */ .section ".intrpt1", "ax" @@ -1166,7 +1206,7 @@ int_unalign: #define do_hardwall_trap bad_intr #endif - int_hand INT_MEM_ERROR, MEM_ERROR, bad_intr + int_hand INT_MEM_ERROR, MEM_ERROR, do_trap int_hand INT_SINGLE_STEP_3, SINGLE_STEP_3, bad_intr #if CONFIG_KERNEL_PL == 2 int_hand INT_SINGLE_STEP_2, SINGLE_STEP_2, gx_singlestep_handle diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index b90ab99..98d4769 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -67,6 +67,8 @@ void *module_alloc(unsigned long size) area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END); if (!area) goto error; + area->nr_pages = npages; + area->pages = pages; if (map_vm_area(area, prot_rwx, &pages)) { vunmap(area->addr); diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index 7a93270..446a7f5 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -146,7 +146,6 @@ static ctl_table unaligned_table[] = { }, {} }; -#endif static struct ctl_path tile_path[] = { { .procname = "tile" }, @@ -155,10 +154,9 @@ static struct ctl_path tile_path[] = { static int __init proc_sys_tile_init(void) { -#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ register_sysctl_paths(tile_path, unaligned_table); -#endif return 0; } arch_initcall(proc_sys_tile_init); +#endif diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 30caeca..2d5ef61 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -28,6 +28,7 @@ #include <linux/tracehook.h> #include <linux/signal.h> #include <asm/stack.h> +#include <asm/switch_to.h> #include <asm/homecache.h> #include <asm/syscalls.h> #include <asm/traps.h> @@ -285,7 +286,7 @@ struct task_struct *validate_current(void) static struct task_struct corrupt = { .comm = "<corrupt>" }; struct task_struct *tsk = current; if (unlikely((unsigned long)tsk < PAGE_OFFSET || - (void *)tsk > high_memory || + (high_memory && (void *)tsk > high_memory) || ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) { pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer); tsk = &corrupt; diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 92a94f4..bff23f4 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -103,13 +103,11 @@ unsigned long __initdata pci_reserve_end_pfn = -1U; static int __init setup_maxmem(char *str) { - long maxmem_mb; - if (str == NULL || strict_strtol(str, 0, &maxmem_mb) != 0 || - maxmem_mb == 0) + unsigned long long maxmem; + if (str == NULL || (maxmem = memparse(str, NULL)) == 0) return -EINVAL; - maxmem_pfn = (maxmem_mb >> (HPAGE_SHIFT - 20)) << - (HPAGE_SHIFT - PAGE_SHIFT); + maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used to no more than %dMB\n", maxmem_pfn >> (20 - PAGE_SHIFT)); return 0; @@ -119,14 +117,15 @@ early_param("maxmem", setup_maxmem); static int __init setup_maxnodemem(char *str) { char *endp; - long maxnodemem_mb, node; + unsigned long long maxnodemem; + long node; node = str ? simple_strtoul(str, &endp, 0) : INT_MAX; - if (node >= MAX_NUMNODES || *endp != ':' || - strict_strtol(endp+1, 0, &maxnodemem_mb) != 0) + if (node >= MAX_NUMNODES || *endp != ':') return -EINVAL; - maxnodemem_pfn[node] = (maxnodemem_mb >> (HPAGE_SHIFT - 20)) << + maxnodemem = memparse(endp+1, NULL); + maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT); pr_info("Forcing RAM used on node %ld to no more than %dMB\n", node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT)); @@ -913,6 +912,13 @@ void __cpuinit setup_cpu(int boot) #ifdef CONFIG_BLK_DEV_INITRD +/* + * Note that the kernel can potentially support other compression + * techniques than gz, though we don't do so by default. If we ever + * decide to do so we can either look for other filename extensions, + * or just allow a file with this name to be compressed with an + * arbitrary compressor (somewhat counterintuitively). + */ static int __initdata set_initramfs_file; static char __initdata initramfs_file[128] = "initramfs.cpio.gz"; @@ -928,9 +934,9 @@ static int __init setup_initramfs_file(char *str) early_param("initramfs_file", setup_initramfs_file); /* - * We look for an additional "initramfs.cpio.gz" file in the hvfs. + * We look for an "initramfs.cpio.gz" file in the hvfs. * If there is one, we allocate some memory for it and it will be - * unpacked to the initramfs after any built-in initramfs_data. + * unpacked to the initramfs. */ static void __init load_hv_initrd(void) { diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index bc1eb58..9efbc13 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -153,6 +153,25 @@ static tile_bundle_bits rewrite_load_store_unaligned( if (((unsigned long)addr % size) == 0) return bundle; + /* + * Return SIGBUS with the unaligned address, if requested. + * Note that we return SIGBUS even for completely invalid addresses + * as long as they are in fact unaligned; this matches what the + * tilepro hardware would be doing, if it could provide us with the + * actual bad address in an SPR, which it doesn't. + */ + if (unaligned_fixup == 0) { + siginfo_t info = { + .si_signo = SIGBUS, + .si_code = BUS_ADRALN, + .si_addr = addr + }; + trace_unhandled_signal("unaligned trap", regs, + (unsigned long)addr, SIGBUS); + force_sig_info(info.si_signo, &info, current); + return (tilepro_bundle_bits) 0; + } + #ifndef __LITTLE_ENDIAN # error We assume little-endian representation with copy_xx_user size 2 here #endif @@ -192,18 +211,6 @@ static tile_bundle_bits rewrite_load_store_unaligned( return (tile_bundle_bits) 0; } - if (unaligned_fixup == 0) { - siginfo_t info = { - .si_signo = SIGBUS, - .si_code = BUS_ADRALN, - .si_addr = addr - }; - trace_unhandled_signal("unaligned trap", regs, - (unsigned long)addr, SIGBUS); - force_sig_info(info.si_signo, &info, current); - return (tile_bundle_bits) 0; - } - if (unaligned_printk || unaligned_fixup_count == 0) { pr_info("Process %d/%s: PC %#lx: Fixup of" " unaligned %s at %#lx.\n", diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index a44e103..91da0f7 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -103,7 +103,7 @@ static void smp_stop_cpu_interrupt(void) set_cpu_online(smp_processor_id(), 0); arch_local_irq_disable_all(); for (;;) - asm("nap"); + asm("nap; nop"); } /* This function calls the 'stop' function on all other CPUs in the system. */ @@ -113,6 +113,12 @@ void smp_send_stop(void) send_IPI_allbutself(MSG_TAG_STOP_CPU); } +/* On panic, just wait; we may get an smp_send_stop() later on. */ +void panic_smp_self_stop(void) +{ + while (1) + asm("nap; nop"); +} /* * Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages. diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index b949edc..172aef7 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -196,6 +196,8 @@ void __cpuinit online_secondary(void) /* This must be done before setting cpu_online_mask */ wmb(); + notify_cpu_starting(smp_processor_id()); + /* * We need to hold call_lock, so there is no inconsistency * between the time smp_call_function() determines number of diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 37ee4d0..b2f44c2 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -21,10 +21,12 @@ #include <linux/stacktrace.h> #include <linux/uaccess.h> #include <linux/mmzone.h> +#include <linux/dcache.h> +#include <linux/fs.h> #include <asm/backtrace.h> #include <asm/page.h> -#include <asm/tlbflush.h> #include <asm/ucontext.h> +#include <asm/switch_to.h> #include <asm/sigframe.h> #include <asm/stack.h> #include <arch/abi.h> @@ -44,72 +46,23 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, unsigned long sp) return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; } -/* Is address valid for reading? */ -static int valid_address(struct KBacktraceIterator *kbt, unsigned long address) -{ - HV_PTE *l1_pgtable = kbt->pgtable; - HV_PTE *l2_pgtable; - unsigned long pfn; - HV_PTE pte; - struct page *page; - - if (l1_pgtable == NULL) - return 0; /* can't read user space in other tasks */ - -#ifdef CONFIG_64BIT - /* Find the real l1_pgtable by looking in the l0_pgtable. */ - pte = l1_pgtable[HV_L0_INDEX(address)]; - if (!hv_pte_get_present(pte)) - return 0; - pfn = hv_pte_get_pfn(pte); - if (pte_huge(pte)) { - if (!pfn_valid(pfn)) { - pr_err("L0 huge page has bad pfn %#lx\n", pfn); - return 0; - } - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); - } - page = pfn_to_page(pfn); - BUG_ON(PageHighMem(page)); /* No HIGHMEM on 64-bit. */ - l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); -#endif - pte = l1_pgtable[HV_L1_INDEX(address)]; - if (!hv_pte_get_present(pte)) - return 0; - pfn = hv_pte_get_pfn(pte); - if (pte_huge(pte)) { - if (!pfn_valid(pfn)) { - pr_err("huge page has bad pfn %#lx\n", pfn); - return 0; - } - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); - } - - page = pfn_to_page(pfn); - if (PageHighMem(page)) { - pr_err("L2 page table not in LOWMEM (%#llx)\n", - HV_PFN_TO_CPA(pfn)); - return 0; - } - l2_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); - pte = l2_pgtable[HV_L2_INDEX(address)]; - return hv_pte_get_present(pte) && hv_pte_get_readable(pte); -} - /* Callback for backtracer; basically a glorified memcpy */ static bool read_memory_func(void *result, unsigned long address, unsigned int size, void *vkbt) { int retval; struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; + + if (address == 0) + return 0; if (__kernel_text_address(address)) { /* OK to read kernel code. */ } else if (address >= PAGE_OFFSET) { /* We only tolerate kernel-space reads of this task's stack */ if (!in_kernel_stack(kbt, address)) return 0; - } else if (!valid_address(kbt, address)) { - return 0; /* invalid user-space address */ + } else if (!kbt->is_current) { + return 0; /* can't read from other user address spaces */ } pagefault_disable(); retval = __copy_from_user_inatomic(result, @@ -127,6 +80,8 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) unsigned long sp = kbt->it.sp; struct pt_regs *p; + if (sp % sizeof(long) != 0) + return NULL; if (!in_kernel_stack(kbt, sp)) return NULL; if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1)) @@ -169,27 +124,27 @@ static int is_sigreturn(unsigned long pc) } /* Return a pt_regs pointer for a valid signal handler frame */ -static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt) +static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt, + struct rt_sigframe* kframe) { BacktraceIterator *b = &kbt->it; - if (b->pc == VDSO_BASE) { - struct rt_sigframe *frame; - unsigned long sigframe_top = - b->sp + sizeof(struct rt_sigframe) - 1; - if (!valid_address(kbt, b->sp) || - !valid_address(kbt, sigframe_top)) { - if (kbt->verbose) - pr_err(" (odd signal: sp %#lx?)\n", - (unsigned long)(b->sp)); + if (b->pc == VDSO_BASE && b->sp < PAGE_OFFSET && + b->sp % sizeof(long) == 0) { + int retval; + pagefault_disable(); + retval = __copy_from_user_inatomic( + kframe, (void __user __force *)b->sp, + sizeof(*kframe)); + pagefault_enable(); + if (retval != 0 || + (unsigned int)(kframe->info.si_signo) >= _NSIG) return NULL; - } - frame = (struct rt_sigframe *)b->sp; if (kbt->verbose) { pr_err(" <received signal %d>\n", - frame->info.si_signo); + kframe->info.si_signo); } - return (struct pt_regs *)&frame->uc.uc_mcontext; + return (struct pt_regs *)&kframe->uc.uc_mcontext; } return NULL; } @@ -202,10 +157,11 @@ static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt) static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt) { struct pt_regs *p; + struct rt_sigframe kframe; p = valid_fault_handler(kbt); if (p == NULL) - p = valid_sigframe(kbt); + p = valid_sigframe(kbt, &kframe); if (p == NULL) return 0; backtrace_init(&kbt->it, read_memory_func, kbt, @@ -265,41 +221,19 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt, /* * Set up callback information. We grab the kernel stack base - * so we will allow reads of that address range, and if we're - * asking about the current process we grab the page table - * so we can check user accesses before trying to read them. - * We flush the TLB to avoid any weird skew issues. + * so we will allow reads of that address range. */ - is_current = (t == NULL); + is_current = (t == NULL || t == current); kbt->is_current = is_current; if (is_current) t = validate_current(); kbt->task = t; - kbt->pgtable = NULL; kbt->verbose = 0; /* override in caller if desired */ kbt->profile = 0; /* override in caller if desired */ kbt->end = KBT_ONGOING; - kbt->new_context = 0; - if (is_current) { - HV_PhysAddr pgdir_pa = hv_inquire_context().page_table; - if (pgdir_pa == (unsigned long)swapper_pg_dir - PAGE_OFFSET) { - /* - * Not just an optimization: this also allows - * this to work at all before va/pa mappings - * are set up. - */ - kbt->pgtable = swapper_pg_dir; - } else { - struct page *page = pfn_to_page(PFN_DOWN(pgdir_pa)); - if (!PageHighMem(page)) - kbt->pgtable = __va(pgdir_pa); - else - pr_err("page table not in LOWMEM" - " (%#llx)\n", pgdir_pa); - } - local_flush_tlb_all(); + kbt->new_context = 1; + if (is_current) validate_stack(regs); - } if (regs == NULL) { if (is_current || t->state == TASK_RUNNING) { @@ -345,6 +279,78 @@ void KBacktraceIterator_next(struct KBacktraceIterator *kbt) } EXPORT_SYMBOL(KBacktraceIterator_next); +static void describe_addr(struct KBacktraceIterator *kbt, + unsigned long address, + int have_mmap_sem, char *buf, size_t bufsize) +{ + struct vm_area_struct *vma; + size_t namelen, remaining; + unsigned long size, offset, adjust; + char *p, *modname; + const char *name; + int rc; + + /* + * Look one byte back for every caller frame (i.e. those that + * aren't a new context) so we look up symbol data for the + * call itself, not the following instruction, which may be on + * a different line (or in a different function). + */ + adjust = !kbt->new_context; + address -= adjust; + + if (address >= PAGE_OFFSET) { + /* Handle kernel symbols. */ + BUG_ON(bufsize < KSYM_NAME_LEN); + name = kallsyms_lookup(address, &size, &offset, + &modname, buf); + if (name == NULL) { + buf[0] = '\0'; + return; + } + namelen = strlen(buf); + remaining = (bufsize - 1) - namelen; + p = buf + namelen; + rc = snprintf(p, remaining, "+%#lx/%#lx ", + offset + adjust, size); + if (modname && rc < remaining) + snprintf(p + rc, remaining - rc, "[%s] ", modname); + buf[bufsize-1] = '\0'; + return; + } + + /* If we don't have the mmap_sem, we can't show any more info. */ + buf[0] = '\0'; + if (!have_mmap_sem) + return; + + /* Find vma info. */ + vma = find_vma(kbt->task->mm, address); + if (vma == NULL || address < vma->vm_start) { + snprintf(buf, bufsize, "[unmapped address] "); + return; + } + + if (vma->vm_file) { + char *s; + p = d_path(&vma->vm_file->f_path, buf, bufsize); + if (IS_ERR(p)) + p = "?"; + s = strrchr(p, '/'); + if (s) + p = s+1; + } else { + p = "anon"; + } + + /* Generate a string description of the vma info. */ + namelen = strlen(p); + remaining = (bufsize - 1) - namelen; + memmove(buf, p, namelen); + snprintf(buf + namelen, remaining, "[%lx+%lx] ", + vma->vm_start, vma->vm_end - vma->vm_start); +} + /* * This method wraps the backtracer's more generic support. * It is only invoked from the architecture-specific code; show_stack() @@ -353,6 +359,7 @@ EXPORT_SYMBOL(KBacktraceIterator_next); void tile_show_stack(struct KBacktraceIterator *kbt, int headers) { int i; + int have_mmap_sem = 0; if (headers) { /* @@ -369,31 +376,16 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) kbt->verbose = 1; i = 0; for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) { - char *modname; - const char *name; - unsigned long address = kbt->it.pc; - unsigned long offset, size; char namebuf[KSYM_NAME_LEN+100]; + unsigned long address = kbt->it.pc; - if (address >= PAGE_OFFSET) - name = kallsyms_lookup(address, &size, &offset, - &modname, namebuf); - else - name = NULL; - - if (!name) - namebuf[0] = '\0'; - else { - size_t namelen = strlen(namebuf); - size_t remaining = (sizeof(namebuf) - 1) - namelen; - char *p = namebuf + namelen; - int rc = snprintf(p, remaining, "+%#lx/%#lx ", - offset, size); - if (modname && rc < remaining) - snprintf(p + rc, remaining - rc, - "[%s] ", modname); - namebuf[sizeof(namebuf)-1] = '\0'; - } + /* Try to acquire the mmap_sem as we pass into userspace. */ + if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm) + have_mmap_sem = + down_read_trylock(&kbt->task->mm->mmap_sem); + + describe_addr(kbt, address, have_mmap_sem, + namebuf, sizeof(namebuf)); pr_err(" frame %d: 0x%lx %s(sp 0x%lx)\n", i++, address, namebuf, (unsigned long)(kbt->it.sp)); @@ -408,6 +400,8 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) pr_err("Stack dump stopped; next frame identical to this one\n"); if (headers) pr_err("Stack dump complete\n"); + if (have_mmap_sem) + up_read(&kbt->task->mm->mmap_sem); } EXPORT_SYMBOL(tile_show_stack); diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 2bb6602..73cff81 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -200,7 +200,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, { siginfo_t info = { 0 }; int signo, code; - unsigned long address; + unsigned long address = 0; bundle_bits instr; /* Re-enable interrupts. */ @@ -223,6 +223,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, } switch (fault_num) { + case INT_MEM_ERROR: + signo = SIGBUS; + code = BUS_OBJERR; + break; case INT_ILL: if (copy_from_user(&instr, (void __user *)regs->pc, sizeof(instr))) { @@ -289,7 +293,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, address = regs->pc; break; #ifdef __tilegx__ - case INT_ILL_TRANS: + case INT_ILL_TRANS: { + /* Avoid a hardware erratum with the return address stack. */ + fill_ra_stack(); + signo = SIGSEGV; code = SEGV_MAPERR; if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK) @@ -297,6 +304,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, else address = 0; /* FIXME: GX: single-step for address */ break; + } #endif default: panic("Unexpected do_trap interrupt number %d", fault_num); @@ -308,7 +316,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, info.si_addr = (void __user *)address; if (signo == SIGILL) info.si_trapno = fault_num; - trace_unhandled_signal("trap", regs, address, signo); + if (signo != SIGTRAP) + trace_unhandled_signal("trap", regs, address, signo); force_sig_info(signo, &info, current); } diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 0c26086..985f598 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile @@ -7,6 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ strchr_$(BITS).o strlen_$(BITS).o ifeq ($(CONFIG_TILEGX),y) +CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer lib-y += memcpy_user_64.o else lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 8928aac..db4fb89 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c @@ -39,7 +39,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) { char *p, *base; size_t step_size, load_count; + + /* + * On TILEPro the striping granularity is a fixed 8KB; on + * TILE-Gx it is configurable, and we rely on the fact that + * the hypervisor always configures maximum striping, so that + * bits 9 and 10 of the PA are part of the stripe function, so + * every 512 bytes we hit a striping boundary. + * + */ +#ifdef __tilegx__ + const unsigned long STRIPE_WIDTH = 512; +#else const unsigned long STRIPE_WIDTH = 8192; +#endif + #ifdef __tilegx__ /* * On TILE-Gx, we must disable the dstream prefetcher before doing @@ -74,7 +88,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) * memory, that one load would be sufficient, but since we may * be, we also need to back up to the last load issued to * another memory controller, which would be the point where - * we crossed an 8KB boundary (the granularity of striping + * we crossed a "striping" boundary (the granularity of striping * across memory controllers). Keep backing up and doing this * until we are before the beginning of the buffer, or have * hit all the controllers. @@ -88,12 +102,22 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) * every cache line on a full memory stripe on each * controller" that we simply do that, to simplify the logic. * - * FIXME: See bug 9535 for some issues with this code. + * On TILE-Gx the hash-for-home function is much more complex, + * with the upshot being we can't readily guarantee we have + * hit both entries in the 128-entry AMT that were hit by any + * load in the entire range, so we just re-load them all. + * With larger buffers, we may want to consider using a hypervisor + * trap to issue loads directly to each hash-for-home tile for + * each controller (doing it from Linux would trash the TLB). */ if (hfh) { step_size = L2_CACHE_BYTES; +#ifdef __tilegx__ + load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES; +#else load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * (1 << CHIP_LOG_NUM_MSHIMS()); +#endif } else { step_size = STRIPE_WIDTH; load_count = (1 << CHIP_LOG_NUM_MSHIMS()); @@ -109,7 +133,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) /* Figure out how far back we need to go. */ base = p - (step_size * (load_count - 2)); - if ((long)base < (long)buffer) + if ((unsigned long)base < (unsigned long)buffer) base = buffer; /* diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c index 4763b3a..37440ca 100644 --- a/arch/tile/lib/memcpy_user_64.c +++ b/arch/tile/lib/memcpy_user_64.c @@ -14,7 +14,13 @@ * Do memcpy(), but trap and return "n" when a load or store faults. * * Note: this idiom only works when memcpy() compiles to a leaf function. - * If "sp" is updated during memcpy, the "jrp lr" will be incorrect. + * Here leaf function not only means it does not have calls, but also + * requires no stack operations (sp, stack frame pointer) and no + * use of callee-saved registers, else "jrp lr" will be incorrect since + * unwinding stack frame is bypassed. Since memcpy() is not complex so + * these conditions are satisfied here, but we need to be careful when + * modifying this file. This is not a clean solution but is the best + * one so far. * * Also note that we are capturing "n" from the containing scope here. */ diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h index c101098..6ac3750 100644 --- a/arch/tile/lib/spinlock_common.h +++ b/arch/tile/lib/spinlock_common.h @@ -60,5 +60,5 @@ static void delay_backoff(int iterations) loops += __insn_crc32_32(stack_pointer, get_cycles_low()) & (loops - 1); - relax(1 << exponent); + relax(loops); } diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index cba30e9..22e58f5 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -130,7 +130,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) } /* - * Handle a fault on the vmalloc or module mapping area + * Handle a fault on the vmalloc area. */ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) { @@ -203,9 +203,14 @@ static pgd_t *get_current_pgd(void) * interrupt or a critical region, and must do as little as possible. * Similarly, we can't use atomic ops here, since we may be handling a * fault caused by an atomic op access. + * + * If we find a migrating PTE while we're in an NMI context, and we're + * at a PC that has a registered exception handler, we don't wait, + * since this thread may (e.g.) have been interrupted while migrating + * its own stack, which would then cause us to self-deadlock. */ static int handle_migrating_pte(pgd_t *pgd, int fault_num, - unsigned long address, + unsigned long address, unsigned long pc, int is_kernel_mode, int write) { pud_t *pud; @@ -227,6 +232,8 @@ static int handle_migrating_pte(pgd_t *pgd, int fault_num, pte_offset_kernel(pmd, address); pteval = *pte; if (pte_migrating(pteval)) { + if (in_nmi() && search_exception_tables(pc)) + return 0; wait_for_migration(pte); return 1; } @@ -300,7 +307,7 @@ static int handle_page_fault(struct pt_regs *regs, * rather than trying to patch up the existing PTE. */ pgd = get_current_pgd(); - if (handle_migrating_pte(pgd, fault_num, address, + if (handle_migrating_pte(pgd, fault_num, address, regs->pc, is_kernel_mode, write)) return 1; @@ -335,9 +342,12 @@ static int handle_page_fault(struct pt_regs *regs, /* * If we're trying to touch user-space addresses, we must * be either at PL0, or else with interrupts enabled in the - * kernel, so either way we can re-enable interrupts here. + * kernel, so either way we can re-enable interrupts here + * unless we are doing atomic access to user space with + * interrupts disabled. */ - local_irq_enable(); + if (!(regs->flags & PT_FLAGS_DISABLE_IRQ)) + local_irq_enable(); mm = tsk->mm; @@ -665,7 +675,7 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, */ if (fault_num == INT_DTLB_ACCESS) write = 1; - if (handle_migrating_pte(pgd, fault_num, address, 1, write)) + if (handle_migrating_pte(pgd, fault_num, address, pc, 1, write)) return state; /* Return zero so that we continue on with normal fault handling. */ diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index 1cc6ae4..499f737 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -394,6 +394,7 @@ int page_home(struct page *page) return pte_to_home(*virt_to_pte(NULL, kva)); } } +EXPORT_SYMBOL(page_home); void homecache_change_page_home(struct page *page, int order, int home) { diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 830c490..6a9d20d 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -254,11 +254,6 @@ static pgprot_t __init init_pgprot(ulong address) return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE); } - /* As a performance optimization, keep the boot init stack here. */ - if (address >= (ulong)&init_thread_union && - address < (ulong)&init_thread_union + THREAD_SIZE) - return construct_pgprot(PAGE_KERNEL, smp_processor_id()); - #ifndef __tilegx__ #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() /* Force the atomic_locks[] array page to be hash-for-home. */ @@ -557,6 +552,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) address = MEM_SV_INTRPT; pmd = get_pmd(pgtables, address); + pfn = 0; /* code starts at PA 0 */ if (ktext_small) { /* Allocate an L2 PTE for the kernel text */ int cpu = 0; @@ -579,10 +575,15 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } BUG_ON(address != (unsigned long)_stext); - pfn = 0; /* code starts at PA 0 */ - pte = alloc_pte(); - for (pte_ofs = 0; address < (unsigned long)_einittext; - pfn++, pte_ofs++, address += PAGE_SIZE) { + pte = NULL; + for (; address < (unsigned long)_einittext; + pfn++, address += PAGE_SIZE) { + pte_ofs = pte_index(address); + if (pte_ofs == 0) { + if (pte) + assign_pte(pmd++, pte); + pte = alloc_pte(); + } if (!ktext_local) { prot = set_remote_cache_cpu(prot, cpu); cpu = cpumask_next(cpu, &ktext_mask); @@ -591,7 +592,8 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } pte[pte_ofs] = pfn_pte(pfn, prot); } - assign_pte(pmd, pte); + if (pte) + assign_pte(pmd, pte); } else { pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); pteval = pte_mkhuge(pteval); @@ -614,7 +616,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) else pteval = hv_pte_set_mode(pteval, HV_PTE_MODE_CACHE_NO_L3); - *(pte_t *)pmd = pteval; + for (; address < (unsigned long)_einittext; + pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE) + *(pte_t *)(pmd++) = pfn_pte(pfn, pteval); } /* Set swapper_pgprot here so it is flushed to memory right away. */ diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 8730369..2410aa8 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -177,14 +177,10 @@ void shatter_huge_page(unsigned long addr) if (!pmd_huge_page(*pmd)) return; - /* - * Grab the pgd_lock, since we may need it to walk the pgd_list, - * and since we need some kind of lock here to avoid races. - */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock_irqsave(&init_mm.page_table_lock, flags); if (!pmd_huge_page(*pmd)) { /* Lost the race to convert the huge page. */ - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock_irqrestore(&init_mm.page_table_lock, flags); return; } @@ -194,6 +190,7 @@ void shatter_huge_page(unsigned long addr) #ifdef __PAGETABLE_PMD_FOLDED /* Walk every pgd on the system and update the pmd there. */ + spin_lock(&pgd_lock); list_for_each(pos, &pgd_list) { pmd_t *copy_pmd; pgd = list_to_pgd(pos) + pgd_index(addr); @@ -201,6 +198,7 @@ void shatter_huge_page(unsigned long addr) copy_pmd = pmd_offset(pud, addr); __set_pmd(copy_pmd, *pmd); } + spin_unlock(&pgd_lock); #endif /* Tell every cpu to notice the change. */ @@ -208,7 +206,7 @@ void shatter_huge_page(unsigned long addr) cpu_possible_mask, NULL, 0); /* Hold the lock until the TLB flush is finished to avoid races. */ - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock_irqrestore(&init_mm.page_table_lock, flags); } /* @@ -217,9 +215,13 @@ void shatter_huge_page(unsigned long addr) * against pageattr.c; it is the unique case in which a valid change * of kernel pagetables can't be lazily synchronized by vmalloc faults. * vmalloc faults work because attached pagetables are never freed. - * The locking scheme was chosen on the basis of manfred's - * recommendations and having no core impact whatsoever. - * -- wli + * + * The lock is always taken with interrupts disabled, unlike on x86 + * and other platforms, because we need to take the lock in + * shatter_huge_page(), which may be called from an interrupt context. + * We are not at risk from the tlbflush IPI deadlock that was seen on + * x86, since we use the flush_remote() API to have the hypervisor do + * the TLB flushes regardless of irq disabling. */ DEFINE_SPINLOCK(pgd_lock); LIST_HEAD(pgd_list); @@ -469,10 +471,18 @@ void __set_pte(pte_t *ptep, pte_t pte) void set_pte(pte_t *ptep, pte_t pte) { - struct page *page = pfn_to_page(pte_pfn(pte)); - - /* Update the home of a PTE if necessary */ - pte = pte_set_home(pte, page_home(page)); + if (pte_present(pte) && + (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) { + /* The PTE actually references physical memory. */ + unsigned long pfn = pte_pfn(pte); + if (pfn_valid(pfn)) { + /* Update the home of the PTE from the struct page. */ + pte = pte_set_home(pte, page_home(pfn_to_page(pfn))); + } else if (hv_pte_get_mode(pte) == 0) { + /* remap_pfn_range(), etc, must supply PTE mode. */ + panic("set_pte(): out-of-range PFN and mode 0\n"); + } + } __set_pte(ptep, pte); } diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h index dc36b22..6673508 100644 --- a/arch/um/drivers/cow.h +++ b/arch/um/drivers/cow.h @@ -3,41 +3,6 @@ #include <asm/types.h> -#if defined(__KERNEL__) - -# include <asm/byteorder.h> - -# if defined(__BIG_ENDIAN) -# define ntohll(x) (x) -# define htonll(x) (x) -# elif defined(__LITTLE_ENDIAN) -# define ntohll(x) be64_to_cpu(x) -# define htonll(x) cpu_to_be64(x) -# else -# error "Could not determine byte order" -# endif - -#else -/* For the definition of ntohl, htonl and __BYTE_ORDER */ -#include <endian.h> -#include <netinet/in.h> -#if defined(__BYTE_ORDER) - -# if __BYTE_ORDER == __BIG_ENDIAN -# define ntohll(x) (x) -# define htonll(x) (x) -# elif __BYTE_ORDER == __LITTLE_ENDIAN -# define ntohll(x) bswap_64(x) -# define htonll(x) bswap_64(x) -# else -# error "Could not determine byte order: __BYTE_ORDER uncorrectly defined" -# endif - -#else /* ! defined(__BYTE_ORDER) */ -# error "Could not determine byte order: __BYTE_ORDER not defined" -#endif -#endif /* ! defined(__KERNEL__) */ - extern int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, int alignment, int *bitmap_offset_out, unsigned long *bitmap_len_out, int *data_offset_out); diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c index 9cbb426..0ee9cc6 100644 --- a/arch/um/drivers/cow_user.c +++ b/arch/um/drivers/cow_user.c @@ -8,11 +8,10 @@ * that. */ #include <unistd.h> -#include <byteswap.h> #include <errno.h> #include <string.h> #include <arpa/inet.h> -#include <asm/types.h> +#include <endian.h> #include "cow.h" #include "cow_sys.h" @@ -214,8 +213,8 @@ int write_cow_header(char *cow_file, int fd, char *backing_file, "header\n"); goto out; } - header->magic = htonl(COW_MAGIC); - header->version = htonl(COW_VERSION); + header->magic = htobe32(COW_MAGIC); + header->version = htobe32(COW_VERSION); err = -EINVAL; if (strlen(backing_file) > sizeof(header->backing_file) - 1) { @@ -246,10 +245,10 @@ int write_cow_header(char *cow_file, int fd, char *backing_file, goto out_free; } - header->mtime = htonl(modtime); - header->size = htonll(*size); - header->sectorsize = htonl(sectorsize); - header->alignment = htonl(alignment); + header->mtime = htobe32(modtime); + header->size = htobe64(*size); + header->sectorsize = htobe32(sectorsize); + header->alignment = htobe32(alignment); header->cow_format = COW_BITMAP; err = cow_write_file(fd, header, sizeof(*header)); @@ -301,8 +300,8 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, magic = header->v1.magic; if (magic == COW_MAGIC) version = header->v1.version; - else if (magic == ntohl(COW_MAGIC)) - version = ntohl(header->v1.version); + else if (magic == be32toh(COW_MAGIC)) + version = be32toh(header->v1.version); /* No error printed because the non-COW case comes through here */ else goto out; @@ -327,9 +326,9 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, "header\n"); goto out; } - *mtime_out = ntohl(header->v2.mtime); - *size_out = ntohll(header->v2.size); - *sectorsize_out = ntohl(header->v2.sectorsize); + *mtime_out = be32toh(header->v2.mtime); + *size_out = be64toh(header->v2.size); + *sectorsize_out = be32toh(header->v2.sectorsize); *bitmap_offset_out = sizeof(header->v2); *align_out = *sectorsize_out; file = header->v2.backing_file; @@ -341,10 +340,10 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, "header\n"); goto out; } - *mtime_out = ntohl(header->v3.mtime); - *size_out = ntohll(header->v3.size); - *sectorsize_out = ntohl(header->v3.sectorsize); - *align_out = ntohl(header->v3.alignment); + *mtime_out = be32toh(header->v3.mtime); + *size_out = be64toh(header->v3.size); + *sectorsize_out = be32toh(header->v3.sectorsize); + *align_out = be32toh(header->v3.alignment); if (*align_out == 0) { cow_printf("read_cow_header - invalid COW header, " "align == 0\n"); @@ -366,16 +365,16 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, * this was used until Dec2005 - 64bits are needed to represent * 2038+. I.e. we can safely do this truncating cast. * - * Additionally, we must use ntohl() instead of ntohll(), since + * Additionally, we must use be32toh() instead of be64toh(), since * the program used to use the former (tested - I got mtime * mismatch "0 vs whatever"). * * Ever heard about bug-to-bug-compatibility ? ;-) */ - *mtime_out = (time32_t) ntohl(header->v3_b.mtime); + *mtime_out = (time32_t) be32toh(header->v3_b.mtime); - *size_out = ntohll(header->v3_b.size); - *sectorsize_out = ntohl(header->v3_b.sectorsize); - *align_out = ntohl(header->v3_b.alignment); + *size_out = be64toh(header->v3_b.size); + *sectorsize_out = be32toh(header->v3_b.sectorsize); + *align_out = be32toh(header->v3_b.alignment); if (*align_out == 0) { cow_printf("read_cow_header - invalid COW header, " "align == 0\n"); diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index e672bd6..43b39d6 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -22,6 +22,7 @@ #include <linux/workqueue.h> #include <linux/mutex.h> #include <asm/uaccess.h> +#include <asm/switch_to.h> #include "init.h" #include "irq_kern.h" diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 8419f5c..fff2435 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,3 +1,4 @@ generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h -generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h +generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h +generic-y += switch_to.h diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 492bc4c..65a1c3d 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -3,9 +3,10 @@ # Licensed under the GPL # -CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START) \ - -DELF_ARCH=$(LDS_ELF_ARCH) \ - -DELF_FORMAT=$(LDS_ELF_FORMAT) +CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START) \ + -DELF_ARCH=$(LDS_ELF_ARCH) \ + -DELF_FORMAT=$(LDS_ELF_FORMAT) \ + $(LDS_EXTRA) extra-y := vmlinux.lds clean-files := diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index f386d04..2b73ded 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -88,11 +88,8 @@ static inline void set_current(struct task_struct *task) extern void arch_switch_to(struct task_struct *to); -void *_switch_to(void *prev, void *next, void *last) +void *__switch_to(struct task_struct *from, struct task_struct *to) { - struct task_struct *from = prev; - struct task_struct *to = next; - to->thread.prev_sched = from; set_current(to); @@ -111,7 +108,6 @@ void *_switch_to(void *prev, void *next, void *last) } while (current->thread.saved_task); return current->thread.prev_sched; - } void interrupt_end(void) diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 4947b31..0a49ef0 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -103,7 +103,6 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) void uml_setup_stubs(struct mm_struct *mm) { - struct page **pages; int err, ret; if (!skas_needs_stub) diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index 4be406a..36b62bc 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -14,6 +14,9 @@ LINK-y += $(call cc-option,-m32) export LDFLAGS +LDS_EXTRA := -Ui386 +export LDS_EXTRA + # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. include $(srctree)/arch/x86/Makefile_32.cpu diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8be5f54..e054459 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -557,6 +557,8 @@ struct __large_struct { unsigned long buf[100]; }; extern unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n); +extern __must_check long +strncpy_from_user(char *dst, const char __user *src, long count); /* * movsl can be slow when source and dest are not both 8-byte aligned diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 566e803..8084bc73 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -213,11 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to, return n; } -long __must_check strncpy_from_user(char *dst, const char __user *src, - long count); -long __must_check __strncpy_from_user(char *dst, - const char __user *src, long count); - /** * strlen_user: - Get the size of a string in user space. * @str: The string to measure. diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 1c66d30..fcd4b6f 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -208,10 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } } -__must_check long -strncpy_from_user(char *dst, const char __user *src, long count); -__must_check long -__strncpy_from_user(char *dst, const char __user *src, long count); __must_check long strnlen_user(const char __user *str, long n); __must_check long __strnlen_user(const char __user *str, long n); __must_check long strlen_user(const char __user *str); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 694d801..b8ba6e4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -38,6 +38,7 @@ #include <asm/traps.h> #include <asm/desc.h> #include <asm/tlbflush.h> +#include <asm/idle.h> static int kvmapf = 1; @@ -253,7 +254,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) kvm_async_pf_task_wait((u32)read_cr2()); break; case KVM_PV_REASON_PAGE_READY: + rcu_irq_enter(); + exit_idle(); kvm_async_pf_task_wake((u32)read_cr2()); + rcu_irq_exit(); break; } } diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index a73f0c1..173df38 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -369,7 +369,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) case MSR_CORE_PERF_FIXED_CTR_CTRL: if (pmu->fixed_ctr_ctrl == data) return 0; - if (!(data & 0xfffffffffffff444)) { + if (!(data & 0xfffffffffffff444ull)) { reprogram_fixed_counters(pmu, data); return 0; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 280751c..ad85adf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3906,7 +3906,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); vmx_set_cr4(&vmx->vcpu, 0); vmx_set_efer(&vmx->vcpu, 0); vmx_fpu_activate(&vmx->vcpu); diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 97be9cb..57252c9 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -7,6 +7,8 @@ #include <linux/highmem.h> #include <linux/module.h> +#include <asm/word-at-a-time.h> + /* * best effort, GUP based copy_from_user() that is NMI-safe */ @@ -41,3 +43,104 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) return len; } EXPORT_SYMBOL_GPL(copy_from_user_nmi); + +static inline unsigned long count_bytes(unsigned long mask) +{ + mask = (mask - 1) & ~mask; + mask >>= 7; + return count_masked_bytes(mask); +} + +/* + * Do a strncpy, return length of string without final '\0'. + * 'count' is the user-supplied count (return 'count' if we + * hit it), 'max' is the address space maximum (and we return + * -EFAULT if we hit it). + */ +static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, long max) +{ + long res = 0; + + /* + * Truncate 'max' to the user-specified limit, so that + * we only have one limit we need to check in the loop + */ + if (max > count) + max = count; + + while (max >= sizeof(unsigned long)) { + unsigned long c; + + /* Fall back to byte-at-a-time if we get a page fault */ + if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) + break; + /* This can write a few bytes past the NUL character, but that's ok */ + *(unsigned long *)(dst+res) = c; + c = has_zero(c); + if (c) + return res + count_bytes(c); + res += sizeof(unsigned long); + max -= sizeof(unsigned long); + } + + while (max) { + char c; + + if (unlikely(__get_user(c,src+res))) + return -EFAULT; + dst[res] = c; + if (!c) + return res; + res++; + max--; + } + + /* + * Uhhuh. We hit 'max'. But was that the user-specified maximum + * too? If so, that's ok - we got as much as the user asked for. + */ + if (res >= count) + return count; + + /* + * Nope: we hit the address space limit, and we still had more + * characters the caller would have wanted. That's an EFAULT. + */ + return -EFAULT; +} + +/** + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ +long +strncpy_from_user(char *dst, const char __user *src, long count) +{ + unsigned long max_addr, src_addr; + + if (unlikely(count <= 0)) + return 0; + + max_addr = current_thread_info()->addr_limit.seg; + src_addr = (unsigned long)src; + if (likely(src_addr < max_addr)) { + unsigned long max = max_addr - src_addr; + return do_strncpy_from_user(dst, src, count, max); + } + return -EFAULT; +} +EXPORT_SYMBOL(strncpy_from_user); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index d9b094c..ef2a6a5 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -33,93 +33,6 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) /* - * Copy a null terminated string from userspace. - */ - -#define __do_strncpy_from_user(dst, src, count, res) \ -do { \ - int __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testl %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decl %1\n" \ - " jnz 0b\n" \ - "1: subl %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -/** - * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * Caller must check the specified block with access_ok() before calling - * this function. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - -/* * Zero Userspace */ diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index b7c2849..0d0326f 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -9,55 +9,6 @@ #include <asm/uaccess.h> /* - * Copy a null terminated string from userspace. - */ - -#define __do_strncpy_from_user(dst,src,count,res) \ -do { \ - long __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testq %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decq %1\n" \ - " jnz 0b\n" \ - "1: subq %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movq %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - return __strncpy_from_user(dst, src, count); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - -/* * Zero Userspace */ diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h new file mode 100644 index 0000000..7d01b8c --- /dev/null +++ b/arch/x86/um/asm/barrier.h @@ -0,0 +1,75 @@ +#ifndef _ASM_UM_BARRIER_H_ +#define _ASM_UM_BARRIER_H_ + +#include <asm/asm.h> +#include <asm/segment.h> +#include <asm/cpufeature.h> +#include <asm/cmpxchg.h> +#include <asm/nops.h> + +#include <linux/kernel.h> +#include <linux/irqflags.h> + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ +#ifdef CONFIG_X86_32 + +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) + +#else /* CONFIG_X86_32 */ + +#define mb() asm volatile("mfence" : : : "memory") +#define rmb() asm volatile("lfence" : : : "memory") +#define wmb() asm volatile("sfence" : : : "memory") + +#endif /* CONFIG_X86_32 */ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP + +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +#define smp_rmb() rmb() +#else /* CONFIG_X86_PPRO_FENCE */ +#define smp_rmb() barrier() +#endif /* CONFIG_X86_PPRO_FENCE */ + +#ifdef CONFIG_X86_OOSTORE +#define smp_wmb() wmb() +#else /* CONFIG_X86_OOSTORE */ +#define smp_wmb() barrier() +#endif /* CONFIG_X86_OOSTORE */ + +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) + +#else /* CONFIG_SMP */ + +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) + +#endif /* CONFIG_SMP */ + +/* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) + */ +static inline void rdtsc_barrier(void) +{ + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); +} + +#endif diff --git a/arch/x86/um/asm/system.h b/arch/x86/um/asm/system.h deleted file mode 100644 index a459fd9..0000000 --- a/arch/x86/um/asm/system.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef _ASM_X86_SYSTEM_H_ -#define _ASM_X86_SYSTEM_H_ - -#include <asm/asm.h> -#include <asm/segment.h> -#include <asm/cpufeature.h> -#include <asm/cmpxchg.h> -#include <asm/nops.h> - -#include <linux/kernel.h> -#include <linux/irqflags.h> - -/* entries in ARCH_DLINFO: */ -#ifdef CONFIG_IA32_EMULATION -# define AT_VECTOR_SIZE_ARCH 2 -#else -# define AT_VECTOR_SIZE_ARCH 1 -#endif - -extern unsigned long arch_align_stack(unsigned long sp); - -void default_idle(void); - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - */ -#ifdef CONFIG_X86_32 -/* - * Some non-Intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) -#else -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence" ::: "memory") -#endif - -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while (0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() -#else -# define smp_rmb() barrier() -#endif -#ifdef CONFIG_X86_OOSTORE -# define smp_wmb() wmb() -#else -# define smp_wmb() barrier() -#endif -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - -/* - * Stop RDTSC speculation. This is needed when you need to use RDTSC - * (or get_cycles or vread that possibly accesses the TSC) in a defined - * code region. - * - * (Could use an alternative three way for this if there was one.) - */ -static inline void rdtsc_barrier(void) -{ - alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); - alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); -} - -extern void *_switch_to(void *prev, void *next, void *last); -#define switch_to(prev, next, last) prev = _switch_to(prev, next, last) - -#endif diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 988828b..b8e2794 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1859,6 +1859,7 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, #endif /* CONFIG_X86_64 */ static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; +static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss; static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) { @@ -1899,7 +1900,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) * We just don't map the IO APIC - all access is via * hypercalls. Keep the address in the pte for reference. */ - pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); + pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL); break; #endif @@ -2064,6 +2065,7 @@ void __init xen_init_mmu_ops(void) pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); + memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE); } /* Protected by xen_reservation_lock. */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 02900e8..5fac691 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -59,7 +59,7 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) static void __cpuinit cpu_bringup(void) { - int cpu = smp_processor_id(); + int cpu; cpu_init(); touch_softlockup_watchdog(); |