diff options
Diffstat (limited to 'arch/x86_64')
28 files changed, 179 insertions, 156 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 75e52c5..e63323e 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -65,6 +65,10 @@ config GENERIC_IOMAP bool default y +config ARCH_MAY_HAVE_PC_FDC + bool + default y + source "init/Kconfig" @@ -148,7 +152,6 @@ config X86_CPUID with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. -# disable it for opteron optimized builds because it pulls in ACPI_BOOT config X86_HT bool depends on SMP && !MK8 @@ -441,6 +444,11 @@ config ISA_DMA_API bool default y +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + menu "Power management options" source kernel/power/Kconfig @@ -465,7 +473,6 @@ config PCI_DIRECT config PCI_MMCONFIG bool "Support mmconfig PCI config space access" depends on PCI && ACPI - select ACPI_BOOT config UNORDERED_IO bool "Unordered IO mapping access" diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S index ff58b28..12ea0b6 100644 --- a/arch/x86_64/boot/setup.S +++ b/arch/x86_64/boot/setup.S @@ -81,7 +81,7 @@ start: # This is the setup header, and it must start at %cs:2 (old 0x9020:2) .ascii "HdrS" # header signature - .word 0x0203 # header version number (>= 0x0105) + .word 0x0204 # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) realmode_swtch: .word 0, 0 # default_switch, SETUPSEG start_sys_seg: .word SYSSEG diff --git a/arch/x86_64/boot/tools/build.c b/arch/x86_64/boot/tools/build.c index 18b5bac..c44f5e2 100644 --- a/arch/x86_64/boot/tools/build.c +++ b/arch/x86_64/boot/tools/build.c @@ -178,7 +178,9 @@ int main(int argc, char ** argv) die("Output: seek failed"); buf[0] = (sys_size & 0xff); buf[1] = ((sys_size >> 8) & 0xff); - if (write(1, buf, 2) != 2) + buf[2] = ((sys_size >> 16) & 0xff); + buf[3] = ((sys_size >> 24) & 0xff); + if (write(1, buf, 4) != 4) die("Write of image length failed"); return 0; /* Everything is OK */ diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 
b97a61e..bf57e23 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -135,8 +135,6 @@ CONFIG_PM_STD_PARTITION="" # ACPI (Advanced Configuration and Power Interface) Support # CONFIG_ACPI=y -CONFIG_ACPI_BOOT=y -CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y @@ -151,10 +149,8 @@ CONFIG_ACPI_NUMA=y CONFIG_ACPI_TOSHIBA=y CONFIG_ACPI_BLACKLIST_YEAR=2001 # CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_BUS=y CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y CONFIG_ACPI_SYSTEM=y # CONFIG_ACPI_CONTAINER is not set diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index c45d6a0..f174083 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -307,7 +307,7 @@ ia32_sys_call_table: .quad stub32_fork .quad sys_read .quad sys_write - .quad sys32_open /* 5 */ + .quad compat_sys_open /* 5 */ .quad sys_close .quad sys32_waitpid .quad sys_creat diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index be996d1b..04d8040 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -969,32 +969,6 @@ long sys32_kill(int pid, int sig) return sys_kill(pid, sig); } -asmlinkage long sys32_open(const char __user * filename, int flags, int mode) -{ - char * tmp; - int fd, error; - - /* don't force O_LARGEFILE */ - tmp = getname(filename); - fd = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - fd = get_unused_fd(); - if (fd >= 0) { - struct file *f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) { - put_unused_fd(fd); - fd = error; - } else { - fsnotify_open(f->f_dentry); - fd_install(fd, f); - } - } - putname(tmp); - } - return fd; -} - extern asmlinkage long sys_timer_create(clockid_t which_clock, struct sigevent __user *timer_event_spec, diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index c32e198..1579bdd 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -12,7 +12,7 @@ obj-y := process.o 
signal.o entry.o traps.o irq.o \ obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ -obj-$(CONFIG_ACPI_BOOT) += acpi/ +obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CPUID) += cpuid.o diff --git a/arch/x86_64/kernel/acpi/Makefile b/arch/x86_64/kernel/acpi/Makefile index d2c2ee5..7da9ace 100644 --- a/arch/x86_64/kernel/acpi/Makefile +++ b/arch/x86_64/kernel/acpi/Makefile @@ -1,3 +1,3 @@ -obj-$(CONFIG_ACPI_BOOT) := boot.o -boot-$(CONFIG_ACPI_BOOT) := ../../../i386/kernel/acpi/boot.o +obj-y := boot.o +boot-y := ../../../i386/kernel/acpi/boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c index 7a275de..148f6f7 100644 --- a/arch/x86_64/kernel/acpi/sleep.c +++ b/arch/x86_64/kernel/acpi/sleep.c @@ -47,7 +47,6 @@ #include <asm/proto.h> #include <asm/tlbflush.h> - /* -------------------------------------------------------------------------- Low-Level Sleep Support -------------------------------------------------------------------------- */ @@ -77,11 +76,12 @@ static void init_low_mapping(void) * Create an identity mapped page table and copy the wakeup routine to * low memory. 
*/ -int acpi_save_state_mem (void) +int acpi_save_state_mem(void) { init_low_mapping(); - memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start); + memcpy((void *)acpi_wakeup_address, &wakeup_start, + &wakeup_end - &wakeup_start); acpi_copy_wakeup_routine(acpi_wakeup_address); return 0; @@ -90,7 +90,7 @@ int acpi_save_state_mem (void) /* * acpi_restore_state */ -void acpi_restore_state_mem (void) +void acpi_restore_state_mem(void) { set_pgd(pgd_offset(current->mm, 0UL), low_ptr); flush_tlb_all(); @@ -108,7 +108,8 @@ void __init acpi_reserve_bootmem(void) { acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) - printk(KERN_CRIT "ACPI: Wakeup code way too big, will crash on attempt to suspend\n"); + printk(KERN_CRIT + "ACPI: Wakeup code way too big, will crash on attempt to suspend\n"); } static int __init acpi_sleep_setup(char *str) @@ -127,6 +128,8 @@ static int __init acpi_sleep_setup(char *str) __setup("acpi_sleep=", acpi_sleep_setup); -#endif /*CONFIG_ACPI_SLEEP*/ +#endif /*CONFIG_ACPI_SLEEP */ -void acpi_pci_link_exit(void) {} +void acpi_pci_link_exit(void) +{ +} diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index b548dea..116ac5f 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -85,7 +85,7 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) struct e820entry *ei = &e820.map[i]; if (type && ei->type != type) continue; - if (ei->addr >= end || ei->addr + ei->size < start) + if (ei->addr >= end || ei->addr + ei->size <= start) continue; return 1; } diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 096d470..be51dbe 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -784,8 +784,9 @@ ENTRY(execve) ret CFI_ENDPROC -ENTRY(page_fault) +KPROBE_ENTRY(page_fault) errorentry do_page_fault + .previous .text ENTRY(coprocessor_error) zeroentry do_coprocessor_error @@ 
-797,13 +798,14 @@ ENTRY(device_not_available) zeroentry math_state_restore /* runs on exception stack */ -ENTRY(debug) +KPROBE_ENTRY(debug) CFI_STARTPROC pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug jmp paranoid_exit CFI_ENDPROC + .previous .text /* runs on exception stack */ ENTRY(nmi) @@ -854,8 +856,9 @@ paranoid_schedule: jmp paranoid_userspace CFI_ENDPROC -ENTRY(int3) +KPROBE_ENTRY(int3) zeroentry do_int3 + .previous .text ENTRY(overflow) zeroentry do_overflow @@ -892,8 +895,9 @@ ENTRY(stack_segment) jmp paranoid_exit CFI_ENDPROC -ENTRY(general_protection) +KPROBE_ENTRY(general_protection) errorentry do_general_protection + .previous .text ENTRY(alignment_check) errorentry do_alignment_check diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index 30c843a..b1c144f 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c @@ -20,12 +20,12 @@ #include <asm/smp.h> #include <asm/ipi.h> -#if defined(CONFIG_ACPI_BUS) +#if defined(CONFIG_ACPI) #include <acpi/acpi_bus.h> #endif /* which logical CPU number maps to which CPU (physical APIC ID) */ -u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; EXPORT_SYMBOL(x86_cpu_to_apicid); u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; @@ -47,7 +47,7 @@ void __init clustered_apic_check(void) u8 cluster_cnt[NUM_APIC_CLUSTERS]; int num_cpus = 0; -#if defined(CONFIG_ACPI_BUS) +#if defined(CONFIG_ACPI) /* * Some x86_64 machines use physical APIC mode regardless of how many * procs/clusters are present (x86_64 ES7000 is an example). 
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c index 9703da7..f6523dd 100644 --- a/arch/x86_64/kernel/genapic_cluster.c +++ b/arch/x86_64/kernel/genapic_cluster.c @@ -72,10 +72,14 @@ static void cluster_send_IPI_mask(cpumask_t mask, int vector) static void cluster_send_IPI_allbutself(int vector) { cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); + int me = get_cpu(); /* Ensure we are not preempted when we clear */ + + cpu_clear(me, mask); if (!cpus_empty(mask)) cluster_send_IPI_mask(mask, vector); + + put_cpu(); } static void cluster_send_IPI_all(int vector) diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index d206d7e..40e0aca 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -70,7 +70,7 @@ static struct irq_pin_list { short apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; -int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; +int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; #ifdef CONFIG_PCI_MSI #define vector_to_irq(vector) \ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) @@ -78,6 +78,54 @@ int vector_irq[NR_VECTORS] = { [0 ... 
NR_VECTORS - 1] = -1}; #define vector_to_irq(vector) (vector) #endif +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_pin_list *entry = irq_2_pin + irq; \ + \ + for (;;) { \ + unsigned int reg; \ + pin = entry->pin; \ + if (pin == -1) \ + break; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, reg); \ + if (!entry->next) \ + break; \ + entry = irq_2_pin + entry->next; \ + } \ + FINAL; \ +} + +#ifdef CONFIG_SMP +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + unsigned long flags; + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + cpus_and(mask, tmp, CPU_MASK_ALL); + + dest = cpu_mask_to_apicid(mask); + + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); + + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION(1, = dest, ) + set_irq_info(irq, mask); + spin_unlock_irqrestore(&ioapic_lock, flags); +} +#endif + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -101,26 +149,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry->pin = pin; } -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_pin_list *entry = irq_2_pin + irq; \ - \ - for (;;) { \ - unsigned int reg; \ - pin = entry->pin; \ - if (pin == -1) \ - break; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, reg); \ - if (!entry->next) \ - break; \ - entry = irq_2_pin + entry->next; \ - } \ - FINAL; \ -} #define DO_ACTION(name,R,ACTION, FINAL) \ \ @@ -655,7 +683,7 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ -u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; int assign_irq_vector(int irq) { @@ -767,6 +795,7 @@ static void __init setup_IO_APIC_irqs(void) spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1314,6 +1343,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq) */ static void ack_edge_ioapic_irq(unsigned int irq) { + move_irq(irq); if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); @@ -1343,26 +1373,10 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq) static void end_level_ioapic_irq (unsigned int irq) { + move_irq(irq); ack_APIC_irq(); } -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - unsigned long flags; - unsigned int dest; - - dest = cpu_mask_to_apicid(mask); - - /* - * Only the high 8 bits are valid. 
- */ - dest = SET_APIC_LOGICAL_ID(dest); - - spin_lock_irqsave(&ioapic_lock, flags); - __DO_ACTION(1, = dest, ) - spin_unlock_irqrestore(&ioapic_lock, flags); -} - #ifdef CONFIG_PCI_MSI static unsigned int startup_edge_ioapic_vector(unsigned int vector) { @@ -1375,6 +1389,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) { int irq = vector_to_irq(vector); + move_native_irq(vector); ack_edge_ioapic_irq(irq); } @@ -1389,6 +1404,7 @@ static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); + move_native_irq(vector); end_level_ioapic_irq(irq); } @@ -1406,14 +1422,17 @@ static void unmask_IO_APIC_vector (unsigned int vector) unmask_IO_APIC_irq(irq); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_vector (unsigned int vector, cpumask_t cpu_mask) { int irq = vector_to_irq(vector); + set_native_irq_info(vector, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask); } -#endif +#endif // CONFIG_SMP +#endif // CONFIG_PCI_MSI /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1424,7 +1443,7 @@ static void set_ioapic_affinity_vector (unsigned int vector, * races. 
*/ -static struct hw_interrupt_type ioapic_edge_type = { +static struct hw_interrupt_type ioapic_edge_type __read_mostly = { .typename = "IO-APIC-edge", .startup = startup_edge_ioapic, .shutdown = shutdown_edge_ioapic, @@ -1432,10 +1451,12 @@ static struct hw_interrupt_type ioapic_edge_type = { .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; -static struct hw_interrupt_type ioapic_level_type = { +static struct hw_interrupt_type ioapic_level_type __read_mostly = { .typename = "IO-APIC-level", .startup = startup_level_ioapic, .shutdown = shutdown_level_ioapic, @@ -1443,7 +1464,9 @@ static struct hw_interrupt_type ioapic_level_type = { .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static inline void init_IO_APIC_traps(void) @@ -1506,7 +1529,7 @@ static void ack_lapic_irq (unsigned int irq) static void end_lapic_irq (unsigned int i) { /* nothing */ } -static struct hw_interrupt_type lapic_irq_type = { +static struct hw_interrupt_type lapic_irq_type __read_mostly = { .typename = "local-APIC-edge", .startup = NULL, /* startup_irq() not used for IRQ0 */ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ @@ -1841,7 +1864,7 @@ device_initcall(ioapic_init_sysfs); ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI #define IO_APIC_MAX_ID 0xFE @@ -1918,12 +1941,13 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(use_pci_vector() ? 
entry.vector : irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; } -#endif /*CONFIG_ACPI_BOOT*/ +#endif /* CONFIG_ACPI */ /* @@ -1931,6 +1955,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ +#ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; @@ -1949,3 +1974,4 @@ void __init setup_ioapic_dest(void) } } +#endif diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 5c6dc70..df08c43 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -74,7 +74,7 @@ static inline int is_IF_modifier(kprobe_opcode_t *insn) return 0; } -int arch_prepare_kprobe(struct kprobe *p) +int __kprobes arch_prepare_kprobe(struct kprobe *p) { /* insn: must be on special executable page on x86_64. */ up(&kprobe_mutex); @@ -189,7 +189,7 @@ static inline s32 *is_riprel(u8 *insn) return NULL; } -void arch_copy_kprobe(struct kprobe *p) +void __kprobes arch_copy_kprobe(struct kprobe *p) { s32 *ripdisp; memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE); @@ -215,21 +215,21 @@ void arch_copy_kprobe(struct kprobe *p) p->opcode = *p->addr; } -void arch_arm_kprobe(struct kprobe *p) +void __kprobes arch_arm_kprobe(struct kprobe *p) { *p->addr = BREAKPOINT_INSTRUCTION; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_disarm_kprobe(struct kprobe *p) +void __kprobes arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_remove_kprobe(struct kprobe *p) +void __kprobes arch_remove_kprobe(struct kprobe *p) { up(&kprobe_mutex); free_insn_slot(p->ainsn.insn); @@ -261,7 +261,7 @@ static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) 
kprobe_saved_rflags &= ~IF_MASK; } -static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { regs->eflags |= TF_MASK; regs->eflags &= ~IF_MASK; @@ -272,7 +272,8 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->rip = (unsigned long)p->ainsn.insn; } -void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, + struct pt_regs *regs) { unsigned long *sara = (unsigned long *)regs->rsp; struct kretprobe_instance *ri; @@ -295,7 +296,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. */ -int kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; @@ -310,7 +311,8 @@ int kprobe_handler(struct pt_regs *regs) Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { - if (kprobe_status == KPROBE_HIT_SS) { + if (kprobe_status == KPROBE_HIT_SS && + *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { regs->eflags &= ~TF_MASK; regs->eflags |= kprobe_saved_rflags; unlock_kprobes(); @@ -360,7 +362,10 @@ int kprobe_handler(struct pt_regs *regs) * either a probepoint or a debugger breakpoint * at this address. In either case, no further * handling of this interrupt is appropriate. + * Back up over the (now missing) int3 and run + * the original instruction. 
*/ + regs->rip = (unsigned long)addr; ret = 1; } /* Not one of ours: let kernel handle it */ @@ -399,7 +404,7 @@ no_kprobe: /* * Called when we hit the probe point at kretprobe_trampoline */ -int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head; @@ -478,7 +483,7 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. */ -static void resume_execution(struct kprobe *p, struct pt_regs *regs) +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { unsigned long *tos = (unsigned long *)regs->rsp; unsigned long next_rip = 0; @@ -536,7 +541,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs) * Interrupts are disabled on entry as trap1 is an interrupt gate and they * remain disabled thoroughout this function. And we hold kprobe lock. */ -int post_kprobe_handler(struct pt_regs *regs) +int __kprobes post_kprobe_handler(struct pt_regs *regs) { if (!kprobe_running()) return 0; @@ -571,7 +576,7 @@ out: } /* Interrupts disabled, kprobe_lock held. */ -int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { if (current_kprobe->fault_handler && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) @@ -590,8 +595,8 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) /* * Wrapper routine for handling exceptions. 
*/ -int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, - void *data) +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; switch (val) { @@ -619,7 +624,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, return NOTIFY_DONE; } -int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; @@ -640,7 +645,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -void jprobe_return(void) +void __kprobes jprobe_return(void) { preempt_enable_no_resched(); asm volatile (" xchg %%rbx,%%rsp \n" @@ -651,7 +656,7 @@ void jprobe_return(void) (jprobe_saved_rsp):"memory"); } -int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { u8 *addr = (u8 *) (regs->rip - 1); unsigned long stack_addr = (unsigned long)jprobe_saved_rsp; diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 79c362d..8d8ed6a 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -74,7 +74,7 @@ static unsigned int num_processors = 0; physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; /* ACPI MADT entry parsing functions */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI extern struct acpi_boot_flags acpi_boot; #ifdef CONFIG_X86_LOCAL_APIC extern int acpi_parse_lapic (acpi_table_entry_header *header); @@ -84,7 +84,7 @@ extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header); #ifdef CONFIG_X86_IO_APIC extern int acpi_parse_ioapic (acpi_table_entry_header *header); #endif /*CONFIG_X86_IO_APIC*/ -#endif /*CONFIG_ACPI_BOOT*/ +#endif /*CONFIG_ACPI*/ u8 bios_cpu_apicid[NR_CPUS] = { [0 ... 
NR_CPUS-1] = BAD_APICID }; @@ -519,8 +519,6 @@ void __init get_smp_config (void) struct intel_mp_floating *mpf = mpf_found; /* - * ACPI may be used to obtain the entire SMP configuration or just to - * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that * ACPI supports both logical (e.g. Hyper-Threading) and physical * processors, where MPS only supports physical. */ @@ -673,7 +671,7 @@ void __init find_smp_config (void) ACPI-based MP Configuration -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI void __init mp_register_lapic_address ( u64 address) @@ -929,11 +927,9 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; -#ifdef CONFIG_ACPI_BUS /* Don't set up the ACPI SCI because it's already set up */ if (acpi_fadt.sci_int == gsi) return gsi; -#endif ioapic = mp_find_ioapic(gsi); if (ioapic < 0) { @@ -973,13 +969,11 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) if (gsi < MAX_GSI_NUM) { if (gsi > 15) gsi = pci_irq++; -#ifdef CONFIG_ACPI_BUS /* * Don't assign IRQ used by ACPI SCI */ if (gsi == acpi_fadt.sci_int) gsi = pci_irq++; -#endif gsi_to_irq[irq] = gsi; } else { printk(KERN_ERR "GSI %u is too high\n", gsi); @@ -994,4 +988,4 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) } #endif /*CONFIG_X86_IO_APIC*/ -#endif /*CONFIG_ACPI_BOOT*/ +#endif /*CONFIG_ACPI*/ diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 64a8e05..caf1649 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -463,6 +463,8 @@ void touch_nmi_watchdog (void) */ for (i = 0; i < NR_CPUS; i++) per_cpu(nmi_touch, i) = 1; + + touch_softlockup_watchdog(); } void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) @@ -522,14 +524,14 @@ asmlinkage void do_nmi(struct pt_regs * regs, long error_code) nmi_enter(); add_pda(__nmi_count,1); - if (!nmi_callback(regs, cpu)) 
+ if (!rcu_dereference(nmi_callback)(regs, cpu)) default_do_nmi(regs); nmi_exit(); } void set_nmi_callback(nmi_callback_t callback) { - nmi_callback = callback; + rcu_assign_pointer(nmi_callback, callback); } void unset_nmi_callback(void) diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 7577f9d..8661f82 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -310,6 +310,7 @@ void __show_regs(struct pt_regs * regs) void show_regs(struct pt_regs *regs) { + printk("CPU %d:", smp_processor_id()); __show_regs(regs); show_trace(&regs->rsp); } diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 116a491..0511d80 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -65,13 +65,13 @@ * Machine setup.. */ -struct cpuinfo_x86 boot_cpu_data; +struct cpuinfo_x86 boot_cpu_data __read_mostly; unsigned long mmu_cr4_features; int acpi_disabled; EXPORT_SYMBOL(acpi_disabled); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI extern int __initdata acpi_ht; extern acpi_interrupt_flags acpi_sci_flags; int __initdata acpi_force = 0; @@ -294,7 +294,7 @@ static __init void parse_cmdline_early (char ** cmdline_p) maxcpus = simple_strtoul(from + 8, NULL, 0); } #endif -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* "acpi=off" disables both ACPI table parsing and interpreter init */ if (!memcmp(from, "acpi=off", 8)) disable_acpi(); @@ -566,7 +566,7 @@ void __init setup_arch(char **cmdline_p) init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). * Call this early for SRAT node setup. @@ -658,7 +658,7 @@ void __init setup_arch(char **cmdline_p) check_ioapic(); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* * Read APIC and some other early information from ACPI tables. 
*/ diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 34082c1..e3ffcac 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -36,7 +36,7 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); -unsigned long __supported_pte_mask = ~0UL; +unsigned long __supported_pte_mask __read_mostly = ~0UL; static int do_not_nx __initdata = 0; /* noexec=on|off diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index fa25e39..90aeccd 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -62,13 +62,13 @@ /* Number of siblings per CPU package */ int smp_num_siblings = 1; /* Package ID of each logical CPU */ -u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; -u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... 
NR_CPUS-1] = BAD_APICID }; EXPORT_SYMBOL(phys_proc_id); EXPORT_SYMBOL(cpu_core_id); /* Bitmask of currently online CPUs */ -cpumask_t cpu_online_map; +cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); @@ -88,8 +88,8 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; /* Set when the idlers are all forked */ int smp_threads_ready; -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; +cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); /* diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 66bf6dd..7b6abe0 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -176,10 +176,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); @@ -471,7 +468,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * off) isn't likely to go away much sooner anyway. 
*/ - if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update && + if (ntp_synced() && xtime.tv_sec > rtc_update && abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) { set_rtc_mmss(xtime.tv_sec); rtc_update = xtime.tv_sec + 660; @@ -1041,6 +1038,7 @@ static int timer_resume(struct sys_device *dev) write_sequnlock_irqrestore(&xtime_lock,flags); jiffies += sleep_length; wall_jiffies += sleep_length; + touch_softlockup_watchdog(); return 0; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 6ead433..f238d60 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -29,6 +29,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/nmi.h> +#include <linux/kprobes.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -419,8 +420,9 @@ void die_nmi(char *str, struct pt_regs *regs) do_exit(SIGSEGV); } -static void do_trap(int trapnr, int signr, char *str, - struct pt_regs * regs, long error_code, siginfo_t *info) +static void __kprobes do_trap(int trapnr, int signr, char *str, + struct pt_regs * regs, long error_code, + siginfo_t *info) { conditional_sti(regs); @@ -504,7 +506,8 @@ DO_ERROR(18, SIGSEGV, "reserved", reserved) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR( 8, SIGSEGV, "double fault", double_fault) -asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) +asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, + long error_code) { conditional_sti(regs); @@ -622,7 +625,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) io_check_error(reason, regs); } -asmlinkage void do_int3(struct pt_regs * regs, long error_code) +asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { return; @@ -653,7 +656,8 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs) } /* runs on IST stack. 
*/ -asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) +asmlinkage void __kprobes do_debug(struct pt_regs * regs, + unsigned long error_code) { unsigned long condition; struct task_struct *tsk = current; diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 2a94f9b..d4abb07 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -21,6 +21,7 @@ SECTIONS *(.text) SCHED_TEXT LOCK_TEXT + KPROBES_TEXT *(.fixup) *(.gnu.warning) } = 0x9090 diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index ca914c3..816732d 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -23,6 +23,7 @@ #include <linux/vt_kern.h> /* For unblank_screen() */ #include <linux/compiler.h> #include <linux/module.h> +#include <linux/kprobes.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -294,7 +295,8 @@ int exception_trace = 1; * bit 2 == 0 means kernel, 1 means user-mode * bit 3 == 1 means fault was an instruction fetch */ -asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 6a156f5..04f7a33 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -22,14 +22,14 @@ #define Dprintk(x...) #endif -struct pglist_data *node_data[MAX_NUMNODES]; +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; bootmem_data_t plat_node_bdata[MAX_NUMNODES]; int memnode_shift; u8 memnodemap[NODEMAPSIZE]; -unsigned char cpu_to_node[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; -cpumask_t node_to_cpumask[MAX_NUMNODES]; +unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... 
NR_CPUS-1] = NUMA_NO_NODE }; +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; int numa_off __initdata; diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile index 37c92e8..bb34e5e 100644 --- a/arch/x86_64/pci/Makefile +++ b/arch/x86_64/pci/Makefile @@ -8,7 +8,7 @@ CFLAGS += -Iarch/i386/pci obj-y := i386.o obj-$(CONFIG_PCI_DIRECT)+= direct.o obj-y += fixup.o -obj-$(CONFIG_ACPI_PCI) += acpi.o +obj-$(CONFIG_ACPI) += acpi.o obj-y += legacy.o irq.o common.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o diff --git a/arch/x86_64/pci/Makefile-BUS b/arch/x86_64/pci/Makefile-BUS index 291985f..4f0c05a 100644 --- a/arch/x86_64/pci/Makefile-BUS +++ b/arch/x86_64/pci/Makefile-BUS @@ -8,7 +8,7 @@ CFLAGS += -I arch/i386/pci obj-y := i386.o obj-$(CONFIG_PCI_DIRECT)+= direct.o obj-y += fixup.o -obj-$(CONFIG_ACPI_PCI) += acpi.o +obj-$(CONFIG_ACPI) += acpi.o obj-y += legacy.o irq.o common.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o |