diff options
Diffstat (limited to 'arch/x86_64')
35 files changed, 1826 insertions, 423 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index e18eb79..4310b4a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -45,6 +45,10 @@ config RWSEM_GENERIC_SPINLOCK config RWSEM_XCHGADD_ALGORITHM bool +config GENERIC_HWEIGHT + bool + default y + config GENERIC_CALIBRATE_DELAY bool default y @@ -246,6 +250,15 @@ config SCHED_SMT cost of slightly increased overhead in some places. If unsure say N here. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + source "kernel/Kconfig.preempt" config NUMA @@ -321,9 +334,13 @@ config HAVE_ARCH_EARLY_PFN_TO_NID def_bool y depends on NUMA +config OUT_OF_LINE_PFN_TO_PAGE + def_bool y + depends on DISCONTIGMEM + config NR_CPUS int "Maximum number of CPUs (2-256)" - range 2 256 + range 2 255 depends on SMP default "8" help @@ -364,13 +381,15 @@ config GART_IOMMU select SWIOTLB depends on PCI help - Support the IOMMU. Needed to run systems with more than 3GB of memory - properly with 32-bit PCI devices that do not support DAC (Double Address - Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter. - Normally the kernel will take the right choice by itself. - This option includes a driver for the AMD Opteron/Athlon64 northbridge IOMMU - and a software emulation used on other systems. - If unsure, say Y. + Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors + and for the bounce buffering software IOMMU. + Needed to run systems with more than 3GB of memory properly with + 32-bit PCI devices that do not support DAC (Double Address Cycle). + The IOMMU can be turned off at runtime with the iommu=off parameter. + Normally the kernel will take the right choice by itself. + This option includes a driver for the AMD Opteron/Athlon64 IOMMU + northbridge and a software emulation used on other systems without + hardware IOMMU. If unsure, say Y. # need this always enabled with GART_IOMMU for the VIA workaround config SWIOTLB @@ -429,10 +448,10 @@ config CRASH_DUMP config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) default "0x1000000" if CRASH_DUMP - default "0x100000" + default "0x200000" help This gives the physical address where the kernel is loaded. Normally - for regular kernels this value is 0x100000 (1MB). But in the case + for regular kernels this value is 0x200000 (2MB). But in the case of kexec on panic the fail safe kernel needs to run at a different address than the panic-ed kernel. This option is used to set the load address for kernels used to capture crash dump on being kexec'ed @@ -464,6 +483,14 @@ config SECCOMP source kernel/Kconfig.hz +config REORDER + bool "Function reordering" + default n + help + This option enables the toolchain to reorder functions for a more + optimal TLB usage. If you have pretty much any version of binutils, + this can increase your kernel build time by roughly one minute. + endmenu # @@ -512,16 +539,6 @@ config PCI_MMCONFIG bool "Support mmconfig PCI config space access" depends on PCI && ACPI -config UNORDERED_IO - bool "Unordered IO mapping access" - depends on EXPERIMENTAL - help - Use unordered stores to access IO memory mappings in device drivers. - Still very experimental. When a driver works on IA64/ppc64/pa-risc it should - work with this option, but it makes the drivers behave differently - from i386. Requires that the driver writer used memory barriers - properly. - source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index d7fd464..585fd4a 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -29,12 +29,14 @@ CHECKFLAGS += -D__x86_64__ -m64 cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) +cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) CFLAGS += $(cflags-y) CFLAGS += -m64 CFLAGS += -mno-red-zone CFLAGS += -mcmodel=kernel CFLAGS += -pipe +cflags-$(CONFIG_REORDER) += -ffunction-sections # this makes reading assembly source easier, but produces worse code # actually it makes the kernel smaller too. CFLAGS += -fno-reorder-blocks @@ -67,8 +69,8 @@ drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/ boot := arch/x86_64/boot -.PHONY: bzImage bzlilo install archmrproper \ - fdimage fdimage144 fdimage288 archclean +PHONY += bzImage bzlilo install archmrproper \ + fdimage fdimage144 fdimage288 isoimage archclean #Default target when executing "make" all: bzImage @@ -85,7 +87,7 @@ bzlilo: vmlinux bzdisk: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk -fdimage fdimage144 fdimage288: vmlinux +fdimage fdimage144 fdimage288 isoimage: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ install: @@ -97,11 +99,16 @@ archclean: define archhelp echo '* bzImage - Compressed kernel image (arch/$(ARCH)/boot/bzImage)' echo ' install - Install kernel using' - echo ' (your) ~/bin/installkernel or' - echo ' (distribution) /sbin/installkernel or' - echo ' install to $$(INSTALL_PATH) and run lilo' + echo ' (your) ~/bin/installkernel or' + echo ' (distribution) /sbin/installkernel or' + echo ' install to $$(INSTALL_PATH) and run lilo' + echo ' bzdisk - Create a boot floppy in /dev/fd0' + echo ' fdimage - Create a boot floppy image' + echo ' isoimage - Create a boot CD-ROM image' endef -CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf +CLEAN_FILES += arch/$(ARCH)/boot/fdimage \ + arch/$(ARCH)/boot/image.iso \ + arch/$(ARCH)/boot/mtools.conf diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile index 29f8396..43ee6c5 100644 --- a/arch/x86_64/boot/Makefile +++ b/arch/x86_64/boot/Makefile @@ -60,8 +60,12 @@ $(obj)/setup $(obj)/bootsect: %: %.o FORCE $(obj)/compressed/vmlinux: FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@ -# Set this if you want to pass append arguments to the zdisk/fdimage kernel +# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel FDARGS = +# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel +FDINITRD = + +image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) $(obj)/mtools.conf: $(src)/mtools.conf.in sed -e 's|@OBJ@|$(obj)|g' < $< > $@ @@ -70,8 +74,11 @@ $(obj)/mtools.conf: $(src)/mtools.conf.in zdisk: $(BOOTIMAGE) $(obj)/mtools.conf MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync syslinux /dev/fd0 ; sync - echo 'default linux $(FDARGS)' | \ + echo '$(image_cmdline)' | \ MTOOLSRC=$(obj)/mtools.conf mcopy - a:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ + fi MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync # These require being root or having syslinux 2.02 or higher installed @@ -79,18 +86,39 @@ fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync syslinux $(obj)/fdimage ; sync - echo 'default linux $(FDARGS)' | \ + echo '$(image_cmdline)' | \ MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ + fi MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync syslinux $(obj)/fdimage ; sync - echo 'default linux $(FDARGS)' | \ + echo '$(image_cmdline)' | \ MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ + fi MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync +isoimage: $(BOOTIMAGE) + -rm -rf $(obj)/isoimage + mkdir $(obj)/isoimage + cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \ + $(obj)/isoimage + cp $(BOOTIMAGE) $(obj)/isoimage/linux + echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ + fi + mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \ + -no-emul-boot -boot-load-size 4 -boot-info-table \ + $(obj)/isoimage + rm -rf $(obj)/isoimage + zlilo: $(BOOTIMAGE) if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index ce4de61..566ecc9 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-rc3-git9 -# Sat Feb 18 00:27:03 2006 +# Linux kernel version: 2.6.16-git9 +# Sat Mar 25 15:18:40 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -38,6 +38,7 @@ CONFIG_SYSCTL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +# CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" CONFIG_UID16=y CONFIG_VM86=y @@ -79,6 +80,7 @@ CONFIG_STOP_MACHINE=y # Block layer # CONFIG_LBD=y +# CONFIG_BLK_DEV_IO_TRACE is not set # # IO Schedulers @@ -139,7 +141,6 @@ CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y CONFIG_NR_CPUS=32 CONFIG_HOTPLUG_CPU=y CONFIG_HPET_TIMER=y -CONFIG_X86_PM_TIMER=y CONFIG_HPET_EMULATE_RTC=y CONFIG_GART_IOMMU=y CONFIG_SWIOTLB=y @@ -148,12 +149,13 @@ CONFIG_X86_MCE_INTEL=y CONFIG_X86_MCE_AMD=y # CONFIG_KEXEC is not set # CONFIG_CRASH_DUMP is not set -CONFIG_PHYSICAL_START=0x100000 +CONFIG_PHYSICAL_START=0x200000 CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set CONFIG_HZ_250=y # CONFIG_HZ_1000 is not set CONFIG_HZ=250 +# CONFIG_REORDER is not set CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_ISA_DMA_API=y @@ -189,12 +191,14 @@ CONFIG_ACPI_NUMA=y # CONFIG_ACPI_ASUS is not set # CONFIG_ACPI_IBM is not set CONFIG_ACPI_TOSHIBA=y -CONFIG_ACPI_BLACKLIST_YEAR=2001 +CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y +CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y +CONFIG_ACPI_HOTPLUG_MEMORY=y # # CPU Frequency scaling @@ -232,10 +236,8 @@ CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y CONFIG_PCI=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y -CONFIG_UNORDERED_IO=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_MSI=y -# CONFIG_PCI_LEGACY_PROC is not set # CONFIG_PCI_DEBUG is not set # @@ -294,6 +296,7 @@ CONFIG_INET_TCP_DIAG=y CONFIG_TCP_CONG_BIC=y CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set +# CONFIG_IPV6_ROUTER_PREF is not set # CONFIG_INET6_AH is not set # CONFIG_INET6_ESP is not set # CONFIG_INET6_IPCOMP is not set @@ -701,6 +704,7 @@ CONFIG_S2IO=m # Wireless LAN (non-hamradio) # # CONFIG_NET_RADIO is not set +# CONFIG_NET_WIRELESS_RTNETLINK is not set # # Wan interfaces @@ -861,6 +865,8 @@ CONFIG_RTC=y CONFIG_AGP=y CONFIG_AGP_AMD64=y CONFIG_AGP_INTEL=y +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_VIA is not set # CONFIG_DRM is not set # CONFIG_MWAVE is not set CONFIG_RAW_DRIVER=y @@ -907,10 +913,6 @@ CONFIG_HWMON=y # CONFIG_IBM_ASM is not set # -# Multimedia Capabilities Port drivers -# - -# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -974,6 +976,7 @@ CONFIG_SOUND_ICH=y # CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set @@ -1002,7 +1005,6 @@ CONFIG_USB_UHCI_HCD=y # # USB Device Class drivers # -# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set # CONFIG_USB_ACM is not set CONFIG_USB_PRINTER=y @@ -1121,11 +1123,7 @@ CONFIG_USB_MON=y # CONFIG_INFINIBAND is not set # -# SN Devices -# - -# -# EDAC - error detection and reporting (RAS) +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) # # CONFIG_EDAC is not set @@ -1198,7 +1196,6 @@ CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y -CONFIG_RELAYFS_FS=y # CONFIG_CONFIGFS_FS is not set # @@ -1321,6 +1318,7 @@ CONFIG_DETECT_SOFTLOCKUP=y CONFIG_DEBUG_FS=y # CONFIG_DEBUG_VM is not set # CONFIG_FRAME_POINTER is not set +# CONFIG_UNWIND_INFO is not set # CONFIG_FORCED_INLINING is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_DEBUG_RODATA is not set diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 572b3b2..e776139 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -58,7 +58,7 @@ struct elf_phdr; #define USE_ELF_CORE_DUMP 1 -/* Overwrite elfcore.h */ +/* Override elfcore.h */ #define _LINUX_ELFCORE_H 1 typedef unsigned int elf_greg_t; diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 00dee17..35b2fac 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -501,7 +501,7 @@ ia32_sys_call_table: .quad sys_setdomainname .quad sys_uname .quad sys_modify_ldt - .quad sys32_adjtimex + .quad compat_sys_adjtimex .quad sys32_mprotect /* 125 */ .quad compat_sys_sigprocmask .quad quiet_ni_syscall /* create_module */ @@ -688,6 +688,8 @@ ia32_sys_call_table: .quad sys_ni_syscall /* pselect6 for now */ .quad sys_ni_syscall /* ppoll for now */ .quad sys_unshare /* 310 */ + .quad compat_sys_set_robust_list + .quad compat_sys_get_robust_list ia32_syscall_end: .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 .quad ni_syscall diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index 2bc55af..f182b20 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -30,7 +30,6 @@ #include <linux/resource.h> #include <linux/times.h> #include <linux/utsname.h> -#include <linux/timex.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/sem.h> @@ -430,24 +429,12 @@ put_tv32(struct compat_timeval __user *o, struct timeval *i) return err; } -extern int do_setitimer(int which, struct itimerval *, struct itimerval *); +extern unsigned int alarm_setitimer(unsigned int seconds); asmlinkage long sys32_alarm(unsigned int seconds) { - struct itimerval it_new, it_old; - unsigned int oldalarm; - - it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; - it_new.it_value.tv_sec = seconds; - it_new.it_value.tv_usec = 0; - do_setitimer(ITIMER_REAL, &it_new, &it_old); - oldalarm = it_old.it_value.tv_sec; - /* ehhh.. We can't return 0 if we have an alarm pending.. */ - /* And we'd better return too much than too little anyway */ - if (it_old.it_value.tv_usec) - oldalarm++; - return oldalarm; + return alarm_setitimer(seconds); } /* Translations due to time_t size differences. Which affects all @@ -779,82 +766,6 @@ sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count) return ret; } -/* Handle adjtimex compatibility. */ - -struct timex32 { - u32 modes; - s32 offset, freq, maxerror, esterror; - s32 status, constant, precision, tolerance; - struct compat_timeval time; - s32 tick; - s32 ppsfreq, jitter, shift, stabil; - s32 jitcnt, calcnt, errcnt, stbcnt; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; -}; - -extern int do_adjtimex(struct timex *); - -asmlinkage long -sys32_adjtimex(struct timex32 __user *utp) -{ - struct timex txc; - int ret; - - memset(&txc, 0, sizeof(struct timex)); - - if (!access_ok(VERIFY_READ, utp, sizeof(struct timex32)) || - __get_user(txc.modes, &utp->modes) || - __get_user(txc.offset, &utp->offset) || - __get_user(txc.freq, &utp->freq) || - __get_user(txc.maxerror, &utp->maxerror) || - __get_user(txc.esterror, &utp->esterror) || - __get_user(txc.status, &utp->status) || - __get_user(txc.constant, &utp->constant) || - __get_user(txc.precision, &utp->precision) || - __get_user(txc.tolerance, &utp->tolerance) || - __get_user(txc.time.tv_sec, &utp->time.tv_sec) || - __get_user(txc.time.tv_usec, &utp->time.tv_usec) || - __get_user(txc.tick, &utp->tick) || - __get_user(txc.ppsfreq, &utp->ppsfreq) || - __get_user(txc.jitter, &utp->jitter) || - __get_user(txc.shift, &utp->shift) || - __get_user(txc.stabil, &utp->stabil) || - __get_user(txc.jitcnt, &utp->jitcnt) || - __get_user(txc.calcnt, &utp->calcnt) || - __get_user(txc.errcnt, &utp->errcnt) || - __get_user(txc.stbcnt, &utp->stbcnt)) - return -EFAULT; - - ret = do_adjtimex(&txc); - - if (!access_ok(VERIFY_WRITE, utp, sizeof(struct timex32)) || - __put_user(txc.modes, &utp->modes) || - __put_user(txc.offset, &utp->offset) || - __put_user(txc.freq, &utp->freq) || - __put_user(txc.maxerror, &utp->maxerror) || - __put_user(txc.esterror, &utp->esterror) || - __put_user(txc.status, &utp->status) || - __put_user(txc.constant, &utp->constant) || - __put_user(txc.precision, &utp->precision) || - __put_user(txc.tolerance, &utp->tolerance) || - __put_user(txc.time.tv_sec, &utp->time.tv_sec) || - __put_user(txc.time.tv_usec, &utp->time.tv_usec) || - __put_user(txc.tick, &utp->tick) || - __put_user(txc.ppsfreq, &utp->ppsfreq) || - __put_user(txc.jitter, &utp->jitter) || - __put_user(txc.shift, &utp->shift) || - __put_user(txc.stabil, &utp->stabil) || - __put_user(txc.jitcnt, &utp->jitcnt) || - __put_user(txc.calcnt, &utp->calcnt) || - __put_user(txc.errcnt, &utp->errcnt) || - __put_user(txc.stbcnt, &utp->stbcnt)) - ret = -EFAULT; - - return ret; -} - asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index a0f955b..fffd6b0 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -60,7 +60,7 @@ static u32 __init allocate_aperture(void) printk("Cannot allocate aperture memory hole (%p,%uK)\n", p, aper_size>>10); if (p) - free_bootmem_node(nd0, (unsigned long)p, aper_size); + free_bootmem_node(nd0, __pa(p), aper_size); return 0; } printk("Mapping aperture over %d KB of RAM @ %lx\n", @@ -161,7 +161,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) int num, slot, func; /* Poor man's PCI discovery */ - for (num = 0; num < 32; num++) { + for (num = 0; num < 256; num++) { for (slot = 0; slot < 32; slot++) { for (func = 0; func < 8; func++) { u32 class, cap; diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index e5b14c5..d546201 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -342,6 +342,7 @@ void __init init_bsp_APIC(void) void __cpuinit setup_local_APIC (void) { unsigned int value, maxlvt; + int i, j; value = apic_read(APIC_LVR); @@ -371,6 +372,25 @@ void __cpuinit setup_local_APIC (void) apic_write(APIC_TASKPRI, value); /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + */ + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1<<j)) + ack_APIC_irq(); + } + } + + /* * Now that we are all set up, enable the APIC */ value = apic_read(APIC_SPIV); diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 7c10e90..8538bfe 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -553,7 +553,7 @@ iret_label: /* force a signal here? this matches i386 behaviour */ /* running with kernel gs */ bad_iret: - movq $-9999,%rdi /* better code? */ + movq $11,%rdi /* SIGSEGV */ sti jmp do_exit .previous diff --git a/arch/x86_64/kernel/functionlist b/arch/x86_64/kernel/functionlist new file mode 100644 index 0000000..2bcebdc --- /dev/null +++ b/arch/x86_64/kernel/functionlist @@ -0,0 +1,1286 @@ +*(.text.flush_thread) +*(.text.check_poison_obj) +*(.text.copy_page) +*(.text.__set_personality) +*(.text.gart_map_sg) +*(.text.kmem_cache_free) +*(.text.find_get_page) +*(.text._raw_spin_lock) +*(.text.ide_outb) +*(.text.unmap_vmas) +*(.text.copy_page_range) +*(.text.kprobe_handler) +*(.text.__handle_mm_fault) +*(.text.__d_lookup) +*(.text.copy_user_generic) +*(.text.__link_path_walk) +*(.text.get_page_from_freelist) +*(.text.kmem_cache_alloc) +*(.text.drive_cmd_intr) +*(.text.ia32_setup_sigcontext) +*(.text.huge_pte_offset) +*(.text.do_page_fault) +*(.text.page_remove_rmap) +*(.text.release_pages) +*(.text.ide_end_request) +*(.text.__mutex_lock_slowpath) +*(.text.__find_get_block) +*(.text.kfree) +*(.text.vfs_read) +*(.text._raw_spin_unlock) +*(.text.free_hot_cold_page) +*(.text.fget_light) +*(.text.schedule) +*(.text.memcmp) +*(.text.touch_atime) +*(.text.__might_sleep) +*(.text.__down_read_trylock) +*(.text.arch_pick_mmap_layout) +*(.text.find_vma) +*(.text.__make_request) +*(.text.do_generic_mapping_read) +*(.text.mutex_lock_interruptible) +*(.text.__generic_file_aio_read) +*(.text._atomic_dec_and_lock) +*(.text.__wake_up_bit) +*(.text.add_to_page_cache) +*(.text.cache_alloc_debugcheck_after) +*(.text.vm_normal_page) +*(.text.mutex_debug_check_no_locks_freed) +*(.text.net_rx_action) +*(.text.__find_first_zero_bit) +*(.text.put_page) +*(.text._raw_read_lock) +*(.text.__delay) +*(.text.dnotify_parent) +*(.text.do_path_lookup) +*(.text.do_sync_read) +*(.text.do_lookup) +*(.text.bit_waitqueue) +*(.text.file_read_actor) +*(.text.strncpy_from_user) +*(.text.__pagevec_lru_add_active) +*(.text.fget) +*(.text.dput) +*(.text.__strnlen_user) +*(.text.inotify_inode_queue_event) +*(.text.rw_verify_area) +*(.text.ide_intr) +*(.text.inotify_dentry_parent_queue_event) +*(.text.permission) +*(.text.memscan) +*(.text.hpet_rtc_interrupt) +*(.text.do_mmap_pgoff) +*(.text.current_fs_time) +*(.text.vfs_getattr) +*(.text.kmem_flagcheck) +*(.text.mark_page_accessed) +*(.text.free_pages_and_swap_cache) +*(.text.generic_fillattr) +*(.text.__block_prepare_write) +*(.text.__set_page_dirty_nobuffers) +*(.text.link_path_walk) +*(.text.find_get_pages_tag) +*(.text.ide_do_request) +*(.text.__alloc_pages) +*(.text.generic_permission) +*(.text.mod_page_state_offset) +*(.text.free_pgd_range) +*(.text.generic_file_buffered_write) +*(.text.number) +*(.text.ide_do_rw_disk) +*(.text.__brelse) +*(.text.__mod_page_state_offset) +*(.text.rotate_reclaimable_page) +*(.text.find_vma_prepare) +*(.text.find_vma_prev) +*(.text.lru_cache_add_active) +*(.text.__kmalloc_track_caller) +*(.text.smp_invalidate_interrupt) +*(.text.handle_IRQ_event) +*(.text.__find_get_block_slow) +*(.text.do_wp_page) +*(.text.do_select) +*(.text.set_user_nice) +*(.text.sys_read) +*(.text.do_munmap) +*(.text.csum_partial) +*(.text.__do_softirq) +*(.text.may_open) +*(.text.getname) +*(.text.get_empty_filp) +*(.text.__fput) +*(.text.remove_mapping) +*(.text.filp_ctor) +*(.text.poison_obj) +*(.text.unmap_region) +*(.text.test_set_page_writeback) +*(.text.__do_page_cache_readahead) +*(.text.sock_def_readable) +*(.text.ide_outl) +*(.text.shrink_zone) +*(.text.rb_insert_color) +*(.text.get_request) +*(.text.sys_pread64) +*(.text.spin_bug) +*(.text.ide_outsl) +*(.text.mask_and_ack_8259A) +*(.text.filemap_nopage) +*(.text.page_add_file_rmap) +*(.text.find_lock_page) +*(.text.tcp_poll) +*(.text.__mark_inode_dirty) +*(.text.file_ra_state_init) +*(.text.generic_file_llseek) +*(.text.__pagevec_lru_add) +*(.text.page_cache_readahead) +*(.text.n_tty_receive_buf) +*(.text.zonelist_policy) +*(.text.vma_adjust) +*(.text.test_clear_page_dirty) +*(.text.sync_buffer) +*(.text.do_exit) +*(.text.__bitmap_weight) +*(.text.alloc_pages_current) +*(.text.get_unused_fd) +*(.text.zone_watermark_ok) +*(.text.cpuset_update_task_memory_state) +*(.text.__bitmap_empty) +*(.text.sys_munmap) +*(.text.__inode_dir_notify) +*(.text.__generic_file_aio_write_nolock) +*(.text.__pte_alloc) +*(.text.sys_select) +*(.text.vm_acct_memory) +*(.text.vfs_write) +*(.text.__lru_add_drain) +*(.text.prio_tree_insert) +*(.text.generic_file_aio_read) +*(.text.vma_merge) +*(.text.block_write_full_page) +*(.text.__page_set_anon_rmap) +*(.text.apic_timer_interrupt) +*(.text.release_console_sem) +*(.text.sys_write) +*(.text.sys_brk) +*(.text.dup_mm) +*(.text.read_current_timer) +*(.text.ll_rw_block) +*(.text.blk_rq_map_sg) +*(.text.dbg_userword) +*(.text.__block_commit_write) +*(.text.cache_grow) +*(.text.copy_strings) +*(.text.release_task) +*(.text.do_sync_write) +*(.text.unlock_page) +*(.text.load_elf_binary) +*(.text.__follow_mount) +*(.text.__getblk) +*(.text.do_sys_open) +*(.text.current_kernel_time) +*(.text.call_rcu) +*(.text.write_chan) +*(.text.vsnprintf) +*(.text.dummy_inode_setsecurity) +*(.text.submit_bh) +*(.text.poll_freewait) +*(.text.bio_alloc_bioset) +*(.text.skb_clone) +*(.text.page_waitqueue) +*(.text.__mutex_lock_interruptible_slowpath) +*(.text.get_index) +*(.text.csum_partial_copy_generic) +*(.text.bad_range) +*(.text.remove_vma) +*(.text.cp_new_stat) +*(.text.alloc_arraycache) +*(.text.test_clear_page_writeback) +*(.text.strsep) +*(.text.open_namei) +*(.text._raw_read_unlock) +*(.text.get_vma_policy) +*(.text.__down_write_trylock) +*(.text.find_get_pages) +*(.text.tcp_rcv_established) +*(.text.generic_make_request) +*(.text.__block_write_full_page) +*(.text.cfq_set_request) +*(.text.sys_inotify_init) +*(.text.split_vma) +*(.text.__mod_timer) +*(.text.get_options) +*(.text.vma_link) +*(.text.mpage_writepages) +*(.text.truncate_complete_page) +*(.text.tcp_recvmsg) +*(.text.sigprocmask) +*(.text.filemap_populate) +*(.text.sys_close) +*(.text.inotify_dev_queue_event) +*(.text.do_task_stat) +*(.text.__dentry_open) +*(.text.unlink_file_vma) +*(.text.__pollwait) +*(.text.packet_rcv_spkt) +*(.text.drop_buffers) +*(.text.free_pgtables) +*(.text.generic_file_direct_write) +*(.text.copy_process) +*(.text.netif_receive_skb) +*(.text.dnotify_flush) +*(.text.print_bad_pte) +*(.text.anon_vma_unlink) +*(.text.sys_mprotect) +*(.text.sync_sb_inodes) +*(.text.find_inode_fast) +*(.text.dummy_inode_readlink) +*(.text.putname) +*(.text.init_smp_flush) +*(.text.dbg_redzone2) +*(.text.sk_run_filter) +*(.text.may_expand_vm) +*(.text.generic_file_aio_write) +*(.text.find_next_zero_bit) +*(.text.file_kill) +*(.text.audit_getname) +*(.text.arch_unmap_area_topdown) +*(.text.alloc_page_vma) +*(.text.tcp_transmit_skb) +*(.text.rb_next) +*(.text.dbg_redzone1) +*(.text.generic_file_mmap) +*(.text.vfs_fstat) +*(.text.sys_time) +*(.text.page_lock_anon_vma) +*(.text.get_unmapped_area) +*(.text.remote_llseek) +*(.text.__up_read) +*(.text.fd_install) +*(.text.eventpoll_init_file) +*(.text.dma_alloc_coherent) +*(.text.create_empty_buffers) +*(.text.__mutex_unlock_slowpath) +*(.text.dup_fd) +*(.text.d_alloc) +*(.text.tty_ldisc_try) +*(.text.sys_stime) +*(.text.__rb_rotate_right) +*(.text.d_validate) +*(.text.rb_erase) +*(.text.path_release) +*(.text.memmove) +*(.text.invalidate_complete_page) +*(.text.clear_inode) +*(.text.cache_estimate) +*(.text.alloc_buffer_head) +*(.text.smp_call_function_interrupt) +*(.text.flush_tlb_others) +*(.text.file_move) +*(.text.balance_dirty_pages_ratelimited) +*(.text.vma_prio_tree_add) +*(.text.timespec_trunc) +*(.text.mempool_alloc) +*(.text.iget_locked) +*(.text.d_alloc_root) +*(.text.cpuset_populate_dir) +*(.text.anon_vma_prepare) +*(.text.sys_newstat) +*(.text.alloc_page_interleave) +*(.text.__path_lookup_intent_open) +*(.text.__pagevec_free) +*(.text.inode_init_once) +*(.text.free_vfsmnt) +*(.text.__user_walk_fd) +*(.text.cfq_idle_slice_timer) +*(.text.sys_mmap) +*(.text.sys_llseek) +*(.text.prio_tree_remove) +*(.text.filp_close) +*(.text.file_permission) +*(.text.vma_prio_tree_remove) +*(.text.tcp_ack) +*(.text.nameidata_to_filp) +*(.text.sys_lseek) +*(.text.percpu_counter_mod) +*(.text.igrab) +*(.text.__bread) +*(.text.alloc_inode) +*(.text.filldir) +*(.text.__rb_rotate_left) +*(.text.irq_affinity_write_proc) +*(.text.init_request_from_bio) +*(.text.find_or_create_page) +*(.text.tty_poll) +*(.text.tcp_sendmsg) +*(.text.ide_wait_stat) +*(.text.free_buffer_head) +*(.text.flush_signal_handlers) +*(.text.tcp_v4_rcv) +*(.text.nr_blockdev_pages) +*(.text.locks_remove_flock) +*(.text.__iowrite32_copy) +*(.text.do_filp_open) +*(.text.try_to_release_page) +*(.text.page_add_new_anon_rmap) +*(.text.kmem_cache_size) +*(.text.eth_type_trans) +*(.text.try_to_free_buffers) +*(.text.schedule_tail) +*(.text.proc_lookup) +*(.text.no_llseek) +*(.text.kfree_skbmem) +*(.text.do_wait) +*(.text.do_mpage_readpage) +*(.text.vfs_stat_fd) +*(.text.tty_write) +*(.text.705) +*(.text.sync_page) +*(.text.__remove_shared_vm_struct) +*(.text.__kfree_skb) +*(.text.sock_poll) +*(.text.get_request_wait) +*(.text.do_sigaction) +*(.text.do_brk) +*(.text.tcp_event_data_recv) +*(.text.read_chan) +*(.text.pipe_writev) +*(.text.__emul_lookup_dentry) +*(.text.rtc_get_rtc_time) +*(.text.print_objinfo) +*(.text.file_update_time) +*(.text.do_signal) +*(.text.disable_8259A_irq) +*(.text.blk_queue_bounce) +*(.text.__anon_vma_link) +*(.text.__vma_link) +*(.text.vfs_rename) +*(.text.sys_newlstat) +*(.text.sys_newfstat) +*(.text.sys_mknod) +*(.text.__show_regs) +*(.text.iput) +*(.text.get_signal_to_deliver) +*(.text.flush_tlb_page) +*(.text.debug_mutex_wake_waiter) +*(.text.copy_thread) +*(.text.clear_page_dirty_for_io) +*(.text.buffer_io_error) +*(.text.vfs_permission) +*(.text.truncate_inode_pages_range) +*(.text.sys_recvfrom) +*(.text.remove_suid) +*(.text.mark_buffer_dirty) +*(.text.local_bh_enable) +*(.text.get_zeroed_page) +*(.text.get_vmalloc_info) +*(.text.flush_old_exec) +*(.text.dummy_inode_permission) +*(.text.__bio_add_page) +*(.text.prio_tree_replace) +*(.text.notify_change) +*(.text.mntput_no_expire) +*(.text.fput) +*(.text.__end_that_request_first) +*(.text.wake_up_bit) +*(.text.unuse_mm) +*(.text.skb_release_data) +*(.text.shrink_icache_memory) +*(.text.sched_balance_self) +*(.text.__pmd_alloc) +*(.text.pipe_poll) +*(.text.normal_poll) +*(.text.__free_pages) +*(.text.follow_mount) +*(.text.cdrom_start_packet_command) +*(.text.blk_recount_segments) +*(.text.bio_put) +*(.text.__alloc_skb) +*(.text.__wake_up) +*(.text.vm_stat_account) +*(.text.sys_fcntl) +*(.text.sys_fadvise64) +*(.text._raw_write_unlock) +*(.text.__pud_alloc) +*(.text.alloc_page_buffers) +*(.text.vfs_llseek) +*(.text.sockfd_lookup) +*(.text._raw_write_lock) +*(.text.put_compound_page) +*(.text.prune_dcache) +*(.text.pipe_readv) +*(.text.mempool_free) +*(.text.make_ahead_window) +*(.text.lru_add_drain) +*(.text.constant_test_bit) +*(.text.__clear_user) +*(.text.arch_unmap_area) +*(.text.anon_vma_link) +*(.text.sys_chroot) +*(.text.setup_arg_pages) +*(.text.radix_tree_preload) +*(.text.init_rwsem) +*(.text.generic_osync_inode) +*(.text.generic_delete_inode) +*(.text.do_sys_poll) +*(.text.dev_queue_xmit) +*(.text.default_llseek) +*(.text.__writeback_single_inode) +*(.text.vfs_ioctl) +*(.text.__up_write) +*(.text.unix_poll) +*(.text.sys_rt_sigprocmask) +*(.text.sock_recvmsg) +*(.text.recalc_bh_state) +*(.text.__put_unused_fd) +*(.text.process_backlog) +*(.text.locks_remove_posix) +*(.text.lease_modify) +*(.text.expand_files) +*(.text.end_buffer_read_nobh) +*(.text.d_splice_alias) +*(.text.debug_mutex_init_waiter) +*(.text.copy_from_user) +*(.text.cap_vm_enough_memory) +*(.text.show_vfsmnt) +*(.text.release_sock) +*(.text.pfifo_fast_enqueue) +*(.text.half_md4_transform) +*(.text.fs_may_remount_ro) +*(.text.do_fork) +*(.text.copy_hugetlb_page_range) +*(.text.cache_free_debugcheck) +*(.text.__tcp_select_window) +*(.text.task_handoff_register) +*(.text.sys_open) +*(.text.strlcpy) +*(.text.skb_copy_datagram_iovec) +*(.text.set_up_list3s) +*(.text.release_open_intent) +*(.text.qdisc_restart) +*(.text.n_tty_chars_in_buffer) +*(.text.inode_change_ok) +*(.text.__downgrade_write) +*(.text.debug_mutex_unlock) +*(.text.add_timer_randomness) +*(.text.sock_common_recvmsg) +*(.text.set_bh_page) +*(.text.printk_lock) +*(.text.path_release_on_umount) +*(.text.ip_output) +*(.text.ide_build_dmatable) +*(.text.__get_user_8) +*(.text.end_buffer_read_sync) +*(.text.__d_path) +*(.text.d_move) +*(.text.del_timer) +*(.text.constant_test_bit) +*(.text.blockable_page_cache_readahead) +*(.text.tty_read) +*(.text.sys_readlink) +*(.text.sys_faccessat) +*(.text.read_swap_cache_async) +*(.text.pty_write_room) +*(.text.page_address_in_vma) +*(.text.kthread) +*(.text.cfq_exit_io_context) +*(.text.__tcp_push_pending_frames) +*(.text.sys_pipe) +*(.text.submit_bio) +*(.text.pid_revalidate) +*(.text.page_referenced_file) +*(.text.lock_sock) +*(.text.get_page_state_node) +*(.text.generic_block_bmap) +*(.text.do_setitimer) +*(.text.dev_queue_xmit_nit) +*(.text.copy_from_read_buf) +*(.text.__const_udelay) +*(.text.console_conditional_schedule) +*(.text.wake_up_new_task) +*(.text.wait_for_completion_interruptible) +*(.text.tcp_rcv_rtt_update) +*(.text.sys_mlockall) +*(.text.set_fs_altroot) +*(.text.schedule_timeout) +*(.text.nr_free_pagecache_pages) +*(.text.nf_iterate) +*(.text.mapping_tagged) +*(.text.ip_queue_xmit) +*(.text.ip_local_deliver) +*(.text.follow_page) +*(.text.elf_map) +*(.text.dummy_file_permission) +*(.text.dispose_list) +*(.text.dentry_open) +*(.text.dentry_iput) +*(.text.bio_alloc) +*(.text.alloc_skb_from_cache) +*(.text.wait_on_page_bit) +*(.text.vfs_readdir) +*(.text.vfs_lstat) +*(.text.seq_escape) +*(.text.__posix_lock_file) +*(.text.mm_release) +*(.text.kref_put) +*(.text.ip_rcv) +*(.text.__iget) +*(.text.free_pages) +*(.text.find_mergeable_anon_vma) +*(.text.find_extend_vma) +*(.text.dummy_inode_listsecurity) +*(.text.bio_add_page) +*(.text.__vm_enough_memory) +*(.text.vfs_stat) +*(.text.tty_paranoia_check) +*(.text.tcp_read_sock) +*(.text.tcp_data_queue) +*(.text.sys_uname) +*(.text.sys_renameat) +*(.text.__strncpy_from_user) +*(.text.__mutex_init) +*(.text.__lookup_hash) +*(.text.kref_get) +*(.text.ip_route_input) +*(.text.__insert_inode_hash) +*(.text.do_sock_write) +*(.text.blk_done_softirq) +*(.text.__wake_up_sync) +*(.text.__vma_link_rb) +*(.text.tty_ioctl) +*(.text.tracesys) +*(.text.sys_getdents) +*(.text.sys_dup) +*(.text.stub_execve) +*(.text.sha_transform) +*(.text.radix_tree_tag_clear) +*(.text.put_unused_fd) +*(.text.put_files_struct) +*(.text.mpage_readpages) +*(.text.may_delete) +*(.text.kmem_cache_create) +*(.text.ip_mc_output) +*(.text.interleave_nodes) +*(.text.groups_search) +*(.text.generic_drop_inode) +*(.text.generic_commit_write) +*(.text.fcntl_setlk) +*(.text.exit_mmap) +*(.text.end_page_writeback) +*(.text.__d_rehash) +*(.text.debug_mutex_free_waiter) +*(.text.csum_ipv6_magic) +*(.text.count) +*(.text.cleanup_rbuf) +*(.text.check_spinlock_acquired_node) +*(.text.can_vma_merge_after) +*(.text.bio_endio) +*(.text.alloc_pidmap) +*(.text.write_ldt) +*(.text.vmtruncate_range) +*(.text.vfs_create) +*(.text.__user_walk) +*(.text.update_send_head) +*(.text.unmap_underlying_metadata) +*(.text.tty_ldisc_deref) +*(.text.tcp_setsockopt) +*(.text.tcp_send_ack) +*(.text.sys_pause) +*(.text.sys_gettimeofday) +*(.text.sync_dirty_buffer) +*(.text.strncmp) +*(.text.release_posix_timer) +*(.text.proc_file_read) +*(.text.prepare_to_wait) +*(.text.locks_mandatory_locked) +*(.text.interruptible_sleep_on_timeout) +*(.text.inode_sub_bytes) +*(.text.in_group_p) +*(.text.hrtimer_try_to_cancel) +*(.text.filldir64) +*(.text.fasync_helper) +*(.text.dummy_sb_pivotroot) +*(.text.d_lookup) +*(.text.d_instantiate) +*(.text.__d_find_alias) +*(.text.cpu_idle_wait) +*(.text.cond_resched_lock) +*(.text.chown_common) +*(.text.blk_congestion_wait) +*(.text.activate_page) +*(.text.unlock_buffer) +*(.text.tty_wakeup) +*(.text.tcp_v4_do_rcv) +*(.text.tcp_current_mss) +*(.text.sys_openat) +*(.text.sys_fchdir) +*(.text.strnlen_user) +*(.text.strnlen) +*(.text.strchr) +*(.text.sock_common_getsockopt) +*(.text.skb_checksum) +*(.text.remove_wait_queue) +*(.text.rb_replace_node) +*(.text.radix_tree_node_ctor) +*(.text.pty_chars_in_buffer) +*(.text.profile_hit) +*(.text.prio_tree_left) +*(.text.pgd_clear_bad) +*(.text.pfifo_fast_dequeue) +*(.text.page_referenced) +*(.text.open_exec) +*(.text.mmput) +*(.text.mm_init) +*(.text.__ide_dma_off_quietly) +*(.text.ide_dma_intr) +*(.text.hrtimer_start) +*(.text.get_io_context) +*(.text.__get_free_pages) +*(.text.find_first_zero_bit) +*(.text.file_free_rcu) +*(.text.dummy_socket_sendmsg) +*(.text.do_unlinkat) +*(.text.do_arch_prctl) +*(.text.destroy_inode) +*(.text.can_vma_merge_before) +*(.text.block_sync_page) +*(.text.block_prepare_write) +*(.text.bio_init) +*(.text.arch_ptrace) +*(.text.wake_up_inode) +*(.text.wait_on_retry_sync_kiocb) +*(.text.vma_prio_tree_next) +*(.text.tcp_rcv_space_adjust) +*(.text.__tcp_ack_snd_check) +*(.text.sys_utime) +*(.text.sys_recvmsg) +*(.text.sys_mremap) +*(.text.sys_bdflush) +*(.text.sleep_on) +*(.text.set_page_dirty_lock) +*(.text.seq_path) +*(.text.schedule_timeout_interruptible) +*(.text.sched_fork) +*(.text.rt_run_flush) +*(.text.profile_munmap) +*(.text.prepare_binprm) +*(.text.__pagevec_release_nonlru) +*(.text.m_show) +*(.text.lookup_mnt) +*(.text.__lookup_mnt) +*(.text.lock_timer_base) +*(.text.is_subdir) +*(.text.invalidate_bh_lru) +*(.text.init_buffer_head) +*(.text.ifind_fast) +*(.text.ide_dma_start) +*(.text.__get_page_state) +*(.text.flock_to_posix_lock) +*(.text.__find_symbol) +*(.text.do_futex) +*(.text.do_execve) +*(.text.dirty_writeback_centisecs_handler) +*(.text.dev_watchdog) +*(.text.can_share_swap_page) +*(.text.blkdev_put) +*(.text.bio_get_nr_vecs) +*(.text.xfrm_compile_policy) +*(.text.vma_prio_tree_insert) +*(.text.vfs_lstat_fd) +*(.text.__user_path_lookup_open) +*(.text.thread_return) +*(.text.tcp_send_delayed_ack) +*(.text.sock_def_error_report) +*(.text.shrink_slab) +*(.text.serial_out) +*(.text.seq_read) +*(.text.secure_ip_id) +*(.text.search_binary_handler) +*(.text.proc_pid_unhash) +*(.text.pagevec_lookup) +*(.text.new_inode) +*(.text.memcpy_toiovec) +*(.text.locks_free_lock) +*(.text.__lock_page) +*(.text.__lock_buffer) +*(.text.load_module) +*(.text.is_bad_inode) +*(.text.invalidate_inode_buffers) +*(.text.insert_vm_struct) +*(.text.inode_setattr) +*(.text.inode_add_bytes) +*(.text.ide_read_24) +*(.text.ide_get_error_location) +*(.text.ide_do_drive_cmd) +*(.text.get_locked_pte) +*(.text.get_filesystem_list) +*(.text.generic_file_open) +*(.text.follow_down) +*(.text.find_next_bit) +*(.text.__find_first_bit) +*(.text.exit_mm) +*(.text.exec_keys) +*(.text.end_buffer_write_sync) +*(.text.end_bio_bh_io_sync) +*(.text.dummy_socket_shutdown) +*(.text.d_rehash) +*(.text.d_path) +*(.text.do_ioctl) +*(.text.dget_locked) +*(.text.copy_thread_group_keys) +*(.text.cdrom_end_request) +*(.text.cap_bprm_apply_creds) +*(.text.blk_rq_bio_prep) +*(.text.__bitmap_intersects) +*(.text.bio_phys_segments) +*(.text.bio_free) +*(.text.arch_get_unmapped_area_topdown) +*(.text.writeback_in_progress) +*(.text.vfs_follow_link) +*(.text.tcp_rcv_state_process) +*(.text.tcp_check_space) +*(.text.sys_stat) +*(.text.sys_rt_sigreturn) +*(.text.sys_rt_sigaction) +*(.text.sys_remap_file_pages) +*(.text.sys_pwrite64) +*(.text.sys_fchownat) +*(.text.sys_fchmodat) +*(.text.strncat) +*(.text.strlcat) +*(.text.strcmp) +*(.text.steal_locks) +*(.text.sock_create) +*(.text.sk_stream_rfree) +*(.text.sk_stream_mem_schedule) +*(.text.skip_atoi) +*(.text.sk_alloc) +*(.text.show_stat) +*(.text.set_fs_pwd) +*(.text.set_binfmt) +*(.text.pty_unthrottle) +*(.text.proc_symlink) +*(.text.pipe_release) +*(.text.pageout) +*(.text.n_tty_write_wakeup) +*(.text.n_tty_ioctl) +*(.text.nr_free_zone_pages) +*(.text.migration_thread) +*(.text.mempool_free_slab) +*(.text.meminfo_read_proc) +*(.text.max_sane_readahead) +*(.text.lru_cache_add) +*(.text.kill_fasync) +*(.text.kernel_read) +*(.text.invalidate_mapping_pages) +*(.text.inode_has_buffers) +*(.text.init_once) +*(.text.inet_sendmsg) +*(.text.idedisk_issue_flush) +*(.text.generic_file_write) +*(.text.free_more_memory) +*(.text.__free_fdtable) +*(.text.filp_dtor) +*(.text.exit_sem) +*(.text.exit_itimers) +*(.text.error_interrupt) +*(.text.end_buffer_async_write) +*(.text.eligible_child) +*(.text.elf_map) +*(.text.dump_task_regs) +*(.text.dummy_task_setscheduler) +*(.text.dummy_socket_accept) +*(.text.dummy_file_free_security) +*(.text.__down_read) +*(.text.do_sock_read) +*(.text.do_sigaltstack) +*(.text.do_mremap) +*(.text.current_io_context) +*(.text.cpu_swap_callback) +*(.text.copy_vma) +*(.text.cap_bprm_set_security) +*(.text.blk_insert_request) +*(.text.bio_map_kern_endio) +*(.text.bio_hw_segments) +*(.text.bictcp_cong_avoid) +*(.text.add_interrupt_randomness) +*(.text.wait_for_completion) +*(.text.version_read_proc) +*(.text.unix_write_space) +*(.text.tty_ldisc_ref_wait) +*(.text.tty_ldisc_put) +*(.text.try_to_wake_up) +*(.text.tcp_v4_tw_remember_stamp) +*(.text.tcp_try_undo_dsack) +*(.text.tcp_may_send_now) +*(.text.sys_waitid) +*(.text.sys_sched_getparam) +*(.text.sys_getppid) +*(.text.sys_getcwd) +*(.text.sys_dup2) +*(.text.sys_chmod) +*(.text.sys_chdir) +*(.text.sprintf) +*(.text.sock_wfree) +*(.text.sock_aio_write) +*(.text.skb_drop_fraglist) +*(.text.skb_dequeue) +*(.text.set_close_on_exec) +*(.text.set_brk) +*(.text.seq_puts) +*(.text.SELECT_DRIVE) +*(.text.sched_exec) +*(.text.return_EIO) +*(.text.remove_from_page_cache) +*(.text.rcu_start_batch) +*(.text.__put_task_struct) +*(.text.proc_pid_readdir) +*(.text.proc_get_inode) +*(.text.prepare_to_wait_exclusive) +*(.text.pipe_wait) +*(.text.pipe_new) +*(.text.pdflush_operation) +*(.text.__pagevec_release) +*(.text.pagevec_lookup_tag) +*(.text.packet_rcv) +*(.text.n_tty_set_room) +*(.text.nr_free_pages) +*(.text.__net_timestamp) +*(.text.mpage_end_io_read) +*(.text.mod_timer) +*(.text.__memcpy) +*(.text.mb_cache_shrink_fn) +*(.text.lock_rename) +*(.text.kstrdup) +*(.text.is_ignored) +*(.text.int_very_careful) +*(.text.inotify_inode_is_dead) +*(.text.inotify_get_cookie) +*(.text.inode_get_bytes) +*(.text.init_timer) +*(.text.init_dev) +*(.text.inet_getname) +*(.text.ide_map_sg) +*(.text.__ide_dma_end) +*(.text.hrtimer_get_remaining) +*(.text.get_task_mm) +*(.text.get_random_int) +*(.text.free_pipe_info) +*(.text.filemap_write_and_wait_range) +*(.text.exit_thread) +*(.text.enter_idle) +*(.text.end_that_request_first) +*(.text.end_8259A_irq) +*(.text.dummy_file_alloc_security) +*(.text.do_group_exit) +*(.text.debug_mutex_init) +*(.text.cpuset_exit) +*(.text.cpu_idle) +*(.text.copy_semundo) +*(.text.copy_files) +*(.text.chrdev_open) +*(.text.cdrom_transfer_packet_command) +*(.text.cdrom_mode_sense) +*(.text.blk_phys_contig_segment) +*(.text.blk_get_queue) +*(.text.bio_split) +*(.text.audit_alloc) +*(.text.anon_pipe_buf_release) +*(.text.add_wait_queue_exclusive) +*(.text.add_wait_queue) +*(.text.acct_process) +*(.text.account) +*(.text.zeromap_page_range) +*(.text.yield) +*(.text.writeback_acquire) +*(.text.worker_thread) +*(.text.wait_on_page_writeback_range) +*(.text.__wait_on_buffer) +*(.text.vscnprintf) +*(.text.vmalloc_to_pfn) +*(.text.vgacon_save_screen) +*(.text.vfs_unlink) +*(.text.vfs_rmdir) +*(.text.unregister_md_personality) +*(.text.unlock_new_inode) +*(.text.unix_stream_sendmsg) +*(.text.unix_stream_recvmsg) +*(.text.unhash_process) +*(.text.udp_v4_lookup_longway) +*(.text.tty_ldisc_flush) +*(.text.tty_ldisc_enable) +*(.text.tty_hung_up_p) +*(.text.tty_buffer_free_all) +*(.text.tso_fragment) +*(.text.try_to_del_timer_sync) +*(.text.tcp_v4_err) +*(.text.tcp_unhash) +*(.text.tcp_seq_next) +*(.text.tcp_select_initial_window) +*(.text.tcp_sacktag_write_queue) +*(.text.tcp_cwnd_validate) +*(.text.sys_vhangup) +*(.text.sys_uselib) +*(.text.sys_symlink) +*(.text.sys_signal) +*(.text.sys_poll) +*(.text.sys_mount) +*(.text.sys_kill) +*(.text.sys_ioctl) +*(.text.sys_inotify_add_watch) +*(.text.sys_getuid) +*(.text.sys_getrlimit) +*(.text.sys_getitimer) +*(.text.sys_getgroups) +*(.text.sys_ftruncate) +*(.text.sysfs_lookup) +*(.text.sys_exit_group) +*(.text.stub_fork) +*(.text.sscanf) +*(.text.sock_map_fd) +*(.text.sock_get_timestamp) +*(.text.__sock_create) +*(.text.smp_call_function_single) +*(.text.sk_stop_timer) +*(.text.skb_copy_and_csum_datagram) +*(.text.__skb_checksum_complete) +*(.text.single_next) +*(.text.sigqueue_alloc) +*(.text.shrink_dcache_parent) +*(.text.select_idle_routine) +*(.text.run_workqueue) +*(.text.run_local_timers) +*(.text.remove_inode_hash) +*(.text.remove_dquot_ref) +*(.text.register_binfmt) +*(.text.read_cache_pages) +*(.text.rb_last) +*(.text.pty_open) +*(.text.proc_root_readdir) +*(.text.proc_pid_flush) +*(.text.proc_pident_lookup) +*(.text.proc_fill_super) +*(.text.proc_exe_link) +*(.text.posix_locks_deadlock) +*(.text.pipe_iov_copy_from_user) +*(.text.opost) +*(.text.nf_register_hook) +*(.text.netif_rx_ni) +*(.text.m_start) +*(.text.mpage_writepage) +*(.text.mm_alloc) +*(.text.memory_open) +*(.text.mark_buffer_async_write) +*(.text.lru_add_drain_all) +*(.text.locks_init_lock) +*(.text.locks_delete_lock) +*(.text.lock_hrtimer_base) +*(.text.load_script) +*(.text.__kill_fasync) +*(.text.ip_mc_sf_allow) +*(.text.__ioremap) +*(.text.int_with_check) +*(.text.int_sqrt) +*(.text.install_thread_keyring) +*(.text.init_page_buffers) +*(.text.inet_sock_destruct) +*(.text.idle_notifier_register) +*(.text.ide_execute_command) +*(.text.ide_end_drive_cmd) +*(.text.__ide_dma_host_on) +*(.text.hrtimer_run_queues) +*(.text.hpet_mask_rtc_irq_bit) +*(.text.__get_zone_counts) +*(.text.get_zone_counts) +*(.text.get_write_access) +*(.text.get_fs_struct) +*(.text.get_dirty_limits) +*(.text.generic_readlink) +*(.text.free_hot_page) +*(.text.finish_wait) +*(.text.find_inode) +*(.text.find_first_bit) +*(.text.__filemap_fdatawrite_range) +*(.text.__filemap_copy_from_user_iovec) +*(.text.exit_aio) +*(.text.elv_set_request) +*(.text.elv_former_request) +*(.text.dup_namespace) +*(.text.dupfd) +*(.text.dummy_socket_getsockopt) +*(.text.dummy_sb_post_mountroot) +*(.text.dummy_quotactl) +*(.text.dummy_inode_rename) +*(.text.__do_SAK) +*(.text.do_pipe) +*(.text.do_fsync) +*(.text.d_instantiate_unique) +*(.text.d_find_alias) +*(.text.deny_write_access) +*(.text.dentry_unhash) +*(.text.d_delete) +*(.text.datagram_poll) +*(.text.cpuset_fork) +*(.text.cpuid_read) +*(.text.copy_namespace) +*(.text.cond_resched) +*(.text.check_version) +*(.text.__change_page_attr) +*(.text.cfq_slab_kill) +*(.text.cfq_completed_request) +*(.text.cdrom_pc_intr) +*(.text.cdrom_decode_status) +*(.text.cap_capset_check) +*(.text.blk_put_request) +*(.text.bio_fs_destructor) +*(.text.bictcp_min_cwnd) +*(.text.alloc_chrdev_region) +*(.text.add_element) +*(.text.acct_update_integrals) +*(.text.write_boundary_block) +*(.text.writeback_release) +*(.text.writeback_inodes) +*(.text.wake_up_state) +*(.text.__wake_up_locked) +*(.text.wake_futex) +*(.text.wait_task_inactive) +*(.text.__wait_on_freeing_inode) +*(.text.wait_noreap_copyout) +*(.text.vmstat_start) +*(.text.vgacon_do_font_op) +*(.text.vfs_readv) +*(.text.vfs_quota_sync) +*(.text.update_queue) +*(.text.unshare_files) +*(.text.unmap_vm_area) +*(.text.unix_socketpair) +*(.text.unix_release_sock) +*(.text.unix_detach_fds) +*(.text.unix_create1) +*(.text.unix_bind) +*(.text.udp_sendmsg) +*(.text.udp_rcv) +*(.text.udp_queue_rcv_skb) +*(.text.uart_write) +*(.text.uart_startup) +*(.text.uart_open) +*(.text.tty_vhangup) +*(.text.tty_termios_baud_rate) +*(.text.tty_release) +*(.text.tty_ldisc_ref) +*(.text.throttle_vm_writeout) +*(.text.058) +*(.text.tcp_xmit_probe_skb) +*(.text.tcp_v4_send_check) +*(.text.tcp_v4_destroy_sock) +*(.text.tcp_sync_mss) +*(.text.tcp_snd_test) +*(.text.tcp_slow_start) +*(.text.tcp_send_fin) +*(.text.tcp_rtt_estimator) +*(.text.tcp_parse_options) +*(.text.tcp_ioctl) +*(.text.tcp_init_tso_segs) +*(.text.tcp_init_cwnd) +*(.text.tcp_getsockopt) +*(.text.tcp_fin) +*(.text.tcp_connect) +*(.text.tcp_cong_avoid) +*(.text.__tcp_checksum_complete_user) +*(.text.task_dumpable) +*(.text.sys_wait4) +*(.text.sys_utimes) +*(.text.sys_symlinkat) +*(.text.sys_socketpair) +*(.text.sys_rmdir) +*(.text.sys_readahead) +*(.text.sys_nanosleep) +*(.text.sys_linkat) +*(.text.sys_fstat) +*(.text.sysfs_readdir) +*(.text.sys_execve) +*(.text.sysenter_tracesys) +*(.text.sys_chown) +*(.text.stub_clone) +*(.text.strrchr) +*(.text.strncpy) +*(.text.stopmachine_set_state) +*(.text.sock_sendmsg) +*(.text.sock_release) +*(.text.sock_fasync) +*(.text.sock_close) +*(.text.sk_stream_write_space) +*(.text.sk_reset_timer) +*(.text.skb_split) +*(.text.skb_recv_datagram) +*(.text.skb_queue_tail) +*(.text.sk_attach_filter) +*(.text.si_swapinfo) +*(.text.simple_strtoll) +*(.text.set_termios) +*(.text.set_task_comm) +*(.text.set_shrinker) +*(.text.set_normalized_timespec) +*(.text.set_brk) +*(.text.serial_in) +*(.text.seq_printf) +*(.text.secure_dccp_sequence_number) +*(.text.rwlock_bug) +*(.text.rt_hash_code) +*(.text.__rta_fill) +*(.text.__request_resource) +*(.text.relocate_new_kernel) +*(.text.release_thread) +*(.text.release_mem) +*(.text.rb_prev) +*(.text.rb_first) +*(.text.random_poll) +*(.text.__put_super_and_need_restart) +*(.text.pty_write) +*(.text.ptrace_stop) +*(.text.proc_self_readlink) +*(.text.proc_root_lookup) +*(.text.proc_root_link) +*(.text.proc_pid_make_inode) +*(.text.proc_pid_attr_write) +*(.text.proc_lookupfd) +*(.text.proc_delete_inode) +*(.text.posix_same_owner) +*(.text.posix_block_lock) +*(.text.poll_initwait) +*(.text.pipe_write) +*(.text.pipe_read_fasync) +*(.text.pipe_ioctl) +*(.text.pdflush) +*(.text.pci_user_read_config_dword) +*(.text.page_readlink) +*(.text.null_lseek) +*(.text.nf_hook_slow) +*(.text.netlink_sock_destruct) +*(.text.netlink_broadcast) +*(.text.neigh_resolve_output) +*(.text.name_to_int) +*(.text.mwait_idle) +*(.text.mutex_trylock) +*(.text.mutex_debug_check_no_locks_held) +*(.text.m_stop) +*(.text.mpage_end_io_write) +*(.text.mpage_alloc) +*(.text.move_page_tables) +*(.text.mounts_open) +*(.text.__memset) +*(.text.memcpy_fromiovec) +*(.text.make_8259A_irq) +*(.text.lookup_user_key_possessed) +*(.text.lookup_create) +*(.text.locks_insert_lock) +*(.text.locks_alloc_lock) +*(.text.kthread_should_stop) +*(.text.kswapd) +*(.text.kobject_uevent) +*(.text.kobject_get_path) +*(.text.kobject_get) +*(.text.klist_children_put) +*(.text.__ip_route_output_key) +*(.text.ip_flush_pending_frames) +*(.text.ip_compute_csum) +*(.text.ip_append_data) +*(.text.ioc_set_batching) +*(.text.invalidate_inode_pages) +*(.text.__invalidate_device) +*(.text.install_arg_page) +*(.text.in_sched_functions) +*(.text.inotify_unmount_inodes) +*(.text.init_once) +*(.text.init_cdrom_command) +*(.text.inet_stream_connect) +*(.text.inet_sk_rebuild_header) +*(.text.inet_csk_addr2sockaddr) +*(.text.inet_create) +*(.text.ifind) +*(.text.ide_setup_dma) +*(.text.ide_outsw) +*(.text.ide_fixstring) +*(.text.ide_dma_setup) +*(.text.ide_cdrom_packet) +*(.text.ide_cd_put) +*(.text.ide_build_sglist) +*(.text.i8259A_shutdown) +*(.text.hung_up_tty_ioctl) +*(.text.hrtimer_nanosleep) +*(.text.hrtimer_init) +*(.text.hrtimer_cancel) +*(.text.hash_futex) +*(.text.group_send_sig_info) +*(.text.grab_cache_page_nowait) +*(.text.get_wchan) +*(.text.get_stack) +*(.text.get_page_state) +*(.text.getnstimeofday) +*(.text.get_node) +*(.text.get_kprobe) +*(.text.generic_unplug_device) +*(.text.free_task) +*(.text.frag_show) +*(.text.find_next_zero_string) +*(.text.filp_open) +*(.text.fillonedir) +*(.text.exit_io_context) +*(.text.exit_idle) +*(.text.exact_lock) +*(.text.eth_header) +*(.text.dummy_unregister_security) +*(.text.dummy_socket_post_create) +*(.text.dummy_socket_listen) +*(.text.dummy_quota_on) +*(.text.dummy_inode_follow_link) +*(.text.dummy_file_receive) +*(.text.dummy_file_mprotect) +*(.text.dummy_file_lock) +*(.text.dummy_file_ioctl) +*(.text.dummy_bprm_post_apply_creds) +*(.text.do_writepages) +*(.text.__down_interruptible) +*(.text.do_notify_resume) +*(.text.do_acct_process) +*(.text.del_timer_sync) +*(.text.default_rebuild_header) +*(.text.d_callback) +*(.text.dcache_readdir) +*(.text.ctrl_dumpfamily) +*(.text.cpuset_rmdir) +*(.text.copy_strings_kernel) +*(.text.con_write_room) +*(.text.complete_all) +*(.text.collect_sigign_sigcatch) +*(.text.clear_user) +*(.text.check_unthrottle) +*(.text.cdrom_release) +*(.text.cdrom_newpc_intr) +*(.text.cdrom_ioctl) +*(.text.cdrom_check_status) +*(.text.cdev_put) +*(.text.cdev_add) +*(.text.cap_ptrace) +*(.text.cap_bprm_secureexec) +*(.text.cache_alloc_refill) +*(.text.bmap) +*(.text.blk_run_queue) +*(.text.blk_queue_dma_alignment) +*(.text.blk_ordered_req_seq) +*(.text.blk_backing_dev_unplug) +*(.text.__bitmap_subset) +*(.text.__bitmap_and) +*(.text.bio_unmap_user) +*(.text.__bforget) +*(.text.bd_forget) +*(.text.bad_pipe_w) +*(.text.bad_get_user) +*(.text.audit_free) +*(.text.anon_vma_ctor) +*(.text.anon_pipe_buf_map) +*(.text.alloc_sock_iocb) +*(.text.alloc_fdset) +*(.text.aio_kick_handler) +*(.text.__add_entropy_words) +*(.text.add_disk_randomness) diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 02fc7fa..6df05e6 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -26,6 +26,7 @@ */ .text + .section .bootstrap.text .code32 .globl startup_32 /* %bx: 1 if coming from smp trampoline on secondary cpu */ @@ -192,7 +193,8 @@ startup_64: movq initial_code(%rip),%rax jmp *%rax - /* SMP bootup changes these two */ + /* SMP bootup changes these two */ + .align 8 .globl initial_code initial_code: .quad x86_64_start_kernel @@ -237,7 +239,7 @@ ENTRY(no_long_mode) .org 0xf00 .globl pGDT32 pGDT32: - .word gdt_end-cpu_gdt_table + .word gdt_end-cpu_gdt_table-1 .long cpu_gdt_table-__START_KERNEL_map .org 0xf10 @@ -293,8 +295,6 @@ NEXT_PAGE(level2_kernel_pgt) /* Module mapping starts here */ .fill 492,8,0 -NEXT_PAGE(empty_zero_page) - NEXT_PAGE(level3_physmem_pgt) .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */ .fill 511,8,0 @@ -337,7 +337,7 @@ ENTRY(boot_level4_pgt) .align 16 .globl cpu_gdt_descr cpu_gdt_descr: - .word gdt_end-cpu_gdt_table + .word gdt_end-cpu_gdt_table-1 gdt: .quad cpu_gdt_table #ifdef CONFIG_SMP @@ -352,7 +352,8 @@ gdt: * Also sysret mandates a special GDT layout */ -.align PAGE_SIZE + .section .data.page_aligned, "aw" + .align PAGE_SIZE /* The TLS descriptors are currently at a different place compared to i386. Hopefully nobody expects them at a fixed place (Wine?) */ @@ -378,9 +379,12 @@ gdt_end: /* zero the remaining page */ .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 -ENTRY(idt_table) - .rept 256 - .quad 0 - .quad 0 - .endr + .section .bss, "aw", @nobits + .align L1_CACHE_BYTES +ENTRY(idt_table) + .skip 256 * 16 + .section .bss.page_aligned, "aw", @nobits + .align PAGE_SIZE +ENTRY(empty_zero_page) + .skip PAGE_SIZE diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index ffed464..77b4c60 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -50,7 +50,7 @@ static int no_timer_check; int disable_timer_pin_1 __initdata; -int timer_over_8254 __initdata = 1; +int timer_over_8254 __initdata = 0; /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -310,7 +310,7 @@ void __init check_ioapic(void) force_iommu) && !iommu_aperture_allowed) { printk(KERN_INFO - "Looks like a VIA chipset. Disabling IOMMU. Overwrite with \"iommu=allowed\"\n"); + "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n"); iommu_aperture_disabled = 1; } #endif @@ -1848,7 +1848,7 @@ static inline void check_timer(void) */ setup_ExtINT_IRQ0_pin(apic2, pin2, vector); if (timer_irq_works()) { - printk("works.\n"); + apic_printk(APIC_VERBOSE," works.\n"); nmi_watchdog_default(); if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@ -1860,7 +1860,7 @@ static inline void check_timer(void) */ clear_IO_APIC_pin(apic2, pin2); } - printk(" failed.\n"); + apic_printk(APIC_VERBOSE," failed.\n"); if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); @@ -1875,7 +1875,7 @@ static inline void check_timer(void) enable_8259A_irq(0); if (timer_irq_works()) { - apic_printk(APIC_QUIET, " works.\n"); + apic_printk(APIC_VERBOSE," works.\n"); return; } apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 14f0ced..accbff3 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -37,10 +37,12 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/preempt.h> +#include <linux/module.h> #include <asm/cacheflush.h> #include <asm/pgtable.h> #include <asm/kdebug.h> +#include <asm/uaccess.h> void jprobe_return_end(void); static void __kprobes arch_copy_kprobe(struct kprobe *p); @@ -578,16 +580,62 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + const struct exception_table_entry *fixup; - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - if (kcb->kprobe_status & KPROBE_HIT_SS) { - resume_execution(cur, regs, kcb); + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the rip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->rip = (unsigned long)cur->addr; regs->eflags |= kcb->kprobe_old_rflags; - - reset_current_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + fixup = search_exception_tables(regs->rip); + if (fixup) { + regs->rip = fixup->fixup; + return 1; + } + + /* + * fixup() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; } return 0; } @@ -601,6 +649,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; + if (args->regs && user_mode(args->regs)) + return ret; + switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 9013a90..b17cf3e 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -106,11 +106,11 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __init MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit MP_processor_info (struct mpc_config_processor *m) { int cpu; unsigned char ver; - static int found_bsp=0; + cpumask_t tmp_map; if (!(m->mpc_cpuflag & CPU_ENABLED)) { disabled_cpus++; @@ -133,8 +133,10 @@ static void __init MP_processor_info (struct mpc_config_processor *m) return; } - cpu = num_processors++; - + num_processors++; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + #if MAX_APICS < 255 if ((int)m->mpc_apicid > MAX_APICS) { printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", @@ -160,12 +162,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m) * entry is BSP, and so on. */ cpu = 0; - - bios_cpu_apicid[0] = m->mpc_apicid; - x86_cpu_to_apicid[0] = m->mpc_apicid; - found_bsp = 1; - } else - cpu = num_processors - found_bsp; + } bios_cpu_apicid[cpu] = m->mpc_apicid; x86_cpu_to_apicid[cpu] = m->mpc_apicid; @@ -691,7 +688,7 @@ void __init mp_register_lapic_address ( } -void __init mp_register_lapic ( +void __cpuinit mp_register_lapic ( u8 id, u8 enabled) { diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 66c009e..d9e4067 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -534,6 +534,7 @@ asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) void set_nmi_callback(nmi_callback_t callback) { + vmalloc_sync_all(); rcu_assign_pointer(nmi_callback, callback); } diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 4ed391e..03c9eee 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -73,6 +73,9 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dma_mask == 0) dma_mask = 0xffffffff; + /* Don't invoke OOM killer */ + gfp |= __GFP_NORETRY; + /* Kludge to make it bug-to-bug compatible with i386. i386 uses the normal dma_mask for alloc_coherent. */ dma_mask &= *dev->dma_mask; diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 0c3f052..a6c01e1 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -65,9 +65,7 @@ static u32 gart_unmapped_entry; #define for_all_nb(dev) \ dev = NULL; \ - while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\ - if (dev->bus->number == 0 && \ - (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31)) + while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL) static struct pci_dev *northbridges[MAX_NB]; static u32 northbridge_flush_word[MAX_NB]; @@ -148,9 +146,12 @@ static void flush_gart(struct device *dev) if (!northbridges[i]) continue; /* Make sure the hardware actually executed the flush. */ - do { + for (;;) { pci_read_config_dword(northbridges[i], 0x9c, &w); - } while (w & 1); + if (!(w & 1)) + break; + cpu_relax(); + } } if (!flushed) printk("nothing to flush?\n"); diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c index 5c51d10..ee5ee48 100644 --- a/arch/x86_64/kernel/pmtimer.c +++ b/arch/x86_64/kernel/pmtimer.c @@ -86,7 +86,7 @@ static unsigned pmtimer_wait_tick(void) for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK; a == b; b = inl(pmtmr_ioport) & ACPI_PM_MASK) - ; + cpu_relax(); return b; } @@ -97,6 +97,7 @@ void pmtimer_wait(unsigned us) a = pmtimer_wait_tick(); do { b = inl(pmtmr_ioport); + cpu_relax(); } while (cyc2us(b - a) < us); } diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 80a8f30..70dd8e5 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -35,8 +35,8 @@ #include <linux/ptrace.h> #include <linux/utsname.h> #include <linux/random.h> -#include <linux/kprobes.h> #include <linux/notifier.h> +#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -66,24 +66,17 @@ EXPORT_SYMBOL(boot_option_idle_override); void (*pm_idle)(void); static DEFINE_PER_CPU(unsigned int, cpu_idle_state); -static struct notifier_block *idle_notifier; -static DEFINE_SPINLOCK(idle_notifier_lock); +static ATOMIC_NOTIFIER_HEAD(idle_notifier); void idle_notifier_register(struct notifier_block *n) { - unsigned long flags; - spin_lock_irqsave(&idle_notifier_lock, flags); - notifier_chain_register(&idle_notifier, n); - spin_unlock_irqrestore(&idle_notifier_lock, flags); + atomic_notifier_chain_register(&idle_notifier, n); } EXPORT_SYMBOL_GPL(idle_notifier_register); void idle_notifier_unregister(struct notifier_block *n) { - unsigned long flags; - spin_lock_irqsave(&idle_notifier_lock, flags); - notifier_chain_unregister(&idle_notifier, n); - spin_unlock_irqrestore(&idle_notifier_lock, flags); + atomic_notifier_chain_unregister(&idle_notifier, n); } EXPORT_SYMBOL(idle_notifier_unregister); @@ -93,13 +86,13 @@ static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE; void enter_idle(void) { __get_cpu_var(idle_state) = CPU_IDLE; - notifier_call_chain(&idle_notifier, IDLE_START, NULL); + atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); } static void __exit_idle(void) { __get_cpu_var(idle_state) = CPU_NOT_IDLE; - notifier_call_chain(&idle_notifier, IDLE_END, NULL); + atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); } /* Called from interrupts to signify idle end */ @@ -353,13 +346,6 @@ void exit_thread(void) struct task_struct *me = current; struct thread_struct *t = &me->thread; - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(me); - if (me->thread.io_bitmap_ptr) { struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); @@ -508,7 +494,7 @@ out: /* * This special macro can be used to load a debugging register */ -#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r) +#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) /* * switch_to(x,y) should switch tasks from x to y. @@ -527,8 +513,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); - unlazy_fpu(prev_p); - /* * Reload esp0, LDT and the page table pointer: */ @@ -586,11 +570,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) } /* - * Switch the PDA context. + * Switch the PDA and FPU contexts. */ prev->userrsp = read_pda(oldrsp); write_pda(oldrsp, next->userrsp); write_pda(pcurrent, next_p); + /* This must be here to ensure both math_state_restore() and + kernel_fpu_begin() work consistently. */ + unlazy_fpu(prev_p); write_pda(kernelstack, task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index 5320562..d44b2c1 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -420,9 +420,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case offsetof(struct user, u_debugreg[7]): /* See arch/i386/kernel/ptrace.c for an explanation of * this awkward check.*/ - data &= ~DR_CONTROL_RESERVED; - for(i=0; i<4; i++) - if ((0x5454 >> ((data >> (16 + 4*i)) & 0xf)) & 1) + data &= ~DR_CONTROL_RESERVED; + for(i=0; i<4; i++) + if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) break; if (i == 4) { child->thread.debugreg7 = data; diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index f227d0c..d1f3e92 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -46,6 +46,7 @@ #include <linux/cpufreq.h> #include <linux/dmi.h> #include <linux/dma-mapping.h> +#include <linux/ctype.h> #include <asm/mtrr.h> #include <asm/uaccess.h> @@ -67,6 +68,7 @@ #include <asm/swiotlb.h> #include <asm/sections.h> #include <asm/gart-mapping.h> +#include <asm/dmi.h> /* * Machine setup.. @@ -91,6 +93,12 @@ int bootloader_type; unsigned long saved_video_mode; +/* + * Early DMI memory + */ +int dmi_alloc_index; +char dmi_alloc_data[DMI_MAX_DATA]; + /* * Setup options */ @@ -270,6 +278,13 @@ static void __init probe_roms(void) } } +/* Check for full argument with no trailing characters */ +static int fullarg(char *p, char *arg) +{ + int l = strlen(arg); + return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l])); +} + static __init void parse_cmdline_early (char ** cmdline_p) { char c = ' ', *to = command_line, *from = COMMAND_LINE; @@ -293,10 +308,10 @@ static __init void parse_cmdline_early (char ** cmdline_p) #endif #ifdef CONFIG_ACPI /* "acpi=off" disables both ACPI table parsing and interpreter init */ - if (!memcmp(from, "acpi=off", 8)) + if (fullarg(from,"acpi=off")) disable_acpi(); - if (!memcmp(from, "acpi=force", 10)) { + if (fullarg(from, "acpi=force")) { /* add later when we do DMI horrors: */ acpi_force = 1; acpi_disabled = 0; @@ -304,52 +319,47 @@ static __init void parse_cmdline_early (char ** cmdline_p) /* acpi=ht just means: do ACPI MADT parsing at bootup, but don't enable the full ACPI interpreter */ - if (!memcmp(from, "acpi=ht", 7)) { + if (fullarg(from, "acpi=ht")) { if (!acpi_force) disable_acpi(); acpi_ht = 1; } - else if (!memcmp(from, "pci=noacpi", 10)) + else if (fullarg(from, "pci=noacpi")) acpi_disable_pci(); - else if (!memcmp(from, "acpi=noirq", 10)) + else if (fullarg(from, "acpi=noirq")) acpi_noirq_set(); - else if (!memcmp(from, "acpi_sci=edge", 13)) + else if (fullarg(from, "acpi_sci=edge")) acpi_sci_flags.trigger = 1; - else if (!memcmp(from, "acpi_sci=level", 14)) + else if (fullarg(from, "acpi_sci=level")) acpi_sci_flags.trigger = 3; - else if (!memcmp(from, "acpi_sci=high", 13)) + else if (fullarg(from, "acpi_sci=high")) acpi_sci_flags.polarity = 1; - else if (!memcmp(from, "acpi_sci=low", 12)) + else if (fullarg(from, "acpi_sci=low")) acpi_sci_flags.polarity = 3; /* acpi=strict disables out-of-spec workarounds */ - else if (!memcmp(from, "acpi=strict", 11)) { + else if (fullarg(from, "acpi=strict")) { acpi_strict = 1; } #ifdef CONFIG_X86_IO_APIC - else if (!memcmp(from, "acpi_skip_timer_override", 24)) + else if (fullarg(from, "acpi_skip_timer_override")) acpi_skip_timer_override = 1; #endif #endif - if (!memcmp(from, "disable_timer_pin_1", 19)) + if (fullarg(from, "disable_timer_pin_1")) disable_timer_pin_1 = 1; - if (!memcmp(from, "enable_timer_pin_1", 18)) + if (fullarg(from, "enable_timer_pin_1")) disable_timer_pin_1 = -1; - if (!memcmp(from, "nolapic", 7) || - !memcmp(from, "disableapic", 11)) + if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) disable_apic = 1; - /* Don't confuse with noapictimer */ - if (!memcmp(from, "noapic", 6) && - (from[6] == ' ' || from[6] == 0)) + if (fullarg(from, "noapic")) skip_ioapic_setup = 1; - /* Make sure to not confuse with apic= */ - if (!memcmp(from, "apic", 4) && - (from[4] == ' ' || from[4] == 0)) { + if (fullarg(from,"apic")) { skip_ioapic_setup = 0; ioapic_force = 1; } @@ -388,7 +398,7 @@ static __init void parse_cmdline_early (char ** cmdline_p) iommu_setup(from+6); } - if (!memcmp(from,"oops=panic", 10)) + if (fullarg(from,"oops=panic")) panic_on_oops = 1; if (!memcmp(from, "noexec=", 7)) @@ -611,11 +621,14 @@ void __init setup_arch(char **cmdline_p) * we are rounding upwards: */ end_pfn = e820_end_of_ram(); + num_physpages = end_pfn; /* for pfn_valid */ check_efer(); init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); + dmi_scan_machine(); + zap_low_mappings(0); #ifdef CONFIG_ACPI @@ -708,6 +721,12 @@ void __init setup_arch(char **cmdline_p) check_ioapic(); + /* + * set this early, so we dont allocate cpu0 + * if MADT list doesnt list BSP first + * mpparse.c/MP_processor_info() allocates logical cpu numbers. + */ + cpu_set(0, cpu_present_map); #ifdef CONFIG_ACPI /* * Read APIC and some other early information from ACPI tables. @@ -836,7 +855,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) unsigned bits; #ifdef CONFIG_NUMA int node = 0; - unsigned apicid = phys_proc_id[cpu]; + unsigned apicid = hard_smp_processor_id(); #endif bits = 0; @@ -846,7 +865,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) /* Low order bits define the core id (index of core in socket) */ cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1); /* Convert the APIC ID into the socket ID */ - phys_proc_id[cpu] >>= bits; + phys_proc_id[cpu] = phys_pkg_id(bits); #ifdef CONFIG_NUMA node = phys_proc_id[cpu]; @@ -872,8 +891,8 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) } numa_set_node(cpu, node); - printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", - cpu, c->x86_max_cores, node, cpu_core_id[cpu]); + printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n", + cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]); #endif #endif } @@ -927,8 +946,6 @@ static int __init init_amd(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000008) { c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_max_cores & (c->x86_max_cores - 1)) - c->x86_max_cores = 1; amd_detect_cmp(c); } @@ -945,7 +962,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id(0); if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; @@ -1154,6 +1170,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[2] = cpuid_edx(0x80860001); } + c->apicid = phys_pkg_id(0); + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are @@ -1261,7 +1279,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est", + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1402,10 +1420,3 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo, }; -static int __init run_dmi_scan(void) -{ - dmi_scan_machine(); - return 0; -} -core_initcall(run_dmi_scan); - diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 70f1bb8..eabdb63f 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -33,7 +33,7 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; -struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); @@ -59,7 +59,7 @@ int __init nonx_setup(char *str) } __setup("noexec=", nonx_setup); /* parsed early actually */ -int force_personality32 = READ_IMPLIES_EXEC; +int force_personality32 = 0; /* noexec32=on|off Control non executable heap for 32bit processes. @@ -248,7 +248,7 @@ void __cpuinit cpu_init (void) switch (v + 1) { #if DEBUG_STKSZ > EXCEPTION_STKSZ case DEBUG_STACK: - cpu_pda[cpu].debugstack = (unsigned long)estacks; + cpu_pda(cpu)->debugstack = (unsigned long)estacks; estacks += DEBUG_STKSZ; break; #endif @@ -281,12 +281,12 @@ void __cpuinit cpu_init (void) * Clear all 6 debug registers: */ - set_debug(0UL, 0); - set_debug(0UL, 1); - set_debug(0UL, 2); - set_debug(0UL, 3); - set_debug(0UL, 6); - set_debug(0UL, 7); + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); fpu_init(); } diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 19ef012..4a6628b 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -75,7 +75,7 @@ static inline void leave_mm(int cpu) { if (read_pda(mmu_state) == TLBSTATE_OK) BUG(); - clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask); + cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } @@ -85,7 +85,7 @@ static inline void leave_mm(int cpu) * [cpu0: the cpu that switches] * 1) switch_mm() either 1a) or 1b) * 1a) thread switch to a different mm - * 1a1) clear_bit(cpu, &old_mm->cpu_vm_mask); + * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); * Stop ipi delivery for the old mm. This is not synchronized with * the other cpus, but smp_invalidate_interrupt ignore flush ipis * for the wrong mm, and in the worst case we perform a superfluous @@ -95,7 +95,7 @@ static inline void leave_mm(int cpu) * was in lazy tlb mode. * 1a3) update cpu active_mm * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) set_bit(cpu, &new_mm->cpu_vm_mask); + * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); * Now the other cpus will send tlb flush ipis. * 1a4) change cr3. * 1b) thread switch without mm change diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 66e9865..ea48fa6 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; /* core ID of each logical CPU */ u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; +/* Last level cache ID of each logical CPU */ +u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; + /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map __read_mostly; @@ -445,6 +448,18 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + /* + * For perf, we return last level cache shared map. + * TBD: when power saving sched policy is added, we will return + * cpu_core_map when power saving policy is enabled + */ + return c->llc_shared_map; +} + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } + cpu_set(cpu, c[cpu].llc_shared_map); + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; c[cpu].booted_cores = 1; @@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (cpu_llc_id[cpu] != BAD_APICID && + cpu_llc_id[cpu] == cpu_llc_id[i]) { + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); + } if (phys_proc_id[cpu] == phys_proc_id[i]) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index ee5ce3d..473b514 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -88,7 +88,8 @@ static inline unsigned int do_gettimeoffset_tsc(void) unsigned long t; unsigned long x; t = get_cycles_sync(); - if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ + if (t < vxtime.last_tsc) + t = vxtime.last_tsc; /* hack */ x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; return x; } @@ -178,8 +179,9 @@ unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - /* Assume the lock function has either no stack frame or only a single word. - This checks if the address on the stack looks like a kernel text address. + /* Assume the lock function has either no stack frame or only a single + word. This checks if the address on the stack looks like a kernel + text address. There is a small window for false hits, but in that case the tick is just accounted to the spinlock function. Better would be to write these functions in assembler again @@ -242,17 +244,10 @@ static void set_rtc_mmss(unsigned long nowtime) real_minutes += 30; /* correct for half hour time zone */ real_minutes %= 60; -#if 0 - /* AMD 8111 is a really bad time keeper and hits this regularly. - It probably was an attempt to avoid screwing up DST, but ignore - that for now. */ if (abs(real_minutes - cmos_minutes) >= 30) { printk(KERN_WARNING "time.c: can't update CMOS clock " "from %d to %d\n", cmos_minutes, real_minutes); - } else -#endif - - { + } else { BIN_TO_BCD(real_seconds); BIN_TO_BCD(real_minutes); CMOS_WRITE(real_seconds, RTC_SECONDS); @@ -293,8 +288,7 @@ unsigned long long monotonic_clock(void) this_offset = hpet_readl(HPET_COUNTER); } while (read_seqretry(&xtime_lock, seq)); offset = (this_offset - last_offset); - offset *=(NSEC_PER_SEC/HZ)/hpet_tick; - return base + offset; + offset *= (NSEC_PER_SEC/HZ) / hpet_tick; } else { do { seq = read_seqbegin(&xtime_lock); @@ -303,50 +297,46 @@ unsigned long long monotonic_clock(void) base = monotonic_base; } while (read_seqretry(&xtime_lock, seq)); this_offset = get_cycles_sync(); - offset = (this_offset - last_offset)*1000/cpu_khz; - return base + offset; + offset = (this_offset - last_offset)*1000 / cpu_khz; } + return base + offset; } EXPORT_SYMBOL(monotonic_clock); static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) { - static long lost_count; - static int warned; - - if (report_lost_ticks) { - printk(KERN_WARNING "time.c: Lost %d timer " - "tick(s)! ", lost); - print_symbol("rip %s)\n", regs->rip); - } - - if (lost_count == 1000 && !warned) { - printk(KERN_WARNING - "warning: many lost ticks.\n" - KERN_WARNING "Your time source seems to be instable or " + static long lost_count; + static int warned; + if (report_lost_ticks) { + printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost); + print_symbol("rip %s)\n", regs->rip); + } + + if (lost_count == 1000 && !warned) { + printk(KERN_WARNING "warning: many lost ticks.\n" + KERN_WARNING "Your time source seems to be instable or " "some driver is hogging interupts\n"); - print_symbol("rip %s\n", regs->rip); - if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { - printk(KERN_WARNING "Falling back to HPET\n"); - if (hpet_use_timer) - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - vxtime.last = hpet_readl(HPET_COUNTER); - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; - } - /* else should fall back to PIT, but code missing. */ - warned = 1; - } else - lost_count++; + print_symbol("rip %s\n", regs->rip); + if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { + printk(KERN_WARNING "Falling back to HPET\n"); + if (hpet_use_timer) + vxtime.last = hpet_readl(HPET_T0_CMP) - + hpet_tick; + else + vxtime.last = hpet_readl(HPET_COUNTER); + vxtime.mode = VXTIME_HPET; + do_gettimeoffset = do_gettimeoffset_hpet; + } + /* else should fall back to PIT, but code missing. */ + warned = 1; + } else + lost_count++; #ifdef CONFIG_CPU_FREQ - /* In some cases the CPU can change frequency without us noticing - (like going into thermal throttle) - Give cpufreq a change to catch up. */ - if ((lost_count+1) % 25 == 0) { - cpufreq_delayed_get(); - } + /* In some cases the CPU can change frequency without us noticing + Give cpufreq a change to catch up. */ + if ((lost_count+1) % 25 == 0) + cpufreq_delayed_get(); #endif } @@ -354,7 +344,7 @@ void main_timer_handler(struct pt_regs *regs) { static unsigned long rtc_update = 0; unsigned long tsc; - int delay, offset = 0, lost = 0; + int delay = 0, offset = 0, lost = 0; /* * Here we are in the timer irq handler. We have irqs locally disabled (so we @@ -375,7 +365,7 @@ void main_timer_handler(struct pt_regs *regs) */ offset = hpet_readl(HPET_T0_CMP) - hpet_tick; delay = hpet_readl(HPET_COUNTER) - offset; - } else { + } else if (!pmtmr_ioport) { spin_lock(&i8253_lock); outb_p(0x00, 0x43); delay = inb_p(0x40); @@ -514,43 +504,32 @@ unsigned long long sched_clock(void) static unsigned long get_cmos_time(void) { - unsigned int timeout = 1000000, year, mon, day, hour, min, sec; - unsigned char uip = 0, this = 0; + unsigned int year, mon, day, hour, min, sec; unsigned long flags; - -/* - * The Linux interpretation of the CMOS clock register contents: When the - * Update-In-Progress (UIP) flag goes from 1 to 0, the RTC registers show the - * second which has precisely just started. Waiting for this can take up to 1 - * second, we timeout approximately after 2.4 seconds on a machine with - * standard 8.3 MHz ISA bus. - */ + unsigned extyear = 0; spin_lock_irqsave(&rtc_lock, flags); - while (timeout && (!uip || this)) { - uip |= this; - this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP; - timeout--; - } - - /* - * Here we are safe to assume the registers won't change for a whole - * second, so we just go ahead and read them. - */ - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); + do { + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); +#ifdef CONFIG_ACPI + if (acpi_fadt.revision >= FADT2_REVISION_ID && + acpi_fadt.century) + extyear = CMOS_READ(acpi_fadt.century); +#endif + } while (sec != CMOS_READ(RTC_SECONDS)); spin_unlock_irqrestore(&rtc_lock, flags); /* * We know that x86-64 always uses BCD format, no need to check the * config register. - */ + */ BCD_TO_BIN(sec); BCD_TO_BIN(min); @@ -559,11 +538,17 @@ static unsigned long get_cmos_time(void) BCD_TO_BIN(mon); BCD_TO_BIN(year); - /* - * x86-64 systems only exists since 2002. - * This will work up to Dec 31, 2100 - */ - year += 2000; + if (extyear) { + BCD_TO_BIN(extyear); + year += extyear; + printk(KERN_INFO "Extended CMOS year: %d\n", extyear); + } else { + /* + * x86-64 systems only exists since 2002. + * This will work up to Dec 31, 2100 + */ + year += 2000; + } return mktime(year, mon, day, hour, min, sec); } @@ -606,7 +591,8 @@ static void cpufreq_delayed_get(void) cpufreq_delayed_issched = 1; if (!warned) { warned = 1; - printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); + printk(KERN_DEBUG + "Losing some ticks... checking if CPU frequency changed.\n"); } schedule_work(&cpufreq_delayed_get_work); } @@ -629,9 +615,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, lpj = &dummy; if (!(freq->flags & CPUFREQ_CONST_LOOPS)) #ifdef CONFIG_SMP - lpj = &cpu_data[freq->cpu].loops_per_jiffy; + lpj = &cpu_data[freq->cpu].loops_per_jiffy; #else - lpj = &boot_cpu_data.loops_per_jiffy; + lpj = &boot_cpu_data.loops_per_jiffy; #endif if (!ref_freq) { @@ -768,9 +754,8 @@ static __init int late_hpet_init(void) int i; hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE); - - for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer; - timer++, i++) + timer = &hpet->hpet_timers[2]; + for (i = 2; i < ntimer; timer++, i++) hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; @@ -927,8 +912,7 @@ void __init time_init(void) -xtime.tv_sec, -xtime.tv_nsec); if (!hpet_init()) - vxtime_hz = (1000000000000000L + hpet_period / 2) / - hpet_period; + vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; else vxtime.hpet_address = 0; diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 28d50dc..edaa9fe 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -47,8 +47,6 @@ #include <asm/proto.h> #include <asm/nmi.h> -extern struct gate_struct idt_table[256]; - asmlinkage void divide_error(void); asmlinkage void debug(void); asmlinkage void nmi(void); @@ -71,18 +69,20 @@ asmlinkage void alignment_check(void); asmlinkage void machine_check(void); asmlinkage void spurious_interrupt_bug(void); -struct notifier_block *die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +ATOMIC_NOTIFIER_HEAD(die_chain); int register_die_notifier(struct notifier_block *nb) { - int err = 0; - unsigned long flags; - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; + vmalloc_sync_all(); + return atomic_notifier_chain_register(&die_chain, nb); +} +EXPORT_SYMBOL(register_die_notifier); + +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&die_chain, nb); } +EXPORT_SYMBOL(unregister_die_notifier); static inline void conditional_sti(struct pt_regs *regs) { @@ -122,7 +122,7 @@ int printk_address(unsigned long address) if (!modname) modname = delim = ""; return printk("<%016lx>{%s%s%s%s%+ld}", - address,delim,modname,delim,symname,offset); + address, delim, modname, delim, symname, offset); } #else int printk_address(unsigned long address) @@ -334,13 +334,12 @@ void show_registers(struct pt_regs *regs) show_stack(NULL, (unsigned long*)rsp); printk("\nCode: "); - if(regs->rip < PAGE_OFFSET) + if (regs->rip < PAGE_OFFSET) goto bad; - for(i=0;i<20;i++) - { + for (i=0; i<20; i++) { unsigned char c; - if(__get_user(c, &((unsigned char*)regs->rip)[i])) { + if (__get_user(c, &((unsigned char*)regs->rip)[i])) { bad: printk(" Bad RIP value."); break; @@ -479,7 +478,7 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, printk(KERN_INFO "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", tsk->comm, tsk->pid, str, - regs->rip,regs->rsp,error_code); + regs->rip, regs->rsp, error_code); if (info) force_sig_info(signr, info, tsk); @@ -493,9 +492,9 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->rip); - if (fixup) { + if (fixup) regs->rip = fixup->fixup; - } else + else die(str, regs, error_code); return; } @@ -568,7 +567,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, printk(KERN_INFO "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", tsk->comm, tsk->pid, - regs->rip,regs->rsp,error_code); + regs->rip, regs->rsp, error_code); force_sig(SIGSEGV, tsk); return; diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 74db006..39ff070 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -20,6 +20,12 @@ SECTIONS phys_startup_64 = startup_64 - LOAD_OFFSET; _text = .; /* Text and read-only data */ .text : AT(ADDR(.text) - LOAD_OFFSET) { + /* First the code that has to be first for bootstrapping */ + *(.bootstrap.text) + /* Then all the functions that are "hot" in profiles, to group them + onto the same hugetlb entry */ + #include "functionlist" + /* Then the rest */ *(.text) SCHED_TEXT LOCK_TEXT diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index c9dc7e4..d96a934 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -144,16 +144,12 @@ EXPORT_SYMBOL(rwsem_down_write_failed_thunk); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(die_chain); -EXPORT_SYMBOL(register_die_notifier); #ifdef CONFIG_SMP EXPORT_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(smp_num_siblings); #endif -extern void do_softirq_thunk(void); -EXPORT_SYMBOL(do_softirq_thunk); - #ifdef CONFIG_BUG EXPORT_SYMBOL(out_of_line_bug); #endif diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S index acc1e2ca..e49af00 100644 --- a/arch/x86_64/lib/thunk.S +++ b/arch/x86_64/lib/thunk.S @@ -42,7 +42,6 @@ thunk rwsem_wake_thunk,rwsem_wake thunk rwsem_downgrade_thunk,rwsem_downgrade_wake #endif - thunk do_softirq_thunk,do_softirq thunk __down_failed,__down thunk_retrax __down_failed_interruptible,__down_interruptible diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 2e7c3c8..316c53d 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -264,6 +264,8 @@ static int vmalloc_fault(unsigned long address) return -1; if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); + else + BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); /* Below here mismatches are bugs because these lower tables are shared */ @@ -312,21 +314,13 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long flags; siginfo_t info; + tsk = current; + mm = tsk->mm; + prefetchw(&mm->mmap_sem); + /* get the address */ __asm__("movq %%cr2,%0":"=r" (address)); - if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, - SIGSEGV) == NOTIFY_STOP) - return; - - if (likely(regs->eflags & X86_EFLAGS_IF)) - local_irq_enable(); - if (unlikely(page_fault_trace)) - printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", - regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); - - tsk = current; - mm = tsk->mm; info.si_code = SEGV_MAPERR; @@ -351,10 +345,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, */ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && ((address >= VMALLOC_START && address < VMALLOC_END))) { - if (vmalloc_fault(address) < 0) - goto bad_area_nosemaphore; - return; + if (vmalloc_fault(address) >= 0) + return; } + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, + SIGSEGV) == NOTIFY_STOP) + return; /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. @@ -362,6 +358,17 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, + SIGSEGV) == NOTIFY_STOP) + return; + + if (likely(regs->eflags & X86_EFLAGS_IF)) + local_irq_enable(); + + if (unlikely(page_fault_trace)) + printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", + regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); + if (unlikely(error_code & PF_RSVD)) pgtable_bad(address, regs, error_code); @@ -571,6 +578,48 @@ do_sigbus: return; } +DEFINE_SPINLOCK(pgd_lock); +struct page *pgd_list; + +void vmalloc_sync_all(void) +{ + /* Note that races in the updates of insync and start aren't + problematic: + insync can only get set bits added, and updates to start are only + improving performance (without affecting correctness if undone). */ + static DECLARE_BITMAP(insync, PTRS_PER_PGD); + static unsigned long start = VMALLOC_START & PGDIR_MASK; + unsigned long address; + + for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { + if (!test_bit(pgd_index(address), insync)) { + const pgd_t *pgd_ref = pgd_offset_k(address); + struct page *page; + + if (pgd_none(*pgd_ref)) + continue; + spin_lock(&pgd_lock); + for (page = pgd_list; page; + page = (struct page *)page->index) { + pgd_t *pgd; + pgd = (pgd_t *)page_address(page) + pgd_index(address); + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + else + BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); + } + spin_unlock(&pgd_lock); + set_bit(pgd_index(address), insync); + } + if (address == start) + start = address + PGDIR_SIZE; + } + /* Check that there is no need to do the same for the modules area. */ + BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); + BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == + (__START_KERNEL & PGDIR_MASK))); +} + static int __init enable_pagefaulttrace(char *str) { page_fault_trace = 1; diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 40ed13d..e5f7f1c 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -72,7 +72,7 @@ void show_mem(void) show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pfn_to_page(pgdat->node_start_pfn + i); total++; @@ -94,7 +94,7 @@ void show_mem(void) int after_bootmem; -static void *spp_getpage(void) +static __init void *spp_getpage(void) { void *ptr; if (after_bootmem) @@ -108,7 +108,7 @@ static void *spp_getpage(void) return ptr; } -static void set_pte_phys(unsigned long vaddr, +static __init void set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) { pgd_t *pgd; @@ -157,7 +157,8 @@ static void set_pte_phys(unsigned long vaddr, } /* NOTE: this is meant to be run only at boot */ -void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) +void __init +__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) { unsigned long address = __fix_to_virt(idx); @@ -225,6 +226,33 @@ static __meminit void unmap_low_page(int i) ti->allocated = 0; } +/* Must run before zap_low_mappings */ +__init void *early_ioremap(unsigned long addr, unsigned long size) +{ + unsigned long map = round_down(addr, LARGE_PAGE_SIZE); + + /* actually usually some more */ + if (size >= LARGE_PAGE_SIZE) { + printk("SMBIOS area too long %lu\n", size); + return NULL; + } + set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); + map += LARGE_PAGE_SIZE; + set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); + __flush_tlb(); + return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1)); +} + +/* To avoid virtual aliases later */ +__init void early_iounmap(void *addr, unsigned long size) +{ + if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) + printk("early_iounmap: bad address %p\n", addr); + set_pmd(temp_mappings[0].pmd, __pmd(0)); + set_pmd(temp_mappings[1].pmd, __pmd(0)); + __flush_tlb(); +} + static void __meminit phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) { @@ -344,7 +372,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) pud_t *pud; if (after_bootmem) - pud = pud_offset_k(pgd, __PAGE_OFFSET); + pud = pud_offset_k(pgd, start & PGDIR_MASK); else pud = alloc_low_page(&map, &pud_phys); diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index dd60e71..7c45c2d 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c @@ -43,7 +43,7 @@ static __init int find_northbridge(void) int __init k8_scan_nodes(unsigned long start, unsigned long end) { unsigned long prevbase; - struct node nodes[8]; + struct bootnode nodes[8]; int nodeid, i, nb; unsigned char nodeids[8]; int found = 0; diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 22e51be..4be82d6 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -25,8 +25,7 @@ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; bootmem_data_t plat_node_bdata[MAX_NUMNODES]; -int memnode_shift; -u8 memnodemap[NODEMAPSIZE]; +struct memnode memnode; unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE @@ -47,7 +46,7 @@ int numa_off __initdata; * -1 if node overlap or lost ram (shift too big) */ static int __init -populate_memnodemap(const struct node *nodes, int numnodes, int shift) +populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift) { int i; int res = -1; @@ -74,7 +73,7 @@ populate_memnodemap(const struct node *nodes, int numnodes, int shift) return res; } -int __init compute_hash_shift(struct node *nodes, int numnodes) +int __init compute_hash_shift(struct bootnode *nodes, int numnodes) { int shift = 20; @@ -149,7 +148,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en /* Initialize final allocator for a zone */ void __init setup_node_zones(int nodeid) { - unsigned long start_pfn, end_pfn; + unsigned long start_pfn, end_pfn, memmapsize, limit; unsigned long zones[MAX_NR_ZONES]; unsigned long holes[MAX_NR_ZONES]; @@ -159,6 +158,16 @@ void __init setup_node_zones(int nodeid) Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); + /* Try to allocate mem_map at end to not fill up precious <4GB + memory. */ + memmapsize = sizeof(struct page) * (end_pfn-start_pfn); + limit = end_pfn << PAGE_SHIFT; + NODE_DATA(nodeid)->node_mem_map = + __alloc_bootmem_core(NODE_DATA(nodeid)->bdata, + memmapsize, SMP_CACHE_BYTES, + round_down(limit - memmapsize, PAGE_SIZE), + limit); + size_zones(zones, holes, start_pfn, end_pfn); free_area_init_node(nodeid, NODE_DATA(nodeid), zones, start_pfn, holes); @@ -191,7 +200,7 @@ int numa_fake __initdata = 0; static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) { int i; - struct node nodes[MAX_NUMNODES]; + struct bootnode nodes[MAX_NUMNODES]; unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake; /* Kludge needed for the hash function */ @@ -357,8 +366,7 @@ void __init init_cpu_to_node(void) EXPORT_SYMBOL(cpu_to_node); EXPORT_SYMBOL(node_to_cpumask); -EXPORT_SYMBOL(memnode_shift); -EXPORT_SYMBOL(memnodemap); +EXPORT_SYMBOL(memnode); EXPORT_SYMBOL(node_data); #ifdef CONFIG_DISCONTIGMEM @@ -369,21 +377,6 @@ EXPORT_SYMBOL(node_data); * Should do that. */ -/* Requires pfn_valid(pfn) to be true */ -struct page *pfn_to_page(unsigned long pfn) -{ - int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); - return (pfn - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map; -} -EXPORT_SYMBOL(pfn_to_page); - -unsigned long page_to_pfn(struct page *page) -{ - return (long)(((page) - page_zone(page)->zone_mem_map) + - page_zone(page)->zone_start_pfn); -} -EXPORT_SYMBOL(page_to_pfn); - int pfn_valid(unsigned long pfn) { unsigned nid; diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 482c257..2eb8795 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c @@ -23,7 +23,7 @@ static struct acpi_table_slit *acpi_slit; static nodemask_t nodes_parsed __initdata; static nodemask_t nodes_found __initdata; -static struct node nodes[MAX_NUMNODES] __initdata; +static struct bootnode nodes[MAX_NUMNODES] __initdata; static u8 pxm2node[256] = { [0 ... 255] = 0xff }; /* Too small nodes confuse the VM badly. Usually they result @@ -57,7 +57,7 @@ static __init int conflicting_nodes(unsigned long start, unsigned long end) { int i; for_each_node_mask(i, nodes_parsed) { - struct node *nd = &nodes[i]; + struct bootnode *nd = &nodes[i]; if (nd->start == nd->end) continue; if (nd->end > start && nd->start < end) @@ -70,7 +70,7 @@ static __init int conflicting_nodes(unsigned long start, unsigned long end) static __init void cutoff_node(int i, unsigned long start, unsigned long end) { - struct node *nd = &nodes[i]; + struct bootnode *nd = &nodes[i]; if (nd->start < start) { nd->start = start; if (nd->end < nd->start) @@ -159,7 +159,7 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) void __init acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) { - struct node *nd; + struct bootnode *nd; unsigned long start, end; int node, pxm; int i; |