summaryrefslogtreecommitdiffstats
path: root/arch/i386
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386')
-rw-r--r--arch/i386/Kconfig44
-rw-r--r--arch/i386/boot/Makefile9
-rw-r--r--arch/i386/boot/compressed/misc.c32
-rw-r--r--arch/i386/boot/video.S19
-rw-r--r--arch/i386/crypto/aes-i586-asm.S29
-rw-r--r--arch/i386/crypto/aes.c20
-rw-r--r--arch/i386/kernel/Makefile8
-rw-r--r--arch/i386/kernel/acpi/boot.c10
-rw-r--r--arch/i386/kernel/acpi/earlyquirk.c23
-rw-r--r--arch/i386/kernel/acpi/processor.c2
-rw-r--r--arch/i386/kernel/acpi/sleep.c19
-rw-r--r--arch/i386/kernel/acpi/wakeup.S11
-rw-r--r--arch/i386/kernel/alternative.c118
-rw-r--r--arch/i386/kernel/apic.c20
-rw-r--r--arch/i386/kernel/apm.c45
-rw-r--r--arch/i386/kernel/asm-offsets.c3
-rw-r--r--arch/i386/kernel/cpu/amd.c16
-rw-r--r--arch/i386/kernel/cpu/common.c2
-rw-r--r--arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c307
-rw-r--r--arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c11
-rw-r--r--arch/i386/kernel/cpu/cpufreq/longhaul.c20
-rw-r--r--arch/i386/kernel/cpu/cpufreq/longrun.c1
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k7.c13
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k8.c350
-rw-r--r--arch/i386/kernel/cpu/cpufreq/powernow-k8.h44
-rw-r--r--arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c274
-rw-r--r--arch/i386/kernel/cpu/cyrix.c9
-rw-r--r--arch/i386/kernel/cpu/intel.c6
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c113
-rw-r--r--arch/i386/kernel/cpu/proc.c2
-rw-r--r--arch/i386/kernel/crash.c10
-rw-r--r--arch/i386/kernel/doublefault.c3
-rw-r--r--arch/i386/kernel/entry.S263
-rw-r--r--arch/i386/kernel/hpet.c67
-rw-r--r--arch/i386/kernel/i387.c2
-rw-r--r--arch/i386/kernel/i8253.c118
-rw-r--r--arch/i386/kernel/i8259.c4
-rw-r--r--arch/i386/kernel/io_apic.c80
-rw-r--r--arch/i386/kernel/irq.c14
-rw-r--r--arch/i386/kernel/kprobes.c97
-rw-r--r--arch/i386/kernel/microcode.c73
-rw-r--r--arch/i386/kernel/nmi.c72
-rw-r--r--arch/i386/kernel/numaq.c10
-rw-r--r--arch/i386/kernel/process.c8
-rw-r--r--arch/i386/kernel/setup.c31
-rw-r--r--arch/i386/kernel/smp.c12
-rw-r--r--arch/i386/kernel/smpboot.c15
-rw-r--r--arch/i386/kernel/srat.c19
-rw-r--r--arch/i386/kernel/syscall_table.S1
-rw-r--r--arch/i386/kernel/time.c157
-rw-r--r--arch/i386/kernel/timers/Makefile9
-rw-r--r--arch/i386/kernel/timers/common.c172
-rw-r--r--arch/i386/kernel/timers/timer.c75
-rw-r--r--arch/i386/kernel/timers/timer_cyclone.c259
-rw-r--r--arch/i386/kernel/timers/timer_hpet.c217
-rw-r--r--arch/i386/kernel/timers/timer_none.c39
-rw-r--r--arch/i386/kernel/timers/timer_pit.c177
-rw-r--r--arch/i386/kernel/timers/timer_pm.c342
-rw-r--r--arch/i386/kernel/timers/timer_tsc.c617
-rw-r--r--arch/i386/kernel/traps.c81
-rw-r--r--arch/i386/kernel/tsc.c478
-rw-r--r--arch/i386/kernel/vmlinux.lds.S16
-rw-r--r--arch/i386/lib/delay.c65
-rw-r--r--arch/i386/lib/usercopy.c256
-rw-r--r--arch/i386/mach-default/setup.c43
-rw-r--r--arch/i386/mach-visws/setup.c49
-rw-r--r--arch/i386/mach-voyager/setup.c74
-rw-r--r--arch/i386/mm/fault.c59
-rw-r--r--arch/i386/mm/init.c3
-rw-r--r--arch/i386/mm/pageattr.c8
-rw-r--r--arch/i386/oprofile/nmi_int.c4
-rw-r--r--arch/i386/oprofile/op_model_athlon.c1
-rw-r--r--arch/i386/oprofile/op_model_p4.c1
-rw-r--r--arch/i386/oprofile/op_model_ppro.c1
-rw-r--r--arch/i386/pci/common.c1
-rw-r--r--arch/i386/pci/i386.c11
-rw-r--r--arch/i386/pci/irq.c15
-rw-r--r--arch/i386/pci/mmconfig.c9
-rw-r--r--arch/i386/pci/pcbios.c6
-rw-r--r--arch/i386/pci/pci.h1
-rw-r--r--arch/i386/power/cpu.c2
81 files changed, 2991 insertions, 2736 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 8dfa305..f3eaf22 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -14,6 +14,10 @@ config X86_32
486, 586, Pentiums, and various instruction-set-compatible chips by
AMD, Cyrix, and others.
+config GENERIC_TIME
+ bool
+ default y
+
config SEMAPHORE_SLEEPERS
bool
default y
@@ -173,6 +177,12 @@ config ACPI_SRAT
bool
default y
depends on NUMA && (X86_SUMMIT || X86_GENERICARCH)
+ select ACPI_NUMA
+
+config HAVE_ARCH_PARSE_SRAT
+ bool
+ default y
+ depends on ACPI_SRAT
config X86_SUMMIT_NUMA
bool
@@ -224,7 +234,6 @@ config NR_CPUS
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
depends on SMP
- default off
help
SMT scheduler support improves the CPU scheduler's decision making
when dealing with Intel Pentium 4 chips with HyperThreading at a
@@ -319,6 +328,15 @@ config X86_MCE_P4THERMAL
Enabling this feature will cause a message to be printed when the P4
enters thermal throttling.
+config VM86
+ default y
+ bool "Enable VM86 support" if EMBEDDED
+ help
+ This option is required by programs like DOSEMU to run 16-bit legacy
+ code on X86 processors. It also may be needed by software like
+ XFree86 to initialize some video cards via BIOS. Disabling this
+ option saves about 6k.
+
config TOSHIBA
tristate "Toshiba Laptop support"
---help---
@@ -1041,13 +1059,27 @@ config SCx200
tristate "NatSemi SCx200 support"
depends on !X86_VOYAGER
help
- This provides basic support for the National Semiconductor SCx200
- processor. Right now this is just a driver for the GPIO pins.
+ This provides basic support for National Semiconductor's
+ (now AMD's) Geode processors. The driver probes for the
+ PCI-IDs of several on-chip devices, so its a good dependency
+ for other scx200_* drivers.
- If you don't know what to do here, say N.
+ If compiled as a module, the driver is named scx200.
+
+config SCx200HR_TIMER
+ tristate "NatSemi SCx200 27MHz High-Resolution Timer Support"
+ depends on SCx200 && GENERIC_TIME
+ default y
+ help
+ This driver provides a clocksource built upon the on-chip
+ 27MHz high-resolution timer. Its also a workaround for
+ NSC Geode SC-1100's buggy TSC, which loses time when the
+ processor goes idle (as is done by the scheduler). The
+ other workaround is idle=poll boot option.
- This support is also available as a module. If compiled as a
- module, it will be called scx200.
+config K8_NB
+ def_bool y
+ depends on AGP_AMD64
source "drivers/pcmcia/Kconfig"
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile
index 33e5547..e979466 100644
--- a/arch/i386/boot/Makefile
+++ b/arch/i386/boot/Makefile
@@ -109,8 +109,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
isoimage: $(BOOTIMAGE)
-rm -rf $(obj)/isoimage
mkdir $(obj)/isoimage
- cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \
- $(obj)/isoimage
+ for i in lib lib64 share end ; do \
+ if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
+ cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
+ break ; \
+ fi ; \
+ if [ $$i = end ] ; then exit 1 ; fi ; \
+ done
cp $(BOOTIMAGE) $(obj)/isoimage/linux
echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index f19f3a7..b2ccd54 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -24,14 +24,6 @@
#undef memset
#undef memcpy
-
-/*
- * Why do we do this? Don't ask me..
- *
- * Incomprehensible are the ways of bootloaders.
- */
-static void* memset(void *, int, size_t);
-static void* memcpy(void *, __const void *, size_t);
#define memzero(s, n) memset ((s), 0, (n))
typedef unsigned char uch;
@@ -93,7 +85,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */
#endif
#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
-extern char input_data[];
+extern unsigned char input_data[];
extern int input_len;
static long bytes_out = 0;
@@ -103,6 +95,9 @@ static unsigned long output_ptr = 0;
static void *malloc(int size);
static void free(void *where);
+static void *memset(void *s, int c, unsigned n);
+static void *memcpy(void *dest, const void *src, unsigned n);
+
static void putstr(const char *);
extern int end;
@@ -205,7 +200,7 @@ static void putstr(const char *s)
outb_p(0xff & (pos >> 1), vidport+1);
}
-static void* memset(void* s, int c, size_t n)
+static void* memset(void* s, int c, unsigned n)
{
int i;
char *ss = (char*)s;
@@ -214,14 +209,13 @@ static void* memset(void* s, int c, size_t n)
return s;
}
-static void* memcpy(void* __dest, __const void* __src,
- size_t __n)
+static void* memcpy(void* dest, const void* src, unsigned n)
{
int i;
- char *d = (char *)__dest, *s = (char *)__src;
+ char *d = (char *)dest, *s = (char *)src;
- for (i=0;i<__n;i++) d[i] = s[i];
- return __dest;
+ for (i=0;i<n;i++) d[i] = s[i];
+ return dest;
}
/* ===========================================================================
@@ -309,7 +303,7 @@ static void setup_normal_output_buffer(void)
#else
if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
#endif
- output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */
+ output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
free_mem_end_ptr = (long)real_mode;
}
@@ -324,11 +318,9 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
#ifdef STANDARD_MEMORY_BIOS_CALL
if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
#else
- if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) <
- (3*1024))
- error("Less than 4MB of memory");
+ if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
#endif
- mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
+ mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
low_buffer_size = low_buffer_end - LOW_BUFFER_START;
diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S
index c9343c3a..8c2a6fa 100644
--- a/arch/i386/boot/video.S
+++ b/arch/i386/boot/video.S
@@ -1929,7 +1929,7 @@ skip10: movb %ah, %al
ret
store_edid:
-#ifdef CONFIG_FB_FIRMWARE_EDID
+#ifdef CONFIG_FIRMWARE_EDID
pushw %es # just save all registers
pushw %ax
pushw %bx
@@ -1947,6 +1947,22 @@ store_edid:
rep
stosl
+ pushw %es # save ES
+ xorw %di, %di # Report Capability
+ pushw %di
+ popw %es # ES:DI must be 0:0
+ movw $0x4f15, %ax
+ xorw %bx, %bx
+ xorw %cx, %cx
+ int $0x10
+ popw %es # restore ES
+
+ cmpb $0x00, %ah # call successful
+ jne no_edid
+
+ cmpb $0x4f, %al # function supported
+ jne no_edid
+
movw $0x4f15, %ax # do VBE/DDC
movw $0x01, %bx
movw $0x00, %cx
@@ -1954,6 +1970,7 @@ store_edid:
movw $0x140, %di
int $0x10
+no_edid:
popw %di # restore all registers
popw %dx
popw %cx
diff --git a/arch/i386/crypto/aes-i586-asm.S b/arch/i386/crypto/aes-i586-asm.S
index 911b153..f942f0c 100644
--- a/arch/i386/crypto/aes-i586-asm.S
+++ b/arch/i386/crypto/aes-i586-asm.S
@@ -36,22 +36,19 @@
.file "aes-i586-asm.S"
.text
-// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
-// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])//
-
-#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
+#include <asm/asm-offsets.h>
-// offsets to parameters with one register pushed onto stack
-
-#define in_blk 8 // input byte array address parameter
-#define out_blk 12 // output byte array address parameter
-#define ctx 16 // AES context structure
+#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
-// offsets in context structure
+/* offsets to parameters with one register pushed onto stack */
+#define tfm 8
+#define out_blk 12
+#define in_blk 16
-#define ekey 0 // encryption key schedule base address
-#define nrnd 256 // number of rounds
-#define dkey 260 // decryption key schedule base address
+/* offsets in crypto_tfm structure */
+#define ekey (crypto_tfm_ctx_offset + 0)
+#define nrnd (crypto_tfm_ctx_offset + 256)
+#define dkey (crypto_tfm_ctx_offset + 260)
// register mapping for encrypt and decrypt subroutines
@@ -220,6 +217,7 @@
do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
// AES (Rijndael) Encryption Subroutine
+/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
.global aes_enc_blk
@@ -230,7 +228,7 @@
aes_enc_blk:
push %ebp
- mov ctx(%esp),%ebp // pointer to context
+ mov tfm(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
@@ -295,6 +293,7 @@ aes_enc_blk:
ret
// AES (Rijndael) Decryption Subroutine
+/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
.global aes_dec_blk
@@ -305,7 +304,7 @@ aes_enc_blk:
aes_dec_blk:
push %ebp
- mov ctx(%esp),%ebp // pointer to context
+ mov tfm(%esp),%ebp
// CAUTION: the order and the values used in these assigns
// rely on the register mappings
diff --git a/arch/i386/crypto/aes.c b/arch/i386/crypto/aes.c
index a50397b..d3806da 100644
--- a/arch/i386/crypto/aes.c
+++ b/arch/i386/crypto/aes.c
@@ -45,8 +45,8 @@
#include <linux/crypto.h>
#include <linux/linkage.h>
-asmlinkage void aes_enc_blk(const u8 *src, u8 *dst, void *ctx);
-asmlinkage void aes_dec_blk(const u8 *src, u8 *dst, void *ctx);
+asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
+asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
#define AES_MIN_KEY_SIZE 16
#define AES_MAX_KEY_SIZE 32
@@ -378,12 +378,12 @@ static void gen_tabs(void)
k[8*(i)+11] = ss[3]; \
}
-static int
-aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags)
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len, u32 *flags)
{
int i;
u32 ss[8];
- struct aes_ctx *ctx = ctx_arg;
+ struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
const __le32 *key = (const __le32 *)in_key;
/* encryption schedule */
@@ -464,16 +464,16 @@ aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, u32 *flags)
return 0;
}
-static inline void aes_encrypt(void *ctx, u8 *dst, const u8 *src)
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- aes_enc_blk(src, dst, ctx);
+ aes_enc_blk(tfm, dst, src);
}
-static inline void aes_decrypt(void *ctx, u8 *dst, const u8 *src)
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
- aes_dec_blk(src, dst, ctx);
+ aes_dec_blk(tfm, dst, src);
}
-
static struct crypto_alg aes_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-i586",
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 96fb8a0..5e70c2f 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,10 +7,9 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o bootflag.o \
- quirks.o i8237.o topology.o alternative.o
+ quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
obj-y += cpu/
-obj-y += timers/
obj-y += acpi/
obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-$(CONFIG_MCA) += mca.o
@@ -37,6 +36,8 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
obj-$(CONFIG_VM86) += vm86.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_HPET_TIMER) += hpet.o
+obj-$(CONFIG_K8_NB) += k8.o
EXTRA_AFLAGS := -traditional
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r
$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
$(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
$(call if_changed,syscall)
+
+k8-y += ../../x86_64/kernel/k8.o
+
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 40e5aba..97ca171 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -202,6 +202,8 @@ int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
if (mcfg->config[i].base_reserved) {
printk(KERN_ERR PREFIX
"MMCONFIG not in low 4GB of memory\n");
+ kfree(pci_mmcfg_config);
+ pci_mmcfg_config_num = 0;
return -ENODEV;
}
}
@@ -215,7 +217,7 @@ static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
{
struct acpi_table_madt *madt = NULL;
- if (!phys_addr || !size)
+ if (!phys_addr || !size || !cpu_has_apic)
return -EINVAL;
madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size);
@@ -621,9 +623,9 @@ extern u32 pmtmr_ioport;
static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
{
- struct fadt_descriptor_rev2 *fadt = NULL;
+ struct fadt_descriptor *fadt = NULL;
- fadt = (struct fadt_descriptor_rev2 *)__acpi_map_table(phys, size);
+ fadt = (struct fadt_descriptor *)__acpi_map_table(phys, size);
if (!fadt) {
printk(KERN_WARNING PREFIX "Unable to map FADT\n");
return 0;
@@ -754,7 +756,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
return -ENODEV;
}
- if (!cpu_has_apic)
+ if (!cpu_has_apic)
return -ENODEV;
/*
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
index 2e3b643..1649a17 100644
--- a/arch/i386/kernel/acpi/earlyquirk.c
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -5,17 +5,34 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/pci.h>
+#include <linux/acpi.h>
+
#include <asm/pci-direct.h>
#include <asm/acpi.h>
#include <asm/apic.h>
+#ifdef CONFIG_ACPI
+
+static int nvidia_hpet_detected __initdata;
+
+static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
+{
+ nvidia_hpet_detected = 1;
+ return 0;
+}
+#endif
+
static int __init check_bridge(int vendor, int device)
{
#ifdef CONFIG_ACPI
- /* According to Nvidia all timer overrides are bogus. Just ignore
- them all. */
+ /* According to Nvidia all timer overrides are bogus unless HPET
+ is enabled. */
if (vendor == PCI_VENDOR_ID_NVIDIA) {
- acpi_skip_timer_override = 1;
+ nvidia_hpet_detected = 0;
+ acpi_table_parse(ACPI_HPET, nvidia_hpet_check);
+ if (nvidia_hpet_detected == 0) {
+ acpi_skip_timer_override = 1;
+ }
}
#endif
if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) {
diff --git a/arch/i386/kernel/acpi/processor.c b/arch/i386/kernel/acpi/processor.c
index 9f4cc02..b54fded 100644
--- a/arch/i386/kernel/acpi/processor.c
+++ b/arch/i386/kernel/acpi/processor.c
@@ -47,7 +47,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
buf[2] = ACPI_PDC_C_CAPABILITY_SMP;
if (cpu_has(c, X86_FEATURE_EST))
- buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
+ buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP;
obj->type = ACPI_TYPE_BUFFER;
obj->buffer.length = 12;
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
index 1cb2b18..4ee8357 100644
--- a/arch/i386/kernel/acpi/sleep.c
+++ b/arch/i386/kernel/acpi/sleep.c
@@ -8,30 +8,17 @@
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/dmi.h>
+#include <linux/cpumask.h>
+
#include <asm/smp.h>
-#include <asm/tlbflush.h>
/* address in low memory of the wakeup routine. */
unsigned long acpi_wakeup_address = 0;
unsigned long acpi_video_flags;
extern char wakeup_start, wakeup_end;
-extern void zap_low_mappings(void);
-
extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
-static void init_low_mapping(pgd_t * pgd, int pgd_limit)
-{
- int pgd_ofs = 0;
-
- while ((pgd_ofs < pgd_limit)
- && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) {
- set_pgd(pgd, *(pgd + USER_PTRS_PER_PGD));
- pgd_ofs++, pgd++;
- }
- flush_tlb_all();
-}
-
/**
* acpi_save_state_mem - save kernel state
*
@@ -42,7 +29,6 @@ int acpi_save_state_mem(void)
{
if (!acpi_wakeup_address)
return 1;
- init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD);
memcpy((void *)acpi_wakeup_address, &wakeup_start,
&wakeup_end - &wakeup_start);
acpi_copy_wakeup_routine(acpi_wakeup_address);
@@ -55,7 +41,6 @@ int acpi_save_state_mem(void)
*/
void acpi_restore_state_mem(void)
{
- zap_low_mappings();
}
/**
diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
index 7c74fe0..9f408ee 100644
--- a/arch/i386/kernel/acpi/wakeup.S
+++ b/arch/i386/kernel/acpi/wakeup.S
@@ -56,7 +56,7 @@ wakeup_code:
1:
# set up page table
- movl $swapper_pg_dir-__PAGE_OFFSET, %eax
+ movl $swsusp_pg_dir-__PAGE_OFFSET, %eax
movl %eax, %cr3
testl $1, real_efer_save_restore - wakeup_code
@@ -265,11 +265,6 @@ ENTRY(acpi_copy_wakeup_routine)
movl $0x12345678, saved_magic
ret
-.data
-ALIGN
-ENTRY(saved_magic) .long 0
-ENTRY(saved_eip) .long 0
-
save_registers:
leal 4(%esp), %eax
movl %eax, saved_context_esp
@@ -304,7 +299,11 @@ ret_point:
call restore_processor_state
ret
+.data
ALIGN
+ENTRY(saved_magic) .long 0
+ENTRY(saved_eip) .long 0
+
# saved registers
saved_gdt: .long 0,0
saved_idt: .long 0,0
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 5cbd6f9..50eb0e0 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -4,27 +4,41 @@
#include <asm/alternative.h>
#include <asm/sections.h>
-#define DEBUG 0
-#if DEBUG
-# define DPRINTK(fmt, args...) printk(fmt, args)
-#else
-# define DPRINTK(fmt, args...)
-#endif
+static int no_replacement = 0;
+static int smp_alt_once = 0;
+static int debug_alternative = 0;
+
+static int __init noreplacement_setup(char *s)
+{
+ no_replacement = 1;
+ return 1;
+}
+static int __init bootonly(char *str)
+{
+ smp_alt_once = 1;
+ return 1;
+}
+static int __init debug_alt(char *str)
+{
+ debug_alternative = 1;
+ return 1;
+}
+__setup("noreplacement", noreplacement_setup);
+__setup("smp-alt-boot", bootonly);
+__setup("debug-alternative", debug_alt);
+
+#define DPRINTK(fmt, args...) if (debug_alternative) \
+ printk(KERN_DEBUG fmt, args)
+
+#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
asm("\t.data\nintelnops: "
GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
GENERIC_NOP7 GENERIC_NOP8);
-asm("\t.data\nk8nops: "
- K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
- K8_NOP7 K8_NOP8);
-asm("\t.data\nk7nops: "
- K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
- K7_NOP7 K7_NOP8);
-
-extern unsigned char intelnops[], k8nops[], k7nops[];
+extern unsigned char intelnops[];
static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
NULL,
intelnops,
@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
intelnops + 1 + 2 + 3 + 4 + 5 + 6,
intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef K8_NOP1
+asm("\t.data\nk8nops: "
+ K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+ K8_NOP7 K8_NOP8);
+extern unsigned char k8nops[];
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
NULL,
k8nops,
@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
k8nops + 1 + 2 + 3 + 4 + 5 + 6,
k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef K7_NOP1
+asm("\t.data\nk7nops: "
+ K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
+ K7_NOP7 K7_NOP8);
+extern unsigned char k7nops[];
static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
NULL,
k7nops,
@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
k7nops + 1 + 2 + 3 + 4 + 5 + 6,
k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
+#endif
+
+#ifdef CONFIG_X86_64
+
+extern char __vsyscall_0;
+static inline unsigned char** find_nop_table(void)
+{
+ return k8_nops;
+}
+
+#else /* CONFIG_X86_64 */
+
static struct nop {
int cpuid;
unsigned char **noptable;
@@ -67,14 +107,6 @@ static struct nop {
{ -1, NULL }
};
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
-extern u8 *__smp_locks[], *__smp_locks_end[];
-
-extern u8 __smp_alt_begin[], __smp_alt_end[];
-
-
static unsigned char** find_nop_table(void)
{
unsigned char **noptable = intel_nops;
@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void)
return noptable;
}
+#endif /* CONFIG_X86_64 */
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
+extern u8 *__smp_locks[], *__smp_locks_end[];
+
+extern u8 __smp_alt_begin[], __smp_alt_end[];
+
/* Replace instructions with better alternatives for this CPU type.
This runs before SMP is initialized to avoid SMP problems with
self modifying code. This implies that assymetric systems where
@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
unsigned char **noptable = find_nop_table();
struct alt_instr *a;
+ u8 *instr;
int diff, i, k;
DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
BUG_ON(a->replacementlen > a->instrlen);
if (!boot_cpu_has(a->cpuid))
continue;
- memcpy(a->instr, a->replacement, a->replacementlen);
+ instr = a->instr;
+#ifdef CONFIG_X86_64
+ /* vsyscall code is not mapped yet. resolve it manually. */
+ if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
+ instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
+ DPRINTK("%s: vsyscall fixup: %p => %p\n",
+ __FUNCTION__, a->instr, instr);
+ }
+#endif
+ memcpy(instr, a->replacement, a->replacementlen);
diff = a->instrlen - a->replacementlen;
/* Pad the rest with nops */
for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
@@ -186,14 +236,6 @@ struct smp_alt_module {
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);
-static int smp_alt_once = 0;
-static int __init bootonly(char *str)
-{
- smp_alt_once = 1;
- return 1;
-}
-__setup("smp-alt-boot", bootonly);
-
void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
void *text, void *text_end)
@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
struct smp_alt_module *smp;
unsigned long flags;
+ if (no_replacement)
+ return;
+
if (smp_alt_once) {
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(locks, locks_end,
@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod)
struct smp_alt_module *item;
unsigned long flags;
- if (smp_alt_once)
+ if (no_replacement || smp_alt_once)
return;
spin_lock_irqsave(&smp_alt, flags);
@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp)
struct smp_alt_module *mod;
unsigned long flags;
- if (smp_alt_once)
+ if (no_replacement || smp_alt_once)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp)
void __init alternative_instructions(void)
{
+ if (no_replacement) {
+ printk(KERN_INFO "(SMP-)alternatives turned off\n");
+ free_init_pages("SMP alternatives",
+ (unsigned long)__smp_alt_begin,
+ (unsigned long)__smp_alt_end);
+ return;
+ }
apply_alternatives(__alt_instructions, __alt_instructions_end);
/* switch to patch-once-at-boottime-only mode and free the
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 3d4b2f3..7ce0949 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -36,6 +36,7 @@
#include <asm/arch_hooks.h>
#include <asm/hpet.h>
#include <asm/i8253.h>
+#include <asm/nmi.h>
#include <mach_apic.h>
#include <mach_apicdef.h>
@@ -62,7 +63,7 @@ int apic_verbosity;
static void apic_pm_activate(void);
-int modern_apic(void)
+static int modern_apic(void)
{
unsigned int lvr, version;
/* AMD systems use old APIC versions, so check the CPU */
@@ -113,7 +114,7 @@ void __init apic_intr_init(void)
}
/* Using APIC to generate smp_local_timer_interrupt? */
-int using_apic_timer = 0;
+int using_apic_timer __read_mostly = 0;
static int enabled_via_apicbase;
@@ -156,7 +157,7 @@ void clear_local_APIC(void)
maxlvt = get_maxlvt();
/*
- * Masking an LVT entry on a P6 can trigger a local APIC error
+ * Masking an LVT entry can trigger a local APIC error
* if the vector is zero. Mask LVTERR first to prevent this.
*/
if (maxlvt >= 3) {
@@ -1117,7 +1118,18 @@ void disable_APIC_timer(void)
unsigned long v;
v = apic_read(APIC_LVTT);
- apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+ /*
+ * When an illegal vector value (0-15) is written to an LVT
+ * entry and delivery mode is Fixed, the APIC may signal an
+ * illegal vector error, with out regard to whether the mask
+ * bit is set or whether an interrupt is actually seen on input.
+ *
+ * Boot sequence might call this function when the LVTT has
+ * '0' vector value. So make sure vector field is set to
+ * valid value.
+ */
+ v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+ apic_write_around(APIC_LVTT, v);
}
}
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index df0e174..7c5729d 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -374,14 +374,14 @@ static struct {
unsigned short segment;
} apm_bios_entry;
static int clock_slowed;
-static int idle_threshold = DEFAULT_IDLE_THRESHOLD;
-static int idle_period = DEFAULT_IDLE_PERIOD;
+static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
+static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
static int set_pm_idle;
static int suspends_pending;
static int standbys_pending;
static int ignore_sys_suspend;
static int ignore_normal_resume;
-static int bounce_interval = DEFAULT_BOUNCE_INTERVAL;
+static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
#ifdef CONFIG_APM_RTC_IS_GMT
# define clock_cmos_diff 0
@@ -390,8 +390,8 @@ static int bounce_interval = DEFAULT_BOUNCE_INTERVAL;
static long clock_cmos_diff;
static int got_clock_diff;
#endif
-static int debug;
-static int smp;
+static int debug __read_mostly;
+static int smp __read_mostly;
static int apm_disabled = -1;
#ifdef CONFIG_SMP
static int power_off;
@@ -403,8 +403,8 @@ static int realmode_power_off = 1;
#else
static int realmode_power_off;
#endif
-static int exit_kapmd;
-static int kapmd_running;
+static int exit_kapmd __read_mostly;
+static int kapmd_running __read_mostly;
#ifdef CONFIG_APM_ALLOW_INTS
static int allow_ints = 1;
#else
@@ -416,15 +416,15 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
static struct apm_user * user_list;
static DEFINE_SPINLOCK(user_list_lock);
-static struct desc_struct bad_bios_desc = { 0, 0x00409200 };
+static const struct desc_struct bad_bios_desc = { 0, 0x00409200 };
-static char driver_version[] = "1.16ac"; /* no spaces */
+static const char driver_version[] = "1.16ac"; /* no spaces */
/*
* APM event names taken from the APM 1.2 specification. These are
* the message codes that the BIOS uses to tell us about events
*/
-static char * apm_event_name[] = {
+static const char * const apm_event_name[] = {
"system standby",
"system suspend",
"normal resume",
@@ -616,7 +616,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
* @ecx_in: ECX register value for BIOS call
* @eax: EAX register on return from the BIOS call
*
- * Make a BIOS call that does only returns one value, or just status.
+ * Make a BIOS call that returns one value only, or just status.
* If there is an error, then the error code is returned in AH
* (bits 8-15 of eax) and this function returns non-zero. This is
* used for simpler BIOS operations. This call may hold interrupts
@@ -764,9 +764,9 @@ static int apm_do_idle(void)
int idled = 0;
int polling;
- polling = test_thread_flag(TIF_POLLING_NRFLAG);
+ polling = !!(current_thread_info()->status & TS_POLLING);
if (polling) {
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
}
if (!need_resched()) {
@@ -774,7 +774,7 @@ static int apm_do_idle(void)
ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
}
if (polling)
- set_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status |= TS_POLLING;
if (!idled)
return 0;
@@ -822,7 +822,7 @@ static void apm_do_busy(void)
#define IDLE_CALC_LIMIT (HZ * 100)
#define IDLE_LEAKY_MAX 16
-static void (*original_pm_idle)(void);
+static void (*original_pm_idle)(void) __read_mostly;
/**
* apm_cpu_idle - cpu idling for APM capable Linux
@@ -1063,7 +1063,8 @@ static int apm_engage_power_management(u_short device, int enable)
static int apm_console_blank(int blank)
{
- int error, i;
+ int error = APM_NOT_ENGAGED; /* silence gcc */
+ int i;
u_short state;
static const u_short dev[3] = { 0x100, 0x1FF, 0x101 };
@@ -1104,7 +1105,8 @@ static int queue_empty(struct apm_user *as)
static apm_event_t get_queued_event(struct apm_user *as)
{
- as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
+ if (++as->event_tail >= APM_MAX_EVENTS)
+ as->event_tail = 0;
return as->events[as->event_tail];
}
@@ -1118,13 +1120,16 @@ static void queue_event(apm_event_t event, struct apm_user *sender)
for (as = user_list; as != NULL; as = as->next) {
if ((as == sender) || (!as->reader))
continue;
- as->event_head = (as->event_head + 1) % APM_MAX_EVENTS;
+ if (++as->event_head >= APM_MAX_EVENTS)
+ as->event_head = 0;
+
if (as->event_head == as->event_tail) {
static int notified;
if (notified++ == 0)
printk(KERN_ERR "apm: an event queue overflowed\n");
- as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
+ if (++as->event_tail >= APM_MAX_EVENTS)
+ as->event_tail = 0;
}
as->events[as->event_head] = event;
if ((!as->suser) || (!as->writer))
@@ -1282,7 +1287,7 @@ static void standby(void)
static apm_event_t get_event(void)
{
int error;
- apm_event_t event;
+ apm_event_t event = APM_NO_EVENTS; /* silence gcc */
apm_eventinfo_t info;
static int notified;
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 36d66e2..1c3a809 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -4,6 +4,7 @@
* to extract and format the required data.
*/
+#include <linux/crypto.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/personality.h>
@@ -69,4 +70,6 @@ void foo(void)
DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
+
+ OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
}
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 786d1a5..fd0457c 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -224,15 +224,17 @@ static void __init init_amd(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_HT
/*
- * On a AMD dual core setup the lower bits of the APIC id
- * distingush the cores. Assumes number of cores is a power
- * of two.
+ * On a AMD multi core setup the lower bits of the APIC id
+ * distingush the cores.
*/
if (c->x86_max_cores > 1) {
int cpu = smp_processor_id();
- unsigned bits = 0;
- while ((1 << bits) < c->x86_max_cores)
- bits++;
+ unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
+
+ if (bits == 0) {
+ while ((1 << bits) < c->x86_max_cores)
+ bits++;
+ }
cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
phys_proc_id[cpu] >>= bits;
printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
@@ -240,6 +242,8 @@ static void __init init_amd(struct cpuinfo_x86 *c)
}
#endif
+ if (cpuid_eax(0x80000000) >= 0x80000006)
+ num_cache_leaves = 3;
}
static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index a06a490..44f2c5f 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -11,6 +11,8 @@
#include <asm/msr.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
+#include <asm/mtrr.h>
+#include <asm/mce.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 3852d0a..5fd6532 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -48,12 +48,13 @@ MODULE_LICENSE("GPL");
struct cpufreq_acpi_io {
- struct acpi_processor_performance acpi_data;
+ struct acpi_processor_performance *acpi_data;
struct cpufreq_frequency_table *freq_table;
unsigned int resume;
};
static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS];
+static struct acpi_processor_performance *acpi_perf_data[NR_CPUS];
static struct cpufreq_driver acpi_cpufreq_driver;
@@ -104,64 +105,43 @@ acpi_processor_set_performance (
{
u16 port = 0;
u8 bit_width = 0;
+ int i = 0;
int ret = 0;
u32 value = 0;
- int i = 0;
- struct cpufreq_freqs cpufreq_freqs;
- cpumask_t saved_mask;
int retval;
+ struct acpi_processor_performance *perf;
dprintk("acpi_processor_set_performance\n");
- /*
- * TBD: Use something other than set_cpus_allowed.
- * As set_cpus_allowed is a bit racy,
- * with any other set_cpus_allowed for this process.
- */
- saved_mask = current->cpus_allowed;
- set_cpus_allowed(current, cpumask_of_cpu(cpu));
- if (smp_processor_id() != cpu) {
- return (-EAGAIN);
- }
-
- if (state == data->acpi_data.state) {
+ retval = 0;
+ perf = data->acpi_data;
+ if (state == perf->state) {
if (unlikely(data->resume)) {
dprintk("Called after resume, resetting to P%d\n", state);
data->resume = 0;
} else {
dprintk("Already at target state (P%d)\n", state);
- retval = 0;
- goto migrate_end;
+ return (retval);
}
}
- dprintk("Transitioning from P%d to P%d\n",
- data->acpi_data.state, state);
-
- /* cpufreq frequency struct */
- cpufreq_freqs.cpu = cpu;
- cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency;
- cpufreq_freqs.new = data->freq_table[state].frequency;
-
- /* notify cpufreq */
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
+ dprintk("Transitioning from P%d to P%d\n", perf->state, state);
/*
* First we write the target state's 'control' value to the
* control_register.
*/
- port = data->acpi_data.control_register.address;
- bit_width = data->acpi_data.control_register.bit_width;
- value = (u32) data->acpi_data.states[state].control;
+ port = perf->control_register.address;
+ bit_width = perf->control_register.bit_width;
+ value = (u32) perf->states[state].control;
dprintk("Writing 0x%08x to port 0x%04x\n", value, port);
ret = acpi_processor_write_port(port, bit_width, value);
if (ret) {
dprintk("Invalid port width 0x%04x\n", bit_width);
- retval = ret;
- goto migrate_end;
+ return (ret);
}
/*
@@ -177,49 +157,35 @@ acpi_processor_set_performance (
* before giving up.
*/
- port = data->acpi_data.status_register.address;
- bit_width = data->acpi_data.status_register.bit_width;
+ port = perf->status_register.address;
+ bit_width = perf->status_register.bit_width;
dprintk("Looking for 0x%08x from port 0x%04x\n",
- (u32) data->acpi_data.states[state].status, port);
+ (u32) perf->states[state].status, port);
- for (i=0; i<100; i++) {
+ for (i = 0; i < 100; i++) {
ret = acpi_processor_read_port(port, bit_width, &value);
if (ret) {
dprintk("Invalid port width 0x%04x\n", bit_width);
- retval = ret;
- goto migrate_end;
+ return (ret);
}
- if (value == (u32) data->acpi_data.states[state].status)
+ if (value == (u32) perf->states[state].status)
break;
udelay(10);
}
} else {
- i = 0;
- value = (u32) data->acpi_data.states[state].status;
+ value = (u32) perf->states[state].status;
}
- /* notify cpufreq */
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
-
- if (unlikely(value != (u32) data->acpi_data.states[state].status)) {
- unsigned int tmp = cpufreq_freqs.new;
- cpufreq_freqs.new = cpufreq_freqs.old;
- cpufreq_freqs.old = tmp;
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
+ if (unlikely(value != (u32) perf->states[state].status)) {
printk(KERN_WARNING "acpi-cpufreq: Transition failed\n");
retval = -ENODEV;
- goto migrate_end;
+ return (retval);
}
dprintk("Transition successful after %d microseconds\n", i * 10);
- data->acpi_data.state = state;
-
- retval = 0;
-migrate_end:
- set_cpus_allowed(current, saved_mask);
+ perf->state = state;
return (retval);
}
@@ -231,8 +197,17 @@ acpi_cpufreq_target (
unsigned int relation)
{
struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
+ struct acpi_processor_performance *perf;
+ struct cpufreq_freqs freqs;
+ cpumask_t online_policy_cpus;
+ cpumask_t saved_mask;
+ cpumask_t set_mask;
+ cpumask_t covered_cpus;
+ unsigned int cur_state = 0;
unsigned int next_state = 0;
unsigned int result = 0;
+ unsigned int j;
+ unsigned int tmp;
dprintk("acpi_cpufreq_setpolicy\n");
@@ -241,11 +216,95 @@ acpi_cpufreq_target (
target_freq,
relation,
&next_state);
- if (result)
+ if (unlikely(result))
return (result);
- result = acpi_processor_set_performance (data, policy->cpu, next_state);
+ perf = data->acpi_data;
+ cur_state = perf->state;
+ freqs.old = data->freq_table[cur_state].frequency;
+ freqs.new = data->freq_table[next_state].frequency;
+
+#ifdef CONFIG_HOTPLUG_CPU
+ /* cpufreq holds the hotplug lock, so we are safe from here on */
+ cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
+#else
+ online_policy_cpus = policy->cpus;
+#endif
+
+ for_each_cpu_mask(j, online_policy_cpus) {
+ freqs.cpu = j;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ }
+
+ /*
+ * We need to call driver->target() on all or any CPU in
+ * policy->cpus, depending on policy->shared_type.
+ */
+ saved_mask = current->cpus_allowed;
+ cpus_clear(covered_cpus);
+ for_each_cpu_mask(j, online_policy_cpus) {
+ /*
+ * Support for SMP systems.
+ * Make sure we are running on CPU that wants to change freq
+ */
+ cpus_clear(set_mask);
+ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+ cpus_or(set_mask, set_mask, online_policy_cpus);
+ else
+ cpu_set(j, set_mask);
+
+ set_cpus_allowed(current, set_mask);
+ if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) {
+ dprintk("couldn't limit to CPUs in this domain\n");
+ result = -EAGAIN;
+ break;
+ }
+
+ result = acpi_processor_set_performance (data, j, next_state);
+ if (result) {
+ result = -EAGAIN;
+ break;
+ }
+
+ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+ break;
+
+ cpu_set(j, covered_cpus);
+ }
+
+ for_each_cpu_mask(j, online_policy_cpus) {
+ freqs.cpu = j;
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
+ if (unlikely(result)) {
+ /*
+ * We have failed halfway through the frequency change.
+ * We have sent callbacks to online_policy_cpus and
+ * acpi_processor_set_performance() has been called on
+ * coverd_cpus. Best effort undo..
+ */
+
+ if (!cpus_empty(covered_cpus)) {
+ for_each_cpu_mask(j, covered_cpus) {
+ policy->cpu = j;
+ acpi_processor_set_performance (data,
+ j,
+ cur_state);
+ }
+ }
+
+ tmp = freqs.new;
+ freqs.new = freqs.old;
+ freqs.old = tmp;
+ for_each_cpu_mask(j, online_policy_cpus) {
+ freqs.cpu = j;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
+ }
+
+ set_cpus_allowed(current, saved_mask);
return (result);
}
@@ -271,30 +330,65 @@ acpi_cpufreq_guess_freq (
struct cpufreq_acpi_io *data,
unsigned int cpu)
{
+ struct acpi_processor_performance *perf = data->acpi_data;
+
if (cpu_khz) {
/* search the closest match to cpu_khz */
unsigned int i;
unsigned long freq;
- unsigned long freqn = data->acpi_data.states[0].core_frequency * 1000;
+ unsigned long freqn = perf->states[0].core_frequency * 1000;
- for (i=0; i < (data->acpi_data.state_count - 1); i++) {
+ for (i = 0; i < (perf->state_count - 1); i++) {
freq = freqn;
- freqn = data->acpi_data.states[i+1].core_frequency * 1000;
+ freqn = perf->states[i+1].core_frequency * 1000;
if ((2 * cpu_khz) > (freqn + freq)) {
- data->acpi_data.state = i;
+ perf->state = i;
return (freq);
}
}
- data->acpi_data.state = data->acpi_data.state_count - 1;
+ perf->state = perf->state_count - 1;
return (freqn);
- } else
+ } else {
/* assume CPU is at P0... */
- data->acpi_data.state = 0;
- return data->acpi_data.states[0].core_frequency * 1000;
-
+ perf->state = 0;
+ return perf->states[0].core_frequency * 1000;
+ }
}
+/*
+ * acpi_cpufreq_early_init - initialize ACPI P-States library
+ *
+ * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c)
+ * in order to determine correct frequency and voltage pairings. We can
+ * do _PDC and _PSD and find out the processor dependency for the
+ * actual init that will happen later...
+ */
+static int acpi_cpufreq_early_init_acpi(void)
+{
+ struct acpi_processor_performance *data;
+ unsigned int i, j;
+
+ dprintk("acpi_cpufreq_early_init\n");
+
+ for_each_possible_cpu(i) {
+ data = kzalloc(sizeof(struct acpi_processor_performance),
+ GFP_KERNEL);
+ if (!data) {
+ for_each_possible_cpu(j) {
+ kfree(acpi_perf_data[j]);
+ acpi_perf_data[j] = NULL;
+ }
+ return (-ENOMEM);
+ }
+ acpi_perf_data[i] = data;
+ }
+
+ /* Do initialization in ACPI core */
+ acpi_processor_preregister_performance(acpi_perf_data);
+ return 0;
+}
+
static int
acpi_cpufreq_cpu_init (
struct cpufreq_policy *policy)
@@ -304,41 +398,51 @@ acpi_cpufreq_cpu_init (
struct cpufreq_acpi_io *data;
unsigned int result = 0;
struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
+ struct acpi_processor_performance *perf;
dprintk("acpi_cpufreq_cpu_init\n");
+ if (!acpi_perf_data[cpu])
+ return (-ENODEV);
+
data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
if (!data)
return (-ENOMEM);
+ data->acpi_data = acpi_perf_data[cpu];
acpi_io_data[cpu] = data;
- result = acpi_processor_register_performance(&data->acpi_data, cpu);
+ result = acpi_processor_register_performance(data->acpi_data, cpu);
if (result)
goto err_free;
+ perf = data->acpi_data;
+ policy->cpus = perf->shared_cpu_map;
+ policy->shared_type = perf->shared_type;
+
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
}
/* capability check */
- if (data->acpi_data.state_count <= 1) {
+ if (perf->state_count <= 1) {
dprintk("No P-States\n");
result = -ENODEV;
goto err_unreg;
}
- if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO) ||
- (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO)) {
+
+ if ((perf->control_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO) ||
+ (perf->status_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO)) {
dprintk("Unsupported address space [%d, %d]\n",
- (u32) (data->acpi_data.control_register.space_id),
- (u32) (data->acpi_data.status_register.space_id));
+ (u32) (perf->control_register.space_id),
+ (u32) (perf->status_register.space_id));
result = -ENODEV;
goto err_unreg;
}
/* alloc freq_table */
- data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * (data->acpi_data.state_count + 1), GFP_KERNEL);
+ data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * (perf->state_count + 1), GFP_KERNEL);
if (!data->freq_table) {
result = -ENOMEM;
goto err_unreg;
@@ -346,9 +450,9 @@ acpi_cpufreq_cpu_init (
/* detect transition latency */
policy->cpuinfo.transition_latency = 0;
- for (i=0; i<data->acpi_data.state_count; i++) {
- if ((data->acpi_data.states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency)
- policy->cpuinfo.transition_latency = data->acpi_data.states[i].transition_latency * 1000;
+ for (i=0; i<perf->state_count; i++) {
+ if ((perf->states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency)
+ policy->cpuinfo.transition_latency = perf->states[i].transition_latency * 1000;
}
policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
@@ -356,11 +460,11 @@ acpi_cpufreq_cpu_init (
policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
/* table init */
- for (i=0; i<=data->acpi_data.state_count; i++)
+ for (i=0; i<=perf->state_count; i++)
{
data->freq_table[i].index = i;
- if (i<data->acpi_data.state_count)
- data->freq_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+ if (i<perf->state_count)
+ data->freq_table[i].frequency = perf->states[i].core_frequency * 1000;
else
data->freq_table[i].frequency = CPUFREQ_TABLE_END;
}
@@ -375,12 +479,12 @@ acpi_cpufreq_cpu_init (
printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management activated.\n",
cpu);
- for (i = 0; i < data->acpi_data.state_count; i++)
+ for (i = 0; i < perf->state_count; i++)
dprintk(" %cP%d: %d MHz, %d mW, %d uS\n",
- (i == data->acpi_data.state?'*':' '), i,
- (u32) data->acpi_data.states[i].core_frequency,
- (u32) data->acpi_data.states[i].power,
- (u32) data->acpi_data.states[i].transition_latency);
+ (i == perf->state?'*':' '), i,
+ (u32) perf->states[i].core_frequency,
+ (u32) perf->states[i].power,
+ (u32) perf->states[i].transition_latency);
cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
@@ -395,7 +499,7 @@ acpi_cpufreq_cpu_init (
err_freqfree:
kfree(data->freq_table);
err_unreg:
- acpi_processor_unregister_performance(&data->acpi_data, cpu);
+ acpi_processor_unregister_performance(perf, cpu);
err_free:
kfree(data);
acpi_io_data[cpu] = NULL;
@@ -416,7 +520,7 @@ acpi_cpufreq_cpu_exit (
if (data) {
cpufreq_frequency_table_put_attr(policy->cpu);
acpi_io_data[policy->cpu] = NULL;
- acpi_processor_unregister_performance(&data->acpi_data, policy->cpu);
+ acpi_processor_unregister_performance(data->acpi_data, policy->cpu);
kfree(data);
}
@@ -444,14 +548,15 @@ static struct freq_attr* acpi_cpufreq_attr[] = {
};
static struct cpufreq_driver acpi_cpufreq_driver = {
- .verify = acpi_cpufreq_verify,
- .target = acpi_cpufreq_target,
- .init = acpi_cpufreq_cpu_init,
- .exit = acpi_cpufreq_cpu_exit,
- .resume = acpi_cpufreq_resume,
- .name = "acpi-cpufreq",
- .owner = THIS_MODULE,
- .attr = acpi_cpufreq_attr,
+ .verify = acpi_cpufreq_verify,
+ .target = acpi_cpufreq_target,
+ .init = acpi_cpufreq_cpu_init,
+ .exit = acpi_cpufreq_cpu_exit,
+ .resume = acpi_cpufreq_resume,
+ .name = "acpi-cpufreq",
+ .owner = THIS_MODULE,
+ .attr = acpi_cpufreq_attr,
+ .flags = CPUFREQ_STICKY,
};
@@ -462,7 +567,10 @@ acpi_cpufreq_init (void)
dprintk("acpi_cpufreq_init\n");
- result = cpufreq_register_driver(&acpi_cpufreq_driver);
+ result = acpi_cpufreq_early_init_acpi();
+
+ if (!result)
+ result = cpufreq_register_driver(&acpi_cpufreq_driver);
return (result);
}
@@ -471,10 +579,15 @@ acpi_cpufreq_init (void)
static void __exit
acpi_cpufreq_exit (void)
{
+ unsigned int i;
dprintk("acpi_cpufreq_exit\n");
cpufreq_unregister_driver(&acpi_cpufreq_driver);
+ for_each_possible_cpu(i) {
+ kfree(acpi_perf_data[i]);
+ acpi_perf_data[i] = NULL;
+ }
return;
}
diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
index f275e0d..0d49d73 100644
--- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
+++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c
@@ -1,5 +1,5 @@
/*
- * (C) 2004 Sebastian Witt <se.witt@gmx.net>
+ * (C) 2004-2006 Sebastian Witt <se.witt@gmx.net>
*
* Licensed under the terms of the GNU GPL License version 2.
* Based upon reverse engineered information
@@ -90,7 +90,7 @@ static int nforce2_calc_pll(unsigned int fsb)
/* Try to calculate multiplier and divider up to 4 times */
while (((mul == 0) || (div == 0)) && (tried <= 3)) {
- for (xdiv = 1; xdiv <= 0x80; xdiv++)
+ for (xdiv = 2; xdiv <= 0x80; xdiv++)
for (xmul = 1; xmul <= 0xfe; xmul++)
if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) ==
fsb + tried) {
@@ -117,8 +117,7 @@ static void nforce2_write_pll(int pll)
int temp;
/* Set the pll addr. to 0x00 */
- temp = 0x00;
- pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, temp);
+ pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0);
/* Now write the value in all 64 registers */
for (temp = 0; temp <= 0x3f; temp++)
@@ -266,7 +265,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
if (freqs.old == freqs.new)
return 0;
- dprintk(KERN_INFO "cpufreq: Old CPU frequency %d kHz, new %d kHz\n",
+ dprintk("Old CPU frequency %d kHz, new %d kHz\n",
freqs.old, freqs.new);
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
@@ -278,7 +277,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n",
target_fsb);
else
- dprintk(KERN_INFO "cpufreq: Changed FSB successfully to %d\n",
+ dprintk("Changed FSB successfully to %d\n",
target_fsb);
/* Enable IRQs */
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 8ef3854..146f607 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -77,13 +77,17 @@ static char speedbuffer[8];
static char *print_speed(int speed)
{
- if (speed > 1000) {
- if (speed%1000 == 0)
- sprintf (speedbuffer, "%dGHz", speed/1000);
- else
- sprintf (speedbuffer, "%d.%dGHz", speed/1000, (speed%1000)/100);
- } else
- sprintf (speedbuffer, "%dMHz", speed);
+ if (speed < 1000) {
+ snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed);
+ return speedbuffer;
+ }
+
+ if (speed%1000 == 0)
+ snprintf(speedbuffer, sizeof(speedbuffer),
+ "%dGHz", speed/1000);
+ else
+ snprintf(speedbuffer, sizeof(speedbuffer),
+ "%d.%dGHz", speed/1000, (speed%1000)/100);
return speedbuffer;
}
@@ -675,7 +679,7 @@ static int __init longhaul_init(void)
static void __exit longhaul_exit(void)
{
- int i=0;
+ int i;
for (i=0; i < numscales; i++) {
if (clock_ratio[i] == maxmult) {
diff --git a/arch/i386/kernel/cpu/cpufreq/longrun.c b/arch/i386/kernel/cpu/cpufreq/longrun.c
index e3868de..b268951 100644
--- a/arch/i386/kernel/cpu/cpufreq/longrun.c
+++ b/arch/i386/kernel/cpu/cpufreq/longrun.c
@@ -223,7 +223,6 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
/* set to 0 to try_hi perf_pctg */
msr_lo &= 0xFFFFFF80;
msr_hi &= 0xFFFFFF80;
- msr_lo |= 0;
msr_hi |= try_hi;
wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
index 2bf4237..694d479 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c
@@ -452,23 +452,23 @@ static int powernow_decode_bios (int maxfid, int startvid)
pst = (struct pst_s *) p;
- for (i = 0 ; i <psb->numpst; i++) {
+ for (j=0; j<psb->numpst; j++) {
pst = (struct pst_s *) p;
number_scales = pst->numpstates;
if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) &&
(maxfid==pst->maxfid) && (startvid==pst->startvid))
{
- dprintk ("PST:%d (@%p)\n", i, pst);
+ dprintk ("PST:%d (@%p)\n", j, pst);
dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n",
pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid);
ret = get_ranges ((char *) pst + sizeof (struct pst_s));
return ret;
-
} else {
+ unsigned int k;
p = (char *) pst + sizeof (struct pst_s);
- for (j=0 ; j < number_scales; j++)
+ for (k=0; k<number_scales; k++)
p+=2;
}
}
@@ -581,10 +581,7 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
- /* recalibrate cpu_khz */
- result = recalibrate_cpu_khz();
- if (result)
- return result;
+ recalibrate_cpu_khz();
fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
if (!fsb) {
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index 71fffa1..2d64916 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -1,5 +1,5 @@
/*
- * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc.
+ * (c) 2003-2006 Advanced Micro Devices, Inc.
* Your use of this code is subject to the terms and conditions of the
* GNU general public license version 2. See "COPYING" or
* http://www.gnu.org/licenses/gpl.html
@@ -14,13 +14,13 @@
* Based upon datasheets & sample CPUs kindly provided by AMD.
*
* Valuable input gratefully received from Dave Jones, Pavel Machek,
- * Dominik Brodowski, and others.
+ * Dominik Brodowski, Jacob Shin, and others.
* Originally developed by Paul Devriendt.
* Processor information obtained from Chapter 9 (Power and Thermal Management)
* of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
* Opteron Processors" available for download from www.amd.com
*
- * Tables for specific CPUs can be infrerred from
+ * Tables for specific CPUs can be inferred from
* http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf
*/
@@ -46,7 +46,7 @@
#define PFX "powernow-k8: "
#define BFX PFX "BIOS error: "
-#define VERSION "version 1.60.2"
+#define VERSION "version 2.00.00"
#include "powernow-k8.h"
/* serialize freq changes */
@@ -54,6 +54,8 @@ static DEFINE_MUTEX(fidvid_mutex);
static struct powernow_k8_data *powernow_data[NR_CPUS];
+static int cpu_family = CPU_OPTERON;
+
#ifndef CONFIG_SMP
static cpumask_t cpu_core_map[1];
#endif
@@ -64,16 +66,36 @@ static u32 find_freq_from_fid(u32 fid)
return 800 + (fid * 100);
}
+
/* Return a frequency in KHz, given an input fid */
static u32 find_khz_freq_from_fid(u32 fid)
{
return 1000 * find_freq_from_fid(fid);
}
-/* Return a voltage in miliVolts, given an input vid */
-static u32 find_millivolts_from_vid(struct powernow_k8_data *data, u32 vid)
+/* Return a frequency in MHz, given an input fid and did */
+static u32 find_freq_from_fiddid(u32 fid, u32 did)
+{
+ return 100 * (fid + 0x10) >> did;
+}
+
+static u32 find_khz_freq_from_fiddid(u32 fid, u32 did)
{
- return 1550-vid*25;
+ return 1000 * find_freq_from_fiddid(fid, did);
+}
+
+static u32 find_fid_from_pstate(u32 pstate)
+{
+ u32 hi, lo;
+ rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
+ return lo & HW_PSTATE_FID_MASK;
+}
+
+static u32 find_did_from_pstate(u32 pstate)
+{
+ u32 hi, lo;
+ rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi);
+ return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
}
/* Return the vco fid for an input fid
@@ -98,6 +120,9 @@ static int pending_bit_stuck(void)
{
u32 lo, hi;
+ if (cpu_family == CPU_HW_PSTATE)
+ return 0;
+
rdmsr(MSR_FIDVID_STATUS, lo, hi);
return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
}
@@ -111,6 +136,14 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
u32 lo, hi;
u32 i = 0;
+ if (cpu_family == CPU_HW_PSTATE) {
+ rdmsr(MSR_PSTATE_STATUS, lo, hi);
+ i = lo & HW_PSTATE_MASK;
+ rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi);
+ data->currfid = lo & HW_PSTATE_FID_MASK;
+ data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
+ return 0;
+ }
do {
if (i++ > 10000) {
dprintk("detected change pending stuck\n");
@@ -175,7 +208,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
do {
wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
if (i++ > 100) {
- printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+ printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n");
return 1;
}
} while (query_current_values_with_pending_wait(data));
@@ -255,7 +288,15 @@ static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid,
return 0;
}
-/* Change the fid and vid, by the 3 phases. */
+/* Change hardware pstate by single MSR write */
+static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
+{
+ wrmsr(MSR_PSTATE_CTRL, pstate, 0);
+ data->currfid = find_fid_from_pstate(pstate);
+ return 0;
+}
+
+/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */
static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid)
{
if (core_voltage_pre_transition(data, reqvid))
@@ -474,26 +515,35 @@ static int check_supported_cpu(unsigned int cpu)
goto out;
eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
- if ((eax & CPUID_XFAM) != CPUID_XFAM_K8)
+ if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) &&
+ ((eax & CPUID_XFAM) < CPUID_XFAM_10H))
goto out;
- if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
- ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) {
- printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
- goto out;
- }
+ if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) {
+ if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) ||
+ ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) {
+ printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax);
+ goto out;
+ }
- eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
- if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
- printk(KERN_INFO PFX
- "No frequency change capabilities detected\n");
- goto out;
- }
+ eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES);
+ if (eax < CPUID_FREQ_VOLT_CAPABILITIES) {
+ printk(KERN_INFO PFX
+ "No frequency change capabilities detected\n");
+ goto out;
+ }
- cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
- if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
- printk(KERN_INFO PFX "Power state transitions not supported\n");
- goto out;
+ cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+ if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) {
+ printk(KERN_INFO PFX "Power state transitions not supported\n");
+ goto out;
+ }
+ } else { /* must be a HW Pstate capable processor */
+ cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx);
+ if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE)
+ cpu_family = CPU_HW_PSTATE;
+ else
+ goto out;
}
rc = 1;
@@ -547,12 +597,18 @@ static void print_basics(struct powernow_k8_data *data)
{
int j;
for (j = 0; j < data->numps; j++) {
- if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID)
- printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x (%d mV)\n", j,
+ if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) {
+ if (cpu_family == CPU_HW_PSTATE) {
+ printk(KERN_INFO PFX " %d : fid 0x%x gid 0x%x (%d MHz)\n", j, (data->powernow_table[j].index & 0xff00) >> 8,
+ (data->powernow_table[j].index & 0xff0000) >> 16,
+ data->powernow_table[j].frequency/1000);
+ } else {
+ printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", j,
data->powernow_table[j].index & 0xff,
data->powernow_table[j].frequency/1000,
- data->powernow_table[j].index >> 8,
- find_millivolts_from_vid(data, data->powernow_table[j].index >> 8));
+ data->powernow_table[j].index >> 8);
+ }
+ }
}
if (data->batps)
printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps);
@@ -702,7 +758,7 @@ static int find_psb_table(struct powernow_k8_data *data)
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
{
- if (!data->acpi_data.state_count)
+ if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
return;
data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
@@ -715,9 +771,8 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
{
- int i;
- int cntlofreq = 0;
struct cpufreq_frequency_table *powernow_table;
+ int ret_val;
if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
dprintk("register performance failed: bad ACPI data\n");
@@ -746,13 +801,92 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
goto err_out;
}
+ if (cpu_family == CPU_HW_PSTATE)
+ ret_val = fill_powernow_table_pstate(data, powernow_table);
+ else
+ ret_val = fill_powernow_table_fidvid(data, powernow_table);
+ if (ret_val)
+ goto err_out_mem;
+
+ powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
+ powernow_table[data->acpi_data.state_count].index = 0;
+ data->powernow_table = powernow_table;
+
+ /* fill in data */
+ data->numps = data->acpi_data.state_count;
+ print_basics(data);
+ powernow_k8_acpi_pst_values(data, 0);
+
+ /* notify BIOS that we exist */
+ acpi_processor_notify_smm(THIS_MODULE);
+
+ return 0;
+
+err_out_mem:
+ kfree(powernow_table);
+
+err_out:
+ acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+
+ /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
+ data->acpi_data.state_count = 0;
+
+ return -ENODEV;
+}
+
+static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+{
+ int i;
+
+ for (i = 0; i < data->acpi_data.state_count; i++) {
+ u32 index;
+ u32 hi = 0, lo = 0;
+ u32 fid;
+ u32 did;
+
+ index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+ if (index > MAX_HW_PSTATE) {
+ printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
+ printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
+ }
+ rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
+ if (!(hi & HW_PSTATE_VALID_MASK)) {
+ dprintk("invalid pstate %d, ignoring\n", index);
+ powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ continue;
+ }
+
+ fid = lo & HW_PSTATE_FID_MASK;
+ did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT;
+
+ dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did);
+
+ powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT);
+
+ powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did);
+
+ if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
+ printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
+ powernow_table[i].frequency,
+ (unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
+ powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
+ continue;
+ }
+ }
+ return 0;
+}
+
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
+{
+ int i;
+ int cntlofreq = 0;
for (i = 0; i < data->acpi_data.state_count; i++) {
u32 fid;
u32 vid;
if (data->exttype) {
- fid = data->acpi_data.states[i].status & FID_MASK;
- vid = (data->acpi_data.states[i].status >> VID_SHIFT) & VID_MASK;
+ fid = data->acpi_data.states[i].status & EXT_FID_MASK;
+ vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
} else {
fid = data->acpi_data.states[i].control & FID_MASK;
vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
@@ -786,7 +920,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) ||
(powernow_table[i].index != powernow_table[cntlofreq].index)) {
printk(KERN_ERR PFX "Too many lo freq table entries\n");
- goto err_out_mem;
+ return 1;
}
dprintk("double low frequency table entry, ignoring it.\n");
@@ -804,31 +938,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
continue;
}
}
-
- powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
- powernow_table[data->acpi_data.state_count].index = 0;
- data->powernow_table = powernow_table;
-
- /* fill in data */
- data->numps = data->acpi_data.state_count;
- print_basics(data);
- powernow_k8_acpi_pst_values(data, 0);
-
- /* notify BIOS that we exist */
- acpi_processor_notify_smm(THIS_MODULE);
-
return 0;
-
-err_out_mem:
- kfree(powernow_table);
-
-err_out:
- acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
-
- /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
- data->acpi_data.state_count = 0;
-
- return -ENODEV;
}
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
@@ -844,20 +954,20 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
/* Take a frequency, and issue the fid/vid transition command */
-static int transition_frequency(struct powernow_k8_data *data, unsigned int index)
+static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index)
{
- u32 fid;
- u32 vid;
+ u32 fid = 0;
+ u32 vid = 0;
int res, i;
struct cpufreq_freqs freqs;
dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
+ /* fid/vid correctness check for k8 */
/* fid are the lower 8 bits of the index we stored into
- * the cpufreq frequency table in find_psb_table, vid are
- * the upper 8 bits.
+ * the cpufreq frequency table in find_psb_table, vid
+ * are the upper 8 bits.
*/
-
fid = data->powernow_table[index].index & 0xFF;
vid = (data->powernow_table[index].index & 0xFF00) >> 8;
@@ -881,22 +991,58 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde
dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
smp_processor_id(), fid, vid);
-
- freqs.cpu = data->cpu;
freqs.old = find_khz_freq_from_fid(data->currfid);
freqs.new = find_khz_freq_from_fid(fid);
- for_each_cpu_mask(i, cpu_core_map[data->cpu]) {
+
+ for_each_cpu_mask(i, *(data->available_cores)) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
}
res = transition_fid_vid(data, fid, vid);
-
freqs.new = find_khz_freq_from_fid(data->currfid);
- for_each_cpu_mask(i, cpu_core_map[data->cpu]) {
+
+ for_each_cpu_mask(i, *(data->available_cores)) {
freqs.cpu = i;
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
- }
+ }
+ return res;
+}
+
+/* Take a frequency, and issue the hardware pstate transition command */
+static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index)
+{
+ u32 fid = 0;
+ u32 did = 0;
+ u32 pstate = 0;
+ int res, i;
+ struct cpufreq_freqs freqs;
+
+ dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
+
+ /* get fid did for hardware pstate transition */
+ pstate = index & HW_PSTATE_MASK;
+ if (pstate > MAX_HW_PSTATE)
+ return 0;
+ fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT;
+ did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT;
+ freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+ freqs.new = find_khz_freq_from_fiddid(fid, did);
+
+ for_each_cpu_mask(i, *(data->available_cores)) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ }
+
+ res = transition_pstate(data, pstate);
+ data->currfid = find_fid_from_pstate(pstate);
+ data->currdid = find_did_from_pstate(pstate);
+ freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+
+ for_each_cpu_mask(i, *(data->available_cores)) {
+ freqs.cpu = i;
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
return res;
}
@@ -933,18 +1079,21 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n",
pol->cpu, targfreq, pol->min, pol->max, relation);
- if (query_current_values_with_pending_wait(data)) {
- ret = -EIO;
+ if (query_current_values_with_pending_wait(data))
goto err_out;
- }
- dprintk("targ: curr fid 0x%x, vid 0x%x\n",
+ if (cpu_family == CPU_HW_PSTATE)
+ dprintk("targ: curr fid 0x%x, did 0x%x\n",
+ data->currfid, data->currvid);
+ else {
+ dprintk("targ: curr fid 0x%x, vid 0x%x\n",
data->currfid, data->currvid);
- if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
- printk(KERN_INFO PFX
- "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n",
- checkfid, data->currfid, checkvid, data->currvid);
+ if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
+ printk(KERN_INFO PFX
+ "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n",
+ checkfid, data->currfid, checkvid, data->currvid);
+ }
}
if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate))
@@ -954,7 +1103,11 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
powernow_k8_acpi_pst_values(data, newstate);
- if (transition_frequency(data, newstate)) {
+ if (cpu_family == CPU_HW_PSTATE)
+ ret = transition_frequency_pstate(data, newstate);
+ else
+ ret = transition_frequency_fidvid(data, newstate);
+ if (ret) {
printk(KERN_ERR PFX "transition frequency failed\n");
ret = 1;
mutex_unlock(&fidvid_mutex);
@@ -962,7 +1115,10 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
}
mutex_unlock(&fidvid_mutex);
- pol->cur = find_khz_freq_from_fid(data->currfid);
+ if (cpu_family == CPU_HW_PSTATE)
+ pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+ else
+ pol->cur = find_khz_freq_from_fid(data->currfid);
ret = 0;
err_out:
@@ -1007,14 +1163,13 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
* Use the PSB BIOS structure. This is only availabe on
* an UP version, and is deprecated by AMD.
*/
-
- if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) {
+ if (num_online_cpus() != 1) {
printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n");
kfree(data);
return -ENODEV;
}
if (pol->cpu != 0) {
- printk(KERN_ERR PFX "init not cpu 0\n");
+ printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n");
kfree(data);
return -ENODEV;
}
@@ -1042,20 +1197,28 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
if (query_current_values_with_pending_wait(data))
goto err_out;
- fidvid_msr_init();
+ if (cpu_family == CPU_OPTERON)
+ fidvid_msr_init();
/* run on any CPU again */
set_cpus_allowed(current, oldmask);
pol->governor = CPUFREQ_DEFAULT_GOVERNOR;
- pol->cpus = cpu_core_map[pol->cpu];
+ if (cpu_family == CPU_HW_PSTATE)
+ pol->cpus = cpumask_of_cpu(pol->cpu);
+ else
+ pol->cpus = cpu_core_map[pol->cpu];
+ data->available_cores = &(pol->cpus);
/* Take a crude guess here.
* That guess was in microseconds, so multiply with 1000 */
pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US)
+ (3 * (1 << data->irt) * 10)) * 1000;
- pol->cur = find_khz_freq_from_fid(data->currfid);
+ if (cpu_family == CPU_HW_PSTATE)
+ pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid);
+ else
+ pol->cur = find_khz_freq_from_fid(data->currfid);
dprintk("policy current frequency %d kHz\n", pol->cur);
/* min/max the cpu is capable of */
@@ -1069,8 +1232,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu);
- printk("cpu_init done, current fid 0x%x, vid 0x%x\n",
- data->currfid, data->currvid);
+ if (cpu_family == CPU_HW_PSTATE)
+ dprintk("cpu_init done, current fid 0x%x, did 0x%x\n",
+ data->currfid, data->currdid);
+ else
+ dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n",
+ data->currfid, data->currvid);
powernow_data[pol->cpu] = data;
@@ -1156,8 +1323,9 @@ static int __cpuinit powernowk8_init(void)
}
if (supported_cpus == num_online_cpus()) {
- printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron "
- "processors (" VERSION ")\n", supported_cpus);
+ printk(KERN_INFO PFX "Found %d %s "
+ "processors (" VERSION ")\n", supported_cpus,
+ boot_cpu_data.x86_model_id);
return cpufreq_register_driver(&cpufreq_amd64_driver);
}
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
index 79a7c5c..0fb2a30 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h
@@ -1,5 +1,5 @@
/*
- * (c) 2003, 2004, 2005 Advanced Micro Devices, Inc.
+ * (c) 2003-2006 Advanced Micro Devices, Inc.
* Your use of this code is subject to the terms and conditions of the
* GNU general public license version 2. See "COPYING" or
* http://www.gnu.org/licenses/gpl.html
@@ -21,8 +21,8 @@ struct powernow_k8_data {
u32 plllock; /* pll lock time, units 1 us */
u32 exttype; /* extended interface = 1 */
- /* keep track of the current fid / vid */
- u32 currvid, currfid;
+ /* keep track of the current fid / vid or did */
+ u32 currvid, currfid, currdid;
/* the powernow_table includes all frequency and vid/fid pairings:
* fid are the lower 8 bits of the index, vid are the upper 8 bits.
@@ -34,6 +34,10 @@ struct powernow_k8_data {
* used to determine valid frequency/vid/fid states */
struct acpi_processor_performance acpi_data;
#endif
+ /* we need to keep track of associated cores, but let cpufreq
+ * handle hotplug events - so just point at cpufreq pol->cpus
+ * structure */
+ cpumask_t *available_cores;
};
@@ -43,6 +47,7 @@ struct powernow_k8_data {
#define CPUID_XFAM_K8 0
#define CPUID_XMOD 0x000f0000 /* extended model */
#define CPUID_XMOD_REV_G 0x00060000
+#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */
#define CPUID_USE_XFAM_XMOD 0x00000f00
#define CPUID_GET_MAX_CAPABILITIES 0x80000000
#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007
@@ -79,6 +84,32 @@ struct powernow_k8_data {
#define MSR_S_HI_CURRENT_VID 0x0000003f
#define MSR_C_HI_STP_GNT_BENIGN 0x00000001
+
+/* Hardware Pstate _PSS and MSR definitions */
+#define USE_HW_PSTATE 0x00000080
+#define HW_PSTATE_FID_MASK 0x0000003f
+#define HW_PSTATE_DID_MASK 0x000001c0
+#define HW_PSTATE_DID_SHIFT 6
+#define HW_PSTATE_MASK 0x00000007
+#define HW_PSTATE_VALID_MASK 0x80000000
+#define HW_FID_INDEX_SHIFT 8
+#define HW_FID_INDEX_MASK 0x0000ff00
+#define HW_DID_INDEX_SHIFT 16
+#define HW_DID_INDEX_MASK 0x00ff0000
+#define HW_WATTS_MASK 0xff
+#define HW_PWR_DVR_MASK 0x300
+#define HW_PWR_DVR_SHIFT 8
+#define HW_PWR_MAX_MULT 3
+#define MAX_HW_PSTATE 8 /* hw pstate supports up to 8 */
+#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */
+#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */
+#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */
+
+/* define the two driver architectures */
+#define CPU_OPTERON 0
+#define CPU_HW_PSTATE 1
+
+
/*
* There are restrictions frequencies have to follow:
* - only 1 entry in the low fid table ( <=1.4GHz )
@@ -138,7 +169,9 @@ struct powernow_k8_data {
#define MVS_MASK 3
#define VST_MASK 0x7f
#define VID_MASK 0x1f
-#define FID_MASK 0x3f
+#define FID_MASK 0x1f
+#define EXT_VID_MASK 0x3f
+#define EXT_FID_MASK 0x3f
/*
@@ -182,6 +215,9 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index);
+static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
+static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table);
+
#ifdef CONFIG_SMP
static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[])
{
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index b0ff907..f7e4356 100644
--- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -2,19 +2,15 @@
* cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium
* M (part of the Centrino chipset).
*
+ * Since the original Pentium M, most new Intel CPUs support Enhanced
+ * SpeedStep.
+ *
* Despite the "SpeedStep" in the name, this is almost entirely unlike
* traditional SpeedStep.
*
* Modelled on speedstep.c
*
* Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
- *
- * WARNING WARNING WARNING
- *
- * This driver manipulates the PERF_CTL MSR, which is only somewhat
- * documented. While it seems to work on my laptop, it has not been
- * tested anywhere else, and it may not work for you, do strange
- * things or simply crash.
*/
#include <linux/kernel.h>
@@ -36,7 +32,7 @@
#include <asm/cpufeature.h>
#define PFX "speedstep-centrino: "
-#define MAINTAINER "Jeremy Fitzhardinge <jeremy@goop.org>"
+#define MAINTAINER "cpufreq@lists.linux.org.uk"
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
@@ -250,7 +246,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
if (model->cpu_id == NULL) {
/* No match at all */
- dprintk(KERN_INFO PFX "no support for CPU model \"%s\": "
+ dprintk("no support for CPU model \"%s\": "
"send /proc/cpuinfo to " MAINTAINER "\n",
cpu->x86_model_id);
return -ENOENT;
@@ -258,10 +254,10 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
if (model->op_points == NULL) {
/* Matched a non-match */
- dprintk(KERN_INFO PFX "no table support for CPU model \"%s\"\n",
+ dprintk("no table support for CPU model \"%s\"\n",
cpu->x86_model_id);
#ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
- dprintk(KERN_INFO PFX "try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n");
+ dprintk("try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n");
#endif
return -ENOENT;
}
@@ -351,7 +347,36 @@ static unsigned int get_cur_freq(unsigned int cpu)
#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
-static struct acpi_processor_performance p;
+static struct acpi_processor_performance *acpi_perf_data[NR_CPUS];
+
+/*
+ * centrino_cpu_early_init_acpi - Do the preregistering with ACPI P-States
+ * library
+ *
+ * Before doing the actual init, we need to do _PSD related setup whenever
+ * supported by the BIOS. These are handled by this early_init routine.
+ */
+static int centrino_cpu_early_init_acpi(void)
+{
+ unsigned int i, j;
+ struct acpi_processor_performance *data;
+
+ for_each_possible_cpu(i) {
+ data = kzalloc(sizeof(struct acpi_processor_performance),
+ GFP_KERNEL);
+ if (!data) {
+ for_each_possible_cpu(j) {
+ kfree(acpi_perf_data[j]);
+ acpi_perf_data[j] = NULL;
+ }
+ return (-ENOMEM);
+ }
+ acpi_perf_data[i] = data;
+ }
+
+ acpi_processor_preregister_performance(acpi_perf_data);
+ return 0;
+}
/*
* centrino_cpu_init_acpi - register with ACPI P-States library
@@ -365,46 +390,51 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
unsigned long cur_freq;
int result = 0, i;
unsigned int cpu = policy->cpu;
+ struct acpi_processor_performance *p;
+
+ p = acpi_perf_data[cpu];
/* register with ACPI core */
- if (acpi_processor_register_performance(&p, cpu)) {
- dprintk(KERN_INFO PFX "obtaining ACPI data failed\n");
+ if (acpi_processor_register_performance(p, cpu)) {
+ dprintk(PFX "obtaining ACPI data failed\n");
return -EIO;
}
+ policy->cpus = p->shared_cpu_map;
+ policy->shared_type = p->shared_type;
/* verify the acpi_data */
- if (p.state_count <= 1) {
+ if (p->state_count <= 1) {
dprintk("No P-States\n");
result = -ENODEV;
goto err_unreg;
}
- if ((p.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (p.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+ if ((p->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+ (p->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
dprintk("Invalid control/status registers (%x - %x)\n",
- p.control_register.space_id, p.status_register.space_id);
+ p->control_register.space_id, p->status_register.space_id);
result = -EIO;
goto err_unreg;
}
- for (i=0; i<p.state_count; i++) {
- if (p.states[i].control != p.states[i].status) {
+ for (i=0; i<p->state_count; i++) {
+ if (p->states[i].control != p->states[i].status) {
dprintk("Different control (%llu) and status values (%llu)\n",
- p.states[i].control, p.states[i].status);
+ p->states[i].control, p->states[i].status);
result = -EINVAL;
goto err_unreg;
}
- if (!p.states[i].core_frequency) {
+ if (!p->states[i].core_frequency) {
dprintk("Zero core frequency for state %u\n", i);
result = -EINVAL;
goto err_unreg;
}
- if (p.states[i].core_frequency > p.states[0].core_frequency) {
+ if (p->states[i].core_frequency > p->states[0].core_frequency) {
dprintk("P%u has larger frequency (%llu) than P0 (%llu), skipping\n", i,
- p.states[i].core_frequency, p.states[0].core_frequency);
- p.states[i].core_frequency = 0;
+ p->states[i].core_frequency, p->states[0].core_frequency);
+ p->states[i].core_frequency = 0;
continue;
}
}
@@ -416,26 +446,26 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
}
centrino_model[cpu]->model_name=NULL;
- centrino_model[cpu]->max_freq = p.states[0].core_frequency * 1000;
+ centrino_model[cpu]->max_freq = p->states[0].core_frequency * 1000;
centrino_model[cpu]->op_points = kmalloc(sizeof(struct cpufreq_frequency_table) *
- (p.state_count + 1), GFP_KERNEL);
+ (p->state_count + 1), GFP_KERNEL);
if (!centrino_model[cpu]->op_points) {
result = -ENOMEM;
goto err_kfree;
}
- for (i=0; i<p.state_count; i++) {
- centrino_model[cpu]->op_points[i].index = p.states[i].control;
- centrino_model[cpu]->op_points[i].frequency = p.states[i].core_frequency * 1000;
+ for (i=0; i<p->state_count; i++) {
+ centrino_model[cpu]->op_points[i].index = p->states[i].control;
+ centrino_model[cpu]->op_points[i].frequency = p->states[i].core_frequency * 1000;
dprintk("adding state %i with frequency %u and control value %04x\n",
i, centrino_model[cpu]->op_points[i].frequency, centrino_model[cpu]->op_points[i].index);
}
- centrino_model[cpu]->op_points[p.state_count].frequency = CPUFREQ_TABLE_END;
+ centrino_model[cpu]->op_points[p->state_count].frequency = CPUFREQ_TABLE_END;
cur_freq = get_cur_freq(cpu);
- for (i=0; i<p.state_count; i++) {
- if (!p.states[i].core_frequency) {
+ for (i=0; i<p->state_count; i++) {
+ if (!p->states[i].core_frequency) {
dprintk("skipping state %u\n", i);
centrino_model[cpu]->op_points[i].frequency = CPUFREQ_ENTRY_INVALID;
continue;
@@ -451,7 +481,7 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
}
if (cur_freq == centrino_model[cpu]->op_points[i].frequency)
- p.state = i;
+ p->state = i;
}
/* notify BIOS that we exist */
@@ -464,12 +494,13 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
err_kfree:
kfree(centrino_model[cpu]);
err_unreg:
- acpi_processor_unregister_performance(&p, cpu);
- dprintk(KERN_INFO PFX "invalid ACPI data\n");
+ acpi_processor_unregister_performance(p, cpu);
+ dprintk(PFX "invalid ACPI data\n");
return (result);
}
#else
static inline int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { return -ENODEV; }
+static inline int centrino_cpu_early_init_acpi(void) { return 0; }
#endif
static int centrino_cpu_init(struct cpufreq_policy *policy)
@@ -499,7 +530,7 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
centrino_cpu[policy->cpu] = &cpu_ids[i];
if (!centrino_cpu[policy->cpu]) {
- dprintk(KERN_INFO PFX "found unsupported CPU with "
+ dprintk("found unsupported CPU with "
"Enhanced SpeedStep: send /proc/cpuinfo to "
MAINTAINER "\n");
return -ENODEV;
@@ -555,10 +586,15 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy)
#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
if (!centrino_model[cpu]->model_name) {
- dprintk("unregistering and freeing ACPI data\n");
- acpi_processor_unregister_performance(&p, cpu);
- kfree(centrino_model[cpu]->op_points);
- kfree(centrino_model[cpu]);
+ static struct acpi_processor_performance *p;
+
+ if (acpi_perf_data[cpu]) {
+ p = acpi_perf_data[cpu];
+ dprintk("unregistering and freeing ACPI data\n");
+ acpi_processor_unregister_performance(p, cpu);
+ kfree(centrino_model[cpu]->op_points);
+ kfree(centrino_model[cpu]);
+ }
}
#endif
@@ -592,63 +628,128 @@ static int centrino_target (struct cpufreq_policy *policy,
unsigned int relation)
{
unsigned int newstate = 0;
- unsigned int msr, oldmsr, h, cpu = policy->cpu;
+ unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu;
struct cpufreq_freqs freqs;
+ cpumask_t online_policy_cpus;
cpumask_t saved_mask;
- int retval;
+ cpumask_t set_mask;
+ cpumask_t covered_cpus;
+ int retval = 0;
+ unsigned int j, k, first_cpu, tmp;
- if (centrino_model[cpu] == NULL)
+ if (unlikely(centrino_model[cpu] == NULL))
return -ENODEV;
- /*
- * Support for SMP systems.
- * Make sure we are running on the CPU that wants to change frequency
- */
- saved_mask = current->cpus_allowed;
- set_cpus_allowed(current, policy->cpus);
- if (!cpu_isset(smp_processor_id(), policy->cpus)) {
- dprintk("couldn't limit to CPUs in this domain\n");
- return(-EAGAIN);
+ if (unlikely(cpufreq_frequency_table_target(policy,
+ centrino_model[cpu]->op_points,
+ target_freq,
+ relation,
+ &newstate))) {
+ return -EINVAL;
}
- if (cpufreq_frequency_table_target(policy, centrino_model[cpu]->op_points, target_freq,
- relation, &newstate)) {
- retval = -EINVAL;
- goto migrate_end;
- }
+#ifdef CONFIG_HOTPLUG_CPU
+ /* cpufreq holds the hotplug lock, so we are safe from here on */
+ cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
+#else
+ online_policy_cpus = policy->cpus;
+#endif
- msr = centrino_model[cpu]->op_points[newstate].index;
- rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+ saved_mask = current->cpus_allowed;
+ first_cpu = 1;
+ cpus_clear(covered_cpus);
+ for_each_cpu_mask(j, online_policy_cpus) {
+ /*
+ * Support for SMP systems.
+ * Make sure we are running on CPU that wants to change freq
+ */
+ cpus_clear(set_mask);
+ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+ cpus_or(set_mask, set_mask, online_policy_cpus);
+ else
+ cpu_set(j, set_mask);
+
+ set_cpus_allowed(current, set_mask);
+ if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) {
+ dprintk("couldn't limit to CPUs in this domain\n");
+ retval = -EAGAIN;
+ if (first_cpu) {
+ /* We haven't started the transition yet. */
+ goto migrate_end;
+ }
+ break;
+ }
- if (msr == (oldmsr & 0xffff)) {
- retval = 0;
- dprintk("no change needed - msr was and needs to be %x\n", oldmsr);
- goto migrate_end;
- }
+ msr = centrino_model[cpu]->op_points[newstate].index;
+
+ if (first_cpu) {
+ rdmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+ if (msr == (oldmsr & 0xffff)) {
+ dprintk("no change needed - msr was and needs "
+ "to be %x\n", oldmsr);
+ retval = 0;
+ goto migrate_end;
+ }
+
+ freqs.old = extract_clock(oldmsr, cpu, 0);
+ freqs.new = extract_clock(msr, cpu, 0);
+
+ dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
+ target_freq, freqs.old, freqs.new, msr);
+
+ for_each_cpu_mask(k, online_policy_cpus) {
+ freqs.cpu = k;
+ cpufreq_notify_transition(&freqs,
+ CPUFREQ_PRECHANGE);
+ }
+
+ first_cpu = 0;
+ /* all but 16 LSB are reserved, treat them with care */
+ oldmsr &= ~0xffff;
+ msr &= 0xffff;
+ oldmsr |= msr;
+ }
- freqs.cpu = cpu;
- freqs.old = extract_clock(oldmsr, cpu, 0);
- freqs.new = extract_clock(msr, cpu, 0);
+ wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+ break;
- dprintk("target=%dkHz old=%d new=%d msr=%04x\n",
- target_freq, freqs.old, freqs.new, msr);
+ cpu_set(j, covered_cpus);
+ }
- cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ for_each_cpu_mask(k, online_policy_cpus) {
+ freqs.cpu = k;
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
- /* all but 16 LSB are "reserved", so treat them with
- care */
- oldmsr &= ~0xffff;
- msr &= 0xffff;
- oldmsr |= msr;
+ if (unlikely(retval)) {
+ /*
+ * We have failed halfway through the frequency change.
+ * We have sent callbacks to policy->cpus and
+ * MSRs have already been written on coverd_cpus.
+ * Best effort undo..
+ */
- wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+ if (!cpus_empty(covered_cpus)) {
+ for_each_cpu_mask(j, covered_cpus) {
+ set_cpus_allowed(current, cpumask_of_cpu(j));
+ wrmsr(MSR_IA32_PERF_CTL, oldmsr, h);
+ }
+ }
- cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ tmp = freqs.new;
+ freqs.new = freqs.old;
+ freqs.old = tmp;
+ for_each_cpu_mask(j, online_policy_cpus) {
+ freqs.cpu = j;
+ cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+ cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+ }
+ }
- retval = 0;
migrate_end:
set_cpus_allowed(current, saved_mask);
- return (retval);
+ return 0;
}
static struct freq_attr* centrino_attr[] = {
@@ -690,12 +791,25 @@ static int __init centrino_init(void)
if (!cpu_has(cpu, X86_FEATURE_EST))
return -ENODEV;
+ centrino_cpu_early_init_acpi();
+
return cpufreq_register_driver(&centrino_driver);
}
static void __exit centrino_exit(void)
{
+#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
+ unsigned int j;
+#endif
+
cpufreq_unregister_driver(&centrino_driver);
+
+#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
+ for_each_possible_cpu(j) {
+ kfree(acpi_perf_data[j]);
+ acpi_perf_data[j] = NULL;
+ }
+#endif
}
MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>");
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index 00f2e05..fc32c80 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -184,7 +184,7 @@ static void __init geode_configure(void)
#ifdef CONFIG_PCI
-static struct pci_device_id cyrix_55x0[] = {
+static struct pci_device_id __initdata cyrix_55x0[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
{ },
@@ -272,14 +272,15 @@ static void __init init_cyrix(struct cpuinfo_x86 *c)
printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
isa_dma_bridge_buggy = 2;
-#endif
- c->x86_cache_size=16; /* Yep 16K integrated cache thats it */
-
+
+
/*
* The 5510/5520 companion chips have a funky PIT.
*/
if (pci_dev_present(cyrix_55x0))
pit_latch_buggy = 1;
+#endif
+ c->x86_cache_size=16; /* Yep 16K integrated cache thats it */
/* GXm supports extended cpuid levels 'ala' AMD */
if (c->cpuid_level == 2) {
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 5386b29..10afc64 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
select_idle_routine(c);
l2 = init_intel_cacheinfo(c);
+ if (c->cpuid_level > 9 ) {
+ unsigned eax = cpuid_eax(10);
+ /* Check for version and the number of counters */
+ if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+ set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+ }
/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index c8547a6..6c37b4f 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -4,6 +4,7 @@
* Changes:
* Venkatesh Pallipadi : Adding cache identification through cpuid(4)
* Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
+ * Andi Kleen : CPUID4 emulation on AMD.
*/
#include <linux/init.h>
@@ -130,25 +131,111 @@ struct _cpuid4_info {
cpumask_t shared_cpu_map;
};
-static unsigned short num_cache_leaves;
+unsigned short num_cache_leaves;
+
+/* AMD doesn't have CPUID4. Emulate it here to report the same
+ information to the user. This makes some assumptions about the machine:
+ No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+
+ In theory the TLBs could be reported as fake type (they are in "dummy").
+ Maybe later */
+union l1_cache {
+ struct {
+ unsigned line_size : 8;
+ unsigned lines_per_tag : 8;
+ unsigned assoc : 8;
+ unsigned size_in_kb : 8;
+ };
+ unsigned val;
+};
+
+union l2_cache {
+ struct {
+ unsigned line_size : 8;
+ unsigned lines_per_tag : 4;
+ unsigned assoc : 4;
+ unsigned size_in_kb : 16;
+ };
+ unsigned val;
+};
+
+static unsigned short assocs[] = {
+ [1] = 1, [2] = 2, [4] = 4, [6] = 8,
+ [8] = 16,
+ [0xf] = 0xffff // ??
+ };
+static unsigned char levels[] = { 1, 1, 2 };
+static unsigned char types[] = { 1, 2, 3 };
+
+static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+ union _cpuid4_leaf_ebx *ebx,
+ union _cpuid4_leaf_ecx *ecx)
+{
+ unsigned dummy;
+ unsigned line_size, lines_per_tag, assoc, size_in_kb;
+ union l1_cache l1i, l1d;
+ union l2_cache l2;
+
+ eax->full = 0;
+ ebx->full = 0;
+ ecx->full = 0;
+
+ cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
+ cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
+
+ if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
+ return;
+
+ eax->split.is_self_initializing = 1;
+ eax->split.type = types[leaf];
+ eax->split.level = levels[leaf];
+ eax->split.num_threads_sharing = 0;
+ eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+
+ if (leaf <= 1) {
+ union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
+ assoc = l1->assoc;
+ line_size = l1->line_size;
+ lines_per_tag = l1->lines_per_tag;
+ size_in_kb = l1->size_in_kb;
+ } else {
+ assoc = l2.assoc;
+ line_size = l2.line_size;
+ lines_per_tag = l2.lines_per_tag;
+ /* cpu_data has errata corrections for K7 applied */
+ size_in_kb = current_cpu_data.x86_cache_size;
+ }
+
+ if (assoc == 0xf)
+ eax->split.is_fully_associative = 1;
+ ebx->split.coherency_line_size = line_size - 1;
+ ebx->split.ways_of_associativity = assocs[assoc] - 1;
+ ebx->split.physical_line_partition = lines_per_tag - 1;
+ ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
+ (ebx->split.ways_of_associativity + 1) - 1;
+}
static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
- unsigned int eax, ebx, ecx, edx;
- union _cpuid4_leaf_eax cache_eax;
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
+ unsigned edx;
- cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
- cache_eax.full = eax;
- if (cache_eax.split.type == CACHE_TYPE_NULL)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ amd_cpuid4(index, &eax, &ebx, &ecx);
+ else
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */
- this_leaf->eax.full = eax;
- this_leaf->ebx.full = ebx;
- this_leaf->ecx.full = ecx;
- this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) *
- (this_leaf->ebx.split.coherency_line_size + 1) *
- (this_leaf->ebx.split.physical_line_partition + 1) *
- (this_leaf->ebx.split.ways_of_associativity + 1);
+ this_leaf->eax = eax;
+ this_leaf->ebx = ebx;
+ this_leaf->ecx = ecx;
+ this_leaf->size = (ecx.split.number_of_sets + 1) *
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index f94cdb7..a19fcb2 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -52,7 +52,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
/* VIA/Cyrix/Centaur-defined */
NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index 2b0cfce..0c88d3e 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -114,19 +114,15 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
halt();
- for(;;);
+ for (;;)
+ cpu_relax();
return 1;
}
-/*
- * By using the NMI code instead of a vector we just sneak thru the
- * word generator coming out with just what we want. AND it does
- * not matter if clustered_apic_mode is set or not.
- */
static void smp_send_nmi_allbutself(void)
{
- send_IPI_allbutself(APIC_DM_NMI);
+ send_IPI_allbutself(NMI_VECTOR);
}
static void nmi_shootdown_cpus(void)
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
index 5edb1d3..b4d14c2 100644
--- a/arch/i386/kernel/doublefault.c
+++ b/arch/i386/kernel/doublefault.c
@@ -44,7 +44,8 @@ static void doublefault_fn(void)
}
}
- for (;;) /* nothing */;
+ for (;;)
+ cpu_relax();
}
struct tss_struct doublefault_tss __cacheline_aligned = {
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index cfc683f..e6e4506 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -48,6 +48,7 @@
#include <asm/smp.h>
#include <asm/page.h>
#include <asm/desc.h>
+#include <asm/dwarf2.h>
#include "irq_vectors.h"
#define nr_syscalls ((syscall_table_size)/4)
@@ -85,31 +86,67 @@ VM_MASK = 0x00020000
#define SAVE_ALL \
cld; \
pushl %es; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ /*CFI_REL_OFFSET es, 0;*/\
pushl %ds; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ /*CFI_REL_OFFSET ds, 0;*/\
pushl %eax; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET eax, 0;\
pushl %ebp; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ebp, 0;\
pushl %edi; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET edi, 0;\
pushl %esi; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET esi, 0;\
pushl %edx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET edx, 0;\
pushl %ecx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ecx, 0;\
pushl %ebx; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ CFI_REL_OFFSET ebx, 0;\
movl $(__USER_DS), %edx; \
movl %edx, %ds; \
movl %edx, %es;
#define RESTORE_INT_REGS \
popl %ebx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ebx;\
popl %ecx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ecx;\
popl %edx; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE edx;\
popl %esi; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE esi;\
popl %edi; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE edi;\
popl %ebp; \
- popl %eax
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE ebp;\
+ popl %eax; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ CFI_RESTORE eax
#define RESTORE_REGS \
RESTORE_INT_REGS; \
1: popl %ds; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ /*CFI_RESTORE ds;*/\
2: popl %es; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ /*CFI_RESTORE es;*/\
.section .fixup,"ax"; \
3: movl $0,(%esp); \
jmp 1b; \
@@ -122,13 +159,43 @@ VM_MASK = 0x00020000
.long 2b,4b; \
.previous
+#define RING0_INT_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, 3*4;\
+ /*CFI_OFFSET cs, -2*4;*/\
+ CFI_OFFSET eip, -3*4
+
+#define RING0_EC_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, 4*4;\
+ /*CFI_OFFSET cs, -2*4;*/\
+ CFI_OFFSET eip, -3*4
+
+#define RING0_PTREGS_FRAME \
+ CFI_STARTPROC simple;\
+ CFI_DEF_CFA esp, OLDESP-EBX;\
+ /*CFI_OFFSET cs, CS-OLDESP;*/\
+ CFI_OFFSET eip, EIP-OLDESP;\
+ /*CFI_OFFSET es, ES-OLDESP;*/\
+ /*CFI_OFFSET ds, DS-OLDESP;*/\
+ CFI_OFFSET eax, EAX-OLDESP;\
+ CFI_OFFSET ebp, EBP-OLDESP;\
+ CFI_OFFSET edi, EDI-OLDESP;\
+ CFI_OFFSET esi, ESI-OLDESP;\
+ CFI_OFFSET edx, EDX-OLDESP;\
+ CFI_OFFSET ecx, ECX-OLDESP;\
+ CFI_OFFSET ebx, EBX-OLDESP
ENTRY(ret_from_fork)
+ CFI_STARTPROC
pushl %eax
+ CFI_ADJUST_CFA_OFFSET -4
call schedule_tail
GET_THREAD_INFO(%ebp)
popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
jmp syscall_exit
+ CFI_ENDPROC
/*
* Return to user mode is not as complex as all this looks,
@@ -139,6 +206,7 @@ ENTRY(ret_from_fork)
# userspace resumption stub bypassing syscall exit tracing
ALIGN
+ RING0_PTREGS_FRAME
ret_from_exception:
preempt_stop
ret_from_intr:
@@ -171,20 +239,33 @@ need_resched:
call preempt_schedule_irq
jmp need_resched
#endif
+ CFI_ENDPROC
/* SYSENTER_RETURN points to after the "sysenter" instruction in
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
# sysenter call handler stub
ENTRY(sysenter_entry)
+ CFI_STARTPROC simple
+ CFI_DEF_CFA esp, 0
+ CFI_REGISTER esp, ebp
movl TSS_sysenter_esp0(%esp),%esp
sysenter_past_esp:
sti
pushl $(__USER_DS)
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ss, 0*/
pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esp, 0
pushfl
+ CFI_ADJUST_CFA_OFFSET 4
pushl $(__USER_CS)
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET cs, 0*/
pushl $SYSENTER_RETURN
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eip, 0
/*
* Load the potential sixth argument from user stack.
@@ -199,6 +280,7 @@ sysenter_past_esp:
.previous
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
@@ -219,11 +301,14 @@ sysenter_past_esp:
xorl %ebp,%ebp
sti
sysexit
+ CFI_ENDPROC
# system call handler stub
ENTRY(system_call)
+ RING0_INT_FRAME # can't unwind into user space anyway
pushl %eax # save orig_eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
testl $TF_MASK,EFLAGS(%esp)
@@ -256,10 +341,12 @@ restore_all:
movb CS(%esp), %al
andl $(VM_MASK | (4 << 8) | 3), %eax
cmpl $((4 << 8) | 3), %eax
+ CFI_REMEMBER_STATE
je ldt_ss # returning to user-space with LDT SS
restore_nocheck:
RESTORE_REGS
addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
1: iret
.section .fixup,"ax"
iret_exc:
@@ -273,6 +360,7 @@ iret_exc:
.long 1b,iret_exc
.previous
+ CFI_RESTORE_STATE
ldt_ss:
larl OLDSS(%esp), %eax
jnz restore_nocheck
@@ -285,11 +373,13 @@ ldt_ss:
* CPUs, which we can try to work around to make
* dosemu and wine happy. */
subl $8, %esp # reserve space for switch16 pointer
+ CFI_ADJUST_CFA_OFFSET 8
cli
movl %esp, %eax
/* Set up the 16bit stack frame with switch32 pointer on top,
* and a switch16 pointer on top of the current frame. */
call setup_x86_bogus_stack
+ CFI_ADJUST_CFA_OFFSET -8 # frame has moved
RESTORE_REGS
lss 20+4(%esp), %esp # switch to 16bit stack
1: iret
@@ -297,9 +387,11 @@ ldt_ss:
.align 4
.long 1b,iret_exc
.previous
+ CFI_ENDPROC
# perform work that needs to be done immediately before resumption
ALIGN
+ RING0_PTREGS_FRAME # can't unwind into user space anyway
work_pending:
testb $_TIF_NEED_RESCHED, %cl
jz work_notifysig
@@ -329,8 +421,10 @@ work_notifysig: # deal with pending signals and
work_notifysig_v86:
#ifdef CONFIG_VM86
pushl %ecx # save ti_flags for do_notify_resume
+ CFI_ADJUST_CFA_OFFSET 4
call save_v86_state # %eax contains pt_regs pointer
popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
movl %eax, %esp
xorl %edx, %edx
call do_notify_resume
@@ -363,19 +457,21 @@ syscall_exit_work:
movl $1, %edx
call do_syscall_trace
jmp resume_userspace
+ CFI_ENDPROC
- ALIGN
+ RING0_INT_FRAME # can't unwind into user space anyway
syscall_fault:
pushl %eax # save orig_eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
movl $-EFAULT,EAX(%esp)
jmp resume_userspace
- ALIGN
syscall_badsys:
movl $-ENOSYS,EAX(%esp)
jmp resume_userspace
+ CFI_ENDPROC
#define FIXUP_ESPFIX_STACK \
movl %esp, %eax; \
@@ -387,16 +483,21 @@ syscall_badsys:
movl %eax, %esp;
#define UNWIND_ESPFIX_STACK \
pushl %eax; \
+ CFI_ADJUST_CFA_OFFSET 4; \
movl %ss, %eax; \
/* see if on 16bit stack */ \
cmpw $__ESPFIX_SS, %ax; \
- jne 28f; \
- movl $__KERNEL_DS, %edx; \
- movl %edx, %ds; \
- movl %edx, %es; \
+ je 28f; \
+27: popl %eax; \
+ CFI_ADJUST_CFA_OFFSET -4; \
+.section .fixup,"ax"; \
+28: movl $__KERNEL_DS, %eax; \
+ movl %eax, %ds; \
+ movl %eax, %es; \
/* switch to 32bit stack */ \
- FIXUP_ESPFIX_STACK \
-28: popl %eax;
+ FIXUP_ESPFIX_STACK; \
+ jmp 27b; \
+.previous
/*
* Build the entry stubs and pointer table with
@@ -408,9 +509,14 @@ ENTRY(interrupt)
vector=0
ENTRY(irq_entries_start)
+ RING0_INT_FRAME
.rept NR_IRQS
ALIGN
+ .if vector
+ CFI_ADJUST_CFA_OFFSET -4
+ .endif
1: pushl $vector-256
+ CFI_ADJUST_CFA_OFFSET 4
jmp common_interrupt
.data
.long 1b
@@ -424,60 +530,99 @@ common_interrupt:
movl %esp,%eax
call do_IRQ
jmp ret_from_intr
+ CFI_ENDPROC
#define BUILD_INTERRUPT(name, nr) \
ENTRY(name) \
+ RING0_INT_FRAME; \
pushl $nr-256; \
- SAVE_ALL \
+ CFI_ADJUST_CFA_OFFSET 4; \
+ SAVE_ALL; \
movl %esp,%eax; \
call smp_/**/name; \
- jmp ret_from_intr;
+ jmp ret_from_intr; \
+ CFI_ENDPROC
/* The include is where all of the SMP etc. interrupts come from */
#include "entry_arch.h"
ENTRY(divide_error)
+ RING0_INT_FRAME
pushl $0 # no error code
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_divide_error
+ CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
pushl %ds
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ds, 0*/
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eax, 0
xorl %eax, %eax
pushl %ebp
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebp, 0
pushl %edi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edi, 0
pushl %esi
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esi, 0
pushl %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET edx, 0
decl %eax # eax = -1
pushl %ecx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ecx, 0
pushl %ebx
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET ebx, 0
cld
pushl %es
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET es, 0*/
UNWIND_ESPFIX_STACK
popl %ecx
+ CFI_ADJUST_CFA_OFFSET -4
+ /*CFI_REGISTER es, ecx*/
movl ES(%esp), %edi # get the function address
movl ORIG_EAX(%esp), %edx # get the error code
movl %eax, ORIG_EAX(%esp)
movl %ecx, ES(%esp)
+ /*CFI_REL_OFFSET es, ES*/
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
movl %esp,%eax # pt_regs pointer
call *%edi
jmp ret_from_exception
+ CFI_ENDPROC
ENTRY(coprocessor_error)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_coprocessor_error
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(simd_coprocessor_error)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_simd_coprocessor_error
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(device_not_available)
+ RING0_INT_FRAME
pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
movl %cr0, %eax
testl $0x4, %eax # EM (math emulation bit)
@@ -487,9 +632,12 @@ ENTRY(device_not_available)
jmp ret_from_exception
device_not_available_emulate:
pushl $0 # temporary storage for ORIG_EIP
+ CFI_ADJUST_CFA_OFFSET 4
call math_emulate
addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
jmp ret_from_exception
+ CFI_ENDPROC
/*
* Debug traps and NMI can happen at the one SYSENTER instruction
@@ -514,16 +662,19 @@ label: \
pushl $sysenter_past_esp
KPROBE_ENTRY(debug)
+ RING0_INT_FRAME
cmpl $sysenter_entry,(%esp)
jne debug_stack_correct
FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
debug_stack_correct:
pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
xorl %edx,%edx # error code 0
movl %esp,%eax # pt_regs pointer
call do_debug
jmp ret_from_exception
+ CFI_ENDPROC
.previous .text
/*
* NMI is doubly nasty. It can happen _while_ we're handling
@@ -534,14 +685,18 @@ debug_stack_correct:
* fault happened on the sysenter path.
*/
ENTRY(nmi)
+ RING0_INT_FRAME
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
je nmi_16bit_stack
cmpl $sysenter_entry,(%esp)
je nmi_stack_fixup
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
movl %esp,%eax
/* Do not access memory above the end of our stack page,
* it might not exist.
@@ -549,16 +704,19 @@ ENTRY(nmi)
andl $(THREAD_SIZE-1),%eax
cmpl $(THREAD_SIZE-20),%eax
popl %eax
+ CFI_ADJUST_CFA_OFFSET -4
jae nmi_stack_correct
cmpl $sysenter_entry,12(%esp)
je nmi_debug_stack_check
nmi_stack_correct:
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
jmp restore_all
+ CFI_ENDPROC
nmi_stack_fixup:
FIX_STACK(12,nmi_stack_correct, 1)
@@ -574,94 +732,177 @@ nmi_debug_stack_check:
jmp nmi_stack_correct
nmi_16bit_stack:
+ RING0_INT_FRAME
/* create the pointer to lss back */
pushl %ss
+ CFI_ADJUST_CFA_OFFSET 4
pushl %esp
+ CFI_ADJUST_CFA_OFFSET 4
movzwl %sp, %esp
addw $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
pushl 16(%esp)
+ CFI_ADJUST_CFA_OFFSET 4
.endr
pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
FIXUP_ESPFIX_STACK # %eax == %esp
+ CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
xorl %edx,%edx # zero error code
call do_nmi
RESTORE_REGS
lss 12+4(%esp), %esp # back to 16bit stack
1: iret
+ CFI_ENDPROC
.section __ex_table,"a"
.align 4
.long 1b,iret_exc
.previous
KPROBE_ENTRY(int3)
+ RING0_INT_FRAME
pushl $-1 # mark this as an int
+ CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_int3
jmp ret_from_exception
+ CFI_ENDPROC
.previous .text
ENTRY(overflow)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_overflow
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(bounds)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_bounds
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(invalid_op)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_invalid_op
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(coprocessor_segment_overrun)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_coprocessor_segment_overrun
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(invalid_TSS)
+ RING0_EC_FRAME
pushl $do_invalid_TSS
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(segment_not_present)
+ RING0_EC_FRAME
pushl $do_segment_not_present
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
ENTRY(stack_segment)
+ RING0_EC_FRAME
pushl $do_stack_segment
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
KPROBE_ENTRY(general_protection)
+ RING0_EC_FRAME
pushl $do_general_protection
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
.previous .text
ENTRY(alignment_check)
+ RING0_EC_FRAME
pushl $do_alignment_check
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
KPROBE_ENTRY(page_fault)
+ RING0_EC_FRAME
pushl $do_page_fault
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
.previous .text
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl machine_check_vector
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
#endif
ENTRY(spurious_interrupt_bug)
+ RING0_INT_FRAME
pushl $0
+ CFI_ADJUST_CFA_OFFSET 4
pushl $do_spurious_interrupt_bug
+ CFI_ADJUST_CFA_OFFSET 4
jmp error_code
+ CFI_ENDPROC
+
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+ CFI_STARTPROC
+ movl 4(%esp), %edx
+ movl (%esp), %ecx
+ leal 4(%esp), %eax
+ movl %ebx, EBX(%edx)
+ xorl %ebx, %ebx
+ movl %ebx, ECX(%edx)
+ movl %ebx, EDX(%edx)
+ movl %esi, ESI(%edx)
+ movl %edi, EDI(%edx)
+ movl %ebp, EBP(%edx)
+ movl %ebx, EAX(%edx)
+ movl $__USER_DS, DS(%edx)
+ movl $__USER_DS, ES(%edx)
+ movl %ebx, ORIG_EAX(%edx)
+ movl %ecx, EIP(%edx)
+ movl 12(%esp), %ecx
+ movl $__KERNEL_CS, CS(%edx)
+ movl %ebx, EFLAGS(%edx)
+ movl %eax, OLDESP(%edx)
+ movl 8(%esp), %eax
+ movl %ecx, 8(%esp)
+ movl EBX(%edx), %ebx
+ movl $__KERNEL_DS, OLDSS(%edx)
+ jmpl *%eax
+ CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif
.section .rodata,"a"
#include "syscall_table.S"
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
new file mode 100644
index 0000000..c6737c3
--- /dev/null
+++ b/arch/i386/kernel/hpet.c
@@ -0,0 +1,67 @@
+#include <linux/clocksource.h>
+#include <linux/errno.h>
+#include <linux/hpet.h>
+#include <linux/init.h>
+
+#include <asm/hpet.h>
+#include <asm/io.h>
+
+#define HPET_MASK CLOCKSOURCE_MASK(32)
+#define HPET_SHIFT 22
+
+/* FSEC = 10^-15 NSEC = 10^-9 */
+#define FSEC_PER_NSEC 1000000
+
+static void *hpet_ptr;
+
+static cycle_t read_hpet(void)
+{
+ return (cycle_t)readl(hpet_ptr);
+}
+
+static struct clocksource clocksource_hpet = {
+ .name = "hpet",
+ .rating = 250,
+ .read = read_hpet,
+ .mask = HPET_MASK,
+ .mult = 0, /* set below */
+ .shift = HPET_SHIFT,
+ .is_continuous = 1,
+};
+
+static int __init init_hpet_clocksource(void)
+{
+ unsigned long hpet_period;
+ void __iomem* hpet_base;
+ u64 tmp;
+
+ if (!hpet_address)
+ return -ENODEV;
+
+ /* calculate the hpet address: */
+ hpet_base =
+ (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+ hpet_ptr = hpet_base + HPET_COUNTER;
+
+ /* calculate the frequency: */
+ hpet_period = readl(hpet_base + HPET_PERIOD);
+
+ /*
+ * hpet period is in femto seconds per cycle
+ * so we need to convert this to ns/cyc units
+ * aproximated by mult/2^shift
+ *
+ * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+ * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+ * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+ * (fsec/cyc << shift)/1000000 = mult
+ * (hpet_period << shift)/FSEC_PER_NSEC = mult
+ */
+ tmp = (u64)hpet_period << HPET_SHIFT;
+ do_div(tmp, FSEC_PER_NSEC);
+ clocksource_hpet.mult = (u32)tmp;
+
+ return clocksource_register(&clocksource_hpet);
+}
+
+module_init(init_hpet_clocksource);
diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c
index d755247..c435197 100644
--- a/arch/i386/kernel/i387.c
+++ b/arch/i386/kernel/i387.c
@@ -25,7 +25,7 @@
#define HAVE_HWFP 1
#endif
-static unsigned long mxcsr_feature_mask = 0xffffffff;
+static unsigned long mxcsr_feature_mask __read_mostly = 0xffffffff;
void mxcsr_feature_mask_init(void)
{
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
new file mode 100644
index 0000000..477b24d
--- /dev/null
+++ b/arch/i386/kernel/i8253.c
@@ -0,0 +1,118 @@
+/*
+ * i8253.c 8253/PIT functions
+ *
+ */
+#include <linux/clocksource.h>
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/sysdev.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <asm/smp.h>
+#include <asm/delay.h>
+#include <asm/i8253.h>
+#include <asm/io.h>
+
+#include "io_ports.h"
+
+DEFINE_SPINLOCK(i8253_lock);
+EXPORT_SYMBOL(i8253_lock);
+
+void setup_pit_timer(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
+ udelay(10);
+ outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
+ udelay(10);
+ outb(LATCH >> 8 , PIT_CH0); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+/*
+ * Since the PIT overflows every tick, its not very useful
+ * to just read by itself. So use jiffies to emulate a free
+ * running counter:
+ */
+static cycle_t pit_read(void)
+{
+ unsigned long flags;
+ int count;
+ u32 jifs;
+ static int old_count;
+ static u32 old_jifs;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ /*
+ * Although our caller may have the read side of xtime_lock,
+ * this is now a seqlock, and we are cheating in this routine
+ * by having side effects on state that we cannot undo if
+ * there is a collision on the seqlock and our caller has to
+ * retry. (Namely, old_jifs and old_count.) So we must treat
+ * jiffies as volatile despite the lock. We read jiffies
+ * before latching the timer count to guarantee that although
+ * the jiffies value might be older than the count (that is,
+ * the counter may underflow between the last point where
+ * jiffies was incremented and the point where we latch the
+ * count), it cannot be newer.
+ */
+ jifs = jiffies;
+ outb_p(0x00, PIT_MODE); /* latch the count ASAP */
+ count = inb_p(PIT_CH0); /* read the latched count */
+ count |= inb_p(PIT_CH0) << 8;
+
+ /* VIA686a test code... reset the latch if count > max + 1 */
+ if (count > LATCH) {
+ outb_p(0x34, PIT_MODE);
+ outb_p(LATCH & 0xff, PIT_CH0);
+ outb(LATCH >> 8, PIT_CH0);
+ count = LATCH - 1;
+ }
+
+ /*
+ * It's possible for count to appear to go the wrong way for a
+ * couple of reasons:
+ *
+ * 1. The timer counter underflows, but we haven't handled the
+ * resulting interrupt and incremented jiffies yet.
+ * 2. Hardware problem with the timer, not giving us continuous time,
+ * the counter does small "jumps" upwards on some Pentium systems,
+ * (see c't 95/10 page 335 for Neptun bug.)
+ *
+ * Previous attempts to handle these cases intelligently were
+ * buggy, so we just do the simple thing now.
+ */
+ if (count > old_count && jifs == old_jifs) {
+ count = old_count;
+ }
+ old_count = count;
+ old_jifs = jifs;
+
+ spin_unlock_irqrestore(&i8253_lock, flags);
+
+ count = (LATCH - 1) - count;
+
+ return (cycle_t)(jifs * LATCH) + count;
+}
+
+static struct clocksource clocksource_pit = {
+ .name = "pit",
+ .rating = 110,
+ .read = pit_read,
+ .mask = CLOCKSOURCE_MASK(32),
+ .mult = 0,
+ .shift = 20,
+};
+
+static int __init init_pit_clocksource(void)
+{
+ if (num_possible_cpus() > 4) /* PIT does not scale! */
+ return 0;
+
+ clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
+ return clocksource_register(&clocksource_pit);
+}
+module_init(init_pit_clocksource);
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index 323ef8a..b7636b9 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -271,8 +271,8 @@ static int i8259A_shutdown(struct sys_device *dev)
* the kernel initialization code can get it
* out of.
*/
- outb(0xff, 0x21); /* mask all of 8259A-1 */
- outb(0xff, 0xA1); /* mask all of 8259A-1 */
+ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
+ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */
return 0;
}
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index d70f2ad..72ae414 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -38,6 +38,7 @@
#include <asm/desc.h>
#include <asm/timer.h>
#include <asm/i8259.h>
+#include <asm/nmi.h>
#include <mach_apic.h>
@@ -50,6 +51,7 @@ atomic_t irq_mis_count;
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
int timer_over_8254 __initdata = 1;
@@ -267,7 +269,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
# include <linux/slab.h> /* kmalloc() */
# include <linux/timer.h> /* time_after() */
-# ifdef CONFIG_BALANCED_IRQ_DEBUG
+#ifdef CONFIG_BALANCED_IRQ_DEBUG
# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
# define Dprintk(x...) do { TDprintk(x); } while (0)
# else
@@ -275,10 +277,15 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
# define Dprintk(x...)
# endif
-
#define IRQBALANCE_CHECK_ARCH -999
-static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
-static int physical_balance = 0;
+#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
+#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
+#define BALANCED_IRQ_MORE_DELTA (HZ/10)
+#define BALANCED_IRQ_LESS_DELTA (HZ)
+
+static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
+static int physical_balance __read_mostly;
+static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
static struct irq_cpu_info {
unsigned long * last_irq;
@@ -297,12 +304,14 @@ static struct irq_cpu_info {
#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
-#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
-#define BALANCED_IRQ_MORE_DELTA (HZ/10)
-#define BALANCED_IRQ_LESS_DELTA (HZ)
+static cpumask_t balance_irq_affinity[NR_IRQS] = {
+ [0 ... NR_IRQS-1] = CPU_MASK_ALL
+};
-static long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
+void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+ balance_irq_affinity[irq] = mask;
+}
static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
unsigned long now, int direction)
@@ -340,7 +349,7 @@ static inline void balance_irq(int cpu, int irq)
if (irqbalance_disabled)
return;
- cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
+ cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
new_cpu = move(cpu, allowed_mask, now, 1);
if (cpu != new_cpu) {
set_pending_irq(irq, cpumask_of_cpu(new_cpu));
@@ -529,7 +538,9 @@ tryanotherirq:
}
}
- cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
+ cpus_and(allowed_mask,
+ cpu_online_map,
+ balance_irq_affinity[selected_irq]);
target_cpu_mask = cpumask_of_cpu(min_loaded);
cpus_and(tmp, target_cpu_mask, allowed_mask);
@@ -1152,10 +1163,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
int assign_irq_vector(int irq)
{
static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+ unsigned long flags;
+ int vector;
+
+ BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
- BUG_ON(irq >= NR_IRQ_VECTORS);
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+ spin_lock_irqsave(&vector_lock, flags);
+
+ if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+ spin_unlock_irqrestore(&vector_lock, flags);
return IO_APIC_VECTOR(irq);
+ }
next:
current_vector += 8;
if (current_vector == SYSCALL_VECTOR)
@@ -1163,16 +1181,21 @@ next:
if (current_vector >= FIRST_SYSTEM_VECTOR) {
offset++;
- if (!(offset%8))
+ if (!(offset%8)) {
+ spin_unlock_irqrestore(&vector_lock, flags);
return -ENOSPC;
+ }
current_vector = FIRST_DEVICE_VECTOR + offset;
}
- vector_irq[current_vector] = irq;
+ vector = current_vector;
+ vector_irq[vector] = irq;
if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = current_vector;
+ IO_APIC_VECTOR(irq) = vector;
- return current_vector;
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ return vector;
}
static struct hw_interrupt_type ioapic_level_type;
@@ -1184,21 +1207,14 @@ static struct hw_interrupt_type ioapic_edge_type;
static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
{
- if (use_pci_vector() && !platform_legacy_irq(irq)) {
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[vector].handler = &ioapic_level_type;
- else
- irq_desc[vector].handler = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[vector]);
- } else {
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[irq].handler = &ioapic_level_type;
- else
- irq_desc[irq].handler = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[irq]);
- }
+ unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+ irq_desc[idx].handler = &ioapic_level_type;
+ else
+ irq_desc[idx].handler = &ioapic_edge_type;
+ set_intr_gate(vector, interrupt[idx]);
}
static void __init setup_IO_APIC_irqs(void)
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index f3a9c78..061533e 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -42,8 +42,8 @@ union irq_ctx {
u32 stack[THREAD_SIZE/sizeof(u32)];
};
-static union irq_ctx *hardirq_ctx[NR_CPUS];
-static union irq_ctx *softirq_ctx[NR_CPUS];
+static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
+static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
#endif
/*
@@ -95,6 +95,14 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
irqctx->tinfo.task = curctx->tinfo.task;
irqctx->tinfo.previous_esp = current_stack_pointer;
+ /*
+ * Copy the softirq bits in preempt_count so that the
+ * softirq checks work in the hardirq context.
+ */
+ irqctx->tinfo.preempt_count =
+ irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK |
+ curctx->tinfo.preempt_count & SOFTIRQ_MASK;
+
asm volatile(
" xchgl %%ebx,%%esp \n"
" call __do_IRQ \n"
@@ -219,7 +227,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (i == 0) {
seq_printf(p, " ");
for_each_online_cpu(j)
- seq_printf(p, "CPU%d ",j);
+ seq_printf(p, "CPU%-8d",j);
seq_putc(p, '\n');
}
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 38806f4..727e419 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -57,34 +57,85 @@ static __always_inline void set_jmp_op(void *from, void *to)
/*
* returns non-zero if opcodes can be boosted.
*/
-static __always_inline int can_boost(kprobe_opcode_t opcode)
+static __always_inline int can_boost(kprobe_opcode_t *opcodes)
{
- switch (opcode & 0xf0 ) {
+#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
+ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
+ (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
+ (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
+ (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
+ << (row % 32))
+ /*
+ * Undefined/reserved opcodes, conditional jump, Opcode Extension
+ * Groups, and some special opcodes can not be boost.
+ */
+ static const unsigned long twobyte_is_boostable[256 / 32] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ------------------------------- */
+ W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */
+ W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */
+ W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */
+ W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */
+ W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */
+ W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */
+ W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */
+ W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */
+ W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */
+ W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */
+ W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */
+ W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */
+ W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */
+ W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */
+ W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */
+ W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */
+ /* ------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ };
+#undef W
+ kprobe_opcode_t opcode;
+ kprobe_opcode_t *orig_opcodes = opcodes;
+retry:
+ if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
+ return 0;
+ opcode = *(opcodes++);
+
+ /* 2nd-byte opcode */
+ if (opcode == 0x0f) {
+ if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1)
+ return 0;
+ return test_bit(*opcodes, twobyte_is_boostable);
+ }
+
+ switch (opcode & 0xf0) {
+ case 0x60:
+ if (0x63 < opcode && opcode < 0x67)
+ goto retry; /* prefixes */
+ /* can't boost Address-size override and bound */
+ return (opcode != 0x62 && opcode != 0x67);
case 0x70:
return 0; /* can't boost conditional jump */
- case 0x90:
- /* can't boost call and pushf */
- return opcode != 0x9a && opcode != 0x9c;
case 0xc0:
- /* can't boost undefined opcodes and soft-interruptions */
- return (0xc1 < opcode && opcode < 0xc6) ||
- (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf;
+ /* can't boost software-interruptions */
+ return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
case 0xd0:
/* can boost AA* and XLAT */
return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
case 0xe0:
- /* can boost in/out and (may be) jmps */
- return (0xe3 < opcode && opcode != 0xe8);
+ /* can boost in/out and absolute jmps */
+ return ((opcode & 0x04) || opcode == 0xea);
case 0xf0:
+ if ((opcode & 0x0c) == 0 && opcode != 0xf1)
+ goto retry; /* lock/rep(ne) prefix */
/* clear and set flags can be boost */
return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
default:
- /* currently, can't boost 2 bytes opcodes */
- return opcode != 0x0f;
+ if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e)
+ goto retry; /* prefixes */
+ /* can't boost CS override and call */
+ return (opcode != 0x2e && opcode != 0x9a);
}
}
-
/*
* returns non-zero if opcode modifies the interrupt flag.
*/
@@ -109,7 +160,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
p->opcode = *p->addr;
- if (can_boost(p->opcode)) {
+ if (can_boost(p->addr)) {
p->ainsn.boostable = 0;
} else {
p->ainsn.boostable = -1;
@@ -208,7 +259,9 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
struct kprobe_ctlblk *kcb;
#ifdef CONFIG_PREEMPT
unsigned pre_preempt_count = preempt_count();
-#endif /* CONFIG_PREEMPT */
+#else
+ unsigned pre_preempt_count = 1;
+#endif
addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
@@ -285,22 +338,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
/* handler has already set things up, so skip ss setup */
return 1;
- if (p->ainsn.boostable == 1 &&
-#ifdef CONFIG_PREEMPT
- !(pre_preempt_count) && /*
- * This enables booster when the direct
- * execution path aren't preempted.
- */
-#endif /* CONFIG_PREEMPT */
- !p->post_handler && !p->break_handler ) {
+ss_probe:
+ if (pre_preempt_count && p->ainsn.boostable == 1 && !p->post_handler){
/* Boost up -- we can execute copied instructions directly */
reset_current_kprobe();
regs->eip = (unsigned long)p->ainsn.insn;
preempt_enable_no_resched();
return 1;
}
-
-ss_probe:
prepare_singlestep(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
@@ -607,7 +652,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
struct die_args *args = (struct die_args *)data;
int ret = NOTIFY_DONE;
- if (args->regs && user_mode(args->regs))
+ if (args->regs && user_mode_vm(args->regs))
return ret;
switch (val) {
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index e7c138f..0a86588 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -91,7 +91,10 @@ MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
MODULE_LICENSE("GPL");
-#define MICROCODE_VERSION "1.14"
+static int verbose;
+module_param(verbose, int, 0644);
+
+#define MICROCODE_VERSION "1.14a"
#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */
#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */
@@ -122,14 +125,15 @@ static unsigned int user_buffer_size; /* it's size */
typedef enum mc_error_code {
MC_SUCCESS = 0,
- MC_NOTFOUND = 1,
- MC_MARKED = 2,
- MC_ALLOCATED = 3,
+ MC_IGNORED = 1,
+ MC_NOTFOUND = 2,
+ MC_MARKED = 3,
+ MC_ALLOCATED = 4,
} mc_error_code_t;
static struct ucode_cpu_info {
unsigned int sig;
- unsigned int pf;
+ unsigned int pf, orig_pf;
unsigned int rev;
unsigned int cksum;
mc_error_code_t err;
@@ -164,6 +168,7 @@ static void collect_cpu_info (void *unused)
rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
uci->pf = 1 << ((val[1] >> 18) & 7);
}
+ uci->orig_pf = uci->pf;
}
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
@@ -197,21 +202,34 @@ static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_he
pr_debug(" Checksum 0x%x\n", cksum);
if (mc_header->rev < uci->rev) {
- printk(KERN_ERR "microcode: CPU%d not 'upgrading' to earlier revision"
- " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
- goto out;
+ if (uci->err == MC_NOTFOUND) {
+ uci->err = MC_IGNORED;
+ uci->cksum = mc_header->rev;
+ } else if (uci->err == MC_IGNORED && uci->cksum < mc_header->rev)
+ uci->cksum = mc_header->rev;
} else if (mc_header->rev == uci->rev) {
- /* notify the caller of success on this cpu */
- uci->err = MC_SUCCESS;
- goto out;
+ if (uci->err < MC_MARKED) {
+ /* notify the caller of success on this cpu */
+ uci->err = MC_SUCCESS;
+ }
+ } else if (uci->err != MC_ALLOCATED || mc_header->rev > uci->mc->hdr.rev) {
+ pr_debug("microcode: CPU%d found a matching microcode update with "
+ " revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
+ uci->cksum = cksum;
+ uci->pf = pf; /* keep the original mc pf for cksum calculation */
+ uci->err = MC_MARKED; /* found the match */
+ for_each_online_cpu(cpu_num) {
+ if (ucode_cpu_info + cpu_num != uci
+ && ucode_cpu_info[cpu_num].mc == uci->mc) {
+ uci->mc = NULL;
+ break;
+ }
+ }
+ if (uci->mc != NULL) {
+ vfree(uci->mc);
+ uci->mc = NULL;
+ }
}
-
- pr_debug("microcode: CPU%d found a matching microcode update with "
- " revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
- uci->cksum = cksum;
- uci->pf = pf; /* keep the original mc pf for cksum calculation */
- uci->err = MC_MARKED; /* found the match */
-out:
return;
}
@@ -253,10 +271,8 @@ static int find_matching_ucodes (void)
for_each_online_cpu(cpu_num) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
- if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/
- continue;
- if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->pf))
+ if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->orig_pf))
mark_microcode_update(cpu_num, &mc_header, mc_header.sig, mc_header.pf, mc_header.cksum);
}
@@ -295,9 +311,8 @@ static int find_matching_ucodes (void)
}
for_each_online_cpu(cpu_num) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
- if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu*/
- continue;
- if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->pf)) {
+
+ if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->orig_pf)) {
mark_microcode_update(cpu_num, &mc_header, ext_sig.sig, ext_sig.pf, ext_sig.cksum);
}
}
@@ -368,6 +383,13 @@ static void do_update_one (void * unused)
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
if (uci->mc == NULL) {
+ if (verbose) {
+ if (uci->err == MC_SUCCESS)
+ printk(KERN_INFO "microcode: CPU%d already at revision 0x%x\n",
+ cpu_num, uci->rev);
+ else
+ printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num);
+ }
return;
}
@@ -426,6 +448,9 @@ out_free:
ucode_cpu_info[j].mc = NULL;
}
}
+ if (ucode_cpu_info[i].err == MC_IGNORED && verbose)
+ printk(KERN_WARNING "microcode: CPU%d not 'upgrading' to earlier revision"
+ " 0x%x (current=0x%x)\n", i, ucode_cpu_info[i].cksum, ucode_cpu_info[i].rev);
}
out:
return error;
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index d43b498..a76e931 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -14,21 +14,17 @@
*/
#include <linux/config.h>
-#include <linux/mm.h>
#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
+#include <linux/percpu.h>
#include <asm/smp.h>
-#include <asm/div64.h>
#include <asm/nmi.h>
+#include <asm/intel_arch_perfmon.h>
#include "mach_traps.h"
@@ -100,6 +96,9 @@ int nmi_active;
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
* the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -212,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str)
__setup("nmi_watchdog=", setup_nmi_watchdog);
+static void disable_intel_arch_watchdog(void);
+
static void disable_lapic_nmi_watchdog(void)
{
if (nmi_active <= 0)
@@ -221,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void)
wrmsr(MSR_K7_EVNTSEL0, 0, 0);
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ disable_intel_arch_watchdog();
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
@@ -449,6 +454,53 @@ static int setup_p4_watchdog(void)
return 1;
}
+static void disable_intel_arch_watchdog(void)
+{
+ unsigned ebx;
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebp indicates event present.
+ */
+ ebx = cpuid_ebx(10);
+ if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+}
+
+static int setup_intel_arch_watchdog(void)
+{
+ unsigned int evntsel;
+ unsigned ebx;
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebp indicates event present.
+ */
+ ebx = cpuid_ebx(10);
+ if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ return 0;
+
+ nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+
+ clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
+ clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+
+ evntsel = ARCH_PERFMON_EVENTSEL_INT
+ | ARCH_PERFMON_EVENTSEL_OS
+ | ARCH_PERFMON_EVENTSEL_USR
+ | ARCH_PERFMON_NMI_EVENT_SEL
+ | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+ write_watchdog_counter("INTEL_ARCH_PERFCTR0");
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+ return 1;
+}
+
void setup_apic_nmi_watchdog (void)
{
switch (boot_cpu_data.x86_vendor) {
@@ -458,6 +510,11 @@ void setup_apic_nmi_watchdog (void)
setup_k7_watchdog();
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ if (!setup_intel_arch_watchdog())
+ return;
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
@@ -561,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs)
wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
- else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
+ else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
+ nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
/* Only P6 based Pentium M need to re-unmask
* the apic vector but it doesn't hurt
* other P6 variant */
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c
index 5f5b075..0caf146 100644
--- a/arch/i386/kernel/numaq.c
+++ b/arch/i386/kernel/numaq.c
@@ -79,10 +79,12 @@ int __init get_memcfg_numaq(void)
return 1;
}
-static int __init numaq_dsc_disable(void)
+static int __init numaq_tsc_disable(void)
{
- printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
- tsc_disable = 1;
+ if (num_online_nodes() > 1) {
+ printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
+ tsc_disable = 1;
+ }
return 0;
}
-core_initcall(numaq_dsc_disable);
+arch_initcall(numaq_tsc_disable);
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 6259afe..6946b06 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -102,7 +102,7 @@ void default_idle(void)
local_irq_enable();
if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
- clear_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
while (!need_resched()) {
local_irq_disable();
@@ -111,7 +111,7 @@ void default_idle(void)
else
local_irq_enable();
}
- set_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status |= TS_POLLING;
} else {
while (!need_resched())
cpu_relax();
@@ -174,7 +174,7 @@ void cpu_idle(void)
{
int cpu = smp_processor_id();
- set_thread_flag(TIF_POLLING_NRFLAG);
+ current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
while (1) {
@@ -312,7 +312,7 @@ void show_regs(struct pt_regs * regs)
cr3 = read_cr3();
cr4 = read_cr4_safe();
printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
- show_trace(NULL, &regs->esp);
+ show_trace(NULL, regs, &regs->esp);
}
/*
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 846e163..4a65040 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -60,7 +60,7 @@
#include <asm/io_apic.h>
#include <asm/ist.h>
#include <asm/io.h>
-#include "setup_arch_pre.h"
+#include <setup_arch.h>
#include <bios_ebda.h>
/* Forward Declaration. */
@@ -410,8 +410,8 @@ static void __init limit_regions(unsigned long long size)
}
}
-static void __init add_memory_region(unsigned long long start,
- unsigned long long size, int type)
+void __init add_memory_region(unsigned long long start,
+ unsigned long long size, int type)
{
int x;
@@ -474,7 +474,7 @@ static struct change_member *change_point[2*E820MAX] __initdata;
static struct e820entry *overlap_list[E820MAX] __initdata;
static struct e820entry new_bios[E820MAX] __initdata;
-static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
+int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
struct change_member *change_tmp;
unsigned long current_type, last_type;
@@ -643,7 +643,7 @@ static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
* thinkpad 560x, for example, does not cooperate with the memory
* detection code.)
*/
-static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
/* Only one memory region (or negative)? Ignore it */
if (nr_map < 2)
@@ -701,12 +701,6 @@ static inline void copy_edd(void)
}
#endif
-/*
- * Do NOT EVER look at the BIOS memory size location.
- * It does not work on many machines.
- */
-#define LOWMEMSIZE() (0x9f000)
-
static void __init parse_cmdline_early (char ** cmdline_p)
{
char c = ' ', *to = command_line, *from = saved_command_line;
@@ -1423,8 +1417,6 @@ static void __init register_memory(void)
pci_mem_start, gapstart, gapsize);
}
-static char * __init machine_specific_memory_setup(void);
-
#ifdef CONFIG_MCA
static void set_mca_bus(int x)
{
@@ -1547,15 +1539,18 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled)
efi_map_memmap();
-#ifdef CONFIG_X86_IO_APIC
- check_acpi_pci(); /* Checks more than just ACPI actually */
-#endif
-
#ifdef CONFIG_ACPI
/*
* Parse the ACPI tables for possible boot-time SMP configuration.
*/
acpi_boot_table_init();
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+ check_acpi_pci(); /* Checks more than just ACPI actually */
+#endif
+
+#ifdef CONFIG_ACPI
acpi_boot_init();
#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
@@ -1580,6 +1575,7 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
+ tsc_init();
}
static __init int add_pcspkr(void)
@@ -1599,7 +1595,6 @@ static __init int add_pcspkr(void)
}
device_initcall(add_pcspkr);
-#include "setup_arch_post.h"
/*
* Local Variables:
* mode:c
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index d134e96..c10789d 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -114,7 +114,17 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_m
static inline int __prepare_ICR (unsigned int shortcut, int vector)
{
- return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+ unsigned int icr = shortcut | APIC_DEST_LOGICAL;
+
+ switch (vector) {
+ default:
+ icr |= APIC_DM_FIXED | vector;
+ break;
+ case NMI_VECTOR:
+ icr |= APIC_DM_NMI;
+ break;
+ }
+ return icr;
}
static inline int __prepare_ICR2 (unsigned int mask)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 825b2b4..bce5470 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -52,6 +52,7 @@
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
+#include <asm/nmi.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
@@ -257,7 +258,7 @@ static void __init synchronize_tsc_bp (void)
* all APs synchronize but they loop on '== num_cpus'
*/
while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
- mb();
+ cpu_relax();
atomic_set(&tsc_count_stop, 0);
wmb();
/*
@@ -276,7 +277,7 @@ static void __init synchronize_tsc_bp (void)
* Wait for all APs to leave the synchronization point:
*/
while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
- mb();
+ cpu_relax();
atomic_set(&tsc_count_start, 0);
wmb();
atomic_inc(&tsc_count_stop);
@@ -333,19 +334,21 @@ static void __init synchronize_tsc_ap (void)
* this gets called, so we first wait for the BP to
* finish SMP initialization:
*/
- while (!atomic_read(&tsc_start_flag)) mb();
+ while (!atomic_read(&tsc_start_flag))
+ cpu_relax();
for (i = 0; i < NR_LOOPS; i++) {
atomic_inc(&tsc_count_start);
while (atomic_read(&tsc_count_start) != num_booting_cpus())
- mb();
+ cpu_relax();
rdtscll(tsc_values[smp_processor_id()]);
if (i == NR_LOOPS-1)
write_tsc(0, 0);
atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ while (atomic_read(&tsc_count_stop) != num_booting_cpus())
+ cpu_relax();
}
}
#undef NR_LOOPS
@@ -1433,7 +1436,7 @@ int __devinit __cpu_up(unsigned int cpu)
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
while (!cpu_isset(cpu, cpu_online_map))
- mb();
+ cpu_relax();
return 0;
}
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c
index 52b3ed5..989c852 100644
--- a/arch/i386/kernel/srat.c
+++ b/arch/i386/kernel/srat.c
@@ -39,7 +39,6 @@
#define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */
#define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
-#define MAX_PXM_DOMAINS 256 /* 1 byte and no promises about values */
/* bitmap length; _PXM is at most 255 */
#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
@@ -213,19 +212,11 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
node_end_pfn[nid] = memory_chunk->end_pfn;
}
-static u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */
-
-int pxm_to_node(int pxm)
-{
- return pxm_to_nid_map[pxm];
-}
-
/* Parse the ACPI Static Resource Affinity Table */
static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
{
u8 *start, *end, *p;
int i, j, nid;
- u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */
start = (u8 *)(&(sratp->reserved) + 1); /* skip header */
p = start;
@@ -235,10 +226,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
memset(zholes_size, 0, sizeof(zholes_size));
- /* -1 in these maps means not available */
- memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
- memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
-
num_memory_chunks = 0;
while (p < end) {
switch (*p) {
@@ -278,9 +265,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
nodes_clear(node_online_map);
for (i = 0; i < MAX_PXM_DOMAINS; i++) {
if (BMAP_TEST(pxm_bitmap, i)) {
- nid = num_online_nodes();
- pxm_to_nid_map[i] = nid;
- nid_to_pxm_map[nid] = i;
+ int nid = acpi_map_pxm_to_node(i);
node_set_online(nid);
}
}
@@ -288,7 +273,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
/* set cnode id in memory chunk structure */
for (i = 0; i < num_memory_chunks; i++)
- node_memory_chunk[i].nid = pxm_to_nid_map[node_memory_chunk[i].pxm];
+ node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);
printk("pxm bitmap: ");
for (i = 0; i < sizeof(pxm_bitmap); i++) {
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index af56987..dd63d47 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -316,3 +316,4 @@ ENTRY(sys_call_table)
.long sys_sync_file_range
.long sys_tee /* 315 */
.long sys_vmsplice
+ .long sys_move_pages
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 9d30747..5f43d04 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -82,13 +82,6 @@ extern unsigned long wall_jiffies;
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
-#include <asm/i8253.h>
-
-DEFINE_SPINLOCK(i8253_lock);
-EXPORT_SYMBOL(i8253_lock);
-
-struct timer_opts *cur_timer __read_mostly = &timer_none;
-
/*
* This is a special lock that is owned by the CPU and holds the index
* register we are working with. It is required for NMI access to the
@@ -118,99 +111,19 @@ void rtc_cmos_write(unsigned char val, unsigned char addr)
}
EXPORT_SYMBOL(rtc_cmos_write);
-/*
- * This version of gettimeofday has microsecond resolution
- * and better than microsecond precision on fast x86 machines with TSC.
- */
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long seq;
- unsigned long usec, sec;
- unsigned long max_ntp_tick;
-
- do {
- unsigned long lost;
-
- seq = read_seqbegin(&xtime_lock);
-
- usec = cur_timer->get_offset();
- lost = jiffies - wall_jiffies;
-
- /*
- * If time_adjust is negative then NTP is slowing the clock
- * so make sure not to go into next possible interval.
- * Better to lose some accuracy than have time go backwards..
- */
- if (unlikely(time_adjust < 0)) {
- max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
- usec = min(usec, max_ntp_tick);
-
- if (lost)
- usec += lost * max_ntp_tick;
- }
- else if (unlikely(lost))
- usec += lost * (USEC_PER_SEC / HZ);
-
- sec = xtime.tv_sec;
- usec += (xtime.tv_nsec / 1000);
- } while (read_seqretry(&xtime_lock, seq));
-
- while (usec >= 1000000) {
- usec -= 1000000;
- sec++;
- }
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
- time_t wtm_sec, sec = tv->tv_sec;
- long wtm_nsec, nsec = tv->tv_nsec;
-
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irq(&xtime_lock);
- /*
- * This is revolting. We need to set "xtime" correctly. However, the
- * value in this location is the value at the most recent update of
- * wall time. Discover what correction gettimeofday() would have
- * made, and then undo it!
- */
- nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
- nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- ntp_clear();
- write_sequnlock_irq(&xtime_lock);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
static int set_rtc_mmss(unsigned long nowtime)
{
int retval;
-
- WARN_ON(irqs_disabled());
+ unsigned long flags;
/* gets recalled with irq locally disabled */
- spin_lock_irq(&rtc_lock);
+ /* XXX - does irqsave resolve this? -johnstul */
+ spin_lock_irqsave(&rtc_lock, flags);
if (efi_enabled)
retval = efi_set_rtc_mmss(nowtime);
else
retval = mach_set_rtc_mmss(nowtime);
- spin_unlock_irq(&rtc_lock);
+ spin_unlock_irqrestore(&rtc_lock, flags);
return retval;
}
@@ -218,16 +131,6 @@ static int set_rtc_mmss(unsigned long nowtime)
int timer_ack;
-/* monotonic_clock(): returns # of nanoseconds passed since time_init()
- * Note: This function is required to return accurate
- * time even in the absence of multiple timer ticks.
- */
-unsigned long long monotonic_clock(void)
-{
- return cur_timer->monotonic_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -242,11 +145,21 @@ EXPORT_SYMBOL(profile_pc);
#endif
/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * This is the same as the above, except we _also_ save the current
+ * Time Stamp Counter value at the time of the timer interrupt, so that
+ * we later on can estimate the time of day more exactly.
*/
-static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
+irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
+ /*
+ * Here we are in the timer irq handler. We just have irqs locally
+ * disabled but we don't know if the timer_bh is running on the other
+ * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
+ * the irq version of write_lock because as just said we have irq
+ * locally disabled. -arca
+ */
+ write_seqlock(&xtime_lock);
+
#ifdef CONFIG_X86_IO_APIC
if (timer_ack) {
/*
@@ -279,27 +192,6 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
irq = inb_p( 0x61 ); /* read the current state */
outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
}
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
-
- cur_timer->mark_offset();
-
- do_timer_interrupt(irq, regs);
write_sequnlock(&xtime_lock);
@@ -380,7 +272,6 @@ void notify_arch_cmos_timer(void)
static long clock_cmos_diff, sleep_start;
-static struct timer_opts *last_timer;
static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
@@ -389,10 +280,6 @@ static int timer_suspend(struct sys_device *dev, pm_message_t state)
clock_cmos_diff = -get_cmos_time();
clock_cmos_diff += get_seconds();
sleep_start = get_cmos_time();
- last_timer = cur_timer;
- cur_timer = &timer_none;
- if (last_timer->suspend)
- last_timer->suspend(state);
return 0;
}
@@ -415,10 +302,6 @@ static int timer_resume(struct sys_device *dev)
jiffies_64 += sleep_length;
wall_jiffies += sleep_length;
write_sequnlock_irqrestore(&xtime_lock, flags);
- if (last_timer->resume)
- last_timer->resume();
- cur_timer = last_timer;
- last_timer = NULL;
touch_softlockup_watchdog();
return 0;
}
@@ -460,9 +343,6 @@ static void __init hpet_time_init(void)
printk("Using HPET for base-timer\n");
}
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
-
time_init_hook();
}
#endif
@@ -484,8 +364,5 @@ void __init time_init(void)
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
-
time_init_hook();
}
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile
deleted file mode 100644
index 8fa12be..0000000
--- a/arch/i386/kernel/timers/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for x86 timers
-#
-
-obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
-
-obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
-obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
-obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
deleted file mode 100644
index 8163fe0..0000000
--- a/arch/i386/kernel/timers/common.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Common functions used across the timers go here
- */
-
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/jiffies.h>
-#include <linux/module.h>
-
-#include <asm/io.h>
-#include <asm/timer.h>
-#include <asm/hpet.h>
-
-#include "mach_timer.h"
-
-/* ------ Calibrate the TSC -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
- * Too much 64-bit arithmetic here to do this cleanly in C, and for
- * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
- * output busy loop as low as possible. We avoid reading the CTC registers
- * directly because of the awkward 8-bit access mechanism of the 82C54
- * device.
- */
-
-#define CALIBRATE_TIME (5 * 1000020/HZ)
-
-unsigned long calibrate_tsc(void)
-{
- mach_prepare_counter();
-
- {
- unsigned long startlow, starthigh;
- unsigned long endlow, endhigh;
- unsigned long count;
-
- rdtsc(startlow,starthigh);
- mach_countup(&count);
- rdtsc(endlow,endhigh);
-
-
- /* Error: ECTCNEVERSET */
- if (count <= 1)
- goto bad_ctc;
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (endlow), "=d" (endhigh)
- :"g" (startlow), "g" (starthigh),
- "0" (endlow), "1" (endhigh));
-
- /* Error: ECPUTOOFAST */
- if (endhigh)
- goto bad_ctc;
-
- /* Error: ECPUTOOSLOW */
- if (endlow <= CALIBRATE_TIME)
- goto bad_ctc;
-
- __asm__("divl %2"
- :"=a" (endlow), "=d" (endhigh)
- :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
-
- return endlow;
- }
-
- /*
- * The CTC wasn't reliable: we got a hit on the very first read,
- * or the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
-bad_ctc:
- return 0;
-}
-
-#ifdef CONFIG_HPET_TIMER
-/* ------ Calibrate the TSC using HPET -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
- * Second output is parameter 1 (when non NULL)
- * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
- * calibrate_tsc() calibrates the processor TSC by comparing
- * it to the HPET timer of known frequency.
- * Too much 64-bit arithmetic here to do this cleanly in C
- */
-#define CALIBRATE_CNT_HPET (5 * hpet_tick)
-#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
-
-unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
-{
- unsigned long tsc_startlow, tsc_starthigh;
- unsigned long tsc_endlow, tsc_endhigh;
- unsigned long hpet_start, hpet_end;
- unsigned long result, remain;
-
- hpet_start = hpet_readl(HPET_COUNTER);
- rdtsc(tsc_startlow, tsc_starthigh);
- do {
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
- rdtsc(tsc_endlow, tsc_endhigh);
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (tsc_endlow), "=d" (tsc_endhigh)
- :"g" (tsc_startlow), "g" (tsc_starthigh),
- "0" (tsc_endlow), "1" (tsc_endhigh));
-
- /* Error: ECPUTOOFAST */
- if (tsc_endhigh)
- goto bad_calibration;
-
- /* Error: ECPUTOOSLOW */
- if (tsc_endlow <= CALIBRATE_TIME_HPET)
- goto bad_calibration;
-
- ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
- if (remain > (tsc_endlow >> 1))
- result++; /* rounding the result */
-
- if (tsc_hpet_quotient_ptr) {
- unsigned long tsc_hpet_quotient;
-
- ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
- CALIBRATE_CNT_HPET);
- if (remain > (tsc_endlow >> 1))
- tsc_hpet_quotient++; /* rounding the result */
- *tsc_hpet_quotient_ptr = tsc_hpet_quotient;
- }
-
- return result;
-bad_calibration:
- /*
- * the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
- return 0;
-}
-#endif
-
-
-unsigned long read_timer_tsc(void)
-{
- unsigned long retval;
- rdtscl(retval);
- return retval;
-}
-
-
-/* calculate cpu_khz */
-void init_cpu_khz(void)
-{
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc();
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- }
- }
-}
-
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
deleted file mode 100644
index 7e39ed8..0000000
--- a/arch/i386/kernel/timers/timer.c
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <asm/timer.h>
-
-#ifdef CONFIG_HPET_TIMER
-/*
- * HPET memory read is slower than tsc reads, but is more dependable as it
- * always runs at constant frequency and reduces complexity due to
- * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
- * timer_pit when HPET is active. So, we default to timer_tsc.
- */
-#endif
-/* list of timers, ordered by preference, NULL terminated */
-static struct init_timer_opts* __initdata timers[] = {
-#ifdef CONFIG_X86_CYCLONE_TIMER
- &timer_cyclone_init,
-#endif
-#ifdef CONFIG_HPET_TIMER
- &timer_hpet_init,
-#endif
-#ifdef CONFIG_X86_PM_TIMER
- &timer_pmtmr_init,
-#endif
- &timer_tsc_init,
- &timer_pit_init,
- NULL,
-};
-
-static char clock_override[10] __initdata;
-
-static int __init clock_setup(char* str)
-{
- if (str)
- strlcpy(clock_override, str, sizeof(clock_override));
- return 1;
-}
-__setup("clock=", clock_setup);
-
-
-/* The chosen timesource has been found to be bad.
- * Fall back to a known good timesource (the PIT)
- */
-void clock_fallback(void)
-{
- cur_timer = &timer_pit;
-}
-
-/* iterates through the list of timers, returning the first
- * one that initializes successfully.
- */
-struct timer_opts* __init select_timer(void)
-{
- int i = 0;
-
- /* find most preferred working timer */
- while (timers[i]) {
- if (timers[i]->init)
- if (timers[i]->init(clock_override) == 0)
- return timers[i]->opts;
- ++i;
- }
-
- panic("select_timer: Cannot find a suitable timer\n");
- return NULL;
-}
-
-int read_current_timer(unsigned long *timer_val)
-{
- if (cur_timer->read_timer) {
- *timer_val = cur_timer->read_timer();
- return 0;
- }
- return -1;
-}
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
deleted file mode 100644
index 13892a6..0000000
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/* Cyclone-timer:
- * This code implements timer_ops for the cyclone counter found
- * on IBM x440, x360, and other Summit based systems.
- *
- * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com)
- */
-
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/fixmap.h>
-#include <asm/i8253.h>
-
-#include "io_ports.h"
-
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-#define CYCLONE_CBAR_ADDR 0xFEB00CD0
-#define CYCLONE_PMCC_OFFSET 0x51A0
-#define CYCLONE_MPMC_OFFSET 0x51D0
-#define CYCLONE_MPCS_OFFSET 0x51A8
-#define CYCLONE_TIMER_FREQ 100000000
-#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
-int use_cyclone = 0;
-
-static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
-static u32 last_cyclone_low;
-static u32 last_cyclone_high;
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* helper macro to atomically read both cyclone counter registers */
-#define read_cyclone_counter(low,high) \
- do{ \
- high = cyclone_timer[1]; low = cyclone_timer[0]; \
- } while (high != cyclone_timer[1]);
-
-
-static void mark_offset_cyclone(void)
-{
- unsigned long lost, delay;
- unsigned long delta = last_cyclone_low;
- int count;
- unsigned long long this_offset, last_offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
-
- spin_lock(&i8253_lock);
- read_cyclone_counter(last_cyclone_low,last_cyclone_high);
-
- /* read values for delay_at_last_interrupt */
- outb_p(0x00, 0x43); /* latch the count ASAP */
-
- count = inb_p(0x40); /* read the latched count */
- count |= inb(0x40) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
- spin_unlock(&i8253_lock);
-
- /* lost tick compensation */
- delta = last_cyclone_low - delta;
- delta /= (CYCLONE_TIMER_FREQ/1000000);
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2)
- jiffies_64 += lost-1;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
-
- /* catch corner case where tick rollover occured
- * between cyclone and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static unsigned long get_offset_cyclone(void)
-{
- u32 offset;
-
- if(!cyclone_timer)
- return delay_at_last_interrupt;
-
- /* Read the cyclone timer */
- offset = cyclone_timer[0];
-
- /* .. relative to previous jiffy */
- offset = offset - last_cyclone_low;
-
- /* convert cyclone ticks to microseconds */
- /* XXX slow, can we speed this up? */
- offset = offset/(CYCLONE_TIMER_FREQ/1000000);
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + offset;
-}
-
-static unsigned long long monotonic_clock_cyclone(void)
-{
- u32 now_low, now_high;
- unsigned long long last_offset, this_offset, base;
- unsigned long long ret;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
-
- /* Read the cyclone counter */
- read_cyclone_counter(now_low,now_high);
- this_offset = ((unsigned long long)now_high<<32)|now_low;
-
- /* convert to nanoseconds */
- ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
- return ret * (1000000000 / CYCLONE_TIMER_FREQ);
-}
-
-static int __init init_cyclone(char* override)
-{
- u32* reg;
- u32 base; /* saved cyclone base address */
- u32 pageaddr; /* page that contains cyclone_timer register */
- u32 offset; /* offset from pageaddr to cyclone_timer register */
- int i;
-
- /* check clock override */
- if (override[0] && strncmp(override,"cyclone",7))
- return -ENODEV;
-
- /*make sure we're on a summit box*/
- if(!use_cyclone) return -ENODEV;
-
- printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
-
- /* find base address */
- pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
- offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
- return -ENODEV;
- }
- base = *reg;
- if(!base){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
- return -ENODEV;
- }
-
- /* setup PMCC */
- pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* setup MPCS */
- pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* map in cyclone_timer */
- pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!cyclone_timer){
- printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
- return -ENODEV;
- }
-
- /*quick test to make sure its ticking*/
- for(i=0; i<3; i++){
- u32 old = cyclone_timer[0];
- int stall = 100;
- while(stall--) barrier();
- if(cyclone_timer[0] == old){
- printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
- cyclone_timer = 0;
- return -ENODEV;
- }
- }
-
- init_cpu_khz();
-
- /* Everything looks good! */
- return 0;
-}
-
-
-static void delay_cyclone(unsigned long loops)
-{
- unsigned long bclock, now;
- if(!cyclone_timer)
- return;
- bclock = cyclone_timer[0];
- do {
- rep_nop();
- now = cyclone_timer[0];
- } while ((now-bclock) < loops);
-}
-/************************************************************/
-
-/* cyclone timer_opts struct */
-static struct timer_opts timer_cyclone = {
- .name = "cyclone",
- .mark_offset = mark_offset_cyclone,
- .get_offset = get_offset_cyclone,
- .monotonic_clock = monotonic_clock_cyclone,
- .delay = delay_cyclone,
-};
-
-struct init_timer_opts __initdata timer_cyclone_init = {
- .init = init_cyclone,
- .opts = &timer_cyclone,
-};
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
deleted file mode 100644
index 17a6fe7..0000000
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-#include <asm/hpet.h>
-
-static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */
-static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */
-static unsigned long hpet_last; /* hpet counter value at last tick*/
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale __read_mostly;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-static unsigned long long monotonic_clock_hpet(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-static unsigned long get_offset_hpet(void)
-{
- register unsigned long eax, edx;
-
- eax = hpet_readl(HPET_COUNTER);
- eax -= hpet_last; /* hpet delta */
- eax = min(hpet_tick, eax);
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- *
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- *
- * Using a mull instead of a divl saves some cycles in critical path.
- */
- ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
-
- /* our adjusted time offset in microseconds */
- return edx;
-}
-
-static void mark_offset_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- offset = hpet_readl(HPET_COUNTER);
- if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) {
- int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1;
- jiffies_64 += lost_ticks;
- }
- hpet_last = offset;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-}
-
-static void delay_hpet(unsigned long loops)
-{
- unsigned long hpet_start, hpet_end;
- unsigned long eax;
-
- /* loops is the number of cpu cycles. Convert it to hpet clocks */
- ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
-
- hpet_start = hpet_readl(HPET_COUNTER);
- do {
- rep_nop();
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < (loops));
-}
-
-static struct timer_opts timer_hpet;
-
-static int __init init_hpet(char* override)
-{
- unsigned long result, remain;
-
- /* check clock override */
- if (override[0] && strncmp(override,"hpet",4))
- return -ENODEV;
-
- if (!is_hpet_enabled())
- return -ENODEV;
-
- printk("Using HPET for gettimeofday\n");
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient);
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
- eax, edx);
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- set_cyc2ns_scale(cpu_khz);
- }
- /* set this only when cpu_has_tsc */
- timer_hpet.read_timer = read_timer_tsc;
- }
-
- /*
- * Math to calculate hpet to usec multiplier
- * Look for the comments at get_offset_hpet()
- */
- ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
- if (remain > (hpet_tick >> 1))
- result++; /* rounding the result */
- hpet_usec_quotient = result;
-
- return 0;
-}
-
-static int hpet_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- hpet_last = hpet_readl(HPET_COUNTER);
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-/************************************************************/
-
-/* tsc timer_opts struct */
-static struct timer_opts timer_hpet __read_mostly = {
- .name = "hpet",
- .mark_offset = mark_offset_hpet,
- .get_offset = get_offset_hpet,
- .monotonic_clock = monotonic_clock_hpet,
- .delay = delay_hpet,
- .resume = hpet_resume,
-};
-
-struct init_timer_opts __initdata timer_hpet_init = {
- .init = init_hpet,
- .opts = &timer_hpet,
-};
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
deleted file mode 100644
index 4ea2f41..0000000
--- a/arch/i386/kernel/timers/timer_none.c
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <linux/init.h>
-#include <asm/timer.h>
-
-static void mark_offset_none(void)
-{
- /* nothing needed */
-}
-
-static unsigned long get_offset_none(void)
-{
- return 0;
-}
-
-static unsigned long long monotonic_clock_none(void)
-{
- return 0;
-}
-
-static void delay_none(unsigned long loops)
-{
- int d0;
- __asm__ __volatile__(
- "\tjmp 1f\n"
- ".align 16\n"
- "1:\tjmp 2f\n"
- ".align 16\n"
- "2:\tdecl %0\n\tjns 2b"
- :"=&a" (d0)
- :"0" (loops));
-}
-
-/* none timer_opts struct */
-struct timer_opts timer_none = {
- .name = "none",
- .mark_offset = mark_offset_none,
- .get_offset = get_offset_none,
- .monotonic_clock = monotonic_clock_none,
- .delay = delay_none,
-};
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
deleted file mode 100644
index b9b6bd5..0000000
--- a/arch/i386/kernel/timers/timer_pit.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/sysdev.h>
-#include <linux/timex.h>
-#include <asm/delay.h>
-#include <asm/mpspec.h>
-#include <asm/timer.h>
-#include <asm/smp.h>
-#include <asm/io.h>
-#include <asm/arch_hooks.h>
-#include <asm/i8253.h>
-
-#include "do_timer.h"
-#include "io_ports.h"
-
-static int count_p; /* counter in get_offset_pit() */
-
-static int __init init_pit(char* override)
-{
- /* check clock override */
- if (override[0] && strncmp(override,"pit",3))
- printk(KERN_ERR "Warning: clock= override failed. Defaulting "
- "to PIT\n");
- init_cpu_khz();
- count_p = LATCH;
- return 0;
-}
-
-static void mark_offset_pit(void)
-{
- /* nothing needed */
-}
-
-static unsigned long long monotonic_clock_pit(void)
-{
- return 0;
-}
-
-static void delay_pit(unsigned long loops)
-{
- int d0;
- __asm__ __volatile__(
- "\tjmp 1f\n"
- ".align 16\n"
- "1:\tjmp 2f\n"
- ".align 16\n"
- "2:\tdecl %0\n\tjns 2b"
- :"=&a" (d0)
- :"0" (loops));
-}
-
-
-/* This function must be called with xtime_lock held.
- * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
- *
- * However, the pc-audio speaker driver changes the divisor so that
- * it gets interrupted rather more often - it loads 64 into the
- * counter rather than 11932! This has an adverse impact on
- * do_gettimeoffset() -- it stops working! What is also not
- * good is that the interval that our timer function gets called
- * is no longer 10.0002 ms, but 9.9767 ms. To get around this
- * would require using a different timing source. Maybe someone
- * could use the RTC - I know that this can interrupt at frequencies
- * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
- * it so that at startup, the timer code in sched.c would select
- * using either the RTC or the 8253 timer. The decision would be
- * based on whether there was any other device around that needed
- * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
- * and then do some jiggery to have a version of do_timer that
- * advanced the clock by 1/1024 s. Every time that reached over 1/100
- * of a second, then do all the old code. If the time was kept correct
- * then do_gettimeoffset could just return 0 - there is no low order
- * divider that can be accessed.
- *
- * Ideally, you would be able to use the RTC for the speaker driver,
- * but it appears that the speaker driver really needs interrupt more
- * often than every 120 us or so.
- *
- * Anyway, this needs more thought.... pjsg (1993-08-28)
- *
- * If you are really that interested, you should be reading
- * comp.protocols.time.ntp!
- */
-
-static unsigned long get_offset_pit(void)
-{
- int count;
- unsigned long flags;
- static unsigned long jiffies_p = 0;
-
- /*
- * cache volatile jiffies temporarily; we have xtime_lock.
- */
- unsigned long jiffies_t;
-
- spin_lock_irqsave(&i8253_lock, flags);
- /* timer count may underflow right here */
- outb_p(0x00, PIT_MODE); /* latch the count ASAP */
-
- count = inb_p(PIT_CH0); /* read the latched count */
-
- /*
- * We do this guaranteed double memory access instead of a _p
- * postfix in the previous port access. Wheee, hackady hack
- */
- jiffies_t = jiffies;
-
- count |= inb_p(PIT_CH0) << 8;
-
- /* VIA686a test code... reset the latch if count > max + 1 */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
-
- /*
- * avoiding timer inconsistencies (they are rare, but they happen)...
- * there are two kinds of problems that must be avoided here:
- * 1. the timer counter underflows
- * 2. hardware problem with the timer, not giving us continuous time,
- * the counter does small "jumps" upwards on some Pentium systems,
- * (see c't 95/10 page 335 for Neptun bug.)
- */
-
- if( jiffies_t == jiffies_p ) {
- if( count > count_p ) {
- /* the nutcase */
- count = do_timer_overflow(count);
- }
- } else
- jiffies_p = jiffies_t;
-
- count_p = count;
-
- spin_unlock_irqrestore(&i8253_lock, flags);
-
- count = ((LATCH-1) - count) * TICK_SIZE;
- count = (count + LATCH/2) / LATCH;
-
- return count;
-}
-
-
-/* tsc timer_opts struct */
-struct timer_opts timer_pit = {
- .name = "pit",
- .mark_offset = mark_offset_pit,
- .get_offset = get_offset_pit,
- .monotonic_clock = monotonic_clock_pit,
- .delay = delay_pit,
-};
-
-struct init_timer_opts __initdata timer_pit_init = {
- .init = init_pit,
- .opts = &timer_pit,
-};
-
-void setup_pit_timer(void)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
- outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
- udelay(10);
- outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
- udelay(10);
- outb(LATCH >> 8 , PIT_CH0); /* MSB */
- spin_unlock_irqrestore(&i8253_lock, flags);
-}
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
deleted file mode 100644
index 144e94a..0000000
--- a/arch/i386/kernel/timers/timer_pm.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- * (C) Dominik Brodowski <linux@brodo.de> 2003
- *
- * Driver to use the Power Management Timer (PMTMR) available in some
- * southbridges as primary timing source for the Linux kernel.
- *
- * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
- * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
- *
- * This file is licensed under the GPL v2.
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <asm/types.h>
-#include <asm/timer.h>
-#include <asm/smp.h>
-#include <asm/io.h>
-#include <asm/arch_hooks.h>
-
-#include <linux/timex.h>
-#include "mach_timer.h"
-
-/* Number of PMTMR ticks expected during calibration run */
-#define PMTMR_TICKS_PER_SEC 3579545
-#define PMTMR_EXPECTED_RATE \
- ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
-
-
-/* The I/O port the PMTMR resides at.
- * The location is detected during setup_arch(),
- * in arch/i386/acpi/boot.c */
-u32 pmtmr_ioport = 0;
-
-
-/* value of the Power timer at last timer interrupt */
-static u32 offset_tick;
-static u32 offset_delay;
-
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
-
-static int pmtmr_need_workaround __read_mostly = 1;
-
-/*helper function to safely read acpi pm timesource*/
-static inline u32 read_pmtmr(void)
-{
- if (pmtmr_need_workaround) {
- u32 v1, v2, v3;
-
- /* It has been reported that because of various broken
- * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
- * source is not latched, so you must read it multiple
- * times to insure a safe value is read.
- */
- do {
- v1 = inl(pmtmr_ioport);
- v2 = inl(pmtmr_ioport);
- v3 = inl(pmtmr_ioport);
- } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
- || (v3 > v1 && v3 < v2));
-
- /* mask the output to 24 bits */
- return v2 & ACPI_PM_MASK;
- }
-
- return inl(pmtmr_ioport) & ACPI_PM_MASK;
-}
-
-
-/*
- * Some boards have the PMTMR running way too fast. We check
- * the PMTMR rate against PIT channel 2 to catch these cases.
- */
-static int verify_pmtmr_rate(void)
-{
- u32 value1, value2;
- unsigned long count, delta;
-
- mach_prepare_counter();
- value1 = read_pmtmr();
- mach_countup(&count);
- value2 = read_pmtmr();
- delta = (value2 - value1) & ACPI_PM_MASK;
-
- /* Check that the PMTMR delta is within 5% of what we expect */
- if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
- delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
- printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
- return -1;
- }
-
- return 0;
-}
-
-
-static int init_pmtmr(char* override)
-{
- u32 value1, value2;
- unsigned int i;
-
- if (override[0] && strncmp(override,"pmtmr",5))
- return -ENODEV;
-
- if (!pmtmr_ioport)
- return -ENODEV;
-
- /* we use the TSC for delay_pmtmr, so make sure it exists */
- if (!cpu_has_tsc)
- return -ENODEV;
-
- /* "verify" this timing source */
- value1 = read_pmtmr();
- for (i = 0; i < 10000; i++) {
- value2 = read_pmtmr();
- if (value2 == value1)
- continue;
- if (value2 > value1)
- goto pm_good;
- if ((value2 < value1) && ((value2) < 0xFFF))
- goto pm_good;
- printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2);
- return -EINVAL;
- }
- printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1);
- return -ENODEV;
-
-pm_good:
- if (verify_pmtmr_rate() != 0)
- return -ENODEV;
-
- init_cpu_khz();
- return 0;
-}
-
-static inline u32 cyc2us(u32 cycles)
-{
- /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
- * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
- *
- * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
- * easily be multiplied with 286 (=0x11E) without having to fear
- * u32 overflows.
- */
- cycles *= 286;
- return (cycles >> 10);
-}
-
-/*
- * this gets called during each timer interrupt
- * - Called while holding the writer xtime_lock
- */
-static void mark_offset_pmtmr(void)
-{
- u32 lost, delta, last_offset;
- static int first_run = 1;
- last_offset = offset_tick;
-
- write_seqlock(&monotonic_lock);
-
- offset_tick = read_pmtmr();
-
- /* calculate tick interval */
- delta = (offset_tick - last_offset) & ACPI_PM_MASK;
-
- /* convert to usecs */
- delta = cyc2us(delta);
-
- /* update the monotonic base value */
- monotonic_base += delta * NSEC_PER_USEC;
- write_sequnlock(&monotonic_lock);
-
- /* convert to ticks */
- delta += offset_delay;
- lost = delta / (USEC_PER_SEC / HZ);
- offset_delay = delta % (USEC_PER_SEC / HZ);
-
-
- /* compensate for lost ticks */
- if (lost >= 2)
- jiffies_64 += lost - 1;
-
- /* don't calculate delay for first run,
- or if we've got less then a tick */
- if (first_run || (lost < 1)) {
- first_run = 0;
- offset_delay = 0;
- }
-}
-
-static int pmtmr_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- offset_tick = read_pmtmr();
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-
-static unsigned long long monotonic_clock_pmtmr(void)
-{
- u32 last_offset, this_offset;
- unsigned long long base, ret;
- unsigned seq;
-
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = offset_tick;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the pmtmr */
- this_offset = read_pmtmr();
-
- /* convert to nanoseconds */
- ret = (this_offset - last_offset) & ACPI_PM_MASK;
- ret = base + (cyc2us(ret) * NSEC_PER_USEC);
- return ret;
-}
-
-static void delay_pmtmr(unsigned long loops)
-{
- unsigned long bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- } while ((now-bclock) < loops);
-}
-
-
-/*
- * get the offset (in microseconds) from the last call to mark_offset()
- * - Called holding a reader xtime_lock
- */
-static unsigned long get_offset_pmtmr(void)
-{
- u32 now, offset, delta = 0;
-
- offset = offset_tick;
- now = read_pmtmr();
- delta = (now - offset)&ACPI_PM_MASK;
-
- return (unsigned long) offset_delay + cyc2us(delta);
-}
-
-
-/* acpi timer_opts struct */
-static struct timer_opts timer_pmtmr = {
- .name = "pmtmr",
- .mark_offset = mark_offset_pmtmr,
- .get_offset = get_offset_pmtmr,
- .monotonic_clock = monotonic_clock_pmtmr,
- .delay = delay_pmtmr,
- .read_timer = read_timer_tsc,
- .resume = pmtmr_resume,
-};
-
-struct init_timer_opts __initdata timer_pmtmr_init = {
- .init = init_pmtmr,
- .opts = &timer_pmtmr,
-};
-
-#ifdef CONFIG_PCI
-/*
- * PIIX4 Errata:
- *
- * The power management timer may return improper results when read.
- * Although the timer value settles properly after incrementing,
- * while incrementing there is a 3 ns window every 69.8 ns where the
- * timer value is indeterminate (a 4.2% chance that the data will be
- * incorrect when read). As a result, the ACPI free running count up
- * timer specification is violated due to erroneous reads.
- */
-static int __init pmtmr_bug_check(void)
-{
- static struct pci_device_id gray_list[] __initdata = {
- /* these chipsets may have bug. */
- { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_82801DB_0) },
- { },
- };
- struct pci_dev *dev;
- int pmtmr_has_bug = 0;
- u8 rev;
-
- if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround)
- return 0;
-
- dev = pci_get_device(PCI_VENDOR_ID_INTEL,
- PCI_DEVICE_ID_INTEL_82371AB_3, NULL);
- if (dev) {
- pci_read_config_byte(dev, PCI_REVISION_ID, &rev);
- /* the bug has been fixed in PIIX4M */
- if (rev < 3) {
- printk(KERN_WARNING "* Found PM-Timer Bug on this "
- "chipset. Due to workarounds for a bug,\n"
- "* this time source is slow. Consider trying "
- "other time sources (clock=)\n");
- pmtmr_has_bug = 1;
- }
- pci_dev_put(dev);
- }
-
- if (pci_dev_present(gray_list)) {
- printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due"
- " to workarounds for a bug,\n"
- "* this time source is slow. If you are sure your timer"
- " does not have\n"
- "* this bug, please use \"pmtmr_good\" to disable the "
- "workaround\n");
- pmtmr_has_bug = 1;
- }
-
- if (!pmtmr_has_bug)
- pmtmr_need_workaround = 0;
-
- return 0;
-}
-device_initcall(pmtmr_bug_check);
-#endif
-
-static int __init pmtr_good_setup(char *__str)
-{
- pmtmr_need_workaround = 0;
- return 1;
-}
-__setup("pmtmr_good", pmtr_good_setup);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>");
-MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
deleted file mode 100644
index f1187dd..0000000
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ /dev/null
@@ -1,617 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- *
- * 2004-06-25 Jesper Juhl
- * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
- * failing to inline.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/cpufreq.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-/* processor.h for distable_tsc flag */
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-
-#include <asm/hpet.h>
-#include <asm/i8253.h>
-
-#ifdef CONFIG_HPET_TIMER
-static unsigned long hpet_usec_quotient;
-static unsigned long hpet_last;
-static struct timer_opts timer_tsc;
-#endif
-
-static inline void cpufreq_delayed_get(void);
-
-int tsc_disable __devinitdata = 0;
-
-static int use_tsc;
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* Avoid compensating for lost ticks before TSCs are synched */
-static int detect_lost_ticks;
-static int __init start_lost_tick_compensation(void)
-{
- detect_lost_ticks = 1;
- return 0;
-}
-late_initcall(start_lost_tick_compensation);
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale __read_mostly;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-static int count2; /* counter for mark_offset_tsc() */
-
-/* Cached *multiplier* to convert TSC counts to microseconds.
- * (see the equation below).
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
-static unsigned long fast_gettimeoffset_quotient;
-
-static unsigned long get_offset_tsc(void)
-{
- register unsigned long eax, edx;
-
- /* Read the Time Stamp Counter */
-
- rdtsc(eax,edx);
-
- /* .. relative to previous jiffy (32 bits is enough) */
- eax -= last_tsc_low; /* tsc_low delta */
-
- /*
- * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
- * = (tsc_low delta) * (usecs_per_clock)
- * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
- *
- * Using a mull instead of a divl saves up to 31 clock cycles
- * in the critical path.
- */
-
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + edx;
-}
-
-static unsigned long long monotonic_clock_tsc(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
- unsigned long long this_offset;
-
- /*
- * In the NUMA case we dont use the TSC as they are not
- * synchronized across all CPUs.
- */
-#ifndef CONFIG_NUMA
- if (!use_tsc)
-#endif
- /* no locking but a rare wrong value is not a big deal */
- return jiffies_64 * (1000000000 / HZ);
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return cycles_2_ns(this_offset);
-}
-
-static void delay_tsc(unsigned long loops)
-{
- unsigned long bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- } while ((now-bclock) < loops);
-}
-
-#ifdef CONFIG_HPET_TIMER
-static void mark_offset_tsc_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset, temp, hpet_current;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- /*
- * It is important that these two operations happen almost at
- * the same time. We do the RDTSC stuff first, since it's
- * faster. To avoid any inconsistencies, we need interrupts
- * disabled locally.
- */
- /*
- * Interrupts are just disabled locally since the timer irq
- * has the SA_INTERRUPT flag set. -arca
- */
- /* read Pentium cycle counter */
-
- hpet_current = hpet_readl(HPET_COUNTER);
- rdtsc(last_tsc_low, last_tsc_high);
-
- /* lost tick compensation */
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))
- && detect_lost_ticks) {
- int lost_ticks = (offset - hpet_last) / hpet_tick;
- jiffies_64 += lost_ticks;
- }
- hpet_last = hpet_current;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- */
- delay_at_last_interrupt = hpet_current - offset;
- ASM_MUL64_REG(temp, delay_at_last_interrupt,
- hpet_usec_quotient, delay_at_last_interrupt);
-}
-#endif
-
-
-#ifdef CONFIG_CPU_FREQ
-#include <linux/workqueue.h>
-
-static unsigned int cpufreq_delayed_issched = 0;
-static unsigned int cpufreq_init = 0;
-static struct work_struct cpufreq_delayed_get_work;
-
-static void handle_cpufreq_delayed_get(void *v)
-{
- unsigned int cpu;
- for_each_online_cpu(cpu) {
- cpufreq_get(cpu);
- }
- cpufreq_delayed_issched = 0;
-}
-
-/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
- * to verify the CPU frequency the timing core thinks the CPU is running
- * at is still correct.
- */
-static inline void cpufreq_delayed_get(void)
-{
- if (cpufreq_init && !cpufreq_delayed_issched) {
- cpufreq_delayed_issched = 1;
- printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
- schedule_work(&cpufreq_delayed_get_work);
- }
-}
-
-/* If the CPU frequency is scaled, TSC-based delays will need a different
- * loops_per_jiffy value to function properly.
- */
-
-static unsigned int ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
-
-#ifndef CONFIG_SMP
-static unsigned long fast_gettimeoffset_ref = 0;
-static unsigned int cpu_khz_ref = 0;
-#endif
-
-static int
-time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct cpufreq_freqs *freq = data;
-
- if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
- write_seqlock_irq(&xtime_lock);
- if (!ref_freq) {
- if (!freq->old){
- ref_freq = freq->new;
- goto end;
- }
- ref_freq = freq->old;
- loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
-#ifndef CONFIG_SMP
- fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
- cpu_khz_ref = cpu_khz;
-#endif
- }
-
- if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
- (val == CPUFREQ_RESUMECHANGE)) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-#ifndef CONFIG_SMP
- if (cpu_khz)
- cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
- if (use_tsc) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
- fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
- set_cyc2ns_scale(cpu_khz);
- }
- }
-#endif
- }
-
-end:
- if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
- write_sequnlock_irq(&xtime_lock);
-
- return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
- .notifier_call = time_cpufreq_notifier
-};
-
-
-static int __init cpufreq_tsc(void)
-{
- int ret;
- INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
- ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- if (!ret)
- cpufreq_init = 1;
- return ret;
-}
-core_initcall(cpufreq_tsc);
-
-#else /* CONFIG_CPU_FREQ */
-static inline void cpufreq_delayed_get(void) { return; }
-#endif
-
-int recalibrate_cpu_khz(void)
-{
-#ifndef CONFIG_SMP
- unsigned int cpu_khz_old = cpu_khz;
-
- if (cpu_has_tsc) {
- local_irq_disable();
- init_cpu_khz();
- local_irq_enable();
- cpu_data[0].loops_per_jiffy =
- cpufreq_scale(cpu_data[0].loops_per_jiffy,
- cpu_khz_old,
- cpu_khz);
- return 0;
- } else
- return -ENODEV;
-#else
- return -ENODEV;
-#endif
-}
-EXPORT_SYMBOL(recalibrate_cpu_khz);
-
-static void mark_offset_tsc(void)
-{
- unsigned long lost,delay;
- unsigned long delta = last_tsc_low;
- int count;
- int countmp;
- static int count1 = 0;
- unsigned long long this_offset, last_offset;
- static int lost_count = 0;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- /*
- * It is important that these two operations happen almost at
- * the same time. We do the RDTSC stuff first, since it's
- * faster. To avoid any inconsistencies, we need interrupts
- * disabled locally.
- */
-
- /*
- * Interrupts are just disabled locally since the timer irq
- * has the SA_INTERRUPT flag set. -arca
- */
-
- /* read Pentium cycle counter */
-
- rdtsc(last_tsc_low, last_tsc_high);
-
- spin_lock(&i8253_lock);
- outb_p(0x00, PIT_MODE); /* latch the count ASAP */
-
- count = inb_p(PIT_CH0); /* read the latched count */
- count |= inb(PIT_CH0) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
-
- spin_unlock(&i8253_lock);
-
- if (pit_latch_buggy) {
- /* get center value of last 3 time lutch */
- if ((count2 >= count && count >= count1)
- || (count1 >= count && count >= count2)) {
- count2 = count1; count1 = count;
- } else if ((count1 >= count2 && count2 >= count)
- || (count >= count2 && count2 >= count1)) {
- countmp = count;count = count2;
- count2 = count1;count1 = countmp;
- } else {
- count2 = count1; count1 = count; count = count1;
- }
- }
-
- /* lost tick compensation */
- delta = last_tsc_low - delta;
- {
- register unsigned long eax, edx;
- eax = delta;
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
- delta = edx;
- }
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2 && detect_lost_ticks) {
- jiffies_64 += lost-1;
-
- /* sanity check to ensure we're not always losing ticks */
- if (lost_count++ > 100) {
- printk(KERN_WARNING "Losing too many ticks!\n");
- printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
- printk(KERN_WARNING "Possible reasons for this are:\n");
- printk(KERN_WARNING " You're running with Speedstep,\n");
- printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
- printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
- printk(KERN_WARNING "Falling back to a sane timesource now.\n");
-
- clock_fallback();
- }
- /* ... but give the TSC a fair chance */
- if (lost_count > 25)
- cpufreq_delayed_get();
- } else
- lost_count = 0;
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
- /* catch corner case where tick rollover occured
- * between tsc and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static int __init init_tsc(char* override)
-{
-
- /* check clock override */
- if (override[0] && strncmp(override,"tsc",3)) {
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled()) {
- printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
- } else
-#endif
- {
- return -ENODEV;
- }
- }
-
- /*
- * If we have APM enabled or the CPU clock speed is variable
- * (CPU stops clock on HLT or slows clock to save power)
- * then the TSC timestamps may diverge by up to 1 jiffy from
- * 'real time' but nothing will break.
- * The most frequent case is that the CPU is "woken" from a halt
- * state by the timer interrupt itself, so we get 0 error. In the
- * rare cases where a driver would "wake" the CPU and request a
- * timestamp, the maximum error is < 1 jiffy. But timestamps are
- * still perfectly ordered.
- * Note that the TSC counter will be reset if APM suspends
- * to disk; this won't break the kernel, though, 'cuz we're
- * smart. See arch/i386/kernel/apm.c.
- */
- /*
- * Firstly we have to do a CPU check for chips with
- * a potentially buggy TSC. At this point we haven't run
- * the ident/bugs checks so we must run this hook as it
- * may turn off the TSC flag.
- *
- * NOTE: this doesn't yet handle SMP 486 machines where only
- * some CPU's have a TSC. Thats never worked and nobody has
- * moaned if you have the only one in the world - you fix it!
- */
-
- count2 = LATCH; /* initialize counter for mark_offset_tsc() */
-
- if (cpu_has_tsc) {
- unsigned long tsc_quotient;
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled() && hpet_use_timer) {
- unsigned long result, remain;
- printk("Using TSC for gettimeofday\n");
- tsc_quotient = calibrate_tsc_hpet(NULL);
- timer_tsc.mark_offset = &mark_offset_tsc_hpet;
- /*
- * Math to calculate hpet to usec multiplier
- * Look for the comments at get_offset_tsc_hpet()
- */
- ASM_DIV64_REG(result, remain, hpet_tick,
- 0, KERNEL_TICK_USEC);
- if (remain > (hpet_tick >> 1))
- result++; /* rounding the result */
-
- hpet_usec_quotient = result;
- } else
-#endif
- {
- tsc_quotient = calibrate_tsc();
- }
-
- if (tsc_quotient) {
- fast_gettimeoffset_quotient = tsc_quotient;
- use_tsc = 1;
- /*
- * We could be more selective here I suspect
- * and just enable this for the next intel chips ?
- */
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- set_cyc2ns_scale(cpu_khz);
- return 0;
- }
- }
- return -ENODEV;
-}
-
-static int tsc_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- rdtsc(last_tsc_low, last_tsc_high);
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled() && hpet_use_timer)
- hpet_last = hpet_readl(HPET_COUNTER);
-#endif
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-
-#ifndef CONFIG_X86_TSC
-/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
- * in cpu/common.c */
-static int __init tsc_setup(char *str)
-{
- tsc_disable = 1;
- return 1;
-}
-#else
-static int __init tsc_setup(char *str)
-{
- printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
- "cannot disable TSC.\n");
- return 1;
-}
-#endif
-__setup("notsc", tsc_setup);
-
-
-
-/************************************************************/
-
-/* tsc timer_opts struct */
-static struct timer_opts timer_tsc = {
- .name = "tsc",
- .mark_offset = mark_offset_tsc,
- .get_offset = get_offset_tsc,
- .monotonic_clock = monotonic_clock_tsc,
- .delay = delay_tsc,
- .read_timer = read_timer_tsc,
- .resume = tsc_resume,
-};
-
-struct init_timer_opts __initdata timer_tsc_init = {
- .init = init_tsc,
- .opts = &timer_tsc,
-};
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 0e49836..7846409 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -28,6 +28,7 @@
#include <linux/utsname.h>
#include <linux/kprobes.h>
#include <linux/kexec.h>
+#include <linux/unwind.h>
#ifdef CONFIG_EISA
#include <linux/ioport.h>
@@ -47,7 +48,7 @@
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/nmi.h>
-
+#include <asm/unwind.h>
#include <asm/smp.h>
#include <asm/arch_hooks.h>
#include <asm/kdebug.h>
@@ -92,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
static int kstack_depth_to_print = 24;
+static int call_trace = 1;
ATOMIC_NOTIFIER_HEAD(i386die_chain);
int register_die_notifier(struct notifier_block *nb)
@@ -149,6 +151,12 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
while (valid_stack_ptr(tinfo, (void *)ebp)) {
addr = *(unsigned long *)(ebp + 4);
printed = print_addr_and_symbol(addr, log_lvl, printed);
+ /*
+ * break out of recursive entries (such as
+ * end_of_stack_stop_unwind_function):
+ */
+ if (ebp == *(unsigned long *)ebp)
+ break;
ebp = *(unsigned long *)ebp;
}
#else
@@ -164,7 +172,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
return ebp;
}
-static void show_trace_log_lvl(struct task_struct *task,
+static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
+{
+ int n = 0;
+ int printed = 0; /* nr of entries already printed on current line */
+
+ while (unwind(info) == 0 && UNW_PC(info)) {
+ ++n;
+ printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed);
+ if (arch_unw_user_mode(info))
+ break;
+ }
+ if (printed)
+ printk("\n");
+ return n;
+}
+
+static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl)
{
unsigned long ebp;
@@ -172,6 +196,26 @@ static void show_trace_log_lvl(struct task_struct *task,
if (!task)
task = current;
+ if (call_trace >= 0) {
+ int unw_ret = 0;
+ struct unwind_frame_info info;
+
+ if (regs) {
+ if (unwind_init_frame_info(&info, task, regs) == 0)
+ unw_ret = show_trace_unwind(&info, log_lvl);
+ } else if (task == current)
+ unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
+ else {
+ if (unwind_init_blocked(&info, task) == 0)
+ unw_ret = show_trace_unwind(&info, log_lvl);
+ }
+ if (unw_ret > 0) {
+ if (call_trace > 0)
+ return;
+ printk("%sLegacy call trace:\n", log_lvl);
+ }
+ }
+
if (task == current) {
/* Grab ebp right from our regs */
asm ("movl %%ebp, %0" : "=r" (ebp) : );
@@ -192,13 +236,13 @@ static void show_trace_log_lvl(struct task_struct *task,
}
}
-void show_trace(struct task_struct *task, unsigned long * stack)
+void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
{
- show_trace_log_lvl(task, stack, "");
+ show_trace_log_lvl(task, regs, stack, "");
}
-static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
- char *log_lvl)
+static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *esp, char *log_lvl)
{
unsigned long *stack;
int i;
@@ -219,13 +263,13 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
printk("%08lx ", *stack++);
}
printk("\n%sCall Trace:\n", log_lvl);
- show_trace_log_lvl(task, esp, log_lvl);
+ show_trace_log_lvl(task, regs, esp, log_lvl);
}
void show_stack(struct task_struct *task, unsigned long *esp)
{
printk(" ");
- show_stack_log_lvl(task, esp, "");
+ show_stack_log_lvl(task, NULL, esp, "");
}
/*
@@ -235,7 +279,7 @@ void dump_stack(void)
{
unsigned long stack;
- show_trace(current, &stack);
+ show_trace(current, NULL, &stack);
}
EXPORT_SYMBOL(dump_stack);
@@ -268,8 +312,9 @@ void show_registers(struct pt_regs *regs)
regs->esi, regs->edi, regs->ebp, esp);
printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n",
regs->xds & 0xffff, regs->xes & 0xffff, ss);
- printk(KERN_EMERG "Process %s (pid: %d, threadinfo=%p task=%p)",
- current->comm, current->pid, current_thread_info(), current);
+ printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
+ TASK_COMM_LEN, current->comm, current->pid,
+ current_thread_info(), current, current->thread_info);
/*
* When in-kernel, we also print out the stack and code at the
* time of the fault..
@@ -278,7 +323,7 @@ void show_registers(struct pt_regs *regs)
u8 __user *eip;
printk("\n" KERN_EMERG "Stack: ");
- show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG);
+ show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
printk(KERN_EMERG "Code: ");
@@ -1208,3 +1253,15 @@ static int __init kstack_setup(char *s)
return 1;
}
__setup("kstack=", kstack_setup);
+
+static int __init call_trace_setup(char *s)
+{
+ if (strcmp(s, "old") == 0)
+ call_trace = -1;
+ else if (strcmp(s, "both") == 0)
+ call_trace = 0;
+ else if (strcmp(s, "new") == 0)
+ call_trace = 1;
+ return 1;
+}
+__setup("call_trace=", call_trace_setup);
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
new file mode 100644
index 0000000..7e0d8da
--- /dev/null
+++ b/arch/i386/kernel/tsc.c
@@ -0,0 +1,478 @@
+/*
+ * This code largely moved from arch/i386/kernel/timer/timer_tsc.c
+ * which was originally moved from arch/i386/kernel/time.c.
+ * See comments there for proper credits.
+ */
+
+#include <linux/clocksource.h>
+#include <linux/workqueue.h>
+#include <linux/cpufreq.h>
+#include <linux/jiffies.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+
+#include <asm/delay.h>
+#include <asm/tsc.h>
+#include <asm/delay.h>
+#include <asm/io.h>
+
+#include "mach_timer.h"
+
+/*
+ * On some systems the TSC frequency does not
+ * change with the cpu frequency. So we need
+ * an extra value to store the TSC freq
+ */
+unsigned int tsc_khz;
+
+int tsc_disable __cpuinitdata = 0;
+
+#ifdef CONFIG_X86_TSC
+static int __init tsc_setup(char *str)
+{
+ printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
+ "cannot disable TSC.\n");
+ return 1;
+}
+#else
+/*
+ * disable flag for tsc. Takes effect by clearing the TSC cpu flag
+ * in cpu/common.c
+ */
+static int __init tsc_setup(char *str)
+{
+ tsc_disable = 1;
+
+ return 1;
+}
+#endif
+
+__setup("notsc", tsc_setup);
+
+/*
+ * code to mark and check if the TSC is unstable
+ * due to cpufreq or due to unsynced TSCs
+ */
+static int tsc_unstable;
+
+static inline int check_tsc_unstable(void)
+{
+ return tsc_unstable;
+}
+
+void mark_tsc_unstable(void)
+{
+ tsc_unstable = 1;
+}
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+/* Accellerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ * ns = cycles / (freq / ns_per_sec)
+ * ns = cycles * (ns_per_sec / freq)
+ * ns = cycles * (10^9 / (cpu_khz * 10^3))
+ * ns = cycles * (10^6 / cpu_khz)
+ *
+ * Then we use scaling math (suggested by george@mvista.com) to get:
+ * ns = cycles * (10^6 * SC / cpu_khz) / SC
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ *
+ * We can use khz divisor instead of mhz to keep a better percision, since
+ * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * (mathieu.desnoyers@polymtl.ca)
+ *
+ * -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+static unsigned long cyc2ns_scale __read_mostly;
+
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline void set_cyc2ns_scale(unsigned long cpu_khz)
+{
+ cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long sched_clock(void)
+{
+ unsigned long long this_offset;
+
+ /*
+ * in the NUMA case we dont use the TSC as they are not
+ * synchronized across all CPUs.
+ */
+#ifndef CONFIG_NUMA
+ if (!cpu_khz || check_tsc_unstable())
+#endif
+ /* no locking but a rare wrong value is not a big deal */
+ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
+
+ /* read the Time Stamp Counter: */
+ rdtscll(this_offset);
+
+ /* return the value in ns */
+ return cycles_2_ns(this_offset);
+}
+
+static unsigned long calculate_cpu_khz(void)
+{
+ unsigned long long start, end;
+ unsigned long count;
+ u64 delta64;
+ int i;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ /* run 3 times to ensure the cache is warm */
+ for (i = 0; i < 3; i++) {
+ mach_prepare_counter();
+ rdtscll(start);
+ mach_countup(&count);
+ rdtscll(end);
+ }
+ /*
+ * Error: ECTCNEVERSET
+ * The CTC wasn't reliable: we got a hit on the very first read,
+ * or the CPU was so fast/slow that the quotient wouldn't fit in
+ * 32 bits..
+ */
+ if (count <= 1)
+ goto err;
+
+ delta64 = end - start;
+
+ /* cpu freq too fast: */
+ if (delta64 > (1ULL<<32))
+ goto err;
+
+ /* cpu freq too slow: */
+ if (delta64 <= CALIBRATE_TIME_MSEC)
+ goto err;
+
+ delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
+ do_div(delta64,CALIBRATE_TIME_MSEC);
+
+ local_irq_restore(flags);
+ return (unsigned long)delta64;
+err:
+ local_irq_restore(flags);
+ return 0;
+}
+
+int recalibrate_cpu_khz(void)
+{
+#ifndef CONFIG_SMP
+ unsigned long cpu_khz_old = cpu_khz;
+
+ if (cpu_has_tsc) {
+ cpu_khz = calculate_cpu_khz();
+ tsc_khz = cpu_khz;
+ cpu_data[0].loops_per_jiffy =
+ cpufreq_scale(cpu_data[0].loops_per_jiffy,
+ cpu_khz_old, cpu_khz);
+ return 0;
+ } else
+ return -ENODEV;
+#else
+ return -ENODEV;
+#endif
+}
+
+EXPORT_SYMBOL(recalibrate_cpu_khz);
+
+void tsc_init(void)
+{
+ if (!cpu_has_tsc || tsc_disable)
+ return;
+
+ cpu_khz = calculate_cpu_khz();
+ tsc_khz = cpu_khz;
+
+ if (!cpu_khz)
+ return;
+
+ printk("Detected %lu.%03lu MHz processor.\n",
+ (unsigned long)cpu_khz / 1000,
+ (unsigned long)cpu_khz % 1000);
+
+ set_cyc2ns_scale(cpu_khz);
+ use_tsc_delay();
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(void *v)
+{
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu)
+ cpufreq_get(cpu);
+
+ cpufreq_delayed_issched = 0;
+}
+
+/*
+ * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries
+ * to verify the CPU frequency the timing core thinks the CPU is running
+ * at is still correct.
+ */
+static inline void cpufreq_delayed_get(void)
+{
+ if (cpufreq_init && !cpufreq_delayed_issched) {
+ cpufreq_delayed_issched = 1;
+ printk(KERN_DEBUG "Checking if CPU frequency changed.\n");
+ schedule_work(&cpufreq_delayed_get_work);
+ }
+}
+
+/*
+ * if the CPU frequency is scaled, TSC-based delays will need a different
+ * loops_per_jiffy value to function properly.
+ */
+static unsigned int ref_freq = 0;
+static unsigned long loops_per_jiffy_ref = 0;
+static unsigned long cpu_khz_ref = 0;
+
+static int
+time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+
+ if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
+ write_seqlock_irq(&xtime_lock);
+
+ if (!ref_freq) {
+ if (!freq->old){
+ ref_freq = freq->new;
+ goto end;
+ }
+ ref_freq = freq->old;
+ loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
+ cpu_khz_ref = cpu_khz;
+ }
+
+ if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+ (val == CPUFREQ_RESUMECHANGE)) {
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+ cpu_data[freq->cpu].loops_per_jiffy =
+ cpufreq_scale(loops_per_jiffy_ref,
+ ref_freq, freq->new);
+
+ if (cpu_khz) {
+
+ if (num_online_cpus() == 1)
+ cpu_khz = cpufreq_scale(cpu_khz_ref,
+ ref_freq, freq->new);
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
+ tsc_khz = cpu_khz;
+ set_cyc2ns_scale(cpu_khz);
+ /*
+ * TSC based sched_clock turns
+ * to junk w/ cpufreq
+ */
+ mark_tsc_unstable();
+ }
+ }
+ }
+end:
+ if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
+ write_sequnlock_irq(&xtime_lock);
+
+ return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+ .notifier_call = time_cpufreq_notifier
+};
+
+static int __init cpufreq_tsc(void)
+{
+ int ret;
+
+ INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+ ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ if (!ret)
+ cpufreq_init = 1;
+
+ return ret;
+}
+
+core_initcall(cpufreq_tsc);
+
+#endif
+
+/* clock source code */
+
+static unsigned long current_tsc_khz = 0;
+static int tsc_update_callback(void);
+
+static cycle_t read_tsc(void)
+{
+ cycle_t ret;
+
+ rdtscll(ret);
+
+ return ret;
+}
+
+static struct clocksource clocksource_tsc = {
+ .name = "tsc",
+ .rating = 300,
+ .read = read_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .mult = 0, /* to be set */
+ .shift = 22,
+ .update_callback = tsc_update_callback,
+ .is_continuous = 1,
+};
+
+static int tsc_update_callback(void)
+{
+ int change = 0;
+
+ /* check to see if we should switch to the safe clocksource: */
+ if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
+ clocksource_tsc.rating = 50;
+ clocksource_reselect();
+ change = 1;
+ }
+
+ /* only update if tsc_khz has changed: */
+ if (current_tsc_khz != tsc_khz) {
+ current_tsc_khz = tsc_khz;
+ clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
+ clocksource_tsc.shift);
+ change = 1;
+ }
+
+ return change;
+}
+
+static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d)
+{
+ printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
+ d->ident);
+ mark_tsc_unstable();
+ return 0;
+}
+
+/* List of systems that have known TSC problems */
+static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
+ {
+ .callback = dmi_mark_tsc_unstable,
+ .ident = "IBM Thinkpad 380XD",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
+ DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
+ },
+ },
+ {}
+};
+
+#define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */
+static struct timer_list verify_tsc_freq_timer;
+
+/* XXX - Probably should add locking */
+static void verify_tsc_freq(unsigned long unused)
+{
+ static u64 last_tsc;
+ static unsigned long last_jiffies;
+
+ u64 now_tsc, interval_tsc;
+ unsigned long now_jiffies, interval_jiffies;
+
+
+ if (check_tsc_unstable())
+ return;
+
+ rdtscll(now_tsc);
+ now_jiffies = jiffies;
+
+ if (!last_jiffies) {
+ goto out;
+ }
+
+ interval_jiffies = now_jiffies - last_jiffies;
+ interval_tsc = now_tsc - last_tsc;
+ interval_tsc *= HZ;
+ do_div(interval_tsc, cpu_khz*1000);
+
+ if (interval_tsc < (interval_jiffies * 3 / 4)) {
+ printk("TSC appears to be running slowly. "
+ "Marking it as unstable\n");
+ mark_tsc_unstable();
+ return;
+ }
+
+out:
+ last_tsc = now_tsc;
+ last_jiffies = now_jiffies;
+ /* set us up to go off on the next interval: */
+ mod_timer(&verify_tsc_freq_timer,
+ jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL));
+}
+
+/*
+ * Make an educated guess if the TSC is trustworthy and synchronized
+ * over all CPUs.
+ */
+static __init int unsynchronized_tsc(void)
+{
+ /*
+ * Intel systems are normally all synchronized.
+ * Exceptions must mark TSC as unstable:
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ return 0;
+
+ /* assume multi socket systems are not synchronized: */
+ return num_possible_cpus() > 1;
+}
+
+static int __init init_tsc_clocksource(void)
+{
+
+ if (cpu_has_tsc && tsc_khz && !tsc_disable) {
+ /* check blacklist */
+ dmi_check_system(bad_tsc_dmi_table);
+
+ if (unsynchronized_tsc()) /* mark unstable if unsynced */
+ mark_tsc_unstable();
+ current_tsc_khz = tsc_khz;
+ clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
+ clocksource_tsc.shift);
+ /* lower the rating if we already know its unstable: */
+ if (check_tsc_unstable())
+ clocksource_tsc.rating = 50;
+
+ init_timer(&verify_tsc_freq_timer);
+ verify_tsc_freq_timer.function = verify_tsc_freq;
+ verify_tsc_freq_timer.expires =
+ jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL);
+ add_timer(&verify_tsc_freq_timer);
+
+ return clocksource_register(&clocksource_tsc);
+ }
+
+ return 0;
+}
+
+module_init(init_tsc_clocksource);
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 8831303..2d4f138 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -37,6 +37,13 @@ SECTIONS
RODATA
+ . = ALIGN(4);
+ __tracedata_start = .;
+ .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
+ *(.tracedata)
+ }
+ __tracedata_end = .;
+
/* writeable */
.data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
*(.data)
@@ -64,6 +71,15 @@ SECTIONS
.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
_edata = .; /* End of data section */
+#ifdef CONFIG_STACK_UNWIND
+ . = ALIGN(4);
+ .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
+ __start_unwind = .;
+ *(.eh_frame)
+ __end_unwind = .;
+ }
+#endif
+
. = ALIGN(THREAD_SIZE); /* init_task */
.data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
*(.data.init_task)
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
index c49a6ac..3c0714c 100644
--- a/arch/i386/lib/delay.c
+++ b/arch/i386/lib/delay.c
@@ -10,43 +10,92 @@
* we have to worry about.
*/
+#include <linux/module.h>
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/delay.h>
-#include <linux/module.h>
+
#include <asm/processor.h>
#include <asm/delay.h>
#include <asm/timer.h>
#ifdef CONFIG_SMP
-#include <asm/smp.h>
+# include <asm/smp.h>
#endif
-extern struct timer_opts* timer;
+/* simple loop based delay: */
+static void delay_loop(unsigned long loops)
+{
+ int d0;
+
+ __asm__ __volatile__(
+ "\tjmp 1f\n"
+ ".align 16\n"
+ "1:\tjmp 2f\n"
+ ".align 16\n"
+ "2:\tdecl %0\n\tjns 2b"
+ :"=&a" (d0)
+ :"0" (loops));
+}
+
+/* TSC based delay: */
+static void delay_tsc(unsigned long loops)
+{
+ unsigned long bclock, now;
+
+ rdtscl(bclock);
+ do {
+ rep_nop();
+ rdtscl(now);
+ } while ((now-bclock) < loops);
+}
+
+/*
+ * Since we calibrate only once at boot, this
+ * function should be set once at boot and not changed
+ */
+static void (*delay_fn)(unsigned long) = delay_loop;
+
+void use_tsc_delay(void)
+{
+ delay_fn = delay_tsc;
+}
+
+int read_current_timer(unsigned long *timer_val)
+{
+ if (delay_fn == delay_tsc) {
+ rdtscl(*timer_val);
+ return 0;
+ }
+ return -1;
+}
void __delay(unsigned long loops)
{
- cur_timer->delay(loops);
+ delay_fn(loops);
}
inline void __const_udelay(unsigned long xloops)
{
int d0;
+
xloops *= 4;
__asm__("mull %0"
:"=d" (xloops), "=&a" (d0)
- :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
- __delay(++xloops);
+ :"1" (xloops), "0"
+ (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
+
+ __delay(++xloops);
}
void __udelay(unsigned long usecs)
{
- __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
+ __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
}
void __ndelay(unsigned long nsecs)
{
- __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
+ __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
}
EXPORT_SYMBOL(__delay);
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index 4cf981d..c5aa65f 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -425,15 +425,212 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
: "eax", "edx", "memory");
return size;
}
+
+/*
+ * Non Temporal Hint version of __copy_user_zeroing_intel. It is cache aware.
+ * hyoshiok@miraclelinux.com
+ */
+
+static unsigned long __copy_user_zeroing_intel_nocache(void *to,
+ const void __user *from, unsigned long size)
+{
+ int d0, d1;
+
+ __asm__ __volatile__(
+ " .align 2,0x90\n"
+ "0: movl 32(%4), %%eax\n"
+ " cmpl $67, %0\n"
+ " jbe 2f\n"
+ "1: movl 64(%4), %%eax\n"
+ " .align 2,0x90\n"
+ "2: movl 0(%4), %%eax\n"
+ "21: movl 4(%4), %%edx\n"
+ " movnti %%eax, 0(%3)\n"
+ " movnti %%edx, 4(%3)\n"
+ "3: movl 8(%4), %%eax\n"
+ "31: movl 12(%4),%%edx\n"
+ " movnti %%eax, 8(%3)\n"
+ " movnti %%edx, 12(%3)\n"
+ "4: movl 16(%4), %%eax\n"
+ "41: movl 20(%4), %%edx\n"
+ " movnti %%eax, 16(%3)\n"
+ " movnti %%edx, 20(%3)\n"
+ "10: movl 24(%4), %%eax\n"
+ "51: movl 28(%4), %%edx\n"
+ " movnti %%eax, 24(%3)\n"
+ " movnti %%edx, 28(%3)\n"
+ "11: movl 32(%4), %%eax\n"
+ "61: movl 36(%4), %%edx\n"
+ " movnti %%eax, 32(%3)\n"
+ " movnti %%edx, 36(%3)\n"
+ "12: movl 40(%4), %%eax\n"
+ "71: movl 44(%4), %%edx\n"
+ " movnti %%eax, 40(%3)\n"
+ " movnti %%edx, 44(%3)\n"
+ "13: movl 48(%4), %%eax\n"
+ "81: movl 52(%4), %%edx\n"
+ " movnti %%eax, 48(%3)\n"
+ " movnti %%edx, 52(%3)\n"
+ "14: movl 56(%4), %%eax\n"
+ "91: movl 60(%4), %%edx\n"
+ " movnti %%eax, 56(%3)\n"
+ " movnti %%edx, 60(%3)\n"
+ " addl $-64, %0\n"
+ " addl $64, %4\n"
+ " addl $64, %3\n"
+ " cmpl $63, %0\n"
+ " ja 0b\n"
+ " sfence \n"
+ "5: movl %0, %%eax\n"
+ " shrl $2, %0\n"
+ " andl $3, %%eax\n"
+ " cld\n"
+ "6: rep; movsl\n"
+ " movl %%eax,%0\n"
+ "7: rep; movsb\n"
+ "8:\n"
+ ".section .fixup,\"ax\"\n"
+ "9: lea 0(%%eax,%0,4),%0\n"
+ "16: pushl %0\n"
+ " pushl %%eax\n"
+ " xorl %%eax,%%eax\n"
+ " rep; stosb\n"
+ " popl %%eax\n"
+ " popl %0\n"
+ " jmp 8b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 0b,16b\n"
+ " .long 1b,16b\n"
+ " .long 2b,16b\n"
+ " .long 21b,16b\n"
+ " .long 3b,16b\n"
+ " .long 31b,16b\n"
+ " .long 4b,16b\n"
+ " .long 41b,16b\n"
+ " .long 10b,16b\n"
+ " .long 51b,16b\n"
+ " .long 11b,16b\n"
+ " .long 61b,16b\n"
+ " .long 12b,16b\n"
+ " .long 71b,16b\n"
+ " .long 13b,16b\n"
+ " .long 81b,16b\n"
+ " .long 14b,16b\n"
+ " .long 91b,16b\n"
+ " .long 6b,9b\n"
+ " .long 7b,16b\n"
+ ".previous"
+ : "=&c"(size), "=&D" (d0), "=&S" (d1)
+ : "1"(to), "2"(from), "0"(size)
+ : "eax", "edx", "memory");
+ return size;
+}
+
+static unsigned long __copy_user_intel_nocache(void *to,
+ const void __user *from, unsigned long size)
+{
+ int d0, d1;
+
+ __asm__ __volatile__(
+ " .align 2,0x90\n"
+ "0: movl 32(%4), %%eax\n"
+ " cmpl $67, %0\n"
+ " jbe 2f\n"
+ "1: movl 64(%4), %%eax\n"
+ " .align 2,0x90\n"
+ "2: movl 0(%4), %%eax\n"
+ "21: movl 4(%4), %%edx\n"
+ " movnti %%eax, 0(%3)\n"
+ " movnti %%edx, 4(%3)\n"
+ "3: movl 8(%4), %%eax\n"
+ "31: movl 12(%4),%%edx\n"
+ " movnti %%eax, 8(%3)\n"
+ " movnti %%edx, 12(%3)\n"
+ "4: movl 16(%4), %%eax\n"
+ "41: movl 20(%4), %%edx\n"
+ " movnti %%eax, 16(%3)\n"
+ " movnti %%edx, 20(%3)\n"
+ "10: movl 24(%4), %%eax\n"
+ "51: movl 28(%4), %%edx\n"
+ " movnti %%eax, 24(%3)\n"
+ " movnti %%edx, 28(%3)\n"
+ "11: movl 32(%4), %%eax\n"
+ "61: movl 36(%4), %%edx\n"
+ " movnti %%eax, 32(%3)\n"
+ " movnti %%edx, 36(%3)\n"
+ "12: movl 40(%4), %%eax\n"
+ "71: movl 44(%4), %%edx\n"
+ " movnti %%eax, 40(%3)\n"
+ " movnti %%edx, 44(%3)\n"
+ "13: movl 48(%4), %%eax\n"
+ "81: movl 52(%4), %%edx\n"
+ " movnti %%eax, 48(%3)\n"
+ " movnti %%edx, 52(%3)\n"
+ "14: movl 56(%4), %%eax\n"
+ "91: movl 60(%4), %%edx\n"
+ " movnti %%eax, 56(%3)\n"
+ " movnti %%edx, 60(%3)\n"
+ " addl $-64, %0\n"
+ " addl $64, %4\n"
+ " addl $64, %3\n"
+ " cmpl $63, %0\n"
+ " ja 0b\n"
+ " sfence \n"
+ "5: movl %0, %%eax\n"
+ " shrl $2, %0\n"
+ " andl $3, %%eax\n"
+ " cld\n"
+ "6: rep; movsl\n"
+ " movl %%eax,%0\n"
+ "7: rep; movsb\n"
+ "8:\n"
+ ".section .fixup,\"ax\"\n"
+ "9: lea 0(%%eax,%0,4),%0\n"
+ "16: jmp 8b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 0b,16b\n"
+ " .long 1b,16b\n"
+ " .long 2b,16b\n"
+ " .long 21b,16b\n"
+ " .long 3b,16b\n"
+ " .long 31b,16b\n"
+ " .long 4b,16b\n"
+ " .long 41b,16b\n"
+ " .long 10b,16b\n"
+ " .long 51b,16b\n"
+ " .long 11b,16b\n"
+ " .long 61b,16b\n"
+ " .long 12b,16b\n"
+ " .long 71b,16b\n"
+ " .long 13b,16b\n"
+ " .long 81b,16b\n"
+ " .long 14b,16b\n"
+ " .long 91b,16b\n"
+ " .long 6b,9b\n"
+ " .long 7b,16b\n"
+ ".previous"
+ : "=&c"(size), "=&D" (d0), "=&S" (d1)
+ : "1"(to), "2"(from), "0"(size)
+ : "eax", "edx", "memory");
+ return size;
+}
+
#else
+
/*
* Leave these declared but undefined. They should not be any references to
* them
*/
-unsigned long
-__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size);
-unsigned long
-__copy_user_intel(void __user *to, const void *from, unsigned long size);
+unsigned long __copy_user_zeroing_intel(void *to, const void __user *from,
+ unsigned long size);
+unsigned long __copy_user_intel(void __user *to, const void *from,
+ unsigned long size);
+unsigned long __copy_user_zeroing_intel_nocache(void *to,
+ const void __user *from, unsigned long size);
#endif /* CONFIG_X86_INTEL_USERCOPY */
/* Generic arbitrary sized copy. */
@@ -515,8 +712,8 @@ do { \
: "memory"); \
} while (0)
-
-unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n)
+unsigned long __copy_to_user_ll(void __user *to, const void *from,
+ unsigned long n)
{
BUG_ON((long) n < 0);
#ifndef CONFIG_X86_WP_WORKS_OK
@@ -576,8 +773,8 @@ survive:
}
EXPORT_SYMBOL(__copy_to_user_ll);
-unsigned long
-__copy_from_user_ll(void *to, const void __user *from, unsigned long n)
+unsigned long __copy_from_user_ll(void *to, const void __user *from,
+ unsigned long n)
{
BUG_ON((long)n < 0);
if (movsl_is_ok(to, from, n))
@@ -588,6 +785,49 @@ __copy_from_user_ll(void *to, const void __user *from, unsigned long n)
}
EXPORT_SYMBOL(__copy_from_user_ll);
+unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from,
+ unsigned long n)
+{
+ BUG_ON((long)n < 0);
+ if (movsl_is_ok(to, from, n))
+ __copy_user(to, from, n);
+ else
+ n = __copy_user_intel((void __user *)to,
+ (const void *)from, n);
+ return n;
+}
+EXPORT_SYMBOL(__copy_from_user_ll_nozero);
+
+unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
+ unsigned long n)
+{
+ BUG_ON((long)n < 0);
+#ifdef CONFIG_X86_INTEL_USERCOPY
+ if ( n > 64 && cpu_has_xmm2)
+ n = __copy_user_zeroing_intel_nocache(to, from, n);
+ else
+ __copy_user_zeroing(to, from, n);
+#else
+ __copy_user_zeroing(to, from, n);
+#endif
+ return n;
+}
+
+unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
+ unsigned long n)
+{
+ BUG_ON((long)n < 0);
+#ifdef CONFIG_X86_INTEL_USERCOPY
+ if ( n > 64 && cpu_has_xmm2)
+ n = __copy_user_intel_nocache(to, from, n);
+ else
+ __copy_user(to, from, n);
+#else
+ __copy_user(to, from, n);
+#endif
+ return n;
+}
+
/**
* copy_to_user: - Copy a block of data into user space.
* @to: Destination address, in user space.
diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c
index b4a7455..004837c 100644
--- a/arch/i386/mach-default/setup.c
+++ b/arch/i386/mach-default/setup.c
@@ -8,6 +8,8 @@
#include <linux/interrupt.h>
#include <asm/acpi.h>
#include <asm/arch_hooks.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
#ifdef CONFIG_HOTPLUG_CPU
#define DEFAULT_SEND_IPI (1)
@@ -130,3 +132,44 @@ static int __init print_ipi_mode(void)
}
late_initcall(print_ipi_mode);
+
+/**
+ * machine_specific_memory_setup - Hook for machine specific memory setup.
+ *
+ * Description:
+ * This is included late in kernel/setup.c so that it can make
+ * use of all of the static functions.
+ **/
+
+char * __init machine_specific_memory_setup(void)
+{
+ char *who;
+
+
+ who = "BIOS-e820";
+
+ /*
+ * Try to copy the BIOS-supplied E820-map.
+ *
+ * Otherwise fake a memory map; one section from 0k->640k,
+ * the next section from 1mb->appropriate_mem_k
+ */
+ sanitize_e820_map(E820_MAP, &E820_MAP_NR);
+ if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
+ unsigned long mem_size;
+
+ /* compare results from other methods and take the greater */
+ if (ALT_MEM_K < EXT_MEM_K) {
+ mem_size = EXT_MEM_K;
+ who = "BIOS-88";
+ } else {
+ mem_size = ALT_MEM_K;
+ who = "BIOS-e801";
+ }
+
+ e820.nr_map = 0;
+ add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+ add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
+ }
+ return who;
+}
diff --git a/arch/i386/mach-visws/setup.c b/arch/i386/mach-visws/setup.c
index 07fac7e..8a9e1a6 100644
--- a/arch/i386/mach-visws/setup.c
+++ b/arch/i386/mach-visws/setup.c
@@ -10,6 +10,8 @@
#include <asm/fixmap.h>
#include <asm/arch_hooks.h>
#include <asm/io.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
#include "cobalt.h"
#include "piix4.h"
@@ -133,3 +135,50 @@ void __init time_init_hook(void)
/* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */
setup_irq(0, &irq0);
}
+
+/* Hook for machine specific memory setup. */
+
+#define MB (1024 * 1024)
+
+static unsigned long sgivwfb_mem_phys;
+static unsigned long sgivwfb_mem_size;
+
+long long mem_size __initdata = 0;
+
+char * __init machine_specific_memory_setup(void)
+{
+ long long gfx_mem_size = 8 * MB;
+
+ mem_size = ALT_MEM_K;
+
+ if (!mem_size) {
+ printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
+ mem_size = 128 * MB;
+ }
+
+ /*
+ * this hardcodes the graphics memory to 8 MB
+ * it really should be sized dynamically (or at least
+ * set as a boot param)
+ */
+ if (!sgivwfb_mem_size) {
+ printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
+ sgivwfb_mem_size = 8 * MB;
+ }
+
+ /*
+ * Trim to nearest MB
+ */
+ sgivwfb_mem_size &= ~((1 << 20) - 1);
+ sgivwfb_mem_phys = mem_size - gfx_mem_size;
+
+ add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+ add_memory_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
+ add_memory_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
+
+ return "PROM";
+
+ /* Remove gcc warnings */
+ (void) sanitize_e820_map(NULL, NULL);
+ (void) copy_e820_map(NULL, 0);
+}
diff --git a/arch/i386/mach-voyager/setup.c b/arch/i386/mach-voyager/setup.c
index 7d8a3ac..0e22505 100644
--- a/arch/i386/mach-voyager/setup.c
+++ b/arch/i386/mach-voyager/setup.c
@@ -7,6 +7,9 @@
#include <linux/interrupt.h>
#include <asm/acpi.h>
#include <asm/arch_hooks.h>
+#include <asm/voyager.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
void __init pre_intr_init_hook(void)
{
@@ -45,3 +48,74 @@ void __init time_init_hook(void)
{
setup_irq(0, &irq0);
}
+
+/* Hook for machine specific memory setup. */
+
+char * __init machine_specific_memory_setup(void)
+{
+ char *who;
+
+ who = "NOT VOYAGER";
+
+ if(voyager_level == 5) {
+ __u32 addr, length;
+ int i;
+
+ who = "Voyager-SUS";
+
+ e820.nr_map = 0;
+ for(i=0; voyager_memory_detect(i, &addr, &length); i++) {
+ add_memory_region(addr, length, E820_RAM);
+ }
+ return who;
+ } else if(voyager_level == 4) {
+ __u32 tom;
+ __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8;
+ /* select the DINO config space */
+ outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT);
+ /* Read DINO top of memory register */
+ tom = ((inb(catbase + 0x4) & 0xf0) << 16)
+ + ((inb(catbase + 0x5) & 0x7f) << 24);
+
+ if(inb(catbase) != VOYAGER_DINO) {
+ printk(KERN_ERR "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n");
+ tom = (EXT_MEM_K)<<10;
+ }
+ who = "Voyager-TOM";
+ add_memory_region(0, 0x9f000, E820_RAM);
+ /* map from 1M to top of memory */
+ add_memory_region(1*1024*1024, tom - 1*1024*1024, E820_RAM);
+ /* FIXME: Should check the ASICs to see if I need to
+ * take out the 8M window. Just do it at the moment
+ * */
+ add_memory_region(8*1024*1024, 8*1024*1024, E820_RESERVED);
+ return who;
+ }
+
+ who = "BIOS-e820";
+
+ /*
+ * Try to copy the BIOS-supplied E820-map.
+ *
+ * Otherwise fake a memory map; one section from 0k->640k,
+ * the next section from 1mb->appropriate_mem_k
+ */
+ sanitize_e820_map(E820_MAP, &E820_MAP_NR);
+ if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
+ unsigned long mem_size;
+
+ /* compare results from other methods and take the greater */
+ if (ALT_MEM_K < EXT_MEM_K) {
+ mem_size = EXT_MEM_K;
+ who = "BIOS-88";
+ } else {
+ mem_size = ALT_MEM_K;
+ who = "BIOS-e801";
+ }
+
+ e820.nr_map = 0;
+ add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+ add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
+ }
+ return who;
+}
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 7f0fcf2..6ee7faa 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -30,6 +30,40 @@
extern void die(const char *,struct pt_regs *,long);
+#ifdef CONFIG_KPROBES
+ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+int register_page_fault_notifier(struct notifier_block *nb)
+{
+ vmalloc_sync_all();
+ return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+}
+
+int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+}
+
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ struct die_args args = {
+ .regs = regs,
+ .str = str,
+ .err = err,
+ .trapnr = trap,
+ .signr = sig
+ };
+ return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+}
+#else
+static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+{
+ return NOTIFY_DONE;
+}
+#endif
+
+
/*
* Unlock any spinlocks which will prevent us from getting the
* message out
@@ -77,12 +111,15 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
unsigned seg = regs->xcs & 0xffff;
u32 seg_ar, seg_limit, base, *desc;
+ /* Unlikely, but must come before segment checks. */
+ if (unlikely(regs->eflags & VM_MASK)) {
+ base = seg << 4;
+ *eip_limit = base + 0xffff;
+ return base + (eip & 0xffff);
+ }
+
/* The standard kernel/user address space limit. */
*eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
-
- /* Unlikely, but must come before segment checks. */
- if (unlikely((regs->eflags & VM_MASK) != 0))
- return eip + (seg << 4);
/* By far the most common cases. */
if (likely(seg == __USER_CS || seg == __KERNEL_CS))
@@ -321,7 +358,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
if (unlikely(address >= TASK_SIZE)) {
if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
return;
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return;
/*
@@ -331,7 +368,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
goto bad_area_nosemaphore;
}
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return;
@@ -380,12 +417,12 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
goto bad_area;
if (error_code & 4) {
/*
- * accessing the stack below %esp is always a bug.
- * The "+ 32" is there due to some instructions (like
- * pusha) doing post-decrement on the stack and that
- * doesn't show up until later..
+ * Accessing the stack below %esp is always a bug.
+ * The large cushion allows instructions like enter
+ * and pusha to work. ("enter $65535,$31" pushes
+ * 32 pointers and then decrements %esp by 65535.)
*/
- if (address + 32 < regs->esp)
+ if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp)
goto bad_area;
}
if (expand_stack(vma, address))
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 3df1371..bf19513 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -29,6 +29,7 @@
#include <linux/efi.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
+#include <linux/cpumask.h>
#include <asm/processor.h>
#include <asm/system.h>
@@ -384,7 +385,7 @@ static void __init pagetable_init (void)
#endif
}
-#ifdef CONFIG_SOFTWARE_SUSPEND
+#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
/*
* Swap suspend & friends need this for resume because things like the intel-agp
* driver might have split up a kernel 4MB mapping.
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 92c3d9f..0887b34 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -209,19 +209,19 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
}
void global_flush_tlb(void)
-{
- LIST_HEAD(l);
+{
+ struct list_head l;
struct page *pg, *next;
BUG_ON(irqs_disabled());
spin_lock_irq(&cpa_lock);
- list_splice_init(&df_list, &l);
+ list_replace_init(&df_list, &l);
spin_unlock_irq(&cpa_lock);
flush_map();
list_for_each_entry_safe(pg, next, &l, lru)
__free_page(pg);
-}
+}
#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c
index ec0fd3c..fa8a37b 100644
--- a/arch/i386/oprofile/nmi_int.c
+++ b/arch/i386/oprofile/nmi_int.c
@@ -281,9 +281,9 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
for (i = 0; i < model->num_counters; ++i) {
struct dentry * dir;
- char buf[2];
+ char buf[4];
- snprintf(buf, 2, "%d", i);
+ snprintf(buf, sizeof(buf), "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c
index 3ad9a72..693bdea 100644
--- a/arch/i386/oprofile/op_model_athlon.c
+++ b/arch/i386/oprofile/op_model_athlon.c
@@ -13,6 +13,7 @@
#include <linux/oprofile.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
+#include <asm/nmi.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c
index ac8a066..7c61d35 100644
--- a/arch/i386/oprofile/op_model_p4.c
+++ b/arch/i386/oprofile/op_model_p4.c
@@ -14,6 +14,7 @@
#include <asm/ptrace.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c
index d719015..5c3ab4b 100644
--- a/arch/i386/oprofile/op_model_ppro.c
+++ b/arch/i386/oprofile/op_model_ppro.c
@@ -14,6 +14,7 @@
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
#include "op_x86_model.h"
#include "op_counter.h"
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index dbece77..c624b61 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -288,6 +288,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
void pcibios_disable_device (struct pci_dev *dev)
{
+ pcibios_disable_resources(dev);
if (pcibios_disable_irq)
pcibios_disable_irq(dev);
}
diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c
index ed2c8c8..a151f7a 100644
--- a/arch/i386/pci/i386.c
+++ b/arch/i386/pci/i386.c
@@ -242,6 +242,15 @@ int pcibios_enable_resources(struct pci_dev *dev, int mask)
return 0;
}
+void pcibios_disable_resources(struct pci_dev *dev)
+{
+ u16 cmd;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+}
+
/*
* If we set up a device for bus mastering, we need to check the latency
* timer as certain crappy BIOSes forget to set it properly.
@@ -276,8 +285,6 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
/* Leave vm_pgoff as-is, the PCI space address is the physical
* address on this platform.
*/
- vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO);
-
prot = pgprot_val(vma->vm_page_prot);
if (boot_cpu_data.x86 > 3)
prot |= _PAGE_PCD | _PAGE_PWT;
diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
index 06dab00..8ce6950 100644
--- a/arch/i386/pci/irq.c
+++ b/arch/i386/pci/irq.c
@@ -198,14 +198,14 @@ static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigne
*/
static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
- static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+ static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
return irqmap[read_config_nybble(router, 0x48, pirq-1)];
}
static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
{
- static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+ static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
unsigned int val = irqmap[irq];
if (val) {
@@ -256,13 +256,13 @@ static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
*/
static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
- static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
+ static const unsigned int pirqmap[4] = { 3, 2, 5, 1 };
return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
}
static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
{
- static unsigned int pirqmap[4] = { 3, 2, 5, 1 };
+ static const unsigned int pirqmap[4] = { 3, 2, 5, 1 };
write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
return 1;
}
@@ -274,13 +274,13 @@ static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq
*/
static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
- static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
return read_config_nybble(router,0x43, pirqmap[pirq-1]);
}
static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
{
- static unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
return 1;
}
@@ -505,7 +505,7 @@ static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
{
- static struct pci_device_id pirq_440gx[] = {
+ static struct pci_device_id __initdata pirq_440gx[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2) },
{ },
@@ -880,6 +880,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) {
DBG(" -> got IRQ %d\n", irq);
msg = "Found";
+ eisa_set_level_irq(irq);
} else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
DBG(" -> assigning IRQ %d", newirq);
if (r->set(pirq_router_dev, dev, pirq, newirq)) {
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index 6b1ea0c..e545b09 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -15,7 +15,9 @@
#include <asm/e820.h>
#include "pci.h"
-#define MMCONFIG_APER_SIZE (256*1024*1024)
+/* aperture is up to 256MB but BIOS may reserve less */
+#define MMCONFIG_APER_MIN (2 * 1024*1024)
+#define MMCONFIG_APER_MAX (256 * 1024*1024)
/* Assume systems with more busses have correct MCFG */
#define MAX_CHECK_BUS 16
@@ -197,9 +199,10 @@ void __init pci_mmcfg_init(void)
return;
if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
- pci_mmcfg_config[0].base_address + MMCONFIG_APER_SIZE,
+ pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
E820_RESERVED)) {
- printk(KERN_ERR "PCI: BIOS Bug: MCFG area is not E820-reserved\n");
+ printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
+ pci_mmcfg_config[0].base_address);
printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
return;
}
diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c
index 1eec086..ed1512a 100644
--- a/arch/i386/pci/pcbios.c
+++ b/arch/i386/pci/pcbios.c
@@ -371,8 +371,7 @@ void __devinit pcibios_sort(void)
list_for_each(ln, &pci_devices) {
d = pci_dev_g(ln);
if (d->bus->number == bus && d->devfn == devfn) {
- list_del(&d->global_list);
- list_add_tail(&d->global_list, &sorted_devices);
+ list_move_tail(&d->global_list, &sorted_devices);
if (d == dev)
found = 1;
break;
@@ -390,8 +389,7 @@ void __devinit pcibios_sort(void)
if (!found) {
printk(KERN_WARNING "PCI: Device %s not found by BIOS\n",
pci_name(dev));
- list_del(&dev->global_list);
- list_add_tail(&dev->global_list, &sorted_devices);
+ list_move_tail(&dev->global_list, &sorted_devices);
}
}
list_splice(&sorted_devices, &pci_devices);
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index 12035e2..12bf3d8 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -35,6 +35,7 @@ extern unsigned int pcibios_max_latency;
void pcibios_resource_survey(void);
int pcibios_enable_resources(struct pci_dev *, int);
+void pcibios_disable_resources(struct pci_dev *);
/* pci-pc.c */
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
index 79b2370..e651791 100644
--- a/arch/i386/power/cpu.c
+++ b/arch/i386/power/cpu.c
@@ -10,6 +10,8 @@
#include <linux/config.h>
#include <linux/module.h>
#include <linux/suspend.h>
+#include <asm/mtrr.h>
+#include <asm/mce.h>
static struct saved_context saved_context;
OpenPOWER on IntegriCloud