From 68a3a7feb08f960095072f28ec20f7900793c506 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Fri, 7 Apr 2006 19:49:18 +0200
Subject: [PATCH] x86_64: Reserve SRAT hotadd memory on x86-64

From: Keith Mannthey, Andi Kleen

Implement memory hotadd without sparsemem. The memory in the SRAT
hotadd area is just preserved instead and can be activated later.

There are a few restrictions:
- Only one contiguous hotadd area allowed per node

The main problem is dealing with the many buggy SRAT tables
that are out there. The strategy here is to reject anything
suspicious.

Originally from Keith Mannthey, with several hacks and changes by AK
and also contributions from Andrew Morton

[ TBD: Problems pointed out by KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>:

 1) Goto's rebuild_zonelist patch will not work if CONFIG_MEMORY_HOTPLUG=n.

    Rebuilding the zonelist is necessary when the system has just memory <
    4G at boot, and hot-adds memory > 4G.  Because x86_64 has DMA32,
    ZONE_NORMAL is not included in the zonelist at boot time if the system
    doesn't have memory >4G at boot.

    [AK: should just force the higher zones at boot time when SRAT tells us]

 2) zone and node's spanned_pages and present_pages are not incremented.
    They should be.

    For example, our server (ia64/Fujitsu PrimeQuest) can be equipped with
    memory from 4G to 1T (maybe 2T in future), and SRAT will *always* say we
    have a possible 1T of memory.  (Microsoft requires "write all possible
    memory in SRAT".)  When we reserve memmap for a possible 1T of memory,
    Linux will not work well in the minimum 4G configuration ;)

    [AK: needs limiting to 5-10% of max memory]
 ]

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/numa.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/numa.h b/include/asm-x86_64/numa.h
index f6cbb4c..f0ba4d9 100644
--- a/include/asm-x86_64/numa.h
+++ b/include/asm-x86_64/numa.h
@@ -18,6 +18,8 @@ extern void numa_init_array(void);
 extern int numa_off;
 
 extern void numa_set_node(int cpu, int node);
+extern void srat_reserve_add_area(int nodeid);
+extern int hotadd_percent;
 
 extern unsigned char apicid_to_node[256];
 #ifdef CONFIG_NUMA
-- 
cgit v1.1


From eee5a9fa63c97366cdea6ab3aa2ed9e3601812d0 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 7 Apr 2006 19:49:24 +0200
Subject: [PATCH] x86_64: Rename e820_mapped to e820_any_mapped

Rename e820_mapped to e820_any_mapped since it tests if any part of the
range is mapped according to the type.

Later steps will introduce e820_all_mapped which will check if the
entire range is mapped with the type.  Both have their merit.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/e820.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h
index 8dcc326..4192d0e 100644
--- a/include/asm-x86_64/e820.h
+++ b/include/asm-x86_64/e820.h
@@ -47,7 +47,7 @@ extern void contig_e820_setup(void);
 extern unsigned long e820_end_of_ram(void);
 extern void e820_reserve_resources(void);
 extern void e820_print_map(char *who);
-extern int e820_mapped(unsigned long start, unsigned long end, unsigned type);
+extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
 
 extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
 extern void e820_setup_gap(void);
-- 
cgit v1.1


From 952223683ec989e86328c24808fdb962c4dbeb0a Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 7 Apr 2006 19:49:27 +0200
Subject: [PATCH] x86_64: Introduce e820_all_mapped

Introduce an e820_all_mapped() function which checks if the entire range
<start,end> is mapped with type.

This is done by moving the local start variable to the end of each
known-good region; if at the end of the function the start address is
still before end, there must be a part that's not of the correct type;
otherwise it's a good region.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/e820.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h
index 4192d0e..93b51df 100644
--- a/include/asm-x86_64/e820.h
+++ b/include/asm-x86_64/e820.h
@@ -48,6 +48,7 @@ extern unsigned long e820_end_of_ram(void);
 extern void e820_reserve_resources(void);
 extern void e820_print_map(char *who);
 extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
+extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
 
 extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
 extern void e820_setup_gap(void);
-- 
cgit v1.1


From be56db6186999a8571ae480cf2b929578f6dfd68 Mon Sep 17 00:00:00 2001
From: Dave Hansen <haveblue@us.ibm.com>
Date: Fri, 7 Apr 2006 19:49:54 +0200
Subject: [PATCH] x86_64: extra NODES_SHIFT definition

The generic linux/numa.h file defines NODES_SHIFT to 0 in case
the architecture did not.

Every architecture which has a NUMA config option defines
NODES_SHIFT in its asm-$ARCH headers, but only if NUMA is
enabled, except for x86_64.

This should make it like all the rest.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/numnodes.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/numnodes.h b/include/asm-x86_64/numnodes.h
index 32be16b..5a1d506 100644
--- a/include/asm-x86_64/numnodes.h
+++ b/include/asm-x86_64/numnodes.h
@@ -5,8 +5,6 @@
 
 #ifdef CONFIG_NUMA
 #define NODES_SHIFT	6
-#else
-#define NODES_SHIFT	0
 #endif
 
 #endif
-- 
cgit v1.1


From 553f265fe883a23502ee351845f09334790f18b8 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Fri, 7 Apr 2006 19:49:57 +0200
Subject: [PATCH] x86_64: Don't run NMI watchdog during machine checks

Machine checks can stall the machine for a long time and
it's not good to trigger the nmi watchdog during that.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/mce.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h
index 5d298b7..7229785 100644
--- a/include/asm-x86_64/mce.h
+++ b/include/asm-x86_64/mce.h
@@ -70,6 +70,9 @@ struct mce_log {
 #define MCE_THRESHOLD_BASE      MCE_EXTENDED_BANK + 1 /* MCE_AMD */
 #define MCE_THRESHOLD_DRAM_ECC  MCE_THRESHOLD_BASE + 4
 
+#ifdef __KERNEL__
+#include <asm/atomic.h>
+
 void mce_log(struct mce *m);
 #ifdef CONFIG_X86_MCE_INTEL
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
@@ -87,4 +90,8 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c)
 }
 #endif
 
+extern atomic_t mce_entry;
+
+#endif
+
 #endif
-- 
cgit v1.1


From b20367a6c2a0cd937cb1f0a8cf848f1402fef99c Mon Sep 17 00:00:00 2001
From: Jordan Hargrave <jordan_hargrave@dell.com>
Date: Fri, 7 Apr 2006 19:50:18 +0200
Subject: [PATCH] x86_64: Fix drift with HPET timer enabled

If the HPET timer is enabled, the clock can drift by ~3 seconds a day.
This is due to the HPET timer not being initialized with the correct
setting (still using PIT count).

If HZ changes, this drift can become even more pronounced.

HPET patch initializes tick_nsec with correct tick_nsec settings for
HPET timer.

Vojtech comments:

  "It's not entirely correct (it assumes the HPET ticks totally
   exactly), but it's significantly better than assuming the PIT error
   there."

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/hpet.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/hpet.h b/include/asm-x86_64/hpet.h
index 08b75c1..18ff7ee 100644
--- a/include/asm-x86_64/hpet.h
+++ b/include/asm-x86_64/hpet.h
@@ -51,6 +51,8 @@
 
 #define HPET_TN_ROUTE_SHIFT	9
 
+#define HPET_TICK_RATE (HZ * 100000UL)
+
 extern int is_hpet_enabled(void);
 extern int hpet_rtc_timer_init(void);
 extern int oem_force_hpet_timer(void);
-- 
cgit v1.1


From bbd3aff89d4b34ef17a748e4c001ecc5b43e3e55 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 7 Apr 2006 19:50:28 +0200
Subject: [PATCH] x86_64: fix CONFIG_REORDER

Fix CONFIG_REORDER.

The value of cflags-y was assigned to CFLAGS before cflags-y was assigned
the value used for CONFIG_REORDER.

Use cflags-y for all CFLAGS options in the Makefile to avoid this
happening again.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/ia32_unistd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h
index eeb2bcd..34ad297 100644
--- a/include/asm-x86_64/ia32_unistd.h
+++ b/include/asm-x86_64/ia32_unistd.h
@@ -317,6 +317,6 @@
 #define __NR_ia32_ppoll			309
 #define __NR_ia32_unshare		310
 
-#define IA32_NR_syscalls 315	/* must be > than biggest syscall! */
+#define IA32_NR_syscalls 311	/* must be > than biggest syscall! */
 
 #endif /* _ASM_X86_64_IA32_UNISTD_H_ */
-- 
cgit v1.1


From 67d53ea5a3d42aadeb1584e757ca4660c0e8a810 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Fri, 7 Apr 2006 19:50:31 +0200
Subject: [PATCH] x86_64: Eliminate IA32_NR_syscalls define

Or rather compute it based on the table length automatically.

This also has the intended side effect of not warning for new system calls
anymore.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/ia32_unistd.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h
index 34ad297..b4f4b17 100644
--- a/include/asm-x86_64/ia32_unistd.h
+++ b/include/asm-x86_64/ia32_unistd.h
@@ -317,6 +317,4 @@
 #define __NR_ia32_ppoll			309
 #define __NR_ia32_unshare		310
 
-#define IA32_NR_syscalls 311	/* must be > than biggest syscall! */
-
 #endif /* _ASM_X86_64_IA32_UNISTD_H_ */
-- 
cgit v1.1


From c80d79d746cc48bd94b0ce4f6d4f3c90cd403aaf Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Mon, 10 Apr 2006 22:53:53 -0700
Subject: [PATCH] Configurable NODES_SHIFT

Current implementations define NODES_SHIFT in include/asm-xxx/numnodes.h for
each arch.  Its definition is sometimes configurable.  Indeed, ia64 defines 5
NODES_SHIFT values in the current git tree.  But it looks a bit messy.

SGI-SN2(ia64) system requires 1024 nodes, and the number of nodes already has
been changeable by config.  Suitable node's number may be changed in the
future even if it is other architecture.  So, I wrote configurable node's
number.

This patch set defines just default value for each arch which needs multi
nodes except ia64.  But, it is easy to change to configurable if necessary.

On ia64 the number of nodes can be already configured in generic ia64 and SN2
config.  But, NODES_SHIFT is defined for DIG64 and HP'S machine too.  So, I
changed it so that all platforms can be configured via CONFIG_NODES_SHIFT.  It
would be simpler.

See also: http://marc.theaimsgroup.com/?l=linux-kernel&m=114358010523896&w=2

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/numa.h     |  1 -
 include/asm-x86_64/numnodes.h | 10 ----------
 2 files changed, 11 deletions(-)
 delete mode 100644 include/asm-x86_64/numnodes.h

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/numa.h b/include/asm-x86_64/numa.h
index f0ba4d9..1cc92fe 100644
--- a/include/asm-x86_64/numa.h
+++ b/include/asm-x86_64/numa.h
@@ -2,7 +2,6 @@
 #define _ASM_X8664_NUMA_H 1
 
 #include <linux/nodemask.h>
-#include <asm/numnodes.h>
 
 struct bootnode {
 	u64 start,end; 
diff --git a/include/asm-x86_64/numnodes.h b/include/asm-x86_64/numnodes.h
deleted file mode 100644
index 5a1d506..0000000
--- a/include/asm-x86_64/numnodes.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_X8664_NUMNODES_H
-#define _ASM_X8664_NUMNODES_H 1
-
-#include <linux/config.h>
-
-#ifdef CONFIG_NUMA
-#define NODES_SHIFT	6
-#endif
-
-#endif
-- 
cgit v1.1


From e4cff6ac78e9c3bbb90c0e01b20418eeae0c6b52 Mon Sep 17 00:00:00 2001
From: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Date: Tue, 11 Apr 2006 12:54:42 +0200
Subject: [PATCH] x86_64: fix sync before RDTSC on Intel cpus

Commit c818a18146997d1356a4840b0c01f1168c16c8a4 didn't do the expected
thing.  This fix will remove the additional sync(cpuid) before RDTSC on
Intel platforms.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/timex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h
index f18443f..b9e5320 100644
--- a/include/asm-x86_64/timex.h
+++ b/include/asm-x86_64/timex.h
@@ -33,7 +33,7 @@ static __always_inline cycles_t get_cycles_sync(void)
 	unsigned eax;
 	/* Don't do an additional sync on CPUs where we know
 	   RDTSC is already synchronous. */
-	alternative_io(ASM_NOP2, "cpuid", X86_FEATURE_SYNC_RDTSC,
+	alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
 			  "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
 	rdtscll(ret);
 	return ret;
-- 
cgit v1.1


From cde227afe6b997dce08bcfc2aa6e373fb56857b0 Mon Sep 17 00:00:00 2001
From: "mao, bibo" <bibo.mao@intel.com>
Date: Tue, 11 Apr 2006 12:54:54 +0200
Subject: [PATCH] x86_64: inline function prefix with __always_inline in
 vsyscall

In the vsyscall function do_vgettimeofday(), some functions are declared
as inline, which is only a hint for gcc to inline the function; it is not
forced.  Sometimes the compiler does not inline the function, so here
inline is replaced by the __always_inline prefix.

It does not actually happen with the gcc compiler, but it possibly could.

Signed-off-by: bibo mao <bibo.mao@intel.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/io.h b/include/asm-x86_64/io.h
index cafdfb3..a05da8a 100644
--- a/include/asm-x86_64/io.h
+++ b/include/asm-x86_64/io.h
@@ -177,7 +177,7 @@ static inline __u16 __readw(const volatile void __iomem *addr)
 {
 	return *(__force volatile __u16 *)addr;
 }
-static inline __u32 __readl(const volatile void __iomem *addr)
+static __always_inline __u32 __readl(const volatile void __iomem *addr)
 {
 	return *(__force volatile __u32 *)addr;
 }
-- 
cgit v1.1


From 70524490ee2ea1bbf6cee6c106597b3ac25a3fc2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 11 Apr 2006 15:51:17 +0200
Subject: [PATCH] splice: add support for sys_tee()

Basically an in-kernel implementation of tee, which uses splice and the
pipe buffers as an intelligent way to pass data around by reference.

Where the user space tee consumes the input and produces a stdout and
file output, this syscall merely duplicates the data inside a pipe to
another pipe. No data is copied, the output just grabs a reference to the
input pipe data.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 include/asm-x86_64/unistd.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index f21ff2c..d86494e 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -611,8 +611,10 @@ __SYSCALL(__NR_set_robust_list, sys_set_robust_list)
 __SYSCALL(__NR_get_robust_list, sys_get_robust_list)
 #define __NR_splice		275
 __SYSCALL(__NR_splice, sys_splice)
+#define __NR_tee		276
+__SYSCALL(__NR_tee, sys_tee)
 
-#define __NR_syscall_max __NR_splice
+#define __NR_syscall_max __NR_tee
 
 #ifndef __NO_STUBS
 
-- 
cgit v1.1


From 6fa679fdea22cd96287d4aa11ee771bcd46c6dfb Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 18 Apr 2006 12:35:16 +0200
Subject: [PATCH] x86_64: Increase NUMA hash function nodemap

Needed for some big Opteron systems to compute a NUMA hash function.
They have more than 12 significant address bits.

TBD: switch this over to dynamic allocation or use a better hash.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/mmzone.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h
index 6b18cd8..6944e71 100644
--- a/include/asm-x86_64/mmzone.h
+++ b/include/asm-x86_64/mmzone.h
@@ -12,7 +12,8 @@
 
 #include <asm/smp.h>
 
-#define NODEMAPSIZE 0xfff
+/* Should really switch to dynamic allocation at some point */
+#define NODEMAPSIZE 0x4fff
 
 /* Simple perfect hash to map physical addresses to node numbers */
 struct memnode {
-- 
cgit v1.1


From f1233ab2cebb22a98df55de206a33a6693e3a78b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 18 Apr 2006 12:35:19 +0200
Subject: [PATCH] x86_64: Add tee and sync_file_range

tee was already there for some reason for native 64bit, but
sys_sync_file_range was missing. Also add it to the compat layer.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/unistd.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index d86494e..98c36ea 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -613,8 +613,10 @@ __SYSCALL(__NR_get_robust_list, sys_get_robust_list)
 __SYSCALL(__NR_splice, sys_splice)
 #define __NR_tee		276
 __SYSCALL(__NR_tee, sys_tee)
+#define __NR_sync_file_range	277
+__SYSCALL(__NR_sync_file_range, sys_sync_file_range)
 
-#define __NR_syscall_max __NR_tee
+#define __NR_syscall_max __NR_sync_file_range
 
 #ifndef __NO_STUBS
 
-- 
cgit v1.1


From 676ff453e58c5ff7ddbfebf5a11142e3e4add161 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Tue, 18 Apr 2006 22:20:21 -0700
Subject: [PATCH] for_each_possible_cpu: x86_64

for_each_cpu() actually iterates across all possible CPUs.  We've had
mistakes in the past where people were using for_each_cpu() where they
should have been iterating across only online or present CPUs.  This is
inefficient and possibly buggy.

We're renaming for_each_cpu() to for_each_possible_cpu() to avoid this
in the future.

This patch replaces for_each_cpu with for_each_possible_cpu.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/percpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h
index 4405b4a..7f33aaf 100644
--- a/include/asm-x86_64/percpu.h
+++ b/include/asm-x86_64/percpu.h
@@ -26,7 +26,7 @@
 #define percpu_modcopy(pcpudst, src, size)			\
 do {								\
 	unsigned int __i;					\
-	for_each_cpu(__i)					\
+	for_each_possible_cpu(__i)				\
 		memcpy((pcpudst)+__per_cpu_offset(__i),		\
 		       (src), (size));				\
 } while (0)
-- 
cgit v1.1


From 18bd057b1408cd110ed23281533430cfc2d52091 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 20 Apr 2006 02:36:45 +0200
Subject: [PATCH] i386/x86-64: Fix x87 information leak between processes

AMD K7/K8 CPUs only save/restore the FOP/FIP/FDP x87 registers in FXSAVE
when an exception is pending.  This means the values leak through
context switches and allow processes to observe some x87 instruction
state of other processes.

This was actually documented by AMD, but nobody recognized it as
being different from Intel before.

The fix first adds an optimization: instead of unconditionally
calling FNCLEX after each FXSAVE, test if ES is pending and skip
it when not needed. Then do an x87 load from a kernel variable to
clear FOP/FIP/FDP.

This means other processes always will only see a constant value
defined by the kernel in their FP state.

I took some pains to make sure to choose a variable that's already
in L1 during context switch to make the overhead of this low.

Also alternative() is used to patch away the new code on CPUs
that don't need it.

Patch for both i386/x86-64.

The problem was discovered originally by Jan Beulich. Richard
Brunner provided the basic code for the workarounds, with contribution
from Jan.

This is CVE-2006-1056

Cc: richard.brunner@amd.com
Cc: jbeulich@novell.com

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/cpufeature.h |  1 +
 include/asm-x86_64/i387.h       | 20 +++++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h
index 76bb619..662964b 100644
--- a/include/asm-x86_64/cpufeature.h
+++ b/include/asm-x86_64/cpufeature.h
@@ -64,6 +64,7 @@
 #define X86_FEATURE_REP_GOOD	(3*32+ 4) /* rep microcode works well on this CPU */
 #define X86_FEATURE_CONSTANT_TSC (3*32+5) /* TSC runs at constant rate */
 #define X86_FEATURE_SYNC_RDTSC  (3*32+6)  /* RDTSC syncs CPU core */
+#define X86_FEATURE_FXSAVE_LEAK (3*32+7)  /* FIP/FOP/FDP leaks through FXSAVE */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h
index 876eb9a..cba8a3b 100644
--- a/include/asm-x86_64/i387.h
+++ b/include/asm-x86_64/i387.h
@@ -72,6 +72,23 @@ extern int set_fpregs(struct task_struct *tsk,
 #define set_fpu_swd(t,val) ((t)->thread.i387.fxsave.swd = (val))
 #define set_fpu_fxsr_twd(t,val) ((t)->thread.i387.fxsave.twd = (val))
 
+#define X87_FSW_ES (1 << 7)	/* Exception Summary */
+
+/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
+   is pending. Clear the x87 state here by setting it to fixed
+   values. The kernel data segment can be sometimes 0 and sometimes
+   new user value. Both should be ok.
+   Use the PDA as safe address because it should be already in L1. */
+static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
+{
+	if (unlikely(fx->swd & X87_FSW_ES))
+		 asm volatile("fnclex");
+	alternative_input(ASM_NOP8 ASM_NOP2,
+	     	     "    emms\n"		/* clear stack tags */
+	     	     "    fildl %%gs:0",	/* load to clear state */
+		     X86_FEATURE_FXSAVE_LEAK);
+}
+
 static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) 
 { 
 	int err;
@@ -119,6 +136,7 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
 #endif
 	if (unlikely(err))
 		__clear_user(fx, sizeof(struct i387_fxsave_struct));
+	/* No need to clear here because the caller clears USED_MATH */
 	return err;
 } 
 
@@ -149,7 +167,7 @@ static inline void __fxsave_clear(struct task_struct *tsk)
 				"i" (offsetof(__typeof__(*tsk),
 					      thread.i387.fxsave)));
 #endif
-	__asm__ __volatile__("fnclex");
+	clear_fpu_state(&tsk->thread.i387.fxsave);
 }
 
 static inline void kernel_fpu_begin(void)
-- 
cgit v1.1


From 0b699e36b2d43c1b4288992683e5913d347b5b78 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 20 Apr 2006 02:36:48 +0200
Subject: [PATCH] x86_64: bring back __read_mostly support to linux-2.6.17-rc2

It seems the latest kernel has a wrong/missing __read_mostly implementation
for x86_64.

The __read_mostly macro should be declared outside of the #if CONFIG_X86_VSMP block.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-x86_64/cache.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/cache.h b/include/asm-x86_64/cache.h
index c8043a1..f8dff1c 100644
--- a/include/asm-x86_64/cache.h
+++ b/include/asm-x86_64/cache.h
@@ -20,8 +20,8 @@
        __attribute__((__section__(".data.page_aligned")))
 #endif
 
-#define __read_mostly __attribute__((__section__(".data.read_mostly")))
-
 #endif
 
+#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+
 #endif
-- 
cgit v1.1


From 912d35f86781e64d73be1ef358f703c08905ac37 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 26 Apr 2006 10:59:21 +0200
Subject: [PATCH] Add support for the sys_vmsplice syscall

sys_splice() moves data to/from pipes with a file input/output. sys_vmsplice()
moves data to a pipe, with the input being a user address range instead.

This uses an approach suggested by Linus, where we can hold partial ranges
inside the pages[] map. Hopefully this will be useful for network
receive support as well.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 include/asm-x86_64/unistd.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86_64')

diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 98c36ea..feb77cb 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -615,8 +615,10 @@ __SYSCALL(__NR_splice, sys_splice)
 __SYSCALL(__NR_tee, sys_tee)
 #define __NR_sync_file_range	277
 __SYSCALL(__NR_sync_file_range, sys_sync_file_range)
+#define __NR_vmsplice		278
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 
-#define __NR_syscall_max __NR_sync_file_range
+#define __NR_syscall_max __NR_vmsplice
 
 #ifndef __NO_STUBS
 
-- 
cgit v1.1