IFC @ r238370

author: grehan <grehan@FreeBSD.org> 2012-07-11 19:54:21 +0000
committer: grehan <grehan@FreeBSD.org> 2012-07-11 19:54:21 +0000
commit: ede42824618710ffa9ac08c805d8bf39bd5661ce (patch)
tree: 09d40ee855683606d0771d694b0686fedfe68e32 /sys/amd64/include
parent: fc13a01d538ded0843702a871a58cba4147b6037 (diff)
parent: c72304b145d41338bd04103bd7e56b101261ca88 (diff)
download: FreeBSD-src-ede42824618710ffa9ac08c805d8bf39bd5661ce.zip
FreeBSD-src-ede42824618710ffa9ac08c805d8bf39bd5661ce.tar.gz
10 files changed, 104 insertions, 43 deletions
diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h
index e167b54..99a94b7 100644
--- a/sys/amd64/include/atomic.h
+++ b/sys/amd64/include/atomic.h
@@ -81,8 +81,9 @@ int	atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src);
 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
 u_long	atomic_fetchadd_long(volatile u_long *p, u_long v);
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)			\
-u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p);	\
+#define	ATOMIC_LOAD(TYPE, LOP)					\
+u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
+#define	ATOMIC_STORE(TYPE)					\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
 #else /* !KLD_MODULE && __GNUCLIKE_ASM */
@@ -210,37 +211,43 @@ atomic_fetchadd_long(volatile u_long *p, u_long v)
 	return (v);
 }
 
-#if defined(_KERNEL) && !defined(SMP)
-
 /*
- * We assume that a = b will do atomic loads and stores.  However, on a
- * PentiumPro or higher, reads may pass writes, so for that case we have
- * to use a serializing instruction (i.e. with LOCK) to do the load in
- * SMP kernels.  For UP kernels, however, the cache of the single processor
- * is always consistent, so we only need to take care of compiler.
+ * We assume that a = b will do atomic loads and stores.  Due to the
+ * IA32 memory model, a simple store guarantees release semantics.
+ *
+ * However, loads may pass stores, so for atomic_load_acq we have to
+ * ensure a Store/Load barrier to do the load in SMP kernels.  We use
+ * "lock cmpxchg" as recommended by the AMD Software Optimization
+ * Guide, and not mfence.  For UP kernels, however, the cache of the
+ * single processor is always consistent, so we only need to take care
+ * of the compiler.
  */
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_STORE(TYPE)				\
+static __inline void					\
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{							\
+	__asm __volatile("" : : : "memory");		\
+	*p = v;						\
+}							\
+struct __hack
+
+#if defined(_KERNEL) && !defined(SMP)
+
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
 	u_##TYPE tmp;					\
 							\
 	tmp = *p;					\
-	__asm __volatile ("" : : : "memory");		\
+	__asm __volatile("" : : : "memory");		\
 	return (tmp);					\
 }							\
-							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile ("" : : : "memory");		\
-	*p = v;						\
-}							\
 struct __hack
 
 #else /* !(_KERNEL && !SMP) */
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
@@ -254,19 +261,6 @@ atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 							\
 	return (res);					\
 }							\
-							\
-/*							\
- * The XCHG instruction asserts LOCK automagically.	\
- */							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile(SOP				\
-	: "=m" (*p),			/* 0 */		\
-	  "+r" (v)			/* 1 */		\
-	: "m" (*p)			/* 2 */		\
-	: "memory");					\
-}							\
 struct __hack
 
 #endif /* _KERNEL && !SMP */
@@ -293,13 +287,19 @@ ATOMIC_ASM(clear,    long,  "andq %1,%0",  "ir", ~v);
 ATOMIC_ASM(add,	     long,  "addq %1,%0",  "ir",  v);
 ATOMIC_ASM(subtract, long,  "subq %1,%0",  "ir",  v);
 
-ATOMIC_STORE_LOAD(char,	"cmpxchgb %b0,%1", "xchgb %b1,%0");
-ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
-ATOMIC_STORE_LOAD(int,	"cmpxchgl %0,%1",  "xchgl %1,%0");
-ATOMIC_STORE_LOAD(long,	"cmpxchgq %0,%1",  "xchgq %1,%0");
+ATOMIC_LOAD(char,  "cmpxchgb %b0,%1");
+ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
+ATOMIC_LOAD(int,   "cmpxchgl %0,%1");
+ATOMIC_LOAD(long,  "cmpxchgq %0,%1");
+
+ATOMIC_STORE(char);
+ATOMIC_STORE(short);
+ATOMIC_STORE(int);
+ATOMIC_STORE(long);
 
 #undef ATOMIC_ASM
-#undef ATOMIC_STORE_LOAD
+#undef ATOMIC_LOAD
+#undef ATOMIC_STORE
 
 #ifndef WANT_FUNCTIONS
 
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index c616a3e..09d04db 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -107,6 +107,13 @@ clflush(u_long addr)
 }
 
 static __inline void
+clts(void)
+{
+
+	__asm __volatile("clts");
+}
+
+static __inline void
 disable_intr(void)
 {
 	__asm __volatile("cli" : : : "memory");
@@ -273,6 +280,15 @@ outw(u_int port, u_short data)
 	__asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port));
 }
 
+static __inline u_long
+popcntq(u_long mask)
+{
+	u_long result;
+
+	__asm __volatile("popcntq %1,%0" : "=r" (result) : "rm" (mask));
+	return (result);
+}
+
 static __inline void
 mfence(void)
 {
@@ -409,6 +425,25 @@ rcr4(void)
 	return (data);
 }
 
+static __inline u_long
+rxcr(u_int reg)
+{
+	u_int low, high;
+
+	__asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg));
+	return (low | ((uint64_t)high << 32));
+}
+
+static __inline void
+load_xcr(u_int reg, u_long val)
+{
+	u_int low, high;
+
+	low = val;
+	high = val >> 32;
+	__asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high));
+}
+
 /*
  * Global TLB flush (except for thise for pages marked PG_G)
  */
@@ -691,6 +726,9 @@ intr_restore(register_t rflags)
 int	breakpoint(void);
 u_int	bsfl(u_int mask);
 u_int	bsrl(u_int mask);
+void	clflush(u_long addr);
+void	clts(void);
+void	cpuid_count(u_int ax, u_int cx, u_int *p);
 void	disable_intr(void);
 void	do_cpuid(u_int ax, u_int *p);
 void	enable_intr(void);
diff --git a/sys/amd64/include/elf.h b/sys/amd64/include/elf.h
index ded4e44..d69c6b4 100644
--- a/sys/amd64/include/elf.h
+++ b/sys/amd64/include/elf.h
@@ -94,6 +94,7 @@ __ElfType(Auxinfo);
 #define	AT_NCPUS	19	/* Number of CPUs. */
 #define	AT_PAGESIZES	20	/* Pagesizes. */
 #define	AT_PAGESIZESLEN	21	/* Number of pagesizes. */
+#define	AT_TIMEKEEP	22	/* Pointer to timehands. */
 #define	AT_STACKPROT	23	/* Initial stack protection. */
 
 #define	AT_COUNT	24	/* Count of defined aux entry types. */
diff --git a/sys/amd64/include/in_cksum.h b/sys/amd64/include/in_cksum.h
index adfde8f..156035e 100644
--- a/sys/amd64/include/in_cksum.h
+++ b/sys/amd64/include/in_cksum.h
@@ -43,6 +43,7 @@
 
 #define in_cksum(m, len)	in_cksum_skip(m, len, 0)
 
+#if defined(IPVERSION) && (IPVERSION == 4)
 /*
  * It it useful to have an Internet checksum routine which is inlineable
  * and optimized specifically for the task of computing IP header checksums
@@ -69,9 +70,12 @@ in_cksum_update(struct ip *ip)
 	} while(0)
 
 #endif
+#endif
 
 #ifdef _KERNEL
+#if defined(IPVERSION) && (IPVERSION == 4)
 u_int in_cksum_hdr(const struct ip *ip);
+#endif
 u_short	in_addword(u_short sum, u_short b);
 u_short	in_pseudo(u_int sum, u_int b, u_int c);
 u_short	in_cksum_skip(struct mbuf *m, int len, int skip);
diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h
index 700e35f..9d066b1 100644
--- a/sys/amd64/include/intr_machdep.h
+++ b/sys/amd64/include/intr_machdep.h
@@ -140,9 +140,7 @@ int	elcr_probe(void);
 enum intr_trigger elcr_read_trigger(u_int irq);
 void	elcr_resume(void);
 void	elcr_write_trigger(u_int irq, enum intr_trigger trigger);
-#ifdef SMP
 void	intr_add_cpu(u_int cpu);
-#endif
 int	intr_add_handler(const char *name, int vector, driver_filter_t filter, 
 			 driver_intr_t handler, void *arg, enum intr_type flags, 
 			 void **cookiep);    
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index 61f651bb..22cbbe2 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -91,9 +91,20 @@ struct pcb {
 	/* local tss, with i/o bitmap; NULL for common */
 	struct amd64tss *pcb_tssp;
 
+	/* model specific registers */
+	register_t	pcb_efer;
+	register_t	pcb_star;
+	register_t	pcb_lstar;
+	register_t	pcb_cstar;
+	register_t	pcb_sfmask;
+	register_t	pcb_xsmask;
+
+	/* fpu context for suspend/resume */
+	void		*pcb_fpususpend;
+
 	struct savefpu	*pcb_save;
 
-	uint64_t	pcb_pad[2];
+	uint64_t	pcb_pad[3];
 };
 
 #ifdef _KERNEL
@@ -130,7 +141,8 @@ clear_pcb_flags(struct pcb *pcb, const u_int flags)
 }
 
 void	makectx(struct trapframe *, struct pcb *);
-int	savectx(struct pcb *);
+int	savectx(struct pcb *) __returns_twice;
+void	resumectx(struct pcb *);
 
 #endif
 
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 1b8108a..71045ae 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -295,7 +295,7 @@ struct pv_chunk {
 	pmap_t			pc_pmap;
 	TAILQ_ENTRY(pv_chunk)	pc_list;
 	uint64_t		pc_map[_NPCM];	/* bitmap; 1 = free */
-	uint64_t		pc_spare[2];
+	TAILQ_ENTRY(pv_chunk)	pc_lru;
 	struct pv_entry		pc_pventry[_NPCPV];
 };
 
@@ -309,6 +309,7 @@ extern vm_offset_t virtual_avail;
 extern vm_offset_t virtual_end;
 
 #define	pmap_page_get_memattr(m)	((vm_memattr_t)(m)->md.pat_mode)
+#define	pmap_page_is_write_mapped(m)	(((m)->aflags & PGA_WRITEABLE) != 0)
 #define	pmap_unmapbios(va, sz)	pmap_unmapdev((va), (sz))
 
 void	pmap_bootstrap(vm_paddr_t *);
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index de686b7..16d87ea 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -59,6 +59,7 @@ void	cpu_add(u_int apic_id, char boot_cpu);
 void	cpustop_handler(void);
 void	cpususpend_handler(void);
 void	init_secondary(void);
+void	ipi_startup(int apic_id, int vector);
 void	ipi_all_but_self(u_int ipi);
 void 	ipi_bitmap_handler(struct trapframe frame);
 void	ipi_cpu(int cpu, u_int ipi);
diff --git a/sys/amd64/include/vdso.h b/sys/amd64/include/vdso.h
new file mode 100644
index 0000000..b81c455
--- /dev/null
+++ b/sys/amd64/include/vdso.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/vdso.h>
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index 0c65602..e06fa39 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -54,7 +54,7 @@
  */
 #define	MAXTSIZ		(128UL*1024*1024)	/* max text size */
 #ifndef DFLDSIZ
-#define	DFLDSIZ		(128UL*1024*1024)	/* initial data size limit */
+#define	DFLDSIZ		(32768UL*1024*1024)	/* initial data size limit */
 #endif
 #ifndef MAXDSIZ
 #define	MAXDSIZ		(32768UL*1024*1024)	/* max data size */
author	grehan <grehan@FreeBSD.org>	2012-07-11 19:54:21 +0000
committer	grehan <grehan@FreeBSD.org>	2012-07-11 19:54:21 +0000
commit	ede42824618710ffa9ac08c805d8bf39bd5661ce (patch)
tree	09d40ee855683606d0771d694b0686fedfe68e32 /sys/amd64/include
parent	fc13a01d538ded0843702a871a58cba4147b6037 (diff)
parent	c72304b145d41338bd04103bd7e56b101261ca88 (diff)
download	FreeBSD-src-ede42824618710ffa9ac08c805d8bf39bd5661ce.zip FreeBSD-src-ede42824618710ffa9ac08c805d8bf39bd5661ce.tar.gz