From 0ccc8b7ac86053388e793bad20bd26bd777752eb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 20 Mar 2014 08:55:00 +0100 Subject: s390/bitops,atomic: add missing memory barriers When reworking the bitops and atomic ops I missed that those instructions that got atomic behaviour only perform a "specific-operand-serialization" instead of a full "serialization". The compare-and-swap instruction used before performs a full serialization before and after the instruction is executed, which means it has full memory barrier semantics. In order to give the new bitops and atomic ops functions also full memory barrier semantics add a "bcr 14,0" before and after each of those new instructions which performs full serialization as well. This restores memory barrier semantics for bitops and atomic ops functions which return values, like e.g. atomic_add_return(), but not for functions which do not return a value, like e.g. atomic_add(). This is consistent to other architectures and what common code requires. Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/atomic.h | 70 +++++++++++++++++++++++++----------------- arch/s390/include/asm/bitops.h | 41 ++++++++++++++----------- 2 files changed, 65 insertions(+), 46 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index fa9aaf7..1d47061 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -15,23 +15,29 @@ #include #include +#include #include #define ATOMIC_INIT(i) { (i) } +#define __ATOMIC_NO_BARRIER "\n" + #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES #define __ATOMIC_OR "lao" #define __ATOMIC_AND "lan" #define __ATOMIC_ADD "laa" +#define __ATOMIC_BARRIER "bcr 14,0\n" -#define __ATOMIC_LOOP(ptr, op_val, op_string) \ +#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \ ({ \ int old_val; \ \ typecheck(atomic_t *, ptr); \ asm volatile( \ + __barrier \ op_string " %0,%2,%1\n" \ + __barrier \ : "=d" (old_val), "+Q" ((ptr)->counter) \ : "d" (op_val) \ : "cc", "memory"); \ @@ -43,8 +49,9 @@ #define __ATOMIC_OR "or" #define __ATOMIC_AND "nr" #define __ATOMIC_ADD "ar" +#define __ATOMIC_BARRIER "\n" -#define __ATOMIC_LOOP(ptr, op_val, op_string) \ +#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \ ({ \ int old_val, new_val; \ \ @@ -82,7 +89,7 @@ static inline void atomic_set(atomic_t *v, int i) static inline int atomic_add_return(int i, atomic_t *v) { - return __ATOMIC_LOOP(v, i, __ATOMIC_ADD) + i; + return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i; } static inline void atomic_add(int i, atomic_t *v) @@ -94,12 +101,10 @@ static inline void atomic_add(int i, atomic_t *v) : "+Q" (v->counter) : "i" (i) : "cc", "memory"); - } else { - atomic_add_return(i, v); + return; } -#else - atomic_add_return(i, v); #endif + __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_NO_BARRIER); } #define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0) @@ -115,12 +120,12 @@ static inline void atomic_add(int i, atomic_t *v) static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) { - __ATOMIC_LOOP(v, ~mask, __ATOMIC_AND); + __ATOMIC_LOOP(v, ~mask, __ATOMIC_AND, __ATOMIC_NO_BARRIER); } static inline void atomic_set_mask(unsigned int mask, atomic_t *v) { - __ATOMIC_LOOP(v, mask, __ATOMIC_OR); + __ATOMIC_LOOP(v, mask, __ATOMIC_OR, __ATOMIC_NO_BARRIER); } #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) @@ -157,19 +162,24 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #ifdef CONFIG_64BIT +#define __ATOMIC64_NO_BARRIER "\n" + #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES #define __ATOMIC64_OR "laog" #define __ATOMIC64_AND "lang" #define __ATOMIC64_ADD "laag" +#define __ATOMIC64_BARRIER "bcr 14,0\n" -#define __ATOMIC64_LOOP(ptr, op_val, op_string) \ +#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \ ({ \ long long old_val; \ \ typecheck(atomic64_t *, ptr); \ asm volatile( \ + __barrier \ op_string " %0,%2,%1\n" \ + __barrier \ : "=d" (old_val), "+Q" ((ptr)->counter) \ : "d" (op_val) \ : "cc", "memory"); \ @@ -181,8 +191,9 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define __ATOMIC64_OR "ogr" #define __ATOMIC64_AND "ngr" #define __ATOMIC64_ADD "agr" +#define __ATOMIC64_BARRIER "\n" -#define __ATOMIC64_LOOP(ptr, op_val, op_string) \ +#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \ ({ \ long long old_val, new_val; \ \ @@ -220,17 +231,32 @@ static inline void atomic64_set(atomic64_t *v, long long i) static inline long long atomic64_add_return(long long i, atomic64_t *v) { - return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD) + i; + return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i; +} + +static inline void atomic64_add(long long i, atomic64_t *v) +{ +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { + asm volatile( + "agsi %0,%1\n" + : "+Q" (v->counter) + : "i" (i) + : "cc", "memory"); + return; + } +#endif + __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_NO_BARRIER); } static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v) { - __ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND); + __ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND, __ATOMIC64_NO_BARRIER); } static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v) { - __ATOMIC64_LOOP(v, mask, __ATOMIC64_OR); + __ATOMIC64_LOOP(v, mask, __ATOMIC64_OR, __ATOMIC64_NO_BARRIER); } #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) @@ -334,25 +360,13 @@ static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v) } while (atomic64_cmpxchg(v, old, new) != old); } -#endif /* CONFIG_64BIT */ - static inline void atomic64_add(long long i, atomic64_t *v) { -#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { - asm volatile( - "agsi %0,%1\n" - : "+Q" (v->counter) - : "i" (i) - : "cc", "memory"); - } else { - atomic64_add_return(i, v); - } -#else atomic64_add_return(i, v); -#endif } +#endif /* CONFIG_64BIT */ + static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) { long long c, old; diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index ec5ef89..5205424 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -47,14 +47,18 @@ #include #include +#include + +#define __BITOPS_NO_BARRIER "\n" #ifndef CONFIG_64BIT #define __BITOPS_OR "or" #define __BITOPS_AND "nr" #define __BITOPS_XOR "xr" +#define __BITOPS_BARRIER "\n" -#define __BITOPS_LOOP(__addr, __val, __op_string) \ +#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \ ({ \ unsigned long __old, __new; \ \ @@ -67,7 +71,7 @@ " jl 0b" \ : "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\ : "d" (__val) \ - : "cc"); \ + : "cc", "memory"); \ __old; \ }) @@ -78,17 +82,20 @@ #define __BITOPS_OR "laog" #define __BITOPS_AND "lang" #define __BITOPS_XOR "laxg" +#define __BITOPS_BARRIER "bcr 14,0\n" -#define __BITOPS_LOOP(__addr, __val, __op_string) \ +#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \ ({ \ unsigned long __old; \ \ typecheck(unsigned long *, (__addr)); \ asm volatile( \ + __barrier \ __op_string " %0,%2,%1\n" \ + __barrier \ : "=d" (__old), "+Q" (*(__addr)) \ : "d" (__val) \ - : "cc"); \ + : "cc", "memory"); \ __old; \ }) @@ -97,8 +104,9 @@ #define __BITOPS_OR "ogr" #define __BITOPS_AND "ngr" #define __BITOPS_XOR "xgr" +#define __BITOPS_BARRIER "\n" -#define __BITOPS_LOOP(__addr, __val, __op_string) \ +#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \ ({ \ unsigned long __old, __new; \ \ @@ -111,7 +119,7 @@ " jl 0b" \ : "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\ : "d" (__val) \ - : "cc"); \ + : "cc", "memory"); \ __old; \ }) @@ -149,12 +157,12 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *ptr) "oi %0,%b1\n" : "+Q" (*caddr) : "i" (1 << (nr & 7)) - : "cc"); + : "cc", "memory"); return; } #endif mask = 1UL << (nr & (BITS_PER_LONG - 1)); - __BITOPS_LOOP(addr, mask, __BITOPS_OR); + __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_NO_BARRIER); } static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr) @@ -170,12 +178,12 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr) "ni %0,%b1\n" : "+Q" (*caddr) : "i" (~(1 << (nr & 7))) - : "cc"); + : "cc", "memory"); return; } #endif mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); - __BITOPS_LOOP(addr, mask, __BITOPS_AND); + __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_NO_BARRIER); } static inline void change_bit(unsigned long nr, volatile unsigned long *ptr) @@ -191,12 +199,12 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *ptr) "xi %0,%b1\n" : "+Q" (*caddr) : "i" (1 << (nr & 7)) - : "cc"); + : "cc", "memory"); return; } #endif mask = 1UL << (nr & (BITS_PER_LONG - 1)); - __BITOPS_LOOP(addr, mask, __BITOPS_XOR); + __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_NO_BARRIER); } static inline int @@ -206,8 +214,7 @@ test_and_set_bit(unsigned long nr, volatile unsigned long *ptr) unsigned long old, mask; mask = 1UL << (nr & (BITS_PER_LONG - 1)); - old = __BITOPS_LOOP(addr, mask, __BITOPS_OR); - barrier(); + old = __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_BARRIER); return (old & mask) != 0; } @@ -218,8 +225,7 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr) unsigned long old, mask; mask = ~(1UL << (nr & (BITS_PER_LONG - 1))); - old = __BITOPS_LOOP(addr, mask, __BITOPS_AND); - barrier(); + old = __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_BARRIER); return (old & ~mask) != 0; } @@ -230,8 +236,7 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *ptr) unsigned long old, mask; mask = 1UL << (nr & (BITS_PER_LONG - 1)); - old = __BITOPS_LOOP(addr, mask, __BITOPS_XOR); - barrier(); + old = __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_BARRIER); return (old & mask) != 0; } -- cgit v1.1 From 072c2790294210419db287995628406e270bf027 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 2 Apr 2014 14:06:10 +0200 Subject: s390/irq: Add defines for external interruption codes Introduce defines for external interruption codes so that we can get rid of some "magic" numbers in the s390 source code. Signed-off-by: Thomas Huth Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irq.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 5f8bcc5..58c1ea6 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -16,6 +16,19 @@ /* This number is used when no interrupt has been assigned */ #define NO_IRQ 0 +/* External interruption codes */ +#define EXT_IRQ_INTERRUPT_KEY 0x0040 +#define EXT_IRQ_CLK_COMP 0x1004 +#define EXT_IRQ_CPU_TIMER 0x1005 +#define EXT_IRQ_WARNING_TRACK 0x1007 +#define EXT_IRQ_MALFUNC_ALERT 0x1200 +#define EXT_IRQ_EMERGENCY_SIG 0x1201 +#define EXT_IRQ_EXTERNAL_CALL 0x1202 +#define EXT_IRQ_TIMING_ALERT 0x1406 +#define EXT_IRQ_MEASURE_ALERT 0x1407 +#define EXT_IRQ_SERVICE_SIG 0x2401 +#define EXT_IRQ_IUCV 0x4000 + #ifndef __ASSEMBLY__ #include -- cgit v1.1 From 1dad093b66fdd4fd5d7d2692169dc1bafd794628 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 31 Mar 2014 15:24:08 +0200 Subject: s390/irq: Use defines for external interruption codes Use the new defines for external interruption codes to get rid of "magic" numbers in the s390 source code. And while we're at it, also rename the (un-)register_external_interrupt function to something shorter so that this patch does not exceed the 80 columns all over the place. Signed-off-by: Thomas Huth Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irq.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 58c1ea6..763ccdc 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -27,6 +27,7 @@ #define EXT_IRQ_TIMING_ALERT 0x1406 #define EXT_IRQ_MEASURE_ALERT 0x1407 #define EXT_IRQ_SERVICE_SIG 0x2401 +#define EXT_IRQ_CP_SERVICE 0x2603 #define EXT_IRQ_IUCV 0x4000 #ifndef __ASSEMBLY__ @@ -89,8 +90,8 @@ struct ext_code { typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long); -int register_external_interrupt(u16 code, ext_int_handler_t handler); -int unregister_external_interrupt(u16 code, ext_int_handler_t handler); +int register_external_irq(u16 code, ext_int_handler_t handler); +int unregister_external_irq(u16 code, ext_int_handler_t handler); enum irq_subclass { IRQ_SUBCLASS_MEASUREMENT_ALERT = 5, -- cgit v1.1 From 02a8f3abb708919149cb657a5202f4603f0c38e2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 3 Apr 2014 13:54:59 +0200 Subject: s390/mm,tlb: safeguard against speculative TLB creation The principles of operations states that the CPU is allowed to create TLB entries for an address space anytime while an ASCE is loaded to the control register. This is true even if the CPU is running in the kernel and the user address space is not (actively) accessed. In theory this can affect two aspects of the TLB flush logic. For full-mm flushes the ASCE of the dying process is still attached. The approach to flush first with IDTE and then just free all page tables can in theory lead to stale TLB entries. Use the batched free of page tables for the full-mm flushes as well. For operations that can have a stale ASCE in the control register, e.g. a delayed update_user_asce in switch_mm, load the kernel ASCE to prevent invalid TLBs from being created. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu_context.h | 17 +++++++++++++---- arch/s390/include/asm/tlb.h | 14 +++----------- 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 38149b6..7abf318 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -35,7 +35,7 @@ static inline int init_new_context(struct task_struct *tsk, #define LCTL_OPCODE "lctlg" #endif -static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) +static inline void update_user_asce(struct mm_struct *mm) { pgd_t *pgd = mm->pgd; @@ -45,6 +45,13 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) set_fs(current->thread.mm_segment); } +static inline void clear_user_asce(struct mm_struct *mm) +{ + S390_lowcore.user_asce = S390_lowcore.kernel_asce; + asm volatile(LCTL_OPCODE" 1,1,%0\n" : : "m" (S390_lowcore.user_asce)); + asm volatile(LCTL_OPCODE" 7,7,%0\n" : : "m" (S390_lowcore.user_asce)); +} + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -53,11 +60,13 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (prev == next) return; if (atomic_inc_return(&next->context.attach_count) >> 16) { - /* Delay update_mm until all TLB flushes are done. */ + /* Delay update_user_asce until all TLB flushes are done. */ set_tsk_thread_flag(tsk, TIF_TLB_WAIT); + /* Clear old ASCE by loading the kernel ASCE. */ + clear_user_asce(next); } else { cpumask_set_cpu(cpu, mm_cpumask(next)); - update_mm(next, tsk); + update_user_asce(next); if (next->context.flush_mm) /* Flush pending TLBs */ __tlb_flush_mm(next); @@ -80,7 +89,7 @@ static inline void finish_arch_post_lock_switch(void) cpu_relax(); cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - update_mm(mm, tsk); + update_user_asce(mm); if (mm->context.flush_mm) __tlb_flush_mm(mm); preempt_enable(); diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 2cb846c..c544b6f 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -57,8 +57,6 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb, tlb->end = end; tlb->fullmm = !(start | (end+1)); tlb->batch = NULL; - if (tlb->fullmm) - __tlb_flush_mm(mm); } static inline void tlb_flush_mmu(struct mmu_gather *tlb) @@ -96,9 +94,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long address) { - if (!tlb->fullmm) - return page_table_free_rcu(tlb, (unsigned long *) pte); - page_table_free(tlb->mm, (unsigned long *) pte); + page_table_free_rcu(tlb, (unsigned long *) pte); } /* @@ -114,9 +110,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, #ifdef CONFIG_64BIT if (tlb->mm->context.asce_limit <= (1UL << 31)) return; - if (!tlb->fullmm) - return tlb_remove_table(tlb, pmd); - crst_table_free(tlb->mm, (unsigned long *) pmd); + tlb_remove_table(tlb, pmd); #endif } @@ -133,9 +127,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, #ifdef CONFIG_64BIT if (tlb->mm->context.asce_limit <= (1UL << 42)) return; - if (!tlb->fullmm) - return tlb_remove_table(tlb, pud); - crst_table_free(tlb->mm, (unsigned long *) pud); + tlb_remove_table(tlb, pud); #endif } -- cgit v1.1 From 1b948d6caec4f28e3524244ca0f77c6ae8ddceef Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 3 Apr 2014 13:55:01 +0200 Subject: s390/mm,tlb: optimize TLB flushing for zEC12 The zEC12 machines introduced the local-clearing control for the IDTE and IPTE instruction. If the control is set only the TLB of the local CPU is cleared of entries, either all entries of a single address space for IDTE, or the entry for a single page-table entry for IPTE. Without the local-clearing control the TLB flush is broadcasted to all CPUs in the configuration, which is expensive. The reset of the bit mask of the CPUs that need flushing after a non-local IDTE is tricky. As TLB entries for an address space remain in the TLB even if the address space is detached a new bit field is required to keep track of attached CPUs vs. CPUs in the need of a flush. After a non-local flush with IDTE the bit-field of attached CPUs is copied to the bit-field of CPUs in need of a flush. The ordering of operations on cpu_attach_mask, attach_count and mm_cpumask(mm) is such that an underindication in mm_cpumask(mm) is prevented but an overindication in mm_cpumask(mm) is possible. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 2 + arch/s390/include/asm/mmu_context.h | 5 ++ arch/s390/include/asm/pgtable.h | 128 ++++++++++++++++++++++++++---------- arch/s390/include/asm/setup.h | 3 + arch/s390/include/asm/tlbflush.h | 115 ++++++++++++++++++++++++++------ 5 files changed, 197 insertions(+), 56 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index ff132ac..f77695a 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -1,9 +1,11 @@ #ifndef __MMU_H #define __MMU_H +#include #include typedef struct { + cpumask_t cpu_attach_mask; atomic_t attach_count; unsigned int flush_mm; spinlock_t list_lock; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 7abf318..71a2588 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -15,6 +15,7 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.attach_count, 0); mm->context.flush_mm = 0; mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; @@ -59,6 +60,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (prev == next) return; + if (MACHINE_HAS_TLB_LC) + cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); if (atomic_inc_return(&next->context.attach_count) >> 16) { /* Delay update_user_asce until all TLB flushes are done. */ set_tsk_thread_flag(tsk, TIF_TLB_WAIT); @@ -73,6 +76,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, } atomic_dec(&prev->context.attach_count); WARN_ON(atomic_read(&prev->context.attach_count) < 0); + if (MACHINE_HAS_TLB_LC) + cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } #define finish_arch_post_lock_switch finish_arch_post_lock_switch diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1ab75ea..66d5183 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1068,12 +1068,35 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); } +static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep) +{ + unsigned long pto = (unsigned long) ptep; + +#ifndef CONFIG_64BIT + /* pto in ESA mode must point to the start of the segment table */ + pto &= 0x7ffffc00; +#endif + /* Invalidation + local TLB flush for the pte */ + asm volatile( + " .insn rrf,0xb2210000,%2,%3,0,1" + : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); +} + static inline void ptep_flush_direct(struct mm_struct *mm, unsigned long address, pte_t *ptep) { + int active, count; + if (pte_val(*ptep) & _PAGE_INVALID) return; - __ptep_ipte(address, ptep); + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __ptep_ipte_local(address, ptep); + else + __ptep_ipte(address, ptep); + atomic_sub(0x10000, &mm->context.attach_count); } static inline void ptep_flush_lazy(struct mm_struct *mm, @@ -1382,35 +1405,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) -static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) -{ - unsigned long sto = (unsigned long) pmdp - - pmd_index(address) * sizeof(pmd_t); - - if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) { - asm volatile( - " .insn rrf,0xb98e0000,%2,%3,0,0" - : "=m" (*pmdp) - : "m" (*pmdp), "a" (sto), - "a" ((address & HPAGE_MASK)) - : "cc" - ); - } -} - -static inline void __pmd_csp(pmd_t *pmdp) -{ - register unsigned long reg2 asm("2") = pmd_val(*pmdp); - register unsigned long reg3 asm("3") = pmd_val(*pmdp) | - _SEGMENT_ENTRY_INVALID; - register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; - - asm volatile( - " csp %1,%3" - : "=m" (*pmdp) - : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); -} - #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) { @@ -1479,18 +1473,80 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ +static inline void __pmdp_csp(pmd_t *pmdp) +{ + register unsigned long reg2 asm("2") = pmd_val(*pmdp); + register unsigned long reg3 asm("3") = pmd_val(*pmdp) | + _SEGMENT_ENTRY_INVALID; + register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; + + asm volatile( + " csp %1,%3" + : "=m" (*pmdp) + : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); +} + +static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) +{ + unsigned long sto; + + sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,0" + : "=m" (*pmdp) + : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK)) + : "cc" ); +} + +static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) +{ + unsigned long sto; + + sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); + asm volatile( + " .insn rrf,0xb98e0000,%2,%3,0,1" + : "=m" (*pmdp) + : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK)) + : "cc" ); +} + +static inline void pmdp_flush_direct(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + int active, count; + + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return; + if (!MACHINE_HAS_IDTE) { + __pmdp_csp(pmdp); + return; + } + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __pmdp_idte_local(address, pmdp); + else + __pmdp_idte(address, pmdp); + atomic_sub(0x10000, &mm->context.attach_count); +} + static inline void pmdp_flush_lazy(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { int active, count; + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return; active = (mm == current->active_mm) ? 1 : 0; count = atomic_add_return(0x10000, &mm->context.attach_count); if ((count & 0xffff) <= active) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; - } else - __pmd_idte(address, pmdp); + } else if (MACHINE_HAS_IDTE) + __pmdp_idte(address, pmdp); + else + __pmdp_csp(pmdp); atomic_sub(0x10000, &mm->context.attach_count); } @@ -1543,7 +1599,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, pmd_t pmd; pmd = *pmdp; - __pmd_idte(address, pmdp); + pmdp_flush_direct(vma->vm_mm, address, pmdp); *pmdp = pmd_mkold(pmd); return pmd_young(pmd); } @@ -1554,7 +1610,7 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, { pmd_t pmd = *pmdp; - __pmd_idte(address, pmdp); + pmdp_flush_direct(mm, address, pmdp); pmd_clear(pmdp); return pmd; } @@ -1570,7 +1626,7 @@ static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma, static inline void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - __pmd_idte(address, pmdp); + pmdp_flush_direct(vma->vm_mm, address, pmdp); } #define __HAVE_ARCH_PMDP_SET_WRPROTECT @@ -1580,7 +1636,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, pmd_t pmd = *pmdp; if (pmd_write(pmd)) { - __pmd_idte(address, pmdp); + pmdp_flush_direct(mm, address, pmdp); set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd)); } } diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 406f3a1..b31b22d 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -68,6 +68,7 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, #define MACHINE_FLAG_TOPOLOGY (1UL << 14) #define MACHINE_FLAG_TE (1UL << 15) #define MACHINE_FLAG_RRBM (1UL << 16) +#define MACHINE_FLAG_TLB_LC (1UL << 17) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -90,6 +91,7 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, #define MACHINE_HAS_TOPOLOGY (0) #define MACHINE_HAS_TE (0) #define MACHINE_HAS_RRBM (0) +#define MACHINE_HAS_TLB_LC (0) #else /* CONFIG_64BIT */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -102,6 +104,7 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM) +#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) #endif /* CONFIG_64BIT */ /* diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index f9fef04..16c9c88 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -7,19 +7,41 @@ #include /* - * Flush all tlb entries on the local cpu. + * Flush all TLB entries on the local CPU. */ static inline void __tlb_flush_local(void) { asm volatile("ptlb" : : : "memory"); } -#ifdef CONFIG_SMP /* - * Flush all tlb entries on all cpus. + * Flush TLB entries for a specific ASCE on all CPUs */ +static inline void __tlb_flush_idte(unsigned long asce) +{ + /* Global TLB flush for the mm */ + asm volatile( + " .insn rrf,0xb98e0000,0,%0,%1,0" + : : "a" (2048), "a" (asce) : "cc"); +} + +/* + * Flush TLB entries for a specific ASCE on the local CPU + */ +static inline void __tlb_flush_idte_local(unsigned long asce) +{ + /* Local TLB flush for the mm */ + asm volatile( + " .insn rrf,0xb98e0000,0,%0,%1,1" + : : "a" (2048), "a" (asce) : "cc"); +} + +#ifdef CONFIG_SMP void smp_ptlb_all(void); +/* + * Flush all TLB entries on all CPUs. + */ static inline void __tlb_flush_global(void) { register unsigned long reg2 asm("2"); @@ -42,36 +64,89 @@ static inline void __tlb_flush_global(void) : : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" ); } +/* + * Flush TLB entries for a specific mm on all CPUs (in case gmap is used + * this implicates multiple ASCEs!). + */ static inline void __tlb_flush_full(struct mm_struct *mm) { - cpumask_t local_cpumask; - preempt_disable(); - /* - * If the process only ran on the local cpu, do a local flush. - */ - cpumask_copy(&local_cpumask, cpumask_of(smp_processor_id())); - if (cpumask_equal(mm_cpumask(mm), &local_cpumask)) + atomic_add(0x10000, &mm->context.attach_count); + if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + /* Local TLB flush */ __tlb_flush_local(); - else + } else { + /* Global TLB flush */ __tlb_flush_global(); + /* Reset TLB flush mask */ + if (MACHINE_HAS_TLB_LC) + cpumask_copy(mm_cpumask(mm), + &mm->context.cpu_attach_mask); + } + atomic_sub(0x10000, &mm->context.attach_count); preempt_enable(); } + +/* + * Flush TLB entries for a specific ASCE on all CPUs. + */ +static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) +{ + int active, count; + + preempt_disable(); + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + __tlb_flush_idte_local(asce); + } else { + if (MACHINE_HAS_IDTE) + __tlb_flush_idte(asce); + else + __tlb_flush_global(); + /* Reset TLB flush mask */ + if (MACHINE_HAS_TLB_LC) + cpumask_copy(mm_cpumask(mm), + &mm->context.cpu_attach_mask); + } + atomic_sub(0x10000, &mm->context.attach_count); + preempt_enable(); +} + +static inline void __tlb_flush_kernel(void) +{ + if (MACHINE_HAS_IDTE) + __tlb_flush_idte((unsigned long) init_mm.pgd | + init_mm.context.asce_bits); + else + __tlb_flush_global(); +} #else -#define __tlb_flush_full(mm) __tlb_flush_local() #define __tlb_flush_global() __tlb_flush_local() -#endif +#define __tlb_flush_full(mm) __tlb_flush_local() /* - * Flush all tlb entries of a page table on all cpus. + * Flush TLB entries for a specific ASCE on all CPUs. */ -static inline void __tlb_flush_idte(unsigned long asce) +static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) { - asm volatile( - " .insn rrf,0xb98e0000,0,%0,%1,0" - : : "a" (2048), "a" (asce) : "cc" ); + if (MACHINE_HAS_TLB_LC) + __tlb_flush_idte_local(asce); + else + __tlb_flush_local(); } +static inline void __tlb_flush_kernel(void) +{ + if (MACHINE_HAS_TLB_LC) + __tlb_flush_idte_local((unsigned long) init_mm.pgd | + init_mm.context.asce_bits); + else + __tlb_flush_local(); +} +#endif + static inline void __tlb_flush_mm(struct mm_struct * mm) { /* @@ -80,7 +155,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) * only ran on the local cpu. */ if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) - __tlb_flush_idte((unsigned long) mm->pgd | + __tlb_flush_asce(mm, (unsigned long) mm->pgd | mm->context.asce_bits); else __tlb_flush_full(mm); @@ -130,7 +205,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - __tlb_flush_mm(&init_mm); + __tlb_flush_kernel(); } #endif /* _S390_TLBFLUSH_H */ -- cgit v1.1 From 457f2180951cdcbfb4657ddcc83b486e93497f56 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 21 Mar 2014 10:42:25 +0100 Subject: s390/uaccess: rework uaccess code - fix locking issues The current uaccess code uses a page table walk in some circumstances, e.g. in case of the in atomic futex operations or if running on old hardware which doesn't support the mvcos instruction. However it turned out that the page table walk code does not correctly lock page tables when accessing page table entries. In other words: a different cpu may invalidate a page table entry while the current cpu inspects the pte. This may lead to random data corruption. Adding correct locking however isn't trivial for all uaccess operations. Especially copy_in_user() is problematic since that requires to hold at least two locks, but must be protected against ABBA deadlock when a different cpu also performs a copy_in_user() operation. So the solution is a different approach where we change address spaces: User space runs in primary address mode, or access register mode within vdso code, like it currently already does. The kernel usually also runs in home space mode, however when accessing user space the kernel switches to primary or secondary address mode if the mvcos instruction is not available or if a compare-and-swap (futex) instruction on a user space address is performed. KVM however is special, since that requires the kernel to run in home address space while implicitly accessing user space with the sie instruction. So we end up with: User space: - runs in primary or access register mode - cr1 contains the user asce - cr7 contains the user asce - cr13 contains the kernel asce Kernel space: - runs in home space mode - cr1 contains the user or kernel asce -> the kernel asce is loaded when a uaccess requires primary or secondary address mode - cr7 contains the user or kernel asce, (changed with set_fs()) - cr13 contains the kernel asce In case of uaccess the kernel changes to: - primary space mode in case of a uaccess (copy_to_user) and uses e.g. the mvcp instruction to access user space. However the kernel will stay in home space mode if the mvcos instruction is available - secondary space mode in case of futex atomic operations, so that the instructions come from primary address space and data from secondary space In case of kvm the kernel runs in home space mode, but cr1 gets switched to contain the gmap asce before the sie instruction gets executed. When the sie instruction is finished cr1 will be switched back to contain the user asce. A context switch between two processes will always load the kernel asce for the next process in cr1. So the first exit to user space is a bit more expensive (one extra load control register instruction) than before, however keeps the code rather simple. In sum this means there is no need to perform any error prone page table walks anymore when accessing user space. The patch seems to be rather large, however it mainly removes the the page table walk code and restores the previously deleted "standard" uaccess code, with a couple of changes. The uaccess without mvcos mode can be enforced with the "uaccess_primary" kernel parameter. Reported-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/futex.h | 66 ++++++++++++++++++++++++++++++++++--- arch/s390/include/asm/mmu_context.h | 37 ++++++++++++--------- arch/s390/include/asm/switch_to.h | 1 + arch/s390/include/asm/thread_info.h | 2 ++ arch/s390/include/asm/uaccess.h | 2 -- 5 files changed, 86 insertions(+), 22 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index fda46bd..69cf5b5 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -1,12 +1,25 @@ #ifndef _ASM_S390_FUTEX_H #define _ASM_S390_FUTEX_H -#include #include +#include +#include #include -int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval); -int __futex_atomic_op_inuser(int op, u32 __user *uaddr, int oparg, int *old); +#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ + asm volatile( \ + " sacf 256\n" \ + "0: l %1,0(%6)\n" \ + "1:"insn \ + "2: cs %1,%2,0(%6)\n" \ + "3: jl 1b\n" \ + " lhi %0,0\n" \ + "4: sacf 768\n" \ + EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ + : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ + "=m" (*uaddr) \ + : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ + "m" (*uaddr) : "cc"); static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { @@ -14,13 +27,37 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval, ret; + int oldval = 0, newval, ret; + update_primary_asce(current); if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; pagefault_disable(); - ret = __futex_atomic_op_inuser(op, uaddr, oparg, &oldval); + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("lr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("lr %2,%1\nar %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("lr %2,%1\nor %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("lr %2,%1\nnr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("lr %2,%1\nxr %2,%5\n", + ret, oldval, newval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } pagefault_enable(); if (!ret) { @@ -37,4 +74,23 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) return ret; } +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret; + + update_primary_asce(current); + asm volatile( + " sacf 256\n" + "0: cs %1,%4,0(%5)\n" + "1: la %0,0\n" + "2: sacf 768\n" + EX_TABLE(0b,2b) EX_TABLE(1b,2b) + : "=d" (ret), "+d" (oldval), "=m" (*uaddr) + : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + : "cc", "memory"); + *uval = oldval; + return ret; +} + #endif /* _ASM_S390_FUTEX_H */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 71a2588..71be346 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -30,27 +30,33 @@ static inline int init_new_context(struct task_struct *tsk, #define destroy_context(mm) do { } while (0) -#ifndef CONFIG_64BIT -#define LCTL_OPCODE "lctl" -#else -#define LCTL_OPCODE "lctlg" -#endif - -static inline void update_user_asce(struct mm_struct *mm) +static inline void update_user_asce(struct mm_struct *mm, int load_primary) { pgd_t *pgd = mm->pgd; S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd); - /* Load primary space page table origin. */ - asm volatile(LCTL_OPCODE" 1,1,%0\n" : : "m" (S390_lowcore.user_asce)); + if (load_primary) + __ctl_load(S390_lowcore.user_asce, 1, 1); set_fs(current->thread.mm_segment); } -static inline void clear_user_asce(struct mm_struct *mm) +static inline void clear_user_asce(struct mm_struct *mm, int load_primary) { S390_lowcore.user_asce = S390_lowcore.kernel_asce; - asm volatile(LCTL_OPCODE" 1,1,%0\n" : : "m" (S390_lowcore.user_asce)); - asm volatile(LCTL_OPCODE" 7,7,%0\n" : : "m" (S390_lowcore.user_asce)); + + if (load_primary) + __ctl_load(S390_lowcore.user_asce, 1, 1); + __ctl_load(S390_lowcore.user_asce, 7, 7); +} + +static inline void update_primary_asce(struct task_struct *tsk) +{ + unsigned long asce; + + __ctl_store(asce, 1, 1); + if (asce != S390_lowcore.kernel_asce) + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + set_tsk_thread_flag(tsk, TIF_ASCE); } static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, @@ -58,6 +64,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, { int cpu = smp_processor_id(); + update_primary_asce(tsk); if (prev == next) return; if (MACHINE_HAS_TLB_LC) @@ -66,10 +73,10 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* Delay update_user_asce until all TLB flushes are done. */ set_tsk_thread_flag(tsk, TIF_TLB_WAIT); /* Clear old ASCE by loading the kernel ASCE. */ - clear_user_asce(next); + clear_user_asce(next, 0); } else { cpumask_set_cpu(cpu, mm_cpumask(next)); - update_user_asce(next); + update_user_asce(next, 0); if (next->context.flush_mm) /* Flush pending TLBs */ __tlb_flush_mm(next); @@ -94,7 +101,7 @@ static inline void finish_arch_post_lock_switch(void) cpu_relax(); cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - update_user_asce(mm); + update_user_asce(mm, 0); if (mm->context.flush_mm) __tlb_flush_mm(mm); preempt_enable(); diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 29c81f8..e759181 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -132,6 +132,7 @@ static inline void restore_access_regs(unsigned int *acrs) update_cr_regs(next); \ } \ prev = __switch_to(prev,next); \ + update_primary_asce(current); \ } while (0) #define finish_arch_switch(prev) do { \ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 3ccd71b..50630e6 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -82,6 +82,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_TLB_WAIT 4 /* wait for TLB flush completion */ +#define TIF_ASCE 5 /* primary asce needs fixup / uaccess */ #define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */ #define TIF_MCCK_PENDING 7 /* machine check handling is pending */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ @@ -99,6 +100,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SIGPENDING (1<