From 9bd721c55c8a886b938a45198aab0ccb52f1f7fa Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 13 Sep 2013 11:26:52 -0700 Subject: sched/balancing: Consider max cost of idle balance per sched domain In this patch, we keep track of the max cost we spend doing idle load balancing for each sched domain. If the avg time the CPU remains idle is less then the time we have already spent on idle balancing + the max cost of idle balancing in the sched domain, then we don't continue to attempt the balance. We also keep a per rq variable, max_idle_balance_cost, which keeps track of the max time spent on newidle load balances throughout all its domains so that we can determine the avg_idle's max value. By using the max, we avoid overrunning the average. This further reduces the chance we attempt balancing when the CPU is not idle for longer than the cost to balance. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379096813-3032-3-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- arch/metag/include/asm/topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h index 23f5118..db19292 100644 --- a/arch/metag/include/asm/topology.h +++ b/arch/metag/include/asm/topology.h @@ -26,6 +26,7 @@ .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ + .max_newidle_lb_cost = 0, \ } #define cpu_to_node(cpu) ((void)(cpu), 0) -- cgit v1.1 From f48627e686a69f5215cb0761e731edb3d9859dd9 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 13 Sep 2013 11:26:53 -0700 Subject: sched/balancing: Periodically decay max cost of idle balance This patch builds on patch 2 and periodically decays that max value to do idle balancing per sched domain by approximately 1% per second. Also decay the rq's max_idle_balance_cost value. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1379096813-3032-4-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar --- arch/metag/include/asm/topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h index db19292..8e9c0b3 100644 --- a/arch/metag/include/asm/topology.h +++ b/arch/metag/include/asm/topology.h @@ -27,6 +27,7 @@ .balance_interval = 1, \ .nr_balance_failed = 0, \ .max_newidle_lb_cost = 0, \ + .next_decay_max_lb_cost = jiffies, \ } #define cpu_to_node(cpu) ((void)(cpu), 0) -- cgit v1.1 From 0c44c2d0f459cd7e275242b72f500137c4fa834d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Sep 2013 15:19:24 +0200 Subject: x86: Use asm goto to implement better modify_and_test() functions Linus suggested using asm goto to get rid of the typical SETcc + TEST instruction pair -- which also clobbers an extra register -- for our typical modify_and_test() functions. Because asm goto doesn't allow output fields it has to include an unconditinal memory clobber when it changes a memory variable to force a reload. Luckily all atomic ops already imply a compiler barrier to go along with their memory barrier semantics. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-0mtn9siwbeo1d33bap1422se@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 29 +++++---------------------- arch/x86/include/asm/atomic64_64.h | 28 ++++---------------------- arch/x86/include/asm/bitops.h | 24 ++++------------------ arch/x86/include/asm/local.h | 28 ++++---------------------- arch/x86/include/asm/rmwcc.h | 41 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 92 deletions(-) create mode 100644 arch/x86/include/asm/rmwcc.h (limited to 'arch') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 722aa3b..da31c8b 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * Atomic operations that C can't guarantee us. Useful for @@ -76,12 +77,7 @@ static inline void atomic_sub(int i, atomic_t *v) */ static inline int atomic_sub_and_test(int i, atomic_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" - : "+m" (v->counter), "=qm" (c) - : "ir" (i) : "memory"); - return c; + GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, i, "%0", "e"); } /** @@ -118,12 +114,7 @@ static inline void atomic_dec(atomic_t *v) */ static inline int atomic_dec_and_test(atomic_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "decl %0; sete %1" - : "+m" (v->counter), "=qm" (c) - : : "memory"); - return c != 0; + GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } /** @@ -136,12 +127,7 @@ static inline int atomic_dec_and_test(atomic_t *v) */ static inline int atomic_inc_and_test(atomic_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "incl %0; sete %1" - : "+m" (v->counter), "=qm" (c) - : : "memory"); - return c != 0; + GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); } /** @@ -155,12 +141,7 @@ static inline int atomic_inc_and_test(atomic_t *v) */ static inline int atomic_add_negative(int i, atomic_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" - : "+m" (v->counter), "=qm" (c) - : "ir" (i) : "memory"); - return c; + GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 0e1cbfc..3f065c9 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -72,12 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) */ static inline int atomic64_sub_and_test(long i, atomic64_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "er" (i), "m" (v->counter) : "memory"); - return c; + GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, i, "%0", "e"); } /** @@ -116,12 +111,7 @@ static inline void atomic64_dec(atomic64_t *v) */ static inline int atomic64_dec_and_test(atomic64_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "decq %0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "m" (v->counter) : "memory"); - return c != 0; + GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e"); } /** @@ -134,12 +124,7 @@ static inline int atomic64_dec_and_test(atomic64_t *v) */ static inline int atomic64_inc_and_test(atomic64_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "incq %0; sete %1" - : "=m" (v->counter), "=qm" (c) - : "m" (v->counter) : "memory"); - return c != 0; + GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e"); } /** @@ -153,12 +138,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v) */ static inline int atomic64_add_negative(long i, atomic64_t *v) { - unsigned char c; - - asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" - : "=m" (v->counter), "=qm" (c) - : "er" (i), "m" (v->counter) : "memory"); - return c; + GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 41639ce..6d76d09 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -14,6 +14,7 @@ #include #include +#include #if BITS_PER_LONG == 32 # define _BITOPS_LONG_SHIFT 5 @@ -204,12 +205,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; - - asm volatile(LOCK_PREFIX "bts %2,%1\n\t" - "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); - - return oldbit; + GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, nr, "%0", "c"); } /** @@ -255,13 +251,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; - - asm volatile(LOCK_PREFIX "btr %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); - - return oldbit; + GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, nr, "%0", "c"); } /** @@ -314,13 +304,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr) */ static inline int test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; - - asm volatile(LOCK_PREFIX "btc %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); - - return oldbit; + GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, nr, "%0", "c"); } static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 2d89e39..5b23e60 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -52,12 +52,7 @@ static inline void local_sub(long i, local_t *l) */ static inline int local_sub_and_test(long i, local_t *l) { - unsigned char c; - - asm volatile(_ASM_SUB "%2,%0; sete %1" - : "+m" (l->a.counter), "=qm" (c) - : "ir" (i) : "memory"); - return c; + GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, i, "%0", "e"); } /** @@ -70,12 +65,7 @@ static inline int local_sub_and_test(long i, local_t *l) */ static inline int local_dec_and_test(local_t *l) { - unsigned char c; - - asm volatile(_ASM_DEC "%0; sete %1" - : "+m" (l->a.counter), "=qm" (c) - : : "memory"); - return c != 0; + GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e"); } /** @@ -88,12 +78,7 @@ static inline int local_dec_and_test(local_t *l) */ static inline int local_inc_and_test(local_t *l) { - unsigned char c; - - asm volatile(_ASM_INC "%0; sete %1" - : "+m" (l->a.counter), "=qm" (c) - : : "memory"); - return c != 0; + GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e"); } /** @@ -107,12 +92,7 @@ static inline int local_inc_and_test(local_t *l) */ static inline int local_add_negative(long i, local_t *l) { - unsigned char c; - - asm volatile(_ASM_ADD "%2,%0; sets %1" - : "+m" (l->a.counter), "=qm" (c) - : "ir" (i) : "memory"); - return c; + GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, i, "%0", "s"); } /** diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h new file mode 100644 index 0000000..735f184 --- /dev/null +++ b/arch/x86/include/asm/rmwcc.h @@ -0,0 +1,41 @@ +#ifndef _ASM_X86_RMWcc +#define _ASM_X86_RMWcc + +#ifdef CC_HAVE_ASM_GOTO + +#define __GEN_RMWcc(fullop, var, cc, ...) \ +do { \ + asm volatile goto (fullop "; j" cc " %l[cc_label]" \ + : : "m" (var), ## __VA_ARGS__ \ + : "memory" : cc_label); \ + return 0; \ +cc_label: \ + return 1; \ +} while (0) + +#define GEN_UNARY_RMWcc(op, var, arg0, cc) \ + __GEN_RMWcc(op " " arg0, var, cc) + +#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \ + __GEN_RMWcc(op " %1, " arg0, var, cc, "er" (val)) + +#else /* !CC_HAVE_ASM_GOTO */ + +#define __GEN_RMWcc(fullop, var, cc, ...) \ +do { \ + char c; \ + asm volatile (fullop "; set" cc " %1" \ + : "+m" (var), "=qm" (c) \ + : __VA_ARGS__ : "memory"); \ + return c != 0; \ +} while (0) + +#define GEN_UNARY_RMWcc(op, var, arg0, cc) \ + __GEN_RMWcc(op " " arg0, var, cc) + +#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \ + __GEN_RMWcc(op " %2, " arg0, var, cc, "er" (val)) + +#endif /* CC_HAVE_ASM_GOTO */ + +#endif /* _ASM_X86_RMWcc */ -- cgit v1.1 From ea8117478918a4734586d35ff530721b682425be Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Sep 2013 12:43:13 +0200 Subject: sched, idle: Fix the idle polling state logic Mike reported that commit 7d1a9417 ("x86: Use generic idle loop") regressed several workloads and caused excessive reschedule interrupts. The patch in question failed to notice that the x86 code had an inverted sense of the polling state versus the new generic code (x86: default polling, generic: default !polling). Fix the two prominent x86 mwait based idle drivers and introduce a few new generic polling helpers (fixing the wrong smp_mb__after_clear_bit usage). Also switch the idle routines to using tif_need_resched() which is an immediate TIF_NEED_RESCHED test as opposed to need_resched which will end up being slightly different. Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: lenb@kernel.org Cc: tglx@linutronix.de Link: http://lkml.kernel.org/n/tip-nc03imb0etuefmzybzj7sprf@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c83516b..3fb8d95 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -391,9 +391,9 @@ static void amd_e400_idle(void) * The switch back from broadcast mode needs to be * called with interrupts disabled. */ - local_irq_disable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); - local_irq_enable(); + local_irq_disable(); + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + local_irq_enable(); } else default_idle(); } -- cgit v1.1 From a787870924dbd6f321661e06d4ec1c7a408c9ccf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2013 14:55:40 +0200 Subject: sched, arch: Create asm/preempt.h In order to prepare to per-arch implementations of preempt_count move the required bits into an asm-generic header and use this for all archs. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-h5j0c1r3e3fk015m30h8f1zx@git.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/Kbuild | 1 + arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/h8300/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/metag/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + 30 files changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index a6e85f44..f01fb50 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index d8dd660..5943f7f 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -46,3 +46,4 @@ generic-y += ucontext.h generic-y += user.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index d3db398..4e6838d 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -33,3 +33,4 @@ generic-y += timex.h generic-y += trace_clock.h generic-y += types.h generic-y += unaligned.h +generic-y += preempt.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 79a642d..519f89f 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -50,3 +50,4 @@ generic-y += unaligned.h generic-y += user.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index d22af85..b946080 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += trace_clock.h generic-y += param.h +generic-y += preempt.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 127826f..f2b4347 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -44,3 +44,4 @@ generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index e49f918..fc0b3c3 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -56,3 +56,4 @@ generic-y += ucontext.h generic-y += user.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index c832545..b06caf6 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -11,3 +11,4 @@ generic-y += module.h generic-y += trace_clock.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index c5d7670..74742dc 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 8ada3cf..7e0e721 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -6,3 +6,4 @@ generic-y += mmu.h generic-y += module.h generic-y += trace_clock.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 1da17ca..67c3450 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -53,3 +53,4 @@ generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index a3456f3..f93ee08 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -3,4 +3,5 @@ generic-y += clkdev.h generic-y += exec.h generic-y += kvm_para.h generic-y += trace_clock.h +generic-y += preempt.h generic-y += vtime.h \ No newline at end of file diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index bebdc36..2b58c5f 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += module.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 09d77a8..a5d27f2 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -31,3 +31,4 @@ generic-y += trace_clock.h generic-y += types.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 6ae0ccb..84d0c1d 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -52,3 +52,4 @@ generic-y += unaligned.h generic-y += user.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index d3c51a6..ce0bbf8 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += trace_clock.h generic-y += syscalls.h +generic-y += preempt.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 454ddf9..1acbb8b 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -11,5 +11,6 @@ generic-y += sections.h generic-y += segment.h generic-y += serial.h generic-y += trace_clock.h +generic-y += preempt.h generic-y += ucontext.h generic-y += xor.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index c5d7670..74742dc 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += clkdev.h generic-y += exec.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 195653e..7840562 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -67,3 +67,4 @@ generic-y += ucontext.h generic-y += user.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index ff4c9fa..a603b9e 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -4,3 +4,4 @@ generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \ div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \ poll.h xor.h clkdev.h exec.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 704e6f1..d8f9d2f 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -2,4 +2,5 @@ generic-y += clkdev.h generic-y += rwsem.h generic-y += trace_clock.h +generic-y += preempt.h generic-y += vtime.h \ No newline at end of file diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index f313f9c..7a5288f 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -2,3 +2,4 @@ generic-y += clkdev.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index e1c7bb9..f3414ad 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -4,3 +4,4 @@ header-y += generic-y += clkdev.h generic-y += trace_clock.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 280bea9..231efbb 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -34,3 +34,4 @@ generic-y += termios.h generic-y += trace_clock.h generic-y += ucontext.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 7e4a97f..bf39066 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -16,3 +16,4 @@ generic-y += serial.h generic-y += trace_clock.h generic-y += types.h generic-y += word-at-a-time.h +generic-y += preempt.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 664d6ad..22f3bd1 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -38,3 +38,4 @@ generic-y += termios.h generic-y += trace_clock.h generic-y += types.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index b30f34a..fdde187 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -3,3 +3,4 @@ generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h generic-y += switch_to.h clkdev.h generic-y += trace_clock.h +generic-y += preempt.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 89d8b6c..00045cb 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -60,3 +60,4 @@ generic-y += unaligned.h generic-y += user.h generic-y += vga.h generic-y += xor.h +generic-y += preempt.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 7f66985..eca2028 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,3 +5,4 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h +generic-y += preempt.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 1b98264..228d6ae 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -28,3 +28,4 @@ generic-y += termios.h generic-y += topology.h generic-y += trace_clock.h generic-y += xor.h +generic-y += preempt.h -- cgit v1.1 From bdb43806589096ac4272fe1307e789846ac08d7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Sep 2013 12:15:23 +0200 Subject: sched: Extract the basic add/sub preempt_count modifiers Rewrite the preempt_count macros in order to extract the 3 basic preempt_count value modifiers: __preempt_count_add() __preempt_count_sub() and the new: __preempt_count_dec_and_test() And since we're at it anyway, replace the unconventional $op_preempt_count names with the more conventional preempt_count_$op. Since these basic operators are equivalent to the previous _notrace() variants, do away with the _notrace() versions. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-ewbpdbupy9xpsjhg960zwbv8@git.kernel.org Signed-off-by: Ingo Molnar --- arch/mips/mm/init.c | 5 ++--- arch/x86/kernel/traps.c | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index e205ef5..1215617 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -124,7 +124,7 @@ void *kmap_coherent(struct page *page, unsigned long addr) BUG_ON(Page_dcache_dirty(page)); - inc_preempt_count(); + pagefault_disable(); idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); #ifdef CONFIG_MIPS_MT_SMTC idx += FIX_N_COLOURS * smp_processor_id() + @@ -193,8 +193,7 @@ void kunmap_coherent(void) write_c0_entryhi(old_ctx); EXIT_CRITICAL(flags); #endif - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } void copy_user_highpage(struct page *to, struct page *from, diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8c8093b..729aa77 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -88,7 +88,7 @@ static inline void conditional_sti(struct pt_regs *regs) static inline void preempt_conditional_sti(struct pt_regs *regs) { - inc_preempt_count(); + preempt_count_inc(); if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); } @@ -103,7 +103,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); - dec_preempt_count(); + preempt_count_dec(); } static int __kprobes -- cgit v1.1 From c2daa3bed53a81171cf8c1a36db798e82b91afe8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2013 14:51:00 +0200 Subject: sched, x86: Provide a per-cpu preempt_count implementation Convert x86 to use a per-cpu preemption count. The reason for doing so is that accessing per-cpu variables is a lot cheaper than accessing thread_info variables. We still need to save/restore the actual preemption count due to PREEMPT_ACTIVE so we place the per-cpu __preempt_count variable in the same cache-line as the other hot __switch_to() variables such as current_task. NOTE: this save/restore is required even for !PREEMPT kernels as cond_resched() also relies on preempt_count's PREEMPT_ACTIVE to ignore task_struct::state. Also rename thread_info::preempt_count to ensure nobody is 'accidentally' still poking at it. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-gzn5rfsf8trgjoqx8hyayy3q@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/Kbuild | 1 - arch/x86/include/asm/preempt.h | 98 ++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/thread_info.h | 5 +- arch/x86/kernel/asm-offsets.c | 1 - arch/x86/kernel/cpu/common.c | 5 ++ arch/x86/kernel/entry_32.S | 7 +-- arch/x86/kernel/entry_64.S | 4 +- arch/x86/kernel/irq_32.c | 4 -- arch/x86/kernel/process_32.c | 8 ++++ arch/x86/kernel/process_64.c | 8 ++++ 10 files changed, 124 insertions(+), 17 deletions(-) create mode 100644 arch/x86/include/asm/preempt.h (limited to 'arch') diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index eca2028..7f66985 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,4 +5,3 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h -generic-y += preempt.h diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h new file mode 100644 index 0000000..1309942 --- /dev/null +++ b/arch/x86/include/asm/preempt.h @@ -0,0 +1,98 @@ +#ifndef __ASM_PREEMPT_H +#define __ASM_PREEMPT_H + +#include +#include +#include + +DECLARE_PER_CPU(int, __preempt_count); + +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ +static __always_inline int preempt_count(void) +{ + return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; +} + +static __always_inline void preempt_count_set(int pc) +{ + __this_cpu_write_4(__preempt_count, pc); +} + +/* + * must be macros to avoid header recursion hell + */ +#define task_preempt_count(p) \ + (task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED) + +#define init_task_preempt_count(p) do { \ + task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \ +} while (0) + +#define init_idle_preempt_count(p, cpu) do { \ + task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ +} while (0) + +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * single instruction. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + +static __always_inline void set_preempt_need_resched(void) +{ + __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); +} + +static __always_inline void clear_preempt_need_resched(void) +{ + __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); +} + +static __always_inline bool test_preempt_need_resched(void) +{ + return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); +} + +/* + * The various preempt_count add/sub methods + */ + +static __always_inline void __preempt_count_add(int val) +{ + __this_cpu_add_4(__preempt_count, val); +} + +static __always_inline void __preempt_count_sub(int val) +{ + __this_cpu_add_4(__preempt_count, -val); +} + +static __always_inline bool __preempt_count_dec_and_test(void) +{ + GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); +} + +/* + * Returns true when we need to resched -- even if we can not. + */ +static __always_inline bool need_resched(void) +{ + return unlikely(test_preempt_need_resched()); +} + +/* + * Returns true when we need to resched and can (barring IRQ state). + */ +static __always_inline bool should_resched(void) +{ + return unlikely(!__this_cpu_read_4(__preempt_count)); +} + +#endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2781119..c46a46b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -28,8 +28,7 @@ struct thread_info { __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - int preempt_count; /* 0 => preemptable, - <0 => BUG */ + int saved_preempt_count; mm_segment_t addr_limit; struct restart_block restart_block; void __user *sysenter_return; @@ -49,7 +48,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = INIT_PREEMPT_COUNT, \ + .saved_preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 2861082..9f6b934 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -32,7 +32,6 @@ void common(void) { OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); OFFSET(TI_addr_limit, thread_info, addr_limit); - OFFSET(TI_preempt_count, thread_info, preempt_count); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2793d1f..5223fe6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1095,6 +1095,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; +DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; +EXPORT_PER_CPU_SYMBOL(__preempt_count); + DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); /* @@ -1169,6 +1172,8 @@ void debug_stack_reset(void) DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); +DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; +EXPORT_PER_CPU_SYMBOL(__preempt_count); DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f0dcb0c..fd1bc1b 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -362,12 +362,9 @@ END(ret_from_exception) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) - cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? - jnz restore_all need_resched: - movl TI_flags(%ebp), %ecx # need_resched set ? - testb $_TIF_NEED_RESCHED, %cl - jz restore_all + cmpl $0,PER_CPU_VAR(__preempt_count) + jnz restore_all testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1b69951..6a43e7d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1118,10 +1118,8 @@ retint_signal: /* Returning to kernel space. Check if we need preemption */ /* rcx: threadinfo. interrupts off. */ ENTRY(retint_kernel) - cmpl $0,TI_preempt_count(%rcx) + cmpl $0,PER_CPU_VAR(__preempt_count) jnz retint_restore_args - bt $TIF_NEED_RESCHED,TI_flags(%rcx) - jnc retint_restore_args bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ jnc retint_restore_args call preempt_schedule_irq diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 4186755..3fe0663 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -100,9 +100,6 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) irqctx->tinfo.task = curctx->tinfo.task; irqctx->tinfo.previous_esp = current_stack_pointer; - /* Copy the preempt_count so that the [soft]irq checks work. */ - irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count; - if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -131,7 +128,6 @@ void irq_ctx_init(int cpu) THREAD_SIZE_ORDER)); memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); per_cpu(hardirq_ctx, cpu) = irqctx; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 884f98f..c2ec1aa 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -292,6 +292,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) set_iopl_mask(next->iopl); /* + * If it were not for PREEMPT_ACTIVE we could guarantee that the + * preempt_count of all tasks was equal here and this would not be + * needed. + */ + task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); + this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); + + /* * Now maybe handle debug registers and/or IO bitmaps */ if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bb1dc51..45ab4d6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -363,6 +363,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(old_rsp, next->usersp); this_cpu_write(current_task, next_p); + /* + * If it were not for PREEMPT_ACTIVE we could guarantee that the + * preempt_count of all tasks was equal here and this would not be + * needed. + */ + task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); + this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); + this_cpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - KERNEL_STACK_OFFSET); -- cgit v1.1 From 1a338ac32ca630f67df25b4a16436cccc314e997 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2013 14:51:00 +0200 Subject: sched, x86: Optimize the preempt_schedule() call Remove the bloat of the C calling convention out of the preempt_enable() sites by creating an ASM wrapper which allows us to do an asm("call ___preempt_schedule") instead. calling.h bits by Andi Kleen Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-tk7xdi1cvvxewixzke8t8le1@git.kernel.org [ Fixed build error. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/calling.h | 50 ++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/preempt.h | 10 ++++++++ arch/x86/kernel/Makefile | 2 ++ arch/x86/kernel/i386_ksyms_32.c | 7 ++++++ arch/x86/kernel/preempt.S | 25 ++++++++++++++++++++ arch/x86/kernel/x8664_ksyms_64.c | 7 ++++++ 6 files changed, 101 insertions(+) create mode 100644 arch/x86/kernel/preempt.S (limited to 'arch') diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 0fa6750..cb4c73b 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -48,6 +48,8 @@ For 32-bit we have the following conventions - kernel is built with #include +#ifdef CONFIG_X86_64 + /* * 64-bit system call stack frame layout defines and helpers, * for assembly code: @@ -192,3 +194,51 @@ For 32-bit we have the following conventions - kernel is built with .macro icebp .byte 0xf1 .endm + +#else /* CONFIG_X86_64 */ + +/* + * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These + * are different from the entry_32.S versions in not changing the segment + * registers. So only suitable for in kernel use, not when transitioning + * from or to user space. The resulting stack frame is not a standard + * pt_regs frame. The main use case is calling C code from assembler + * when all the registers need to be preserved. + */ + + .macro SAVE_ALL + pushl_cfi %eax + CFI_REL_OFFSET eax, 0 + pushl_cfi %ebp + CFI_REL_OFFSET ebp, 0 + pushl_cfi %edi + CFI_REL_OFFSET edi, 0 + pushl_cfi %esi + CFI_REL_OFFSET esi, 0 + pushl_cfi %edx + CFI_REL_OFFSET edx, 0 + pushl_cfi %ecx + CFI_REL_OFFSET ecx, 0 + pushl_cfi %ebx + CFI_REL_OFFSET ebx, 0 + .endm + + .macro RESTORE_ALL + popl_cfi %ebx + CFI_RESTORE ebx + popl_cfi %ecx + CFI_RESTORE ecx + popl_cfi %edx + CFI_RESTORE edx + popl_cfi %esi + CFI_RESTORE esi + popl_cfi %edi + CFI_RESTORE edi + popl_cfi %ebp + CFI_RESTORE ebp + popl_cfi %eax + CFI_RESTORE eax + .endm + +#endif /* CONFIG_X86_64 */ + diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 1309942..1de41690 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -95,4 +95,14 @@ static __always_inline bool should_resched(void) return unlikely(!__this_cpu_read_4(__preempt_count)); } +#ifdef CONFIG_PREEMPT + extern asmlinkage void ___preempt_schedule(void); +# define __preempt_schedule() asm ("call ___preempt_schedule") + extern asmlinkage void preempt_schedule(void); +# ifdef CONFIG_CONTEXT_TRACKING + extern asmlinkage void ___preempt_schedule_context(void); +# define __preempt_schedule_context() asm ("call ___preempt_schedule_context") +# endif +#endif + #endif /* __ASM_PREEMPT_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a5408b9..9b0a34e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,6 +36,8 @@ obj-y += tsc.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-$(CONFIG_PREEMPT) += preempt.o + obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 0fa6912..05fd74f 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -37,3 +37,10 @@ EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(___preempt_schedule); +#ifdef CONFIG_CONTEXT_TRACKING +EXPORT_SYMBOL(___preempt_schedule_context); +#endif +#endif diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S new file mode 100644 index 0000000..ca7f0d5 --- /dev/null +++ b/arch/x86/kernel/preempt.S @@ -0,0 +1,25 @@ + +#include +#include +#include +#include + +ENTRY(___preempt_schedule) + CFI_STARTPROC + SAVE_ALL + call preempt_schedule + RESTORE_ALL + ret + CFI_ENDPROC + +#ifdef CONFIG_CONTEXT_TRACKING + +ENTRY(___preempt_schedule_context) + CFI_STARTPROC + SAVE_ALL + call preempt_schedule_context + RESTORE_ALL + ret + CFI_ENDPROC + +#endif diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index b014d94..0406819 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -66,3 +66,10 @@ EXPORT_SYMBOL(empty_zero_page); #ifndef CONFIG_PARAVIRT EXPORT_SYMBOL(native_load_gs_index); #endif + +#ifdef CONFIG_PREEMPT +EXPORT_SYMBOL(___preempt_schedule); +#ifdef CONFIG_CONTEXT_TRACKING +EXPORT_SYMBOL(___preempt_schedule_context); +#endif +#endif -- cgit v1.1 From 75f93fed50c2abadbab6ef546b265f51ca975b27 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Sep 2013 17:30:03 +0200 Subject: sched: Revert need_resched() to look at TIF_NEED_RESCHED Yuanhan reported a serious throughput regression in his pigz benchmark. Using the ftrace patch I found that several idle paths need more TLC before we can switch the generic need_resched() over to preempt_need_resched. The preemption paths benefit most from preempt_need_resched and do indeed use it; all other need_resched() users don't really care that much so reverting need_resched() back to tif_need_resched() is the simple and safe solution. Reported-by: Yuanhan Liu Signed-off-by: Peter Zijlstra Cc: Fengguang Wu Cc: Huang Ying Cc: lkp@linux.intel.com Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20130927153003.GF15690@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/preempt.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 1de41690..8729723 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -80,14 +80,6 @@ static __always_inline bool __preempt_count_dec_and_test(void) } /* - * Returns true when we need to resched -- even if we can not. - */ -static __always_inline bool need_resched(void) -{ - return unlikely(test_preempt_need_resched()); -} - -/* * Returns true when we need to resched and can (barring IRQ state). */ static __always_inline bool should_resched(void) -- cgit v1.1 From 35a2af94c7ce7130ca292c68b1d27fcfdb648f6b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Oct 2013 11:22:33 +0200 Subject: sched/wait: Make the __wait_event*() interface more friendly Change all __wait_event*() implementations to match the corresponding wait_event*() signature for convenience. In particular this does away with the weird 'ret' logic. Since there are __wait_event*() users this requires we update them too. Reviewed-by: Oleg Nesterov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20131002092529.042563462@infradead.org Signed-off-by: Ingo Molnar --- arch/mips/kernel/rtlx.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c index d763f11..2c12ea1 100644 --- a/arch/mips/kernel/rtlx.c +++ b/arch/mips/kernel/rtlx.c @@ -172,8 +172,9 @@ int rtlx_open(int index, int can_sleep) if (rtlx == NULL) { if( (p = vpe_get_shared(tclimit)) == NULL) { if (can_sleep) { - __wait_event_interruptible(channel_wqs[index].lx_queue, - (p = vpe_get_shared(tclimit)), ret); + ret = __wait_event_interruptible( + channel_wqs[index].lx_queue, + (p = vpe_get_shared(tclimit))); if (ret) goto out_fail; } else { @@ -263,11 +264,10 @@ unsigned int rtlx_read_poll(int index, int can_sleep) /* data available to read? */ if (chan->lx_read == chan->lx_write) { if (can_sleep) { - int ret = 0; - - __wait_event_interruptible(channel_wqs[index].lx_queue, + int ret = __wait_event_interruptible( + channel_wqs[index].lx_queue, (chan->lx_read != chan->lx_write) || - sp_stopping, ret); + sp_stopping); if (ret) return ret; @@ -440,14 +440,13 @@ static ssize_t file_write(struct file *file, const char __user * buffer, /* any space left... */ if (!rtlx_write_poll(minor)) { - int ret = 0; + int ret; if (file->f_flags & O_NONBLOCK) return -EAGAIN; - __wait_event_interruptible(channel_wqs[minor].rt_queue, - rtlx_write_poll(minor), - ret); + ret = __wait_event_interruptible(channel_wqs[minor].rt_queue, + rtlx_write_poll(minor)); if (ret) return ret; } -- cgit v1.1 From 88f182dd779b9d350b4774c12d16633a5b60f50c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Oct 2013 10:16:30 +0200 Subject: x86: Apply the asm_volatile_goto() compiler quirk Apply the asm_volatile_goto() compiler quirk to the new rmwcc.h file as well, introduced in: c2daa3bed53a sched, x86: Provide a per-cpu preempt_count implementation Reported-and-tested-by: Fengguang Wu Reported-by: Oleg Nesterov Reported-by: Peter Zijlstra Suggested-by: Jakub Jelinek Reviewed-by: Richard Henderson Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/include/asm/rmwcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 735f184..1ff990f 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -5,7 +5,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm volatile goto (fullop "; j" cc " %l[cc_label]" \ + asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ -- cgit v1.1