include/linux/kernel.h: rewrite min3, max3 and clamp using min and max

It appears that gcc is better at optimising a double call to min and max rather than open coded min3 and max3. This can be observed here: $ cat min-max.c #define min(x, y) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ (void) (&_min1 == &_min2); \ _min1 < _min2 ? _min1 : _min2; }) #define min3(x, y, z) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ typeof(z) _min3 = (z); \ (void) (&_min1 == &_min2); \ (void) (&_min1 == &_min3); \ _min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \ (_min2 < _min3 ? _min2 : _min3); }) int fmin3(int x, int y, int z) { return min3(x, y, z); } int fmin2(int x, int y, int z) { return min(min(x, y), z); } $ gcc -O2 -o min-max.s -S min-max.c; cat min-max.s .file "min-max.c" .text .p2align 4,,15 .globl fmin3 .type fmin3, @function fmin3: .LFB0: .cfi_startproc cmpl %esi, %edi jl .L5 cmpl %esi, %edx movl %esi, %eax cmovle %edx, %eax ret .p2align 4,,10 .p2align 3 .L5: cmpl %edi, %edx movl %edi, %eax cmovle %edx, %eax ret .cfi_endproc .LFE0: .size fmin3, .-fmin3 .p2align 4,,15 .globl fmin2 .type fmin2, @function fmin2: .LFB1: .cfi_startproc cmpl %edi, %esi movl %edx, %eax cmovle %esi, %edi cmpl %edx, %edi cmovle %edi, %eax ret .cfi_endproc .LFE1: .size fmin2, .-fmin2 .ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3" .section .note.GNU-stack,"",@progbits fmin3 function, which uses open-coded min3 macro, is compiled into total of ten instructions including a conditional branch, whereas fmin2 function, which uses two calls to min2 macro, is compiled into six instructions with no branches. Similarly, open-coded clamp produces the same code as clamp using min and max macros, but the latter is much shorter: $ cat clamp.c #define clamp(val, min, max) ({ \ typeof(val) __val = (val); \ typeof(min) __min = (min); \ typeof(max) __max = (max); \ (void) (&__val == &__min); \ (void) (&__val == &__max); \ __val = __val < __min ? __min: __val; \ __val > __max ? __max: __val; }) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ (void) (&_min1 == &_min2); \ _min1 < _min2 ? _min1 : _min2; }) #define max(x, y) ({ \ typeof(x) _max1 = (x); \ typeof(y) _max2 = (y); \ (void) (&_max1 == &_max2); \ _max1 > _max2 ? _max1 : _max2; }) int fclamp(int v, int min, int max) { return clamp(v, min, max); } int fclampmm(int v, int min, int max) { return min(max(v, min), max); } $ gcc -O2 -o clamp.s -S clamp.c; cat clamp.s .file "clamp.c" .text .p2align 4,,15 .globl fclamp .type fclamp, @function fclamp: .LFB0: .cfi_startproc cmpl %edi, %esi movl %edx, %eax cmovge %esi, %edi cmpl %edx, %edi cmovle %edi, %eax ret .cfi_endproc .LFE0: .size fclamp, .-fclamp .p2align 4,,15 .globl fclampmm .type fclampmm, @function fclampmm: .LFB1: .cfi_startproc cmpl %edi, %esi cmovge %esi, %edi cmpl %edi, %edx movl %edi, %eax cmovle %edx, %eax ret .cfi_endproc .LFE1: .size fclampmm, .-fclampmm .ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3" .section .note.GNU-stack,"",@progbits Linux mpn-glaptop 3.13.0-29-generic #53~precise1-Ubuntu SMP Wed Jun 4 22:06:25 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3 Copyright (C) 2011 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -rwx------ 1 mpn eng 51224656 Jun 17 14:15 vmlinux.before -rwx------ 1 mpn eng 51224608 Jun 17 13:57 vmlinux.after 48 bytes reduction. The do_fault_around was a few instruction shorter and as far as I can tell saved 12 bytes on the stack, i.e.: $ grep -e rsp -e pop -e push do_fault_around.* do_fault_around.before.s:push %rbp do_fault_around.before.s:mov %rsp,%rbp do_fault_around.before.s:push %r13 do_fault_around.before.s:push %r12 do_fault_around.before.s:push %rbx do_fault_around.before.s:sub $0x38,%rsp do_fault_around.before.s:add $0x38,%rsp do_fault_around.before.s:pop %rbx do_fault_around.before.s:pop %r12 do_fault_around.before.s:pop %r13 do_fault_around.before.s:pop %rbp do_fault_around.after.s:push %rbp do_fault_around.after.s:mov %rsp,%rbp do_fault_around.after.s:push %r12 do_fault_around.after.s:push %rbx do_fault_around.after.s:sub $0x30,%rsp do_fault_around.after.s:add $0x30,%rsp do_fault_around.after.s:pop %rbx do_fault_around.after.s:pop %r12 do_fault_around.after.s:pop %rbp or here side-by-side: Before After push %rbp push %rbp mov %rsp,%rbp mov %rsp,%rbp push %r13 push %r12 push %r12 push %rbx push %rbx sub $0x38,%rsp sub $0x30,%rsp add $0x38,%rsp add $0x30,%rsp pop %rbx pop %rbx pop %r12 pop %r12 pop %r13 pop %rbp pop %rbp There are also fewer branches: $ grep ^j do_fault_around.* do_fault_around.before.s:jae ffffffff812079b7 do_fault_around.before.s:jmp ffffffff812079c5 do_fault_around.before.s:jmp ffffffff81207a14 do_fault_around.before.s:ja ffffffff812079f9 do_fault_around.before.s:jb ffffffff81207a10 do_fault_around.before.s:jmp ffffffff81207a63 do_fault_around.before.s:jne ffffffff812079df do_fault_around.after.s:jmp ffffffff812079fd do_fault_around.after.s:ja ffffffff812079e2 do_fault_around.after.s:jb ffffffff812079f9 do_fault_around.after.s:jmp ffffffff81207a4c do_fault_around.after.s:jne ffffffff812079c8 And here's with allyesconfig on a different machine: $ uname -a; gcc --version; ls -l vmlinux.* Linux erwin 3.14.7-mn #54 SMP Sun Jun 15 11:25:08 CEST 2014 x86_64 AMD Phenom(tm) II X3 710 Processor AuthenticAMD GNU/Linux gcc (GCC) 4.8.3 Copyright (C) 2013 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -rwx------ 1 mpn eng 437027411 Jun 20 16:04 vmlinux.before -rwx------ 1 mpn eng 437026881 Jun 20 15:30 vmlinux.after 530 bytes reduction. Signed-off-by: Michal Nazarewicz <mina86@mina86.com> Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net> Acked-by: Steven Rostedt <rostedt@goodmis.org> Cc: Hagen Paul Pfeifer <hagen@jauu.net> Cc: David Rientjes <rientjes@google.com> Cc: "Rustad, Mark D" <mark.d.rustad@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Michal Nazarewicz <mina86@mina86.com> 2014-10-09 15:30:13 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-10-09 22:26:03 -0400
commit: 2e1d06e1c05af9dbe8a3bfddeefbf041ca637fff (patch)
tree: 71ddab1525bdc694d2dc7407843a1bafceb09f8b /include/linux/kernel.h
parent: 036c6508f183e9c730aee25e33d27b2b9b9a5bbc (diff)
download: op-kernel-dev-2e1d06e1c05af9dbe8a3bfddeefbf041ca637fff.zip
op-kernel-dev-2e1d06e1c05af9dbe8a3bfddeefbf041ca637fff.tar.gz
1 files changed, 5 insertions, 27 deletions
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 95624be..aa2a0cb 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -715,23 +715,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 	(void) (&_max1 == &_max2);		\
 	_max1 > _max2 ? _max1 : _max2; })
 
-#define min3(x, y, z) ({			\
-	typeof(x) _min1 = (x);			\
-	typeof(y) _min2 = (y);			\
-	typeof(z) _min3 = (z);			\
-	(void) (&_min1 == &_min2);		\
-	(void) (&_min1 == &_min3);		\
-	_min1 < _min2 ? (_min1 < _min3 ? _min1 : _min3) : \
-		(_min2 < _min3 ? _min2 : _min3); })
-
-#define max3(x, y, z) ({			\
-	typeof(x) _max1 = (x);			\
-	typeof(y) _max2 = (y);			\
-	typeof(z) _max3 = (z);			\
-	(void) (&_max1 == &_max2);		\
-	(void) (&_max1 == &_max3);		\
-	_max1 > _max2 ? (_max1 > _max3 ? _max1 : _max3) : \
-		(_max2 > _max3 ? _max2 : _max3); })
+#define min3(x, y, z) min((typeof(x))min(x, y), z)
+#define max3(x, y, z) max((typeof(x))max(x, y), z)
 
 /**
  * min_not_zero - return the minimum that is _not_ zero, unless both are zero
@@ -746,20 +731,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 /**
  * clamp - return a value clamped to a given range with strict typechecking
  * @val: current value
- * @min: minimum allowable value
- * @max: maximum allowable value
+ * @lo: lowest allowable value
+ * @hi: highest allowable value
  *
  * This macro does strict typechecking of min/max to make sure they are of the
  * same type as val.  See the unnecessary pointer comparisons.
  */
-#define clamp(val, min, max) ({			\
-	typeof(val) __val = (val);		\
-	typeof(min) __min = (min);		\
-	typeof(max) __max = (max);		\
-	(void) (&__val == &__min);		\
-	(void) (&__val == &__max);		\
-	__val = __val < __min ? __min: __val;	\
-	__val > __max ? __max: __val; })
+#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
 
 /*
  * ..and if you can't take the strict
author	Michal Nazarewicz <mina86@mina86.com>	2014-10-09 15:30:13 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-10-09 22:26:03 -0400
commit	2e1d06e1c05af9dbe8a3bfddeefbf041ca637fff (patch)
tree	71ddab1525bdc694d2dc7407843a1bafceb09f8b /include/linux/kernel.h
parent	036c6508f183e9c730aee25e33d27b2b9b9a5bbc (diff)
download	op-kernel-dev-2e1d06e1c05af9dbe8a3bfddeefbf041ca637fff.zip op-kernel-dev-2e1d06e1c05af9dbe8a3bfddeefbf041ca637fff.tar.gz