diff options
author | bz <bz@FreeBSD.org> | 2007-10-20 22:18:42 +0000 |
---|---|---|
committer | bz <bz@FreeBSD.org> | 2007-10-20 22:18:42 +0000 |
commit | 830ad96079c0199720ca93a683f2a4450afac014 (patch) | |
tree | 093ff5eddd45019ddc9c6de78b09a7b98701ea38 /sys | |
parent | c89ca49410c0b451c33462a27a43de462843677a (diff) | |
download | FreeBSD-src-830ad96079c0199720ca93a683f2a4450afac014.zip FreeBSD-src-830ad96079c0199720ca93a683f2a4450afac014.tar.gz |
Fold multiple asm statements into one so that the compiler at a certain
optimization level (-march=pentium-mmx for example) does not insert
intermediate ops which would trash the carry.
Change both sys/i386/i386/in_cksum.c[1] and sys/i386/include/in_cksum.h.
To the best of my understanding, the same problem was addressed in rev. 1.16
of src/sys/i386/include/in_cksum.h for just a single function 3y ago.
Reviewed by: jhb
Submitted by: Zhouyi ZHOU <zhouzhouyi FreeBSD.org> (initial version of [1])
MFC after: 5 days
PR: 115678, 69257
Diffstat (limited to 'sys')
-rw-r--r-- | sys/i386/i386/in_cksum.c | 120 | ||||
-rw-r--r-- | sys/i386/include/in_cksum.h | 21 |
2 files changed, 91 insertions, 50 deletions
diff --git a/sys/i386/i386/in_cksum.c b/sys/i386/i386/in_cksum.c index 412a93b..be7d8a3 100644 --- a/sys/i386/i386/in_cksum.c +++ b/sys/i386/i386/in_cksum.c @@ -260,17 +260,6 @@ u_int in_cksum_hdr(const struct ip *ip) * reorder operations, this will generally take place in parallel with * other calculations. */ -#define ADD(n) __asm __volatile \ - ("addl %1, %0" : "+r" (sum) : \ - "g" (((const u_int32_t *)w)[n / 4])) -#define ADDC(n) __asm __volatile \ - ("adcl %1, %0" : "+r" (sum) : \ - "g" (((const u_int32_t *)w)[n / 4])) -#define LOAD(n) __asm __volatile \ - ("" : : "r" (((const u_int32_t *)w)[n / 4])) -#define MOP __asm __volatile \ - ("adcl $0, %0" : "+r" (sum)) - u_short in_cksum_skip(m, len, skip) struct mbuf *m; @@ -341,15 +330,24 @@ skip_start: * Advance to a 486 cache line boundary. */ if (4 & (int) w && mlen >= 4) { - ADD(0); - MOP; + __asm __volatile ( + "addl %1, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[0]) + ); w += 2; mlen -= 4; } if (8 & (int) w && mlen >= 8) { - ADD(0); - ADDC(4); - MOP; + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[0]), + "g" (((const u_int32_t *)w)[1]) + ); w += 4; mlen -= 8; } @@ -379,45 +377,81 @@ skip_start: * is initially 33 (not 32) to guaranteed that * the LOAD(32) is within bounds. 
*/ - ADD(16); - ADDC(0); - ADDC(4); - ADDC(8); - ADDC(12); - LOAD(32); - ADDC(20); - ADDC(24); - ADDC(28); - MOP; + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl %3, %0\n" + "adcl %4, %0\n" + "adcl %5, %0\n" + "mov %6, %%eax\n" + "adcl %7, %0\n" + "adcl %8, %0\n" + "adcl %9, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[4]), + "g" (((const u_int32_t *)w)[0]), + "g" (((const u_int32_t *)w)[1]), + "g" (((const u_int32_t *)w)[2]), + "g" (((const u_int32_t *)w)[3]), + "g" (((const u_int32_t *)w)[8]), + "g" (((const u_int32_t *)w)[5]), + "g" (((const u_int32_t *)w)[6]), + "g" (((const u_int32_t *)w)[7]) + : "eax" + ); w += 16; } mlen += 32 + 1; if (mlen >= 32) { - ADD(16); - ADDC(0); - ADDC(4); - ADDC(8); - ADDC(12); - ADDC(20); - ADDC(24); - ADDC(28); - MOP; + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl %3, %0\n" + "adcl %4, %0\n" + "adcl %5, %0\n" + "adcl %6, %0\n" + "adcl %7, %0\n" + "adcl %8, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[4]), + "g" (((const u_int32_t *)w)[0]), + "g" (((const u_int32_t *)w)[1]), + "g" (((const u_int32_t *)w)[2]), + "g" (((const u_int32_t *)w)[3]), + "g" (((const u_int32_t *)w)[5]), + "g" (((const u_int32_t *)w)[6]), + "g" (((const u_int32_t *)w)[7]) + ); w += 16; mlen -= 32; } if (mlen >= 16) { - ADD(0); - ADDC(4); - ADDC(8); - ADDC(12); - MOP; + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl %3, %0\n" + "adcl %4, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[0]), + "g" (((const u_int32_t *)w)[1]), + "g" (((const u_int32_t *)w)[2]), + "g" (((const u_int32_t *)w)[3]) + ); w += 8; mlen -= 16; } if (mlen >= 8) { - ADD(0); - ADDC(4); - MOP; + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (((const u_int32_t *)w)[0]), + "g" (((const u_int32_t *)w)[1]) + ); w += 4; mlen -= 8; } diff --git a/sys/i386/include/in_cksum.h b/sys/i386/include/in_cksum.h index 062c675..7e7f815 
100644 --- a/sys/i386/include/in_cksum.h +++ b/sys/i386/include/in_cksum.h @@ -92,9 +92,12 @@ static __inline u_short in_addword(u_short sum, u_short b) { /* __volatile is necessary because the condition codes are used. */ - __asm __volatile ("addw %1, %0" : "+r" (sum) : "r" (b)); - __asm __volatile ("adcw $0, %0" : "+r" (sum)); - + __asm __volatile ( + "addw %1, %0\n" + "adcw $0, %0" + : "+r" (sum) + : "r" (b) + ); return (sum); } @@ -102,10 +105,14 @@ static __inline u_short in_pseudo(u_int sum, u_int b, u_int c) { /* __volatile is necessary because the condition codes are used. */ - __asm __volatile ("addl %1, %0" : "+r" (sum) : "g" (b)); - __asm __volatile ("adcl %1, %0" : "+r" (sum) : "g" (c)); - __asm __volatile ("adcl $0, %0" : "+r" (sum)); - + __asm __volatile ( + "addl %1, %0\n" + "adcl %2, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (b), + "g" (c) + ); sum = (sum & 0xffff) + (sum >> 16); if (sum > 0xffff) sum -= 0xffff; |