summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbz <bz@FreeBSD.org>2007-10-20 22:18:42 +0000
committerbz <bz@FreeBSD.org>2007-10-20 22:18:42 +0000
commit830ad96079c0199720ca93a683f2a4450afac014 (patch)
tree093ff5eddd45019ddc9c6de78b09a7b98701ea38
parentc89ca49410c0b451c33462a27a43de462843677a (diff)
downloadFreeBSD-src-830ad96079c0199720ca93a683f2a4450afac014.zip
FreeBSD-src-830ad96079c0199720ca93a683f2a4450afac014.tar.gz
Fold multiple asm statements into one so that the compiler at a certain
optimization level (-march=pentium-mmx for example) does not insert intermediate ops which would trash the carry flag. Change both sys/i386/i386/in_cksum.c[1] and sys/i386/include/in_cksum.h. To the best of my understanding, the same problem was addressed in rev. 1.16 of src/sys/i386/include/in_cksum.h for just a single function three years ago. Reviewed by: jhb Submitted by: Zhouyi ZHOU <zhouzhouyi FreeBSD.org> (initial version of [1]) MFC after: 5 days PR: 115678, 69257
-rw-r--r-- sys/i386/i386/in_cksum.c 120
-rw-r--r-- sys/i386/include/in_cksum.h 21
2 files changed, 91 insertions, 50 deletions
diff --git a/sys/i386/i386/in_cksum.c b/sys/i386/i386/in_cksum.c
index 412a93b..be7d8a3 100644
--- a/sys/i386/i386/in_cksum.c
+++ b/sys/i386/i386/in_cksum.c
@@ -260,17 +260,6 @@ u_int in_cksum_hdr(const struct ip *ip)
* reorder operations, this will generally take place in parallel with
* other calculations.
*/
-#define ADD(n) __asm __volatile \
- ("addl %1, %0" : "+r" (sum) : \
- "g" (((const u_int32_t *)w)[n / 4]))
-#define ADDC(n) __asm __volatile \
- ("adcl %1, %0" : "+r" (sum) : \
- "g" (((const u_int32_t *)w)[n / 4]))
-#define LOAD(n) __asm __volatile \
- ("" : : "r" (((const u_int32_t *)w)[n / 4]))
-#define MOP __asm __volatile \
- ("adcl $0, %0" : "+r" (sum))
-
u_short
in_cksum_skip(m, len, skip)
struct mbuf *m;
@@ -341,15 +330,24 @@ skip_start:
* Advance to a 486 cache line boundary.
*/
if (4 & (int) w && mlen >= 4) {
- ADD(0);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[0])
+ );
w += 2;
mlen -= 4;
}
if (8 & (int) w && mlen >= 8) {
- ADD(0);
- ADDC(4);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl %2, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[0]),
+ "g" (((const u_int32_t *)w)[1])
+ );
w += 4;
mlen -= 8;
}
@@ -379,45 +377,81 @@ skip_start:
* is initially 33 (not 32) to guaranteed that
* the LOAD(32) is within bounds.
*/
- ADD(16);
- ADDC(0);
- ADDC(4);
- ADDC(8);
- ADDC(12);
- LOAD(32);
- ADDC(20);
- ADDC(24);
- ADDC(28);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl %2, %0\n"
+ "adcl %3, %0\n"
+ "adcl %4, %0\n"
+ "adcl %5, %0\n"
+ "mov %6, %%eax\n"
+ "adcl %7, %0\n"
+ "adcl %8, %0\n"
+ "adcl %9, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[4]),
+ "g" (((const u_int32_t *)w)[0]),
+ "g" (((const u_int32_t *)w)[1]),
+ "g" (((const u_int32_t *)w)[2]),
+ "g" (((const u_int32_t *)w)[3]),
+ "g" (((const u_int32_t *)w)[8]),
+ "g" (((const u_int32_t *)w)[5]),
+ "g" (((const u_int32_t *)w)[6]),
+ "g" (((const u_int32_t *)w)[7])
+ : "eax"
+ );
w += 16;
}
mlen += 32 + 1;
if (mlen >= 32) {
- ADD(16);
- ADDC(0);
- ADDC(4);
- ADDC(8);
- ADDC(12);
- ADDC(20);
- ADDC(24);
- ADDC(28);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl %2, %0\n"
+ "adcl %3, %0\n"
+ "adcl %4, %0\n"
+ "adcl %5, %0\n"
+ "adcl %6, %0\n"
+ "adcl %7, %0\n"
+ "adcl %8, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[4]),
+ "g" (((const u_int32_t *)w)[0]),
+ "g" (((const u_int32_t *)w)[1]),
+ "g" (((const u_int32_t *)w)[2]),
+ "g" (((const u_int32_t *)w)[3]),
+ "g" (((const u_int32_t *)w)[5]),
+ "g" (((const u_int32_t *)w)[6]),
+ "g" (((const u_int32_t *)w)[7])
+ );
w += 16;
mlen -= 32;
}
if (mlen >= 16) {
- ADD(0);
- ADDC(4);
- ADDC(8);
- ADDC(12);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl %2, %0\n"
+ "adcl %3, %0\n"
+ "adcl %4, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[0]),
+ "g" (((const u_int32_t *)w)[1]),
+ "g" (((const u_int32_t *)w)[2]),
+ "g" (((const u_int32_t *)w)[3])
+ );
w += 8;
mlen -= 16;
}
if (mlen >= 8) {
- ADD(0);
- ADDC(4);
- MOP;
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl %2, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (((const u_int32_t *)w)[0]),
+ "g" (((const u_int32_t *)w)[1])
+ );
w += 4;
mlen -= 8;
}
diff --git a/sys/i386/include/in_cksum.h b/sys/i386/include/in_cksum.h
index 062c675..7e7f815 100644
--- a/sys/i386/include/in_cksum.h
+++ b/sys/i386/include/in_cksum.h
@@ -92,9 +92,12 @@ static __inline u_short
in_addword(u_short sum, u_short b)
{
/* __volatile is necessary because the condition codes are used. */
- __asm __volatile ("addw %1, %0" : "+r" (sum) : "r" (b));
- __asm __volatile ("adcw $0, %0" : "+r" (sum));
-
+ __asm __volatile (
+ "addw %1, %0\n"
+ "adcw $0, %0"
+ : "+r" (sum)
+ : "r" (b)
+ );
return (sum);
}
@@ -102,10 +105,14 @@ static __inline u_short
in_pseudo(u_int sum, u_int b, u_int c)
{
/* __volatile is necessary because the condition codes are used. */
- __asm __volatile ("addl %1, %0" : "+r" (sum) : "g" (b));
- __asm __volatile ("adcl %1, %0" : "+r" (sum) : "g" (c));
- __asm __volatile ("adcl $0, %0" : "+r" (sum));
-
+ __asm __volatile (
+ "addl %1, %0\n"
+ "adcl %2, %0\n"
+ "adcl $0, %0"
+ : "+r" (sum)
+ : "g" (b),
+ "g" (c)
+ );
sum = (sum & 0xffff) + (sum >> 16);
if (sum > 0xffff)
sum -= 0xffff;
OpenPOWER on IntegriCloud