diff options
author | Stephen Hemminger <shemminger@osdl.org> | 2005-12-21 19:32:36 -0800 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-01-03 13:11:09 -0800 |
commit | 9eb2d627190a8afe4b9276b24615a9559504fa60 (patch) | |
tree | 39479e9d761e53462bd05f979d0ced132bbfcb0a | |
parent | 89b3d9aaf46791177c5a5fa07a3ed38a035b5ef5 (diff) | |
download | op-kernel-dev-9eb2d627190a8afe4b9276b24615a9559504fa60.zip op-kernel-dev-9eb2d627190a8afe4b9276b24615a9559504fa60.tar.gz |
[TCP] cubic: use Newton-Raphson
Replace cube root algorithim with a faster version using Newton-Raphson.
Surprisingly, doing the scaled div64_64 is faster than a true 64 bit
division on 64 bit CPU's.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv4/tcp_cubic.c | 93 |
1 files changed, 39 insertions, 54 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 44fd408..31a4986 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -52,6 +52,7 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_ module_param(tcp_friendliness, int, 0644); MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); +#include <asm/div64.h> /* BIC TCP Parameters */ struct bictcp { @@ -93,67 +94,51 @@ static void bictcp_init(struct sock *sk) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } -/* 65536 times the cubic root */ -static const u64 cubic_table[8] - = {0, 65536, 82570, 94519, 104030, 112063, 119087, 125367}; +/* 64bit divisor, dividend and result. dynamic precision */ +static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor) +{ + u_int32_t d = divisor; + + if (divisor > 0xffffffffULL) { + unsigned int shift = fls(divisor >> 32); + + d = divisor >> shift; + dividend >>= shift; + } + + /* avoid 64 bit division if possible */ + if (dividend >> 32) + do_div(dividend, d); + else + dividend = (uint32_t) dividend / d; + + return dividend; +} /* - * calculate the cubic root of x - * the basic idea is that x can be expressed as i*8^j - * so cubic_root(x) = cubic_root(i)*2^j - * in the following code, x is i, and y is 2^j - * because of integer calculation, there are errors in calculation - * so finally use binary search to find out the exact solution + * calculate the cubic root of x using Newton-Raphson */ -static u32 cubic_root(u64 x) +static u32 cubic_root(u64 a) { - u64 y, app, target, start, end, mid, start_diff, end_diff; - - if (x == 0) - return 0; + u32 x, x1; - target = x; - - /* first estimate lower and upper bound */ - y = 1; - while (x >= 8){ - x = (x >> 3); - y = (y << 1); - } - start = (y*cubic_table[x])>>16; - if (x==7) - end = (y<<1); - else - end = (y*cubic_table[x+1]+65535)>>16; - - /* binary search for more accurate one */ - while (start < end-1) { - mid = (start+end) >> 1; - app = mid*mid*mid; - if (app < target) - start = mid; - else if (app > target) - end = mid; - else - return mid; - } + /* Initial estimate is based on: + * cbrt(x) = exp(log(x) / 3) + */ + x = 1u << (fls64(a)/3); - /* find the most accurate one from start and end */ - app = start*start*start; - if (app < target) - start_diff = target - app; - else - start_diff = app - target; - app = end*end*end; - if (app < target) - end_diff = target - app; - else - end_diff = app - target; + /* + * Iteration based on: + * 2 + * x = ( 2 * x + a / x ) / 3 + * k+1 k k + */ + do { + x1 = x; + x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3; + } while (abs(x1 - x) > 1); - if (start_diff < end_diff) - return (u32)start; - else - return (u32)end; + return x; } /* |