summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorJiong Wang <jiong.wang@netronome.com>2018-07-06 15:13:18 -0700
committerDaniel Borkmann <daniel@iogearbox.net>2018-07-07 01:45:31 +0200
commit06ae48269d1e0324d806fca30fe77112f4a4a14a (patch)
tree066f16474f855f0a3ad05e34b9871e41ed3bf4c3 /lib
parent02000b55850deeadffe433e4b4930a8831f477de (diff)
downloadop-kernel-dev-06ae48269d1e0324d806fca30fe77112f4a4a14a.zip
op-kernel-dev-06ae48269d1e0324d806fca30fe77112f4a4a14a.tar.gz
lib: reciprocal_div: implement the improved algorithm on the paper mentioned
The new added "reciprocal_value_adv" implements the advanced version of the algorithm described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose ceil(log2(d)) result will be 32 which then requires u128 divide on host. The exception case could be easily handled before calling "reciprocal_value_adv". The advanced version requires more complex calculation to get the reciprocal multiplier and other control variables, but then could reduce the required emulation operations. It makes no sense to use this advanced version for host divide emulation, those extra complexities for calculating multiplier etc could completely waive our saving on emulation operations. However, it makes sense to use it for JIT divide code generation (for example eBPF JIT backends) for which we are willing to trade performance of JITed code with that of host. As shown by the following pseudo code, the required emulation operations could go down from 6 (the basic version) to 3 or 4. To use the result of "reciprocal_value_adv", suppose we want to calculate n/d, the C-style pseudo code will be the following, it could be easily changed to real code generation for other JIT targets. struct reciprocal_value_adv rvalue; u8 pre_shift, exp; // handle exception case. if (d >= (1U << 31)) { result = n >= d; return; } rvalue = reciprocal_value_adv(d, 32) exp = rvalue.exp; if (rvalue.is_wide_m && !(d & 1)) { // floor(log2(d & (2^32 -d))) pre_shift = fls(d & -d) - 1; rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); } else { pre_shift = 0; } // code generation starts. if (imm == 1U << exp) { result = n >> exp; } else if (rvalue.is_wide_m) { // pre_shift must be zero when reached here. t = (n * rvalue.m) >> 32; result = n - t; result >>= 1; result += t; result >>= rvalue.sh - 1; } else { if (pre_shift) result = n >> pre_shift; result = ((u64)result * rvalue.m) >> 32; result >>= rvalue.sh; } Signed-off-by: Jiong Wang <jiong.wang@netronome.com> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'lib')
-rw-r--r--lib/reciprocal_div.c41
1 files changed, 41 insertions, 0 deletions
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
index fcb4ce6..bf04325 100644
--- a/lib/reciprocal_div.c
+++ b/lib/reciprocal_div.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
#include <linux/kernel.h>
#include <asm/div64.h>
#include <linux/reciprocal_div.h>
@@ -26,3 +27,43 @@ struct reciprocal_value reciprocal_value(u32 d)
return R;
}
EXPORT_SYMBOL(reciprocal_value);
+
+struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec)
+{
+ struct reciprocal_value_adv R;
+ u32 l, post_shift;
+ u64 mhigh, mlow;
+
+ /* ceil(log2(d)) */
+ l = fls(d - 1);
+ /* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to
+ * be handled before calling "reciprocal_value_adv", please see the
+ * comment at include/linux/reciprocal_div.h.
+ */
+ WARN(l == 32,
+ "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor",
+ d, __func__);
+ post_shift = l;
+ mlow = 1ULL << (32 + l);
+ do_div(mlow, d);
+ mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec));
+ do_div(mhigh, d);
+
+ for (; post_shift > 0; post_shift--) {
+ u64 lo = mlow >> 1, hi = mhigh >> 1;
+
+ if (lo >= hi)
+ break;
+
+ mlow = lo;
+ mhigh = hi;
+ }
+
+ R.m = (u32)mhigh;
+ R.sh = post_shift;
+ R.exp = l;
+ R.is_wide_m = mhigh > U32_MAX;
+
+ return R;
+}
+EXPORT_SYMBOL(reciprocal_value_adv);
OpenPOWER on IntegriCloud