summaryrefslogtreecommitdiffstats
path: root/libavcodec/cbrt_tablegen.h
diff options
context:
space:
mode:
authorGanesh Ajjanagadde <gajjanagadde@gmail.com>2016-01-11 17:09:44 -0500
committerGanesh Ajjanagadde <gajjanagadde@gmail.com>2016-01-11 17:20:38 -0500
commit07a11ebcab9b31e9fc784029e5d24e6fbf486ff3 (patch)
tree4fe2631f3261cf2bb6306f930372a21ba1b28d89 /libavcodec/cbrt_tablegen.h
parentf6e1c96730ebbcebbd0341329d51d3d3a36b4fa1 (diff)
downloadffmpeg-streaming-07a11ebcab9b31e9fc784029e5d24e6fbf486ff3.zip
ffmpeg-streaming-07a11ebcab9b31e9fc784029e5d24e6fbf486ff3.tar.gz
lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular method for generating prime numbers. Tables are identical to previous ones. Tested with FATE with/without --enable-hardcoded-tables. Sample benchmark (Haswell, GNU/Linux+gcc): prev: 7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips 7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips [...] 7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips 7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips new: 2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips 2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips [...] 1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips 1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips Both small and large run count given as this is called once so small run count may give a better picture, small numbers are fairly consistent, and there is a consistent downward trend from small to large runs, at which point it stabilizes to a new value. Reviewed-by: Michael Niedermayer <michael@niedermayer.cc> Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
Diffstat (limited to 'libavcodec/cbrt_tablegen.h')
-rw-r--r--libavcodec/cbrt_tablegen.h39
1 files changed, 29 insertions, 10 deletions
diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h
index 59b5a1d..21e4b9a 100644
--- a/libavcodec/cbrt_tablegen.h
+++ b/libavcodec/cbrt_tablegen.h
@@ -26,12 +26,13 @@
#include <stdint.h>
#include <math.h>
#include "libavutil/attributes.h"
+#include "libavutil/intfloat.h"
#include "libavcodec/aac_defines.h"
#if USE_FIXED
-#define CBRT(x) lrint((x).f * 8192)
+#define CBRT(x) lrint((x) * 8192)
#else
-#define CBRT(x) x.i
+#define CBRT(x) av_float2int((float)(x))
#endif
#if CONFIG_HARDCODED_TABLES
@@ -47,16 +48,34 @@ static uint32_t cbrt_tab[1 << 13];
static av_cold void AAC_RENAME(cbrt_tableinit)(void)
{
+ static double cbrt_tab_dbl[1 << 13];
if (!cbrt_tab[(1<<13) - 1]) {
- int i;
- for (i = 0; i < 1<<13; i++) {
- union {
- float f;
- uint32_t i;
- } f;
- f.f = cbrt(i) * i;
- cbrt_tab[i] = CBRT(f);
+ int i, j, k;
+ double cbrt_val;
+
+ for (i = 1; i < 1<<13; i++)
+ cbrt_tab_dbl[i] = 1;
+
+ /* have to take care of non-squarefree numbers */
+ for (i = 2; i < 90; i++) {
+ if (cbrt_tab_dbl[i] == 1) {
+ cbrt_val = i * cbrt(i);
+ for (k = i; k < 1<<13; k *= i)
+ for (j = k; j < 1<<13; j += k)
+ cbrt_tab_dbl[j] *= cbrt_val;
+ }
}
+
+ for (i = 91; i <= 8191; i+= 2) {
+ if (cbrt_tab_dbl[i] == 1) {
+ cbrt_val = i * cbrt(i);
+ for (j = i; j < 1<<13; j += i)
+ cbrt_tab_dbl[j] *= cbrt_val;
+ }
+ }
+
+ for (i = 0; i < 1<<13; i++)
+ cbrt_tab[i] = CBRT(cbrt_tab_dbl[i]);
}
}
#endif /* CONFIG_HARDCODED_TABLES */
OpenPOWER on IntegriCloud