summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJukka Ojanen <jukka.ojanen@linkotec.net>2015-03-31 16:47:06 +0300
committerJukka Ojanen <jukka.ojanen@linkotec.net>2015-03-31 16:47:06 +0300
commitdfab21f8096660f441fb33bf5012e7f2c3652fa9 (patch)
tree405daf6579e51ba7f4407a8c13786d2ffed72b9b
parent93a58ef7e0b973411bbed3e07750c7d1fc1e40d5 (diff)
downloadffts-dfab21f8096660f441fb33bf5012e7f2c3652fa9.zip
ffts-dfab21f8096660f441fb33bf5012e7f2c3652fa9.tar.gz
Generate cosine and sine table without using C math library. About 100 times faster on ARM and 15 times faster on x86.
-rw-r--r--src/ffts.c59
-rw-r--r--src/ffts_internal.h10
2 files changed, 54 insertions, 15 deletions
diff --git a/src/ffts.c b/src/ffts.c
index 41df886..f9cb9bb 100644
--- a/src/ffts.c
+++ b/src/ffts.c
@@ -199,6 +199,58 @@ void ffts_free_1d(ffts_plan_t *p)
free(p);
}
+static void
+ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size)
+{
+ double alpha, beta;
+ double c[2], s[2];
+ int i;
+
+ double x = 1.0 / table_size;
+ double z = x * x;
+
+ /* polynomial approximations calculated using Sollya */
+
+ /* alpha = 2 * sin(M_PI_4 / m) * sin(M_PI_4 / m) */
+ alpha = x * (1.1107207345394952717884501203293686870741139540138 +
+ z * (-0.114191397993514079911985272577099412137126013186879 +
+ z * 3.52164670852685621720746817665316575239342815885835e-3));
+ alpha = alpha * alpha;
+
+ /* beta = sin(M_PI_2 / m) */
+ beta = x * (1.57079632679489455959753740899031981825828552246094 +
+ z * (-0.64596409735041482313988581154262647032737731933593 +
+ z * 7.9690915468332887416913479228242067620158195495605e-2));
+
+ /* cos(0) = 1.0, sin(0) = 0.0 */
+ c[0] = 1.0;
+ s[0] = 0.0;
+
+ table[ 0][0] = 1.0f;
+ table[ 0][1] = 0.0f;
+ table[table_size - 1][1] = -1.0f;
+ table[table_size - 1][0] = -0.0f;
+
+ /* generate sine and cosine table with maximum error less than 1 ULP */
+ for (i = 1; i < table_size/2; i += 2) {
+ c[1] = c[0] - ((alpha * c[0]) + (beta * s[0]));
+ s[1] = s[0] - ((alpha * s[0]) - (beta * c[0]));
+
+ table[i + 0][0] = (float) c[1];
+ table[i + 0][1] = (float) -s[1];
+ table[table_size - i][0] = (float) s[1];
+ table[table_size - i][1] = (float) -c[1];
+
+ c[0] = c[1] - ((alpha * c[1]) + (beta * s[1]));
+ s[0] = s[1] - ((alpha * s[1]) - (beta * c[1]));
+
+ table[i + 1][0] = (float) c[0];
+ table[i + 1][1] = (float) -s[0];
+ table[table_size - i - 1][0] = (float) s[0];
+ table[table_size - i - 1][1] = (float) -c[0];
+ }
+}
+
static int
ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
{
@@ -252,12 +304,9 @@ ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
m = leaf_N << (n_luts - 2);
tmp = FFTS_MALLOC(m * sizeof(ffts_cpx_32f), 32);
- for (i = 0; i < m; i++) {
- tmp[i][0] = W_re(4*m, i);
- tmp[i][1] = W_im(4*m, i);
- }
+ ffts_generate_cosine_sine_32f(tmp, m);
- /* generate lookup tables */
+ /* generate lookup tables */
stride = 1 << (n_luts - 1);
for (i = 0; i < n_luts; i++) {
p->ws_is[i] = w - (ffts_cpx_32f*) p->ws;
diff --git a/src/ffts_internal.h b/src/ffts_internal.h
index f992811..60de539 100644
--- a/src/ffts_internal.h
+++ b/src/ffts_internal.h
@@ -223,14 +223,4 @@ static __inline unsigned long ffts_ctzl(size_t N)
#endif /* _WIN64 */
#endif /* _MSC_VER */
-static FFTS_ALWAYS_INLINE float W_re(float N, float k)
-{
- return cos(-2.0 * M_PI * k / N);
-}
-
-static FFTS_ALWAYS_INLINE float W_im(float N, float k)
-{
- return sin(-2.0 * M_PI * k / N);
-}
-
#endif /* FFTS_INTERNAL_H */
OpenPOWER on IntegriCloud