diff options
author | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2015-03-31 16:47:06 +0300 |
---|---|---|
committer | Jukka Ojanen <jukka.ojanen@linkotec.net> | 2015-03-31 16:47:06 +0300 |
commit | dfab21f8096660f441fb33bf5012e7f2c3652fa9 (patch) | |
tree | 405daf6579e51ba7f4407a8c13786d2ffed72b9b | |
parent | 93a58ef7e0b973411bbed3e07750c7d1fc1e40d5 (diff) | |
download | ffts-dfab21f8096660f441fb33bf5012e7f2c3652fa9.zip ffts-dfab21f8096660f441fb33bf5012e7f2c3652fa9.tar.gz |
Generate cosine and sine table without using C math library. About 100 times faster on ARM and 15 times faster on x86.
-rw-r--r-- | src/ffts.c | 59 | ||||
-rw-r--r-- | src/ffts_internal.h | 10 |
2 files changed, 54 insertions, 15 deletions
@@ -199,6 +199,58 @@ void ffts_free_1d(ffts_plan_t *p) free(p); } +static void +ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size) +{ + double alpha, beta; + double c[2], s[2]; + int i; + + double x = 1.0 / table_size; + double z = x * x; + + /* polynomial approximations calculated using Sollya */ + + /* alpha = 2 * sin(M_PI_4 / m) * sin(M_PI_4 / m) */ + alpha = x * (1.1107207345394952717884501203293686870741139540138 + + z * (-0.114191397993514079911985272577099412137126013186879 + + z * 3.52164670852685621720746817665316575239342815885835e-3)); + alpha = alpha * alpha; + + /* beta = sin(M_PI_2 / m) */ + beta = x * (1.57079632679489455959753740899031981825828552246094 + + z * (-0.64596409735041482313988581154262647032737731933593 + + z * 7.9690915468332887416913479228242067620158195495605e-2)); + + /* cos(0) = 1.0, sin(0) = 0.0 */ + c[0] = 1.0; + s[0] = 0.0; + + table[ 0][0] = 1.0f; + table[ 0][1] = 0.0f; + table[table_size - 1][1] = -1.0f; + table[table_size - 1][0] = -0.0f; + + /* generate sine and cosine table with maximum error less than 1 ULP */ + for (i = 1; i < table_size/2; i += 2) { + c[1] = c[0] - ((alpha * c[0]) + (beta * s[0])); + s[1] = s[0] - ((alpha * s[0]) - (beta * c[0])); + + table[i + 0][0] = (float) c[1]; + table[i + 0][1] = (float) -s[1]; + table[table_size - i][0] = (float) s[1]; + table[table_size - i][1] = (float) -c[1]; + + c[0] = c[1] - ((alpha * c[1]) + (beta * s[1])); + s[0] = s[1] - ((alpha * s[1]) - (beta * c[1])); + + table[i + 1][0] = (float) c[0]; + table[i + 1][1] = (float) -s[0]; + table[table_size - i - 1][0] = (float) s[0]; + table[table_size - i - 1][1] = (float) -c[0]; + } +} + static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) { @@ -252,12 +304,9 @@ ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) m = leaf_N << (n_luts - 2); tmp = FFTS_MALLOC(m * sizeof(ffts_cpx_32f), 32); - for (i = 0; i < m; i++) { - tmp[i][0] = W_re(4*m, i); - tmp[i][1] = W_im(4*m, i); - } + ffts_generate_cosine_sine_32f(tmp, m); - /* generate lookup tables */ + /* generate lookup tables */ stride = 1 << (n_luts - 1); for (i = 0; i < n_luts; i++) { p->ws_is[i] = w - (ffts_cpx_32f*) p->ws; diff --git a/src/ffts_internal.h b/src/ffts_internal.h index f992811..60de539 100644 --- a/src/ffts_internal.h +++ b/src/ffts_internal.h @@ -223,14 +223,4 @@ static __inline unsigned long ffts_ctzl(size_t N) #endif /* _WIN64 */ #endif /* _MSC_VER */ -static FFTS_ALWAYS_INLINE float W_re(float N, float k) -{ - return cos(-2.0 * M_PI * k / N); -} - -static FFTS_ALWAYS_INLINE float W_im(float N, float k) -{ - return sin(-2.0 * M_PI * k / N); -} - #endif /* FFTS_INTERNAL_H */ |