From 9885d87c6335d5b688bdf6b90e78de1add605d63 Mon Sep 17 00:00:00 2001 From: Jukka Ojanen Date: Tue, 14 Jul 2015 17:08:14 +0300 Subject: Move trigonometric stuff to separate file. Implemented Oscar Buneman's method for generating a sequence of sines and cosines. --- CMakeLists.txt | 2 + src/ffts.c | 55 +------------- src/ffts_real.c | 2 +- src/ffts_trig.c | 221 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/ffts_trig.h | 48 ++++++++++++ 5 files changed, 274 insertions(+), 54 deletions(-) create mode 100644 src/ffts_trig.c create mode 100644 src/ffts_trig.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c683af..db53dd5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -218,6 +218,8 @@ set(FFTS_SOURCES src/ffts_real.c src/ffts_real_nd.c src/ffts_real_nd.h + src/ffts_trig.c + src/ffts_trig.h src/ffts_static.c src/ffts_static.h src/macros.h diff --git a/src/ffts.c b/src/ffts.c index f9cb9bb..6e12563 100644 --- a/src/ffts.c +++ b/src/ffts.c @@ -35,6 +35,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "ffts_internal.h" #include "ffts_static.h" +#include "ffts_trig.h" #include "macros.h" #include "patterns.h" @@ -199,58 +200,6 @@ void ffts_free_1d(ffts_plan_t *p) free(p); } -static void -ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size) -{ - double alpha, beta; - double c[2], s[2]; - int i; - - double x = 1.0 / table_size; - double z = x * x; - - /* polynomial approximations calculated using Sollya */ - - /* alpha = 2 * sin(M_PI_4 / m) * sin(M_PI_4 / m) */ - alpha = x * (1.1107207345394952717884501203293686870741139540138 + - z * (-0.114191397993514079911985272577099412137126013186879 + - z * 3.52164670852685621720746817665316575239342815885835e-3)); - alpha = alpha * alpha; - - /* beta = sin(M_PI_2 / m) */ - beta = x * (1.57079632679489455959753740899031981825828552246094 + - z * (-0.64596409735041482313988581154262647032737731933593 + - z * 7.9690915468332887416913479228242067620158195495605e-2)); - - /* cos(0) = 1.0, sin(0) = 0.0 */ - c[0] = 1.0; - s[0] = 0.0; - - table[ 0][0] = 1.0f; - table[ 0][1] = 0.0f; - table[table_size - 1][1] = -1.0f; - table[table_size - 1][0] = -0.0f; - - /* generate sine and cosine table with maximum error less than 1 ULP */ - for (i = 1; i < table_size/2; i += 2) { - c[1] = c[0] - ((alpha * c[0]) + (beta * s[0])); - s[1] = s[0] - ((alpha * s[0]) - (beta * c[0])); - - table[i + 0][0] = (float) c[1]; - table[i + 0][1] = (float) -s[1]; - table[table_size - i][0] = (float) s[1]; - table[table_size - i][1] = (float) -c[1]; - - c[0] = c[1] - ((alpha * c[1]) + (beta * s[1])); - s[0] = s[1] - ((alpha * s[1]) - (beta * c[1])); - - table[i + 1][0] = (float) c[0]; - table[i + 1][1] = (float) -s[0]; - table[table_size - i - 1][0] = (float) s[0]; - table[table_size - i - 1][1] = (float) -c[0]; - } -} - static int ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) { @@ -304,7 +253,7 @@ ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign) m = leaf_N << (n_luts - 2); tmp = FFTS_MALLOC(m * sizeof(ffts_cpx_32f), 32); - ffts_generate_cosine_sine_32f(tmp, m); + ffts_generate_cosine_sine_pow2_32f(tmp, m); /* generate lookup tables */ stride = 1 << (n_luts - 1); diff --git a/src/ffts_real.c b/src/ffts_real.c index 0327f15..f1355c7 100644 --- a/src/ffts_real.c +++ b/src/ffts_real.c @@ -650,7 +650,7 @@ ffts_init_1d_real(size_t N, int sign) p->B[0] = -0.5f; #else p->B[0] = 0.5f; -#endif +#endif p->B[1] = 0.5f; for (i = 1; i < N/4; i++) { diff --git a/src/ffts_trig.c b/src/ffts_trig.c new file mode 100644 index 0000000..8af96b9 --- /dev/null +++ b/src/ffts_trig.c @@ -0,0 +1,221 @@ +/* + +This file is part of FFTS -- The Fastest Fourier Transform in the South + +Copyright (c) 2015, Jukka Ojanen + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the organization nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "ffts_trig.h" + +/* 1/(2*cos(pow(2,-p)*pi)) */ +static const FFTS_ALIGN(16) unsigned int half_secant[66] = { + 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000, + 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000, + 0x00000001, 0x3fe00000, 0x00000005, 0x3fe00000, 0x00000014, 0x3fe00000, + 0x0000004f, 0x3fe00000, 0x0000013c, 0x3fe00000, 0x000004ef, 0x3fe00000, + 0x000013bd, 0x3fe00000, 0x00004ef5, 0x3fe00000, 0x00013bd4, 0x3fe00000, + 0x0004ef4f, 0x3fe00000, 0x0013bd3d, 0x3fe00000, 0x004ef4f3, 0x3fe00000, + 0x013bd3cd, 0x3fe00000, 0x04ef4f34, 0x3fe00000, 0x13bd3cde, 0x3fe00000, + 0x4ef4f46c, 0x3fe00000, 0x3bd3e0e7, 0x3fe00001, 0xef507722, 0x3fe00004, + 0xbd5114f9, 0x3fe00013, 0xf637de7d, 0x3fe0004e, 0xe8190891, 0x3fe0013b, + 0x9436640e, 0x3fe004f0, 0x9c61d971, 0x3fe013d1, 0xd17cba53, 0x3fe0503e, + 0x7bdb3895, 0x3fe1517a, 0x00000000, 0x00000000, 0x00000000, 0x00000000 +}; + +/* cos(pow(2,-p)*pi), sin(pow(2,-p)*pi) */ +static const FFTS_ALIGN(16) unsigned int cos_sin_pi_table[132] = { + 0x00000000, 0x3ff00000, 0x54442d18, 0x3e0921fb, 0x00000000, 0x3ff00000, + 0x54442d18, 0x3e0921fb, 0x00000000, 0x3ff00000, 0x54442d18, 0x3e1921fb, + 0x00000000, 0x3ff00000, 0x54442d18, 0x3e2921fb, 0x00000000, 0x3ff00000, + 0x54442d18, 0x3e3921fb, 0xffffffff, 0x3fefffff, 0x54442d18, 0x3e4921fb, + 0xfffffffe, 0x3fefffff, 0x54442d18, 0x3e5921fb, 0xfffffff6, 0x3fefffff, + 0x54442d16, 0x3e6921fb, 0xffffffd9, 0x3fefffff, 0x54442d0e, 0x3e7921fb, + 0xffffff62, 0x3fefffff, 0x54442cef, 0x3e8921fb, 0xfffffd88, 0x3fefffff, + 0x54442c73, 0x3e9921fb, 0xfffff621, 0x3fefffff, 0x54442a83, 0x3ea921fb, + 0xffffd886, 0x3fefffff, 0x544422c2, 0x3eb921fb, 0xffff6216, 0x3fefffff, + 0x544403c1, 0x3ec921fb, 0xfffd8858, 0x3fefffff, 0x544387ba, 0x3ed921fb, + 0xfff62162, 0x3fefffff, 0x544197a1, 0x3ee921fb, 0xffd88586, 0x3fefffff, + 0x5439d73a, 0x3ef921fb, 0xff62161a, 0x3fefffff, 0x541ad59e, 0x3f0921fb, + 0xfd885867, 0x3fefffff, 0x539ecf31, 0x3f1921fb, 0xf621619c, 0x3fefffff, + 0x51aeb57c, 0x3f2921fb, 0xd8858675, 0x3fefffff, 0x49ee4ea6, 0x3f3921fb, + 0x62161a34, 0x3fefffff, 0x2aecb360, 0x3f4921fb, 0x88586ee6, 0x3feffffd, + 0xaee6472e, 0x3f5921fa, 0x21621d02, 0x3feffff6, 0xbecca4ba, 0x3f6921f8, + 0x858e8a92, 0x3fefffd8, 0xfe670071, 0x3f7921f0, 0x169b92db, 0x3fefff62, + 0xfcdec784, 0x3f8921d1, 0x6084cd0d, 0x3feffd88, 0xf7a3667e, 0x3f992155, + 0xe3796d7e, 0x3feff621, 0xf10dd814, 0x3fa91f65, 0xa3d12526, 0x3fefd88d, + 0xbc29b42c, 0x3fb917a6, 0xcff75cb0, 0x3fef6297, 0x3c69a60b, 0x3fc8f8b8, + 0xcf328d46, 0x3fed906b, 0xa6aea963, 0x3fd87de2, 0x667f3bcd, 0x3fe6a09e, + 0x667f3bcd, 0x3fe6a09e, 0x00000000, 0x00000000, 0x00000000, 0x3ff00000 +}; + +int +ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size) +{ + double alpha, beta; + double c[2], s[2]; + double x, z; + int i; + + if (!table || !table_size) { + return -1; + } + + /* the first */ + table[0][0] = 1.0f; + table[0][1] = -0.0f; + + if (FFTS_UNLIKELY(table_size == 1)) { + goto exit; + } + + if (FFTS_UNLIKELY(table_size == 2)) { + /* skip over */ + i = 1; + goto mid_point; + } + + /* polynomial approximations calculated using Sollya */ + x = 1.0 / table_size; + z = x * x; + + /* alpha = 2 * sin(M_PI_4 / m) * sin(M_PI_4 / m) */ + alpha = x * (1.1107207345394952717884501203293686870741139540138 + + z * (-0.114191397993514079911985272577099412137126013186879 + + z * 3.52164670852685621720746817665316575239342815885835e-3)); + alpha = alpha * alpha; + + /* beta = sin(M_PI_2 / m) */ + beta = x * (1.57079632679489455959753740899031981825828552246094 + + z * (-0.64596409735041482313988581154262647032737731933593 + + z * 7.9690915468332887416913479228242067620158195495605e-2)); + + /* cos(0) = 1.0, sin(0) = 0.0 */ + c[0] = 1.0; + s[0] = 0.0; + + /* generate sine and cosine table with maximum error less than 1 ULP */ + for (i = 1; i < (table_size + 1)/2; i++) { + c[1] = c[0] - ((alpha * c[0]) + (beta * s[0])); + s[1] = s[0] - ((alpha * s[0]) - (beta * c[0])); + + table[i + 0][0] = (float) c[1]; + table[i + 0][1] = (float) -s[1]; + table[table_size - i][0] = (float) s[1]; + table[table_size - i][1] = (float) -c[1]; + + c[0] = c[1]; + s[0] = s[1]; + } + + if (FFTS_UNLIKELY(table_size & 1)) { + goto exit; + } + +mid_point: + table[i][0] = 0.70710677f; + table[i][1] = -0.70710677f; + +exit: + return 0; +} + +/* Oscar Buneman's method for generating a sequence of sines and cosines. +* Expired US Patent 4,878,187 A +* +* D. Potts, G. Steidl, M. Tasche, Numerical stability of fast +* trigonometric transforms — a worst case study, +* J. Concrete Appl. Math. 1 (2003) 1–36 +* +* O. Buneman, Stable on–line creation of sines and cosines of +* successive angles, Proc. IEEE 75, 1434 – 1435 (1987). +*/ +int +ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size) +{ + const ffts_cpx_64f *FFTS_RESTRICT ct; + const double *FFTS_RESTRICT hs; + ffts_cpx_64f FFTS_ALIGN(16) w[32]; + int i, log_2, offset; + + /* size must be a power of two */ + if (!table || !table_size || (table_size & (table_size - 1))) { + return -1; + } + + /* the first */ + table[0][0] = 1.0f; + table[0][1] = -0.0f; + + if (FFTS_UNLIKELY(table_size == 1)) { + goto exit; + } + + if (FFTS_UNLIKELY(table_size == 2)) { + /* skip over */ + i = 1; + goto mid_point; + } + + /* calculate table offset */ + FFTS_ASSUME(table_size/2 > 1); + log_2 = ffts_ctzl(table_size); + FFTS_ASSUME(log_2 > 1); + offset = 32 - log_2; + ct = (const ffts_cpx_64f*) + FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]); + hs = (const double*) &half_secant[2 * offset]; + + /* initialize from table */ + for (i = 0; i <= log_2; i++) { + w[i][0] = ct[i][0]; + w[i][1] = ct[i][1]; + } + + /* generate sine and cosine table with maximum error less than 0.5 ULP */ + for (i = 1; i < table_size/2; i++) { + /* calculate trailing zeros in index */ + log_2 = ffts_ctzl(i); + + table[i + 0][0] = (float) w[log_2][0]; + table[i + 0][1] = (float) -w[log_2][1]; + table[table_size - i][0] = (float) w[log_2][1]; + table[table_size - i][1] = (float) -w[log_2][0]; + + /* skip and find next trailing zero */ + offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2))); + w[log_2][0] = hs[log_2] * (w[log_2 + 1][0] + w[offset][0]); + w[log_2][1] = hs[log_2] * (w[log_2 + 1][1] + w[offset][1]); + } + +mid_point: + table[i][0] = 0.70710677f; + table[i][1] = -0.70710677f; + +exit: + return 0; +} \ No newline at end of file diff --git a/src/ffts_trig.h b/src/ffts_trig.h new file mode 100644 index 0000000..258c176 --- /dev/null +++ b/src/ffts_trig.h @@ -0,0 +1,48 @@ +/* + +This file is part of FFTS -- The Fastest Fourier Transform in the South + +Copyright (c) 2015, Jukka Ojanen + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the organization nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef FFTS_TRIG_H +#define FFTS_TRIG_H + +#if defined (_MSC_VER) && (_MSC_VER >= 1020) +#pragma once +#endif + +#include "ffts_internal.h" + +int +ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size); + +int +ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size); + +#endif /* FFTS_TRIG_H */ -- cgit v1.1