diff options
-rw-r--r-- | src/ffts_internal.h | 1 | ||||
-rw-r--r-- | src/ffts_real.c | 80 | ||||
-rw-r--r-- | src/ffts_trig.c | 130 | ||||
-rw-r--r-- | src/ffts_trig.h | 5 | ||||
-rw-r--r-- | src/patterns.c | 1 |
5 files changed, 135 insertions, 82 deletions
diff --git a/src/ffts_internal.h b/src/ffts_internal.h index 2d1dbd3..59f46f8 100644 --- a/src/ffts_internal.h +++ b/src/ffts_internal.h @@ -40,7 +40,6 @@ #include "types.h" #include <malloc.h> -#include <math.h> #include <stddef.h> #ifdef HAVE_STDINT_H diff --git a/src/ffts_real.c b/src/ffts_real.c index f1355c7..f6e6127 100644 --- a/src/ffts_real.c +++ b/src/ffts_real.c @@ -34,6 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "ffts_real.h" #include "ffts_internal.h" +#include "ffts_trig.h" #ifdef HAVE_NEON #include <arm_neon.h> @@ -604,7 +605,6 @@ ffts_plan_t* ffts_init_1d_real(size_t N, int sign) { ffts_plan_t *p; - size_t i; p = (ffts_plan_t*) calloc(1, sizeof(*p) + sizeof(*p->plans)); if (!p) { @@ -642,85 +642,11 @@ ffts_init_1d_real(size_t N, int sign) goto cleanup; } - if (sign < 0) { - /* peel off the first */ - p->A[0] = 0.5f; - p->A[1] = -0.5f; -#ifdef HAVE_SSE3 - p->B[0] = -0.5f; -#else - p->B[0] = 0.5f; -#endif - p->B[1] = 0.5f; - - for (i = 1; i < N/4; i++) { - float t0 = (float) (0.5 * (1.0 - sin(2.0 * M_PI / N * i))); - float t1 = (float) (0.5 * (1.0 * cos(2.0 * M_PI / N * i))); - float t2 = (float) (0.5 * (1.0 + sin(2.0 * M_PI / N * i))); - - p->A[ 2 * i + 0] = t0; - p->A[N - 2 * i + 0] = t0; - p->A[ 2 * i + 1] = -t1; - p->A[N - 2 * i + 1] = t1; - #ifdef HAVE_SSE3 - p->B[ 2 * i + 0] = -t2; - p->B[N - 2 * i + 0] = -t2; + ffts_generate_table_1d_real_32f(p, sign, 1); #else - p->B[ 2 * i + 0] = t2; - p->B[N - 2 * i + 0] = t2; + ffts_generate_table_1d_real_32f(p, sign, 0); #endif - p->B[ 2 * i + 1] = t1; - p->B[N - 2 * i + 1] = -t1; - } - - /* and the last */ - p->A[2 * i + 0] = 0.0f; - p->A[2 * i + 1] = 0.0f; -#ifdef HAVE_SSE3 - p->B[2 * i + 0] = -1.0f; -#else - p->B[2 * i + 0] = 1.0f; -#endif - p->B[2 * i + 1] = 0.0f; - } else { - /* peel of the first */ - p->A[0] = 1.0f; -#ifdef HAVE_SSE3 - p->A[1] = 1.0f; -#else - p->A[1] = -1.0f; -#endif - p->B[0] = 1.0f; - p->B[1] = 1.0f; - - for (i = 1; i < N/4; i++) { - float t0 = (float) (1.0 * (1.0 - sin(2.0 * M_PI / N * i))); - float t1 = (float) (1.0 * (1.0 * cos(2.0 * M_PI / N * i))); - float t2 = (float) (1.0 * (1.0 + sin(2.0 * M_PI / N * i))); - - p->A[ 2 * i + 0] = t0; - p->A[N - 2 * i + 0] = t0; -#ifdef HAVE_SSE3 - p->A[ 2 * i + 1] = t1; - p->A[N - 2 * i + 1] = -t1; -#else - p->A[ 2 * i + 1] = -t1; - p->A[N - 2 * i + 1] = t1; -#endif - - p->B[ 2 * i + 0] = t2; - p->B[N - 2 * i + 0] = t2; - p->B[ 2 * i + 1] = t1; - p->B[N - 2 * i + 1] = -t1; - } - - /* and the last */ - p->A[2 * i + 0] = 0.0f; - p->A[2 * i + 1] = 0.0f; - p->B[2 * i + 0] = 2.0f; - p->B[2 * i + 1] = 0.0f; - } return p; diff --git a/src/ffts_trig.c b/src/ffts_trig.c index 8af96b9..514a1e5 100644 --- a/src/ffts_trig.c +++ b/src/ffts_trig.c @@ -118,7 +118,7 @@ ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size) c[0] = 1.0; s[0] = 0.0; - /* generate sine and cosine table with maximum error less than 1 ULP */ + /* generate sine and cosine tables with maximum error less than 1 ULP */ for (i = 1; i < (table_size + 1)/2; i++) { c[1] = c[0] - ((alpha * c[0]) + (beta * s[0])); s[1] = s[0] - ((alpha * s[0]) - (beta * c[0])); @@ -190,13 +190,13 @@ ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size) FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]); hs = (const double*) &half_secant[2 * offset]; - /* initialize from table */ + /* initialize from lookup table */ for (i = 0; i <= log_2; i++) { w[i][0] = ct[i][0]; w[i][1] = ct[i][1]; } - /* generate sine and cosine table with maximum error less than 0.5 ULP */ + /* generate sine and cosine tables with maximum error less than 0.5 ULP */ for (i = 1; i < table_size/2; i++) { /* calculate trailing zeros in index */ log_2 = ffts_ctzl(i); @@ -218,4 +218,128 @@ mid_point: exit: return 0; +} + +int +ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p, + int sign, + int invert) +{ + const ffts_cpx_64f *FFTS_RESTRICT ct; + const double *FFTS_RESTRICT hs; + ffts_cpx_64f FFTS_ALIGN(16) w[32]; + int i, log_2, offset, N; + float *A, *B; + + if (!p) { + return -1; + } + + A = (float*) FFTS_ASSUME_ALIGNED_32(p->A); + B = (float*) FFTS_ASSUME_ALIGNED_32(p->B); + N = (int) p->N; + + /* the first */ + if (sign < 0) { + A[0] = 0.5f; + A[1] = -0.5f; + B[0] = invert ? -0.5f : 0.5f; + B[1] = 0.5f; + } else { + /* peel of the first */ + A[0] = 1.0f; + A[1] = invert ? 1.0f : -1.0f; + B[0] = 1.0f; + B[1] = 1.0f; + } + + if (FFTS_UNLIKELY(N == 4)) { + i = 1; + goto last; + } + + /* calculate table offset */ + FFTS_ASSUME(N / 4 > 1); + log_2 = ffts_ctzl(N); + FFTS_ASSUME(log_2 > 2); + offset = 34 - log_2; + ct = (const ffts_cpx_64f*) + FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]); + hs = (const double*) &half_secant[2 * offset]; + + /* initialize from lookup table */ + for (i = 0; i <= log_2; i++) { + w[i][0] = ct[i][0]; + w[i][1] = ct[i][1]; + } + + /* generate sine and cosine tables with maximum error less than 0.5 ULP */ + if (sign < 0) { + for (i = 1; i < N/4; i++) { + float t0, t1, t2; + + /* calculate trailing zeros in index */ + log_2 = ffts_ctzl(i); + + t0 = (float) (0.5 * (1.0 - w[log_2][1])); + t1 = (float) (0.5 * w[log_2][0]); + t2 = (float) (0.5 * (1.0 + w[log_2][1])); + + A[ 2 * i + 0] = t0; + A[N - 2 * i + 0] = t0; + A[ 2 * i + 1] = -t1; + A[N - 2 * i + 1] = t1; + + B[ 2 * i + 0] = invert ? -t2 : t2; + B[N - 2 * i + 0] = invert ? -t2 : t2; + B[ 2 * i + 1] = t1; + B[N - 2 * i + 1] = -t1; + + /* skip and find next trailing zero */ + offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2))); + w[log_2][0] = hs[log_2] * (w[log_2 + 1][0] + w[offset][0]); + w[log_2][1] = hs[log_2] * (w[log_2 + 1][1] + w[offset][1]); + } + } else { + for (i = 1; i < N/4; i++) { + float t0, t1, t2; + + /* calculate trailing zeros in index */ + log_2 = ffts_ctzl(i); + + t0 = (float) (1.0 - w[log_2][1]); + t1 = (float) w[log_2][0]; + t2 = (float) (1.0 + w[log_2][1]); + + A[ 2 * i + 0] = t0; + A[N - 2 * i + 0] = t0; + A[ 2 * i + 1] = invert ? t1 : -t1; + A[N - 2 * i + 1] = invert ? -t1 : t1; + + B[ 2 * i + 0] = t2; + B[N - 2 * i + 0] = t2; + B[ 2 * i + 1] = t1; + B[N - 2 * i + 1] = -t1; + + /* skip and find next trailing zero */ + offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2))); + w[log_2][0] = hs[log_2] * (w[log_2 + 1][0] + w[offset][0]); + w[log_2][1] = hs[log_2] * (w[log_2 + 1][1] + w[offset][1]); + } + } + +last: + if (sign < 0) { + A[2 * i + 0] = 0.0f; + A[2 * i + 1] = 0.0f; + B[2 * i + 0] = invert ? -1.0f : 1.0f; + B[2 * i + 1] = 0.0f; + } else { + A[2 * i + 0] = 0.0f; + A[2 * i + 1] = 0.0f; + B[2 * i + 0] = 2.0f; + B[2 * i + 1] = 0.0f; + } + + return 0; }
\ No newline at end of file diff --git a/src/ffts_trig.h b/src/ffts_trig.h index 258c176..cfed2fb 100644 --- a/src/ffts_trig.h +++ b/src/ffts_trig.h @@ -45,4 +45,9 @@ ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size); int ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size); +int +ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p, + int sign, + int invert); + #endif /* FFTS_TRIG_H */ diff --git a/src/patterns.c b/src/patterns.c index 158ff89..be89265 100644 --- a/src/patterns.c +++ b/src/patterns.c @@ -37,7 +37,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <assert.h> #include <limits.h> #include <malloc.h> -#include <math.h> #ifdef HAVE_STDLIB_H #include <stdlib.h> |