summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/ffts_internal.h1
-rw-r--r--src/ffts_real.c80
-rw-r--r--src/ffts_trig.c130
-rw-r--r--src/ffts_trig.h5
-rw-r--r--src/patterns.c1
5 files changed, 135 insertions, 82 deletions
diff --git a/src/ffts_internal.h b/src/ffts_internal.h
index 2d1dbd3..59f46f8 100644
--- a/src/ffts_internal.h
+++ b/src/ffts_internal.h
@@ -40,7 +40,6 @@
#include "types.h"
#include <malloc.h>
-#include <math.h>
#include <stddef.h>
#ifdef HAVE_STDINT_H
diff --git a/src/ffts_real.c b/src/ffts_real.c
index f1355c7..f6e6127 100644
--- a/src/ffts_real.c
+++ b/src/ffts_real.c
@@ -34,6 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ffts_real.h"
#include "ffts_internal.h"
+#include "ffts_trig.h"
#ifdef HAVE_NEON
#include <arm_neon.h>
@@ -604,7 +605,6 @@ ffts_plan_t*
ffts_init_1d_real(size_t N, int sign)
{
ffts_plan_t *p;
- size_t i;
p = (ffts_plan_t*) calloc(1, sizeof(*p) + sizeof(*p->plans));
if (!p) {
@@ -642,85 +642,11 @@ ffts_init_1d_real(size_t N, int sign)
goto cleanup;
}
- if (sign < 0) {
- /* peel off the first */
- p->A[0] = 0.5f;
- p->A[1] = -0.5f;
-#ifdef HAVE_SSE3
- p->B[0] = -0.5f;
-#else
- p->B[0] = 0.5f;
-#endif
- p->B[1] = 0.5f;
-
- for (i = 1; i < N/4; i++) {
- float t0 = (float) (0.5 * (1.0 - sin(2.0 * M_PI / N * i)));
- float t1 = (float) (0.5 * (1.0 * cos(2.0 * M_PI / N * i)));
- float t2 = (float) (0.5 * (1.0 + sin(2.0 * M_PI / N * i)));
-
- p->A[ 2 * i + 0] = t0;
- p->A[N - 2 * i + 0] = t0;
- p->A[ 2 * i + 1] = -t1;
- p->A[N - 2 * i + 1] = t1;
-
#ifdef HAVE_SSE3
- p->B[ 2 * i + 0] = -t2;
- p->B[N - 2 * i + 0] = -t2;
+ ffts_generate_table_1d_real_32f(p, sign, 1);
#else
- p->B[ 2 * i + 0] = t2;
- p->B[N - 2 * i + 0] = t2;
+ ffts_generate_table_1d_real_32f(p, sign, 0);
#endif
- p->B[ 2 * i + 1] = t1;
- p->B[N - 2 * i + 1] = -t1;
- }
-
- /* and the last */
- p->A[2 * i + 0] = 0.0f;
- p->A[2 * i + 1] = 0.0f;
-#ifdef HAVE_SSE3
- p->B[2 * i + 0] = -1.0f;
-#else
- p->B[2 * i + 0] = 1.0f;
-#endif
- p->B[2 * i + 1] = 0.0f;
- } else {
- /* peel of the first */
- p->A[0] = 1.0f;
-#ifdef HAVE_SSE3
- p->A[1] = 1.0f;
-#else
- p->A[1] = -1.0f;
-#endif
- p->B[0] = 1.0f;
- p->B[1] = 1.0f;
-
- for (i = 1; i < N/4; i++) {
- float t0 = (float) (1.0 * (1.0 - sin(2.0 * M_PI / N * i)));
- float t1 = (float) (1.0 * (1.0 * cos(2.0 * M_PI / N * i)));
- float t2 = (float) (1.0 * (1.0 + sin(2.0 * M_PI / N * i)));
-
- p->A[ 2 * i + 0] = t0;
- p->A[N - 2 * i + 0] = t0;
-#ifdef HAVE_SSE3
- p->A[ 2 * i + 1] = t1;
- p->A[N - 2 * i + 1] = -t1;
-#else
- p->A[ 2 * i + 1] = -t1;
- p->A[N - 2 * i + 1] = t1;
-#endif
-
- p->B[ 2 * i + 0] = t2;
- p->B[N - 2 * i + 0] = t2;
- p->B[ 2 * i + 1] = t1;
- p->B[N - 2 * i + 1] = -t1;
- }
-
- /* and the last */
- p->A[2 * i + 0] = 0.0f;
- p->A[2 * i + 1] = 0.0f;
- p->B[2 * i + 0] = 2.0f;
- p->B[2 * i + 1] = 0.0f;
- }
return p;
diff --git a/src/ffts_trig.c b/src/ffts_trig.c
index 8af96b9..514a1e5 100644
--- a/src/ffts_trig.c
+++ b/src/ffts_trig.c
@@ -118,7 +118,7 @@ ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size)
c[0] = 1.0;
s[0] = 0.0;
- /* generate sine and cosine table with maximum error less than 1 ULP */
+ /* generate sine and cosine tables with maximum error less than 1 ULP */
for (i = 1; i < (table_size + 1)/2; i++) {
c[1] = c[0] - ((alpha * c[0]) + (beta * s[0]));
s[1] = s[0] - ((alpha * s[0]) - (beta * c[0]));
@@ -190,13 +190,13 @@ ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size)
FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]);
hs = (const double*) &half_secant[2 * offset];
- /* initialize from table */
+ /* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
w[i][0] = ct[i][0];
w[i][1] = ct[i][1];
}
- /* generate sine and cosine table with maximum error less than 0.5 ULP */
+ /* generate sine and cosine tables with maximum error less than 0.5 ULP */
for (i = 1; i < table_size/2; i++) {
/* calculate trailing zeros in index */
log_2 = ffts_ctzl(i);
@@ -218,4 +218,128 @@ mid_point:
exit:
return 0;
+}
+
+int
+ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
+ int sign,
+ int invert)
+{
+ const ffts_cpx_64f *FFTS_RESTRICT ct;
+ const double *FFTS_RESTRICT hs;
+ ffts_cpx_64f FFTS_ALIGN(16) w[32];
+ int i, log_2, offset, N;
+ float *A, *B;
+
+ if (!p) {
+ return -1;
+ }
+
+ A = (float*) FFTS_ASSUME_ALIGNED_32(p->A);
+ B = (float*) FFTS_ASSUME_ALIGNED_32(p->B);
+ N = (int) p->N;
+
+ /* the first */
+ if (sign < 0) {
+ A[0] = 0.5f;
+ A[1] = -0.5f;
+ B[0] = invert ? -0.5f : 0.5f;
+ B[1] = 0.5f;
+ } else {
+ /* peel of the first */
+ A[0] = 1.0f;
+ A[1] = invert ? 1.0f : -1.0f;
+ B[0] = 1.0f;
+ B[1] = 1.0f;
+ }
+
+ if (FFTS_UNLIKELY(N == 4)) {
+ i = 1;
+ goto last;
+ }
+
+ /* calculate table offset */
+ FFTS_ASSUME(N / 4 > 1);
+ log_2 = ffts_ctzl(N);
+ FFTS_ASSUME(log_2 > 2);
+ offset = 34 - log_2;
+ ct = (const ffts_cpx_64f*)
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]);
+ hs = (const double*) &half_secant[2 * offset];
+
+ /* initialize from lookup table */
+ for (i = 0; i <= log_2; i++) {
+ w[i][0] = ct[i][0];
+ w[i][1] = ct[i][1];
+ }
+
+ /* generate sine and cosine tables with maximum error less than 0.5 ULP */
+ if (sign < 0) {
+ for (i = 1; i < N/4; i++) {
+ float t0, t1, t2;
+
+ /* calculate trailing zeros in index */
+ log_2 = ffts_ctzl(i);
+
+ t0 = (float) (0.5 * (1.0 - w[log_2][1]));
+ t1 = (float) (0.5 * w[log_2][0]);
+ t2 = (float) (0.5 * (1.0 + w[log_2][1]));
+
+ A[ 2 * i + 0] = t0;
+ A[N - 2 * i + 0] = t0;
+ A[ 2 * i + 1] = -t1;
+ A[N - 2 * i + 1] = t1;
+
+ B[ 2 * i + 0] = invert ? -t2 : t2;
+ B[N - 2 * i + 0] = invert ? -t2 : t2;
+ B[ 2 * i + 1] = t1;
+ B[N - 2 * i + 1] = -t1;
+
+ /* skip and find next trailing zero */
+ offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
+ w[log_2][0] = hs[log_2] * (w[log_2 + 1][0] + w[offset][0]);
+ w[log_2][1] = hs[log_2] * (w[log_2 + 1][1] + w[offset][1]);
+ }
+ } else {
+ for (i = 1; i < N/4; i++) {
+ float t0, t1, t2;
+
+ /* calculate trailing zeros in index */
+ log_2 = ffts_ctzl(i);
+
+ t0 = (float) (1.0 - w[log_2][1]);
+ t1 = (float) w[log_2][0];
+ t2 = (float) (1.0 + w[log_2][1]);
+
+ A[ 2 * i + 0] = t0;
+ A[N - 2 * i + 0] = t0;
+ A[ 2 * i + 1] = invert ? t1 : -t1;
+ A[N - 2 * i + 1] = invert ? -t1 : t1;
+
+ B[ 2 * i + 0] = t2;
+ B[N - 2 * i + 0] = t2;
+ B[ 2 * i + 1] = t1;
+ B[N - 2 * i + 1] = -t1;
+
+ /* skip and find next trailing zero */
+ offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
+ w[log_2][0] = hs[log_2] * (w[log_2 + 1][0] + w[offset][0]);
+ w[log_2][1] = hs[log_2] * (w[log_2 + 1][1] + w[offset][1]);
+ }
+ }
+
+last:
+ if (sign < 0) {
+ A[2 * i + 0] = 0.0f;
+ A[2 * i + 1] = 0.0f;
+ B[2 * i + 0] = invert ? -1.0f : 1.0f;
+ B[2 * i + 1] = 0.0f;
+ } else {
+ A[2 * i + 0] = 0.0f;
+ A[2 * i + 1] = 0.0f;
+ B[2 * i + 0] = 2.0f;
+ B[2 * i + 1] = 0.0f;
+ }
+
+ return 0;
} \ No newline at end of file
diff --git a/src/ffts_trig.h b/src/ffts_trig.h
index 258c176..cfed2fb 100644
--- a/src/ffts_trig.h
+++ b/src/ffts_trig.h
@@ -45,4 +45,9 @@ ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size);
int
ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size);
+int
+ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
+ int sign,
+ int invert);
+
#endif /* FFTS_TRIG_H */
diff --git a/src/patterns.c b/src/patterns.c
index 158ff89..be89265 100644
--- a/src/patterns.c
+++ b/src/patterns.c
@@ -37,7 +37,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <assert.h>
#include <limits.h>
#include <malloc.h>
-#include <math.h>
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
OpenPOWER on IntegriCloud