summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/ffts_trig.c164
1 files changed, 115 insertions, 49 deletions
diff --git a/src/ffts_trig.c b/src/ffts_trig.c
index 514a1e5..883d0c5 100644
--- a/src/ffts_trig.c
+++ b/src/ffts_trig.c
@@ -33,44 +33,110 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ffts_trig.h"
/* 1/(2*cos(pow(2,-p)*pi)) */
-static const FFTS_ALIGN(16) unsigned int half_secant[66] = {
- 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000,
- 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000,
- 0x00000001, 0x3fe00000, 0x00000005, 0x3fe00000, 0x00000014, 0x3fe00000,
- 0x0000004f, 0x3fe00000, 0x0000013c, 0x3fe00000, 0x000004ef, 0x3fe00000,
- 0x000013bd, 0x3fe00000, 0x00004ef5, 0x3fe00000, 0x00013bd4, 0x3fe00000,
- 0x0004ef4f, 0x3fe00000, 0x0013bd3d, 0x3fe00000, 0x004ef4f3, 0x3fe00000,
- 0x013bd3cd, 0x3fe00000, 0x04ef4f34, 0x3fe00000, 0x13bd3cde, 0x3fe00000,
- 0x4ef4f46c, 0x3fe00000, 0x3bd3e0e7, 0x3fe00001, 0xef507722, 0x3fe00004,
- 0xbd5114f9, 0x3fe00013, 0xf637de7d, 0x3fe0004e, 0xe8190891, 0x3fe0013b,
- 0x9436640e, 0x3fe004f0, 0x9c61d971, 0x3fe013d1, 0xd17cba53, 0x3fe0503e,
- 0x7bdb3895, 0x3fe1517a, 0x00000000, 0x00000000, 0x00000000, 0x00000000
+static const FFTS_ALIGN(16) unsigned int half_secant[132] = {
+ 0x00000000, 0x3fe00000, 0xc9be45de, 0x3be3bd3c,
+ 0x00000000, 0x3fe00000, 0xc9be45de, 0x3c03bd3c,
+ 0x00000000, 0x3fe00000, 0xc9be45de, 0x3c23bd3c,
+ 0x00000000, 0x3fe00000, 0xc9be45de, 0x3c43bd3c,
+ 0x00000000, 0x3fe00000, 0xc9be45de, 0x3c63bd3c,
+ 0x00000000, 0x3fe00000, 0xc9be45df, 0x3c83bd3c,
+ 0x00000001, 0x3fe00000, 0x4df22efd, 0x3c7de9e6,
+ 0x00000005, 0x3fe00000, 0x906e8725, 0xbc60b0cd,
+ 0x00000014, 0x3fe00000, 0x906e8357, 0xbc80b0cd,
+ 0x0000004f, 0x3fe00000, 0x0dce83c9, 0xbc5619b2,
+ 0x0000013c, 0x3fe00000, 0x0dc6e79a, 0xbc7619b2,
+ 0x000004ef, 0x3fe00000, 0xe4af1240, 0x3c83cc9b,
+ 0x000013bd, 0x3fe00000, 0x2d14c08a, 0x3c7e64df,
+ 0x00004ef5, 0x3fe00000, 0x47a85465, 0xbc59b20b,
+ 0x00013bd4, 0x3fe00000, 0xab79c897, 0xbc79b203,
+ 0x0004ef4f, 0x3fe00000, 0x15019a96, 0x3c79386b,
+ 0x0013bd3d, 0x3fe00000, 0x7d6dbf4b, 0xbc7b16b7,
+ 0x004ef4f3, 0x3fe00000, 0xf30832e0, 0x3c741ee4,
+ 0x013bd3cd, 0x3fe00000, 0xd3bcd4bb, 0xbc83f41e,
+ 0x04ef4f34, 0x3fe00000, 0xdd75aebb, 0xbc82ef06,
+ 0x13bd3cde, 0x3fe00000, 0xb2b41b3d, 0x3c52d979,
+ 0x4ef4f46c, 0x3fe00000, 0x4f0fb458, 0xbc851db3,
+ 0x3bd3e0e7, 0x3fe00001, 0x8a0ce3f0, 0x3c58dbab,
+ 0xef507722, 0x3fe00004, 0x2a8ec295, 0x3c83e351,
+ 0xbd5114f9, 0x3fe00013, 0xc4c0d92d, 0x3c8b3ca4,
+ 0xf637de7d, 0x3fe0004e, 0xb74de729, 0x3c45974e,
+ 0xe8190891, 0x3fe0013b, 0x26edf4da, 0xbc814c20,
+ 0x9436640e, 0x3fe004f0, 0xe2b34b50, 0x3c8091ab,
+ 0x9c61d971, 0x3fe013d1, 0x6ce01b8e, 0x3c7f7df7,
+ 0xd17cba53, 0x3fe0503e, 0x74ad7633, 0xbc697609,
+ 0x7bdb3895, 0x3fe1517a, 0x82f9091b, 0xbc8008d1,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000
};
/* cos(pow(2,-p)*pi), sin(pow(2,-p)*pi) */
-static const FFTS_ALIGN(16) unsigned int cos_sin_pi_table[132] = {
- 0x00000000, 0x3ff00000, 0x54442d18, 0x3e0921fb, 0x00000000, 0x3ff00000,
- 0x54442d18, 0x3e0921fb, 0x00000000, 0x3ff00000, 0x54442d18, 0x3e1921fb,
- 0x00000000, 0x3ff00000, 0x54442d18, 0x3e2921fb, 0x00000000, 0x3ff00000,
- 0x54442d18, 0x3e3921fb, 0xffffffff, 0x3fefffff, 0x54442d18, 0x3e4921fb,
- 0xfffffffe, 0x3fefffff, 0x54442d18, 0x3e5921fb, 0xfffffff6, 0x3fefffff,
- 0x54442d16, 0x3e6921fb, 0xffffffd9, 0x3fefffff, 0x54442d0e, 0x3e7921fb,
- 0xffffff62, 0x3fefffff, 0x54442cef, 0x3e8921fb, 0xfffffd88, 0x3fefffff,
- 0x54442c73, 0x3e9921fb, 0xfffff621, 0x3fefffff, 0x54442a83, 0x3ea921fb,
- 0xffffd886, 0x3fefffff, 0x544422c2, 0x3eb921fb, 0xffff6216, 0x3fefffff,
- 0x544403c1, 0x3ec921fb, 0xfffd8858, 0x3fefffff, 0x544387ba, 0x3ed921fb,
- 0xfff62162, 0x3fefffff, 0x544197a1, 0x3ee921fb, 0xffd88586, 0x3fefffff,
- 0x5439d73a, 0x3ef921fb, 0xff62161a, 0x3fefffff, 0x541ad59e, 0x3f0921fb,
- 0xfd885867, 0x3fefffff, 0x539ecf31, 0x3f1921fb, 0xf621619c, 0x3fefffff,
- 0x51aeb57c, 0x3f2921fb, 0xd8858675, 0x3fefffff, 0x49ee4ea6, 0x3f3921fb,
- 0x62161a34, 0x3fefffff, 0x2aecb360, 0x3f4921fb, 0x88586ee6, 0x3feffffd,
- 0xaee6472e, 0x3f5921fa, 0x21621d02, 0x3feffff6, 0xbecca4ba, 0x3f6921f8,
- 0x858e8a92, 0x3fefffd8, 0xfe670071, 0x3f7921f0, 0x169b92db, 0x3fefff62,
- 0xfcdec784, 0x3f8921d1, 0x6084cd0d, 0x3feffd88, 0xf7a3667e, 0x3f992155,
- 0xe3796d7e, 0x3feff621, 0xf10dd814, 0x3fa91f65, 0xa3d12526, 0x3fefd88d,
- 0xbc29b42c, 0x3fb917a6, 0xcff75cb0, 0x3fef6297, 0x3c69a60b, 0x3fc8f8b8,
- 0xcf328d46, 0x3fed906b, 0xa6aea963, 0x3fd87de2, 0x667f3bcd, 0x3fe6a09e,
- 0x667f3bcd, 0x3fe6a09e, 0x00000000, 0x00000000, 0x00000000, 0x3ff00000
+static const FFTS_ALIGN(16) unsigned int cos_sin_pi_table[264] = {
+ 0x00000000, 0x3ff00000, 0xc9be45de, 0xbbf3bd3c,
+ 0x54442d18, 0x3df921fb, 0xbb77974f, 0x3a91a390,
+ 0x00000000, 0x3ff00000, 0xc9be45de, 0xbc13bd3c,
+ 0x54442d18, 0x3e0921fb, 0x54a14928, 0x3aa19bd0,
+ 0x00000000, 0x3ff00000, 0xc9be45de, 0xbc33bd3c,
+ 0x54442d18, 0x3e1921fb, 0xb948108a, 0x3ab17cce,
+ 0x00000000, 0x3ff00000, 0xc9be45de, 0xbc53bd3c,
+ 0x54442d18, 0x3e2921fb, 0x4be32e14, 0x3ac100c8,
+ 0x00000000, 0x3ff00000, 0xc9be45de, 0xbc73bd3c,
+ 0x54442d18, 0x3e3921fb, 0x2c9f4879, 0x3ace215d,
+ 0xffffffff, 0x3fefffff, 0x6c837443, 0x3c888586,
+ 0x54442d18, 0x3e4921fb, 0x0005f376, 0x3acd411f,
+ 0xfffffffe, 0x3fefffff, 0x4df22ef1, 0xbc8de9e6,
+ 0x54442d18, 0x3e5921fb, 0x9937209e, 0xbaf7b153,
+ 0xfffffff6, 0x3fefffff, 0x906e88aa, 0x3c70b0cd,
+ 0x54442d16, 0x3e6921fb, 0xfe19968a, 0xbb03b7c0,
+ 0xffffffd9, 0x3fefffff, 0xdf22ed26, 0xbc8e9e64,
+ 0x54442d0e, 0x3e7921fb, 0x8d1b6ffb, 0xbaee8bb4,
+ 0xffffff62, 0x3fefffff, 0x0dd18f0f, 0x3c6619b2,
+ 0x54442cef, 0x3e8921fb, 0x7f2b20fb, 0xbb00e133,
+ 0xfffffd88, 0x3fefffff, 0x0dd314b2, 0x3c8619b2,
+ 0x54442c73, 0x3e9921fb, 0x619fdf6e, 0xbb174e98,
+ 0xfffff621, 0x3fefffff, 0x3764acf5, 0x3c8866c8,
+ 0x54442a83, 0x3ea921fb, 0xf5b2407f, 0xbb388215,
+ 0xffffd886, 0x3fefffff, 0x20e7a944, 0xbc8e64df,
+ 0x544422c2, 0x3eb921fb, 0x7b9b9f23, 0x3b5a0961,
+ 0xffff6216, 0x3fefffff, 0x52ee25ea, 0x3c69b20e,
+ 0x544403c1, 0x3ec921fb, 0x4df6a86a, 0xbb5999d9,
+ 0xfffd8858, 0x3fefffff, 0xd8910ead, 0x3c89b20f,
+ 0x544387ba, 0x3ed921fb, 0x0809d04d, 0x3b77d9db,
+ 0xfff62162, 0x3fefffff, 0x438d3925, 0xbc8937a8,
+ 0x544197a1, 0x3ee921fb, 0xa5d27f7a, 0xbb858b02,
+ 0xffd88586, 0x3fefffff, 0x94b3ddd2, 0x3c8b22e4,
+ 0x5439d73a, 0x3ef921fb, 0xf8a3b73d, 0xbb863c7f,
+ 0xff62161a, 0x3fefffff, 0x7ea469b2, 0xbc835c13,
+ 0x541ad59e, 0x3f0921fb, 0xb8cee262, 0x3bae9860,
+ 0xfd885867, 0x3fefffff, 0x23a32e63, 0xbc77d556,
+ 0x539ecf31, 0x3f1921fb, 0xfcd23a30, 0x3b96b111,
+ 0xf621619c, 0x3fefffff, 0xbbbd8fe6, 0xbc87507d,
+ 0x51aeb57c, 0x3f2921fb, 0x4916c435, 0xbbca6e1d,
+ 0xd8858675, 0x3fefffff, 0x54748eab, 0xbc879f0e,
+ 0x49ee4ea6, 0x3f3921fb, 0x744a453e, 0x3bde894d,
+ 0x62161a34, 0x3fefffff, 0xb1f9b9c4, 0xbc6136dc,
+ 0x2aecb360, 0x3f4921fb, 0x7e566b4c, 0x3be87615,
+ 0x88586ee6, 0x3feffffd, 0xf173ae5b, 0x3c81af64,
+ 0xaee6472e, 0x3f5921fa, 0x284a9df8, 0xbbfee52e,
+ 0x21621d02, 0x3feffff6, 0xebc82813, 0xbc76acfc,
+ 0xbecca4ba, 0x3f6921f8, 0x7bcab5b2, 0x3c02ba40,
+ 0x858e8a92, 0x3fefffd8, 0x1883bcf7, 0x3c8359c7,
+ 0xfe670071, 0x3f7921f0, 0xfe6b7a9b, 0x3bfab967,
+ 0x169b92db, 0x3fefff62, 0xc81fbd0d, 0x3c85dda3,
+ 0xfcdec784, 0x3f8921d1, 0xbe836d9d, 0x3c29878e,
+ 0x6084cd0d, 0x3feffd88, 0x4556e4cb, 0xbc81354d,
+ 0xf7a3667e, 0x3f992155, 0x091a0130, 0xbbfb1d63,
+ 0xe3796d7e, 0x3feff621, 0x2e24aa15, 0xbc6c57bc,
+ 0xf10dd814, 0x3fa91f65, 0x0d569a90, 0xbc2912bd,
+ 0xa3d12526, 0x3fefd88d, 0x378811c7, 0xbc887df6,
+ 0xbc29b42c, 0x3fb917a6, 0xd26ed688, 0xbc3e2718,
+ 0xcff75cb0, 0x3fef6297, 0x2a361fd3, 0x3c756217,
+ 0x3c69a60b, 0x3fc8f8b8, 0xb9ff8d82, 0xbc626d19,
+ 0xcf328d46, 0x3fed906b, 0x10231ac2, 0x3c7457e6,
+ 0xa6aea963, 0x3fd87de2, 0xd3d5a610, 0xbc672ced,
+ 0x667f3bcd, 0x3fe6a09e, 0x13b26456, 0xbc8bdd34,
+ 0x667f3bcd, 0x3fe6a09e, 0x13b26456, 0xbc8bdd34,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x3ff00000, 0x00000000, 0x00000000
};
int
@@ -187,13 +253,13 @@ ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size)
FFTS_ASSUME(log_2 > 1);
offset = 32 - log_2;
ct = (const ffts_cpx_64f*)
- FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]);
- hs = (const double*) &half_secant[2 * offset];
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
+ hs = (const double*) &half_secant[4 * offset];
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
- w[i][0] = ct[i][0];
- w[i][1] = ct[i][1];
+ w[i][0] = ct[2*i][0];
+ w[i][1] = ct[2*i][2];
}
/* generate sine and cosine tables with maximum error less than 0.5 ULP */
@@ -208,8 +274,8 @@ ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size)
/* skip and find next trailing zero */
offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
- w[log_2][0] = hs[log_2] * (w[log_2 + 1][0] + w[offset][0]);
- w[log_2][1] = hs[log_2] * (w[log_2 + 1][1] + w[offset][1]);
+ w[log_2][0] = hs[2 * log_2] * (w[log_2 + 1][0] + w[offset][0]);
+ w[log_2][1] = hs[2 * log_2] * (w[log_2 + 1][1] + w[offset][1]);
}
mid_point:
@@ -264,13 +330,13 @@ ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
FFTS_ASSUME(log_2 > 2);
offset = 34 - log_2;
ct = (const ffts_cpx_64f*)
- FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[4 * offset]);
- hs = (const double*) &half_secant[2 * offset];
+ FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
+ hs = (const double*) &half_secant[4 * offset];
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
- w[i][0] = ct[i][0];
- w[i][1] = ct[i][1];
+ w[i][0] = ct[2*i][0];
+ w[i][1] = ct[2*i][2];
}
/* generate sine and cosine tables with maximum error less than 0.5 ULP */
@@ -297,8 +363,8 @@ ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
/* skip and find next trailing zero */
offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
- w[log_2][0] = hs[log_2] * (w[log_2 + 1][0] + w[offset][0]);
- w[log_2][1] = hs[log_2] * (w[log_2 + 1][1] + w[offset][1]);
+ w[log_2][0] = hs[2 * log_2] * (w[log_2 + 1][0] + w[offset][0]);
+ w[log_2][1] = hs[2 * log_2] * (w[log_2 + 1][1] + w[offset][1]);
}
} else {
for (i = 1; i < N/4; i++) {
@@ -323,8 +389,8 @@ ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
/* skip and find next trailing zero */
offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
- w[log_2][0] = hs[log_2] * (w[log_2 + 1][0] + w[offset][0]);
- w[log_2][1] = hs[log_2] * (w[log_2 + 1][1] + w[offset][1]);
+ w[log_2][0] = hs[2 * log_2] * (w[log_2 + 1][0] + w[offset][0]);
+ w[log_2][1] = hs[2 * log_2] * (w[log_2 + 1][1] + w[offset][1]);
}
}
OpenPOWER on IntegriCloud