Diffstat (limited to 'libavcodec/fft_template.c')
-rw-r--r--  libavcodec/fft_template.c | 189
1 file changed, 182 insertions(+), 7 deletions(-)
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 808f317..23ea453 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -4,20 +4,20 @@
* Copyright (c) 2002 Fabrice Bellard
* Partly based on libdjbfft by D. J. Bernstein
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -32,6 +32,10 @@
#include "fft.h"
#include "fft-internal.h"
+#if FFT_FIXED_32
+#include "fft_table.h"
+#else /* FFT_FIXED_32 */
+
/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
#if !CONFIG_HARDCODED_TABLES
COSTABLE(16);
@@ -65,6 +69,8 @@ COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
FFT_NAME(ff_cos_65536),
};
+#endif /* FFT_FIXED_32 */
+
static void fft_permute_c(FFTContext *s, FFTComplex *z);
static void fft_calc_c(FFTContext *s, FFTComplex *z);
@@ -81,7 +87,7 @@ static int split_radix_permutation(int i, int n, int inverse)
av_cold void ff_init_ff_cos_tabs(int index)
{
-#if !CONFIG_HARDCODED_TABLES
+#if (!CONFIG_HARDCODED_TABLES) && (!FFT_FIXED_32)
int i;
int m = 1<<index;
double freq = 2*M_PI/m;
@@ -157,26 +163,34 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
s->mdct_calc = ff_mdct_calc_c;
#endif
+#if FFT_FIXED_32
+ {
+ int n=0;
+ ff_fft_lut_init(ff_fft_offsets_lut, 0, 1 << 16, &n);
+ }
+#else /* FFT_FIXED_32 */
#if FFT_FLOAT
if (ARCH_AARCH64) ff_fft_init_aarch64(s);
if (ARCH_ARM) ff_fft_init_arm(s);
if (ARCH_PPC) ff_fft_init_ppc(s);
if (ARCH_X86) ff_fft_init_x86(s);
if (CONFIG_MDCT) s->mdct_calcw = s->mdct_calc;
+ if (HAVE_MIPSFPU) ff_fft_init_mips(s);
#else
if (CONFIG_MDCT) s->mdct_calcw = ff_mdct_calcw_c;
if (ARCH_ARM) ff_fft_fixed_init_arm(s);
#endif
-
for(j=4; j<=nbits; j++) {
ff_init_ff_cos_tabs(j);
}
+#endif /* FFT_FIXED_32 */
+
if (s->fft_permutation == FF_FFT_PERM_AVX) {
fft_perm_avx(s);
} else {
for(i=0; i<n; i++) {
- int j = i;
+ j = i;
if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
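For the fixed-point path, ff_fft_init() no longer builds the floating-point cosine tables; instead it fills ff_fft_offsets_lut with the start offsets of every radix-4/8 base transform produced by the split-radix recursion (N is split into one N/2 and two N/4 sub-transforms). The helper ff_fft_lut_init() is declared in fft_table.h and is not shown in this patch; the sketch below is only an illustration of how such an offset table can be generated, and its leaf count matches the (0x2aab >> (16 - nbits)) | 1 transform count used later in fft_calc_c().

/* Sketch (not the upstream source): filling a split-radix offset LUT of the
 * kind passed to ff_fft_lut_init().  The recursion mirrors the split-radix
 * decomposition N -> N/2 + N/4 + N/4; each leaf records the offset, in units
 * of 4 complex samples, of one radix-4/8 base transform. */
#include <stdint.h>
#include <stdio.h>

static void lut_init(uint16_t *table, int off, int size, int *index)
{
    if (size < 16) {                              /* leaf: 4- or 8-point transform */
        table[(*index)++] = off >> 2;             /* store offset / 4 */
    } else {                                      /* split into N/2 + N/4 + N/4 */
        lut_init(table, off,                   size >> 1, index);
        lut_init(table, off +     (size >> 1), size >> 2, index);
        lut_init(table, off + 3 * (size >> 2), size >> 2, index);
    }
}

int main(void)
{
    uint16_t lut[64];
    int n = 0;
    lut_init(lut, 0, 64, &n);                     /* same call shape as in ff_fft_init() */
    for (int i = 0; i < n; i++)
        printf("%d ", lut[i] << 2);               /* start offsets of the base transforms */
    printf("\n(%d base transforms for a 64-point FFT)\n", n);
    return 0;
}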
@@ -206,6 +220,166 @@ av_cold void ff_fft_end(FFTContext *s)
av_freep(&s->tmp_buf);
}
+#if FFT_FIXED_32
+
+static void fft_calc_c(FFTContext *s, FFTComplex *z) {
+
+ int nbits, i, n, num_transforms, offset, step;
+ int n4, n2, n34;
+ FFTSample tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+ FFTComplex *tmpz;
+ const int fft_size = (1 << s->nbits);
+ int64_t accu;
+
+ num_transforms = (0x2aab >> (16 - s->nbits)) | 1;
+
+ for (n=0; n<num_transforms; n++){
+ offset = ff_fft_offsets_lut[n] << 2;
+ tmpz = z + offset;
+
+ tmp1 = tmpz[0].re + tmpz[1].re;
+ tmp5 = tmpz[2].re + tmpz[3].re;
+ tmp2 = tmpz[0].im + tmpz[1].im;
+ tmp6 = tmpz[2].im + tmpz[3].im;
+ tmp3 = tmpz[0].re - tmpz[1].re;
+ tmp8 = tmpz[2].im - tmpz[3].im;
+ tmp4 = tmpz[0].im - tmpz[1].im;
+ tmp7 = tmpz[2].re - tmpz[3].re;
+
+ tmpz[0].re = tmp1 + tmp5;
+ tmpz[2].re = tmp1 - tmp5;
+ tmpz[0].im = tmp2 + tmp6;
+ tmpz[2].im = tmp2 - tmp6;
+ tmpz[1].re = tmp3 + tmp8;
+ tmpz[3].re = tmp3 - tmp8;
+ tmpz[1].im = tmp4 - tmp7;
+ tmpz[3].im = tmp4 + tmp7;
+ }
+
+ if (fft_size < 8)
+ return;
+
+ num_transforms = (num_transforms >> 1) | 1;
+
+ for (n=0; n<num_transforms; n++){
+ offset = ff_fft_offsets_lut[n] << 3;
+ tmpz = z + offset;
+
+ tmp1 = tmpz[4].re + tmpz[5].re;
+ tmp3 = tmpz[6].re + tmpz[7].re;
+ tmp2 = tmpz[4].im + tmpz[5].im;
+ tmp4 = tmpz[6].im + tmpz[7].im;
+ tmp5 = tmp1 + tmp3;
+ tmp7 = tmp1 - tmp3;
+ tmp6 = tmp2 + tmp4;
+ tmp8 = tmp2 - tmp4;
+
+ tmp1 = tmpz[4].re - tmpz[5].re;
+ tmp2 = tmpz[4].im - tmpz[5].im;
+ tmp3 = tmpz[6].re - tmpz[7].re;
+ tmp4 = tmpz[6].im - tmpz[7].im;
+
+ tmpz[4].re = tmpz[0].re - tmp5;
+ tmpz[0].re = tmpz[0].re + tmp5;
+ tmpz[4].im = tmpz[0].im - tmp6;
+ tmpz[0].im = tmpz[0].im + tmp6;
+ tmpz[6].re = tmpz[2].re - tmp8;
+ tmpz[2].re = tmpz[2].re + tmp8;
+ tmpz[6].im = tmpz[2].im + tmp7;
+ tmpz[2].im = tmpz[2].im - tmp7;
+
+ accu = (int64_t)Q31(M_SQRT1_2)*(tmp1 + tmp2);
+ tmp5 = (int32_t)((accu + 0x40000000) >> 31);
+ accu = (int64_t)Q31(M_SQRT1_2)*(tmp3 - tmp4);
+ tmp7 = (int32_t)((accu + 0x40000000) >> 31);
+ accu = (int64_t)Q31(M_SQRT1_2)*(tmp2 - tmp1);
+ tmp6 = (int32_t)((accu + 0x40000000) >> 31);
+ accu = (int64_t)Q31(M_SQRT1_2)*(tmp3 + tmp4);
+ tmp8 = (int32_t)((accu + 0x40000000) >> 31);
+ tmp1 = tmp5 + tmp7;
+ tmp3 = tmp5 - tmp7;
+ tmp2 = tmp6 + tmp8;
+ tmp4 = tmp6 - tmp8;
+
+ tmpz[5].re = tmpz[1].re - tmp1;
+ tmpz[1].re = tmpz[1].re + tmp1;
+ tmpz[5].im = tmpz[1].im - tmp2;
+ tmpz[1].im = tmpz[1].im + tmp2;
+ tmpz[7].re = tmpz[3].re - tmp4;
+ tmpz[3].re = tmpz[3].re + tmp4;
+ tmpz[7].im = tmpz[3].im + tmp3;
+ tmpz[3].im = tmpz[3].im - tmp3;
+ }
+
+ step = 1 << ((MAX_LOG2_NFFT-4) - 4);
+ n4 = 4;
+
+ for (nbits=4; nbits<=s->nbits; nbits++){
+ n2 = 2*n4;
+ n34 = 3*n4;
+ num_transforms = (num_transforms >> 1) | 1;
+
+ for (n=0; n<num_transforms; n++){
+ const FFTSample *w_re_ptr = ff_w_tab_sr + step;
+ const FFTSample *w_im_ptr = ff_w_tab_sr + MAX_FFT_SIZE/(4*16) - step;
+ offset = ff_fft_offsets_lut[n] << nbits;
+ tmpz = z + offset;
+
+ tmp5 = tmpz[ n2].re + tmpz[n34].re;
+ tmp1 = tmpz[ n2].re - tmpz[n34].re;
+ tmp6 = tmpz[ n2].im + tmpz[n34].im;
+ tmp2 = tmpz[ n2].im - tmpz[n34].im;
+
+ tmpz[ n2].re = tmpz[ 0].re - tmp5;
+ tmpz[ 0].re = tmpz[ 0].re + tmp5;
+ tmpz[ n2].im = tmpz[ 0].im - tmp6;
+ tmpz[ 0].im = tmpz[ 0].im + tmp6;
+ tmpz[n34].re = tmpz[n4].re - tmp2;
+ tmpz[ n4].re = tmpz[n4].re + tmp2;
+ tmpz[n34].im = tmpz[n4].im + tmp1;
+ tmpz[ n4].im = tmpz[n4].im - tmp1;
+
+ for (i=1; i<n4; i++){
+ FFTSample w_re = w_re_ptr[0];
+ FFTSample w_im = w_im_ptr[0];
+ accu = (int64_t)w_re*tmpz[ n2+i].re;
+ accu += (int64_t)w_im*tmpz[ n2+i].im;
+ tmp1 = (int32_t)((accu + 0x40000000) >> 31);
+ accu = (int64_t)w_re*tmpz[ n2+i].im;
+ accu -= (int64_t)w_im*tmpz[ n2+i].re;
+ tmp2 = (int32_t)((accu + 0x40000000) >> 31);
+ accu = (int64_t)w_re*tmpz[n34+i].re;
+ accu -= (int64_t)w_im*tmpz[n34+i].im;
+ tmp3 = (int32_t)((accu + 0x40000000) >> 31);
+ accu = (int64_t)w_re*tmpz[n34+i].im;
+ accu += (int64_t)w_im*tmpz[n34+i].re;
+ tmp4 = (int32_t)((accu + 0x40000000) >> 31);
+
+ tmp5 = tmp1 + tmp3;
+ tmp1 = tmp1 - tmp3;
+ tmp6 = tmp2 + tmp4;
+ tmp2 = tmp2 - tmp4;
+
+ tmpz[ n2+i].re = tmpz[ i].re - tmp5;
+ tmpz[ i].re = tmpz[ i].re + tmp5;
+ tmpz[ n2+i].im = tmpz[ i].im - tmp6;
+ tmpz[ i].im = tmpz[ i].im + tmp6;
+ tmpz[n34+i].re = tmpz[n4+i].re - tmp2;
+ tmpz[ n4+i].re = tmpz[n4+i].re + tmp2;
+ tmpz[n34+i].im = tmpz[n4+i].im + tmp1;
+ tmpz[ n4+i].im = tmpz[n4+i].im - tmp1;
+
+ w_re_ptr += step;
+ w_im_ptr -= step;
+ }
+ }
+ step >>= 1;
+ n4 <<= 1;
+ }
+}
+
+#else /* FFT_FIXED_32 */
+
#define BUTTERFLIES(a0,a1,a2,a3) {\
BF(t3, t5, t5, t1);\
BF(a2.re, a0.re, a0.re, t5);\
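Every butterfly in the fixed-point fft_calc_c() above widens to 64 bits, adds 0x40000000 and shifts right by 31: a Q31 x Q31 multiply rounded to nearest rather than truncated. The helper below makes that pattern explicit; mul31 is an illustrative name, not part of the patch, and the Q31 definition shown is an assumption (the macro itself comes from the fixed-point fft-internal.h).

/* Illustration only: the rounding fixed-point multiply used throughout the
 * fixed-point fft_calc_c().  Operands are Q31 (value = x / 2^31); the 64-bit
 * product is Q62, so adding 2^30 before the >>31 rounds to nearest instead
 * of truncating. */
#include <stdint.h>
#include <stdio.h>
#include <math.h>

#define Q31(x) ((int32_t)((x) * 2147483648.0 + 0.5))   /* assumed definition */

static inline int32_t mul31(int32_t a, int32_t b)       /* illustrative name  */
{
    int64_t accu = (int64_t)a * b;
    return (int32_t)((accu + 0x40000000) >> 31);
}

int main(void)
{
    int32_t w = Q31(sqrt(0.5));          /* same constant as Q31(M_SQRT1_2) in the patch */
    int32_t x = Q31(0.25);
    /* expect roughly 0.25 * 0.7071 = 0.1768 */
    printf("%f\n", mul31(w, x) / 2147483648.0);
    return 0;
}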
@@ -351,3 +525,4 @@ static void fft_calc_c(FFTContext *s, FFTComplex *z)
{
fft_dispatch[s->nbits-2](z);
}
+#endif /* FFT_FIXED_32 */
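In the general pass, w_re_ptr walks ff_w_tab_sr forwards while w_im_ptr walks it backwards from MAX_FFT_SIZE/(4*16) - step, with step halved for each doubling of the transform size. The table itself is not part of this patch, but the pointer arithmetic implies a quarter-period cosine table: since sin(x) = cos(pi/2 - x), reading the same table in the opposite direction supplies the imaginary twiddle component. A minimal floating-point sketch of that symmetry follows; the table name and size here are illustrative, not the Q31 ff_w_tab_sr itself.

/* Sketch (not the upstream ff_w_tab_sr): why one quarter-period cosine table
 * serves both twiddle components.  With T[j] = cos(2*pi*j/N) for 0 <= j <= N/4,
 * sin(2*pi*i/N) = cos(pi/2 - 2*pi*i/N) = T[N/4 - i], which is the forward
 * (w_re_ptr += step) / backward (w_im_ptr -= step) walk in the pass above. */
#include <math.h>
#include <stdio.h>

int main(void)
{
    enum { N = 64 };
    const double two_pi = 2.0 * acos(-1.0);
    double T[N / 4 + 1];

    for (int j = 0; j <= N / 4; j++)
        T[j] = cos(two_pi * j / N);

    for (int i = 1; i < N / 4; i++) {
        double w_re = T[i];            /* cos(2*pi*i/N), read forward  */
        double w_im = T[N / 4 - i];    /* sin(2*pi*i/N), read backward */
        printf("%2d  re: %+.6f vs %+.6f   im: %+.6f vs %+.6f\n",
               i, w_re, cos(two_pi * i / N), w_im, sin(two_pi * i / N));
    }
    return 0;
}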