summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile1
-rw-r--r--arch.mak4
-rwxr-xr-xconfigure47
-rw-r--r--libavcodec/acelp_filters.c9
-rw-r--r--libavcodec/acelp_filters.h33
-rw-r--r--libavcodec/acelp_vectors.c8
-rw-r--r--libavcodec/acelp_vectors.h24
-rw-r--r--libavcodec/amrnbdec.c54
-rw-r--r--libavcodec/amrwbdec.c58
-rw-r--r--libavcodec/celp_filters.c9
-rw-r--r--libavcodec/celp_filters.h49
-rw-r--r--libavcodec/celp_math.c8
-rw-r--r--libavcodec/celp_math.h19
-rw-r--r--libavcodec/lsp.c4
-rw-r--r--libavcodec/mips/Makefile12
-rw-r--r--libavcodec/mips/acelp_filters_mips.c210
-rw-r--r--libavcodec/mips/acelp_vectors_mips.c96
-rw-r--r--libavcodec/mips/amrwbdec_mips.c185
-rw-r--r--libavcodec/mips/amrwbdec_mips.h62
-rw-r--r--libavcodec/mips/celp_filters_mips.c281
-rw-r--r--libavcodec/mips/celp_math_mips.c84
-rw-r--r--libavcodec/mips/lsp_mips.h108
-rw-r--r--libavutil/libm.h4
-rw-r--r--libavutil/mips/libm_mips.h73
24 files changed, 1403 insertions, 39 deletions
diff --git a/Makefile b/Makefile
index 01c8035..616e8e1 100644
--- a/Makefile
+++ b/Makefile
@@ -70,6 +70,7 @@ SUBDIR_VARS := CLEANFILES EXAMPLES FFLIBS HOSTPROGS TESTPROGS TOOLS \
ARCH_HEADERS BUILT_HEADERS SKIPHEADERS \
ALTIVEC-OBJS ARMV5TE-OBJS ARMV6-OBJS ARMVFP-OBJS MMI-OBJS \
MMX-OBJS NEON-OBJS VIS-OBJS YASM-OBJS \
+ MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSPR1-OBJS MIPS32R2-OBJS \
OBJS TESTOBJS
define RESET
diff --git a/arch.mak b/arch.mak
index 33018f3..6ccdfa0 100644
--- a/arch.mak
+++ b/arch.mak
@@ -4,6 +4,10 @@ OBJS-$(HAVE_ARMVFP) += $(ARMVFP-OBJS) $(ARMVFP-OBJS-yes)
OBJS-$(HAVE_NEON) += $(NEON-OBJS) $(NEON-OBJS-yes)
OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes)
+OBJS-$(HAVE_MIPSFPU) += $(MIPSFPU-OBJS) $(MIPSFPU-OBJS-yes)
+OBJS-$(HAVE_MIPS32R2) += $(MIPS32R2-OBJS) $(MIPS32R2-OBJS-yes)
+OBJS-$(HAVE_MIPSDSPR1) += $(MIPSDSPR1-OBJS) $(MIPSDSPR1-OBJS-yes)
+OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS) $(MIPSDSPR2-OBJS-yes)
OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
diff --git a/configure b/configure
index bb07d28..f95a204 100755
--- a/configure
+++ b/configure
@@ -268,6 +268,10 @@ Optimization options (experts only):
--disable-neon disable NEON optimizations
--disable-vis disable VIS optimizations
--disable-yasm disable use of yasm assembler
+ --disable-mips32r2 disable MIPS32R2 optimizations
+ --disable-mipsdspr1 disable MIPS DSP ASE R1 optimizations
+ --disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations
+ --disable-mipsfpu disable floating point MIPS optimizations
--postproc-version=V build libpostproc version V.
Where V can be '$ALT_PP_VER_MAJOR.$ALT_PP_VER_MINOR.$ALT_PP_VER_MICRO' or 'current'. [$postproc_version_default]
@@ -1149,6 +1153,10 @@ ARCH_EXT_LIST='
ssse3
vfpv3
vis
+ mipsfpu
+ mips32r2
+ mipsdspr1
+ mipsdspr2
'
HAVE_LIST_PUB='
@@ -1368,6 +1376,10 @@ armvfp_deps="arm"
neon_deps="arm"
vfpv3_deps="armvfp"
+mipsfpu_deps="mips"
+mips32r2_deps="mips"
+mipsdspr1_deps="mips"
+mipsdspr2_deps="mips"
mmi_deps="mips"
altivec_deps="ppc"
@@ -2567,6 +2579,28 @@ elif enabled mips; then
cpuflags="-march=$cpu"
+ case $cpu in
+ 24kc)
+ disable mipsfpu
+ disable mipsdspr1
+ disable mipsdspr2
+ ;;
+ 24kf*)
+ disable mipsdspr1
+ disable mipsdspr2
+ ;;
+ 24kec|34kc|1004kc)
+ disable mipsfpu
+ disable mipsdspr2
+ ;;
+ 24kef*|34kf*|1004kf*)
+ disable mipsdspr2
+ ;;
+ 74kc)
+ disable mipsfpu
+ ;;
+ esac
+
elif enabled avr32; then
case $cpu in
@@ -2942,6 +2976,15 @@ elif enabled mips; then
check_asm loongson '"dmult.g $1, $2, $3"'
enabled mmi && check_asm mmi '"lq $2, 0($2)"'
+ enabled mips32r2 && add_cflags "-mips32r2" &&
+ check_asm mips32r2 '"rotr $t0, $t1, 1"'
+ enabled mipsdspr1 && add_cflags "-mdsp" && add_asflags "-mdsp" &&
+ check_asm mipsdspr1 '"addu.qb $t0, $t1, $t2"'
+ enabled mipsdspr2 && add_cflags "-mdspr2" && add_asflags "-mdspr2" &&
+ check_asm mipsdspr2 '"absq_s.qb $t0, $t1"'
+ enabled mipsfpu && add_cflags "-mhard-float" &&
+ check_asm mipsfpu '"madd.d $f0, $f2, $f4, $f6"'
+
elif enabled ppc; then
@@ -3541,6 +3584,10 @@ if enabled arm; then
fi
if enabled mips; then
echo "MMI enabled ${mmi-no}"
+ echo "MIPS FPU enabled ${mipsfpu-no}"
+ echo "MIPS32R2 enabled ${mips32r2-no}"
+ echo "MIPS DSP R1 enabled ${mipsdspr1-no}"
+ echo "MIPS DSP R2 enabled ${mipsdspr2-no}"
fi
if enabled ppc; then
echo "AltiVec enabled ${altivec-no}"
diff --git a/libavcodec/acelp_filters.c b/libavcodec/acelp_filters.c
index 1ce5eed..831d672 100644
--- a/libavcodec/acelp_filters.c
+++ b/libavcodec/acelp_filters.c
@@ -142,3 +142,12 @@ void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
samples[0] -= tilt * *mem;
*mem = new_tilt_mem;
}
+
+void ff_acelp_filter_init(ACELPFContext *c)
+{
+ c->acelp_interpolatef = ff_acelp_interpolatef;
+ c->acelp_apply_order_2_transfer_function = ff_acelp_apply_order_2_transfer_function;
+
+ if(HAVE_MIPSFPU)
+ ff_acelp_filter_init_mips(c);
+}
diff --git a/libavcodec/acelp_filters.h b/libavcodec/acelp_filters.h
index e807aed..56197bc 100644
--- a/libavcodec/acelp_filters.h
+++ b/libavcodec/acelp_filters.h
@@ -25,6 +25,39 @@
#include <stdint.h>
+typedef struct ACELPFContext {
+ /**
+ * Floating point version of ff_acelp_interpolate()
+ */
+ void (*acelp_interpolatef)(float *out, const float *in,
+ const float *filter_coeffs, int precision,
+ int frac_pos, int filter_length, int length);
+
+ /**
+ * Apply an order 2 rational transfer function in-place.
+ *
+ * @param out output buffer for filtered speech samples
+ * @param in input buffer containing speech data (may be the same as out)
+ * @param zero_coeffs z^-1 and z^-2 coefficients of the numerator
+ * @param pole_coeffs z^-1 and z^-2 coefficients of the denominator
+ * @param gain scale factor for final output
+ * @param mem intermediate values used by filter (should be 0 initially)
+ * @param n number of samples (should be a multiple of eight)
+ */
+ void (*acelp_apply_order_2_transfer_function)(float *out, const float *in,
+ const float zero_coeffs[2],
+ const float pole_coeffs[2],
+ float gain,
+ float mem[2], int n);
+
+}ACELPFContext;
+
+/**
+ * Initialize ACELPFContext.
+ */
+void ff_acelp_filter_init(ACELPFContext *c);
+void ff_acelp_filter_init_mips(ACELPFContext *c);
+
/**
* low-pass Finite Impulse Response filter coefficients.
*
diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c
index 6a544a9..c703647 100644
--- a/libavcodec/acelp_vectors.c
+++ b/libavcodec/acelp_vectors.c
@@ -260,3 +260,11 @@ void ff_clear_fixed_vector(float *out, const AMRFixed *in, int size)
} while (x < size && repeats);
}
}
+
+void ff_acelp_vectors_init(ACELPVContext *c)
+{
+ c->weighted_vector_sumf = ff_weighted_vector_sumf;
+
+ if(HAVE_MIPSFPU)
+ ff_acelp_vectors_init_mips(c);
+}
diff --git a/libavcodec/acelp_vectors.h b/libavcodec/acelp_vectors.h
index f3bc781..d92f288 100644
--- a/libavcodec/acelp_vectors.h
+++ b/libavcodec/acelp_vectors.h
@@ -25,6 +25,30 @@
#include <stdint.h>
+typedef struct ACELPVContext {
+ /**
+ * float implementation of weighted sum of two vectors.
+ * @param[out] out result of addition
+ * @param in_a first vector
+ * @param in_b second vector
+ * @param weight_coeff_a first vector weight coefficient
+ * @param weight_coeff_b second vector weight coefficient
+ * @param length vectors length (should be a multiple of two)
+ *
+ * @note It is safe to pass the same buffer for out and in_a or in_b.
+ */
+ void (*weighted_vector_sumf)(float *out, const float *in_a, const float *in_b,
+ float weight_coeff_a, float weight_coeff_b,
+ int length);
+
+}ACELPVContext;
+
+/**
+ * Initialize ACELPVContext.
+ */
+void ff_acelp_vectors_init(ACELPVContext *c);
+void ff_acelp_vectors_init_mips(ACELPVContext *c);
+
/** Sparse representation for the algebraic codebook (fixed) vector */
typedef struct {
int n;
diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c
index 6b658c0..46e4856 100644
--- a/libavcodec/amrnbdec.c
+++ b/libavcodec/amrnbdec.c
@@ -136,6 +136,11 @@ typedef struct AMRContext {
float samples_in[LP_FILTER_ORDER + AMR_SUBFRAME_SIZE]; ///< floating point samples
+ ACELPFContext acelpf_ctx; ///< context for filters for ACELP-based codecs
+ ACELPVContext acelpv_ctx; ///< context for vector operations for ACELP-based codecs
+ CELPFContext celpf_ctx; ///< context for filters for CELP-based codecs
+ CELPMContext celpm_ctx; ///< context for CELP math operations
+
} AMRContext;
/** Double version of ff_weighted_vector_sumf() */
@@ -171,6 +176,11 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx)
avcodec_get_frame_defaults(&p->avframe);
avctx->coded_frame = &p->avframe;
+ ff_acelp_filter_init(&p->acelpf_ctx);
+ ff_acelp_vectors_init(&p->acelpv_ctx);
+ ff_celp_filter_init(&p->celpf_ctx);
+ ff_celp_math_init(&p->celpm_ctx);
+
return 0;
}
@@ -214,15 +224,16 @@ static enum Mode unpack_bitstream(AMRContext *p, const uint8_t *buf,
* Interpolate the LSF vector (used for fixed gain smoothing).
* The interpolation is done over all four subframes even in MODE_12k2.
*
+ * @param[in] ctx The Context
* @param[in,out] lsf_q LSFs in [0,1] for each subframe
* @param[in] lsf_new New LSFs in [0,1] for subframe 4
*/
-static void interpolate_lsf(float lsf_q[4][LP_FILTER_ORDER], float *lsf_new)
+static void interpolate_lsf(ACELPVContext *ctx, float lsf_q[4][LP_FILTER_ORDER], float *lsf_new)
{
int i;
for (i = 0; i < 4; i++)
- ff_weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new,
+ ctx->weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new,
0.25 * (3 - i), 0.25 * (i + 1),
LP_FILTER_ORDER);
}
@@ -266,7 +277,7 @@ static void lsf2lsp_for_mode12k2(AMRContext *p, double lsp[LP_FILTER_ORDER],
ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER);
if (update)
- interpolate_lsf(p->lsf_q, lsf_q);
+ interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q);
ff_acelp_lsf2lspd(lsp, lsf_q, LP_FILTER_ORDER);
}
@@ -329,7 +340,7 @@ static void lsf2lsp_3(AMRContext *p)
ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER);
// store data for computing the next frame's LSFs
- interpolate_lsf(p->lsf_q, lsf_q);
+ interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q);
memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r));
ff_acelp_lsf2lspd(p->lsp[3], lsf_q, LP_FILTER_ORDER);
@@ -395,7 +406,8 @@ static void decode_pitch_vector(AMRContext *p,
/* Calculate the pitch vector by interpolating the past excitation at the
pitch lag using a b60 hamming windowed sinc function. */
- ff_acelp_interpolatef(p->excitation, p->excitation + 1 - pitch_lag_int,
+ p->acelpf_ctx.acelp_interpolatef(p->excitation,
+ p->excitation + 1 - pitch_lag_int,
ff_b60_sinc, 6,
pitch_lag_frac + 6 - 6*(pitch_lag_frac > 0),
10, AMR_SUBFRAME_SIZE);
@@ -780,12 +792,12 @@ static int synthesis(AMRContext *p, float *lpc,
for (i = 0; i < AMR_SUBFRAME_SIZE; i++)
p->pitch_vector[i] *= 0.25;
- ff_weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector,
+ p->acelpv_ctx.weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector,
p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE);
// emphasize pitch vector contribution
if (p->pitch_gain[4] > 0.5 && !overflow) {
- float energy = ff_dot_productf(excitation, excitation,
+ float energy = p->celpm_ctx.dot_productf(excitation, excitation,
AMR_SUBFRAME_SIZE);
float pitch_factor =
p->pitch_gain[4] *
@@ -800,7 +812,8 @@ static int synthesis(AMRContext *p, float *lpc,
AMR_SUBFRAME_SIZE);
}
- ff_celp_lp_synthesis_filterf(samples, lpc, excitation, AMR_SUBFRAME_SIZE,
+ p->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation,
+ AMR_SUBFRAME_SIZE,
LP_FILTER_ORDER);
// detect overflow
@@ -846,10 +859,11 @@ static void update_state(AMRContext *p)
/**
* Get the tilt factor of a formant filter from its transfer function
*
+ * @param p The Context
* @param lpc_n LP_FILTER_ORDER coefficients of the numerator
* @param lpc_d LP_FILTER_ORDER coefficients of the denominator
*/
-static float tilt_factor(float *lpc_n, float *lpc_d)
+static float tilt_factor(AMRContext *p, float *lpc_n, float *lpc_d)
{
float rh0, rh1; // autocorrelation at lag 0 and 1
@@ -859,11 +873,12 @@ static float tilt_factor(float *lpc_n, float *lpc_d)
hf[0] = 1.0;
memcpy(hf + 1, lpc_n, sizeof(float) * LP_FILTER_ORDER);
- ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE,
+ p->celpf_ctx.celp_lp_synthesis_filterf(hf, lpc_d, hf,
+ AMR_TILT_RESPONSE,
LP_FILTER_ORDER);
- rh0 = ff_dot_productf(hf, hf, AMR_TILT_RESPONSE);
- rh1 = ff_dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1);
+ rh0 = p->celpm_ctx.dot_productf(hf, hf, AMR_TILT_RESPONSE);
+ rh1 = p->celpm_ctx.dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1);
// The spec only specifies this check for 12.2 and 10.2 kbit/s
// modes. But in the ref source the tilt is always non-negative.
@@ -883,7 +898,7 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out)
int i;
float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input
- float speech_gain = ff_dot_productf(samples, samples,
+ float speech_gain = p->celpm_ctx.dot_productf(samples, samples,
AMR_SUBFRAME_SIZE);
float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter
@@ -904,16 +919,16 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out)
}
memcpy(pole_out, p->postfilter_mem, sizeof(float) * LP_FILTER_ORDER);
- ff_celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples,
+ p->celpf_ctx.celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples,
AMR_SUBFRAME_SIZE, LP_FILTER_ORDER);
memcpy(p->postfilter_mem, pole_out + AMR_SUBFRAME_SIZE,
sizeof(float) * LP_FILTER_ORDER);
- ff_celp_lp_zero_synthesis_filterf(buf_out, lpc_n,
+ p->celpf_ctx.celp_lp_zero_synthesis_filterf(buf_out, lpc_n,
pole_out + LP_FILTER_ORDER,
AMR_SUBFRAME_SIZE, LP_FILTER_ORDER);
- ff_tilt_compensation(&p->tilt_mem, tilt_factor(lpc_n, lpc_d), buf_out,
+ ff_tilt_compensation(&p->tilt_mem, tilt_factor(p, lpc_n, lpc_d), buf_out,
AMR_SUBFRAME_SIZE);
ff_adaptive_gain_control(buf_out, buf_out, speech_gain, AMR_SUBFRAME_SIZE,
@@ -990,7 +1005,7 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
p->fixed_gain[4] =
ff_amr_set_fixed_gain(fixed_gain_factor,
- ff_dot_productf(p->fixed_vector, p->fixed_vector,
+ p->celpm_ctx.dot_productf(p->fixed_vector, p->fixed_vector,
AMR_SUBFRAME_SIZE)/AMR_SUBFRAME_SIZE,
p->prediction_error,
energy_mean[p->cur_frame_mode], energy_pred_fac);
@@ -1034,7 +1049,8 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
update_state(p);
}
- ff_acelp_apply_order_2_transfer_function(buf_out, buf_out, highpass_zeros,
+ p->acelpf_ctx.acelp_apply_order_2_transfer_function(buf_out,
+ buf_out, highpass_zeros,
highpass_poles,
highpass_gain * AMR_SAMPLE_SCALE,
p->high_pass_mem, AMR_BLOCK_SIZE);
@@ -1045,7 +1061,7 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
* for fixed_gain_smooth.
* The specification has an incorrect formula: the reference decoder uses
* qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */
- ff_weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3],
+ p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3],
0.84, 0.16, LP_FILTER_ORDER);
*got_frame_ptr = 1;
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 9b8b306..beb3bd7 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -38,6 +38,7 @@
#include "amr.h"
#include "amrwbdata.h"
+#include "mips/amrwbdec_mips.h"
typedef struct {
AVFrame avframe; ///< AVFrame for decoded samples
@@ -82,6 +83,11 @@ typedef struct {
AVLFG prng; ///< random number generator for white noise excitation
uint8_t first_frame; ///< flag active during decoding of the first frame
+ ACELPFContext acelpf_ctx; ///< context for filters for ACELP-based codecs
+ ACELPVContext acelpv_ctx; ///< context for vector operations for ACELP-based codecs
+ CELPFContext celpf_ctx; ///< context for filters for CELP-based codecs
+ CELPMContext celpm_ctx; ///< context for CELP math operations
+
} AMRWBContext;
static av_cold int amrwb_decode_init(AVCodecContext *avctx)
@@ -105,6 +111,11 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx)
avcodec_get_frame_defaults(&ctx->avframe);
avctx->coded_frame = &ctx->avframe;
+ ff_acelp_filter_init(&ctx->acelpf_ctx);
+ ff_acelp_vectors_init(&ctx->acelpv_ctx);
+ ff_celp_filter_init(&ctx->celpf_ctx);
+ ff_celp_math_init(&ctx->celpm_ctx);
+
return 0;
}
@@ -319,7 +330,8 @@ static void decode_pitch_vector(AMRWBContext *ctx,
/* Calculate the pitch vector by interpolating the past excitation at the
pitch lag using a hamming windowed sinc function */
- ff_acelp_interpolatef(exc, exc + 1 - pitch_lag_int,
+ ctx->acelpf_ctx.acelp_interpolatef(exc,
+ exc + 1 - pitch_lag_int,
ac_inter, 4,
pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4),
LP_ORDER, AMRWB_SFR_SIZE + 1);
@@ -578,15 +590,17 @@ static void pitch_sharpening(AMRWBContext *ctx, float *fixed_vector)
*
* @param[in] p_vector, f_vector Pitch and fixed excitation vectors
* @param[in] p_gain, f_gain Pitch and fixed gains
+ * @param[in] ctx The context
*/
// XXX: There is something wrong with the precision here! The magnitudes
// of the energies are not correct. Please check the reference code carefully
static float voice_factor(float *p_vector, float p_gain,
- float *f_vector, float f_gain)
+ float *f_vector, float f_gain,
+ CELPMContext *ctx)
{
- double p_ener = (double) ff_dot_productf(p_vector, p_vector,
+ double p_ener = (double) ctx->dot_productf(p_vector, p_vector,
AMRWB_SFR_SIZE) * p_gain * p_gain;
- double f_ener = (double) ff_dot_productf(f_vector, f_vector,
+ double f_ener = (double) ctx->dot_productf(f_vector, f_vector,
AMRWB_SFR_SIZE) * f_gain * f_gain;
return (p_ener - f_ener) / (p_ener + f_ener);
@@ -749,13 +763,13 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
float fixed_gain, const float *fixed_vector,
float *samples)
{
- ff_weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector,
+ ctx->acelpv_ctx.weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector,
ctx->pitch_gain[0], fixed_gain, AMRWB_SFR_SIZE);
/* emphasize pitch vector contribution in low bitrate modes */
if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) {
int i;
- float energy = ff_dot_productf(excitation, excitation,
+ float energy = ctx->celpm_ctx.dot_productf(excitation, excitation,
AMRWB_SFR_SIZE);
// XXX: Weird part in both ref code and spec. An unknown parameter
@@ -769,7 +783,7 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
energy, AMRWB_SFR_SIZE);
}
- ff_celp_lp_synthesis_filterf(samples, lpc, excitation,
+ ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation,
AMRWB_SFR_SIZE, LP_ORDER);
}
@@ -801,8 +815,9 @@ static void de_emphasis(float *out, float *in, float m, float mem[1])
* @param[out] out Buffer for interpolated signal
* @param[in] in Current signal data (length 0.8*o_size)
* @param[in] o_size Output signal length
+ * @param[in] ctx The context
*/
-static void upsample_5_4(float *out, const float *in, int o_size)
+static void upsample_5_4(float *out, const float *in, int o_size, CELPMContext *ctx)
{
const float *in0 = in - UPS_FIR_SIZE + 1;
int i, j, k;
@@ -815,7 +830,8 @@ static void upsample_5_4(float *out, const float *in, int o_size)
i++;
for (k = 1; k < 5; k++) {
- out[i] = ff_dot_productf(in0 + int_part, upsample_fir[4 - frac_part],
+ out[i] = ctx->dot_productf(in0 + int_part,
+ upsample_fir[4 - frac_part],
UPS_MEM_SIZE);
int_part++;
frac_part--;
@@ -842,8 +858,8 @@ static float find_hb_gain(AMRWBContext *ctx, const float *synth,
if (ctx->fr_cur_mode == MODE_23k85)
return qua_hb_gain[hb_idx] * (1.0f / (1 << 14));
- tilt = ff_dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) /
- ff_dot_productf(synth, synth, AMRWB_SFR_SIZE);
+ tilt = ctx->celpm_ctx.dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) /
+ ctx->celpm_ctx.dot_productf(synth, synth, AMRWB_SFR_SIZE);
/* return gain bounded by [0.1, 1.0] */
return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0);
@@ -862,7 +878,7 @@ static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc,
const float *synth_exc, float hb_gain)
{
int i;
- float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
+ float energy = ctx->celpm_ctx.dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
/* Generate a white-noise excitation */
for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
@@ -993,7 +1009,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples,
float e_isf[LP_ORDER_16k]; // ISF vector for extrapolation
double e_isp[LP_ORDER_16k];
- ff_weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe],
+ ctx->acelpv_ctx.weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe],
1.0 - isfp_inter[subframe], LP_ORDER);
extrapolate_isf(e_isf);
@@ -1007,7 +1023,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples,
lpc_weighting(hb_lpc, ctx->lp_coef[subframe], 0.6, LP_ORDER);
}
- ff_celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k,
+ ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k,
(mode == MODE_6k60) ? LP_ORDER_16k : LP_ORDER);
}
@@ -1022,6 +1038,8 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples,
*
* @remark It is safe to pass the same array in in and out parameters
*/
+
+#ifndef hb_fir_filter
static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
float mem[HB_FIR_SIZE], const float *in)
{
@@ -1039,6 +1057,7 @@ static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
}
+#endif /* hb_fir_filter */
/**
* Update context state before the next subframe.
@@ -1155,14 +1174,15 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
ctx->fixed_gain[0] =
ff_amr_set_fixed_gain(fixed_gain_factor,
- ff_dot_productf(ctx->fixed_vector, ctx->fixed_vector,
+ ctx->celpm_ctx.dot_productf(ctx->fixed_vector, ctx->fixed_vector,
AMRWB_SFR_SIZE) / AMRWB_SFR_SIZE,
ctx->prediction_error,
ENERGY_MEAN, energy_pred_fac);
/* Calculate voice factor and store tilt for next subframe */
voice_fac = voice_factor(ctx->pitch_vector, ctx->pitch_gain[0],
- ctx->fixed_vector, ctx->fixed_gain[0]);
+ ctx->fixed_vector, ctx->fixed_gain[0],
+ &ctx->celpm_ctx);
ctx->tilt_coef = voice_fac * 0.25 + 0.25;
/* Construct current excitation */
@@ -1188,15 +1208,15 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
de_emphasis(&ctx->samples_up[UPS_MEM_SIZE],
&ctx->samples_az[LP_ORDER], PREEMPH_FAC, ctx->demph_mem);
- ff_acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE],
+ ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE],
&ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_31_poles,
hpf_31_gain, ctx->hpf_31_mem, AMRWB_SFR_SIZE);
upsample_5_4(sub_buf, &ctx->samples_up[UPS_FIR_SIZE],
- AMRWB_SFR_SIZE_16k);
+ AMRWB_SFR_SIZE_16k, &ctx->celpm_ctx);
/* High frequency band (6.4 - 7.0 kHz) generation part */
- ff_acelp_apply_order_2_transfer_function(hb_samples,
+ ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(hb_samples,
&ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_400_poles,
hpf_400_gain, ctx->hpf_400_mem, AMRWB_SFR_SIZE);
diff --git a/libavcodec/celp_filters.c b/libavcodec/celp_filters.c
index 04ede49..8047a78 100644
--- a/libavcodec/celp_filters.c
+++ b/libavcodec/celp_filters.c
@@ -205,3 +205,12 @@ void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs,
out[n] += filter_coeffs[i-1] * in[n-i];
}
}
+
+void ff_celp_filter_init(CELPFContext *c)
+{
+ c->celp_lp_synthesis_filterf = ff_celp_lp_synthesis_filterf;
+ c->celp_lp_zero_synthesis_filterf = ff_celp_lp_zero_synthesis_filterf;
+
+ if(HAVE_MIPSFPU)
+ ff_celp_filter_init_mips(c);
+}
diff --git a/libavcodec/celp_filters.h b/libavcodec/celp_filters.h
index f7e8fbd..f644ec3 100644
--- a/libavcodec/celp_filters.h
+++ b/libavcodec/celp_filters.h
@@ -25,6 +25,55 @@
#include <stdint.h>
+typedef struct CELPFContext {
+ /**
+ * LP synthesis filter.
+ * @param[out] out pointer to output buffer
+ * - the array out[-filter_length, -1] must
+ * contain the previous result of this filter
+ * @param filter_coeffs filter coefficients.
+ * @param in input signal
+ * @param buffer_length amount of data to process
+ * @param filter_length filter length (10 for 10th order LP filter). Must be
+ * greater than 4 and even.
+ *
+ * @note Output buffer must contain filter_length samples of past
+ * speech data before pointer.
+ *
+ * Routine applies 1/A(z) filter to given speech data.
+ */
+ void (*celp_lp_synthesis_filterf)(float *out, const float *filter_coeffs,
+ const float *in, int buffer_length,
+ int filter_length);
+
+ /**
+ * LP zero synthesis filter.
+ * @param[out] out pointer to output buffer
+ * @param filter_coeffs filter coefficients.
+ * @param in input signal
+ * - the array in[-filter_length, -1] must
+ * contain the previous input of this filter
+ * @param buffer_length amount of data to process (should be a multiple of eight)
+ * @param filter_length filter length (10 for 10th order LP filter;
+ * should be a multiple of two)
+ *
+ * @note Input buffer must contain filter_length samples of past
+ * speech data before pointer.
+ *
+ * Routine applies A(z) filter to given speech data.
+ */
+ void (*celp_lp_zero_synthesis_filterf)(float *out, const float *filter_coeffs,
+ const float *in, int buffer_length,
+ int filter_length);
+
+}CELPFContext;
+
+/**
+ * Initialize CELPFContext.
+ */
+void ff_celp_filter_init(CELPFContext *c);
+void ff_celp_filter_init_mips(CELPFContext *c);
+
/**
* Circularly convolve fixed vector with a phase dispersion impulse
* response filter (D.6.2 of G.729 and 6.1.5 of AMR).
diff --git a/libavcodec/celp_math.c b/libavcodec/celp_math.c
index d85277f..443bd7f 100644
--- a/libavcodec/celp_math.c
+++ b/libavcodec/celp_math.c
@@ -218,3 +218,11 @@ float ff_dot_productf(const float* a, const float* b, int length)
return sum;
}
+
+void ff_celp_math_init(CELPMContext *c)
+{
+ c->dot_productf = ff_dot_productf;
+
+ if(HAVE_MIPSFPU)
+ ff_celp_math_init_mips(c);
+}
diff --git a/libavcodec/celp_math.h b/libavcodec/celp_math.h
index ec62a9e..16cc19c 100644
--- a/libavcodec/celp_math.h
+++ b/libavcodec/celp_math.h
@@ -25,6 +25,25 @@
#include <stdint.h>
+typedef struct CELPMContext {
+ /**
+ * Return the dot product.
+ * @param a input data array
+ * @param b input data array
+ * @param length number of elements
+ *
+ * @return dot product = sum of elementwise products
+ */
+ float (*dot_productf)(const float* a, const float* b, int length);
+
+}CELPMContext;
+
+/**
+ * Initialize CELPMContext.
+ */
+void ff_celp_math_init(CELPMContext *c);
+void ff_celp_math_init_mips(CELPMContext *c);
+
/**
* fixed-point implementation of cosine in [0; PI) domain.
* @param arg fixed-point cosine argument, 0 <= arg < 0x4000
diff --git a/libavcodec/lsp.c b/libavcodec/lsp.c
index 7fda12e..a5a86c8 100644
--- a/libavcodec/lsp.c
+++ b/libavcodec/lsp.c
@@ -28,6 +28,8 @@
#include "mathops.h"
#include "lsp.h"
#include "celp_math.h"
+#include "libavcodec/mips/lsp_mips.h"
+
void ff_acelp_reorder_lsf(int16_t* lsfq, int lsfq_min_distance, int lsfq_min, int lsfq_max, int lp_order)
{
@@ -162,6 +164,7 @@ void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, const int16_t* lsp_2nd
ff_acelp_lsp2lpc(lp_2nd, lsp_2nd, lp_order >> 1);
}
+#ifndef ff_lsp2polyf
void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order)
{
int i, j;
@@ -178,6 +181,7 @@ void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order)
f[1] += val;
}
}
+#endif /* ff_lsp2polyf */
void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
{
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 37899b1..24a95b5 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -1,3 +1,13 @@
MMI-OBJS += mips/dsputil_mmi.o \
mips/idct_mmi.o \
- mips/mpegvideo_mmi.o \
+ mips/mpegvideo_mmi.o
+
+MIPSFPU-OBJS-$(CONFIG_AMRNB_DECODER) += mips/acelp_filters_mips.o \
+ mips/celp_filters_mips.o \
+ mips/celp_math_mips.o \
+ mips/acelp_vectors_mips.o
+MIPSFPU-OBJS-$(CONFIG_AMRWB_DECODER) += mips/acelp_filters_mips.o \
+ mips/celp_filters_mips.o \
+ mips/amrwbdec_mips.o \
+ mips/celp_math_mips.o \
+ mips/acelp_vectors_mips.o
diff --git a/libavcodec/mips/acelp_filters_mips.c b/libavcodec/mips/acelp_filters_mips.c
new file mode 100644
index 0000000..be686c2
--- /dev/null
+++ b/libavcodec/mips/acelp_filters_mips.c
@@ -0,0 +1,210 @@
+ /*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Nedeljko Babic (nbabic@mips.com)
+ *
+ * various filters for ACELP-based codecs optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/acelp_filters.c
+ */
+#include "libavutil/attributes.h"
+#include "libavcodec/acelp_filters.h"
+
+/**
+ * MIPS FPU version of the floating-point ACELP interpolation filter.
+ *
+ * For every output sample n this accumulates, for i in [0, filter_length):
+ * in[n + i] * filter_coeffs[i * precision + frac_pos] +
+ * in[n - 1 - i] * filter_coeffs[(i + 1) * precision - frac_pos]
+ *
+ * @param[out] out receives length interpolated samples
+ * @param in input samples; reads span in[-filter_length] up to
+ * in[length + filter_length - 2]
+ * @param filter_coeffs interpolation filter coefficients
+ * @param precision distance (in coefficients) between consecutive taps
+ * @param frac_pos fractional position offset into filter_coeffs
+ * @param filter_length number of taps applied on each side of a sample
+ * @param length number of output samples to produce
+ */
+static void ff_acelp_interpolatef_mips(float *out, const float *in,
+ const float *filter_coeffs, int precision,
+ int frac_pos, int filter_length, int length)
+{
+ int n, i;
+ int prec = precision * 4; /* tap stride in bytes (4 bytes per float) */
+ int fc_offset = precision - frac_pos;
+ float in_val_p, in_val_m, fc_val_p, fc_val_m;
+
+ for (n = 0; n < length; n++) {
+ /**
+ * four pointers are defined in order to minimize number of
+ * computations done in inner loop
+ */
+ const float *p_in_p = &in[n];
+ const float *p_in_m = &in[n-1];
+ const float *p_filter_coeffs_p = &filter_coeffs[frac_pos];
+ const float *p_filter_coeffs_m = filter_coeffs + fc_offset;
+ float v = 0;
+
+ for (i = 0; i < filter_length;i++) {
+ /*
+ * v is both read and written by madd.s, so it must be a read-write
+ * ("+") operand; a write-only ("=") operand would let the compiler
+ * drop the v = 0 initialisation and the value carried between
+ * iterations. The "memory" clobber tells the compiler that the asm
+ * reads memory through the pointer operands.
+ */
+ __asm__ __volatile__ (
+ "lwc1 %[in_val_p], 0(%[p_in_p]) \n\t"
+ "lwc1 %[fc_val_p], 0(%[p_filter_coeffs_p]) \n\t"
+ "lwc1 %[in_val_m], 0(%[p_in_m]) \n\t"
+ "lwc1 %[fc_val_m], 0(%[p_filter_coeffs_m]) \n\t"
+ "addiu %[p_in_p], %[p_in_p], 4 \n\t"
+ "madd.s %[v],%[v], %[in_val_p],%[fc_val_p] \n\t"
+ "addiu %[p_in_m], %[p_in_m], -4 \n\t"
+ "addu %[p_filter_coeffs_p], %[p_filter_coeffs_p], %[prec] \n\t"
+ "addu %[p_filter_coeffs_m], %[p_filter_coeffs_m], %[prec] \n\t"
+ "madd.s %[v],%[v],%[in_val_m], %[fc_val_m] \n\t"
+
+ : [v] "+&f" (v),[p_in_p] "+r" (p_in_p), [p_in_m] "+r" (p_in_m),
+ [p_filter_coeffs_p] "+r" (p_filter_coeffs_p),
+ [in_val_p] "=&f" (in_val_p), [in_val_m] "=&f" (in_val_m),
+ [fc_val_p] "=&f" (fc_val_p), [fc_val_m] "=&f" (fc_val_m),
+ [p_filter_coeffs_m] "+r" (p_filter_coeffs_m)
+ : [prec] "r" (prec)
+ : "memory"
+ );
+ }
+ out[n] = v;
+ }
+}
+
+/**
+ * MIPS FPU version of the second-order pole/zero transfer function.
+ *
+ * For each sample the assembly computes
+ * tmp = gain * in[i] - pole_coeffs[0] * mem[0] - pole_coeffs[1] * mem[1]
+ * out[i] = tmp + zero_coeffs[0] * mem[0] + zero_coeffs[1] * mem[1]
+ * and then shifts the state (mem[1] = mem[0], mem[0] = tmp). The state is
+ * kept in registers and written back to mem[] once, after the loop.
+ *
+ * @param[out] out output samples
+ * @param in input samples
+ * @param zero_coeffs two numerator coefficients
+ * @param pole_coeffs two denominator coefficients
+ * @param gain gain applied to every input sample
+ * @param[in,out] mem two-element filter state
+ * @param n number of samples; nothing is done when n <= 0. The loop
+ * consumes eight samples per pass and only terminates when
+ * the counter reaches exactly zero, so n must be a multiple
+ * of 8.
+ */
+static void ff_acelp_apply_order_2_transfer_function_mips(float *out, const float *in,
+ const float zero_coeffs[2],
+ const float pole_coeffs[2],
+ float gain, float mem[2], int n)
+{
+ /**
+ * loop is unrolled eight times
+ *
+ * the "memory" clobber is required because the assembly loads from in[]
+ * and mem[] and stores to out[] and mem[] behind the compiler's back
+ */
+
+ __asm__ __volatile__ (
+ "lwc1 $f0, 0(%[mem]) \n\t"
+ "blez %[n], ff_acelp_apply_order_2_transfer_function_end%= \n\t"
+ "lwc1 $f1, 4(%[mem]) \n\t"
+ "lwc1 $f2, 0(%[pole_coeffs]) \n\t"
+ "lwc1 $f3, 4(%[pole_coeffs]) \n\t"
+ "lwc1 $f4, 0(%[zero_coeffs]) \n\t"
+ "lwc1 $f5, 4(%[zero_coeffs]) \n\t"
+
+ "ff_acelp_apply_order_2_transfer_function_madd%=: \n\t"
+
+ "lwc1 $f6, 0(%[in]) \n\t"
+ "mul.s $f9, $f3, $f1 \n\t"
+ "mul.s $f7, $f2, $f0 \n\t"
+ "msub.s $f7, $f7, %[gain], $f6 \n\t"
+ "sub.s $f7, $f7, $f9 \n\t"
+ "madd.s $f8, $f7, $f4, $f0 \n\t"
+ "madd.s $f8, $f8, $f5, $f1 \n\t"
+ "lwc1 $f11, 4(%[in]) \n\t"
+ "mul.s $f12, $f3, $f0 \n\t"
+ "mul.s $f13, $f2, $f7 \n\t"
+ "msub.s $f13, $f13, %[gain], $f11 \n\t"
+ "sub.s $f13, $f13, $f12 \n\t"
+ "madd.s $f14, $f13, $f4, $f7 \n\t"
+ "madd.s $f14, $f14, $f5, $f0 \n\t"
+ "swc1 $f8, 0(%[out]) \n\t"
+ "lwc1 $f6, 8(%[in]) \n\t"
+ "mul.s $f9, $f3, $f7 \n\t"
+ "mul.s $f15, $f2, $f13 \n\t"
+ "msub.s $f15, $f15, %[gain], $f6 \n\t"
+ "sub.s $f15, $f15, $f9 \n\t"
+ "madd.s $f8, $f15, $f4, $f13 \n\t"
+ "madd.s $f8, $f8, $f5, $f7 \n\t"
+ "swc1 $f14, 4(%[out]) \n\t"
+ "lwc1 $f11, 12(%[in]) \n\t"
+ "mul.s $f12, $f3, $f13 \n\t"
+ "mul.s $f16, $f2, $f15 \n\t"
+ "msub.s $f16, $f16, %[gain], $f11 \n\t"
+ "sub.s $f16, $f16, $f12 \n\t"
+ "madd.s $f14, $f16, $f4, $f15 \n\t"
+ "madd.s $f14, $f14, $f5, $f13 \n\t"
+ "swc1 $f8, 8(%[out]) \n\t"
+ "lwc1 $f6, 16(%[in]) \n\t"
+ "mul.s $f9, $f3, $f15 \n\t"
+ "mul.s $f7, $f2, $f16 \n\t"
+ "msub.s $f7, $f7, %[gain], $f6 \n\t"
+ "sub.s $f7, $f7, $f9 \n\t"
+ "madd.s $f8, $f7, $f4, $f16 \n\t"
+ "madd.s $f8, $f8, $f5, $f15 \n\t"
+ "swc1 $f14, 12(%[out]) \n\t"
+ "lwc1 $f11, 20(%[in]) \n\t"
+ "mul.s $f12, $f3, $f16 \n\t"
+ "mul.s $f13, $f2, $f7 \n\t"
+ "msub.s $f13, $f13, %[gain], $f11 \n\t"
+ "sub.s $f13, $f13, $f12 \n\t"
+ "madd.s $f14, $f13, $f4, $f7 \n\t"
+ "madd.s $f14, $f14, $f5, $f16 \n\t"
+ "swc1 $f8, 16(%[out]) \n\t"
+ "lwc1 $f6, 24(%[in]) \n\t"
+ "mul.s $f9, $f3, $f7 \n\t"
+ "mul.s $f15, $f2, $f13 \n\t"
+ "msub.s $f15, $f15, %[gain], $f6 \n\t"
+ "sub.s $f1, $f15, $f9 \n\t"
+ "madd.s $f8, $f1, $f4, $f13 \n\t"
+ "madd.s $f8, $f8, $f5, $f7 \n\t"
+ "swc1 $f14, 20(%[out]) \n\t"
+ "lwc1 $f11, 28(%[in]) \n\t"
+ "mul.s $f12, $f3, $f13 \n\t"
+ "mul.s $f16, $f2, $f1 \n\t"
+ "msub.s $f16, $f16, %[gain], $f11 \n\t"
+ "sub.s $f0, $f16, $f12 \n\t"
+ "madd.s $f14, $f0, $f4, $f1 \n\t"
+ "madd.s $f14, $f14, $f5, $f13 \n\t"
+ "swc1 $f8, 24(%[out]) \n\t"
+ "addiu %[out], 32 \n\t"
+ "addiu %[in], 32 \n\t"
+ "addiu %[n], -8 \n\t"
+ "swc1 $f14, -4(%[out]) \n\t"
+ "bnez %[n], ff_acelp_apply_order_2_transfer_function_madd%= \n\t"
+ "swc1 $f1, 4(%[mem]) \n\t"
+ "swc1 $f0, 0(%[mem]) \n\t"
+
+ "ff_acelp_apply_order_2_transfer_function_end%=: \n\t"
+
+ : [out] "+r" (out),
+ [in] "+r" (in), [gain] "+f" (gain),
+ [n] "+r" (n), [mem] "+r" (mem)
+ : [zero_coeffs] "r" (zero_coeffs),
+ [pole_coeffs] "r" (pole_coeffs)
+ : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5",
+ "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
+ "$f12", "$f13", "$f14", "$f15", "$f16", "memory"
+ );
+}
+
+/**
+ * Point the ACELP filter context at the MIPS implementations in this file.
+ *
+ * @param c context whose acelp_interpolatef and
+ * acelp_apply_order_2_transfer_function pointers are overwritten
+ */
+void ff_acelp_filter_init_mips(ACELPFContext *c)
+{
+ c->acelp_apply_order_2_transfer_function =
+ ff_acelp_apply_order_2_transfer_function_mips;
+ c->acelp_interpolatef = ff_acelp_interpolatef_mips;
+}
diff --git a/libavcodec/mips/acelp_vectors_mips.c b/libavcodec/mips/acelp_vectors_mips.c
new file mode 100644
index 0000000..d62b377
--- /dev/null
+++ b/libavcodec/mips/acelp_vectors_mips.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Nedeljko Babic (nbabic@mips.com)
+ *
+ * adaptive and fixed codebook vector operations for ACELP-based codecs
+ * optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/acelp_vectors.c
+ */
+#include "libavcodec/acelp_vectors.h"
+
+/**
+ * MIPS FPU version of the float weighted vector sum:
+ * out[i] = weight_coeff_a * in_a[i] + weight_coeff_b * in_b[i]
+ *
+ * @param[out] out result vector
+ * @param in_a first input vector
+ * @param in_b second input vector
+ * @param weight_coeff_a weight applied to in_a
+ * @param weight_coeff_b weight applied to in_b
+ * @param length number of elements; nothing is done when length <= 0.
+ * Two elements are processed per pass and the loop stops
+ * only when in_a reaches exactly in_a + length, so length
+ * must be even.
+ */
+static void ff_weighted_vector_sumf_mips(
+ float *out, const float *in_a, const float *in_b,
+ float weight_coeff_a, float weight_coeff_b, int length)
+{
+ const float *a_end = in_a + length;
+
+ /* loop unrolled two times */
+ /* "memory" clobber: the asm loads in_a[]/in_b[] and stores out[] */
+ __asm__ __volatile__ (
+ "blez %[length], ff_weighted_vector_sumf_end%= \n\t"
+
+ "ff_weighted_vector_sumf_madd%=: \n\t"
+ "lwc1 $f0, 0(%[in_a]) \n\t"
+ "lwc1 $f3, 4(%[in_a]) \n\t"
+ "lwc1 $f1, 0(%[in_b]) \n\t"
+ "lwc1 $f4, 4(%[in_b]) \n\t"
+ "mul.s $f2, %[weight_coeff_a], $f0 \n\t"
+ "mul.s $f5, %[weight_coeff_a], $f3 \n\t"
+ "madd.s $f2, $f2, %[weight_coeff_b], $f1 \n\t"
+ "madd.s $f5, $f5, %[weight_coeff_b], $f4 \n\t"
+ "addiu %[in_a], 8 \n\t"
+ "addiu %[in_b], 8 \n\t"
+ "swc1 $f2, 0(%[out]) \n\t"
+ "swc1 $f5, 4(%[out]) \n\t"
+ "addiu %[out], 8 \n\t"
+ "bne %[in_a], %[a_end], ff_weighted_vector_sumf_madd%= \n\t"
+
+ "ff_weighted_vector_sumf_end%=: \n\t"
+
+ : [out] "+r" (out), [in_a] "+r" (in_a), [in_b] "+r" (in_b)
+ : [weight_coeff_a] "f" (weight_coeff_a),
+ [weight_coeff_b] "f" (weight_coeff_b),
+ [length] "r" (length), [a_end]"r"(a_end)
+ : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "memory"
+ );
+}
+
+/**
+ * Install the MIPS weighted vector sum from this file into the context.
+ *
+ * @param c context whose weighted_vector_sumf pointer is overwritten
+ */
+void ff_acelp_vectors_init_mips(ACELPVContext *c)
+{
+ c->weighted_vector_sumf = ff_weighted_vector_sumf_mips;
+}
diff --git a/libavcodec/mips/amrwbdec_mips.c b/libavcodec/mips/amrwbdec_mips.c
new file mode 100644
index 0000000..ad08b63
--- /dev/null
+++ b/libavcodec/mips/amrwbdec_mips.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Nedeljko Babic (nbabic@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/amrwbdec.c
+ */
+#include "libavutil/avutil.h"
+#include "libavcodec/amrwbdata.h"
+#include "amrwbdec_mips.h"
+
+/**
+ * MIPS FPU version of the AMR-WB high-band FIR filter.
+ *
+ * The previous HB_FIR_SIZE samples (mem) and the current subframe (in) are
+ * copied into one contiguous buffer; each output sample is the dot product
+ * of fir_coef[] with the window of that buffer starting at index i.
+ *
+ * The assembly is fully unrolled for coefficients at byte offsets 0..120,
+ * i.e. 31 taps. NOTE(review): this presumes HB_FIR_SIZE + 1 == 31; the
+ * constant is defined outside this file -- confirm.
+ *
+ * @param[out] out AMRWB_SFR_SIZE_16k filtered samples
+ * @param fir_coef filter coefficients
+ * @param[in,out] mem filter history; updated with the last HB_FIR_SIZE
+ * input samples on return
+ * @param in AMRWB_SFR_SIZE_16k input samples
+ */
+void hb_fir_filter_mips(float *out, const float fir_coef[HB_FIR_SIZE + 1],
+ float mem[HB_FIR_SIZE], const float *in)
+{
+ int i;
+ float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples
+
+ memcpy(data, mem, HB_FIR_SIZE * sizeof(float));
+ memcpy(data + HB_FIR_SIZE, in, AMRWB_SFR_SIZE_16k * sizeof(float));
+
+ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
+ float output;
+ float * p_data = (data+i);
+
+ /**
+ * inner loop is entirely unrolled and instructions are scheduled
+ * to minimize pipeline stall
+ *
+ * the "memory" clobber makes the compiler treat data[] (filled by the
+ * memcpy calls above) and fir_coef[] as read by the assembly
+ */
+ __asm__ __volatile__(
+ "mtc1 $zero, %[output] \n\t"
+ "lwc1 $f0, 0(%[p_data]) \n\t"
+ "lwc1 $f1, 0(%[fir_coef]) \n\t"
+ "lwc1 $f2, 4(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f0, $f1 \n\t"
+ "lwc1 $f3, 4(%[fir_coef]) \n\t"
+ "lwc1 $f4, 8(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f2, $f3 \n\t"
+ "lwc1 $f5, 8(%[fir_coef]) \n\t"
+
+ "lwc1 $f0, 12(%[p_data]) \n\t"
+ "lwc1 $f1, 12(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f4, $f5 \n\t"
+ "lwc1 $f2, 16(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f0, $f1 \n\t"
+ "lwc1 $f3, 16(%[fir_coef]) \n\t"
+ "lwc1 $f4, 20(%[p_data]) \n\t"
+ "lwc1 $f5, 20(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f2, $f3 \n\t"
+
+ "lwc1 $f0, 24(%[p_data]) \n\t"
+ "lwc1 $f1, 24(%[fir_coef]) \n\t"
+ "lwc1 $f2, 28(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f4, $f5 \n\t"
+ "lwc1 $f3, 28(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f0, $f1 \n\t"
+ "lwc1 $f4, 32(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f2, $f3 \n\t"
+ "lwc1 $f5, 32(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f4, $f5 \n\t"
+
+ "lwc1 $f0, 36(%[p_data]) \n\t"
+ "lwc1 $f1, 36(%[fir_coef]) \n\t"
+ "lwc1 $f2, 40(%[p_data]) \n\t"
+ "lwc1 $f3, 40(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f0, $f1 \n\t"
+ "lwc1 $f4, 44(%[p_data]) \n\t"
+ "lwc1 $f5, 44(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f2, $f3 \n\t"
+
+ "lwc1 $f0, 48(%[p_data]) \n\t"
+ "lwc1 $f1, 48(%[fir_coef]) \n\t"
+ "lwc1 $f2, 52(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f4, $f5 \n\t"
+ "lwc1 $f3, 52(%[fir_coef]) \n\t"
+ "lwc1 $f4, 56(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f0, $f1 \n\t"
+ "lwc1 $f5, 56(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f2, $f3 \n\t"
+
+ "lwc1 $f0, 60(%[p_data]) \n\t"
+ "lwc1 $f1, 60(%[fir_coef]) \n\t"
+ "lwc1 $f2, 64(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f4, $f5 \n\t"
+ "lwc1 $f3, 64(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f0, $f1 \n\t"
+ "lwc1 $f4, 68(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f2, $f3 \n\t"
+ "lwc1 $f5, 68(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f4, $f5 \n\t"
+
+ "lwc1 $f0, 72(%[p_data]) \n\t"
+ "lwc1 $f1, 72(%[fir_coef]) \n\t"
+ "lwc1 $f2, 76(%[p_data]) \n\t"
+ "lwc1 $f3, 76(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f0, $f1 \n\t"
+ "lwc1 $f4, 80(%[p_data]) \n\t"
+ "lwc1 $f5, 80(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f2, $f3 \n\t"
+
+ "lwc1 $f0, 84(%[p_data]) \n\t"
+ "lwc1 $f1, 84(%[fir_coef]) \n\t"
+ "lwc1 $f2, 88(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f4, $f5 \n\t"
+ "lwc1 $f3, 88(%[fir_coef]) \n\t"
+ "lwc1 $f4, 92(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f0, $f1 \n\t"
+ "lwc1 $f5, 92(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f2, $f3 \n\t"
+
+ "lwc1 $f0, 96(%[p_data]) \n\t"
+ "lwc1 $f1, 96(%[fir_coef]) \n\t"
+ "lwc1 $f2, 100(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f4, $f5 \n\t"
+ "lwc1 $f3, 100(%[fir_coef]) \n\t"
+ "lwc1 $f4, 104(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f0, $f1 \n\t"
+ "lwc1 $f5, 104(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f2, $f3 \n\t"
+
+ "lwc1 $f0, 108(%[p_data]) \n\t"
+ "lwc1 $f1, 108(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f4, $f5 \n\t"
+ "lwc1 $f2, 112(%[p_data]) \n\t"
+ "lwc1 $f3, 112(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f0, $f1 \n\t"
+ "lwc1 $f4, 116(%[p_data]) \n\t"
+ "lwc1 $f5, 116(%[fir_coef]) \n\t"
+ "lwc1 $f0, 120(%[p_data]) \n\t"
+ "madd.s %[output], %[output], $f2, $f3 \n\t"
+ "lwc1 $f1, 120(%[fir_coef]) \n\t"
+ "madd.s %[output], %[output], $f4, $f5 \n\t"
+ "madd.s %[output], %[output], $f0, $f1 \n\t"
+
+ : [output]"=&f"(output)
+ : [fir_coef]"r"(fir_coef), [p_data]"r"(p_data)
+ : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "memory"
+ );
+ out[i] = output;
+ }
+ memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
+}
diff --git a/libavcodec/mips/amrwbdec_mips.h b/libavcodec/mips/amrwbdec_mips.h
new file mode 100644
index 0000000..a469918
--- /dev/null
+++ b/libavcodec/mips/amrwbdec_mips.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Nedeljko Babic (nbabic@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/amrwbdec.c
+ */
+#ifndef AVCODEC_AMRWBDEC_MIPS_H
+#define AVCODEC_AMRWBDEC_MIPS_H
+#include "config.h"
+
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+/**
+ * MIPS FPU inline-assembly high-band FIR filter, defined in
+ * libavcodec/mips/amrwbdec_mips.c.
+ */
+void hb_fir_filter_mips(float *out, const float fir_coef[],
+ float mem[], const float *in);
+/* Make every later use of the name hb_fir_filter refer to the MIPS version. */
+#define hb_fir_filter hb_fir_filter_mips
+#endif
+
+#endif /* AVCODEC_AMRWBDEC_MIPS_H */
diff --git a/libavcodec/mips/celp_filters_mips.c b/libavcodec/mips/celp_filters_mips.c
new file mode 100644
index 0000000..a31b81d
--- /dev/null
+++ b/libavcodec/mips/celp_filters_mips.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Nedeljko Babic (nbabic@mips.com)
+ *
+ * various filters for CELP-based codecs optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/celp_filters.c
+ */
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+#include "libavcodec/celp_filters.h"
+
+/**
+ * MIPS FPU version of the float LP synthesis (all-pole) filter:
+ * out[n] = in[n] - sum(filter_coeffs[i - 1] * out[n - i], i = 1..filter_length)
+ *
+ * Samples are produced four at a time; any remaining samples (fewer than
+ * four) are handled one by one by the trailing scalar loop.
+ *
+ * @param[out] out output buffer; out[-filter_length] .. out[-1] must
+ * hold the filter history, and out[-4] .. out[-1] are read
+ * unconditionally
+ * @param filter_coeffs filter coefficients; the four-sample path always reads
+ * indices 0..3 and then consumes further taps in pairs,
+ * so it expects filter_length to be even and at least 4
+ * @param in input samples
+ * @param buffer_length number of samples to process
+ * @param filter_length number of filter taps
+ */
+static void ff_celp_lp_synthesis_filterf_mips(float *out,
+ const float *filter_coeffs,
+ const float* in, int buffer_length,
+ int filter_length)
+{
+ int i,n;
+
+ float out0, out1, out2, out3;
+ float old_out0, old_out1, old_out2, old_out3;
+ float a,b,c;
+ const float *p_filter_coeffs;
+ float *p_out;
+
+ /* combined coefficients used to resolve the dependencies between the
+ * four outputs of one block (applied in the third asm statement) */
+ a = filter_coeffs[0];
+ b = filter_coeffs[1];
+ c = filter_coeffs[2];
+ b -= filter_coeffs[0] * filter_coeffs[0];
+ c -= filter_coeffs[1] * filter_coeffs[0];
+ c -= filter_coeffs[0] * b;
+
+ old_out0 = out[-4];
+ old_out1 = out[-3];
+ old_out2 = out[-2];
+ old_out3 = out[-1];
+ for (n = 0; n <= buffer_length - 4; n+=4) {
+ p_filter_coeffs = filter_coeffs;
+ p_out = out;
+
+ out0 = in[0];
+ out1 = in[1];
+ out2 = in[2];
+ out3 = in[3];
+
+ /* "memory" clobber: the asm loads filter_coeffs[] directly */
+ __asm__ __volatile__(
+ "lwc1 $f2, 8(%[filter_coeffs]) \n\t"
+ "lwc1 $f1, 4(%[filter_coeffs]) \n\t"
+ "lwc1 $f0, 0(%[filter_coeffs]) \n\t"
+ "nmsub.s %[out0], %[out0], $f2, %[old_out1] \n\t"
+ "nmsub.s %[out1], %[out1], $f2, %[old_out2] \n\t"
+ "nmsub.s %[out2], %[out2], $f2, %[old_out3] \n\t"
+ "lwc1 $f3, 12(%[filter_coeffs]) \n\t"
+ "nmsub.s %[out0], %[out0], $f1, %[old_out2] \n\t"
+ "nmsub.s %[out1], %[out1], $f1, %[old_out3] \n\t"
+ "nmsub.s %[out2], %[out2], $f3, %[old_out2] \n\t"
+ "nmsub.s %[out0], %[out0], $f0, %[old_out3] \n\t"
+ "nmsub.s %[out3], %[out3], $f3, %[old_out3] \n\t"
+ "nmsub.s %[out1], %[out1], $f3, %[old_out1] \n\t"
+ "nmsub.s %[out0], %[out0], $f3, %[old_out0] \n\t"
+
+ : [out0]"+f"(out0), [out1]"+f"(out1),
+ [out2]"+f"(out2), [out3]"+f"(out3)
+ : [old_out0]"f"(old_out0), [old_out1]"f"(old_out1),
+ [old_out2]"f"(old_out2), [old_out3]"f"(old_out3),
+ [filter_coeffs]"r"(filter_coeffs)
+ : "$f0", "$f1", "$f2", "$f3", "$f4", "memory"
+ );
+
+ for (i = 5; i <= filter_length; i += 2) {
+ /* "memory" clobber: the asm loads earlier outputs through p_out,
+ * which the C code below stored on previous iterations */
+ __asm__ __volatile__(
+ "lwc1 %[old_out3], -20(%[p_out]) \n\t"
+ "lwc1 $f5, 16(%[p_filter_coeffs]) \n\t"
+ "addiu %[p_out], -8 \n\t"
+ "addiu %[p_filter_coeffs], 8 \n\t"
+ "nmsub.s %[out1], %[out1], $f5, %[old_out0] \n\t"
+ "nmsub.s %[out3], %[out3], $f5, %[old_out2] \n\t"
+ "lwc1 $f4, 12(%[p_filter_coeffs]) \n\t"
+ "lwc1 %[old_out2], -16(%[p_out]) \n\t"
+ "nmsub.s %[out0], %[out0], $f5, %[old_out3] \n\t"
+ "nmsub.s %[out2], %[out2], $f5, %[old_out1] \n\t"
+ "nmsub.s %[out1], %[out1], $f4, %[old_out3] \n\t"
+ "nmsub.s %[out3], %[out3], $f4, %[old_out1] \n\t"
+ "mov.s %[old_out1], %[old_out3] \n\t"
+ "nmsub.s %[out0], %[out0], $f4, %[old_out2] \n\t"
+ "nmsub.s %[out2], %[out2], $f4, %[old_out0] \n\t"
+
+ : [out0]"+f"(out0), [out1]"+f"(out1),
+ [out2]"+f"(out2), [out3]"+f"(out3), [old_out0]"+f"(old_out0),
+ [old_out1]"+f"(old_out1), [old_out2]"+f"(old_out2),
+ [old_out3]"+f"(old_out3),[p_filter_coeffs]"+r"(p_filter_coeffs),
+ [p_out]"+r"(p_out)
+ :
+ : "$f4", "$f5", "memory"
+ );
+ FFSWAP(float, old_out0, old_out2);
+ }
+
+ /* register-only: fold the outputs of this block into each other */
+ __asm__ __volatile__(
+ "nmsub.s %[out3], %[out3], %[a], %[out2] \n\t"
+ "nmsub.s %[out2], %[out2], %[a], %[out1] \n\t"
+ "nmsub.s %[out3], %[out3], %[b], %[out1] \n\t"
+ "nmsub.s %[out1], %[out1], %[a], %[out0] \n\t"
+ "nmsub.s %[out2], %[out2], %[b], %[out0] \n\t"
+ "nmsub.s %[out3], %[out3], %[c], %[out0] \n\t"
+
+ : [out0]"+f"(out0), [out1]"+f"(out1),
+ [out2]"+f"(out2), [out3]"+f"(out3)
+ : [a]"f"(a), [b]"f"(b), [c]"f"(c)
+ );
+
+ out[0] = out0;
+ out[1] = out1;
+ out[2] = out2;
+ out[3] = out3;
+
+ old_out0 = out0;
+ old_out1 = out1;
+ old_out2 = out2;
+ old_out3 = out3;
+
+ out += 4;
+ in += 4;
+ }
+
+ /* rewind to the start of the buffers for the scalar tail loop */
+ out -= n;
+ in -= n;
+ for (; n < buffer_length; n++) {
+ float out_val, out_val_i, fc_val;
+ p_filter_coeffs = filter_coeffs;
+ p_out = &out[n];
+ out_val = in[n];
+ for (i = 1; i <= filter_length; i++) {
+ /* "memory" clobber: the asm loads previously stored out[] values */
+ __asm__ __volatile__(
+ "lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t"
+ "lwc1 %[out_val_i], -4(%[p_out]) \n\t"
+ "addiu %[p_filter_coeffs], 4 \n\t"
+ "addiu %[p_out], -4 \n\t"
+ "nmsub.s %[out_val], %[out_val], %[fc_val], %[out_val_i] \n\t"
+
+ : [fc_val]"=&f"(fc_val), [out_val]"+f"(out_val),
+ [out_val_i]"=&f"(out_val_i), [p_out]"+r"(p_out),
+ [p_filter_coeffs]"+r"(p_filter_coeffs)
+ :
+ : "memory"
+ );
+ }
+ out[n] = out_val;
+ }
+}
+
+/**
+ * MIPS FPU version of the float LP zero-synthesis (all-zero) filter:
+ * out[n] = in[n] + sum(filter_coeffs[i - 1] * in[n - i], i = 1..filter_length)
+ *
+ * @param[out] out output samples
+ * @param filter_coeffs filter coefficients
+ * @param in input samples; in[-filter_length] .. in[-1] must be
+ * readable
+ * @param buffer_length number of samples; eight outputs are read, computed and
+ * written per pass, so this must be a multiple of 8
+ * @param filter_length number of taps; the inner loop consumes two taps per
+ * pass and always runs at least once, so this must be
+ * even and greater than 0
+ */
+static void ff_celp_lp_zero_synthesis_filterf_mips(float *out,
+ const float *filter_coeffs,
+ const float *in, int buffer_length,
+ int filter_length)
+{
+ int i,n;
+ float sum_out8, sum_out7, sum_out6, sum_out5, sum_out4, fc_val;
+ float sum_out3, sum_out2, sum_out1;
+ const float *p_filter_coeffs, *p_in;
+
+ for (n = 0; n < buffer_length; n+=8) {
+ p_in = &in[n];
+ p_filter_coeffs = filter_coeffs;
+ sum_out8 = in[n+7];
+ sum_out7 = in[n+6];
+ sum_out6 = in[n+5];
+ sum_out5 = in[n+4];
+ sum_out4 = in[n+3];
+ sum_out3 = in[n+2];
+ sum_out2 = in[n+1];
+ sum_out1 = in[n];
+ i = filter_length;
+
+ /* i is always greater than 0
+ * outer loop is unrolled eight times so there is less memory access
+ * inner loop is unrolled two times
+ *
+ * the "memory" clobber tells the compiler that the assembly loads
+ * in[] and filter_coeffs[] through the pointer operands
+ */
+ __asm__ __volatile__(
+ "filt_lp_inner%=: \n\t"
+ "lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t"
+ "lwc1 $f7, 6*4(%[p_in]) \n\t"
+ "lwc1 $f6, 5*4(%[p_in]) \n\t"
+ "lwc1 $f5, 4*4(%[p_in]) \n\t"
+ "lwc1 $f4, 3*4(%[p_in]) \n\t"
+ "lwc1 $f3, 2*4(%[p_in]) \n\t"
+ "lwc1 $f2, 4(%[p_in]) \n\t"
+ "lwc1 $f1, 0(%[p_in]) \n\t"
+ "lwc1 $f0, -4(%[p_in]) \n\t"
+ "addiu %[i], -2 \n\t"
+ "madd.s %[sum_out8], %[sum_out8], %[fc_val], $f7 \n\t"
+ "madd.s %[sum_out7], %[sum_out7], %[fc_val], $f6 \n\t"
+ "madd.s %[sum_out6], %[sum_out6], %[fc_val], $f5 \n\t"
+ "madd.s %[sum_out5], %[sum_out5], %[fc_val], $f4 \n\t"
+ "madd.s %[sum_out4], %[sum_out4], %[fc_val], $f3 \n\t"
+ "madd.s %[sum_out3], %[sum_out3], %[fc_val], $f2 \n\t"
+ "madd.s %[sum_out2], %[sum_out2], %[fc_val], $f1 \n\t"
+ "madd.s %[sum_out1], %[sum_out1], %[fc_val], $f0 \n\t"
+ "lwc1 %[fc_val], 4(%[p_filter_coeffs]) \n\t"
+ "lwc1 $f7, -8(%[p_in]) \n\t"
+ "addiu %[p_filter_coeffs], 8 \n\t"
+ "addiu %[p_in], -8 \n\t"
+ "madd.s %[sum_out8], %[sum_out8], %[fc_val], $f6 \n\t"
+ "madd.s %[sum_out7], %[sum_out7], %[fc_val], $f5 \n\t"
+ "madd.s %[sum_out6], %[sum_out6], %[fc_val], $f4 \n\t"
+ "madd.s %[sum_out5], %[sum_out5], %[fc_val], $f3 \n\t"
+ "madd.s %[sum_out4], %[sum_out4], %[fc_val], $f2 \n\t"
+ "madd.s %[sum_out3], %[sum_out3], %[fc_val], $f1 \n\t"
+ "madd.s %[sum_out2], %[sum_out2], %[fc_val], $f0 \n\t"
+ "madd.s %[sum_out1], %[sum_out1], %[fc_val], $f7 \n\t"
+ "bgtz %[i], filt_lp_inner%= \n\t"
+
+ : [sum_out8]"+f"(sum_out8), [sum_out7]"+f"(sum_out7),
+ [sum_out6]"+f"(sum_out6), [sum_out5]"+f"(sum_out5),
+ [sum_out4]"+f"(sum_out4), [sum_out3]"+f"(sum_out3),
+ [sum_out2]"+f"(sum_out2), [sum_out1]"+f"(sum_out1),
+ [fc_val]"=&f"(fc_val), [p_filter_coeffs]"+r"(p_filter_coeffs),
+ [p_in]"+r"(p_in), [i]"+r"(i)
+ :
+ : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "memory"
+ );
+
+ out[n+7] = sum_out8;
+ out[n+6] = sum_out7;
+ out[n+5] = sum_out6;
+ out[n+4] = sum_out5;
+ out[n+3] = sum_out4;
+ out[n+2] = sum_out3;
+ out[n+1] = sum_out2;
+ out[n] = sum_out1;
+ }
+}
+
+/**
+ * Point the CELP filter context at the MIPS implementations in this file.
+ *
+ * @param c context whose celp_lp_synthesis_filterf and
+ * celp_lp_zero_synthesis_filterf pointers are overwritten
+ */
+void ff_celp_filter_init_mips(CELPFContext *c)
+{
+ c->celp_lp_zero_synthesis_filterf =
+ ff_celp_lp_zero_synthesis_filterf_mips;
+ c->celp_lp_synthesis_filterf =
+ ff_celp_lp_synthesis_filterf_mips;
+}
diff --git a/libavcodec/mips/celp_math_mips.c b/libavcodec/mips/celp_math_mips.c
new file mode 100644
index 0000000..0af4171
--- /dev/null
+++ b/libavcodec/mips/celp_math_mips.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Nedeljko Babic (nbabic@mips.com)
+ *
+ * Math operations optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/celp_math.c
+ */
+#include "libavcodec/celp_math.h"
+
+/**
+ * Dot product of two float vectors, accumulated with madd.s.
+ *
+ * @param a      first vector
+ * @param b      second vector
+ * @param length number of element pairs to accumulate
+ * @return sum of a[k] * b[k] for k in [0, length); zero when length <= 0
+ */
+static float ff_dot_productf_mips(const float *a, const float *b,
+                                  int length)
+{
+    float sum;
+    const float *a_end = a + length; /* loop terminates when a reaches this */
+
+    __asm__ __volatile__ (
+        "mtc1 $zero, %[sum] \n\t"                       /* sum = 0 */
+        "blez %[length], ff_dot_productf_end%= \n\t"    /* skip loop if empty */
+        "ff_dot_productf_madd%=: \n\t"
+        "lwc1 $f2, 0(%[a]) \n\t"
+        "lwc1 $f1, 0(%[b]) \n\t"
+        "addiu %[a], %[a], 4 \n\t"
+        "addiu %[b], %[b], 4 \n\t"
+        "madd.s %[sum], %[sum], $f1, $f2 \n\t"          /* sum += *a * *b */
+        "bne %[a], %[a_end], ff_dot_productf_madd%= \n\t"
+        "ff_dot_productf_end%=: \n\t"
+
+        : [sum] "=&f" (sum), [a] "+r" (a), [b] "+r" (b)
+        : [a_end]"r"(a_end), [length] "r" (length)
+        /* "memory": a[] and b[] are read through pointer operands only, so
+         * the compiler must be told that the asm accesses memory; otherwise
+         * it need not make earlier stores to those arrays visible first. */
+        : "$f1", "$f2", "memory"
+    );
+    return sum;
+}
+
+/**
+ * Select the MIPS dot-product routine of this file for a CELP math context.
+ *
+ * @param c context whose dot_productf function pointer is overwritten
+ */
+void ff_celp_math_init_mips(CELPMContext *c)
+{
+    c->dot_productf = ff_dot_productf_mips;
+}
diff --git a/libavcodec/mips/lsp_mips.h b/libavcodec/mips/lsp_mips.h
new file mode 100644
index 0000000..f875392
--- /dev/null
+++ b/libavcodec/mips/lsp_mips.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Nedeljko Babic (nbabic@mips.com)
+ *
+ * LSP routines for ACELP-based codecs optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/lsp.c
+ */
+#ifndef AVCODEC_LSP_MIPS_H
+#define AVCODEC_LSP_MIPS_H
+
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+/**
+ * Build the coefficient array f[] from the values in lsp[] (MIPS FPU asm).
+ *
+ * After f[0] = 1 and f[1] = -2 * lsp[0], every i in [2, lp_half_order]
+ * performs, with val = -2 * lsp[2 * i - 2]:
+ *   f[i]  = val * f[i - 1] + 2 * f[i - 2];
+ *   f[j] += val * f[j - 1] + f[j - 2]   for j = i - 1 down to 2;
+ *   f[1] += val;
+ *
+ * @param lsp           input values; every second element is read
+ * @param f             output array, written at indices 0..lp_half_order
+ * @param lp_half_order highest index of f[] that is computed
+ *
+ * NOTE(review): the look-ahead load of f_j_2 appears to read one double
+ * before f[0] (the loaded value is never used) - confirm that callers'
+ * buffers tolerate this.
+ */
+static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int lp_half_order)
+{
+    int i, j = 0;
+    double *p_fi = f;   /* &f[i - 2]; advanced by one element per outer pass */
+    double *p_f  = f;   /* working pointer, reloaded from p_fi by the asm    */
+
+    f[0] = 1.0;
+    f[1] = -2 * lsp[0];
+    lsp -= 2;           /* so that lsp[2 * i] below addresses element 2i - 2 */
+
+    for(i=2; i<=lp_half_order; i++)
+    {
+        double tmp, f_j_2, f_j_1, f_j;
+        double val = lsp[2*i];
+
+        __asm__ __volatile__(
+            "move %[p_f], %[p_fi] \n\t"
+            "add.d %[val], %[val], %[val] \n\t"
+            "addiu %[p_fi], 8 \n\t"
+            "ldc1 %[f_j_1], 0(%[p_f]) \n\t"                 /* f[i - 2] */
+            "ldc1 %[f_j], 8(%[p_f]) \n\t"                   /* f[i - 1] */
+            "neg.d %[val], %[val] \n\t"                     /* val = -2 * lsp value */
+            "add.d %[tmp], %[f_j_1], %[f_j_1] \n\t"
+            "madd.d %[tmp], %[tmp], %[f_j], %[val] \n\t"
+            "addiu %[j], %[i], -2 \n\t"                     /* inner-loop trip count */
+            "ldc1 %[f_j_2], -8(%[p_f]) \n\t"
+            "sdc1 %[tmp], 16(%[p_f]) \n\t"                  /* f[i] */
+            "beqz %[j], ff_lsp2polyf_lp_j_end%= \n\t"
+            "ff_lsp2polyf_lp_j%=: \n\t"
+            "add.d %[tmp], %[f_j], %[f_j_2] \n\t"
+            "madd.d %[tmp], %[tmp], %[f_j_1], %[val] \n\t"
+            "mov.d %[f_j], %[f_j_1] \n\t"
+            "addiu %[j], -1 \n\t"
+            "mov.d %[f_j_1], %[f_j_2] \n\t"
+            "ldc1 %[f_j_2], -16(%[p_f]) \n\t"
+            "sdc1 %[tmp], 8(%[p_f]) \n\t"
+            "addiu %[p_f], -8 \n\t"
+            "bgtz %[j], ff_lsp2polyf_lp_j%= \n\t"
+            "ff_lsp2polyf_lp_j_end%=: \n\t"
+
+            : [f_j_2]"=&f"(f_j_2), [f_j_1]"=&f"(f_j_1), [val]"+f"(val),
+              [tmp]"=&f"(tmp), [f_j]"=&f"(f_j), [p_f]"+r"(p_f),
+              [j]"+r"(j), [p_fi]"+r"(p_fi)
+            : [i]"r"(i)
+            /* f[] is loaded and stored through p_f, which the operand list
+             * does not describe; the clobber keeps the C accesses to f[]
+             * around the asm ordered with respect to it. */
+            : "memory"
+        );
+        f[1] += val;
+    }
+}
+#define ff_lsp2polyf ff_lsp2polyf_mips
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
+#endif /* AVCODEC_LSP_MIPS_H */
diff --git a/libavutil/libm.h b/libavutil/libm.h
index 62faea4..57eb0c0 100644
--- a/libavutil/libm.h
+++ b/libavutil/libm.h
@@ -28,6 +28,10 @@
#include "config.h"
#include "attributes.h"
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+#include "libavutil/mips/libm_mips.h"
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
+
#if !HAVE_CBRTF
#undef cbrtf
#define cbrtf(x) powf(x, 1.0/3.0)
diff --git a/libavutil/mips/libm_mips.h b/libavutil/mips/libm_mips.h
new file mode 100644
index 0000000..9cc87b7
--- /dev/null
+++ b/libavutil/mips/libm_mips.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2012
+ * MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author: Nedeljko Babic (nbabic@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MIPS optimization for some libm functions
+ */
+
+#ifndef AVUTIL_LIBM_MIPS_H
+#define AVUTIL_LIBM_MIPS_H
+
+/**
+ * Convert a float to an integer using the FPU's cvt.w.s instruction.
+ *
+ * The rounding applied is whatever cvt.w.s performs (presumably the current
+ * FPU rounding mode - confirm against the MIPS ISA manual).
+ *
+ * @param x value to convert
+ * @return the converted 32-bit result, widened to long int
+ */
+static av_always_inline av_const long int lrintf_mips(float x)
+{
+    int result;
+
+    __asm__ __volatile__ (
+        "cvt.w.s %[val], %[val] \n\t"   /* convert in place in the FP register */
+        "mfc1 %[out], %[val] \n\t"      /* copy the word to a general register */
+
+        : [val] "+f" (x), [out] "=r" (result)
+    );
+
+    return result;
+}
+
+/* Route every lrintf() call through the inline MIPS implementation. */
+#undef lrintf
+#define lrintf(x) lrintf_mips(x)
+
+/* libm.h includes config.h before this header, so HAVE_LRINTF may already
+ * be defined; drop that definition first so that setting it to 1 here is
+ * never an incompatible macro redefinition. */
+#undef HAVE_LRINTF
+#define HAVE_LRINTF 1
+#endif /* AVUTIL_LIBM_MIPS_H */
OpenPOWER on IntegriCloud