summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/h264idct_template.c
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/h264idct_template.c')
-rw-r--r-- libavcodec/h264idct_template.c 103
1 files changed, 86 insertions, 17 deletions
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index ba55715..64bc70d 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -2,20 +2,20 @@
* H.264 IDCT
* Copyright (c) 2004-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -52,7 +52,7 @@ void FUNCC(ff_h264_idct_add)(uint8_t *_dst, DCTELEM *_block, int stride)
INIT_CLIP
pixel *dst = (pixel*)_dst;
dctcoef *block = (dctcoef*)_block;
- stride /= sizeof(pixel);
+ stride >>= sizeof(pixel)-1;
block[0] += 1 << 5;
@@ -86,7 +86,7 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
INIT_CLIP
pixel *dst = (pixel*)_dst;
dctcoef *block = (dctcoef*)_block;
- stride /= sizeof(pixel);
+ stride >>= sizeof(pixel)-1;
block[0] += 32;
@@ -155,12 +155,12 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, DCTELEM *_block, int stride){
}
// assumes all AC coefs are 0
-void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
+void FUNCC(ff_h264_idct_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
int i, j;
int dc = (((dctcoef*)block)[0] + 32) >> 6;
INIT_CLIP
- pixel *dst = (pixel*)_dst;
- stride /= sizeof(pixel);
+ pixel *dst = (pixel*)p_dst;
+ stride >>= sizeof(pixel)-1;
for( j = 0; j < 4; j++ )
{
for( i = 0; i < 4; i++ )
@@ -169,12 +169,12 @@ void FUNCC(ff_h264_idct_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
}
}
-void FUNCC(ff_h264_idct8_dc_add)(uint8_t *_dst, DCTELEM *block, int stride){
+void FUNCC(ff_h264_idct8_dc_add)(uint8_t *p_dst, DCTELEM *block, int stride){
int i, j;
int dc = (((dctcoef*)block)[0] + 32) >> 6;
INIT_CLIP
- pixel *dst = (pixel*)_dst;
- stride /= sizeof(pixel);
+ pixel *dst = (pixel*)p_dst;
+ stride >>= sizeof(pixel)-1;
for( j = 0; j < 8; j++ )
{
for( i = 0; i < 8; i++ )
@@ -224,17 +224,50 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM *
}
}
}
+
+void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ /*
+ * Runs the per-block inverse transform for two groups of coefficient blocks
+ * and adds the result into dest[0] and dest[1] (presumably the two chroma
+ * planes of a 4:2:2 picture, going by the function name -- confirm).
+ *
+ * For every block handled:
+ *   - if its nnzc[ scan8[...] ] entry is non-zero, ff_h264_idct_add is called;
+ *   - otherwise, if ((dctcoef*)block)[i*16] is non-zero, ff_h264_idct_dc_add
+ *     is called;
+ *   - otherwise nothing is done for that block.
+ *
+ * dest          destination pointers; dest[j-1] is used for j = 1, 2
+ * block_offset  per-block offset added to the destination pointer
+ * block         coefficients; block i is passed as block + i*16*sizeof(pixel)
+ * stride        forwarded unchanged to the per-block IDCT routines
+ * nnzc          per-block flags, indexed through scan8[]
+ */
+ int i, j; /* i: block index; j: 1 or 2, selecting dest[j-1] */
+ /* Debug dump of coefficients below is compiled out by #if 0. */
+#if 0
+ av_log(NULL, AV_LOG_INFO, "idct\n");
+ int32_t *b = block;
+ for (int i = 0; i < 256; i++) {
+ av_log(NULL, AV_LOG_INFO, "%5d ", b[i+256]);
+ if (!((i+1) % 16))
+ av_log(NULL, AV_LOG_INFO, "\n");
+ }
+#endif
+ /* First group: blocks j*16 .. j*16+3; flags, offsets and coefficients all use index i. */
+ for(j=1; j<3; j++){
+ for(i=j*16; i<j*16+4; i++){
+ if(nnzc[ scan8[i] ])
+ FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
+ else if(((dctcoef*)block)[i*16])
+ FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride);
+ }
+ }
+ /* Second group: coefficients at j*16+4 .. j*16+7, but flags and offsets are looked up at i+4. */
+ for(j=1; j<3; j++){
+ for(i=j*16+4; i<j*16+8; i++){
+ if(nnzc[ scan8[i+4] ])
+ FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
+ else if(((dctcoef*)block)[i*16])
+ FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride);
+ }
+ }
+}
+
/**
* IDCT transforms the 16 dc values and dequantizes them.
* @param qmul quantization parameter
*/
-void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int qmul){
+void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *p_output, DCTELEM *p_input, int qmul){
#define stride 16
int i;
int temp[16];
static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride};
- dctcoef *input = (dctcoef*)_input;
- dctcoef *output = (dctcoef*)_output;
+ dctcoef *input = (dctcoef*)p_input;
+ dctcoef *output = (dctcoef*)p_output;
for(i=0; i<4; i++){
const int z0= input[4*i+0] + input[4*i+1];
@@ -263,11 +296,47 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int
#undef stride
}
-void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
+void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *p_block, int qmul){ /*
+ * In-place transform and scaling of eight values laid out as 4 rows by
+ * 2 columns inside p_block (viewed as dctcoef): row r, column c lives at
+ * index 32*r + 16*c. Presumably these are the chroma DC coefficients of a
+ * 4:2:2 macroblock, going by the function name -- confirm.
+ *
+ * Step 1 replaces each row pair by its sum and difference (stored in temp).
+ * Step 2 combines the four rows of each column with sums/differences,
+ * multiplies by qmul, adds 128 and shifts right by 8, then writes the
+ * result back to the same eight positions.
+ *
+ * p_block  coefficient buffer, modified in place
+ * qmul     multiplier applied to every output before the rounding shift
+ */
+ const int stride= 16*2; /* index distance between consecutive rows */
+ const int xStride= 16; /* index distance between the two columns */
+ int i;
+ int temp[8]; /* temp[2*r+0] = row sum, temp[2*r+1] = row difference */
+ static const uint8_t x_offset[2]={0, 16}; /* output column offsets; same values as 0 and xStride */
+ dctcoef *block = (dctcoef*)p_block;
+ /* Step 1: horizontal sum/difference for each of the four rows. */
+ for(i=0; i<4; i++){
+ temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1];
+ temp[2*i+1] = block[stride*i + xStride*0] - block[stride*i + xStride*1];
+ }
+ /* Step 2: vertical combination per column (i = 0 uses the sums, i = 1 the differences). */
+ for(i=0; i<2; i++){
+ const int offset= x_offset[i];
+ const int z0= temp[2*0+i] + temp[2*2+i]; /* row0 + row2 */
+ const int z1= temp[2*0+i] - temp[2*2+i]; /* row0 - row2 */
+ const int z2= temp[2*1+i] - temp[2*3+i]; /* row1 - row3 */
+ const int z3= temp[2*1+i] + temp[2*3+i]; /* row1 + row3 */
+ /* NOTE(review): the products below are computed in signed int; confirm callers bound the inputs and qmul so they cannot overflow. */
+ block[stride*0+offset]= ((z0 + z3)*qmul + 128) >> 8;
+ block[stride*1+offset]= ((z1 + z2)*qmul + 128) >> 8;
+ block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8;
+ block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8;
+ }
+ /* Debug dump of the buffer below is compiled out by #if 0. */
+#if 0
+ av_log(NULL, AV_LOG_INFO, "after chroma dc\n");
+ for (int i = 0; i < 256; i++) {
+ av_log(NULL, AV_LOG_INFO, "%5d ", block[i]);
+ if (!((i+1) % 16))
+ av_log(NULL, AV_LOG_INFO, "\n");
+ }
+#endif
+}
+
+void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *p_block, int qmul){
const int stride= 16*2;
const int xStride= 16;
int a,b,c,d,e;
- dctcoef *block = (dctcoef*)_block;
+ dctcoef *block = (dctcoef*)p_block;
a= block[stride*0 + xStride*0];
b= block[stride*0 + xStride*1];
OpenPOWER on IntegriCloud