diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2013-03-10 16:40:23 -0700 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-03-13 04:06:10 +0100 |
commit | e9e608ad5804d483de23e43b98d7af4d1b51e2e4 (patch) | |
tree | 3f8aeb445180e4ff1402aa3cb741e33ba08d4e89 /libavcodec/bfin | |
parent | de99545f4641d7cb9d7d539bae376413b8a88a8d (diff) | |
download | ffmpeg-streaming-e9e608ad5804d483de23e43b98d7af4d1b51e2e4.zip ffmpeg-streaming-e9e608ad5804d483de23e43b98d7af4d1b51e2e4.tar.gz |
Move bfin half-pel assembly from dsputil to hpeldsp.
Diffstat (limited to 'libavcodec/bfin')
-rw-r--r-- | libavcodec/bfin/Makefile | 2 | ||||
-rw-r--r-- | libavcodec/bfin/dsputil_bfin.c | 95 | ||||
-rw-r--r-- | libavcodec/bfin/dsputil_bfin.h | 11 | ||||
-rw-r--r-- | libavcodec/bfin/hpel_pixels_bfin.S | 379 | ||||
-rw-r--r-- | libavcodec/bfin/hpeldsp_bfin.c | 119 | ||||
-rw-r--r-- | libavcodec/bfin/hpeldsp_bfin.h | 50 | ||||
-rw-r--r-- | libavcodec/bfin/pixels_bfin.S | 360 |
7 files changed, 551 insertions, 465 deletions
diff --git a/libavcodec/bfin/Makefile b/libavcodec/bfin/Makefile index d1b41bc..f7a5f42 100644 --- a/libavcodec/bfin/Makefile +++ b/libavcodec/bfin/Makefile @@ -3,6 +3,8 @@ OBJS += bfin/dsputil_bfin.o \ bfin/idct_bfin.o \ bfin/pixels_bfin.o \ +OBJS-$(CONFIG_HPELDSP) += bfin/hpeldsp_bfin.o \ + bfin/hpel_pixels_bfin.o OBJS-$(CONFIG_MPEGVIDEOENC) += bfin/mpegvideo_bfin.o OBJS-$(CONFIG_VP3DSP) += bfin/vp3_bfin.o \ bfin/vp3_idct_bfin.o diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c index ce04753..c9a0f15 100644 --- a/libavcodec/bfin/dsputil_bfin.c +++ b/libavcodec/bfin/dsputil_bfin.c @@ -54,79 +54,6 @@ static void bfin_clear_blocks (int16_t *blocks) ::"a" (blocks):"P0","I0","R0"); } - - -static void bfin_put_pixels8 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels8uc (block, pixels, pixels, line_size, line_size, h); -} - -static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels8uc (block, pixels, pixels+1, line_size, line_size, h); -} - -static void bfin_put_pixels8_y2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels8uc (block, pixels, pixels+line_size, line_size, line_size, h); -} - -static void bfin_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, ptrdiff_t line_size, int h) -{ - ff_bfin_z_put_pixels8_xy2 (block,s0,line_size, line_size, h); -} - -static void bfin_put_pixels16 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels16uc (block, pixels, pixels, line_size, line_size, h); -} - -static void bfin_put_pixels16_x2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels16uc (block, pixels, pixels+1, line_size, line_size, h); -} - -static void bfin_put_pixels16_y2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels16uc (block, pixels, pixels+line_size, line_size, 
line_size, h); -} - -static void bfin_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, ptrdiff_t line_size, int h) -{ - ff_bfin_z_put_pixels16_xy2 (block,s0,line_size, line_size, h); -} - -static void bfin_put_pixels8_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels8uc_nornd (block, pixels, pixels, line_size, h); -} - -static void bfin_put_pixels8_x2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+1, line_size, h); -} - -static void bfin_put_pixels8_y2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+line_size, line_size, h); -} - - -static void bfin_put_pixels16_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels16uc_nornd (block, pixels, pixels, line_size, h); -} - -static void bfin_put_pixels16_x2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+1, line_size, h); -} - -static void bfin_put_pixels16_y2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -{ - ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+line_size, line_size, h); -} - static int bfin_pix_abs16 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) { return ff_bfin_z_sad16x16 (blk1,blk2,line_size,line_size,h); @@ -232,28 +159,6 @@ av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx) c->sse[1] = ff_bfin_sse8; c->sse[2] = ff_bfin_sse4; - if (!high_bit_depth) { - c->put_pixels_tab[0][0] = bfin_put_pixels16; - c->put_pixels_tab[0][1] = bfin_put_pixels16_x2; - c->put_pixels_tab[0][2] = bfin_put_pixels16_y2; - c->put_pixels_tab[0][3] = bfin_put_pixels16_xy2; - - c->put_pixels_tab[1][0] = bfin_put_pixels8; - c->put_pixels_tab[1][1] = bfin_put_pixels8_x2; - c->put_pixels_tab[1][2] = bfin_put_pixels8_y2; - 
c->put_pixels_tab[1][3] = bfin_put_pixels8_xy2; - - c->put_no_rnd_pixels_tab[1][0] = bfin_put_pixels8_nornd; - c->put_no_rnd_pixels_tab[1][1] = bfin_put_pixels8_x2_nornd; - c->put_no_rnd_pixels_tab[1][2] = bfin_put_pixels8_y2_nornd; -/* c->put_no_rnd_pixels_tab[1][3] = ff_bfin_put_pixels8_xy2_nornd; */ - - c->put_no_rnd_pixels_tab[0][0] = bfin_put_pixels16_nornd; - c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd; - c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd; -/* c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */ - } - if (avctx->bits_per_raw_sample <= 8) { if (avctx->dct_algo == FF_DCT_AUTO) c->fdct = ff_bfin_fdct; diff --git a/libavcodec/bfin/dsputil_bfin.h b/libavcodec/bfin/dsputil_bfin.h index 2930923..079ebdd 100644 --- a/libavcodec/bfin/dsputil_bfin.h +++ b/libavcodec/bfin/dsputil_bfin.h @@ -46,19 +46,8 @@ int ff_bfin_pix_norm1 (uint8_t * pix, int line_size) attribute_l1_text; int ff_bfin_z_sad8x8 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) attribute_l1_text; int ff_bfin_z_sad16x16 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) attribute_l1_text; -void ff_bfin_z_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) attribute_l1_text; -void ff_bfin_z_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) attribute_l1_text; -void ff_bfin_put_pixels16_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) attribute_l1_text; -void ff_bfin_put_pixels8_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) attribute_l1_text; - - int ff_bfin_pix_sum (uint8_t *p, int stride) attribute_l1_text; -void ff_bfin_put_pixels8uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) attribute_l1_text; -void ff_bfin_put_pixels16uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) attribute_l1_text; -void 
ff_bfin_put_pixels8uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) attribute_l1_text; -void ff_bfin_put_pixels16uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) attribute_l1_text; - int ff_bfin_sse4 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text; int ff_bfin_sse8 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text; int ff_bfin_sse16 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text; diff --git a/libavcodec/bfin/hpel_pixels_bfin.S b/libavcodec/bfin/hpel_pixels_bfin.S new file mode 100644 index 0000000..b22bc29 --- /dev/null +++ b/libavcodec/bfin/hpel_pixels_bfin.S @@ -0,0 +1,379 @@ +/* + * Blackfin Pixel Operations + * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "config_bfin.h" + +/** + motion compensation + primitives + + * Halfpel motion compensation with rounding (a+b+1)>>1. 
+ * This is an array[4][4] of motion compensation functions for 4 + * horizontal blocksizes (8,16) and the 4 halfpel positions<br> + * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] + * @param block destination where the result is stored + * @param pixels source + * @param line_size number of bytes in a horizontal line of block + * @param h height + +*/ + +DEFUN(put_pixels8uc,mL1, + (uint8_t *block, const uint8_t *s0, const uint8_t *s1, + int dest_size, int line_size, int h)): + i3=r0; // dest + i0=r1; // src0 + i1=r2; // src1 + r0=[sp+12]; // dest_size + r2=[sp+16]; // line_size + p0=[sp+20]; // h + [--sp] = (r7:6); + r0+=-4; + m3=r0; + r2+=-8; + m0=r2; + LSETUP(pp8$0,pp8$1) LC0=P0; + DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; + +pp8$0: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; + R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M0]|| R2 =[I1++M0]; + R7 = BYTEOP1P(R1:0,R3:2)(R) || R0 = [I0++] || [I3++] = R6 ; +pp8$1: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; + + (r7:6) = [sp++]; + RTS; +DEFUN_END(put_pixels8uc) + +DEFUN(put_pixels16uc,mL1, + (uint8_t *block, const uint8_t *s0, const uint8_t *s1, + int dest_size, int line_size, int h)): + link 0; + [--sp] = (r7:6); + i3=r0; // dest + i0=r1; // src0 + i1=r2; // src1 + r0=[fp+20]; // dest_size + r2=[fp+24]; // line_size + p0=[fp+28]; // h + + + r0+=-12; + m3=r0; // line_size + r2+=-16; + m0=r2; + + LSETUP(pp16$0,pp16$1) LC0=P0; + DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; + +pp16$0: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; + R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2 =[I1++]; + R7 = BYTEOP1P(R1:0,R3:2)(R) || R1 = [I0++] || R3 =[I1++]; + [I3++] = R6; + R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M0] || R2 =[I1++M0]; + R7 = BYTEOP1P(R1:0,R3:2)(R) || R0 = [I0++] || [I3++] = R7 ; + [I3++] = R6; +pp16$1: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; + + (r7:6) = [sp++]; + unlink; + RTS; +DEFUN_END(put_pixels16uc) + + + + + + +DEFUN(put_pixels8uc_nornd,mL1, + (uint8_t *block, const uint8_t *s0, const uint8_t *s1, + int 
line_size, int h)): + i3=r0; // dest + i0=r1; // src0 + i1=r2; // src1 + r2=[sp+12]; // line_size + p0=[sp+16]; // h + [--sp] = (r7:6); + r2+=-4; + m3=r2; + r2+=-4; + m0=r2; + LSETUP(pp8$2,pp8$3) LC0=P0; + DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; + +pp8$2: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; + R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++M0]|| R2 =[I1++M0]; + R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R0 = [I0++] || [I3++] = R6 ; +pp8$3: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; + + (r7:6) = [sp++]; + RTS; +DEFUN_END(put_pixels8uc_nornd) + +DEFUN(put_pixels16uc_nornd,mL1, + (uint8_t *block, const uint8_t *s0, const uint8_t *s1, + int line_size, int h)): + i3=r0; // dest + i0=r1; // src0 + i1=r2; // src1 + r2=[sp+12]; // line_size + p0=[sp+16]; // h + + [--sp] = (r7:6); + r2+=-12; + m3=r2; // line_size + r2+=-4; + m0=r2; + + LSETUP(pp16$2,pp16$3) LC0=P0; + DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; + +pp16$2: + DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; + R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++] || R2 =[I1++]; + R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R1 = [I0++] || R3 =[I1++]; + [I3++] = R6; + + R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++M0] || R2 =[I1++M0]; + R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R0 = [I0++] || [I3++] = R7 ; + [I3++] = R6; +pp16$3: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; + + (r7:6) = [sp++]; + + RTS; +DEFUN_END(put_pixels16uc_nornd) + +DEFUN(z_put_pixels16_xy2,mL1, + (uint8_t *block, const uint8_t *s0, + int dest_size, int line_size, int h)): + link 0; + [--sp] = (r7:4); + i3=r0; // dest + i0=r1; // src0--> pixels + i1=r1; // src1--> pixels + line_size + r2+=-12; + m2=r2; // m2=dest_width-4 + r2=[fp+20]; + m3=r2; // line_size + p0=[fp+24]; // h + r2+=-16; + i1+=m3; /* src1 + line_size */ + m0=r2; /* line-size - 20 */ + + B0 = I0; + B1 = I1; + B3 = I3; + + DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; + + LSETUP(LS$16E,LE$16E) LC0=P0; +LS$16E: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; + R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++] || R2 =[I1++]; + R5 = 
BYTEOP2P (R3:2,R1:0) (RNDL,R) || R1 = [I0++] || [I3++] = R4 ; + DISALGNEXCPT || R3 = [I1++] || [I3++] = R5; + R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++M0]|| R2 = [I1++M0]; + R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R0 = [I0++] || [I3++] = R4 ; +LE$16E: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; + + M1 = 1; + I3 = B3; + I1 = B1; + I0 = B0; + + I0 += M1; + I1 += M1; + + DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; + LSETUP(LS$16O,LE$16O) LC0=P0; +LS$16O: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; + R4 = BYTEOP2P (R3:2,R1:0) (RNDH) || R0 = [I0++] || R2 =[I1++]; + R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R1 = [I0++] || R6 =[I3++]; + R4 = R4 +|+ R6 || R7 = [I3--]; + R5 = R5 +|+ R7 || [I3++] = R4; + DISALGNEXCPT || R3 =[I1++] || [I3++] = R5; + R4 = BYTEOP2P (R3:2,R1:0) (RNDH) || R0 = [I0++M0]|| R2 = [I1++M0]; + R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R0 = [I0++] || R6 = [I3++]; + R4 = R4 +|+ R6 || R7 = [I3--]; + R5 = R5 +|+ R7 || [I3++] = R4; +LE$16O: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; + + (r7:4) = [sp++]; + unlink; + rts; +DEFUN_END(z_put_pixels16_xy2) + +DEFUN(put_pixels16_xy2_nornd,mL1, + (uint8_t *block, const uint8_t *s0, + int line_size, int h)): + link 0; + [--sp] = (r7:4); + i3=r0; // dest + i0=r1; // src0--> pixels + i1=r1; // src1--> pixels + line_size + m3=r2; + r2+=-12; + m2=r2; + r2+=-4; + i1+=m3; /* src1 + line_size */ + m0=r2; /* line-size - 20 */ + p0=[fp+20]; // h + + B0=I0; + B1=I1; + B3=I3; + + DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; + + LSETUP(LS$16ET,LE$16ET) LC0=P0; +LS$16ET:DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; + R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++] || R2 =[I1++]; + R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R1 = [I0++] || [I3++] = R4 ; + DISALGNEXCPT || R3 = [I1++] || [I3++] = R5; + R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++M0]|| R2 = [I1++M0]; + R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R0 = [I0++] || [I3++] = R4 ; +LE$16ET:DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; + + M1 = 1; + I3=B3; + I1=B1; + I0=B0; + + I0 += M1; + I1 += 
M1; + + DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; + LSETUP(LS$16OT,LE$16OT) LC0=P0; +LS$16OT:DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; + R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++] || R2 =[I1++]; + R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R1 = [I0++] || R6 =[I3++]; + R4 = R4 +|+ R6 || R7 = [I3--]; + R5 = R5 +|+ R7 || [I3++] = R4; + DISALGNEXCPT || R3 =[I1++] || [I3++] = R5; + R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++M0]|| R2 = [I1++M0]; + R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R0 = [I0++] || R6 = [I3++]; + R4 = R4 +|+ R6 || R7 = [I3--]; + R5 = R5 +|+ R7 || [I3++] = R4; +LE$16OT:DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; + + (r7:4) = [sp++]; + unlink; + rts; +DEFUN_END(put_pixels16_xy2_nornd) + +DEFUN(z_put_pixels8_xy2,mL1, + (uint8_t *block, const uint8_t *s0, + int dest_size, int line_size, int h)): + link 0; + [--sp] = (r7:4); + i3=r0; // dest + i0=r1; // src0--> pixels + i1=r1; // src1--> pixels + line_size + r2+=-4; + m2=r2; // m2=dest_width-4 + r2=[fp+20]; + m3=r2; // line_size + p0=[fp+24]; // h + r2+=-8; + i1+=m3; /* src1 + line_size */ + m0=r2; /* line-size - 20 */ + + b0 = I0; + b1 = I1; + b3 = I3; + + LSETUP(LS$8E,LE$8E) LC0=P0; + DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; +LS$8E: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; + R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++M0] || R2 =[I1++M0]; + R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R0 = [I0++] || [I3++] = R4 ; +LE$8E: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; + + M1 = 1; + I3 = b3; + I1 = b1; + I0 = b0; + + I0 += M1; + I1 += M1; + + LSETUP(LS$8O,LE$8O) LC0=P0; + DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; +LS$8O: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; + R4 = BYTEOP2P (R3:2,R1:0) (RNDH) || R0 = [I0++M0] || R2 =[I1++M0]; + R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R0 = [I0++] || R6 =[I3++]; + R4 = R4 +|+ R6 || R7 = [I3--]; + R5 = R5 +|+ R7 || [I3++] = R4; +LE$8O: DISALGNEXCPT || R2 =[I1++] || [I3++M2] = R5; + + (r7:4) = [sp++]; + unlink; + rts; +DEFUN_END(z_put_pixels8_xy2) + 
+DEFUN(put_pixels8_xy2_nornd,mL1, + (uint8_t *block, const uint8_t *s0, int line_size, int h)): + link 0; + [--sp] = (r7:4); + i3=r0; // dest + i0=r1; // src0--> pixels + i1=r1; // src1--> pixels + line_size + m3=r2; + r2+=-4; + m2=r2; + r2+=-4; + i1+=m3; /* src1 + line_size */ + m0=r2; /* line-size - 20 */ + p0=[fp+20]; // h + + + b0 = I0; + b1 = I1; + b3 = I3; + + LSETUP(LS$8ET,LE$8ET) LC0=P0; + DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; + +LS$8ET: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++]; + R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++M0] || R2 = [I1++M0]; + R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R0 = [I0++] || [I3++] = R4 ; +LE$8ET: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; + + M1 = 1; + I3 = b3; + I1 = b1; + I0 = b0; + + I0 += M1; + I1 += M1; + + LSETUP(LS$8OT,LE$8OT) LC0=P0; + DISALGNEXCPT || R0 = [I0++] || R2 = [I1++]; + +LS$8OT: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++]; + R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++M0] || R2 = [I1++M0]; + R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R0 = [I0++] || R6 = [I3++]; + R4 = R4 +|+ R6 || R7 = [I3--]; + R5 = R5 +|+ R7 || [I3++] = R4; +LE$8OT: DISALGNEXCPT || R2 =[I1++] || [I3++M2] = R5; + + (r7:4) = [sp++]; + unlink; + rts; +DEFUN_END(put_pixels8_xy2_nornd) diff --git a/libavcodec/bfin/hpeldsp_bfin.c b/libavcodec/bfin/hpeldsp_bfin.c new file mode 100644 index 0000000..8b4af49 --- /dev/null +++ b/libavcodec/bfin/hpeldsp_bfin.c @@ -0,0 +1,119 @@ +/* + * BlackFin DSPUTILS + * + * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> + * Copyright (c) 2006 Michael Benjamin <michael.benjamin@analog.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/hpeldsp.h" +#include "hpeldsp_bfin.h" + +static void bfin_put_pixels8 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels8uc (block, pixels, pixels, line_size, line_size, h); +} + +static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels8uc (block, pixels, pixels+1, line_size, line_size, h); +} + +static void bfin_put_pixels8_y2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels8uc (block, pixels, pixels+line_size, line_size, line_size, h); +} + +static void bfin_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, ptrdiff_t line_size, int h) +{ + ff_bfin_z_put_pixels8_xy2 (block,s0,line_size, line_size, h); +} + +static void bfin_put_pixels16 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels16uc (block, pixels, pixels, line_size, line_size, h); +} + +static void bfin_put_pixels16_x2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels16uc (block, pixels, pixels+1, line_size, line_size, h); +} + +static void bfin_put_pixels16_y2 (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels16uc (block, pixels, pixels+line_size, line_size, line_size, h); +} + +static void bfin_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, ptrdiff_t line_size, int h) +{ + ff_bfin_z_put_pixels16_xy2 (block,s0,line_size, line_size, 
h); +} + +static void bfin_put_pixels8_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels8uc_nornd (block, pixels, pixels, line_size, h); +} + +static void bfin_put_pixels8_x2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+1, line_size, h); +} + +static void bfin_put_pixels8_y2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels8uc_nornd (block, pixels, pixels+line_size, line_size, h); +} + + +static void bfin_put_pixels16_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels16uc_nornd (block, pixels, pixels, line_size, h); +} + +static void bfin_put_pixels16_x2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+1, line_size, h); +} + +static void bfin_put_pixels16_y2_nornd (uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) +{ + ff_bfin_put_pixels16uc_nornd (block, pixels, pixels+line_size, line_size, h); +} + +void ff_hpeldsp_init_bfin(HpelDSPContext* c, int flags) +{ + c->put_pixels_tab[0][0] = bfin_put_pixels16; + c->put_pixels_tab[0][1] = bfin_put_pixels16_x2; + c->put_pixels_tab[0][2] = bfin_put_pixels16_y2; + c->put_pixels_tab[0][3] = bfin_put_pixels16_xy2; + + c->put_pixels_tab[1][0] = bfin_put_pixels8; + c->put_pixels_tab[1][1] = bfin_put_pixels8_x2; + c->put_pixels_tab[1][2] = bfin_put_pixels8_y2; + c->put_pixels_tab[1][3] = bfin_put_pixels8_xy2; + + c->put_no_rnd_pixels_tab[1][0] = bfin_put_pixels8_nornd; + c->put_no_rnd_pixels_tab[1][1] = bfin_put_pixels8_x2_nornd; + c->put_no_rnd_pixels_tab[1][2] = bfin_put_pixels8_y2_nornd; +/* c->put_no_rnd_pixels_tab[1][3] = ff_bfin_put_pixels8_xy2_nornd; */ + + c->put_no_rnd_pixels_tab[0][0] = bfin_put_pixels16_nornd; + c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd; + 
c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd; +/* c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */ +} diff --git a/libavcodec/bfin/hpeldsp_bfin.h b/libavcodec/bfin/hpeldsp_bfin.h new file mode 100644 index 0000000..36f3872 --- /dev/null +++ b/libavcodec/bfin/hpeldsp_bfin.h @@ -0,0 +1,50 @@ +/* + * BlackFin DSPUTILS COMMON OPTIMIZATIONS HEADER + * + * Copyright (C) 2007 Marc Hoffman <mmh@pleasantst.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#ifndef AVCODEC_BFIN_HPELDSP_BFIN_H +#define AVCODEC_BFIN_HPELDSP_BFIN_H + +#include <stdint.h> + +#include "config.h" + +#if defined(__FDPIC__) && CONFIG_SRAM +#define attribute_l1_text __attribute__ ((l1_text)) +#define attribute_l1_data_b __attribute__((l1_data_B)) +#else +#define attribute_l1_text +#define attribute_l1_data_b +#endif + +void ff_bfin_z_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) attribute_l1_text; +void ff_bfin_z_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) attribute_l1_text; +void ff_bfin_put_pixels16_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) attribute_l1_text; +void ff_bfin_put_pixels8_xy2_nornd (uint8_t *block, const uint8_t 
*s0, int line_size, int h) attribute_l1_text; + + +void ff_bfin_put_pixels8uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) attribute_l1_text; +void ff_bfin_put_pixels16uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) attribute_l1_text; +void ff_bfin_put_pixels8uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) attribute_l1_text; +void ff_bfin_put_pixels16uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) attribute_l1_text; + +#endif /* AVCODEC_BFIN_HPELDSP_BFIN_H */ diff --git a/libavcodec/bfin/pixels_bfin.S b/libavcodec/bfin/pixels_bfin.S index 2e3cd0f..2c84deb 100644 --- a/libavcodec/bfin/pixels_bfin.S +++ b/libavcodec/bfin/pixels_bfin.S @@ -83,364 +83,6 @@ apc$3: R6 = BYTEOP3P(R1:0, R3:2) (LO) || [I2++M0] = R6 || R2 = [I1]; RTS; DEFUN_END(add_pixels_clamped) - -/** - motion compensation - primitives - - * Halfpel motion compensation with rounding (a+b+1)>>1. 
- * This is an array[4][4] of motion compensation funcions for 4 - * horizontal blocksizes (8,16) and the 4 halfpel positions<br> - * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] - * @param block destination where the result is stored - * @param pixels source - * @param line_size number of bytes in a horizontal line of block - * @param h height - -*/ - -DEFUN(put_pixels8uc,mL1, - (uint8_t *block, const uint8_t *s0, const uint8_t *s1, - int dest_size, int line_size, int h)): - i3=r0; // dest - i0=r1; // src0 - i1=r2; // src1 - r0=[sp+12]; // dest_size - r2=[sp+16]; // line_size - p0=[sp+20]; // h - [--sp] = (r7:6); - r0+=-4; - m3=r0; - r2+=-8; - m0=r2; - LSETUP(pp8$0,pp8$1) LC0=P0; - DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; - -pp8$0: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; - R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M0]|| R2 =[I1++M0]; - R7 = BYTEOP1P(R1:0,R3:2)(R) || R0 = [I0++] || [I3++] = R6 ; -pp8$1: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; - - (r7:6) = [sp++]; - RTS; -DEFUN_END(put_pixels8uc) - -DEFUN(put_pixels16uc,mL1, - (uint8_t *block, const uint8_t *s0, const uint8_t *s1, - int dest_size, int line_size, int h)): - link 0; - [--sp] = (r7:6); - i3=r0; // dest - i0=r1; // src0 - i1=r2; // src1 - r0=[fp+20]; // dest_size - r2=[fp+24]; // line_size - p0=[fp+28]; // h - - - r0+=-12; - m3=r0; // line_size - r2+=-16; - m0=r2; - - LSETUP(pp16$0,pp16$1) LC0=P0; - DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; - -pp16$0: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; - R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2 =[I1++]; - R7 = BYTEOP1P(R1:0,R3:2)(R) || R1 = [I0++] || R3 =[I1++]; - [I3++] = R6; - R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M0] || R2 =[I1++M0]; - R7 = BYTEOP1P(R1:0,R3:2)(R) || R0 = [I0++] || [I3++] = R7 ; - [I3++] = R6; -pp16$1: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; - - (r7:6) = [sp++]; - unlink; - RTS; -DEFUN_END(put_pixels16uc) - - - - - - -DEFUN(put_pixels8uc_nornd,mL1, - (uint8_t *block, const uint8_t *s0, const uint8_t *s1, - int 
line_size, int h)): - i3=r0; // dest - i0=r1; // src0 - i1=r2; // src1 - r2=[sp+12]; // line_size - p0=[sp+16]; // h - [--sp] = (r7:6); - r2+=-4; - m3=r2; - r2+=-4; - m0=r2; - LSETUP(pp8$2,pp8$3) LC0=P0; - DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; - -pp8$2: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; - R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++M0]|| R2 =[I1++M0]; - R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R0 = [I0++] || [I3++] = R6 ; -pp8$3: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; - - (r7:6) = [sp++]; - RTS; -DEFUN_END(put_pixels8uc_nornd) - -DEFUN(put_pixels16uc_nornd,mL1, - (uint8_t *block, const uint8_t *s0, const uint8_t *s1, - int line_size, int h)): - i3=r0; // dest - i0=r1; // src0 - i1=r2; // src1 - r2=[sp+12]; // line_size - p0=[sp+16]; // h - - [--sp] = (r7:6); - r2+=-12; - m3=r2; // line_size - r2+=-4; - m0=r2; - - LSETUP(pp16$2,pp16$3) LC0=P0; - DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; - -pp16$2: - DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; - R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++] || R2 =[I1++]; - R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R1 = [I0++] || R3 =[I1++]; - [I3++] = R6; - - R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++M0] || R2 =[I1++M0]; - R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R0 = [I0++] || [I3++] = R7 ; - [I3++] = R6; -pp16$3: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; - - (r7:6) = [sp++]; - - RTS; -DEFUN_END(put_pixels16uc_nornd) - -DEFUN(z_put_pixels16_xy2,mL1, - (uint8_t *block, const uint8_t *s0, - int dest_size, int line_size, int h)): - link 0; - [--sp] = (r7:4); - i3=r0; // dest - i0=r1; // src0--> pixels - i1=r1; // src1--> pixels + line_size - r2+=-12; - m2=r2; // m2=dest_width-4 - r2=[fp+20]; - m3=r2; // line_size - p0=[fp+24]; // h - r2+=-16; - i1+=m3; /* src1 + line_size */ - m0=r2; /* line-size - 20 */ - - B0 = I0; - B1 = I1; - B3 = I3; - - DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; - - LSETUP(LS$16E,LE$16E) LC0=P0; -LS$16E: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; - R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++] || R2 =[I1++]; - R5 = 
BYTEOP2P (R3:2,R1:0) (RNDL,R) || R1 = [I0++] || [I3++] = R4 ; - DISALGNEXCPT || R3 = [I1++] || [I3++] = R5; - R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++M0]|| R2 = [I1++M0]; - R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R0 = [I0++] || [I3++] = R4 ; -LE$16E: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; - - M1 = 1; - I3 = B3; - I1 = B1; - I0 = B0; - - I0 += M1; - I1 += M1; - - DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; - LSETUP(LS$16O,LE$16O) LC0=P0; -LS$16O: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; - R4 = BYTEOP2P (R3:2,R1:0) (RNDH) || R0 = [I0++] || R2 =[I1++]; - R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R1 = [I0++] || R6 =[I3++]; - R4 = R4 +|+ R6 || R7 = [I3--]; - R5 = R5 +|+ R7 || [I3++] = R4; - DISALGNEXCPT || R3 =[I1++] || [I3++] = R5; - R4 = BYTEOP2P (R3:2,R1:0) (RNDH) || R0 = [I0++M0]|| R2 = [I1++M0]; - R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R0 = [I0++] || R6 = [I3++]; - R4 = R4 +|+ R6 || R7 = [I3--]; - R5 = R5 +|+ R7 || [I3++] = R4; -LE$16O: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; - - (r7:4) = [sp++]; - unlink; - rts; -DEFUN_END(z_put_pixels16_xy2) - -DEFUN(put_pixels16_xy2_nornd,mL1, - (uint8_t *block, const uint8_t *s0, - int line_size, int h)): - link 0; - [--sp] = (r7:4); - i3=r0; // dest - i0=r1; // src0--> pixels - i1=r1; // src1--> pixels + line_size - m3=r2; - r2+=-12; - m2=r2; - r2+=-4; - i1+=m3; /* src1 + line_size */ - m0=r2; /* line-size - 20 */ - p0=[fp+20]; // h - - B0=I0; - B1=I1; - B3=I3; - - DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; - - LSETUP(LS$16ET,LE$16ET) LC0=P0; -LS$16ET:DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; - R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++] || R2 =[I1++]; - R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R1 = [I0++] || [I3++] = R4 ; - DISALGNEXCPT || R3 = [I1++] || [I3++] = R5; - R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++M0]|| R2 = [I1++M0]; - R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R0 = [I0++] || [I3++] = R4 ; -LE$16ET:DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; - - M1 = 1; - I3=B3; - I1=B1; - I0=B0; - - I0 += M1; - I1 += 
M1; - - DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; - LSETUP(LS$16OT,LE$16OT) LC0=P0; -LS$16OT:DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; - R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++] || R2 =[I1++]; - R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R1 = [I0++] || R6 =[I3++]; - R4 = R4 +|+ R6 || R7 = [I3--]; - R5 = R5 +|+ R7 || [I3++] = R4; - DISALGNEXCPT || R3 =[I1++] || [I3++] = R5; - R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++M0]|| R2 = [I1++M0]; - R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R0 = [I0++] || R6 = [I3++]; - R4 = R4 +|+ R6 || R7 = [I3--]; - R5 = R5 +|+ R7 || [I3++] = R4; -LE$16OT:DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; - - (r7:4) = [sp++]; - unlink; - rts; -DEFUN_END(put_pixels16_xy2_nornd) - -DEFUN(z_put_pixels8_xy2,mL1, - (uint8_t *block, const uint8_t *s0, - int dest_size, int line_size, int h)): - link 0; - [--sp] = (r7:4); - i3=r0; // dest - i0=r1; // src0--> pixels - i1=r1; // src1--> pixels + line_size - r2+=-4; - m2=r2; // m2=dest_width-4 - r2=[fp+20]; - m3=r2; // line_size - p0=[fp+24]; // h - r2+=-8; - i1+=m3; /* src1 + line_size */ - m0=r2; /* line-size - 20 */ - - b0 = I0; - b1 = I1; - b3 = I3; - - LSETUP(LS$8E,LE$8E) LC0=P0; - DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; -LS$8E: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; - R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++M0] || R2 =[I1++M0]; - R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R0 = [I0++] || [I3++] = R4 ; -LE$8E: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; - - M1 = 1; - I3 = b3; - I1 = b1; - I0 = b0; - - I0 += M1; - I1 += M1; - - LSETUP(LS$8O,LE$8O) LC0=P0; - DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; -LS$8O: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; - R4 = BYTEOP2P (R3:2,R1:0) (RNDH) || R0 = [I0++M0] || R2 =[I1++M0]; - R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R0 = [I0++] || R6 =[I3++]; - R4 = R4 +|+ R6 || R7 = [I3--]; - R5 = R5 +|+ R7 || [I3++] = R4; -LE$8O: DISALGNEXCPT || R2 =[I1++] || [I3++M2] = R5; - - (r7:4) = [sp++]; - unlink; - rts; -DEFUN_END(z_put_pixels8_xy2) - 
-DEFUN(put_pixels8_xy2_nornd,mL1, - (uint8_t *block, const uint8_t *s0, int line_size, int h)): - link 0; - [--sp] = (r7:4); - i3=r0; // dest - i0=r1; // src0--> pixels - i1=r1; // src1--> pixels + line_size - m3=r2; - r2+=-4; - m2=r2; - r2+=-4; - i1+=m3; /* src1 + line_size */ - m0=r2; /* line-size - 20 */ - p0=[fp+20]; // h - - - b0 = I0; - b1 = I1; - b3 = I3; - - LSETUP(LS$8ET,LE$8ET) LC0=P0; - DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; - -LS$8ET: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++]; - R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++M0] || R2 = [I1++M0]; - R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R0 = [I0++] || [I3++] = R4 ; -LE$8ET: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; - - M1 = 1; - I3 = b3; - I1 = b1; - I0 = b0; - - I0 += M1; - I1 += M1; - - LSETUP(LS$8OT,LE$8OT) LC0=P0; - DISALGNEXCPT || R0 = [I0++] || R2 = [I1++]; - -LS$8OT: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++]; - R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++M0] || R2 = [I1++M0]; - R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R0 = [I0++] || R6 = [I3++]; - R4 = R4 +|+ R6 || R7 = [I3--]; - R5 = R5 +|+ R7 || [I3++] = R4; -LE$8OT: DISALGNEXCPT || R2 =[I1++] || [I3++M2] = R5; - - (r7:4) = [sp++]; - unlink; - rts; - DEFUN(diff_pixels,mL1, (int16_t *block, uint8_t *s1, uint8_t *s2, int stride)): link 0; @@ -467,7 +109,7 @@ DEFUN(diff_pixels,mL1, (r7:4) = [sp++]; unlink; rts; -DEFUN_END(put_pixels8_xy2_nornd) +DEFUN_END(diff_pixels) /* for (i = 0; i < 16; i++) { |