00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "../dsputil.h"
00022
00023 #include "gcc_fixes.h"
00024
00025 #include "dsputil_altivec.h"
00026
00027 /* gmc1_altivec: for each of h rows, produce 8 output bytes in dst, each a weighted blend of a 2x2 group of src bytes. */
00028 /* Weights (reference path): A=(16-x16)*(16-y16), B=x16*(16-y16), C=(16-x16)*y16, D=x16*y16; output = (sum + rounder) >> 8. */
00029 /* Each row reads src[0..8] and src[stride+0..8]; dst and src both advance by stride after every row. */
00030 /* Defining ALTIVEC_USE_REFERENCE_C_CODE selects the scalar version; otherwise the AltiVec vector version below is compiled. */
00031 #define GMC1_PERF_COND (h==8) /* condition handed to the POWERPC_PERF_* macros used in this function */
00032 void gmc1_altivec(uint8_t *dst , uint8_t *src , int stride, int h, int x16, int y16, int rounder)
00033 {
00034 POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); /* project perf-counter macro; its definition is not in the visible source */
00035 #ifdef ALTIVEC_USE_REFERENCE_C_CODE /* ---- scalar reference implementation ---- */
00036 const int A=(16-x16)*(16-y16); /* weight of the top-left sample; A+B+C+D always equals 256 */
00037 const int B=( x16)*(16-y16); /* weight of the top-right sample */
00038 const int C=(16-x16)*( y16); /* weight of the bottom-left sample */
00039 const int D=( x16)*( y16); /* weight of the bottom-right sample */
00040 int i;
00041
00042 POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
00043
00044 for(i=0; i<h; i++) /* one iteration per output row */
00045 {
00046 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8; /* column k blends src[k], src[k+1] and the same two columns one row down */
00047 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
00048 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
00049 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
00050 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
00051 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
00052 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
00053 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
00054 dst+= stride; /* the same stride is used for both dst and src */
00055 src+= stride;
00056 }
00057
00058 POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
00059
00060 #else /* ---- AltiVec implementation ---- */
00061 const unsigned short __attribute__ ((aligned(16))) rounder_a[8] = /* rounder replicated into eight 16-bit lanes; 16-byte aligned so vec_ld can fetch it below */
00062 {rounder, rounder, rounder, rounder,
00063 rounder, rounder, rounder, rounder};
00064 const unsigned short __attribute__ ((aligned(16))) ABCD[8] = /* the four blend weights in lanes 0-3; lanes 4-7 are zero padding */
00065 {
00066 (16-x16)*(16-y16),
00067 ( x16)*(16-y16),
00068 (16-x16)*( y16),
00069 ( x16)*( y16),
00070 0, 0, 0, 0
00071 };
00072 register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0); /* all-zero byte vector */
00073 register const_vector unsigned short vcsr8 = (const_vector unsigned short)vec_splat_u16(8); /* shift count 8 in every 16-bit lane */
00074 register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD;
00075 register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD;
00076 int i;
00077 unsigned long dst_odd = (unsigned long)dst & 0x0000000F; /* low 4 address bits of dst; recomputed for every row inside the loop */
00078 unsigned long src_really_odd = (unsigned long)src & 0x0000000F; /* low 4 address bits of src, i.e. its offset within a 16-byte vector */
00079
00080
00081 POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
00082
00083 tempA = vec_ld(0, (unsigned short*)ABCD); /* load the weights, then broadcast each one across its own vector */
00084 Av = vec_splat(tempA, 0);
00085 Bv = vec_splat(tempA, 1);
00086 Cv = vec_splat(tempA, 2);
00087 Dv = vec_splat(tempA, 3);
00088
00089 rounderV = vec_ld(0, (unsigned short*)rounder_a);
00090
00091 /* Fetch the first source row before entering the loop. */
00092 /* src may be unaligned, so two 16-byte loads (offsets 0 and 16) cover the bytes needed, */
00093 /* and vec_perm with a vec_lvsl mask moves the bytes starting at src (srcvA) or src+1 (srcvB) to the front. */
00094
00095
00096 src_0 = vec_ld(0, src);
00097 src_1 = vec_ld(16, src);
00098 srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));
00099
00100 if (src_really_odd != 0x0000000F) /* when src & 0xF == 0xF, src+1 is 16-byte aligned and starts exactly at src_1 */
00101 {
00102 srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
00103 }
00104 else
00105 {
00106 srcvB = src_1; /* the lvsl mask for an aligned address would pick src_0, so take src_1 directly */
00107 }
00108 srcvA = vec_mergeh(vczero, srcvA); /* interleave zero bytes with the first 8 source bytes to widen them to 16-bit lanes (relies on big-endian element order; TODO confirm for the target) */
00109 srcvB = vec_mergeh(vczero, srcvB);
00110
00111 for(i=0; i<h; i++) /* one iteration per output row */
00112 {
00113 dst_odd = (unsigned long)dst & 0x0000000F; /* alignment of this row's destination */
00114 src_really_odd = (((unsigned long)src) + stride) & 0x0000000F; /* alignment of the source row one stride below */
00115
00116 dstv = vec_ld(0, dst); /* current contents of the 16-byte vector containing dst, merged with the result before the store */
00117
00118 /* Fetch the row at src + stride with the same two-load + permute scheme as above. */
00119 /* The resulting srcvC / srcvD are reused as srcvA / srcvB of the next iteration, */
00120 /* so every source row is loaded only once. */
00121
00122 src_0 = vec_ld(stride + 0, src);
00123 src_1 = vec_ld(stride + 16, src);
00124 srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
00125
00126 if (src_really_odd != 0x0000000F) /* same special case as for the first row, applied to src + stride */
00127 {
00128 srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
00129 }
00130 else
00131 {
00132 srcvD = src_1;
00133 }
00134
00135 srcvC = vec_mergeh(vczero, srcvC); /* widen to 16-bit lanes, as done for srcvA / srcvB */
00136 srcvD = vec_mergeh(vczero, srcvD);
00137
00138 /* Chain four multiply-adds: rounder + A*a, then + B*b, + C*c, + D*d, for 8 pixels at once. */
00139 /* NOTE(review): the lanes are 16 bits wide, whereas the reference path computes in int; */
00140 /* the results match only while the weighted sum plus rounder fits in 16 bits - confirm the rounder range callers pass. */
00141
00142 tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
00143 tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
00144 tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
00145 tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);
00146
00147 srcvA = srcvC; /* the lower row becomes the upper row of the next iteration */
00148 srcvB = srcvD;
00149
00150 tempD = vec_sr(tempD, vcsr8); /* >> 8 in every lane, matching the reference path */
00151
00152 dstv2 = vec_pack(tempD, (vector unsigned short)vczero); /* narrow the 8 results to bytes; the other 8 bytes come from the zero vector */
00153
00154 if (dst_odd) /* NOTE(review): appears to assume dst is 8-byte aligned, so a non-zero low nibble means offset 8 - confirm with callers */
00155 {
00156 dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1)); /* presumably keeps words 0-1 of dstv and places the 8 result bytes in words 2-3; verify against the vcprm definition */
00157 }
00158 else
00159 {
00160 dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3)); /* presumably places the 8 result bytes in words 0-1 and keeps words 2-3 of dstv; verify against the vcprm definition */
00161 }
00162
00163 vec_st(dstv2, 0, dst); /* store the merged 16-byte vector back */
00164
00165 dst += stride;
00166 src += stride;
00167 }
00168
00169 POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
00170
00171 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
00172 }