00001
00002
00003
00004
00005
/*
 * LD(adr): read the 32-bit word stored at address adr.
 *
 * The expansion is wrapped in parentheses so that it behaves as a single
 * primary expression: a postfix operator written after LD(p) applies to the
 * loaded word, not to the pointer argument.
 *
 * NOTE(review): the plain pointer cast assumes adr is suitably aligned for
 * uint32_t and that the access is alias-safe -- confirm for every caller.
 */
#define LD(adr) (*(uint32_t*)(adr))
00007
/*
 * PIXOP2(OPNAME, OP) expands to a family of static inline block helpers that
 * combine two (..._l2_...) or four (..._l4_...) source blocks one 32-bit word
 * at a time and hand every result to OP(destination_word, value).
 *
 *   OPNAME  identifier prefix pasted onto every generated function name
 *   OP      two-argument store macro, invoked as OP(LP(dst + n), value)
 *
 * Generated functions take the destination pointer, the source pointers, one
 * stride per pointer and the row count h.  Rows are processed with
 * do { ... } while (--h), so the body always runs at least once and h must
 * be >= 1.
 *
 * Name suffixes, as used below:
 *   _aligned   every source is read with LP()
 *   _aligned2  src1 is read with LD32(), src2 with LP()
 *   _aligned1  forwards to _aligned2 with the two sources and their strides
 *              swapped
 *   _aligned0  (l4 only) src1 is read with LD32(), src2..src4 with LP()
 *
 * Each 16-pixel row is written as four words at dst, dst+4, dst+8 and dst+12;
 * each 8-pixel row as two words at dst and dst+4.
 *
 * NOTE(review): LP, LD32, rnd_avg32, no_rnd_avg32, UNPACK, rnd_PACK and
 * no_rnd_PACK are defined outside this chunk; presumably LP is an aligned
 * word access and LD32 an alignment-agnostic load -- confirm.
 */
#define PIXOP2(OPNAME, OP) \
\
static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LP(src1  ),LP(src2  )) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LD32(src1  ),LP(src2  )) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),no_rnd_avg32(LD32(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
        OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
        OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LD32(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
        OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
        OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do { \
        OP(LP(dst  ),no_rnd_avg32(LD32(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LD32(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),no_rnd_avg32(LP(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LP(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),no_rnd_avg32(LP(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
        OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \
        OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
    do {\
        OP(LP(dst  ),rnd_avg32(LP(src1  ),LP(src2  )) ); \
        OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
        OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \
        OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \
        src1+=src_stride1; \
        src2+=src_stride2; \
        dst+=dst_stride; \
    } while(--h); \
}\
\
/* _aligned1 variants: same work as _aligned2 with the source roles swapped */ \
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{ OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
\
static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        /* the word built from source offset 4 belongs to bytes 4..7 of the row */ \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
        UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
        OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
        UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
        OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LP(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
        UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
        OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
        UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
        OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        /* the word built from source offset 4 belongs to bytes 4..7 of the row */ \
        OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
        UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
        OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
        UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
        OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
} \
\
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    do { \
        uint32_t a0,a1,a2,a3; \
        UNPACK(a0,a1,LD32(src1),LP(src2)); \
        UNPACK(a2,a3,LP(src3),LP(src4)); \
        OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
        UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
        OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
        UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
        OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
        UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
        UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
        OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
        src1+=src_stride1;\
        src2+=src_stride2;\
        src3+=src_stride3;\
        src4+=src_stride4;\
        dst+=dst_stride;\
    } while(--h); \
}
00355
00356 #define op_avg(a, b) a = rnd_avg32(a,b)
00357 #define op_put(a, b) a = b
00358
00359 PIXOP2(avg, op_avg)
00360 PIXOP2(put, op_put)
00361 #undef op_avg
00362 #undef op_put
00363
/*
 * avg2(a,b):     average of two values, rounded up on ties:  (a+b+1)>>1
 * avg4(a,b,c,d): average of four values, rounded to nearest: (a+b+c+d+2)>>2
 *
 * Every argument is parenthesized so that an argument containing an operator
 * of lower precedence than '+' (for example a|b or a?b:c) is still treated
 * as a single operand.  Arguments are evaluated exactly once.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
00366
00367
/*
 * Bilinear blend of an 8-pixel-wide block.
 *
 * Each output pixel mixes the 2x2 source neighbourhood whose top-left corner
 * is the pixel's own position, using weights built from x16 and y16
 * (the four weights add up to 256), then adds rounder and shifts right by 8.
 *
 *   dst, src   output and input rows; both advance by stride per row
 *   h          number of rows; the loop is do/while(--h), so h must be >= 1
 *   x16, y16   horizontal / vertical blend factors
 *   rounder    constant added before the final >> 8
 *
 * Nine pixels are read from each of the two source rows per output row.
 * Source pixels are fetched one column at a time, right before the output
 * pixel that first needs them, and carried over to the next column.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_top_left  = (16 - x16) * (16 - y16);
    const int w_top_right = (     x16) * (16 - y16);
    const int w_bot_left  = (16 - x16) * (     y16);
    const int w_bot_right = (     x16) * (     y16);

    do {
        const uint8_t *upper = src;
        const uint8_t *lower = src + stride;
        int up_left = upper[0];
        int lo_left = lower[0];
        int col;

        for (col = 0; col < 8; col++) {
            const int up_right = upper[col + 1];
            const int lo_right = lower[col + 1];

            dst[col] = (w_top_left * up_left + w_top_right * up_right
                        + w_bot_left * lo_left + w_bot_right * lo_right
                        + rounder) >> 8;

            /* the right-hand column becomes the left-hand one of the next pixel */
            up_left = up_right;
            lo_left = lo_right;
        }

        dst += stride;
        src += stride;
    } while (--h);
}
00400
/*
 * Warp an 8-pixel-wide, h-row block with an affine motion field.
 *
 * The source position starts at (ox, oy) for the first pixel of the first
 * row, moves by (dxx, dyx) per output column and by (dxy, dyy) per output
 * row.  Positions are taken >> 16; the low `shift` bits of the result are the
 * interpolation fraction and the remaining bits the integer sample position.
 *
 *   dst, src       output / input planes sharing the same stride
 *   r              constant added before the final >> (2*shift)
 *   width, height  source dimensions; positions are tested against
 *                  width-1 / height-1 with an unsigned compare, so negative
 *                  positions count as outside
 *
 * Inside in both directions: full bilinear blend.  Inside in one direction
 * only: the outside coordinate is clamped with clip() and a linear blend is
 * done along the inside direction.  Outside in both: the clamped sample is
 * copied unchanged.
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s         = 1 << shift;
    const int frac_mask = s - 1;
    const int max_x     = width - 1;
    const int max_y     = height - 1;
    int y;

    for (y = 0; y < h; y++, ox += dxy, oy += dyy) {
        int vx = ox;
        int vy = oy;
        int x;

        for (x = 0; x < 8; x++, vx += dxx, vy += dyx) {
            const int pos_x    = vx >> 16;
            const int pos_y    = vy >> 16;
            const int frac_x   = pos_x & frac_mask;
            const int frac_y   = pos_y & frac_mask;
            const int src_x    = pos_x >> shift;
            const int src_y    = pos_y >> shift;
            const int x_inside = (unsigned)src_x < (unsigned)max_x;
            const int y_inside = (unsigned)src_y < (unsigned)max_y;
            int index;

            if (x_inside && y_inside) {
                index = src_x + src_y*stride;
                dst[y*stride + x] = ( ( src[index         ]*(s-frac_x)
                                      + src[index       +1]*   frac_x )*(s-frac_y)
                                    + ( src[index+stride  ]*(s-frac_x)
                                      + src[index+stride+1]*   frac_x )*   frac_y
                                    + r) >> (shift*2);
            } else if (x_inside) {
                /* row clamped: blend horizontally only */
                index = src_x + clip(src_y, 0, max_y)*stride;
                dst[y*stride + x] = ( ( src[index  ]*(s-frac_x)
                                      + src[index+1]*   frac_x )*s
                                    + r) >> (shift*2);
            } else if (y_inside) {
                /* column clamped: blend vertically only */
                index = clip(src_x, 0, max_x) + src_y*stride;
                dst[y*stride + x] = ( ( src[index       ]*(s-frac_y)
                                      + src[index+stride]*   frac_y )*s
                                    + r) >> (shift*2);
            } else {
                index = clip(src_x, 0, max_x) + clip(src_y, 0, max_y)*stride;
                dst[y*stride + x] = src[index];
            }
        }
    }
}
/*
 * H264_CHROMA_MC_W(OPNAME, OP, W) defines
 *     static void OPNAME##h264_chroma_mc<W>_c(dst, src, stride, h, x, y)
 * which blends, for each of the W pixels in each of the h rows, the 2x2
 * source neighbourhood starting at that pixel with the weights
 *     A=(8-x)(8-y)  B=x(8-y)  C=(8-x)y  D=xy
 * and passes the weighted sum (not yet scaled or rounded) to
 * OP(dst[col], sum).  x and y must both lie in 0..7 (asserted).
 * Rows are processed with do/while(--h), so h must be >= 1.
 * W+1 pixels are read from each of the two source rows; every source pixel
 * is fetched right before the first output pixel that needs it and reused
 * for the next column.
 */
#define H264_CHROMA_MC_W(OPNAME, OP, W)\
static void OPNAME ## h264_chroma_mc ## W ## _c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    do {\
        const uint8_t *upper = src;\
        const uint8_t *lower = src + stride;\
        int up_left = upper[0];\
        int lo_left = lower[0];\
        int col;\
        for (col = 0; col < W; col++) {\
            const int up_right = upper[col + 1];\
            const int lo_right = lower[col + 1];\
            OP(dst[col], (A*up_left + B*up_right + C*lo_left + D*lo_right));\
            up_left = up_right;\
            lo_left = lo_right;\
        }\
        dst += stride;\
        src += stride;\
    } while (--h);\
}

/*
 * H264_CHROMA_MC(OPNAME, OP) generates the 2-, 4- and 8-pixel-wide chroma
 * functions OPNAME##h264_chroma_mc2_c, ..._mc4_c and ..._mc8_c.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
H264_CHROMA_MC_W(OPNAME, OP, 2)\
\
H264_CHROMA_MC_W(OPNAME, OP, 4)\
\
H264_CHROMA_MC_W(OPNAME, OP, 8)
00540
00541 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
00542 #define op_put(a, b) a = (((b) + 32)>>6)
00543
00544 H264_CHROMA_MC(put_ , op_put)
00545 H264_CHROMA_MC(avg_ , op_avg)
00546 #undef op_avg
00547 #undef op_put
00548
00549
/*
 * Copy h rows from src to dst with one ST32(dst, LD32(src)) transfer per row,
 * advancing dst by dstStride and src by srcStride after each row.
 * Nothing is copied when h <= 0.
 * NOTE(review): ST32/LD32 are defined elsewhere; presumably a 32-bit store
 * and load, i.e. 4 bytes per row -- confirm.
 */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        ST32(dst, LD32(src));
        dst += dstStride;
        src += srcStride;
    }
}
00560
/*
 * Copy h rows from src to dst with two ST32(LD32()) transfers per row, at
 * byte offsets 0 and 4, advancing dst by dstStride and src by srcStride
 * after each row.  Nothing is copied when h <= 0.
 * NOTE(review): ST32/LD32 are defined elsewhere; presumably a 32-bit store
 * and load, i.e. 8 bytes per row -- confirm.
 */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 8; off += 4)
            ST32(dst + off, LD32(src + off));

        dst += dstStride;
        src += srcStride;
    }
}
00572
/*
 * Copy h rows from src to dst with four ST32(LD32()) transfers per row, at
 * byte offsets 0, 4, 8 and 12, advancing dst by dstStride and src by
 * srcStride after each row.  Nothing is copied when h <= 0.
 * NOTE(review): ST32/LD32 are defined elsewhere; presumably a 32-bit store
 * and load, i.e. 16 bytes per row -- confirm.
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 16; off += 4)
            ST32(dst + off, LD32(src + off));

        dst += dstStride;
        src += srcStride;
    }
}
00586
/*
 * Copy h rows from src to dst: four ST32(LD32()) transfers at byte offsets
 * 0, 4, 8 and 12, followed by the single byte at index 16.  dst advances by
 * dstStride and src by srcStride after each row.  Nothing is copied when
 * h <= 0.
 * NOTE(review): ST32/LD32 are defined elsewhere; presumably a 32-bit store
 * and load, i.e. 17 bytes per row in total -- confirm.
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 16; off += 4)
            ST32(dst + off, LD32(src + off));
        dst[16] = src[16];  /* trailing byte that does not fill a word */

        dst += dstStride;
        src += srcStride;
    }
}
00601
/*
 * Copy h rows from src to dst: two ST32(LD32()) transfers at byte offsets
 * 0 and 4, followed by the single byte at index 8.  dst advances by
 * dstStride and src by srcStride after each row.  Nothing is copied when
 * h <= 0.
 * NOTE(review): ST32/LD32 are defined elsewhere; presumably a 32-bit store
 * and load, i.e. 9 bytes per row in total -- confirm.
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int off;

        for (off = 0; off < 8; off += 4)
            ST32(dst + off, LD32(src + off));
        dst[8] = src[8];  /* trailing byte that does not fill a word */

        dst += dstStride;
        src += srcStride;
    }
}
00614
00615
00616 #define QPEL_MC(r, OPNAME, RND, OP) \
00617 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00618 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
00619 do {\
00620 uint8_t *s = src; \
00621 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
00622 src0= *s++;\
00623 src1= *s++;\
00624 src2= *s++;\
00625 src3= *s++;\
00626 src4= *s++;\
00627 OP(dst[0], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00628 src5= *s++;\
00629 OP(dst[1], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00630 src6= *s++;\
00631 OP(dst[2], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00632 src7= *s++;\
00633 OP(dst[3], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00634 src8= *s++;\
00635 OP(dst[4], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00636 OP(dst[5], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00637 OP(dst[6], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00638 OP(dst[7], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00639 dst+=dstStride;\
00640 src+=srcStride;\
00641 }while(--h);\
00642 }\
00643 \
00644 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00645 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
00646 int w=8;\
00647 do{\
00648 uint8_t *s = src, *d=dst;\
00649 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
00650 src0 = *s; s+=srcStride; \
00651 src1 = *s; s+=srcStride; \
00652 src2 = *s; s+=srcStride; \
00653 src3 = *s; s+=srcStride; \
00654 src4 = *s; s+=srcStride; \
00655 OP(*d, (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));d+=dstStride;\
00656 src5 = *s; s+=srcStride; \
00657 OP(*d, (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));d+=dstStride;\
00658 src6 = *s; s+=srcStride; \
00659 OP(*d, (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));d+=dstStride;\
00660 src7 = *s; s+=srcStride; \
00661 OP(*d, (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));d+=dstStride;\
00662 src8 = *s; \
00663 OP(*d, (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));d+=dstStride;\
00664 OP(*d, (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));d+=dstStride;\
00665 OP(*d, (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));d+=dstStride;\
00666 OP(*d, (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00667 dst++;\
00668 src++;\
00669 }while(--w);\
00670 }\
00671 \
00672 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00673 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
00674 do {\
00675 uint8_t *s = src;\
00676 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
00677 int src9,src10,src11,src12,src13,src14,src15,src16;\
00678 src0= *s++;\
00679 src1= *s++;\
00680 src2= *s++;\
00681 src3= *s++;\
00682 src4= *s++;\
00683 OP(dst[ 0], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00684 src5= *s++;\
00685 OP(dst[ 1], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00686 src6= *s++;\
00687 OP(dst[ 2], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00688 src7= *s++;\
00689 OP(dst[ 3], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00690 src8= *s++;\
00691 OP(dst[ 4], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00692 src9= *s++;\
00693 OP(dst[ 5], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00694 src10= *s++;\
00695 OP(dst[ 6], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00696 src11= *s++;\
00697 OP(dst[ 7], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00698 src12= *s++;\
00699 OP(dst[ 8], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00700 src13= *s++;\
00701 OP(dst[ 9], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00702 src14= *s++;\
00703 OP(dst[10], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00704 src15= *s++;\
00705 OP(dst[11], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00706 src16= *s++;\
00707 OP(dst[12], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00708 OP(dst[13], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00709 OP(dst[14], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00710 OP(dst[15], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00711 dst+=dstStride;\
00712 src+=srcStride;\
00713 }while(--h);\
00714 }\
00715 \
00716 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00717 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
00718 int w=16;\
00719 do {\
00720 uint8_t *s = src, *d=dst;\
00721 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
00722 int src9,src10,src11,src12,src13,src14,src15,src16;\
00723 src0 = *s; s+=srcStride; \
00724 src1 = *s; s+=srcStride; \
00725 src2 = *s; s+=srcStride; \
00726 src3 = *s; s+=srcStride; \
00727 src4 = *s; s+=srcStride; \
00728 OP(*d, (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));d+=dstStride;\
00729 src5 = *s; s+=srcStride; \
00730 OP(*d, (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));d+=dstStride;\
00731 src6 = *s; s+=srcStride; \
00732 OP(*d, (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));d+=dstStride;\
00733 src7 = *s; s+=srcStride; \
00734 OP(*d, (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));d+=dstStride;\
00735 src8 = *s; s+=srcStride; \
00736 OP(*d, (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));d+=dstStride;\
00737 src9 = *s; s+=srcStride; \
00738 OP(*d, (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));d+=dstStride;\
00739 src10 = *s; s+=srcStride; \
00740 OP(*d, (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));d+=dstStride;\
00741 src11 = *s; s+=srcStride; \
00742 OP(*d, (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));d+=dstStride;\
00743 src12 = *s; s+=srcStride; \
00744 OP(*d, (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));d+=dstStride;\
00745 src13 = *s; s+=srcStride; \
00746 OP(*d, (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));d+=dstStride;\
00747 src14 = *s; s+=srcStride; \
00748 OP(*d, (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));d+=dstStride;\
00749 src15 = *s; s+=srcStride; \
00750 OP(*d, (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));d+=dstStride;\
00751 src16 = *s; \
00752 OP(*d, (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));d+=dstStride;\
00753 OP(*d, (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));d+=dstStride;\
00754 OP(*d, (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));d+=dstStride;\
00755 OP(*d, (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00756 dst++;\
00757 src++;\
00758 }while(--w);\
00759 }\
00760 \
00761 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
00762 OPNAME ## pixels8_c(dst, src, stride, 8);\
00763 }\
00764 \
00765 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00766 uint8_t half[64];\
00767 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00768 OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\
00769 }\
00770 \
00771 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00772 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00773 }\
00774 \
00775 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00776 uint8_t half[64];\
00777 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00778 OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\
00779 }\
00780 \
00781 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00782 uint8_t full[16*9];\
00783 uint8_t half[64];\
00784 copy_block9(full, src, 16, stride, 9);\
00785 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00786 OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\
00787 }\
00788 \
00789 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00790 uint8_t full[16*9];\
00791 copy_block9(full, src, 16, stride, 9);\
00792 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00793 }\
00794 \
00795 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00796 uint8_t full[16*9];\
00797 uint8_t half[64];\
00798 copy_block9(full, src, 16, stride, 9);\
00799 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00800 OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\
00801 }\
00802 static void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00803 uint8_t full[16*9];\
00804 uint8_t halfH[72];\
00805 uint8_t halfV[64];\
00806 uint8_t halfHV[64];\
00807 copy_block9(full, src, 16, stride, 9);\
00808 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00809 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00810 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00811 OPNAME ## pixels8_l4_aligned(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00812 }\
00813 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00814 uint8_t full[16*9];\
00815 uint8_t halfH[72];\
00816 uint8_t halfHV[64];\
00817 copy_block9(full, src, 16, stride, 9);\
00818 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00819 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
00820 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00821 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
00822 }\
00823 static void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00824 uint8_t full[16*9];\
00825 uint8_t halfH[72];\
00826 uint8_t halfV[64];\
00827 uint8_t halfHV[64];\
00828 copy_block9(full, src, 16, stride, 9);\
00829 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00830 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00831 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00832 OPNAME ## pixels8_l4_aligned0(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00833 }\
00834 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00835 uint8_t full[16*9];\
00836 uint8_t halfH[72];\
00837 uint8_t halfHV[64];\
00838 copy_block9(full, src, 16, stride, 9);\
00839 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00840 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
00841 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00842 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
00843 }\
00844 static void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
00845 uint8_t full[16*9];\
00846 uint8_t halfH[72];\
00847 uint8_t halfV[64];\
00848 uint8_t halfHV[64];\
00849 copy_block9(full, src, 16, stride, 9);\
00850 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00851 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00852 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00853 OPNAME ## pixels8_l4_aligned(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00854 }\
00855 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
00856 uint8_t full[16*9];\
00857 uint8_t halfH[72];\
00858 uint8_t halfHV[64];\
00859 copy_block9(full, src, 16, stride, 9);\
00860 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00861 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
00862 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00863 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
00864 }\
00865 static void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
00866 uint8_t full[16*9];\
00867 uint8_t halfH[72];\
00868 uint8_t halfV[64];\
00869 uint8_t halfHV[64];\
00870 copy_block9(full, src, 16, stride, 9);\
00871 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
00872 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00873 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00874 OPNAME ## pixels8_l4_aligned0(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00875 }\
00876 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
00877 uint8_t full[16*9];\
00878 uint8_t halfH[72];\
00879 uint8_t halfHV[64];\
00880 copy_block9(full, src, 16, stride, 9);\
00881 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00882 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
00883 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00884 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
00885 }\
00886 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
00887 uint8_t halfH[72];\
00888 uint8_t halfHV[64];\
00889 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
00890 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00891 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
00892 }\
00893 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
00894 uint8_t halfH[72];\
00895 uint8_t halfHV[64];\
00896 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
00897 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00898 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
00899 }\
00900 static void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
00901 uint8_t full[16*9];\
00902 uint8_t halfH[72];\
00903 uint8_t halfV[64];\
00904 uint8_t halfHV[64];\
00905 copy_block9(full, src, 16, stride, 9);\
00906 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00907 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00908 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00909 OPNAME ## pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);\
00910 }\
00911 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
00912 uint8_t full[16*9];\
00913 uint8_t halfH[72];\
00914 copy_block9(full, src, 16, stride, 9);\
00915 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00916 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
00917 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
00918 }\
00919 static void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
00920 uint8_t full[16*9];\
00921 uint8_t halfH[72];\
00922 uint8_t halfV[64];\
00923 uint8_t halfHV[64];\
00924 copy_block9(full, src, 16, stride, 9);\
00925 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00926 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00927 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00928 OPNAME ## pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);\
00929 }\
00930 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
00931 uint8_t full[16*9];\
00932 uint8_t halfH[72];\
00933 copy_block9(full, src, 16, stride, 9);\
00934 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00935 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
00936 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
00937 }\
00938 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
00939 uint8_t halfH[72];\
00940 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
00941 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
00942 }\
00943 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
00944 OPNAME ## pixels16_c(dst, src, stride, 16);\
00945 }\
00946 \
00947 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00948 uint8_t half[256];\
00949 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
00950 OPNAME ## pixels16_l2_aligned2(dst, src, half, stride, stride, 16, 16);\
00951 }\
00952 \
00953 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00954 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
00955 }\
00956 \
00957 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00958 uint8_t half[256];\
00959 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
00960 OPNAME ## pixels16_l2_aligned2(dst, src+1, half, stride, stride, 16, 16);\
00961 }\
00962 \
00963 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00964 uint8_t full[24*17];\
00965 uint8_t half[256];\
00966 copy_block17(full, src, 24, stride, 17);\
00967 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
00968 OPNAME ## pixels16_l2_aligned(dst, full, half, stride, 24, 16, 16);\
00969 }\
00970 \
00971 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00972 uint8_t full[24*17];\
00973 copy_block17(full, src, 24, stride, 17);\
00974 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
00975 }\
00976 \
00977 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00978 uint8_t full[24*17];\
00979 uint8_t half[256];\
00980 copy_block17(full, src, 24, stride, 17);\
00981 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
00982 OPNAME ## pixels16_l2_aligned(dst, full+24, half, stride, 24, 16, 16);\
00983 }\
00984 static void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00985 uint8_t full[24*17];\
00986 uint8_t halfH[272];\
00987 uint8_t halfV[256];\
00988 uint8_t halfHV[256];\
00989 copy_block17(full, src, 24, stride, 17);\
00990 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
00991 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
00992 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
00993 OPNAME ## pixels16_l4_aligned(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
00994 }\
00995 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00996 uint8_t full[24*17];\
00997 uint8_t halfH[272];\
00998 uint8_t halfHV[256];\
00999 copy_block17(full, src, 24, stride, 17);\
01000 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01001 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
01002 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01003 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
01004 }\
01005 static void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01006 uint8_t full[24*17];\
01007 uint8_t halfH[272];\
01008 uint8_t halfV[256];\
01009 uint8_t halfHV[256];\
01010 copy_block17(full, src, 24, stride, 17);\
01011 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01012 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01013 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01014 OPNAME ## pixels16_l4_aligned0(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01015 }\
01016 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01017 uint8_t full[24*17];\
01018 uint8_t halfH[272];\
01019 uint8_t halfHV[256];\
01020 copy_block17(full, src, 24, stride, 17);\
01021 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01022 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
01023 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01024 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
01025 }\
01026 static void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01027 uint8_t full[24*17];\
01028 uint8_t halfH[272];\
01029 uint8_t halfV[256];\
01030 uint8_t halfHV[256];\
01031 copy_block17(full, src, 24, stride, 17);\
01032 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01033 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01034 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01035 OPNAME ## pixels16_l4_aligned(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01036 }\
01037 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01038 uint8_t full[24*17];\
01039 uint8_t halfH[272];\
01040 uint8_t halfHV[256];\
01041 copy_block17(full, src, 24, stride, 17);\
01042 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01043 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
01044 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01045 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01046 }\
01047 static void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01048 uint8_t full[24*17];\
01049 uint8_t halfH[272];\
01050 uint8_t halfV[256];\
01051 uint8_t halfHV[256];\
01052 copy_block17(full, src, 24, stride, 17);\
01053 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01054 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01055 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01056 OPNAME ## pixels16_l4_aligned0(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01057 }\
01058 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01059 uint8_t full[24*17];\
01060 uint8_t halfH[272];\
01061 uint8_t halfHV[256];\
01062 copy_block17(full, src, 24, stride, 17);\
01063 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01064 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
01065 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01066 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01067 }\
01068 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01069 uint8_t halfH[272];\
01070 uint8_t halfHV[256];\
01071 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01072 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01073 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
01074 }\
01075 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01076 uint8_t halfH[272];\
01077 uint8_t halfHV[256];\
01078 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01079 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01080 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01081 }\
01082 static void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01083 uint8_t full[24*17];\
01084 uint8_t halfH[272];\
01085 uint8_t halfV[256];\
01086 uint8_t halfHV[256];\
01087 copy_block17(full, src, 24, stride, 17);\
01088 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01089 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01090 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01091 OPNAME ## pixels16_l2_aligned(dst, halfV, halfHV, stride, 16, 16, 16);\
01092 }\
01093 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01094 uint8_t full[24*17];\
01095 uint8_t halfH[272];\
01096 copy_block17(full, src, 24, stride, 17);\
01097 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01098 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
01099 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01100 }\
01101 static void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01102 uint8_t full[24*17];\
01103 uint8_t halfH[272];\
01104 uint8_t halfV[256];\
01105 uint8_t halfHV[256];\
01106 copy_block17(full, src, 24, stride, 17);\
01107 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01108 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01109 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01110 OPNAME ## pixels16_l2_aligned(dst, halfV, halfHV, stride, 16, 16, 16);\
01111 }\
01112 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01113 uint8_t full[24*17];\
01114 uint8_t halfH[272];\
01115 copy_block17(full, src, 24, stride, 17);\
01116 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01117 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
01118 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01119 }\
01120 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01121 uint8_t halfH[272];\
01122 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01123 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01124 }
01125
/*
 * Store operators for the QPEL_MC instantiations below.
 *
 *   a  destination lvalue (it is assigned to)
 *   b  value to be scaled: (b + 16) >> 5 divides by 32 rounding to nearest,
 *      (b + 15) >> 5 is the "no rounding" variant with a bias one lower
 *
 * The scaled value is used as an index into cm[]. cm is NOT a macro
 * parameter, so an identifier named cm must be in scope wherever these
 * macros are expanded.
 *
 *   op_put / op_put_no_rnd   a = cm[...]
 *   op_avg                   a = (a + cm[...] + 1) >> 1   (average with the
 *                            value already in a, rounding up)
 *   op_avg_no_rnd            a = (a + cm[...]) >> 1       (same without +1)
 *
 * QPEL_MC is expanded three times. Judging from the token pasting in its
 * body (OPNAME ## qpel8_..., put ## RND ## mpeg4_qpel8_...), the second
 * argument appears to be the function-name prefix and the third the infix
 * that selects the rounding flavour of the intermediate put helpers --
 * NOTE(review): confirm against the QPEL_MC parameter list.
 * op_avg_no_rnd is defined and undefined here but is not passed to any of
 * the three expansions.
 */
01126 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
01127 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
01128 #define op_put(a, b) a = cm[((b) + 16)>>5]
01129 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
01130
01131 QPEL_MC(0, put_ , _ , op_put)
01132 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
01133 QPEL_MC(0, avg_ , _ , op_avg)
01134
/* The operator names are reused further down, so drop them here. */
01135 #undef op_avg
01136 #undef op_avg_no_rnd
01137 #undef op_put
01138 #undef op_put_no_rnd
01139
01140 #if 1
/*
 * H264_LOWPASS(OPNAME, OP, OP2)
 *
 * Emits, for the name prefix OPNAME, three generic static inline filters and
 * nine fixed-size wrappers around them:
 *
 *   OPNAME h264_qpel_h_lowpass (dst, src, dstStride, srcStride, w, h)
 *   OPNAME h264_qpel_v_lowpass (dst, src, dstStride, srcStride, w, h)
 *   OPNAME h264_qpel_hv_lowpass(dst, tmp, src, dstStride, tmpStride,
 *                               srcStride, w, h)
 *   OPNAME h264_qpel{4,8,16}_{h,v,hv}_lowpass(...)  -- call the generic
 *       version with w = h = 4, 8 or 16.
 *
 * Filter: every output is the 6-tap sum
 *       (p0 + p1)*20 - (p-1 + p2)*5 + (p-2 + p3)
 * i.e. taps (1, -5, 20, 20, -5, 1), which add up to 32.
 *
 * OP(lvalue, sum)  is applied to the raw, unscaled sum of ONE filter pass.
 * OP2(lvalue, sum) is applied to the raw sum after TWO passes (hv only), so
 * the value it receives is scaled by 32*32 = 1024.
 * Each function declares cm = cropTbl + MAX_NEG_CROP but never names it
 * itself; it exists for the OP / OP2 expansions, which index cm[].
 *
 * Size handling: the code is unrolled, not looped, along the filtered axis.
 * Four outputs are always produced, eight when the size is > 4 and sixteen
 * when it is > 8, so only sizes 4, 8 and 16 give the expected output count.
 * The outer loops are do { } while (--n), so w and h must be at least 1.
 *
 * Memory touched:
 *   h_lowpass   reads src[-2 .. w+2] on each of h rows.
 *   v_lowpass   reads rows -2 .. h+2 of each of w columns.
 *   hv_lowpass  runs the horizontal pass over h+5 rows starting two rows
 *               above src and stores the raw sums as int16_t in tmp, which
 *               therefore needs h+5 rows of tmpStride elements; it then runs
 *               the vertical pass over the w columns of tmp.
 */
01141 #define H264_LOWPASS(OPNAME, OP, OP2) \
01142 static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
01143 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
01144 do {\
01145 int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
01146 uint8_t *s = src-2;\
01147 srcB = *s++;\
01148 srcA = *s++;\
01149 src0 = *s++;\
01150 src1 = *s++;\
01151 src2 = *s++;\
01152 src3 = *s++;\
01153 OP(dst[0], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
01154 src4 = *s++;\
01155 OP(dst[1], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
01156 src5 = *s++;\
01157 OP(dst[2], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
01158 src6 = *s++;\
01159 OP(dst[3], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
01160 if (w>4) { \
01161 int src7,src8,src9,src10; \
01162 src7 = *s++;\
01163 OP(dst[4], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
01164 src8 = *s++;\
01165 OP(dst[5], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
01166 src9 = *s++;\
01167 OP(dst[6], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
01168 src10 = *s++;\
01169 OP(dst[7], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
01170 if (w>8) { \
01171 int src11,src12,src13,src14,src15,src16,src17,src18; \
01172 src11 = *s++;\
01173 OP(dst[8] , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
01174 src12 = *s++;\
01175 OP(dst[9] , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
01176 src13 = *s++;\
01177 OP(dst[10], (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
01178 src14 = *s++;\
01179 OP(dst[11], (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
01180 src15 = *s++;\
01181 OP(dst[12], (src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
01182 src16 = *s++;\
01183 OP(dst[13], (src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
01184 src17 = *s++;\
01185 OP(dst[14], (src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
01186 src18 = *s++;\
01187 OP(dst[15], (src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
01188 } \
01189 } \
01190 dst+=dstStride;\
01191 src+=srcStride;\
01192 }while(--h);\
01193 }\
01194 \
01195 static inline void OPNAME ## h264_qpel_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
01196 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
01197 do{\
01198 int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
01199 uint8_t *s = src-2*srcStride,*d=dst;\
01200 srcB = *s; s+=srcStride;\
01201 srcA = *s; s+=srcStride;\
01202 src0 = *s; s+=srcStride;\
01203 src1 = *s; s+=srcStride;\
01204 src2 = *s; s+=srcStride;\
01205 src3 = *s; s+=srcStride;\
01206 OP(*d, (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));d+=dstStride;\
01207 src4 = *s; s+=srcStride;\
01208 OP(*d, (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));d+=dstStride;\
01209 src5 = *s; s+=srcStride;\
01210 OP(*d, (src2+src3)*20 - (src1+src4)*5 + (src0+src5));d+=dstStride;\
01211 src6 = *s; s+=srcStride;\
01212 OP(*d, (src3+src4)*20 - (src2+src5)*5 + (src1+src6));d+=dstStride;\
01213 if (h>4) { \
01214 int src7,src8,src9,src10; \
01215 src7 = *s; s+=srcStride;\
01216 OP(*d, (src4+src5)*20 - (src3+src6)*5 + (src2+src7));d+=dstStride;\
01217 src8 = *s; s+=srcStride;\
01218 OP(*d, (src5+src6)*20 - (src4+src7)*5 + (src3+src8));d+=dstStride;\
01219 src9 = *s; s+=srcStride;\
01220 OP(*d, (src6+src7)*20 - (src5+src8)*5 + (src4+src9));d+=dstStride;\
01221 src10 = *s; s+=srcStride;\
01222 OP(*d, (src7+src8)*20 - (src6+src9)*5 + (src5+src10));d+=dstStride;\
01223 if (h>8) { \
01224 int src11,src12,src13,src14,src15,src16,src17,src18; \
01225 src11 = *s; s+=srcStride;\
01226 OP(*d , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));d+=dstStride;\
01227 src12 = *s; s+=srcStride;\
01228 OP(*d , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));d+=dstStride;\
01229 src13 = *s; s+=srcStride;\
01230 OP(*d, (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));d+=dstStride;\
01231 src14 = *s; s+=srcStride;\
01232 OP(*d, (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));d+=dstStride;\
01233 src15 = *s; s+=srcStride;\
01234 OP(*d, (src12+src13)*20 - (src11+src14)*5 + (src10+src15));d+=dstStride;\
01235 src16 = *s; s+=srcStride;\
01236 OP(*d, (src13+src14)*20 - (src12+src15)*5 + (src11+src16));d+=dstStride;\
01237 src17 = *s; s+=srcStride;\
01238 OP(*d, (src14+src15)*20 - (src13+src16)*5 + (src12+src17));d+=dstStride;\
01239 src18 = *s; s+=srcStride;\
01240 OP(*d, (src15+src16)*20 - (src14+src17)*5 + (src13+src18));d+=dstStride;\
01241 } \
01242 } \
01243 dst++;\
01244 src++;\
01245 }while(--w);\
01246 }\
01247 \
01248 static inline void OPNAME ## h264_qpel_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride,int w,int h){\
01249 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
01250 int i;\
01251 src -= 2*srcStride;/* horizontal pass starts two rows above the block */\
01252 i= h+5; \
01253 do {\
01254 int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
01255 uint8_t *s = src-2;\
01256 srcB = *s++;\
01257 srcA = *s++;\
01258 src0 = *s++;\
01259 src1 = *s++;\
01260 src2 = *s++;\
01261 src3 = *s++;\
01262 tmp[0] = ((src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
01263 src4 = *s++;\
01264 tmp[1] = ((src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
01265 src5 = *s++;\
01266 tmp[2] = ((src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
01267 src6 = *s++;\
01268 tmp[3] = ((src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
01269 if (w>4) { \
01270 int src7,src8,src9,src10; \
01271 src7 = *s++;\
01272 tmp[4] = ((src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
01273 src8 = *s++;\
01274 tmp[5] = ((src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
01275 src9 = *s++;\
01276 tmp[6] = ((src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
01277 src10 = *s++;\
01278 tmp[7] = ((src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
01279 if (w>8) { \
01280 int src11,src12,src13,src14,src15,src16,src17,src18; \
01281 src11 = *s++;\
01282 tmp[8] = ((src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
01283 src12 = *s++;\
01284 tmp[9] = ((src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
01285 src13 = *s++;\
01286 tmp[10] = ((src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
01287 src14 = *s++;\
01288 tmp[11] = ((src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
01289 src15 = *s++;\
01290 tmp[12] = ((src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
01291 src16 = *s++;\
01292 tmp[13] = ((src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
01293 src17 = *s++;\
01294 tmp[14] = ((src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
01295 src18 = *s++;\
01296 tmp[15] = ((src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
01297 } \
01298 } \
01299 tmp+=tmpStride;\
01300 src+=srcStride;\
01301 }while(--i);\
01302 tmp -= tmpStride*(h+5-2);/* back to intermediate row 2, the row that lines up with output row 0 */\
01303 i = w; \
01304 do {\
01305 int tmpB,tmpA,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6;\
01306 int16_t *s = tmp-2*tmpStride; \
01307 uint8_t *d=dst;\
01308 tmpB = *s; s+=tmpStride;\
01309 tmpA = *s; s+=tmpStride;\
01310 tmp0 = *s; s+=tmpStride;\
01311 tmp1 = *s; s+=tmpStride;\
01312 tmp2 = *s; s+=tmpStride;\
01313 tmp3 = *s; s+=tmpStride;\
01314 OP2(*d, (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));d+=dstStride;\
01315 tmp4 = *s; s+=tmpStride;\
01316 OP2(*d, (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));d+=dstStride;\
01317 tmp5 = *s; s+=tmpStride;\
01318 OP2(*d, (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));d+=dstStride;\
01319 tmp6 = *s; s+=tmpStride;\
01320 OP2(*d, (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));d+=dstStride;\
01321 if (h>4) { \
01322 int tmp7,tmp8,tmp9,tmp10; \
01323 tmp7 = *s; s+=tmpStride;\
01324 OP2(*d, (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));d+=dstStride;\
01325 tmp8 = *s; s+=tmpStride;\
01326 OP2(*d, (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));d+=dstStride;\
01327 tmp9 = *s; s+=tmpStride;\
01328 OP2(*d, (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));d+=dstStride;\
01329 tmp10 = *s; s+=tmpStride;\
01330 OP2(*d, (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));d+=dstStride;\
01331 if (h>8) { \
01332 int tmp11,tmp12,tmp13,tmp14,tmp15,tmp16,tmp17,tmp18; \
01333 tmp11 = *s; s+=tmpStride;\
01334 OP2(*d , (tmp8 +tmp9 )*20 - (tmp7 +tmp10)*5 + (tmp6 +tmp11));d+=dstStride;\
01335 tmp12 = *s; s+=tmpStride;\
01336 OP2(*d , (tmp9 +tmp10)*20 - (tmp8 +tmp11)*5 + (tmp7 +tmp12));d+=dstStride;\
01337 tmp13 = *s; s+=tmpStride;\
01338 OP2(*d, (tmp10+tmp11)*20 - (tmp9 +tmp12)*5 + (tmp8 +tmp13));d+=dstStride;\
01339 tmp14 = *s; s+=tmpStride;\
01340 OP2(*d, (tmp11+tmp12)*20 - (tmp10+tmp13)*5 + (tmp9 +tmp14));d+=dstStride;\
01341 tmp15 = *s; s+=tmpStride;\
01342 OP2(*d, (tmp12+tmp13)*20 - (tmp11+tmp14)*5 + (tmp10+tmp15));d+=dstStride;\
01343 tmp16 = *s; s+=tmpStride;\
01344 OP2(*d, (tmp13+tmp14)*20 - (tmp12+tmp15)*5 + (tmp11+tmp16));d+=dstStride;\
01345 tmp17 = *s; s+=tmpStride;\
01346 OP2(*d, (tmp14+tmp15)*20 - (tmp13+tmp16)*5 + (tmp12+tmp17));d+=dstStride;\
01347 tmp18 = *s; s+=tmpStride;\
01348 OP2(*d, (tmp15+tmp16)*20 - (tmp14+tmp17)*5 + (tmp13+tmp18));d+=dstStride;\
01349 } \
01350 } \
01351 dst++;\
01352 tmp++;\
01353 }while(--i);\
01354 }\
01355 \
01356 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01357 OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,4,4); \
01358 }\
01359 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01360 OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,8,8); \
01361 }\
01362 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01363 OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,16,16); \
01364 }\
01365 \
01366 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01367 OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,4,4); \
01368 }\
01369 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01370 OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,8,8); \
01371 }\
01372 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01373 OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,16,16); \
01374 }\
01375 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
01376 OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,4,4); \
01377 }\
01378 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
01379 OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,8,8); \
01380 }\
01381 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
01382 OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,16,16); \
01383 }\
01384
/*
 * H264_MC(OPNAME, SIZE)
 *
 * Emits the sixteen static functions
 *       OPNAME h264_qpel SIZE _mcXY_c(dst, src, stride),  X, Y in 0..3.
 * SIZE is pasted into helper names (copy_block SIZE, pixels SIZE _c,
 * pixels SIZE _l2_aligned*, h264_qpel SIZE _*_lowpass), so it has to be a
 * size for which all of those exist; this file expands it for 4, 8 and 16.
 *
 * Intermediate planes are always produced with the put_ filters; OPNAME is
 * applied only by the final call that writes dst.
 *
 *   mc00            OPNAME pixels SIZE _c on src, SIZE rows
 *   mc20            OPNAME h_lowpass written straight to dst
 *   mc02            OPNAME v_lowpass written straight to dst
 *   mc22            OPNAME hv_lowpass written straight to dst
 *   mc10 / mc30     l2 of (src / src+1) with the h_lowpass plane
 *   mc01 / mc03     l2 of (full_mid / full_mid+SIZE) with the v_lowpass plane
 *   mc11 mc31       l2 of halfH and halfV; halfH is filtered from src for
 *   mc13 mc33       Y=1 and from src+stride for Y=3, halfV from the column
 *                   block copied at src for X=1 and at src+1 for X=3
 *   mc21 / mc23     l2 of halfH (from src / src+stride) with halfHV
 *   mc12 / mc32     l2 of halfV (column block at src / src+1) with halfHV;
 *                   halfHV itself is always filtered from src
 *
 * "l2" is the pixels SIZE _l2_aligned helper defined earlier in this file
 * (the 4-wide one stores OP(rnd_avg32(src1, src2)) per row). mc10 and mc30
 * use the _l2_aligned2 variant instead, whose first source is read through
 * LD32 rather than LP; their first source is src / src+1 in the caller's
 * picture rather than a local buffer.
 *
 * Local buffers:
 *   full      SIZE+5 rows of SIZE bytes, filled by copy_block SIZE from two
 *             rows above src -- presumably the arguments are (dst, src,
 *             dstStride, srcStride, rows); confirm against copy_block.
 *   full_mid  full + 2 rows, i.e. the copied row that corresponds to src.
 *   tmp       SIZE*(SIZE+5) int16_t scratch handed to hv_lowpass.
 *   half*     SIZE*SIZE bytes, stride SIZE.
 */
01385 #define H264_MC(OPNAME, SIZE) \
01386 static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
01387 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
01388 }\
01389 \
01390 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01391 uint8_t half[SIZE*SIZE];\
01392 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
01393 OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src, half, stride, stride, SIZE, SIZE);\
01394 }\
01395 \
01396 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01397 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
01398 }\
01399 \
01400 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01401 uint8_t half[SIZE*SIZE];\
01402 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
01403 OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src+1, half, stride, stride, SIZE, SIZE);\
01404 }\
01405 \
01406 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01407 uint8_t full[SIZE*(SIZE+5)];\
01408 uint8_t * const full_mid= full + SIZE*2;\
01409 uint8_t half[SIZE*SIZE];\
01410 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
01411 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
01412 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
01413 }\
01414 \
01415 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01416 uint8_t full[SIZE*(SIZE+5)];\
01417 uint8_t * const full_mid= full + SIZE*2;\
01418 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
01419 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
01420 }\
01421 \
01422 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01423 uint8_t full[SIZE*(SIZE+5)];\
01424 uint8_t * const full_mid= full + SIZE*2;\
01425 uint8_t half[SIZE*SIZE];\
01426 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
01427 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
01428 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
01429 }\
01430 \
01431 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01432 uint8_t full[SIZE*(SIZE+5)];\
01433 uint8_t * const full_mid= full + SIZE*2;\
01434 uint8_t halfH[SIZE*SIZE];\
01435 uint8_t halfV[SIZE*SIZE];\
01436 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
01437 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
01438 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
01439 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
01440 }\
01441 \
01442 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01443 uint8_t full[SIZE*(SIZE+5)];\
01444 uint8_t * const full_mid= full + SIZE*2;\
01445 uint8_t halfH[SIZE*SIZE];\
01446 uint8_t halfV[SIZE*SIZE];\
01447 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
01448 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
01449 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
01450 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
01451 }\
01452 \
01453 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01454 uint8_t full[SIZE*(SIZE+5)];\
01455 uint8_t * const full_mid= full + SIZE*2;\
01456 uint8_t halfH[SIZE*SIZE];\
01457 uint8_t halfV[SIZE*SIZE];\
01458 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
01459 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
01460 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
01461 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
01462 }\
01463 \
01464 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01465 uint8_t full[SIZE*(SIZE+5)];\
01466 uint8_t * const full_mid= full + SIZE*2;\
01467 uint8_t halfH[SIZE*SIZE];\
01468 uint8_t halfV[SIZE*SIZE];\
01469 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
01470 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
01471 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
01472 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
01473 }\
01474 \
01475 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01476 int16_t tmp[SIZE*(SIZE+5)];\
01477 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
01478 }\
01479 \
01480 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01481 int16_t tmp[SIZE*(SIZE+5)];\
01482 uint8_t halfH[SIZE*SIZE];\
01483 uint8_t halfHV[SIZE*SIZE];\
01484 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
01485 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
01486 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
01487 }\
01488 \
01489 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01490 int16_t tmp[SIZE*(SIZE+5)];\
01491 uint8_t halfH[SIZE*SIZE];\
01492 uint8_t halfHV[SIZE*SIZE];\
01493 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
01494 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
01495 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
01496 }\
01497 \
01498 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01499 uint8_t full[SIZE*(SIZE+5)];\
01500 uint8_t * const full_mid= full + SIZE*2;\
01501 int16_t tmp[SIZE*(SIZE+5)];\
01502 uint8_t halfV[SIZE*SIZE];\
01503 uint8_t halfHV[SIZE*SIZE];\
01504 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
01505 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
01506 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
01507 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
01508 }\
01509 \
01510 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01511 uint8_t full[SIZE*(SIZE+5)];\
01512 uint8_t * const full_mid= full + SIZE*2;\
01513 int16_t tmp[SIZE*(SIZE+5)];\
01514 uint8_t halfV[SIZE*SIZE];\
01515 uint8_t halfHV[SIZE*SIZE];\
01516 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
01517 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
01518 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
01519 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
01520 }\
01521
/*
 * Store operators for the H.264 filters generated below.
 *
 *   a  destination lvalue (it is assigned to)
 *   b  raw 6-tap filter sum
 *
 * op_put / op_avg   follow ONE filter pass: (b + 16) >> 5 divides by 32,
 *                   the sum of the taps, rounding to nearest.
 * op2_put / op2_avg follow TWO passes (the hv filter): (b + 512) >> 10
 *                   divides by 32*32 = 1024, rounding to nearest.
 * The *_avg forms then average the result with the value already in a,
 * rounding up: (a + x + 1) >> 1.
 *
 * The scaled value indexes cm[]; cm is not a macro parameter, it is the
 * local "cm = cropTbl + MAX_NEG_CROP" declared by each generated filter
 * (presumably a clamp-to-byte-range lookup table -- confirm at cropTbl).
 */
01522 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
01523
01524 #define op_put(a, b) a = cm[((b) + 16)>>5]
01525 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
01526 #define op2_put(a, b) a = cm[((b) + 512)>>10]
01527
/*
 * Two filter families (put_, avg_) and, for each, the mcXY entry points in
 * sizes 4, 8 and 16. H264_MC bodies call put_ filters by name even in the
 * avg_ expansions, so the put_ H264_LOWPASS expansion must come first.
 */
01528 H264_LOWPASS(put_ , op_put, op2_put)
01529 H264_LOWPASS(avg_ , op_avg, op2_avg)
01530 H264_MC(put_, 4)
01531 H264_MC(put_, 8)
01532 H264_MC(put_, 16)
01533 H264_MC(avg_, 4)
01534 H264_MC(avg_, 8)
01535 H264_MC(avg_, 16)
01536
/* Operators are only needed for the expansions above. */
01537 #undef op_avg
01538 #undef op_put
01539 #undef op2_avg
01540 #undef op2_put
01541 #endif
01542
01543 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01544 uint8_t *cm = cropTbl + MAX_NEG_CROP;
01545
01546 do{
01547 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
01548 uint8_t *s = src;
01549 src_1 = s[-1];
01550 src0 = *s++;
01551 src1 = *s++;
01552 src2 = *s++;
01553 dst[0]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01554 src3 = *s++;
01555 dst[1]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01556 src4 = *s++;
01557 dst[2]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01558 src5 = *s++;
01559 dst[3]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01560 src6 = *s++;
01561 dst[4]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01562 src7 = *s++;
01563 dst[5]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01564 src8 = *s++;
01565 dst[6]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01566 src9 = *s++;
01567 dst[7]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01568 dst+=dstStride;
01569 src+=srcStride;
01570 }while(--h);
01571 }
01572
01573 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01574 uint8_t *cm = cropTbl + MAX_NEG_CROP;
01575
01576 do{
01577 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
01578 uint8_t *s = src,*d = dst;
01579 src_1 = *(s-srcStride);
01580 src0 = *s; s+=srcStride;
01581 src1 = *s; s+=srcStride;
01582 src2 = *s; s+=srcStride;
01583 *d= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; d+=dstStride;
01584 src3 = *s; s+=srcStride;
01585 *d= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; d+=dstStride;
01586 src4 = *s; s+=srcStride;
01587 *d= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; d+=dstStride;
01588 src5 = *s; s+=srcStride;
01589 *d= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; d+=dstStride;
01590 src6 = *s; s+=srcStride;
01591 *d= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; d+=dstStride;
01592 src7 = *s; s+=srcStride;
01593 *d= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; d+=dstStride;
01594 src8 = *s; s+=srcStride;
01595 *d= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; d+=dstStride;
01596 src9 = *s;
01597 *d= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; d+=dstStride;
01598 src++;
01599 dst++;
01600 }while(--w);
01601 }
01602
/**
 * mspel "mc00" entry point: no filtering here, the block is handed
 * straight to put_pixels8_c with a final argument of 8
 * (NOTE(review): presumably the row count - confirm against put_pixels8_c).
 */
static void put_mspel8_mc00_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}
01606
/**
 * mspel "mc10" entry point: combines the unfiltered source block with its
 * horizontally filtered version.
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hfilt[8 * 8];   /* packed 8x8 filter output, row pitch 8 */

    wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);

    /* src keeps the picture stride; the temporary is read with pitch 8. */
    put_pixels8_l2_aligned2(dst, src, hfilt, stride, stride, 8, 8);
}
01612
/**
 * mspel "mc20" entry point: the horizontally filtered 8x8 block is written
 * directly into dst (source and destination share the same stride).
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01616
/**
 * mspel "mc30" entry point: combines the source block shifted one pixel to
 * the right with the horizontally filtered version of the unshifted block.
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hfilt[8 * 8];   /* packed 8x8 filter output, row pitch 8 */

    wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);

    /* Note the +1: the unfiltered operand starts at the right neighbour. */
    put_pixels8_l2_aligned2(dst, src + 1, hfilt, stride, stride, 8, 8);
}
01622
/**
 * mspel "mc02" entry point: the vertically filtered block (8 columns) is
 * written directly into dst (source and destination share the same stride).
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01626
/**
 * mspel "mc12" entry point: combines the vertically filtered source block
 * with the block filtered first horizontally and then vertically.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t rows_h[11 * 8];   /* 11 horizontally filtered rows, pitch 8 */
    uint8_t plane_v[8 * 8];
    uint8_t plane_hv[8 * 8];

    /* Vertical-only plane, read straight from the picture. */
    wmv2_mspel8_v_lowpass(plane_v, src, 8, stride, 8);

    /* Horizontal pass over 11 rows starting one row above src, so that the
     * vertical pass below has the rows above and below the block available. */
    wmv2_mspel8_h_lowpass(rows_h, src - stride, 8, stride, 11);

    /* Skip the first packed row: rows_h + 8 is the row that matches src. */
    wmv2_mspel8_v_lowpass(plane_hv, rows_h + 8, 8, 8, 8);

    put_pixels8_l2_aligned(dst, plane_v, plane_hv, stride, 8, 8, 8);
}
/**
 * mspel "mc32" entry point: combines the vertically filtered block taken one
 * pixel to the right of src with the block filtered first horizontally and
 * then vertically.
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t rows_h[11 * 8];   /* 11 horizontally filtered rows, pitch 8 */
    uint8_t plane_v[8 * 8];
    uint8_t plane_hv[8 * 8];

    /* Vertical-only plane of the columns starting at src + 1. */
    wmv2_mspel8_v_lowpass(plane_v, src + 1, 8, stride, 8);

    /* Horizontal pass over 11 rows starting one row above src, so that the
     * vertical pass below has the rows above and below the block available. */
    wmv2_mspel8_h_lowpass(rows_h, src - stride, 8, stride, 11);

    /* Skip the first packed row: rows_h + 8 is the row that matches src. */
    wmv2_mspel8_v_lowpass(plane_hv, rows_h + 8, 8, 8, 8);

    put_pixels8_l2_aligned(dst, plane_v, plane_hv, stride, 8, 8, 8);
}
/**
 * mspel "mc22" entry point: horizontal filter into a packed temporary,
 * followed by a vertical filter that writes the result directly into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t rows_h[11 * 8];   /* 11 horizontally filtered rows, pitch 8 */

    /* Start one row above src so the vertical taps have their neighbours. */
    wmv2_mspel8_h_lowpass(rows_h, src - stride, 8, stride, 11);

    /* rows_h + 8 is the packed row that matches src; its pitch is 8. */
    wmv2_mspel8_v_lowpass(dst, rows_h + 8, stride, 8, 8);
}