00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00028 #include "avcodec.h"
00029 #include "dsputil.h"
00030 #include "mpegvideo.h"
00031 #include "simple_idct.h"
00032 #include "faandct.h"
00033
00034
/*
 * Forward declaration of the spatial wavelet transform that w_c() in this
 * file applies, in place, to its scratch buffer of scaled pixel differences.
 * The definition is not part of the code visible here.
 */
void ff_spatial_dwt(int *buffer,
                    int width,
                    int height,
                    int stride,
                    int type,
                    int decomposition_count);
00036
00037 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/*
 * Lookup table read by pix_norm1_c() and the sse*_c() routines in this file
 * through the pointer squareTbl + 256, so that indices from -256 to 255 are
 * valid. It starts out all zero here; presumably it is filled in by
 * initialisation code elsewhere (TODO confirm).
 */
uint32_t squareTbl[512] = { 0 };
00039
/*
 * Zigzag scan order for an 8x8 block: entry n is the raster position
 * (row * 8 + column) of the n-th element visited. The 64 entries form a
 * permutation of 0..63.
 */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00050
00051
00052
/*
 * Alternative scan order over the 64 positions of an 8x8 block; the entries
 * are a permutation of 0..63. The name suggests it belongs to a 2-4-8 DCT
 * mode -- confirm against the users of this table.
 */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00063
00064
00065 uint16_t __align8 inv_zigzag_direct16[64] = {0, };
00066
/*
 * Alternate scan order over an 8x8 block that walks mostly along rows.
 * Entry n is the raster position (row * 8 + column) of the n-th element
 * visited; the 64 entries form a permutation of 0..63.
 */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00077
/*
 * Alternate scan order over an 8x8 block that walks mostly down columns.
 * Entry n is the raster position (row * 8 + column) of the n-th element
 * visited; the 64 entries form a permutation of 0..63.
 */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00088
00089
/*
 * Fixed-point reciprocal table. For the spot-checked entries, inverse[i]
 * equals 2^32 / i rounded up (e.g. inverse[3] == 1431655766). inverse[0] is 0
 * and inverse[1] is 0xFFFFFFFF, since 2^32 itself does not fit in 32 bits.
 * Presumably used to replace a division by a multiply and shift -- confirm
 * against the users of this table.
 */
const uint32_t inverse[256] = {
             0, 4294967295U, 2147483648U,  1431655766,  1073741824,   858993460,   715827883,   613566757,
     536870912,   477218589,   429496730,   390451573,   357913942,   330382100,   306783379,   286331154,
     268435456,   252645136,   238609295,   226050911,   214748365,   204522253,   195225787,   186737709,
     178956971,   171798692,   165191050,   159072863,   153391690,   148102321,   143165577,   138547333,
     134217728,   130150525,   126322568,   122713352,   119304648,   116080198,   113025456,   110127367,
     107374183,   104755300,   102261127,    99882961,    97612894,    95443718,    93368855,    91382283,
      89478486,    87652394,    85899346,    84215046,    82595525,    81037119,    79536432,    78090315,
      76695845,    75350304,    74051161,    72796056,    71582789,    70409300,    69273667,    68174085,
      67108864,    66076420,    65075263,    64103990,    63161284,    62245903,    61356676,    60492498,
      59652324,    58835169,    58040099,    57266231,    56512728,    55778797,    55063684,    54366675,
      53687092,    53024288,    52377650,    51746594,    51130564,    50529028,    49941481,    49367441,
      48806447,    48258060,    47721859,    47197443,    46684428,    46182445,    45691142,    45210183,
      44739243,    44278014,    43826197,    43383509,    42949673,    42524429,    42107523,    41698712,
      41297763,    40904451,    40518560,    40139882,    39768216,    39403370,    39045158,    38693400,
      38347923,    38008561,    37675152,    37347542,    37025581,    36709123,    36398028,    36092163,
      35791395,    35495598,    35204650,    34918434,    34636834,    34359739,    34087043,    33818641,
      33554432,    33294321,    33038210,    32786010,    32537632,    32292988,    32051995,    31814573,
      31580642,    31350127,    31122952,    30899046,    30678338,    30460761,    30246249,    30034737,
      29826162,    29620465,    29417585,    29217465,    29020050,    28825284,    28633116,    28443493,
      28256364,    28071682,    27889399,    27709467,    27531842,    27356480,    27183338,    27012373,
      26843546,    26676816,    26512144,    26349493,    26188825,    26030105,    25873297,    25718368,
      25565282,    25414008,    25264514,    25116768,    24970741,    24826401,    24683721,    24542671,
      24403224,    24265352,    24129030,    23994231,    23860930,    23729102,    23598722,    23469767,
      23342214,    23216040,    23091223,    22967740,    22845571,    22724695,    22605092,    22486740,
      22369622,    22253717,    22139007,    22025474,    21913099,    21801865,    21691755,    21582751,
      21474837,    21367997,    21262215,    21157475,    21053762,    20951060,    20849356,    20748635,
      20648882,    20550083,    20452226,    20355296,    20259280,    20164166,    20069941,    19976593,
      19884108,    19792477,    19701685,    19611723,    19522579,    19434242,    19346700,    19259944,
      19173962,    19088744,    19004281,    18920561,    18837576,    18755316,    18673771,    18592933,
      18512791,    18433337,    18354562,    18276457,    18199014,    18122225,    18046082,    17970575,
      17895698,    17821442,    17747799,    17674763,    17602325,    17530479,    17459217,    17388532,
      17318417,    17248865,    17179870,    17111424,    17043522,    16976156,    16909321,    16843010,
};
00124
00125
/*
 * Fixed reordering of the 64 coefficient positions of an 8x8 block; the
 * entries are a permutation of 0x00..0x3f. The name suggests it is the
 * coefficient layout expected by the "simple" MMX IDCT -- confirm against
 * the code that consumes it.
 */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0c, 0x05, 0x0d,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1c, 0x15, 0x1d,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2c, 0x25, 0x2d,
    0x12, 0x1a, 0x16, 0x1b, 0x13, 0x1e, 0x17, 0x1f,
    0x02, 0x0a, 0x06, 0x0b, 0x03, 0x0e, 0x07, 0x0f,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3c, 0x35, 0x3d,
    0x22, 0x2a, 0x26, 0x2b, 0x23, 0x2e, 0x27, 0x2f,
    0x32, 0x3a, 0x36, 0x3b, 0x33, 0x3e, 0x37, 0x3f,
};
00136
/**
 * Add up all samples of a 16x16 block of 8-bit pixels.
 *
 * @param pix        pointer to the top-left sample of the block
 * @param line_size  distance in bytes between the starts of consecutive rows
 * @return           sum of the 256 samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = pix + row * line_size;

        for (col = 0; col < 16; col++)
            total += line[col];
    }
    return total;
}
00158
00159 static int pix_norm1_c(uint8_t * pix, int line_size)
00160 {
00161 int s, i, j;
00162 uint32_t *sq = squareTbl + 256;
00163
00164 s = 0;
00165 for (i = 0; i < 16; i++) {
00166 for (j = 0; j < 16; j += 8) {
00167 #if 0
00168 s += sq[pix[0]];
00169 s += sq[pix[1]];
00170 s += sq[pix[2]];
00171 s += sq[pix[3]];
00172 s += sq[pix[4]];
00173 s += sq[pix[5]];
00174 s += sq[pix[6]];
00175 s += sq[pix[7]];
00176 #else
00177 #if LONG_MAX > 2147483647
00178 register uint64_t x=*(uint64_t*)pix;
00179 s += sq[x&0xff];
00180 s += sq[(x>>8)&0xff];
00181 s += sq[(x>>16)&0xff];
00182 s += sq[(x>>24)&0xff];
00183 s += sq[(x>>32)&0xff];
00184 s += sq[(x>>40)&0xff];
00185 s += sq[(x>>48)&0xff];
00186 s += sq[(x>>56)&0xff];
00187 #else
00188 register uint32_t x=*(uint32_t*)pix;
00189 s += sq[x&0xff];
00190 s += sq[(x>>8)&0xff];
00191 s += sq[(x>>16)&0xff];
00192 s += sq[(x>>24)&0xff];
00193 x=*(uint32_t*)(pix+4);
00194 s += sq[x&0xff];
00195 s += sq[(x>>8)&0xff];
00196 s += sq[(x>>16)&0xff];
00197 s += sq[(x>>24)&0xff];
00198 #endif
00199 #endif
00200 pix += 8;
00201 }
00202 pix += line_size - 16;
00203 }
00204 return s;
00205 }
00206
/**
 * Copy w 32-bit words from src to dst, passing each one through bswap_32().
 *
 * @param dst  destination array, at least w words
 * @param src  source array, at least w words
 * @param w    number of 32-bit words to process; nothing is done if w <= 0
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int n;

    /* Words are handled strictly in ascending index order. */
    for (n = 0; n < w; n++)
        dst[n] = bswap_32(src[n]);
}
00224
00225 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00226 {
00227 int s, i;
00228 uint32_t *sq = squareTbl + 256;
00229
00230 s = 0;
00231 for (i = 0; i < h; i++) {
00232 s += sq[pix1[0] - pix2[0]];
00233 s += sq[pix1[1] - pix2[1]];
00234 s += sq[pix1[2] - pix2[2]];
00235 s += sq[pix1[3] - pix2[3]];
00236 pix1 += line_size;
00237 pix2 += line_size;
00238 }
00239 return s;
00240 }
00241
00242 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00243 {
00244 int s, i;
00245 uint32_t *sq = squareTbl + 256;
00246
00247 s = 0;
00248 for (i = 0; i < h; i++) {
00249 s += sq[pix1[0] - pix2[0]];
00250 s += sq[pix1[1] - pix2[1]];
00251 s += sq[pix1[2] - pix2[2]];
00252 s += sq[pix1[3] - pix2[3]];
00253 s += sq[pix1[4] - pix2[4]];
00254 s += sq[pix1[5] - pix2[5]];
00255 s += sq[pix1[6] - pix2[6]];
00256 s += sq[pix1[7] - pix2[7]];
00257 pix1 += line_size;
00258 pix2 += line_size;
00259 }
00260 return s;
00261 }
00262
00263 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00264 {
00265 int s, i;
00266 uint32_t *sq = squareTbl + 256;
00267
00268 s = 0;
00269 for (i = 0; i < h; i++) {
00270 s += sq[pix1[ 0] - pix2[ 0]];
00271 s += sq[pix1[ 1] - pix2[ 1]];
00272 s += sq[pix1[ 2] - pix2[ 2]];
00273 s += sq[pix1[ 3] - pix2[ 3]];
00274 s += sq[pix1[ 4] - pix2[ 4]];
00275 s += sq[pix1[ 5] - pix2[ 5]];
00276 s += sq[pix1[ 6] - pix2[ 6]];
00277 s += sq[pix1[ 7] - pix2[ 7]];
00278 s += sq[pix1[ 8] - pix2[ 8]];
00279 s += sq[pix1[ 9] - pix2[ 9]];
00280 s += sq[pix1[10] - pix2[10]];
00281 s += sq[pix1[11] - pix2[11]];
00282 s += sq[pix1[12] - pix2[12]];
00283 s += sq[pix1[13] - pix2[13]];
00284 s += sq[pix1[14] - pix2[14]];
00285 s += sq[pix1[15] - pix2[15]];
00286
00287 pix1 += line_size;
00288 pix2 += line_size;
00289 }
00290 return s;
00291 }
00292
00293
/**
 * Wavelet-domain block comparison.
 *
 * The per-sample differences pix1 - pix2 are scaled by 16, written into a
 * scratch buffer with a stride of 16 ints, transformed in place with
 * ff_spatial_dwt(), and the absolute values of the transformed samples are
 * summed. The sum divided by 4 is returned.
 *
 * @param v          unused
 * @param pix1       top-left sample of the first block
 * @param pix2       top-left sample of the second block
 * @param line_size  row stride in bytes, shared by both blocks
 * @param w          block width; the inner loops step 4 samples at a time and
 *                   the scratch buffer holds at most 16 samples per row
 * @param h          block height; at most 16 rows fit in the scratch buffer
 * @param type       forwarded unchanged to ff_spatial_dwt()
 * @return           the scaled sum of absolute transformed differences, or 0
 *                   when CONFIG_SNOW_ENCODER is not defined
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
#ifdef CONFIG_SNOW_ENCODER //idwt is in snow.c
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[16*16];
#if 0
    int level, ori;
    /* Disabled per-subband weights, indexed [type][dec_count-3][level][ori]. */
    static const int scale[2][2][4][4]={
      {
        {
            /* type 0, dec_count 3 (w == 8) */
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            /* type 0, dec_count 4 */
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            /* type 1, dec_count 3 (w == 8) */
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            /* type 1, dec_count 4 */
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };
#endif

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            /* Multiply instead of "<<4": the difference may be negative and
             * left-shifting a negative value is undefined behaviour. */
            tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0]) * 16;
            tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1]) * 16;
            tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2]) * 16;
            tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3]) * 16;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 16, type, dec_count);

    s=0;
#if 0
    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int sx= (ori&1) ? 1<<level: 0;
            int stride= 16<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;
            int size= 1<<level;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += ABS(v);
                }
            }
        }
    }
#endif
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            s+= ABS(tmp[16*i+j+0]);
            s+= ABS(tmp[16*i+j+1]);
            s+= ABS(tmp[16*i+j+2]);
            s+= ABS(tmp[16*i+j+3]);
        }
    }
    assert(s>=0);

    return s>>2;
#else
    /* The metric is compiled out in this configuration. Return a defined
     * value: falling off the end of a non-void function whose result the
     * w53_*_c / w97_*_c wrappers use would be undefined behaviour. */
    (void)v;
    (void)pix1;
    (void)pix2;
    (void)line_size;
    (void)w;
    (void)h;
    (void)type;
    return 0;
#endif
}
00376
/**
 * w_c() comparison for 8-pixel-wide blocks with transform type 1
 * (the 5/3 wavelet, going by the function name).
 */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int block_width = 8;
    const int dwt_type    = 1;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
00380
/**
 * w_c() comparison for 8-pixel-wide blocks with transform type 0
 * (the 9/7 wavelet, going by the function name).
 */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int block_width = 8;
    const int dwt_type    = 0;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
00384
/**
 * w_c() comparison for 16-pixel-wide blocks with transform type 1
 * (the 5/3 wavelet, going by the function name).
 */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int block_width = 16;
    const int dwt_type    = 1;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
00388
/**
 * w_c() comparison for 16-pixel-wide blocks with transform type 0
 * (the 9/7 wavelet, going by the function name).
 */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    const int block_width = 16;
    const int dwt_type    = 0;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
00392
00393 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00394 {
00395 int i;
00396
00397
00398 for(i=0;i<8;i++) {
00399 block[0] = pixels[0];
00400 block[1] = pixels[1];
00401 block[2] = pixels[2];
00402 block[3] = pixels[3];
00403 block[4] = pixels[4];
00404 block[5] = pixels[5];
00405 block[6] = pixels[6];
00406 block[7] = pixels[7];
00407 pixels += line_size;
00408 block += 8;
00409 }
00410 }
00411
00412 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00413 const uint8_t *s2, int stride){
00414 int i;
00415
00416
00417 for(i=0;i<8;i++) {
00418 block[0] = s1[0] - s2[0];
00419 block[1] = s1[1] - s2[1];
00420 block[2] = s1[2] - s2[2];
00421 block[3] = s1[3] - s2[3];
00422 block[4] = s1[4] - s2[4];
00423 block[5] = s1[5] - s2[5];
00424 block[6] = s1[6] - s2[6];
00425 block[7] = s1[7] - s2[7];
00426 s1 += stride;
00427 s2 += stride;
00428 block += 8;
00429 }
00430 }
00431
00432
00433 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00434 int line_size)
00435 {
00436 int i;
00437 uint8_t *cm = cropTbl + MAX_NEG_CROP;
00438
00439
00440 for(i=0;i<8;i++) {
00441 pixels[0] = cm[block[0]];
00442 pixels[1] = cm[block[1]];
00443 pixels[2] = cm[block[2]];
00444 pixels[3] = cm[block[3]];
00445 pixels[4] = cm[block[4]];
00446 pixels[5] = cm[block[5]];
00447 pixels[6] = cm[block[6]];
00448 pixels[7] = cm[block[7]];
00449
00450 pixels += line_size;
00451 block += 8;
00452 }
00453 }
00454
00455 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00456 int line_size)
00457 {
00458 int i;
00459 uint8_t *cm = cropTbl + MAX_NEG_CROP;
00460
00461
00462 for(i=0;i<4;i++) {
00463 pixels[0] = cm[block[0]];
00464 pixels[1] = cm[block[1]];
00465 pixels[2] = cm[block[2]];
00466 pixels[3] = cm[block[3]];
00467
00468 pixels += line_size;
00469 block += 8;
00470 }
00471 }
00472
00473 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00474 int line_size)
00475 {
00476 int i;
00477 uint8_t *cm = cropTbl + MAX_NEG_CROP;
00478
00479
00480 for(i=0;i<2;i++) {
00481 pixels[0] = cm[block[0]];
00482 pixels[1] = cm[block[1]];
00483
00484 pixels += line_size;
00485 block += 8;
00486 }
00487 }
00488
00489 static void put_signed_pixels_clamped_c(const DCTELEM *block,
00490 uint8_t *restrict pixels,
00491 int line_size)
00492 {
00493 int i, j;
00494
00495 for (i = 0; i < 8; i++) {
00496 for (j = 0; j < 8; j++) {
00497 if (*block < -128)
00498 *pixels = 0;
00499 else if (*block > 127)
00500 *pixels = 255;
00501 else
00502 *pixels = (uint8_t)(*block + 128);
00503 block++;
00504 pixels++;
00505 }
00506 pixels += (line_size - 8);
00507 }
00508 }
00509
00510 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00511 int line_size)
00512 {
00513 int i;
00514 uint8_t *cm = cropTbl + MAX_NEG_CROP;
00515
00516
00517 for(i=0;i<8;i++) {
00518 pixels[0] = cm[pixels[0] + block[0]];
00519 pixels[1] = cm[pixels[1] + block[1]];
00520 pixels[2] = cm[pixels[2] + block[2]];
00521 pixels[3] = cm[pixels[3] + block[3]];
00522 pixels[4] = cm[pixels[4] + block[4]];
00523 pixels[5] = cm[pixels[5] + block[5]];
00524 pixels[6] = cm[pixels[6] + block[6]];
00525 pixels[7] = cm[pixels[7] + block[7]];
00526 pixels += line_size;
00527 block += 8;
00528 }
00529 }
00530
00531 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00532 int line_size)
00533 {
00534 int i;
00535 uint8_t *cm = cropTbl + MAX_NEG_CROP;
00536
00537
00538 for(i=0;i<4;i++) {
00539 pixels[0] = cm[pixels[0] + block[0]];
00540 pixels[1] = cm[pixels[1] + block[1]];
00541 pixels[2] = cm[pixels[2] + block[2]];
00542 pixels[3] = cm[pixels[3] + block[3]];
00543 pixels += line_size;
00544 block += 8;
00545 }
00546 }
00547
00548 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00549 int line_size)
00550 {
00551 int i;
00552 uint8_t *cm = cropTbl + MAX_NEG_CROP;
00553
00554
00555 for(i=0;i<2;i++) {
00556 pixels[0] = cm[pixels[0] + block[0]];
00557 pixels[1] = cm[pixels[1] + block[1]];
00558 pixels += line_size;
00559 block += 8;
00560 }
00561 }
00562
00563 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00564 {
00565 int i;
00566 for(i=0;i<8;i++) {
00567 pixels[0] += block[0];
00568 pixels[1] += block[1];
00569 pixels[2] += block[2];
00570 pixels[3] += block[3];
00571 pixels[4] += block[4];
00572 pixels[5] += block[5];
00573 pixels[6] += block[6];
00574 pixels[7] += block[7];
00575 pixels += line_size;
00576 block += 8;
00577 }
00578 }
00579
00580 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00581 {
00582 int i;
00583 for(i=0;i<4;i++) {
00584 pixels[0] += block[0];
00585 pixels[1] += block[1];
00586 pixels[2] += block[2];
00587 pixels[3] += block[3];
00588 pixels += line_size;
00589 block += 4;
00590 }
00591 }
00592
00593 #if 0
00594
/*
 * Disabled variant of PIXOP2 (the enclosing "#if 0" keeps it from being
 * compiled). It generates pixel copy/average helpers that process one
 * 8-byte row per step through a uint64_t store and LD64() loads:
 *
 *   _pixels              OP(dst, src) on each row
 *   _no_rnd_pixels_x2_c  combine pixels and pixels+1,         (a&b)+(((a^b)&0xFE..)>>1)
 *   _pixels_x2_c         combine pixels and pixels+1,         (a|b)-(((a^b)&0xFE..)>>1)
 *   _no_rnd_pixels_y2_c  combine pixels and pixels+line_size, (a&b)+...
 *   _pixels_y2_c         combine pixels and pixels+line_size, (a|b)-...
 *   _pixels_xy2_c        combine the four neighbours, bias 0x02 per byte
 *   _no_rnd_pixels_xy2_c same, bias 0x01 per byte
 *
 * The (a&b)+... form is the per-byte average of a and b rounded down, the
 * (a|b)-... form the average rounded up; the 0xFE mask keeps the shift from
 * leaking bits between neighbouring bytes. The xy2 variants split every byte
 * into its low 2 bits (l0/l1) and high 6 bits pre-shifted by 2 (h0/h1), so
 * four samples can be summed per byte lane without overflow, and they
 * produce two output rows per loop iteration (i += 2).
 *
 * NOTE(review): the first generated function is named OPNAME ## _pixels,
 * while the CALL_2X_PIXELS line at the end refers to OPNAME ## _pixels_c;
 * this would need reconciling before the variant could be enabled.
 * CALL_2X_PIXELS is defined elsewhere; presumably it builds the 16-wide
 * functions from two 8-wide calls -- confirm.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
int i;\
for(i=0; i<h; i++){\
OP(*((uint64_t*)block), LD64(pixels));\
pixels+=line_size;\
block +=line_size;\
}\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
int i;\
for(i=0; i<h; i++){\
const uint64_t a= LD64(pixels );\
const uint64_t b= LD64(pixels+1);\
OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
pixels+=line_size;\
block +=line_size;\
}\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
int i;\
for(i=0; i<h; i++){\
const uint64_t a= LD64(pixels );\
const uint64_t b= LD64(pixels+1);\
OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
pixels+=line_size;\
block +=line_size;\
}\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
int i;\
for(i=0; i<h; i++){\
const uint64_t a= LD64(pixels );\
const uint64_t b= LD64(pixels+line_size);\
OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
pixels+=line_size;\
block +=line_size;\
}\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
int i;\
for(i=0; i<h; i++){\
const uint64_t a= LD64(pixels );\
const uint64_t b= LD64(pixels+line_size);\
OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
pixels+=line_size;\
block +=line_size;\
}\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
int i;\
const uint64_t a= LD64(pixels );\
const uint64_t b= LD64(pixels+1);\
uint64_t l0= (a&0x0303030303030303ULL)\
+ (b&0x0303030303030303ULL)\
+ 0x0202020202020202ULL;\
uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
uint64_t l1,h1;\
\
pixels+=line_size;\
for(i=0; i<h; i+=2){\
uint64_t a= LD64(pixels );\
uint64_t b= LD64(pixels+1);\
l1= (a&0x0303030303030303ULL)\
+ (b&0x0303030303030303ULL);\
h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
pixels+=line_size;\
block +=line_size;\
a= LD64(pixels );\
b= LD64(pixels+1);\
l0= (a&0x0303030303030303ULL)\
+ (b&0x0303030303030303ULL)\
+ 0x0202020202020202ULL;\
h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
pixels+=line_size;\
block +=line_size;\
}\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
int i;\
const uint64_t a= LD64(pixels );\
const uint64_t b= LD64(pixels+1);\
uint64_t l0= (a&0x0303030303030303ULL)\
+ (b&0x0303030303030303ULL)\
+ 0x0101010101010101ULL;\
uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
uint64_t l1,h1;\
\
pixels+=line_size;\
for(i=0; i<h; i+=2){\
uint64_t a= LD64(pixels );\
uint64_t b= LD64(pixels+1);\
l1= (a&0x0303030303030303ULL)\
+ (b&0x0303030303030303ULL);\
h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
pixels+=line_size;\
block +=line_size;\
a= LD64(pixels );\
b= LD64(pixels+1);\
l0= (a&0x0303030303030303ULL)\
+ (b&0x0303030303030303ULL)\
+ 0x0101010101010101ULL;\
h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
pixels+=line_size;\
block +=line_size;\
}\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

/* 64-bit averaging store: a becomes the per-byte average of a and b, rounded up. */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
00735 #else // 64 bit variant
00736
00737 #define PIXOP2(OPNAME, OP) \
00738 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00739 int i;\
00740 for(i=0; i<h; i++){\
00741 OP(*((uint16_t*)(block )), LD16(pixels ));\
00742 pixels+=line_size;\
00743 block +=line_size;\
00744 }\
00745 }\
00746 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00747 int i;\
00748 for(i=0; i<h; i++){\
00749 OP(*((uint32_t*)(block )), LD32(pixels ));\
00750 pixels+=line_size;\
00751 block +=line_size;\
00752 }\
00753 }\
00754 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00755 int i;\
00756 for(i=0; i<h; i++){\
00757 OP(*((uint32_t*)(block )), LD32(pixels ));\
00758 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
00759 pixels+=line_size;\
00760 block +=line_size;\
00761 }\
00762 }\
00763 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00764 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
00765 }\
00766 \
00767 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00768 int src_stride1, int src_stride2, int h){\
00769 int i;\
00770 for(i=0; i<h; i++){\
00771 uint32_t a,b;\
00772 a= LD32(&src1[i*src_stride1 ]);\
00773 b= LD32(&src2[i*src_stride2 ]);\
00774 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
00775 a= LD32(&src1[i*src_stride1+4]);\
00776 b= LD32(&src2[i*src_stride2+4]);\
00777 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
00778 }\
00779 }\
00780 \
00781 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00782 int src_stride1, int src_stride2, int h){\
00783 int i;\
00784 for(i=0; i<h; i++){\
00785 uint32_t a,b;\
00786 a= LD32(&src1[i*src_stride1 ]);\
00787 b= LD32(&src2[i*src_stride2 ]);\
00788 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00789 a= LD32(&src1[i*src_stride1+4]);\
00790 b= LD32(&src2[i*src_stride2+4]);\
00791 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
00792 }\
00793 }\
00794 \
00795 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00796 int src_stride1, int src_stride2, int h){\
00797 int i;\
00798 for(i=0; i<h; i++){\
00799 uint32_t a,b;\
00800 a= LD32(&src1[i*src_stride1 ]);\
00801 b= LD32(&src2[i*src_stride2 ]);\
00802 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00803 }\
00804 }\
00805 \
00806 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00807 int src_stride1, int src_stride2, int h){\
00808 int i;\
00809 for(i=0; i<h; i++){\
00810 uint32_t a,b;\
00811 a= LD16(&src1[i*src_stride1 ]);\
00812 b= LD16(&src2[i*src_stride2 ]);\
00813 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00814 }\
00815 }\
00816 \
00817 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00818 int src_stride1, int src_stride2, int h){\
00819 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00820 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00821 }\
00822 \
00823 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00824 int src_stride1, int src_stride2, int h){\
00825 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00826 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00827 }\
00828 \
00829 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00830 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00831 }\
00832 \
00833 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00834 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00835 }\
00836 \
00837 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00838 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00839 }\
00840 \
00841 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00842 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00843 }\
00844 \
00845 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00846 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00847 int i;\
00848 for(i=0; i<h; i++){\
00849 uint32_t a, b, c, d, l0, l1, h0, h1;\
00850 a= LD32(&src1[i*src_stride1]);\
00851 b= LD32(&src2[i*src_stride2]);\
00852 c= LD32(&src3[i*src_stride3]);\
00853 d= LD32(&src4[i*src_stride4]);\
00854 l0= (a&0x03030303UL)\
00855 + (b&0x03030303UL)\
00856 + 0x02020202UL;\
00857 h0= ((a&0xFCFCFCFCUL)>>2)\
00858 + ((b&0xFCFCFCFCUL)>>2);\
00859 l1= (c&0x03030303UL)\
00860 + (d&0x03030303UL);\
00861 h1= ((c&0xFCFCFCFCUL)>>2)\
00862 + ((d&0xFCFCFCFCUL)>>2);\
00863 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00864 a= LD32(&src1[i*src_stride1+4]);\
00865 b= LD32(&src2[i*src_stride2+4]);\
00866 c= LD32(&src3[i*src_stride3+4]);\
00867 d= LD32(&src4[i*src_stride4+4]);\
00868 l0= (a&0x03030303UL)\
00869 + (b&0x03030303UL)\
00870 + 0x02020202UL;\
00871 h0= ((a&0xFCFCFCFCUL)>>2)\
00872 + ((b&0xFCFCFCFCUL)>>2);\
00873 l1= (c&0x03030303UL)\
00874 + (d&0x03030303UL);\
00875 h1= ((c&0xFCFCFCFCUL)>>2)\
00876 + ((d&0xFCFCFCFCUL)>>2);\
00877 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00878 }\
00879 }\
00880 \
00881 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00882 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00883 }\
00884 \
00885 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00886 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00887 }\
00888 \
00889 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00890 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00891 }\
00892 \
00893 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00894 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00895 }\
00896 \
00897 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00898 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00899 int i;\
00900 for(i=0; i<h; i++){\
00901 uint32_t a, b, c, d, l0, l1, h0, h1;\
00902 a= LD32(&src1[i*src_stride1]);\
00903 b= LD32(&src2[i*src_stride2]);\
00904 c= LD32(&src3[i*src_stride3]);\
00905 d= LD32(&src4[i*src_stride4]);\
00906 l0= (a&0x03030303UL)\
00907 + (b&0x03030303UL)\
00908 + 0x01010101UL;\
00909 h0= ((a&0xFCFCFCFCUL)>>2)\
00910 + ((b&0xFCFCFCFCUL)>>2);\
00911 l1= (c&0x03030303UL)\
00912 + (d&0x03030303UL);\
00913 h1= ((c&0xFCFCFCFCUL)>>2)\
00914 + ((d&0xFCFCFCFCUL)>>2);\
00915 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00916 a= LD32(&src1[i*src_stride1+4]);\
00917 b= LD32(&src2[i*src_stride2+4]);\
00918 c= LD32(&src3[i*src_stride3+4]);\
00919 d= LD32(&src4[i*src_stride4+4]);\
00920 l0= (a&0x03030303UL)\
00921 + (b&0x03030303UL)\
00922 + 0x01010101UL;\
00923 h0= ((a&0xFCFCFCFCUL)>>2)\
00924 + ((b&0xFCFCFCFCUL)>>2);\
00925 l1= (c&0x03030303UL)\
00926 + (d&0x03030303UL);\
00927 h1= ((c&0xFCFCFCFCUL)>>2)\
00928 + ((d&0xFCFCFCFCUL)>>2);\
00929 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00930 }\
00931 }\
00932 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00933 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00934 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00935 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00936 }\
00937 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
00938 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00939 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00940 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
00941 }\
00942 \
00943 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00944 {\
00945 int i, a0, b0, a1, b1;\
00946 a0= pixels[0];\
00947 b0= pixels[1] + 2;\
00948 a0 += b0;\
00949 b0 += pixels[2];\
00950 \
00951 pixels+=line_size;\
00952 for(i=0; i<h; i+=2){\
00953 a1= pixels[0];\
00954 b1= pixels[1];\
00955 a1 += b1;\
00956 b1 += pixels[2];\
00957 \
00958 block[0]= (a1+a0)>>2; \
00959 block[1]= (b1+b0)>>2;\
00960 \
00961 pixels+=line_size;\
00962 block +=line_size;\
00963 \
00964 a0= pixels[0];\
00965 b0= pixels[1] + 2;\
00966 a0 += b0;\
00967 b0 += pixels[2];\
00968 \
00969 block[0]= (a1+a0)>>2;\
00970 block[1]= (b1+b0)>>2;\
00971 pixels+=line_size;\
00972 block +=line_size;\
00973 }\
00974 }\
00975 \
00976 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00977 {\
00978 int i;\
00979 const uint32_t a= LD32(pixels );\
00980 const uint32_t b= LD32(pixels+1);\
00981 uint32_t l0= (a&0x03030303UL)\
00982 + (b&0x03030303UL)\
00983 + 0x02020202UL;\
00984 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
00985 + ((b&0xFCFCFCFCUL)>>2);\
00986 uint32_t l1,h1;\
00987 \
00988 pixels+=line_size;\
00989 for(i=0; i<h; i+=2){\
00990 uint32_t a= LD32(pixels );\
00991 uint32_t b= LD32(pixels+1);\
00992 l1= (a&0x03030303UL)\
00993 + (b&0x03030303UL);\
00994 h1= ((a&0xFCFCFCFCUL)>>2)\
00995 + ((b&0xFCFCFCFCUL)>>2);\
00996 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00997 pixels+=line_size;\
00998 block +=line_size;\
00999 a= LD32(pixels );\
01000 b= LD32(pixels+1);\
01001 l0= (a&0x03030303UL)\
01002 + (b&0x03030303UL)\
01003 + 0x02020202UL;\
01004 h0= ((a&0xFCFCFCFCUL)>>2)\
01005 + ((b&0xFCFCFCFCUL)>>2);\
01006 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01007 pixels+=line_size;\
01008 block +=line_size;\
01009 }\
01010 }\
01011 \
01012 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01013 {\
01014 int j;\
01015 for(j=0; j<2; j++){\
01016 int i;\
01017 const uint32_t a= LD32(pixels );\
01018 const uint32_t b= LD32(pixels+1);\
01019 uint32_t l0= (a&0x03030303UL)\
01020 + (b&0x03030303UL)\
01021 + 0x02020202UL;\
01022 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01023 + ((b&0xFCFCFCFCUL)>>2);\
01024 uint32_t l1,h1;\
01025 \
01026 pixels+=line_size;\
01027 for(i=0; i<h; i+=2){\
01028 uint32_t a= LD32(pixels );\
01029 uint32_t b= LD32(pixels+1);\
01030 l1= (a&0x03030303UL)\
01031 + (b&0x03030303UL);\
01032 h1= ((a&0xFCFCFCFCUL)>>2)\
01033 + ((b&0xFCFCFCFCUL)>>2);\
01034 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01035 pixels+=line_size;\
01036 block +=line_size;\
01037 a= LD32(pixels );\
01038 b= LD32(pixels+1);\
01039 l0= (a&0x03030303UL)\
01040 + (b&0x03030303UL)\
01041 + 0x02020202UL;\
01042 h0= ((a&0xFCFCFCFCUL)>>2)\
01043 + ((b&0xFCFCFCFCUL)>>2);\
01044 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01045 pixels+=line_size;\
01046 block +=line_size;\
01047 }\
01048 pixels+=4-line_size*(h+1);\
01049 block +=4-line_size*h;\
01050 }\
01051 }\
01052 \
01053 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01054 {\
01055 int j;\
01056 for(j=0; j<2; j++){\
01057 int i;\
01058 const uint32_t a= LD32(pixels );\
01059 const uint32_t b= LD32(pixels+1);\
01060 uint32_t l0= (a&0x03030303UL)\
01061 + (b&0x03030303UL)\
01062 + 0x01010101UL;\
01063 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01064 + ((b&0xFCFCFCFCUL)>>2);\
01065 uint32_t l1,h1;\
01066 \
01067 pixels+=line_size;\
01068 for(i=0; i<h; i+=2){\
01069 uint32_t a= LD32(pixels );\
01070 uint32_t b= LD32(pixels+1);\
01071 l1= (a&0x03030303UL)\
01072 + (b&0x03030303UL);\
01073 h1= ((a&0xFCFCFCFCUL)>>2)\
01074 + ((b&0xFCFCFCFCUL)>>2);\
01075 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01076 pixels+=line_size;\
01077 block +=line_size;\
01078 a= LD32(pixels );\
01079 b= LD32(pixels+1);\
01080 l0= (a&0x03030303UL)\
01081 + (b&0x03030303UL)\
01082 + 0x01010101UL;\
01083 h0= ((a&0xFCFCFCFCUL)>>2)\
01084 + ((b&0xFCFCFCFCUL)>>2);\
01085 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01086 pixels+=line_size;\
01087 block +=line_size;\
01088 }\
01089 pixels+=4-line_size*(h+1);\
01090 block +=4-line_size*h;\
01091 }\
01092 }\
01093 \
01094 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
01095 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
01096 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
01097 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
01098 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
01099 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
01100 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
01101 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
01102
/* "avg" store operation handed to PIXOP2: the destination word is replaced by
 * rnd_avg32(destination, new value). rnd_avg32() is defined elsewhere;
 * presumably a rounding per-byte average of two packed words -- confirm. */
01103 #define op_avg(a, b) a = rnd_avg32(a, b)
/* Closes a preprocessor conditional that was opened above this section. */
01104 #endif
/* "put" store operation handed to PIXOP2: plain overwrite of the destination. */
01105 #define op_put(a, b) a = b
01106
/* Expand the PIXOP2 function family twice: once with the "avg" prefix and
 * averaging stores, once with the "put" prefix and overwriting stores. */
01107 PIXOP2(avg, op_avg)
01108 PIXOP2(put, op_put)
/* The store operations are only needed for the two expansions above. */
01109 #undef op_avg
01110 #undef op_put
01111
/*
 * Rounded integer averages: avg2 adds 1 before halving, avg4 adds 2 before
 * dividing by four (both via right shift).
 * Every argument is parenthesized so that expression arguments with
 * lower-precedence operators (shifts, bitwise ops, ?:) are grouped as the
 * caller wrote them. Each argument is evaluated exactly once.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
01114
/**
 * Single-stride entry point for put_no_rnd_pixels16_l2().
 * Forwards dst and the two sources a and b unchanged, supplying the same
 * stride for all three stride arguments of the underlying helper.
 */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b,
                                     int stride, int h)
{
    put_no_rnd_pixels16_l2(dst, a, b,
                           stride, stride, stride,
                           h);
}
01118
/**
 * Single-stride entry point for put_no_rnd_pixels8_l2().
 * Forwards dst and the two sources a and b unchanged, supplying the same
 * stride for all three stride arguments of the underlying helper.
 */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b,
                                    int stride, int h)
{
    put_no_rnd_pixels8_l2(dst, a, b,
                          stride, stride, stride,
                          h);
}
01122
/**
 * Bilinear interpolation of an 8-pixel-wide block at a 1/16-sample offset.
 *
 * @param dst     output block, 8 bytes written per row
 * @param src     source samples; 9 bytes of the current row and of the row
 *                below are read for every output row
 * @param stride  distance in bytes between rows, used for both dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal fraction in 1/16 units (weight of the right sample)
 * @param y16     vertical fraction in 1/16 units (weight of the lower sample)
 * @param rounder value added to the weighted sum before the final >>8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* The four corner weights always add up to 256, hence the >>8 below. */
    const int w_top_left     = (16 - x16) * (16 - y16);
    const int w_top_right    = x16 * (16 - y16);
    const int w_bottom_left  = (16 - x16) * y16;
    const int w_bottom_right = x16 * y16;
    int row;

    for (row = 0; row < h; row++) {
        int col;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_top_left     * src[col]
                      + w_top_right    * src[col + 1]
                      + w_bottom_left  * src[stride + col]
                      + w_bottom_right * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
01145
/**
 * Motion compensation of an 8-pixel-wide block with a per-pixel source
 * position that advances linearly in x and y.
 *
 * The running position (vx, vy) starts each row at (ox, oy), advances by
 * (dxx, dyx) per output pixel and the row origin advances by (dxy, dyy) per
 * output row. After dropping the low 16 bits, the lowest `shift` bits are the
 * interpolation fraction and the remaining bits are the integer source
 * coordinate.
 *
 * Positions inside [0, width-1) x [0, height-1) are bilinearly interpolated.
 * When one coordinate falls outside, it is passed through clip() (defined
 * elsewhere; presumably clamps to the given range) and interpolation happens
 * along the other axis only. When both fall outside, the clipped sample is
 * copied as is.
 *
 * @param r  rounding term added before the final >>(2*shift)
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s     = 1 << shift;   /* weight corresponding to a full sample */
    const int max_x = width - 1;
    const int max_y = height - 1;
    int row;

    for (row = 0; row < h; row++, ox += dxy, oy += dyy) {
        uint8_t *out = dst + row * stride;
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++, vx += dxx, vy += dyx) {
            const int pos_x  = vx >> 16;
            const int pos_y  = vy >> 16;
            const int frac_x = pos_x & (s - 1);
            const int frac_y = pos_y & (s - 1);
            const int src_x  = pos_x >> shift;
            const int src_y  = pos_y >> shift;
            /* The unsigned compare rejects negative coordinates as well. */
            const int x_inside = (unsigned)src_x < max_x;
            const int y_inside = (unsigned)src_y < max_y;
            int index;

            if (x_inside && y_inside) {
                index = src_x + src_y * stride;
                out[col] = ((src[index]              * (s - frac_x)
                           + src[index + 1]          *      frac_x) * (s - frac_y)
                          + (src[index + stride]     * (s - frac_x)
                           + src[index + stride + 1] *      frac_x) *      frac_y
                          + r) >> (shift * 2);
            } else if (x_inside) {
                /* row out of range: horizontal interpolation only */
                index = src_x + clip(src_y, 0, max_y) * stride;
                out[col] = ((src[index]     * (s - frac_x)
                           + src[index + 1] *      frac_x) * s
                          + r) >> (shift * 2);
            } else if (y_inside) {
                /* column out of range: vertical interpolation only */
                index = clip(src_x, 0, max_x) + src_y * stride;
                out[col] = ((src[index]          * (s - frac_y)
                           + src[index + stride] *      frac_y) * s
                          + r) >> (shift * 2);
            } else {
                index = clip(src_x, 0, max_x) + clip(src_y, 0, max_y) * stride;
                out[col] = src[index];
            }
        }
    }
}
01203
/**
 * Zero-offset "put" case of the tpel family: dispatches on the block width
 * to put_pixels{2,4,8,16}_c. Any other width does nothing.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
01212
/**
 * tpel "put", horizontal blend with weights 2:1 (current : right neighbour).
 * The division by 3 is done as *683 >> 11 (683/2048 ~ 1/3) after adding 1.
 * Reads width+1 bytes per source row.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + src[col + 1] + 1;
            dst[col] = (683 * weighted) >> 11;
        }
    }
}
01223
/**
 * tpel "put", horizontal blend with weights 1:2 (current : right neighbour).
 * The division by 3 is done as *683 >> 11 (683/2048 ~ 1/3) after adding 1.
 * Reads width+1 bytes per source row.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2 * src[col + 1] + 1;
            dst[col] = (683 * weighted) >> 11;
        }
    }
}
01234
/**
 * tpel "put", vertical blend with weights 2:1 (current : sample one row below).
 * The division by 3 is done as *683 >> 11 (683/2048 ~ 1/3) after adding 1.
 * Reads height+1 source rows.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + src[col + stride] + 1;
            dst[col] = (683 * weighted) >> 11;
        }
    }
}
01245
/**
 * tpel "put", 2x2 blend with weights
 *     4 3
 *     3 2
 * (current, right / below, below-right). The weights sum to 12; the division
 * is done as *2731 >> 15 (2731/32768 ~ 1/12) after adding 6.
 * Reads width+1 bytes from height+1 source rows.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 4 * src[col] + 3 * src[col + 1]
                               + 3 * src[col + stride] + 2 * src[col + stride + 1] + 6;
            dst[col] = (2731 * weighted) >> 15;
        }
    }
}
01256
/**
 * tpel "put", 2x2 blend with weights
 *     3 2
 *     4 3
 * (current, right / below, below-right). The weights sum to 12; the division
 * is done as *2731 >> 15 (2731/32768 ~ 1/12) after adding 6.
 * Reads width+1 bytes from height+1 source rows.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 3 * src[col] + 2 * src[col + 1]
                               + 4 * src[col + stride] + 3 * src[col + stride + 1] + 6;
            dst[col] = (2731 * weighted) >> 15;
        }
    }
}
01267
/**
 * tpel "put", vertical blend with weights 1:2 (current : sample one row below).
 * The division by 3 is done as *683 >> 11 (683/2048 ~ 1/3) after adding 1.
 * Reads height+1 source rows.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2 * src[col + stride] + 1;
            dst[col] = (683 * weighted) >> 11;
        }
    }
}
01278
/**
 * tpel "put", 2x2 blend with weights
 *     3 4
 *     2 3
 * (current, right / below, below-right). The weights sum to 12; the division
 * is done as *2731 >> 15 (2731/32768 ~ 1/12) after adding 6.
 * Reads width+1 bytes from height+1 source rows.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 3 * src[col] + 4 * src[col + 1]
                               + 2 * src[col + stride] + 3 * src[col + stride + 1] + 6;
            dst[col] = (2731 * weighted) >> 15;
        }
    }
}
01289
/**
 * tpel "put", 2x2 blend with weights
 *     2 3
 *     3 4
 * (current, right / below, below-right). The weights sum to 12; the division
 * is done as *2731 >> 15 (2731/32768 ~ 1/12) after adding 6.
 * Reads width+1 bytes from height+1 source rows.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + 3 * src[col + 1]
                               + 3 * src[col + stride] + 4 * src[col + stride + 1] + 6;
            dst[col] = (2731 * weighted) >> 15;
        }
    }
}
01300
/**
 * Zero-offset "avg" case of the tpel family: dispatches on the block width
 * to avg_pixels{2,4,8,16}_c. Any other width does nothing.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
01309
/**
 * tpel "avg", horizontal blend with weights 2:1 (current : right neighbour).
 * The interpolated value (*683 >> 11 after adding 1) is averaged with the
 * byte already in dst, rounding up.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + src[col + 1] + 1;
            dst[col] = (dst[col] + ((683 * weighted) >> 11) + 1) >> 1;
        }
    }
}
01320
/**
 * tpel "avg", horizontal blend with weights 1:2 (current : right neighbour).
 * The interpolated value (*683 >> 11 after adding 1) is averaged with the
 * byte already in dst, rounding up.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2 * src[col + 1] + 1;
            dst[col] = (dst[col] + ((683 * weighted) >> 11) + 1) >> 1;
        }
    }
}
01331
/**
 * tpel "avg", vertical blend with weights 2:1 (current : sample one row below).
 * The interpolated value (*683 >> 11 after adding 1) is averaged with the
 * byte already in dst, rounding up.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + src[col + stride] + 1;
            dst[col] = (dst[col] + ((683 * weighted) >> 11) + 1) >> 1;
        }
    }
}
01342
/**
 * tpel "avg", 2x2 blend with weights
 *     4 3
 *     3 2
 * (current, right / below, below-right). The interpolated value
 * (*2731 >> 15 after adding 6) is averaged with the byte already in dst,
 * rounding up.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 4 * src[col] + 3 * src[col + 1]
                               + 3 * src[col + stride] + 2 * src[col + stride + 1] + 6;
            dst[col] = (dst[col] + ((2731 * weighted) >> 15) + 1) >> 1;
        }
    }
}
01353
/**
 * tpel "avg", 2x2 blend with weights
 *     3 2
 *     4 3
 * (current, right / below, below-right). The interpolated value
 * (*2731 >> 15 after adding 6) is averaged with the byte already in dst,
 * rounding up.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 3 * src[col] + 2 * src[col + 1]
                               + 4 * src[col + stride] + 3 * src[col + stride + 1] + 6;
            dst[col] = (dst[col] + ((2731 * weighted) >> 15) + 1) >> 1;
        }
    }
}
01364
/**
 * tpel "avg", vertical blend with weights 1:2 (current : sample one row below).
 * The interpolated value (*683 >> 11 after adding 1) is averaged with the
 * byte already in dst, rounding up.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2 * src[col + stride] + 1;
            dst[col] = (dst[col] + ((683 * weighted) >> 11) + 1) >> 1;
        }
    }
}
01375
/**
 * tpel "avg", 2x2 blend with weights
 *     3 4
 *     2 3
 * (current, right / below, below-right). The interpolated value
 * (*2731 >> 15 after adding 6) is averaged with the byte already in dst,
 * rounding up.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 3 * src[col] + 4 * src[col + 1]
                               + 2 * src[col + stride] + 3 * src[col + stride + 1] + 6;
            dst[col] = (dst[col] + ((2731 * weighted) >> 15) + 1) >> 1;
        }
    }
}
01386
/**
 * tpel "avg", 2x2 blend with weights
 *     2 3
 *     3 4
 * (current, right / below, below-right). The interpolated value
 * (*2731 >> 15 after adding 6) is averaged with the byte already in dst,
 * rounding up.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;
        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + 3 * src[col + 1]
                               + 3 * src[col + stride] + 4 * src[col + stride + 1] + 6;
            dst[col] = (dst[col] + ((2731 * weighted) >> 15) + 1) >> 1;
        }
    }
}
/*
 * Disabled (compiled out by #if 0): TPEL_WIDTH(width) would generate one
 * fixed-width wrapper per put_tpel_pixels_mcXY_c variant, each forwarding to
 * the generic function with `width` supplied as a constant.
 * NOTE(review): every wrapper body starts with `void`, so as written each
 * line would be parsed as a declaration rather than a call; this has to be
 * fixed before the block can be enabled.
 */
01397 #if 0
01398 #define TPEL_WIDTH(width)\
01399 static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01400 void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
01401 static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01402 void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
01403 static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01404 void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
01405 static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01406 void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
01407 static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01408 void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
01409 static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01410 void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
01411 static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01412 void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
01413 static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01414 void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
01415 static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01416 void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
01417 #endif
01418
/*
 * H264_CHROMA_MC(OPNAME, OP) generates three bilinear chroma interpolators,
 * OPNAME##h264_chroma_mc{2,4,8}_c, for blocks 2, 4 and 8 pixels wide.
 *
 * x and y are the fractional offsets in 1/8 units (asserted to be 0..7).
 * The four corner weights always add up to 64. For every output pixel OP
 * receives the destination byte as an lvalue and the unnormalized weighted
 * sum; normalisation and the store itself are OP's job.
 * Each function reads one byte more than the block width from h+1 source rows.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int w_tl = (8 - x) * (8 - y);\
    const int w_tr = x * (8 - y);\
    const int w_bl = (8 - x) * y;\
    const int w_br = x * y;\
    int row, col;\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for (row = 0; row < h; row++, dst += stride, src += stride) {\
        for (col = 0; col < 2; col++) {\
            OP(dst[col], (w_tl*src[col] + w_tr*src[col+1] + w_bl*src[stride+col] + w_br*src[stride+col+1]));\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int w_tl = (8 - x) * (8 - y);\
    const int w_tr = x * (8 - y);\
    const int w_bl = (8 - x) * y;\
    const int w_br = x * y;\
    int row, col;\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for (row = 0; row < h; row++, dst += stride, src += stride) {\
        for (col = 0; col < 4; col++) {\
            OP(dst[col], (w_tl*src[col] + w_tr*src[col+1] + w_bl*src[stride+col] + w_br*src[stride+col+1]));\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int w_tl = (8 - x) * (8 - y);\
    const int w_tr = x * (8 - y);\
    const int w_bl = (8 - x) * y;\
    const int w_br = x * y;\
    int row, col;\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for (row = 0; row < h; row++, dst += stride, src += stride) {\
        for (col = 0; col < 8; col++) {\
            OP(dst[col], (w_tl*src[col] + w_tr*src[col+1] + w_bl*src[stride+col] + w_br*src[stride+col+1]));\
        }\
    }\
}

/* avg: normalize the weighted sum (+32, >>6), then average it with the byte
 * already stored in the destination, rounding up. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
/* put: normalize the weighted sum (+32, >>6) and overwrite the destination. */
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
01489
/**
 * Copy a block 4 bytes wide and h rows high.
 * Each row is moved as one 32-bit word through LD32/ST32; source and
 * destination advance by their own strides.
 */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;

    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        ST32(dst, LD32(src));
    }
}
01500
/**
 * Copy a block 8 bytes wide and h rows high.
 * Each row is moved as two 32-bit words through LD32/ST32; source and
 * destination advance by their own strides.
 */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;

    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        int off;

        for (off = 0; off < 8; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
    }
}
01512
/**
 * Copy a block 16 bytes wide and h rows high.
 * Each row is moved as four 32-bit words through LD32/ST32; source and
 * destination advance by their own strides.
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;

    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        int off;

        for (off = 0; off < 16; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
    }
}
01526
/**
 * Copy a block 17 bytes wide and h rows high.
 * The first 16 bytes of each row are moved as four 32-bit words through
 * LD32/ST32, the 17th byte is copied individually; source and destination
 * advance by their own strides.
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;

    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        int off;

        for (off = 0; off < 16; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
        dst[16] = src[16];
    }
}
01541
/**
 * Copy a block 9 bytes wide and h rows high.
 * The first 8 bytes of each row are moved as two 32-bit words through
 * LD32/ST32, the 9th byte is copied individually; source and destination
 * advance by their own strides.
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int row;

    for (row = 0; row < h; row++, dst += dstStride, src += srcStride) {
        int off;

        for (off = 0; off < 8; off += 4) {
            ST32(dst + off, LD32(src + off));
        }
        dst[8] = src[8];
    }
}
01554
01555
01556 #define QPEL_MC(r, OPNAME, RND, OP) \
01557 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01558 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
01559 int i;\
01560 for(i=0; i<h; i++)\
01561 {\
01562 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
01563 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
01564 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
01565 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
01566 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
01567 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
01568 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
01569 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
01570 dst+=dstStride;\
01571 src+=srcStride;\
01572 }\
01573 }\
01574 \
01575 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01576 const int w=8;\
01577 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
01578 int i;\
01579 for(i=0; i<w; i++)\
01580 {\
01581 const int src0= src[0*srcStride];\
01582 const int src1= src[1*srcStride];\
01583 const int src2= src[2*srcStride];\
01584 const int src3= src[3*srcStride];\
01585 const int src4= src[4*srcStride];\
01586 const int src5= src[5*srcStride];\
01587 const int src6= src[6*srcStride];\
01588 const int src7= src[7*srcStride];\
01589 const int src8= src[8*srcStride];\
01590 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
01591 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
01592 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
01593 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
01594 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
01595 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
01596 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
01597 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
01598 dst++;\
01599 src++;\
01600 }\
01601 }\
01602 \
01603 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01604 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
01605 int i;\
01606 \
01607 for(i=0; i<h; i++)\
01608 {\
01609 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
01610 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
01611 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
01612 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
01613 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
01614 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
01615 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
01616 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
01617 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
01618 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
01619 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
01620 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
01621 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
01622 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
01623 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
01624 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
01625 dst+=dstStride;\
01626 src+=srcStride;\
01627 }\
01628 }\
01629 \
01630 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01631 uint8_t *cm = cropTbl + MAX_NEG_CROP;\
01632 int i;\
01633 const int w=16;\
01634 for(i=0; i<w; i++)\
01635 {\
01636 const int src0= src[0*srcStride];\
01637 const int src1= src[1*srcStride];\
01638 const int src2= src[2*srcStride];\
01639 const int src3= src[3*srcStride];\
01640 const int src4= src[4*srcStride];\
01641 const int src5= src[5*srcStride];\
01642 const int src6= src[6*srcStride];\
01643 const int src7= src[7*srcStride];\
01644 const int src8= src[8*srcStride];\
01645 const int src9= src[9*srcStride];\
01646 const int src10= src[10*srcStride];\
01647 const int src11= src[11*srcStride];\
01648 const int src12= src[12*srcStride];\
01649 const int src13= src[13*srcStride];\
01650 const int src14= src[14*srcStride];\
01651 const int src15= src[15*srcStride];\
01652 const int src16= src[16*srcStride];\
01653 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
01654 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
01655 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
01656 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
01657 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
01658 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
01659 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
01660 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
01661 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
01662 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
01663 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
01664 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
01665 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
01666 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
01667 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
01668 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
01669 dst++;\
01670 src++;\
01671 }\
01672 }\
01673 \
01674 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
01675 OPNAME ## pixels8_c(dst, src, stride, 8);\
01676 }\
01677 \
01678 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01679 uint8_t half[64];\
01680 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01681 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
01682 }\
01683 \
01684 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01685 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
01686 }\
01687 \
01688 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01689 uint8_t half[64];\
01690 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01691 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
01692 }\
01693 \
01694 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01695 uint8_t full[16*9];\
01696 uint8_t half[64];\
01697 copy_block9(full, src, 16, stride, 9);\
01698 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01699 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
01700 }\
01701 \
01702 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01703 uint8_t full[16*9];\
01704 copy_block9(full, src, 16, stride, 9);\
01705 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
01706 }\
01707 \
01708 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01709 uint8_t full[16*9];\
01710 uint8_t half[64];\
01711 copy_block9(full, src, 16, stride, 9);\
01712 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01713 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
01714 }\
01715 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01716 uint8_t full[16*9];\
01717 uint8_t halfH[72];\
01718 uint8_t halfV[64];\
01719 uint8_t halfHV[64];\
01720 copy_block9(full, src, 16, stride, 9);\
01721 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01722 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01723 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01724 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01725 }\
01726 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01727 uint8_t full[16*9];\
01728 uint8_t halfH[72];\
01729 uint8_t halfHV[64];\
01730 copy_block9(full, src, 16, stride, 9);\
01731 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01732 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01733 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01734 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01735 }\
01736 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01737 uint8_t full[16*9];\
01738 uint8_t halfH[72];\
01739 uint8_t halfV[64];\
01740 uint8_t halfHV[64];\
01741 copy_block9(full, src, 16, stride, 9);\
01742 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01743 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01744 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01745 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01746 }\
01747 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01748 uint8_t full[16*9];\
01749 uint8_t halfH[72];\
01750 uint8_t halfHV[64];\
01751 copy_block9(full, src, 16, stride, 9);\
01752 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01753 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01754 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01755 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01756 }\
01757 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01758 uint8_t full[16*9];\
01759 uint8_t halfH[72];\
01760 uint8_t halfV[64];\
01761 uint8_t halfHV[64];\
01762 copy_block9(full, src, 16, stride, 9);\
01763 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01764 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01765 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01766 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01767 }\
01768 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01769 uint8_t full[16*9];\
01770 uint8_t halfH[72];\
01771 uint8_t halfHV[64];\
01772 copy_block9(full, src, 16, stride, 9);\
01773 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01774 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01775 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01776 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01777 }\
01778 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01779 uint8_t full[16*9];\
01780 uint8_t halfH[72];\
01781 uint8_t halfV[64];\
01782 uint8_t halfHV[64];\
01783 copy_block9(full, src, 16, stride, 9);\
01784 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01785 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01786 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01787 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01788 }\
01789 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01790 uint8_t full[16*9];\
01791 uint8_t halfH[72];\
01792 uint8_t halfHV[64];\
01793 copy_block9(full, src, 16, stride, 9);\
01794 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01795 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01796 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01797 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01798 }\
01799 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01800 uint8_t halfH[72];\
01801 uint8_t halfHV[64];\
01802 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01803 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01804 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01805 }\
01806 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01807 uint8_t halfH[72];\
01808 uint8_t halfHV[64];\
01809 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01810 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01811 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01812 }\
01813 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01814 uint8_t full[16*9];\
01815 uint8_t halfH[72];\
01816 uint8_t halfV[64];\
01817 uint8_t halfHV[64];\
01818 copy_block9(full, src, 16, stride, 9);\
01819 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01820 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01821 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01822 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01823 }\
01824 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01825 uint8_t full[16*9];\
01826 uint8_t halfH[72];\
01827 copy_block9(full, src, 16, stride, 9);\
01828 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01829 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01830 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01831 }\
01832 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01833 uint8_t full[16*9];\
01834 uint8_t halfH[72];\
01835 uint8_t halfV[64];\
01836 uint8_t halfHV[64];\
01837 copy_block9(full, src, 16, stride, 9);\
01838 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01839 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01840 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01841 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01842 }\
01843 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01844 uint8_t full[16*9];\
01845 uint8_t halfH[72];\
01846 copy_block9(full, src, 16, stride, 9);\
01847 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01848 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01849 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01850 }\
01851 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01852 uint8_t halfH[72];\
01853 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01854 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01855 }\
01856 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
01857 OPNAME ## pixels16_c(dst, src, stride, 16);\
01858 }\
01859 \
01860 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01861 uint8_t half[256];\
01862 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01863 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
01864 }\
01865 \
01866 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01867 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01868 }\
01869 \
01870 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01871 uint8_t half[256];\
01872 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01873 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
01874 }\
01875 \
01876 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01877 uint8_t full[24*17];\
01878 uint8_t half[256];\
01879 copy_block17(full, src, 24, stride, 17);\
01880 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01881 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
01882 }\
01883 \
01884 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01885 uint8_t full[24*17];\
01886 copy_block17(full, src, 24, stride, 17);\
01887 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01888 }\
01889 \
01890 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01891 uint8_t full[24*17];\
01892 uint8_t half[256];\
01893 copy_block17(full, src, 24, stride, 17);\
01894 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01895 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
01896 }\
01897 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01898 uint8_t full[24*17];\
01899 uint8_t halfH[272];\
01900 uint8_t halfV[256];\
01901 uint8_t halfHV[256];\
01902 copy_block17(full, src, 24, stride, 17);\
01903 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01904 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01905 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01906 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01907 }\
01908 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01909 uint8_t full[24*17];\
01910 uint8_t halfH[272];\
01911 uint8_t halfHV[256];\
01912 copy_block17(full, src, 24, stride, 17);\
01913 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01914 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
01915 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01916 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01917 }\
01918 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01919 uint8_t full[24*17];\
01920 uint8_t halfH[272];\
01921 uint8_t halfV[256];\
01922 uint8_t halfHV[256];\
01923 copy_block17(full, src, 24, stride, 17);\
01924 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01925 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01926 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01927 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01928 }\
01929 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01930 uint8_t full[24*17];\
01931 uint8_t halfH[272];\
01932 uint8_t halfHV[256];\
01933 copy_block17(full, src, 24, stride, 17);\
01934 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01935 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
01936 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01937 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01938 }\
01939 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01940 uint8_t full[24*17];\
01941 uint8_t halfH[272];\
01942 uint8_t halfV[256];\
01943 uint8_t halfHV[256];\
01944 copy_block17(full, src, 24, stride, 17);\
01945 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01946 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01947 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01948 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01949 }\
01950 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01951 uint8_t full[24*17];\
01952 uint8_t halfH[272];\
01953 uint8_t halfHV[256];\
01954 copy_block17(full, src, 24, stride, 17);\
01955 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01956 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
01957 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01958 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01959 }\
01960 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01961 uint8_t full[24*17];\
01962 uint8_t halfH[272];\
01963 uint8_t halfV[256];\
01964 uint8_t halfHV[256];\
01965 copy_block17(full, src, 24, stride, 17);\
01966 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01967 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01968 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01969 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01970 }\
01971 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01972 uint8_t full[24*17];\
01973 uint8_t halfH[272];\
01974 uint8_t halfHV[256];\
01975 copy_block17(full, src, 24, stride, 17);\
01976 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01977 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
01978 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01979 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01980 }\
01981 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01982 uint8_t halfH[272];\
01983 uint8_t halfHV[256];\
01984 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01985 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01986 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01987 }\
01988 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01989 uint8_t halfH[272];\
01990 uint8_t halfHV[256];\
01991 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01992 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01993 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01994 }\
01995 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01996 uint8_t full[24*17];\
01997 uint8_t halfH[272];\
01998 uint8_t halfV[256];\
01999 uint8_t halfHV[256];\
02000 copy_block17(full, src, 24, stride, 17);\
02001 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02002 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02003 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02004 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02005 }\
02006 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
02007 uint8_t full[24*17];\
02008 uint8_t halfH[272];\
02009 copy_block17(full, src, 24, stride, 17);\
02010 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02011 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02012 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02013 }\
02014 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
02015 uint8_t full[24*17];\
02016 uint8_t halfH[272];\
02017 uint8_t halfV[256];\
02018 uint8_t halfHV[256];\
02019 copy_block17(full, src, 24, stride, 17);\
02020 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02021 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02022 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02023 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02024 }\
02025 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
02026 uint8_t full[24*17];\
02027 uint8_t halfH[272];\
02028 copy_block17(full, src, 24, stride, 17);\
02029 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02030 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02031 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02032 }\
02033 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02034 uint8_t halfH[272];\
02035 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02036 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02037 }
02038
/*
 * Store operators passed to QPEL_MC as its OP argument.
 * In each of them `b` is a raw filter sum: a bias is added, the sum is
 * shifted right by 5 and the result is looked up through `cm`, which is not
 * defined here and therefore has to be in scope wherever OP is expanded.
 *   - op_put / op_put_no_rnd : overwrite `a` (bias 16 vs. 15).
 *   - op_avg / op_avg_no_rnd : average the lookup with the previous `a`;
 *     the _no_rnd form uses bias 15 and omits the +1 before the final >>1.
 */
02039 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
02040 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
02041 #define op_put(a, b) a = cm[((b) + 16)>>5]
02042 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
02043
/*
 * Expand the qpel function family three times: put_, put_no_rnd_ and avg_.
 * NOTE(review): op_avg_no_rnd is defined above and undefined below, but none
 * of these instantiations passes it to QPEL_MC.
 */
02044 QPEL_MC(0, put_ , _ , op_put)
02045 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
02046 QPEL_MC(0, avg_ , _ , op_avg)
02047
/* The operator names are reused further down, so drop them here. */
02048 #undef op_avg
02049 #undef op_avg_no_rnd
02050 #undef op_put
02051 #undef op_put_no_rnd
02052
02053 #if 1
/**
 * Generate the H.264 6-tap lowpass primitives for one store flavour.
 *
 * Expands to the static functions
 * OPNAME##h264_qpel{4,8,16}_{h,v,hv}_lowpass. All of them apply the kernel
 * (1, -5, 20, 20, -5, 1):
 *   - _h_  filters along a row; output x reads src[x-2] .. src[x+3].
 *   - _v_  filters along a column; output row y reads rows y-2 .. y+3.
 *   - _hv_ runs the row filter over N+5 rows (starting two rows above src)
 *          into the int16_t scratch buffer tmp, then runs the column filter
 *          over tmp.
 * The 16-wide variants are assembled from four calls to the 8-wide ones.
 *
 * @param OPNAME prefix pasted in front of every generated function name
 * @param OP     store macro OP(dest, sum) used for single-pass sums
 * @param OP2    store macro OP2(dest, sum) used for two-pass (hv) sums
 *
 * OP and OP2 are expanded inside functions that define a local `cm`
 * (cropTbl + MAX_NEG_CROP), so they may refer to it.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, x;\
    for (row = 0; row < 4; row++) {\
        for (x = 0; x < 4; x++) {\
            OP(dst[x], (src[x] + src[x+1])*20 - (src[x-1] + src[x+2])*5 + (src[x-2] + src[x+3]));\
        }\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int col;\
    for (col = 0; col < 4; col++) {\
        int p[9]; /* p[k] = source pixel of row k-2 */\
        int k, y;\
        /* fetch the whole column before the first store */\
        for (k = 0; k < 9; k++)\
            p[k] = src[(k - 2)*srcStride];\
        for (y = 0; y < 4; y++) {\
            OP(dst[y*dstStride], (p[y+2] + p[y+3])*20 - (p[y+1] + p[y+4])*5 + (p[y] + p[y+5]));\
        }\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, col;\
    /* first pass: unscaled row sums for 4+5 rows, starting two rows up */\
    src -= 2*srcStride;\
    for (row = 0; row < 4 + 5; row++) {\
        int x;\
        for (x = 0; x < 4; x++)\
            tmp[x] = (src[x] + src[x+1])*20 - (src[x-1] + src[x+2])*5 + (src[x-2] + src[x+3]);\
        tmp += tmpStride;\
        src += srcStride;\
    }\
    /* rewind tmp to the scratch row that corresponds to output row 0 */\
    tmp -= tmpStride*(4 + 5 - 2);\
    /* second pass: column filter over the scratch rows */\
    for (col = 0; col < 4; col++) {\
        int t[9]; /* t[k] = scratch value of row k-2 */\
        int k, y;\
        for (k = 0; k < 9; k++)\
            t[k] = tmp[(k - 2)*tmpStride];\
        for (y = 0; y < 4; y++) {\
            OP2(dst[y*dstStride], (t[y+2] + t[y+3])*20 - (t[y+1] + t[y+4])*5 + (t[y] + t[y+5]));\
        }\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, x;\
    for (row = 0; row < 8; row++) {\
        for (x = 0; x < 8; x++) {\
            OP(dst[x], (src[x] + src[x+1])*20 - (src[x-1] + src[x+2])*5 + (src[x-2] + src[x+3]));\
        }\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int col;\
    for (col = 0; col < 8; col++) {\
        int p[13]; /* p[k] = source pixel of row k-2 */\
        int k, y;\
        /* fetch the whole column before the first store */\
        for (k = 0; k < 13; k++)\
            p[k] = src[(k - 2)*srcStride];\
        for (y = 0; y < 8; y++) {\
            OP(dst[y*dstStride], (p[y+2] + p[y+3])*20 - (p[y+1] + p[y+4])*5 + (p[y] + p[y+5]));\
        }\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int row, col;\
    /* first pass: unscaled row sums for 8+5 rows, starting two rows up */\
    src -= 2*srcStride;\
    for (row = 0; row < 8 + 5; row++) {\
        int x;\
        for (x = 0; x < 8; x++)\
            tmp[x] = (src[x] + src[x+1])*20 - (src[x-1] + src[x+2])*5 + (src[x-2] + src[x+3]);\
        tmp += tmpStride;\
        src += srcStride;\
    }\
    /* rewind tmp to the scratch row that corresponds to output row 0 */\
    tmp -= tmpStride*(8 + 5 - 2);\
    /* second pass: column filter over the scratch rows */\
    for (col = 0; col < 8; col++) {\
        int t[13]; /* t[k] = scratch value of row k-2 */\
        int k, y;\
        for (k = 0; k < 13; k++)\
            t[k] = tmp[(k - 2)*tmpStride];\
        for (y = 0; y < 8; y++) {\
            OP2(dst[y*dstStride], (t[y+2] + t[y+3])*20 - (t[y+1] + t[y+4])*5 + (t[y] + t[y+5]));\
        }\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    /* four 8x8 quadrants: top-left, top-right, bottom-left, bottom-right */\
    OPNAME ## h264_qpel8_v_lowpass(dst                  , src                  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst + 8              , src + 8              , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst + 8*dstStride    , src + 8*srcStride    , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst + 8*dstStride + 8, src + 8*srcStride + 8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    /* four 8x8 quadrants: top-left, top-right, bottom-left, bottom-right */\
    OPNAME ## h264_qpel8_h_lowpass(dst                  , src                  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst + 8              , src + 8              , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst + 8*dstStride    , src + 8*srcStride    , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst + 8*dstStride + 8, src + 8*srcStride + 8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    /* every quadrant refills its scratch area, so tmp / tmp+8 are reused for the lower half */\
    OPNAME ## h264_qpel8_hv_lowpass(dst                  , tmp    , src                  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst + 8              , tmp + 8, src + 8              , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst + 8*dstStride    , tmp    , src + 8*srcStride    , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst + 8*dstStride + 8, tmp + 8, src + 8*srcStride + 8, dstStride, tmpStride, srcStride);\
}

/**
 * Generate the sixteen H.264 motion-compensation entry points
 * OPNAME##h264_qpel##SIZE##_mcXY_c for one block size.
 * NOTE(review): X and Y presumably are the horizontal and vertical
 * quarter-sample offsets -- confirm against the table that registers them.
 *
 * Building blocks used below:
 *   - put_h264_qpelSIZE_{h,v,hv}_lowpass  filtered planes, always produced
 *     with the put_ flavour into SIZE-stride temporaries;
 *   - copy_blockSIZE                      copies SIZE+5 source rows, starting
 *     two rows above src, into a SIZE-stride buffer for the column filter;
 *   - OPNAME##pixelsSIZE_l2               combines two planes into dst
 *     (assumed to average them -- defined elsewhere in this file).
 *
 * @param OPNAME store flavour prefix of the generated functions
 * @param SIZE   block width and height (must match existing helper names)
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    /* no filtering: hand the block straight to the pixel routine */\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    /* source combined with its row-filtered version */\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, hfilt, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    /* row filter written directly to dst */\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    /* same as mc10 but paired with the source one pixel to the right */\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, hfilt, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const center= &padded[2*SIZE]; /* row 0 of the block inside padded */\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, center, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, center, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const center= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    /* column filter written directly to dst */\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, center, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const center= &padded[2*SIZE];\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, center, SIZE, SIZE);\
    /* same as mc01 but paired with the source one row further down */\
    OPNAME ## pixels ## SIZE ## _l2(dst, center+SIZE, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const center= &padded[2*SIZE];\
    /* column-filtered plane at src */\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, center, SIZE, SIZE);\
    /* row-filtered plane at src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const center= &padded[2*SIZE];\
    /* column-filtered plane one pixel to the right of src */\
    copy_block ## SIZE (padded, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, center, SIZE, SIZE);\
    /* row-filtered plane at src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const center= &padded[2*SIZE];\
    /* column-filtered plane at src */\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, center, SIZE, SIZE);\
    /* row-filtered plane one row below src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const center= &padded[2*SIZE];\
    /* column-filtered plane one pixel to the right of src */\
    copy_block ## SIZE (padded, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, center, SIZE, SIZE);\
    /* row-filtered plane one row below src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, vfilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t scratch[SIZE*(SIZE+5)];\
    /* two-pass filter written directly to dst */\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, scratch, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    int16_t scratch[SIZE*(SIZE+5)];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, scratch, src, SIZE, SIZE, stride);\
    /* row-filtered plane at src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t hfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    int16_t scratch[SIZE*(SIZE+5)];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, scratch, src, SIZE, SIZE, stride);\
    /* row-filtered plane one row below src */\
    put_h264_qpel ## SIZE ## _h_lowpass(hfilt, src + stride, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, hfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const center= &padded[2*SIZE];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, scratch, src, SIZE, SIZE, stride);\
    /* column-filtered plane at src */\
    copy_block ## SIZE (padded, src - 2*stride, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, center, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t vfilt[SIZE*SIZE];\
    uint8_t hvfilt[SIZE*SIZE];\
    int16_t scratch[SIZE*(SIZE+5)];\
    uint8_t padded[SIZE*(SIZE+5)];\
    uint8_t * const center= &padded[2*SIZE];\
    put_h264_qpel ## SIZE ## _hv_lowpass(hvfilt, scratch, src, SIZE, SIZE, stride);\
    /* column-filtered plane one pixel to the right of src */\
    copy_block ## SIZE (padded, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(vfilt, center, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, vfilt, hvfilt, stride, SIZE, SIZE, SIZE);\
}

/*
 * Store operators for the H.264 filters; `cm` must be in scope where they
 * are expanded.
 *   op_*  : single-pass sums. The 6-tap kernel used by H264_LOWPASS sums to
 *           32, hence bias 16 and >>5.
 *   op2_* : two-pass (hv) sums, i.e. 32*32 = 1024, hence bias 512 and >>10.
 *   *_put overwrites `a`; *_avg averages the lookup with the previous `a`,
 *   adding 1 before the final >>1.
 */
02393 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
02394
02395 #define op_put(a, b) a = cm[((b) + 16)>>5]
02396 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
02397 #define op2_put(a, b) a = cm[((b) + 512)>>10]
02398
/*
 * Emit the lowpass primitives for both store flavours first: every H264_MC
 * expansion, including the avg_ ones, calls the put_ lowpass functions.
 */
02399 H264_LOWPASS(put_ , op_put, op2_put)
02400 H264_LOWPASS(avg_ , op_avg, op2_avg)
/* Motion-compensation entry points for 4x4, 8x8 and 16x16 blocks. */
02401 H264_MC(put_, 4)
02402 H264_MC(put_, 8)
02403 H264_MC(put_, 16)
02404 H264_MC(avg_, 4)
02405 H264_MC(avg_, 8)
02406 H264_MC(avg_, 16)
02407
/* The operator macros are only needed for the expansions above. */
02408 #undef op_avg
02409 #undef op_put
02410 #undef op2_avg
02411 #undef op2_put
02412 #endif
02413
/*
 * Per-pixel bodies of the H.264 weighting functions generated below.
 *   op_scale1(x): in-place weighting of block[x].
 *   op_scale2(x): bidirectional weighting of src[x] and dst[x] into dst[x].
 * Both rely on locals/parameters of the enclosing generated function and on
 * clip_uint8() to bring the result back into 0..255.
 * Note: '>>' is applied to sums that may be negative; this relies on the
 * compiler implementing it as an arithmetic shift (implementation-defined).
 */
#define op_scale1(x)  block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))

/**
 * Generate weight_h264_pixelsWxH_c() and biweight_h264_pixelsWxH_c().
 *
 * weight_h264_pixelsWxH_c(block, stride, log2_denom, weight, offset)
 *   rewrites every pixel of the W x H block in place as
 *     clip_uint8((pixel*weight + off) >> log2_denom)
 *   with off = offset * 2^log2_denom, plus the rounding term
 *   2^(log2_denom-1) when log2_denom is non-zero.
 *
 * biweight_h264_pixelsWxH_c(dst, src, stride, log2_denom,
 *                           weightd, weights, offsetd, offsets)
 *   rewrites every pixel of dst as
 *     clip_uint8((src*weights + dst*weightd + off) >> (log2_denom+1))
 *   with off = (2*((offsets + offsetd + 1) >> 1) + 1) * 2^log2_denom.
 *
 * The offsets are scaled by multiplication instead of '<<' because the
 * offsets may be negative and left-shifting a negative value is undefined
 * behaviour in C; for non-negative values the result is unchanged.
 *
 * @param W block width in pixels
 * @param H block height in rows
 */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int x, y; \
    offset *= 1 << log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        for(x=0; x<W; x++){ \
            op_scale1(x); \
        } \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets){ \
    int x, y; \
    int offset = (offsets + offsetd + 1) >> 1; \
    offset = (offset*2 + 1) * (1 << log2_denom); \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        for(x=0; x<W; x++){ \
            op_scale2(x); \
        } \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
02484
02485 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
02486 uint8_t *cm = cropTbl + MAX_NEG_CROP;
02487 int i;
02488
02489 for(i=0; i<h; i++){
02490 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
02491 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
02492 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
02493 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
02494 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
02495 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
02496 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
02497 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
02498 dst+=dstStride;
02499 src+=srcStride;
02500 }
02501 }
02502
02503 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
02504 uint8_t *cm = cropTbl + MAX_NEG_CROP;
02505 int i;
02506
02507 for(i=0; i<w; i++){
02508 const int src_1= src[ -srcStride];
02509 const int src0 = src[0 ];
02510 const int src1 = src[ srcStride];
02511 const int src2 = src[2*srcStride];
02512 const int src3 = src[3*srcStride];
02513 const int src4 = src[4*srcStride];
02514 const int src5 = src[5*srcStride];
02515 const int src6 = src[6*srcStride];
02516 const int src7 = src[7*srcStride];
02517 const int src8 = src[8*srcStride];
02518 const int src9 = src[9*srcStride];
02519 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
02520 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
02521 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
02522 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
02523 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
02524 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
02525 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
02526 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
02527 src++;
02528 dst++;
02529 }
02530 }
02531
/**
 * mspel position (0,0): no filtering, the 8-row block is handed straight to
 * put_pixels8_c().
 */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}
02535
/**
 * mspel position (1,0): the horizontally lowpassed block is combined with
 * the unshifted source by put_pixels8_l2().
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[64];   /* 8x8 scratch, row pitch 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}
02541
/**
 * mspel position (2,0): the horizontal lowpass output is written directly
 * into dst (8 rows).
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
02545
/**
 * mspel position (3,0): the horizontally lowpassed block is combined with
 * the source shifted one pixel to the right (src+1) by put_pixels8_l2().
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[64];   /* 8x8 scratch, row pitch 8 */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src + 1, filtered, stride, stride, 8, 8);
}
02551
/**
 * mspel position (0,2): the vertical lowpass output is written directly
 * into dst (8 columns).
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
02555
/**
 * mspel position (1,2): combines, via put_pixels8_l2(), the vertically
 * filtered source with the block that was filtered horizontally and then
 * vertically.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[88];     /* 11 rows of 8: source rows -1 .. 9, h-filtered */
    uint8_t v_only[64];
    uint8_t h_then_v[64];

    /* The horizontal pass starts one row above the block so that the
     * vertical pass over h_rows+8 has its row -1 available. */
    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_only, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(h_then_v, h_rows + 8, 8, 8, 8);

    put_pixels8_l2(dst, v_only, h_then_v, stride, 8, 8, 8);
}
/**
 * mspel position (3,2): same as position (1,2) except that the vertical-only
 * pass reads the source shifted one pixel to the right (src+1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[88];     /* 11 rows of 8: source rows -1 .. 9, h-filtered */
    uint8_t v_only[64];
    uint8_t h_then_v[64];

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_only, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(h_then_v, h_rows + 8, 8, 8, 8);

    put_pixels8_l2(dst, v_only, h_then_v, stride, 8, 8, 8);
}
/**
 * mspel position (2,2): horizontal lowpass over 11 rows (starting one row
 * above the block) followed by a vertical lowpass written directly to dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[88];     /* 11 rows of 8 */

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, h_rows + 8, stride, 8, 8);
}
02579
02580 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
02581 int x;
02582 const int strength= ff_h263_loop_filter_strength[qscale];
02583
02584 for(x=0; x<8; x++){
02585 int d1, d2, ad1;
02586 int p0= src[x-2*stride];
02587 int p1= src[x-1*stride];
02588 int p2= src[x+0*stride];
02589 int p3= src[x+1*stride];
02590 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
02591
02592 if (d<-2*strength) d1= 0;
02593 else if(d<- strength) d1=-2*strength - d;
02594 else if(d< strength) d1= d;
02595 else if(d< 2*strength) d1= 2*strength - d;
02596 else d1= 0;
02597
02598 p1 += d1;
02599 p2 -= d1;
02600 if(p1&256) p1= ~(p1>>31);
02601 if(p2&256) p2= ~(p2>>31);
02602
02603 src[x-1*stride] = p1;
02604 src[x+0*stride] = p2;
02605
02606 ad1= ABS(d1)>>1;
02607
02608 d2= clip((p0-p3)/4, -ad1, ad1);
02609
02610 src[x-2*stride] = p0 - d2;
02611 src[x+ stride] = p3 + d2;
02612 }
02613 }
02614
02615 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
02616 int y;
02617 const int strength= ff_h263_loop_filter_strength[qscale];
02618
02619 for(y=0; y<8; y++){
02620 int d1, d2, ad1;
02621 int p0= src[y*stride-2];
02622 int p1= src[y*stride-1];
02623 int p2= src[y*stride+0];
02624 int p3= src[y*stride+1];
02625 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
02626
02627 if (d<-2*strength) d1= 0;
02628 else if(d<- strength) d1=-2*strength - d;
02629 else if(d< strength) d1= d;
02630 else if(d< 2*strength) d1= 2*strength - d;
02631 else d1= 0;
02632
02633 p1 += d1;
02634 p2 -= d1;
02635 if(p1&256) p1= ~(p1>>31);
02636 if(p2&256) p2= ~(p2>>31);
02637
02638 src[y*stride-1] = p1;
02639 src[y*stride+0] = p2;
02640
02641 ad1= ABS(d1)>>1;
02642
02643 d2= clip((p0-p3)/4, -ad1, ad1);
02644
02645 src[y*stride-2] = p0 - d2;
02646 src[y*stride+1] = p3 + d2;
02647 }
02648 }
02649
/**
 * In-place separable (1, 2, 1)/4 smoothing of an 8x8 block.
 *
 * The vertical pass stores unnormalised sums (scale 4) in a scratch array;
 * the first and last rows are not filtered vertically and are simply scaled
 * by 4. The horizontal pass then filters columns 1..6 (total scale 16) and
 * only rescales the first and last columns.
 *
 * @param src    top-left pixel of the block, modified in place
 * @param stride distance between rows
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int vert[8][8];
    int row, col;

    /* Border rows: pass through at scale 4. */
    for (col = 0; col < 8; col++) {
        vert[0][col] = 4 * src[col];
        vert[7][col] = 4 * src[col + 7 * stride];
    }

    /* Interior rows: vertical (1, 2, 1). */
    for (row = 1; row < 7; row++) {
        const uint8_t *const cur = src + row * stride;

        for (col = 0; col < 8; col++)
            vert[row][col] = cur[col - stride] + 2 * cur[col] + cur[col + stride];
    }

    for (row = 0; row < 8; row++) {
        uint8_t *const out = src + row * stride;
        const int *const v = vert[row];

        /* Border columns only undo the vertical scale. */
        out[0] = (v[0] + 2) >> 2;
        out[7] = (v[7] + 2) >> 2;

        for (col = 1; col < 7; col++)
            out[col] = (v[col - 1] + 2 * v[col] + v[col + 1] + 8) >> 4;
    }
}
02676
/**
 * H.264 normal (non-intra) luma deblocking of one 16-sample edge.
 *
 * The edge is handled as 4 segments of 4 lines. A segment whose tc0 entry is
 * negative is skipped entirely. For every other line the three samples on
 * each side of the edge are read and, if the alpha/beta activity tests pass,
 * p0/q0 (and conditionally p1/q1) are adjusted.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride distance between samples across the edge
 * @param ystride distance between successive lines along the edge
 * @param alpha   threshold for |p0 - q0|
 * @param beta    threshold for |p1 - p0|, |q1 - q0|, |p2 - p0|, |q2 - q0|
 * @param tc0     4 clipping values, one per segment
 */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;

    for (i = 0; i < 4; i++) {
        const int tc_base = tc0[i];

        if (tc_base < 0) {
            pix += 4 * ystride;
            continue;
        }

        for (d = 0; d < 4; d++, pix += ystride) {
            const int p0 = pix[-1 * xstride];
            const int p1 = pix[-2 * xstride];
            const int p2 = pix[-3 * xstride];
            const int q0 = pix[0];
            const int q1 = pix[1 * xstride];
            const int q2 = pix[2 * xstride];
            int tc, mid, i_delta;

            if (ABS(p0 - q0) >= alpha ||
                ABS(p1 - p0) >= beta  ||
                ABS(q1 - q0) >= beta)
                continue;

            tc  = tc_base;
            mid = (p0 + q0 + 1) >> 1;

            /* Each side that is smooth enough also gets its second sample
             * corrected and widens the clipping range for p0/q0 by one. */
            if (ABS(p2 - p0) < beta) {
                pix[-2 * xstride] = p1 + clip(((p2 + mid) >> 1) - p1, -tc_base, tc_base);
                tc++;
            }
            if (ABS(q2 - q0) < beta) {
                pix[xstride] = q1 + clip(((q2 + mid) >> 1) - q1, -tc_base, tc_base);
                tc++;
            }

            /* Multiply instead of "<< 2": q0 - p0 may be negative and left
             * shifting a negative int is undefined behaviour. */
            i_delta = clip(((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc);
            pix[-xstride] = clip_uint8(p0 + i_delta);
            pix[0]        = clip_uint8(q0 - i_delta);
        }
    }
}
/**
 * Luma deblocking with the samples across the edge one row apart
 * (xstride = stride) and successive lines one pixel apart (ystride = 1).
 */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = stride;
    const int along  = 1;

    h264_loop_filter_luma_c(pix, across, along, alpha, beta, tc0);
}
/**
 * Luma deblocking with the samples across the edge one pixel apart
 * (xstride = 1) and successive lines one row apart (ystride = stride).
 */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = 1;
    const int along  = stride;

    h264_loop_filter_luma_c(pix, across, along, alpha, beta, tc0);
}
02725
/**
 * H.264 normal (non-intra) chroma deblocking of one 8-sample edge.
 *
 * The edge is handled as 4 segments of 2 lines. A segment whose tc0 entry is
 * zero or negative is skipped. On the remaining lines only p0 and q0 are
 * modified, and only when the alpha/beta activity tests pass.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride distance between samples across the edge
 * @param ystride distance between successive lines along the edge
 * @param alpha   threshold for |p0 - q0|
 * @param beta    threshold for |p1 - p0| and |q1 - q0|
 * @param tc0     4 clipping values, one per segment
 */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;

    for (i = 0; i < 4; i++) {
        const int tc = tc0[i];

        if (tc <= 0) {
            pix += 2 * ystride;
            continue;
        }

        for (d = 0; d < 2; d++, pix += ystride) {
            const int p0 = pix[-1 * xstride];
            const int p1 = pix[-2 * xstride];
            const int q0 = pix[0];
            const int q1 = pix[1 * xstride];
            int delta;

            if (ABS(p0 - q0) >= alpha ||
                ABS(p1 - p0) >= beta  ||
                ABS(q1 - q0) >= beta)
                continue;

            /* Multiply instead of "<< 2": q0 - p0 may be negative and left
             * shifting a negative int is undefined behaviour. */
            delta = clip(((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc);

            pix[-xstride] = clip_uint8(p0 + delta);
            pix[0]        = clip_uint8(q0 - delta);
        }
    }
}
/**
 * Chroma deblocking with the samples across the edge one row apart
 * (xstride = stride) and successive lines one pixel apart (ystride = 1).
 */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = stride;
    const int along  = 1;

    h264_loop_filter_chroma_c(pix, across, along, alpha, beta, tc0);
}
/**
 * Chroma deblocking with the samples across the edge one pixel apart
 * (xstride = 1) and successive lines one row apart (ystride = stride).
 */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = 1;
    const int along  = stride;

    h264_loop_filter_chroma_c(pix, across, along, alpha, beta, tc0);
}
02762
/**
 * H.264 intra chroma deblocking of one 8-sample edge.
 *
 * On every line that passes the alpha/beta activity tests, p0 and q0 are
 * replaced by a fixed weighted average of their neighbours; no clipping
 * table is involved.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride distance between samples across the edge
 * @param ystride distance between successive lines along the edge
 * @param alpha   threshold for |p0 - q0|
 * @param beta    threshold for |p1 - p0| and |q1 - q0|
 */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int line;

    for (line = 0; line < 8; line++, pix += ystride) {
        const int p1 = pix[-2 * xstride];
        const int p0 = pix[-1 * xstride];
        const int q0 = pix[0];
        const int q1 = pix[1 * xstride];

        if (ABS(p0 - q0) >= alpha ||
            ABS(p1 - p0) >= beta  ||
            ABS(q1 - q0) >= beta)
            continue;

        pix[-xstride] = (2 * p1 + p0 + q1 + 2) >> 2;
        pix[0]        = (2 * q1 + q0 + p1 + 2) >> 2;
    }
}
/**
 * Intra chroma deblocking with the samples across the edge one row apart
 * (xstride = stride) and successive lines one pixel apart (ystride = 1).
 */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = stride;
    const int along  = 1;

    h264_loop_filter_chroma_intra_c(pix, across, along, alpha, beta);
}
/**
 * Intra chroma deblocking with the samples across the edge one pixel apart
 * (xstride = 1) and successive lines one row apart (ystride = stride).
 */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = 1;
    const int along  = stride;

    h264_loop_filter_chroma_intra_c(pix, across, along, alpha, beta);
}
02790
/**
 * Sum of absolute differences between two 16-pixel-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated SAD (0 when h is 0)
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
02818
/**
 * SAD between a 16-wide block and a reference in which each pixel is
 * avg2() of itself and its right neighbour.
 *
 * Reads pix2[0..16] on every row.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
02846
/**
 * SAD between a 16-wide block and a reference in which each pixel is
 * avg2() of itself and the pixel one row below.
 *
 * Reads h+1 rows of pix2.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02876
/**
 * SAD between a 16-wide block and a reference in which each pixel is
 * avg4() of the 2x2 neighbourhood starting at that pixel.
 *
 * Reads pix2[0..16] on h+1 rows.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02906
/**
 * Sum of absolute differences between two 8-pixel-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 * @return the accumulated SAD (0 when h is 0)
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
02926
/**
 * SAD between an 8-wide block and a reference in which each pixel is
 * avg2() of itself and its right neighbour.
 *
 * Reads pix2[0..8] on every row.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
02946
/**
 * SAD between an 8-wide block and a reference in which each pixel is
 * avg2() of itself and the pixel one row below.
 *
 * Reads h+1 rows of pix2.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02968
/**
 * SAD between an 8-wide block and a reference in which each pixel is
 * avg4() of the 2x2 neighbourhood starting at that pixel.
 *
 * Reads pix2[0..8] on h+1 rows.
 *
 * @param v         unused
 * @param pix1      block to compare
 * @param pix2      reference block
 * @param line_size distance between rows, shared by both blocks
 * @param h         number of rows
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
02990
02991 static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){
02992 int score1=0;
02993 int score2=0;
02994 int x,y;
02995
02996 for(y=0; y<h; y++){
02997 for(x=0; x<16; x++){
02998 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
02999 }
03000 if(y+1<h){
03001 for(x=0; x<15; x++){
03002 score2+= ABS( s1[x ] - s1[x +stride]
03003 - s1[x+1] + s1[x+1+stride])
03004 -ABS( s2[x ] - s2[x +stride]
03005 - s2[x+1] + s2[x+1+stride]);
03006 }
03007 }
03008 s1+= stride;
03009 s2+= stride;
03010 }
03011
03012 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
03013 else return score1 + ABS(score2)*8;
03014 }
03015
03016 static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){
03017 int score1=0;
03018 int score2=0;
03019 int x,y;
03020
03021 for(y=0; y<h; y++){
03022 for(x=0; x<8; x++){
03023 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
03024 }
03025 if(y+1<h){
03026 for(x=0; x<7; x++){
03027 score2+= ABS( s1[x ] - s1[x +stride]
03028 - s1[x+1] + s1[x+1+stride])
03029 -ABS( s2[x ] - s2[x +stride]
03030 - s2[x+1] + s2[x+1+stride]);
03031 }
03032 }
03033 s1+= stride;
03034 s2+= stride;
03035 }
03036
03037 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
03038 else return score1 + ABS(score2)*8;
03039 }
03040
03041 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
03042 int i;
03043 unsigned int sum=0;
03044
03045 for(i=0; i<8*8; i++){
03046 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
03047 int w= weight[i];
03048 b>>= RECON_SHIFT;
03049 assert(-512<b && b<512);
03050
03051 sum += (w*b)*(w*b)>>4;
03052 }
03053 return sum>>2;
03054 }
03055
03056 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
03057 int i;
03058
03059 for(i=0; i<8*8; i++){
03060 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
03061 }
03062 }
03063
03072 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
03073 {
03074 int i;
03075 DCTELEM temp[64];
03076
03077 if(last<=0) return;
03078
03079
03080 for(i=0; i<=last; i++){
03081 const int j= scantable[i];
03082 temp[j]= block[j];
03083 block[j]=0;
03084 }
03085
03086 for(i=0; i<=last; i++){
03087 const int j= scantable[i];
03088 const int perm_j= permutation[j];
03089 block[perm_j]= temp[j];
03090 }
03091 }
03092
/**
 * Comparison function that ignores all of its arguments and reports a
 * score of 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    const int score = 0;

    return score;
}
03096
03097 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
03098 int i;
03099
03100 memset(cmp, 0, sizeof(void*)*5);
03101
03102 for(i=0; i<5; i++){
03103 switch(type&0xFF){
03104 case FF_CMP_SAD:
03105 cmp[i]= c->sad[i];
03106 break;
03107 case FF_CMP_SATD:
03108 cmp[i]= c->hadamard8_diff[i];
03109 break;
03110 case FF_CMP_SSE:
03111 cmp[i]= c->sse[i];
03112 break;
03113 case FF_CMP_DCT:
03114 cmp[i]= c->dct_sad[i];
03115 break;
03116 case FF_CMP_DCTMAX:
03117 cmp[i]= c->dct_max[i];
03118 break;
03119 case FF_CMP_PSNR:
03120 cmp[i]= c->quant_psnr[i];
03121 break;
03122 case FF_CMP_BIT:
03123 cmp[i]= c->bit[i];
03124 break;
03125 case FF_CMP_RD:
03126 cmp[i]= c->rd[i];
03127 break;
03128 case FF_CMP_VSAD:
03129 cmp[i]= c->vsad[i];
03130 break;
03131 case FF_CMP_VSSE:
03132 cmp[i]= c->vsse[i];
03133 break;
03134 case FF_CMP_ZERO:
03135 cmp[i]= zero_cmp;
03136 break;
03137 case FF_CMP_NSSE:
03138 cmp[i]= c->nsse[i];
03139 break;
03140 case FF_CMP_W53:
03141 cmp[i]= c->w53[i];
03142 break;
03143 case FF_CMP_W97:
03144 cmp[i]= c->w97[i];
03145 break;
03146 default:
03147 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
03148 }
03149 }
03150 }
03151
03155 static void clear_blocks_c(DCTELEM *blocks)
03156 {
03157 memset(blocks, 0, sizeof(DCTELEM)*6*64);
03158 }
03159
/**
 * Adds src to dst byte by byte, in ascending index order; each sum wraps
 * modulo 256 when stored.
 *
 * @param dst accumulator, updated in place
 * @param src bytes to add
 * @param w   number of bytes; nothing is done when w <= 0
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int pos = 0;

    while (pos < w) {
        dst[pos] += src[pos];
        pos++;
    }
}
03175
/**
 * Stores src1 - src2 into dst byte by byte, in ascending index order; each
 * difference wraps modulo 256 when stored.
 *
 * @param dst  destination
 * @param src1 minuend bytes
 * @param src2 subtrahend bytes
 * @param w    number of bytes; nothing is done when w <= 0
 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int pos = 0;

    while (pos < w) {
        dst[pos] = src1[pos] - src2[pos];
        pos++;
    }
}
03191
/**
 * Writes to dst the residual of src2 against a median predictor.
 *
 * For each position the predictor is mid_pred() of the running left value,
 * src1[i], and (left + src1[i] - left_top) masked to 8 bits. The running
 * left value is then taken from src2[i] and the running top-left value from
 * src1[i].
 *
 * @param dst      residuals, w bytes
 * @param src1     values used as the "top" input of the predictor
 * @param src2     values being predicted
 * @param w        number of bytes
 * @param left     in: initial left value; out: last left value
 * @param left_top in: initial top-left value; out: last top-left value
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur_left     = *left;
    uint8_t cur_left_top = *left_top;
    int pos;

    for (pos = 0; pos < w; pos++) {
        const int gradient  = (cur_left + src1[pos] - cur_left_top) & 0xFF;
        const int predicted = mid_pred(cur_left, src1[pos], gradient);

        cur_left_top = src1[pos];
        cur_left     = src2[pos];
        dst[pos]     = cur_left - predicted;
    }

    *left     = cur_left;
    *left_top = cur_left_top;
}
03209
/**
 * Butterfly with separate outputs: o1 = i1 + i2, o2 = i1 - i2.
 * i1 and i2 are each evaluated twice. Wrapped in do/while(0) so that the
 * macro acts as a single statement, e.g. under an unbraced if/else.
 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do {\
    (o1)= (i1)+(i2);\
    (o2)= (i1)-(i2);\
} while(0)

/**
 * In-place butterfly: x becomes x + y and y becomes x - y, both computed
 * from the old values. The temporaries carry a suffix so that arguments
 * named a or b are not captured by the macro's own locals.
 */
#define BUTTERFLY1(x,y) \
do {\
    const int butterfly_a_= (x);\
    const int butterfly_b_= (y);\
    (x)= butterfly_a_ + butterfly_b_;\
    (y)= butterfly_a_ - butterfly_b_;\
} while(0)

/** Final butterfly stage folded into a magnitude: |x + y| + |x - y|. */
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
03224
/**
 * Sum of absolute 8x8 Hadamard-transformed differences between src and dst.
 *
 * A three-stage butterfly is applied along each row of (src - dst), then
 * along each column; the last column stage is folded into the absolute-value
 * accumulation through BUTTERFLYA.
 *
 * @param s      unused
 * @param dst    first block
 * @param src    second block
 * @param stride distance between rows, shared by both blocks
 * @param h      must be 8 (asserted)
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* Row pass on the pixel differences. */
    for (i = 0; i < 8; i++) {
        int *const row = temp + 8*i;
        const uint8_t *const srow = src + stride*i;
        const uint8_t *const drow = dst + stride*i;

        BUTTERFLY2(row[0], row[1], srow[0]-drow[0], srow[1]-drow[1]);
        BUTTERFLY2(row[2], row[3], srow[2]-drow[2], srow[3]-drow[3]);
        BUTTERFLY2(row[4], row[5], srow[4]-drow[4], srow[5]-drow[5]);
        BUTTERFLY2(row[6], row[7], srow[6]-drow[6], srow[7]-drow[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* Column pass; the third stage only contributes magnitudes. */
    for (i = 0; i < 8; i++) {
        int *const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }

    return sum;
}
03276
/**
 * Sum of absolute 8x8 Hadamard coefficients of src, excluding the first
 * (DC) coefficient.
 *
 * A three-stage butterfly is applied along each row of src, then along each
 * column; the last column stage is folded into the absolute-value
 * accumulation through BUTTERFLYA, and the |temp[0] + temp[32]| term is
 * subtracted at the end.
 *
 * @param s      unused
 * @param src    block to transform
 * @param dummy  unused
 * @param stride distance between rows
 * @param h      must be 8 (asserted)
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* Row pass on the pixels themselves. */
    for (i = 0; i < 8; i++) {
        int *const row = temp + 8*i;
        const uint8_t *const srow = src + stride*i;

        BUTTERFLY2(row[0], row[1], srow[0], srow[1]);
        BUTTERFLY2(row[2], row[3], srow[2], srow[3]);
        BUTTERFLY2(row[4], row[5], srow[4], srow[5]);
        BUTTERFLY2(row[6], row[7], srow[6], srow[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* Column pass; the third stage only contributes magnitudes. */
    for (i = 0; i < 8; i++) {
        int *const col = temp + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        sum += BUTTERFLYA(col[8*0], col[8*4])
             + BUTTERFLYA(col[8*1], col[8*5])
             + BUTTERFLYA(col[8*2], col[8*6])
             + BUTTERFLYA(col[8*3], col[8*7]);
    }

    /* Remove the term that was accumulated for the first coefficient. */
    sum -= ABS(temp[8*0] + temp[8*4]);

    return sum;
}
03324
03325 static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03326 MpegEncContext * const s= (MpegEncContext *)c;
03327 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
03328 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03329 int sum=0, i;
03330
03331 assert(h==8);
03332
03333 s->dsp.diff_pixels(temp, src1, src2, stride);
03334 s->dsp.fdct(temp);
03335
03336 for(i=0; i<64; i++)
03337 sum+= ABS(temp[i]);
03338
03339 return sum;
03340 }
03341
03342 static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03343 MpegEncContext * const s= (MpegEncContext *)c;
03344 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
03345 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03346 int sum=0, i;
03347
03348 assert(h==8);
03349
03350 s->dsp.diff_pixels(temp, src1, src2, stride);
03351 s->dsp.fdct(temp);
03352
03353 for(i=0; i<64; i++)
03354 sum= FFMAX(sum, ABS(temp[i]));
03355
03356 return sum;
03357 }
03358
03359 void simple_idct(DCTELEM *block);
03360
03361 static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03362 MpegEncContext * const s= (MpegEncContext *)c;
03363 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
03364 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03365 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
03366 int sum=0, i;
03367
03368 assert(h==8);
03369 s->mb_intra=0;
03370
03371 s->dsp.diff_pixels(temp, src1, src2, stride);
03372
03373 memcpy(bak, temp, 64*sizeof(DCTELEM));
03374
03375 s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03376 s->dct_unquantize_inter(s, temp, 0, s->qscale);
03377 simple_idct(temp);
03378
03379 for(i=0; i<64; i++)
03380 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
03381
03382 return sum;
03383 }
03384
03385 static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03386 MpegEncContext * const s= (MpegEncContext *)c;
03387 const uint8_t *scantable= s->intra_scantable.permutated;
03388 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
03389 uint64_t __align8 aligned_bak[stride];
03390 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03391 uint8_t * const bak= (uint8_t*)aligned_bak;
03392 int i, last, run, bits, level, distoration, start_i;
03393 const int esc_length= s->ac_esc_length;
03394 uint8_t * length;
03395 uint8_t * last_length;
03396
03397 assert(h==8);
03398
03399 for(i=0; i<8; i++){
03400 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
03401 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
03402 }
03403
03404 s->dsp.diff_pixels(temp, src1, src2, stride);
03405
03406 s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03407
03408 bits=0;
03409
03410 if (s->mb_intra) {
03411 start_i = 1;
03412 length = s->intra_ac_vlc_length;
03413 last_length= s->intra_ac_vlc_last_length;
03414 bits+= s->luma_dc_vlc_length[temp[0] + 256];
03415 } else {
03416 start_i = 0;
03417 length = s->inter_ac_vlc_length;
03418 last_length= s->inter_ac_vlc_last_length;
03419 }
03420
03421 if(last>=start_i){
03422 run=0;
03423 for(i=start_i; i<last; i++){
03424 int j= scantable[i];
03425 level= temp[j];
03426
03427 if(level){
03428 level+=64;
03429 if((level&(~127)) == 0){
03430 bits+= length[UNI_AC_ENC_INDEX(run, level)];
03431 }else
03432 bits+= esc_length;
03433 run=0;
03434 }else
03435 run++;
03436 }
03437 i= scantable[last];
03438
03439 level= temp[i] + 64;
03440
03441 assert(level - 64);
03442
03443 if((level&(~127)) == 0){
03444 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
03445 }else
03446 bits+= esc_length;
03447
03448 }
03449
03450 if(last>=0){
03451 if(s->mb_intra)
03452 s->dct_unquantize_intra(s, temp, 0, s->qscale);
03453 else
03454 s->dct_unquantize_inter(s, temp, 0, s->qscale);
03455 }
03456
03457 s->dsp.idct_add(bak, stride, temp);
03458
03459 distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
03460
03461 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
03462 }
03463
03464 static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
03465 MpegEncContext * const s= (MpegEncContext *)c;
03466 const uint8_t *scantable= s->intra_scantable.permutated;
03467 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
03468 DCTELEM * const temp= (DCTELEM*)aligned_temp;
03469 int i, last, run, bits, level, start_i;
03470 const int esc_length= s->ac_esc_length;
03471 uint8_t * length;
03472 uint8_t * last_length;
03473
03474 assert(h==8);
03475
03476 s->dsp.diff_pixels(temp, src1, src2, stride);
03477
03478 s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
03479
03480 bits=0;
03481
03482 if (s->mb_intra) {
03483 start_i = 1;
03484 length = s->intra_ac_vlc_length;
03485 last_length= s->intra_ac_vlc_last_length;
03486 bits+= s->luma_dc_vlc_length[temp[0] + 256];
03487 } else {
03488 start_i = 0;
03489 length = s->inter_ac_vlc_length;
03490 last_length= s->inter_ac_vlc_last_length;
03491 }
03492
03493 if(last>=start_i){
03494 run=0;
03495 for(i=start_i; i<last; i++){
03496 int j= scantable[i];
03497 level= temp[j];
03498
03499 if(level){
03500 level+=64;
03501 if((level&(~127)) == 0){
03502 bits+= length[UNI_AC_ENC_INDEX(run, level)];
03503 }else
03504 bits+= esc_length;
03505 run=0;
03506 }else
03507 run++;
03508 }
03509 i= scantable[last];
03510
03511 level= temp[i] + 64;
03512
03513 assert(level - 64);
03514
03515 if((level&(~127)) == 0){
03516 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
03517 }else
03518 bits+= esc_length;
03519 }
03520
03521 return bits;
03522 }
03523
/**
 * Sum of absolute differences between vertically adjacent pixels of a
 * 16-wide block (rows 0..h-1, i.e. h-1 row pairs).
 *
 * @param c      unused
 * @param s      block
 * @param dummy  unused
 * @param stride distance between rows
 * @param h      number of rows
 */
static int vsad_intra16_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score = 0;
    int row, col;

    for (row = 1; row < h; row++, s += stride) {
        const uint8_t *const below = s + stride;

        for (col = 0; col < 16; col++)
            score += ABS(s[col] - below[col]);
    }

    return score;
}
03538
/**
 * Sum of absolute vertical differences of the error s1 - s2 over a 16-wide
 * block (rows 0..h-1, i.e. h-1 row pairs).
 *
 * @param c      unused
 * @param s1     first block
 * @param s2     second block
 * @param stride distance between rows, shared by both blocks
 * @param h      number of rows
 */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int row, col;

    for (row = 1; row < h; row++, s1 += stride, s2 += stride) {
        for (col = 0; col < 16; col++) {
            const int change = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];

            score += ABS(change);
        }
    }

    return score;
}
03553
/* Square of a; the argument is evaluated twice, so it must be free of side
 * effects. Also used by vsse16_c() below. */
#define SQ(a) ((a)*(a))

/**
 * Vertical sum of squared differences inside one 16-pixel-wide block.
 *
 * For each of the h - 1 pairs of vertically adjacent rows, adds
 * (s[x] - s[x + stride])^2 over the columns x = 0..15.
 *
 * @param c      not used
 * @param s      top-left pixel of the block
 * @param dummy  not used
 * @param stride distance in bytes between two consecutive rows
 * @param h      number of rows in the block
 * @return the accumulated sum; 0 when h <= 1
 */
static int vsse_intra16_c(void *c, uint8_t *s, uint8_t *dummy, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        uint8_t *below = s + stride;

        for (col = 0; col < 16; col++) {
            const int delta = s[col] - below[col];

            total += SQ(delta);
        }

        s = below;
    }

    return total;
}
03569
/**
 * Vertical sum of squared differences of the residual s1 - s2 over one
 * 16-pixel-wide block.
 *
 * For every pair of vertically adjacent rows and every column x = 0..15 the
 * term ((s1[x] - s2[x]) - (s1[x + stride] - s2[x + stride]))^2 is
 * accumulated.
 *
 * @param c      not used
 * @param s1     top-left pixel of the first block
 * @param s2     top-left pixel of the second block
 * @param stride distance in bytes between two consecutive rows (both blocks)
 * @param h      number of rows in the blocks
 * @return the accumulated sum; 0 when h <= 1
 */
static int vsse16_c(void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        uint8_t *next1 = s1 + stride;
        uint8_t *next2 = s2 + stride;

        for (col = 0; col < 16; col++) {
            const int delta = s1[col] - s2[col] - next1[col] + next2[col];

            total += SQ(delta);
        }

        s1 = next1;
        s2 = next2;
    }

    return total;
}
03584
/* Each line below expands WARPER8_16_SQ, a macro defined outside this
 * excerpt. Judging by the argument names it presumably builds the 16-wide
 * comparison function (second argument) out of the 8x8 one (first argument);
 * confirm against the macro definition. The second-argument names are the
 * functions that dsputil_init() stores in the hadamard8_diff, dct_sad,
 * dct_max, quant_psnr, rd and bit tables. */
03585 WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
03586 WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
03587 WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
03588 WARPER8_16_SQ(dct_max8x8_c, dct_max16_c)
03589 WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
03590 WARPER8_16_SQ(rd8x8_c, rd16_c)
03591 WARPER8_16_SQ(bit8x8_c, bit16_c)
03592
03593
03594
03595 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
03596 {
03597 j_rev_dct (block);
03598 put_pixels_clamped_c(block, dest, line_size);
03599 }
03600 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
03601 {
03602 j_rev_dct (block);
03603 add_pixels_clamped_c(block, dest, line_size);
03604 }
03605
03606 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
03607 {
03608 j_rev_dct4 (block);
03609 put_pixels_clamped4_c(block, dest, line_size);
03610 }
03611 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
03612 {
03613 j_rev_dct4 (block);
03614 add_pixels_clamped4_c(block, dest, line_size);
03615 }
03616
03617 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
03618 {
03619 j_rev_dct2 (block);
03620 put_pixels_clamped2_c(block, dest, line_size);
03621 }
03622 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
03623 {
03624 j_rev_dct2 (block);
03625 add_pixels_clamped2_c(block, dest, line_size);
03626 }
03627
03628 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
03629 {
03630 uint8_t *cm = cropTbl + MAX_NEG_CROP;
03631
03632 dest[0] = cm[(block[0] + 4)>>3];
03633 }
03634 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
03635 {
03636 uint8_t *cm = cropTbl + MAX_NEG_CROP;
03637
03638 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
03639 }
03640
03641
03642 void dsputil_static_init(void)
03643 {
03644 int i;
03645
03646 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
03647 for(i=0;i<MAX_NEG_CROP;i++) {
03648 cropTbl[i] = 0;
03649 cropTbl[i + MAX_NEG_CROP + 256] = 255;
03650 }
03651
03652 for(i=0;i<512;i++) {
03653 squareTbl[i] = (i - 256) * (i - 256);
03654 }
03655
03656 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
03657 }
03658
03659
03660 void dsputil_init(DSPContext* c, AVCodecContext *avctx)
03661 {
03662 int i;
03663
03664 #ifdef CONFIG_ENCODERS
03665 if(avctx->dct_algo==FF_DCT_FASTINT) {
03666 c->fdct = fdct_ifast;
03667 c->fdct248 = fdct_ifast248;
03668 }
03669 else if(avctx->dct_algo==FF_DCT_FAAN) {
03670 c->fdct = ff_faandct;
03671 c->fdct248 = ff_faandct248;
03672 }
03673 else {
03674 c->fdct = ff_jpeg_fdct_islow;
03675 c->fdct248 = ff_fdct248_islow;
03676 }
03677 #endif //CONFIG_ENCODERS
03678
03679 if(avctx->lowres==1){
03680 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){
03681 c->idct_put= ff_jref_idct4_put;
03682 c->idct_add= ff_jref_idct4_add;
03683 }else{
03684 c->idct_put= ff_h264_lowres_idct_put_c;
03685 c->idct_add= ff_h264_lowres_idct_add_c;
03686 }
03687 c->idct = j_rev_dct4;
03688 c->idct_permutation_type= FF_NO_IDCT_PERM;
03689 }else if(avctx->lowres==2){
03690 c->idct_put= ff_jref_idct2_put;
03691 c->idct_add= ff_jref_idct2_add;
03692 c->idct = j_rev_dct2;
03693 c->idct_permutation_type= FF_NO_IDCT_PERM;
03694 }else if(avctx->lowres==3){
03695 c->idct_put= ff_jref_idct1_put;
03696 c->idct_add= ff_jref_idct1_add;
03697 c->idct = j_rev_dct1;
03698 c->idct_permutation_type= FF_NO_IDCT_PERM;
03699 }else{
03700 if(avctx->idct_algo==FF_IDCT_INT){
03701 c->idct_put= ff_jref_idct_put;
03702 c->idct_add= ff_jref_idct_add;
03703 c->idct = j_rev_dct;
03704 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
03705 }else if(avctx->idct_algo==FF_IDCT_VP3){
03706 c->idct_put= ff_vp3_idct_put_c;
03707 c->idct_add= ff_vp3_idct_add_c;
03708 c->idct = ff_vp3_idct_c;
03709 c->idct_permutation_type= FF_NO_IDCT_PERM;
03710 }else{
03711 c->idct_put= simple_idct_put;
03712 c->idct_add= simple_idct_add;
03713 c->idct = simple_idct;
03714 c->idct_permutation_type= FF_NO_IDCT_PERM;
03715 }
03716 }
03717
03718 c->h264_idct_add= ff_h264_idct_add_c;
03719 c->h264_idct8_add= ff_h264_idct8_add_c;
03720
03721 c->get_pixels = get_pixels_c;
03722 c->diff_pixels = diff_pixels_c;
03723 c->put_pixels_clamped = put_pixels_clamped_c;
03724 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
03725 c->add_pixels_clamped = add_pixels_clamped_c;
03726 c->add_pixels8 = add_pixels8_c;
03727 c->add_pixels4 = add_pixels4_c;
03728 c->gmc1 = gmc1_c;
03729 c->gmc = gmc_c;
03730 c->clear_blocks = clear_blocks_c;
03731 c->pix_sum = pix_sum_c;
03732 c->pix_norm1 = pix_norm1_c;
03733
03734
03735 c->pix_abs[0][0] = pix_abs16_c;
03736 c->pix_abs[0][1] = pix_abs16_x2_c;
03737 c->pix_abs[0][2] = pix_abs16_y2_c;
03738 c->pix_abs[0][3] = pix_abs16_xy2_c;
03739 c->pix_abs[1][0] = pix_abs8_c;
03740 c->pix_abs[1][1] = pix_abs8_x2_c;
03741 c->pix_abs[1][2] = pix_abs8_y2_c;
03742 c->pix_abs[1][3] = pix_abs8_xy2_c;
03743
03744 #define dspfunc(PFX, IDX, NUM) \
03745 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
03746 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
03747 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
03748 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
03749
03750 dspfunc(put, 0, 16);
03751 dspfunc(put_no_rnd, 0, 16);
03752 dspfunc(put, 1, 8);
03753 dspfunc(put_no_rnd, 1, 8);
03754 dspfunc(put, 2, 4);
03755 dspfunc(put, 3, 2);
03756
03757 dspfunc(avg, 0, 16);
03758 dspfunc(avg_no_rnd, 0, 16);
03759 dspfunc(avg, 1, 8);
03760 dspfunc(avg_no_rnd, 1, 8);
03761 dspfunc(avg, 2, 4);
03762 dspfunc(avg, 3, 2);
03763 #undef dspfunc
03764
03765 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
03766 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
03767
03768 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
03769 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
03770 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
03771 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
03772 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
03773 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
03774 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
03775 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
03776 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
03777
03778 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
03779 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
03780 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
03781 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
03782 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
03783 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
03784 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
03785 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
03786 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
03787
03788 #define dspfunc(PFX, IDX, NUM) \
03789 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
03790 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
03791 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
03792 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
03793 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
03794 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
03795 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
03796 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
03797 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
03798 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
03799 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
03800 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
03801 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
03802 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
03803 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
03804 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
03805
03806 dspfunc(put_qpel, 0, 16);
03807 dspfunc(put_no_rnd_qpel, 0, 16);
03808
03809 dspfunc(avg_qpel, 0, 16);
03810
03811
03812 dspfunc(put_qpel, 1, 8);
03813 dspfunc(put_no_rnd_qpel, 1, 8);
03814
03815 dspfunc(avg_qpel, 1, 8);
03816
03817
03818 dspfunc(put_h264_qpel, 0, 16);
03819 dspfunc(put_h264_qpel, 1, 8);
03820 dspfunc(put_h264_qpel, 2, 4);
03821 dspfunc(avg_h264_qpel, 0, 16);
03822 dspfunc(avg_h264_qpel, 1, 8);
03823 dspfunc(avg_h264_qpel, 2, 4);
03824
03825 #undef dspfunc
03826 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
03827 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
03828 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
03829 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
03830 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
03831 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
03832
03833 c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
03834 c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
03835 c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
03836 c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
03837 c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
03838 c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
03839 c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
03840 c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
03841 c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
03842 c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
03843 c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
03844 c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
03845 c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
03846 c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
03847 c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
03848 c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
03849 c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
03850 c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
03851 c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
03852 c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
03853
03854 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
03855 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
03856 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
03857 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
03858 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
03859 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
03860 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
03861 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
03862
03863 #define SET_CMP_FUNC(name) \
03864 c->name[0]= name ## 16_c;\
03865 c->name[1]= name ## 8x8_c;
03866
03867 SET_CMP_FUNC(hadamard8_diff)
03868 c->hadamard8_diff[4]= hadamard8_intra16_c;
03869 SET_CMP_FUNC(dct_sad)
03870 SET_CMP_FUNC(dct_max)
03871 c->sad[0]= pix_abs16_c;
03872 c->sad[1]= pix_abs8_c;
03873 c->sse[0]= sse16_c;
03874 c->sse[1]= sse8_c;
03875 c->sse[2]= sse4_c;
03876 SET_CMP_FUNC(quant_psnr)
03877 SET_CMP_FUNC(rd)
03878 SET_CMP_FUNC(bit)
03879 c->vsad[0]= vsad16_c;
03880 c->vsad[4]= vsad_intra16_c;
03881 c->vsse[0]= vsse16_c;
03882 c->vsse[4]= vsse_intra16_c;
03883 c->nsse[0]= nsse16_c;
03884 c->nsse[1]= nsse8_c;
03885 c->w53[0]= w53_16_c;
03886 c->w53[1]= w53_8_c;
03887 c->w97[0]= w97_16_c;
03888 c->w97[1]= w97_8_c;
03889
03890 c->add_bytes= add_bytes_c;
03891 c->diff_bytes= diff_bytes_c;
03892 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03893 c->bswap_buf= bswap_buf;
03894
03895 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
03896 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
03897 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
03898 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
03899 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
03900 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
03901
03902 c->h263_h_loop_filter= h263_h_loop_filter_c;
03903 c->h263_v_loop_filter= h263_v_loop_filter_c;
03904
03905 c->h261_loop_filter= h261_loop_filter_c;
03906
03907 c->try_8x8basis= try_8x8basis_c;
03908 c->add_8x8basis= add_8x8basis_c;
03909
03910 #ifdef HAVE_MMX
03911 dsputil_init_mmx(c, avctx);
03912 #endif
03913 #ifdef ARCH_ARMV4L
03914 dsputil_init_armv4l(c, avctx);
03915 #endif
03916 #ifdef HAVE_MLIB
03917 dsputil_init_mlib(c, avctx);
03918 #endif
03919 #ifdef ARCH_SPARC
03920 dsputil_init_vis(c,avctx);
03921 #endif
03922 #ifdef ARCH_ALPHA
03923 dsputil_init_alpha(c, avctx);
03924 #endif
03925 #ifdef ARCH_POWERPC
03926 dsputil_init_ppc(c, avctx);
03927 #endif
03928 #ifdef HAVE_MMI
03929 dsputil_init_mmi(c, avctx);
03930 #endif
03931 #ifdef ARCH_SH4
03932 dsputil_init_sh4(c,avctx);
03933 #endif
03934
03935 switch(c->idct_permutation_type){
03936 case FF_NO_IDCT_PERM:
03937 for(i=0; i<64; i++)
03938 c->idct_permutation[i]= i;
03939 break;
03940 case FF_LIBMPEG2_IDCT_PERM:
03941 for(i=0; i<64; i++)
03942 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
03943 break;
03944 case FF_SIMPLE_IDCT_PERM:
03945 for(i=0; i<64; i++)
03946 c->idct_permutation[i]= simple_mmx_permutation[i];
03947 break;
03948 case FF_TRANSPOSE_IDCT_PERM:
03949 for(i=0; i<64; i++)
03950 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
03951 break;
03952 case FF_PARTTRANS_IDCT_PERM:
03953 for(i=0; i<64; i++)
03954 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
03955 break;
03956 default:
03957 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
03958 }
03959 }
03960