00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
// Architecture-dependent register names for the inline assembly in this file.
// REGa/REGc/REGd are bare tokens, intended for macro arguments that are
// stringified after expansion (e.g. PAVGB((%%REGa), %%mm2)).
// REG_a/REG_c/REG_d/REG_SP are string literals that are concatenated directly
// into an asm template (e.g. "lea (%0, %1), %%"REG_a).
00025 #ifdef ARCH_X86_64
00026 # define REGa rax
00027 # define REGc rcx
00028 # define REGd rdx
00029 # define REG_a "rax"
00030 # define REG_c "rcx"
00031 # define REG_d "rdx"
00032 # define REG_SP "rsp"
// Immediate for "and": clears the low three bits, i.e. rounds an address
// down to a multiple of 8.
00033 # define ALIGN_MASK "$0xFFFFFFFFFFFFFFF8"
00034 #else
00035 # define REGa eax
00036 # define REGc ecx
00037 # define REGd edx
00038 # define REG_a "eax"
00039 # define REG_c "ecx"
00040 # define REG_d "edx"
00041 # define REG_SP "esp"
// 32 bit variant of the same "round down to a multiple of 8" mask.
00042 # define ALIGN_MASK "$0xFFFFFFF8"
00043 #endif
00044
00045
// Helper macros that expand to asm template fragments (AT&T operand order:
// the last operand is the destination). They are #undef'd first so that they
// can be redefined with different HAVE_* settings.
00046 #undef PAVGB
00047 #undef PMINUB
00048 #undef PMAXUB
00049

// PAVGB(a,b): bytewise average of a and b, result in b.
// Emits "pavgb" with HAVE_MMX2, "pavgusb" with HAVE_3DNOW. If neither is
// defined REAL_PAVGB stays undefined, so PAVGB must not be used then.
// The extra REAL_ level makes the arguments macro-expanded before they are
// stringified, so that REGa etc. can be used inside them.
00050 #ifdef HAVE_MMX2
00051 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
00052 #elif defined (HAVE_3DNOW)
00053 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
00054 #endif
00055 #define PAVGB(a,b) REAL_PAVGB(a,b)
00056

// PMINUB(x,y,t): unsigned bytewise minimum of x and y, result in y.
// With HAVE_MMX2 the scratch register t is unused.
00057 #ifdef HAVE_MMX2
00058 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
00059 #elif defined (HAVE_MMX)
// Plain MMX emulation. Note that the parameter names are swapped here
// (first argument is "b", second is "a"): t = saturate(a - b); a -= t,
// which leaves min(a,b) in the second argument and clobbers t.
00060 #define PMINUB(b,a,t) \
00061 "movq " #a ", " #t " \n\t"\
00062 "psubusb " #b ", " #t " \n\t"\
00063 "psubb " #t ", " #a " \n\t"
00064 #endif
00065

// PMAXUB(a,b): unsigned bytewise maximum of a and b, result in b.
00066 #ifdef HAVE_MMX2
00067 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
00068 #elif defined (HAVE_MMX)
// Plain MMX emulation: b = saturate(b - a) + a.
00069 #define PMAXUB(a,b) \
00070 "psubusb " #a ", " #b " \n\t"\
00071 "paddb " #a ", " #b " \n\t"
00072 #endif
00073
00074
00075 #ifdef HAVE_MMX
00076
/**
 * Classify the 8x8 block that starts 4 rows below src (MMX version).
 *
 * For each of the 7 vertically adjacent row pairs, every byte of
 * (upper - lower + mmxDcOffset) is compared (signed, pcmpgtb) against
 * mmxDcThreshold and the hits are counted over the whole block. In parallel
 * the per-column minimum and maximum over the 8 rows are tracked.
 *
 * @param src    top of the area; rows 4..11 relative to it are read, 8 bytes each
 * @param stride distance between two consecutive rows
 * @param c      context; mmxDcOffset[nonBQP], mmxDcThreshold[nonBQP], pQPb and
 *               ppMode.flatnessThreshold are read
 * @return 2 if the hit count does not exceed ppMode.flatnessThreshold;
 *         otherwise 0 if at least one column has (max - min) > 2*pQPb
 *         (bytewise, saturating arithmetic) and 1 if no column has
 */
00079 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
00080 int numEq= 0, dcOk;
00081 src+= stride*4;
// Preload the offset (mm7) and threshold (mm6) used by the second asm block.
// NOTE(review): this relies on mm6/mm7 surviving between the two asm
// statements; no MMX register is declared as clobbered or as an operand.
00082 asm volatile(
00083 "movq %0, %%mm7 \n\t"
00084 "movq %1, %%mm6 \n\t"
00085 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
00086 );
00087
// Row naming below: rowN = src + N*stride (src already advanced by 4 rows).
// mm0 accumulates the compare masks (-1 per hit and byte), mm4 the running
// maximum and mm3 the running minimum of each column.
00088 asm volatile(
// REG_a = row1
00089 "lea (%2, %3), %%"REG_a" \n\t"
00090
00091
00092
// row0 / row1
00093 "movq (%2), %%mm0 \n\t"
00094 "movq (%%"REG_a"), %%mm1 \n\t"
00095 "movq %%mm0, %%mm3 \n\t"
00096 "movq %%mm0, %%mm4 \n\t"
00097 PMAXUB(%%mm1, %%mm4)
00098 PMINUB(%%mm1, %%mm3, %%mm5)
00099 "psubb %%mm1, %%mm0 \n\t"
00100 "paddb %%mm7, %%mm0 \n\t"
00101 "pcmpgtb %%mm6, %%mm0 \n\t"
00102
// row1 / row2
00103 "movq (%%"REG_a",%3), %%mm2 \n\t"
00104 PMAXUB(%%mm2, %%mm4)
00105 PMINUB(%%mm2, %%mm3, %%mm5)
00106 "psubb %%mm2, %%mm1 \n\t"
00107 "paddb %%mm7, %%mm1 \n\t"
00108 "pcmpgtb %%mm6, %%mm1 \n\t"
00109 "paddb %%mm1, %%mm0 \n\t"
00110
// row2 / row3
00111 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
00112 PMAXUB(%%mm1, %%mm4)
00113 PMINUB(%%mm1, %%mm3, %%mm5)
00114 "psubb %%mm1, %%mm2 \n\t"
00115 "paddb %%mm7, %%mm2 \n\t"
00116 "pcmpgtb %%mm6, %%mm2 \n\t"
00117 "paddb %%mm2, %%mm0 \n\t"
00118
// REG_a = row5
00119 "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t"
00120
// row3 / row4
00121 "movq (%2, %3, 4), %%mm2 \n\t"
00122 PMAXUB(%%mm2, %%mm4)
00123 PMINUB(%%mm2, %%mm3, %%mm5)
00124 "psubb %%mm2, %%mm1 \n\t"
00125 "paddb %%mm7, %%mm1 \n\t"
00126 "pcmpgtb %%mm6, %%mm1 \n\t"
00127 "paddb %%mm1, %%mm0 \n\t"
00128
// row4 / row5
00129 "movq (%%"REG_a"), %%mm1 \n\t"
00130 PMAXUB(%%mm1, %%mm4)
00131 PMINUB(%%mm1, %%mm3, %%mm5)
00132 "psubb %%mm1, %%mm2 \n\t"
00133 "paddb %%mm7, %%mm2 \n\t"
00134 "pcmpgtb %%mm6, %%mm2 \n\t"
00135 "paddb %%mm2, %%mm0 \n\t"
00136
// row5 / row6
00137 "movq (%%"REG_a", %3), %%mm2 \n\t"
00138 PMAXUB(%%mm2, %%mm4)
00139 PMINUB(%%mm2, %%mm3, %%mm5)
00140 "psubb %%mm2, %%mm1 \n\t"
00141 "paddb %%mm7, %%mm1 \n\t"
00142 "pcmpgtb %%mm6, %%mm1 \n\t"
00143 "paddb %%mm1, %%mm0 \n\t"
00144
// row6 / row7
00145 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
00146 PMAXUB(%%mm1, %%mm4)
00147 PMINUB(%%mm1, %%mm3, %%mm5)
00148 "psubb %%mm1, %%mm2 \n\t"
00149 "paddb %%mm7, %%mm2 \n\t"
00150 "pcmpgtb %%mm6, %%mm2 \n\t"
00151 "paddb %%mm2, %%mm0 \n\t"
// mm4 = per-column (max - min)
00152 "psubusb %%mm3, %%mm4 \n\t"
00153
00154 " \n\t"
// Horizontal sum of the 8 counter bytes of mm0; only the low byte of the
// result is used afterwards.
00155 #ifdef HAVE_MMX2
00156 "pxor %%mm7, %%mm7 \n\t"
00157 "psadbw %%mm7, %%mm0 \n\t"
00158 #else
00159 "movq %%mm0, %%mm1 \n\t"
00160 "psrlw $8, %%mm0 \n\t"
00161 "paddb %%mm1, %%mm0 \n\t"
00162 "movq %%mm0, %%mm1 \n\t"
00163 "psrlq $16, %%mm0 \n\t"
00164 "paddb %%mm1, %%mm0 \n\t"
00165 "movq %%mm0, %%mm1 \n\t"
00166 "psrlq $32, %%mm0 \n\t"
00167 "paddb %%mm1, %%mm0 \n\t"
00168 #endif
// mm4 = saturate((max - min) - 2*pQPb); the pack folds all 8 bytes into the
// low 32 bits so that a single movd can test them for "any byte nonzero".
00169 "movq %4, %%mm7 \n\t"
00170 "paddusb %%mm7, %%mm7 \n\t"
00171 "psubusb %%mm7, %%mm4 \n\t"
00172 "packssdw %%mm4, %%mm4 \n\t"
00173 "movd %%mm0, %0 \n\t"
00174 "movd %%mm4, %1 \n\t"
00175
00176 : "=r" (numEq), "=r" (dcOk)
00177 : "r" (src), "r" ((long)stride), "m" (c->pQPb)
00178 : "%"REG_a
00179 );
00180
// The counters were built from -1 masks, so negate and keep the low byte to
// get the number of hits.
00181 numEq= (-numEq) &0xFF;
00182 if(numEq > c->ppMode.flatnessThreshold){
// Despite its name, dcOk is nonzero when some column's range exceeds 2*pQPb.
00183 if(dcOk) return 0;
00184 else return 1;
00185 }else{
00186 return 2;
00187 }
00188 }
00189 #endif
00190
00195 #ifndef HAVE_ALTIVEC
/**
 * Vertical low pass filter over 8 rows of one block.
 *
 * With rowN = src + (3+N)*stride, rows 1..8 are rewritten in place. Rows 0
 * and 9 are only read: they supply the "first" and "last" border values,
 * each chosen between the outer row and its inner neighbour depending on
 * how much the two differ compared to the quantizer.
 *
 * The MMX2/3DNow path handles 8 columns at once using cascaded byte averages;
 * the C path loops over BLOCK_SIZE columns and uses exact integer sums.
 *
 * @param src    top of the area
 * @param stride distance between two consecutive rows
 * @param c      context; pQPb is read by the asm path, QP by the C path
 */
00196 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
00197 {
00198 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
00199 src+= stride*3;
00200 asm volatile(
00201 "movq %2, %%mm0 \n\t"
00202 "pxor %%mm4, %%mm4 \n\t"
00203
// mm6 = "first": row0 where |row0 - row1| <= pQPb (bytewise), else row1.
// NOTE(review): the C path below uses a strict "<" against c->QP.
00204 "movq (%0), %%mm6 \n\t"
00205 "movq (%0, %1), %%mm5 \n\t"
00206 "movq %%mm5, %%mm1 \n\t"
00207 "movq %%mm6, %%mm2 \n\t"
00208 "psubusb %%mm6, %%mm5 \n\t"
00209 "psubusb %%mm1, %%mm2 \n\t"
00210 "por %%mm5, %%mm2 \n\t"
00211 "psubusb %%mm0, %%mm2 \n\t"
00212 "pcmpeqb %%mm4, %%mm2 \n\t"
00213
00214 "pand %%mm2, %%mm6 \n\t"
00215 "pandn %%mm1, %%mm2 \n\t"
00216 "por %%mm2, %%mm6 \n\t"
00217
// mm5 = row8, REG_a = row4, REG_c = row7; afterwards %0 is advanced to row1.
// NOTE(review): %0 is declared as an input-only operand but is modified here
// and restored by the "sub" at the end of the block.
00218 "movq (%0, %1, 8), %%mm5 \n\t"
00219 "lea (%0, %1, 4), %%"REG_a" \n\t"
00220 "lea (%0, %1, 8), %%"REG_c" \n\t"
00221 "sub %1, %%"REG_c" \n\t"
00222 "add %1, %0 \n\t"
// mm7 = "last": row9 where |row8 - row9| <= pQPb (bytewise), else row8.
00223 "movq (%0, %1, 8), %%mm7 \n\t"
00224 "movq %%mm5, %%mm1 \n\t"
00225 "movq %%mm7, %%mm2 \n\t"
00226 "psubusb %%mm7, %%mm5 \n\t"
00227 "psubusb %%mm1, %%mm2 \n\t"
00228 "por %%mm5, %%mm2 \n\t"
00229 "psubusb %%mm0, %%mm2 \n\t"
00230 "pcmpeqb %%mm4, %%mm2 \n\t"
00231
00232 "pand %%mm2, %%mm7 \n\t"
00233 "pandn %%mm1, %%mm2 \n\t"
00234 "por %%mm2, %%mm7 \n\t"
00235
00236
00237
00238
00239
00240
00241
00242
// From here on each output row is built from a chain of PAVGB averages of
// the border values and neighbouring rows. From here: (%0) = row1,
// (%0,%1) = row2, (%0,%1,2) = row3, (REG_a) = row4, (%0,%1,4) = row5,
// (REG_a,%1,2) = row6, (REG_c) = row7, (REG_a,%1,4) = row8.
00243 "movq (%0, %1), %%mm0 \n\t"
00244 "movq %%mm0, %%mm1 \n\t"
00245 PAVGB(%%mm6, %%mm0)
00246 PAVGB(%%mm6, %%mm0)
00247
00248 "movq (%0, %1, 4), %%mm2 \n\t"
00249 "movq %%mm2, %%mm5 \n\t"
00250 PAVGB((%%REGa), %%mm2)
00251 PAVGB((%0, %1, 2), %%mm2)
00252 "movq %%mm2, %%mm3 \n\t"
00253 "movq (%0), %%mm4 \n\t"
00254 PAVGB(%%mm4, %%mm3)
00255 PAVGB(%%mm0, %%mm3)
// store row1
00256 "movq %%mm3, (%0) \n\t"
00257
00258 "movq %%mm1, %%mm0 \n\t"
00259 PAVGB(%%mm6, %%mm0)
00260 "movq %%mm4, %%mm3 \n\t"
00261 PAVGB((%0,%1,2), %%mm3)
00262 PAVGB((%%REGa,%1,2), %%mm5)
00263 PAVGB((%%REGa), %%mm5)
00264 PAVGB(%%mm5, %%mm3)
00265 PAVGB(%%mm0, %%mm3)
// store row2
00266 "movq %%mm3, (%0,%1) \n\t"
00267
00268 PAVGB(%%mm4, %%mm6)
00269 "movq (%%"REG_c"), %%mm0 \n\t"
00270 PAVGB((%%REGa, %1, 2), %%mm0)
00271 "movq %%mm0, %%mm3 \n\t"
00272 PAVGB(%%mm1, %%mm0)
00273 PAVGB(%%mm6, %%mm0)
00274 PAVGB(%%mm2, %%mm0)
// keep the old row3 in mm2, then store the new row3
00275 "movq (%0, %1, 2), %%mm2 \n\t"
00276 "movq %%mm0, (%0, %1, 2) \n\t"
00277
00278 "movq (%%"REG_a", %1, 4), %%mm0 \n\t"
00279 PAVGB((%%REGc), %%mm0)
00280 PAVGB(%%mm0, %%mm6)
00281 PAVGB(%%mm1, %%mm4)
00282 PAVGB(%%mm2, %%mm1)
00283 PAVGB(%%mm1, %%mm6)
00284 PAVGB(%%mm5, %%mm6)
// keep the old row4 in mm5, then store the new row4
00285 "movq (%%"REG_a"), %%mm5 \n\t"
00286 "movq %%mm6, (%%"REG_a") \n\t"
00287
00288 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
00289 PAVGB(%%mm7, %%mm6)
00290 PAVGB(%%mm4, %%mm6)
00291 PAVGB(%%mm3, %%mm6)
00292 PAVGB(%%mm5, %%mm2)
00293 "movq (%0, %1, 4), %%mm4 \n\t"
00294 PAVGB(%%mm4, %%mm2)
00295 PAVGB(%%mm2, %%mm6)
// store row5 (its old value stays in mm4)
00296 "movq %%mm6, (%0, %1, 4) \n\t"
00297
00298 PAVGB(%%mm7, %%mm1)
00299 PAVGB(%%mm4, %%mm5)
00300 PAVGB(%%mm5, %%mm0)
00301 "movq (%%"REG_a", %1, 2), %%mm6 \n\t"
00302 PAVGB(%%mm6, %%mm1)
00303 PAVGB(%%mm0, %%mm1)
// store row6 (its old value stays in mm6)
00304 "movq %%mm1, (%%"REG_a", %1, 2) \n\t"
00305
00306 PAVGB((%%REGc), %%mm2)
00307 "movq (%%"REG_a", %1, 4), %%mm0 \n\t"
00308 PAVGB(%%mm0, %%mm6)
00309 PAVGB(%%mm7, %%mm6)
00310 PAVGB(%%mm2, %%mm6)
// store row7
00311 "movq %%mm6, (%%"REG_c") \n\t"
00312
00313 PAVGB(%%mm7, %%mm5)
00314 PAVGB(%%mm7, %%mm5)
00315
00316 PAVGB(%%mm3, %%mm0)
00317 PAVGB(%%mm0, %%mm5)
// store row8
00318 "movq %%mm5, (%%"REG_a", %1, 4) \n\t"
// undo the "add %1, %0" from above
00319 "sub %1, %0 \n\t"
00320
00321 :
00322 : "r" (src), "r" ((long)stride), "m" (c->pQPb)
00323 : "%"REG_a, "%"REG_c
00324 );
00325 #else
// Offsets of rows 1..9 relative to src (after src has been advanced below).
00326 const int l1= stride;
00327 const int l2= stride + l1;
00328 const int l3= stride + l2;
00329 const int l4= stride + l3;
00330 const int l5= stride + l4;
00331 const int l6= stride + l5;
00332 const int l7= stride + l6;
00333 const int l8= stride + l7;
00334 const int l9= stride + l8;
00335 int x;
00336 src+= stride*3;
00337 for(x=0; x<BLOCK_SIZE; x++)
00338 {
// Border values: use the outer row only if it is close to its neighbour.
00339 const int first= ABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
00340 const int last= ABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
00341
// Sliding window sums: each sums[i] is derived from sums[i-1] by dropping
// one sample and adding the next; "first"/"last" stand in for samples outside
// rows 1..8. The +4 in sums[0] is carried through all later sums.
00342 int sums[10];
00343 sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
00344 sums[1] = sums[0] - first + src[l4];
00345 sums[2] = sums[1] - first + src[l5];
00346 sums[3] = sums[2] - first + src[l6];
00347 sums[4] = sums[3] - first + src[l7];
00348 sums[5] = sums[4] - src[l1] + src[l8];
00349 sums[6] = sums[5] - src[l2] + last;
00350 sums[7] = sums[6] - src[l3] + last;
00351 sums[8] = sums[7] - src[l4] + last;
00352 sums[9] = sums[8] - src[l5] + last;
00353
// All sums were computed from the unmodified samples above, so writing the
// rows in place here is safe.
00354 src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
00355 src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
00356 src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
00357 src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
00358 src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
00359 src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
00360 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
00361 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
00362
// next column
00363 src++;
00364 }
00365 #endif
00366 }
00367 #endif //HAVE_ALTIVEC
00368
00369 #if 0
00370
/**
 * Vertical filter that smooths the step between row 4 and row 5
 * (rowN = src + (3+N)*stride) and, with a smaller weight, rows 3 and 6.
 *
 * This function sits inside an "#if 0" region and is therefore not compiled.
 *
 * The C path applies, per column, v = row5 - row4 and if |v| < QP + (QP>>2):
 * row3 += v>>3, row4 += v>>1, row5 -= v>>1, row6 -= v>>3.
 *
 * NOTE(review): the asm path does not use the QP parameter; it reads the
 * global symbols pQPb, b80, b02, b3F and b20 via MANGLE(). It also uses
 * "leal" with REG_a/REG_c, which looks 32 bit only - confirm before
 * re-enabling.
 *
 * @param src    top of the area
 * @param stride distance between two consecutive rows
 * @param QP     quantizer, used as threshold by the C path
 */
00381 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
00382 {
00383 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
00384 src+= stride*3;
00385
00386 asm volatile(
00387 "pxor %%mm7, %%mm7 \n\t"
00388 "movq "MANGLE(b80)", %%mm6 \n\t"
// REG_a = row1, REG_c = row5
00389 "leal (%0, %1), %%"REG_a" \n\t"
00390 "leal (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00391
00392
// mm0 = threshold derived from pQPb (pQPb plus roughly a quarter of it)
00393 "movq "MANGLE(pQPb)", %%mm0 \n\t"
00394 "movq %%mm0, %%mm1 \n\t"
00395 "paddusb "MANGLE(b02)", %%mm0 \n\t"
00396 "psrlw $2, %%mm0 \n\t"
00397 "pand "MANGLE(b3F)", %%mm0 \n\t"
00398 "paddusb %%mm1, %%mm0 \n\t"
// mm2 = row4, mm3 = row5; mm4 becomes the mask "|row4 - row5| <= threshold"
00399 "movq (%0, %1, 4), %%mm2 \n\t"
00400 "movq (%%"REG_c"), %%mm3 \n\t"
00401 "movq %%mm2, %%mm4 \n\t"
00402 "pcmpeqb %%mm5, %%mm5 \n\t"
00403 "pxor %%mm2, %%mm5 \n\t"
00404 PAVGB(%%mm3, %%mm5)
00405 "paddb %%mm6, %%mm5 \n\t"
00406 "psubusb %%mm3, %%mm4 \n\t"
00407 "psubusb %%mm2, %%mm3 \n\t"
00408 "por %%mm3, %%mm4 \n\t"
00409 "psubusb %%mm0, %%mm4 \n\t"
00410 "pcmpeqb %%mm7, %%mm4 \n\t"
00411 "pand %%mm4, %%mm5 \n\t"
00412
00413
00414 "paddb %%mm5, %%mm2 \n\t"
00415
// store row4
00416 "movq %%mm2, (%0,%1, 4) \n\t"
00417
00418 "movq (%%"REG_c"), %%mm2 \n\t"
00419
00420 "psubb %%mm5, %%mm2 \n\t"
00421
// store row5
00422 "movq %%mm2, (%%"REG_c") \n\t"
00423
// scale the correction down for the outer rows
00424 "paddb %%mm6, %%mm5 \n\t"
00425 "psrlw $2, %%mm5 \n\t"
00426 "pand "MANGLE(b3F)", %%mm5 \n\t"
00427 "psubb "MANGLE(b20)", %%mm5 \n\t"
00428
// row3
00429 "movq (%%"REG_a", %1, 2), %%mm2 \n\t"
00430 "paddb %%mm6, %%mm2 \n\t"
00431 "paddsb %%mm5, %%mm2 \n\t"
00432 "psubb %%mm6, %%mm2 \n\t"
00433 "movq %%mm2, (%%"REG_a", %1, 2) \n\t"
00434
// row6
00435 "movq (%%"REG_c", %1), %%mm2 \n\t"
00436 "paddb %%mm6, %%mm2 \n\t"
00437 "psubsb %%mm5, %%mm2 \n\t"
00438 "psubb %%mm6, %%mm2 \n\t"
00439 "movq %%mm2, (%%"REG_c", %1) \n\t"
00440
00441 :
00442 : "r" (src), "r" ((long)stride)
00443 : "%"REG_a, "%"REG_c
00444 );
00445 #else
// Offsets of rows 1..6 relative to src (after src has been advanced below).
00446 const int l1= stride;
00447 const int l2= stride + l1;
00448 const int l3= stride + l2;
00449 const int l4= stride + l3;
00450 const int l5= stride + l4;
00451 const int l6= stride + l5;
00452
00453
00454
00455 int x;
// threshold: 1.25 * QP in integer arithmetic
00456 const int QP15= QP + (QP>>2);
00457 src+= stride*3;
00458 for(x=0; x<BLOCK_SIZE; x++)
00459 {
// step across the row4/row5 boundary in column x
00460 const int v = (src[x+l5] - src[x+l4]);
00461 if(ABS(v) < QP15)
00462 {
00463 src[x+l3] +=v>>3;
00464 src[x+l4] +=v>>1;
00465 src[x+l5] -=v>>1;
00466 src[x+l6] -=v>>3;
00467
00468 }
00469 }
00470
00471 #endif
00472 }
00473 #endif
00474
/**
 * Vertical filter across the boundary between row 4 and row 5
 * (rowN = src + (3+N)*stride); rows 2..7 are modified in place.
 *
 * Per column, with a = row3-row4, b = row4-row5, c = row5-row6 the C path
 * computes d = max(|b| - ((|a|+|c|)>>1), 0). If d < 2*QP, v = d*SIGN(-b) is
 * distributed as: row2 += v>>3, row3 += v>>2, row4 += 3v>>3 and the mirrored
 * subtraction on row5, row6, row7.
 *
 * The MMX2/3DNow path does the same on 8 columns at once with unsigned
 * saturating byte arithmetic and PAVGB based halving, so rounding and the
 * threshold comparison are not bit-identical to the C path.
 *
 * @param src    top of the area
 * @param stride distance between two consecutive rows
 * @param co     context; pQPb is read by the asm path, QP by the C path
 */
00482 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
00483 {
00484 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
00485 src+= stride*3;
00486
00487 asm volatile(
00488 "pxor %%mm7, %%mm7 \n\t"
// REG_a = row1, REG_c = row5
00489 "lea (%0, %1), %%"REG_a" \n\t"
00490 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00491
00492
// mm0 = |row3 - row4|, mm2 keeps row4
00493 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00494 "movq (%0, %1, 4), %%mm1 \n\t"
00495 "movq %%mm1, %%mm2 \n\t"
00496 "psubusb %%mm0, %%mm1 \n\t"
00497 "psubusb %%mm2, %%mm0 \n\t"
00498 "por %%mm1, %%mm0 \n\t"
// mm3 = |row5 - row6|, mm5 keeps row5
00499 "movq (%%"REG_c"), %%mm3 \n\t"
00500 "movq (%%"REG_c", %1), %%mm4 \n\t"
00501 "movq %%mm3, %%mm5 \n\t"
00502 "psubusb %%mm4, %%mm3 \n\t"
00503 "psubusb %%mm5, %%mm4 \n\t"
00504 "por %%mm4, %%mm3 \n\t"
// mm0 = average of the two outer differences
00505 PAVGB(%%mm3, %%mm0)
// mm2 = mask of bytes where row4 <= row5 (direction of the step),
// mm4 = |row4 - row5| minus that average, saturated at 0 (this is d)
00506 "movq %%mm2, %%mm1 \n\t"
00507 "psubusb %%mm5, %%mm2 \n\t"
00508 "movq %%mm2, %%mm4 \n\t"
00509 "pcmpeqb %%mm7, %%mm2 \n\t"
00510 "psubusb %%mm1, %%mm5 \n\t"
00511 "por %%mm5, %%mm4 \n\t"
00512 "psubusb %%mm0, %%mm4 \n\t"
00513 "movq %%mm4, %%mm3 \n\t"
// keep d only where d <= 2*pQPb (saturating compare); b01 is subtracted
// from d first
00514 "movq %2, %%mm0 \n\t"
00515 "paddusb %%mm0, %%mm0 \n\t"
00516 "psubusb %%mm0, %%mm4 \n\t"
00517 "pcmpeqb %%mm7, %%mm4 \n\t"
00518 "psubusb "MANGLE(b01)", %%mm3 \n\t"
00519 "pand %%mm4, %%mm3 \n\t"
00520
// mm1 = d/2, mm3 = average of d/2 and d/4 (about 3d/8)
00521 PAVGB(%%mm7, %%mm3)
00522 "movq %%mm3, %%mm1 \n\t"
00523 PAVGB(%%mm7, %%mm3)
00524 PAVGB(%%mm1, %%mm3)
00525
// The pxor with the direction mask before and after the saturating
// add/subtract flips the bytes where needed, so one instruction moves each
// byte in the correct direction.
// row4
00526 "movq (%0, %1, 4), %%mm0 \n\t"
00527 "pxor %%mm2, %%mm0 \n\t"
00528 "psubusb %%mm3, %%mm0 \n\t"
00529 "pxor %%mm2, %%mm0 \n\t"
00530 "movq %%mm0, (%0, %1, 4) \n\t"
00531
// row5
00532 "movq (%%"REG_c"), %%mm0 \n\t"
00533 "pxor %%mm2, %%mm0 \n\t"
00534 "paddusb %%mm3, %%mm0 \n\t"
00535 "pxor %%mm2, %%mm0 \n\t"
00536 "movq %%mm0, (%%"REG_c") \n\t"
00537
// mm1 = d/4
00538 PAVGB(%%mm7, %%mm1)
00539
// row3
00540 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00541 "pxor %%mm2, %%mm0 \n\t"
00542 "psubusb %%mm1, %%mm0 \n\t"
00543 "pxor %%mm2, %%mm0 \n\t"
00544 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00545
// row6
00546 "movq (%%"REG_c", %1), %%mm0 \n\t"
00547 "pxor %%mm2, %%mm0 \n\t"
00548 "paddusb %%mm1, %%mm0 \n\t"
00549 "pxor %%mm2, %%mm0 \n\t"
00550 "movq %%mm0, (%%"REG_c", %1) \n\t"
00551
// mm1 = d/8
00552 PAVGB(%%mm7, %%mm1)
00553
// row2
00554 "movq (%%"REG_a", %1), %%mm0 \n\t"
00555 "pxor %%mm2, %%mm0 \n\t"
00556 "psubusb %%mm1, %%mm0 \n\t"
00557 "pxor %%mm2, %%mm0 \n\t"
00558 "movq %%mm0, (%%"REG_a", %1) \n\t"
00559
// row7
00560 "movq (%%"REG_c", %1, 2), %%mm0 \n\t"
00561 "pxor %%mm2, %%mm0 \n\t"
00562 "paddusb %%mm1, %%mm0 \n\t"
00563 "pxor %%mm2, %%mm0 \n\t"
00564 "movq %%mm0, (%%"REG_c", %1, 2) \n\t"
00565
00566 :
00567 : "r" (src), "r" ((long)stride), "m" (co->pQPb)
00568 : "%"REG_a, "%"REG_c
00569 );
00570 #else
00571
// Offsets of rows 1..7 relative to src (after src has been advanced below).
00572 const int l1= stride;
00573 const int l2= stride + l1;
00574 const int l3= stride + l2;
00575 const int l4= stride + l3;
00576 const int l5= stride + l4;
00577 const int l6= stride + l5;
00578 const int l7= stride + l6;
00579
00580
00581 int x;
00582
00583 src+= stride*3;
00584 for(x=0; x<BLOCK_SIZE; x++)
00585 {
// a, c: differences next to the boundary; b: the step across it
00586 int a= src[l3] - src[l4];
00587 int b= src[l4] - src[l5];
00588 int c= src[l5] - src[l6];
00589
// how much the boundary step exceeds the average neighbouring step
00590 int d= ABS(b) - ((ABS(a) + ABS(c))>>1);
00591 d= MAX(d, 0);
00592
00593 if(d < co->QP*2)
00594 {
// correction, signed so that it reduces the step b
00595 int v = d * SIGN(-b);
00596
00597 src[l2] +=v>>3;
00598 src[l3] +=v>>2;
00599 src[l4] +=(3*v)>>3;
00600 src[l5] -=(3*v)>>3;
00601 src[l6] -=v>>2;
00602 src[l7] -=v>>3;
00603
00604 }
// next column
00605 src++;
00606 }
00607 #endif
00608 }
00609
00610 #ifndef HAVE_ALTIVEC
/**
 * Default vertical deblocking filter: corrects the two rows next to a block
 * boundary, using 8 consecutive rows as input.
 *
 * Three implementations are selected by the preprocessor:
 *  - HAVE_MMX2 / HAVE_3DNOW: approximate bytewise version built on PAVGB
 *    (a slower, more accurate variant is kept under "#if 0"),
 *  - HAVE_MMX: 16 bit word arithmetic, using a scratch area addressed
 *    relative to the stack pointer,
 *  - plain C.
 *
 * C path, per column, with l1..l8 being rows 1..8 below src + 3*stride:
 *   middleEnergy = 5*(l5 - l4) + 2*(l3 - l6)
 *   if |middleEnergy| < 8*QP:
 *     d = max(|middleEnergy| - min(|leftEnergy|, |rightEnergy|), 0)
 *     d = ((5*d + 32) >> 6) * SIGN(-middleEnergy), clamped between 0 and
 *         q = (l4 - l5)/2
 *     l4 -= d; l5 += d
 *
 * Both asm paths advance src by 4*stride and index rows from 0, so they touch
 * the same 8 rows and modify the same two rows as the C path.
 *
 * @param src    top of the area
 * @param stride distance between two consecutive rows
 * @param c      context; pQPb is read by the asm paths, QP by the C path
 */
00611 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
00612 {
00613 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
00614
00615
00616
00617
00618
00619
00620
00621
00622
00623
00624
00625
00626
00627
00628 src+= stride*4;
00629 asm volatile(
00630
// Disabled alternative implementation; the active code starts after the
// matching #endif.
00631 #if 0 //slightly more accurate and slightly slower
00632 "pxor %%mm7, %%mm7 \n\t"
00633 "lea (%0, %1), %%"REG_a" \n\t"
00634 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00635
00636
00637
00638
00639
00640 "movq (%0, %1, 2), %%mm0 \n\t"
00641 "movq (%0), %%mm1 \n\t"
00642 "movq %%mm0, %%mm2 \n\t"
00643 PAVGB(%%mm7, %%mm0)
00644 PAVGB(%%mm1, %%mm0)
00645 PAVGB(%%mm2, %%mm0)
00646
00647 "movq (%%"REG_a"), %%mm1 \n\t"
00648 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
00649 "movq %%mm1, %%mm4 \n\t"
00650 PAVGB(%%mm7, %%mm1)
00651 PAVGB(%%mm3, %%mm1)
00652 PAVGB(%%mm4, %%mm1)
00653
00654 "movq %%mm0, %%mm4 \n\t"
00655 "psubusb %%mm1, %%mm0 \n\t"
00656 "psubusb %%mm4, %%mm1 \n\t"
00657 "por %%mm0, %%mm1 \n\t"
00658
00659
00660 "movq (%0, %1, 4), %%mm0 \n\t"
00661 "movq %%mm0, %%mm4 \n\t"
00662 PAVGB(%%mm7, %%mm0)
00663 PAVGB(%%mm2, %%mm0)
00664 PAVGB(%%mm4, %%mm0)
00665
00666 "movq (%%"REG_c"), %%mm2 \n\t"
00667 "movq %%mm3, %%mm5 \n\t"
00668 PAVGB(%%mm7, %%mm3)
00669 PAVGB(%%mm2, %%mm3)
00670 PAVGB(%%mm5, %%mm3)
00671
00672 "movq %%mm0, %%mm6 \n\t"
00673 "psubusb %%mm3, %%mm0 \n\t"
00674 "psubusb %%mm6, %%mm3 \n\t"
00675 "por %%mm0, %%mm3 \n\t"
00676 "pcmpeqb %%mm7, %%mm0 \n\t"
00677
00678
00679 "movq (%%"REG_c", %1), %%mm6 \n\t"
00680 "movq %%mm6, %%mm5 \n\t"
00681 PAVGB(%%mm7, %%mm6)
00682 PAVGB(%%mm4, %%mm6)
00683 PAVGB(%%mm5, %%mm6)
00684
00685 "movq (%%"REG_c", %1, 2), %%mm5 \n\t"
00686 "movq %%mm2, %%mm4 \n\t"
00687 PAVGB(%%mm7, %%mm2)
00688 PAVGB(%%mm5, %%mm2)
00689 PAVGB(%%mm4, %%mm2)
00690
00691 "movq %%mm6, %%mm4 \n\t"
00692 "psubusb %%mm2, %%mm6 \n\t"
00693 "psubusb %%mm4, %%mm2 \n\t"
00694 "por %%mm6, %%mm2 \n\t"
00695
00696
00697
00698 PMINUB(%%mm2, %%mm1, %%mm4)
00699 "movq %2, %%mm4 \n\t"
00700 "paddusb "MANGLE(b01)", %%mm4 \n\t"
00701 "pcmpgtb %%mm3, %%mm4 \n\t"
00702 "psubusb %%mm1, %%mm3 \n\t"
00703 "pand %%mm4, %%mm3 \n\t"
00704
00705 "movq %%mm3, %%mm1 \n\t"
00706
00707 PAVGB(%%mm7, %%mm3)
00708 PAVGB(%%mm7, %%mm3)
00709 "paddusb %%mm1, %%mm3 \n\t"
00710
00711
00712 "movq (%%"REG_a", %1, 2), %%mm6 \n\t"
00713 "movq (%0, %1, 4), %%mm5 \n\t"
00714 "movq (%0, %1, 4), %%mm4 \n\t"
00715 "psubusb %%mm6, %%mm5 \n\t"
00716 "psubusb %%mm4, %%mm6 \n\t"
00717 "por %%mm6, %%mm5 \n\t"
00718 "pcmpeqb %%mm7, %%mm6 \n\t"
00719 "pxor %%mm6, %%mm0 \n\t"
00720 "pand %%mm0, %%mm3 \n\t"
00721 PMINUB(%%mm5, %%mm3, %%mm0)
00722
00723 "psubusb "MANGLE(b01)", %%mm3 \n\t"
00724 PAVGB(%%mm7, %%mm3)
00725
00726 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00727 "movq (%0, %1, 4), %%mm2 \n\t"
00728 "pxor %%mm6, %%mm0 \n\t"
00729 "pxor %%mm6, %%mm2 \n\t"
00730 "psubb %%mm3, %%mm0 \n\t"
00731 "paddb %%mm3, %%mm2 \n\t"
00732 "pxor %%mm6, %%mm0 \n\t"
00733 "pxor %%mm6, %%mm2 \n\t"
00734 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00735 "movq %%mm2, (%0, %1, 4) \n\t"
00736 #endif
00737
// Active MMX2/3DNow implementation. rowN = src + N*stride (src already
// advanced by 4 rows). REG_a = row1; mm6 = all ones, used with pxor to
// complement bytes so that PAVGB can form scaled differences.
00738 "lea (%0, %1), %%"REG_a" \n\t"
00739 "pcmpeqb %%mm6, %%mm6 \n\t"
00740
00741
00742
00743
00744
// mm1 = ~row3, mm0 = avg(row4, ~row3)
00745 "movq (%%"REG_a", %1, 2), %%mm1 \n\t"
00746 "movq (%0, %1, 4), %%mm0 \n\t"
00747 "pxor %%mm6, %%mm1 \n\t"
00748 PAVGB(%%mm1, %%mm0)
00749
00750
// mm2 = ~row5, mm3 = row2; REG_c = row5; mm4 collects the middle term
00751 "movq (%%"REG_a", %1, 4), %%mm2 \n\t"
00752 "movq (%%"REG_a", %1), %%mm3 \n\t"
00753 "pxor %%mm6, %%mm2 \n\t"
00754 "movq %%mm2, %%mm5 \n\t"
00755 "movq "MANGLE(b80)", %%mm4 \n\t"
00756 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
00757 PAVGB(%%mm3, %%mm2)
00758 PAVGB(%%mm0, %%mm4)
00759 PAVGB(%%mm2, %%mm4)
00760 PAVGB(%%mm0, %%mm4)
00761
00762
// mm3 collects the term built from rows 0..3
00763 "movq (%%"REG_a"), %%mm2 \n\t"
00764 "pxor %%mm6, %%mm2 \n\t"
00765 PAVGB(%%mm3, %%mm2)
00766 PAVGB((%0), %%mm1)
00767 "movq "MANGLE(b80)", %%mm3 \n\t"
00768 PAVGB(%%mm2, %%mm3)
00769 PAVGB(%%mm1, %%mm3)
00770 PAVGB(%%mm2, %%mm3)
00771
00772
// mm2 collects the term built from rows 4..7
00773 PAVGB((%%REGc, %1), %%mm5)
00774 "movq (%%"REG_c", %1, 2), %%mm1 \n\t"
00775 "pxor %%mm6, %%mm1 \n\t"
00776 PAVGB((%0, %1, 4), %%mm1)
00777 "movq "MANGLE(b80)", %%mm2 \n\t"
00778 PAVGB(%%mm5, %%mm2)
00779 PAVGB(%%mm1, %%mm2)
00780 PAVGB(%%mm5, %%mm2)
00781
00782
// absolute values of both outer terms (max(x, 0-x)), then their minimum in mm3
00783 "movq "MANGLE(b00)", %%mm1 \n\t"
00784 "movq "MANGLE(b00)", %%mm5 \n\t"
00785 "psubb %%mm2, %%mm1 \n\t"
00786 "psubb %%mm3, %%mm5 \n\t"
00787 PMAXUB(%%mm1, %%mm2)
00788 PMAXUB(%%mm5, %%mm3)
00789 PMINUB(%%mm2, %%mm3, %%mm1)
00790
00791
00792
// mm2 = threshold derived from pQPb; mm1 = sign mask of the middle term,
// mm4 = its absolute value, then reduced by the outer minimum (saturating)
00793 "movq "MANGLE(b00)", %%mm7 \n\t"
00794 "movq %2, %%mm2 \n\t"
00795 PAVGB(%%mm6, %%mm2)
00796 "psubb %%mm6, %%mm2 \n\t"
00797
00798 "movq %%mm4, %%mm1 \n\t"
00799 "pcmpgtb %%mm7, %%mm1 \n\t"
00800 "pxor %%mm1, %%mm4 \n\t"
00801 "psubb %%mm1, %%mm4 \n\t"
00802 "pcmpgtb %%mm4, %%mm2 \n\t"
00803 "psubusb %%mm3, %%mm4 \n\t"
00804
00805
// scale the correction and keep it only where the threshold test passed
00806 "movq %%mm4, %%mm3 \n\t"
00807 "psubusb "MANGLE(b01)", %%mm4 \n\t"
00808 PAVGB(%%mm7, %%mm4)
00809 PAVGB(%%mm7, %%mm4)
00810 "paddb %%mm3, %%mm4 \n\t"
00811 "pand %%mm2, %%mm4 \n\t"
00812
// limit the correction by a bound derived from mm0 (avg(row4, ~row3))
00813 "movq "MANGLE(b80)", %%mm5 \n\t"
00814 "psubb %%mm0, %%mm5 \n\t"
00815 "paddsb %%mm6, %%mm5 \n\t"
00816 "pcmpgtb %%mm5, %%mm7 \n\t"
00817 "pxor %%mm7, %%mm5 \n\t"
00818
00819 PMINUB(%%mm5, %%mm4, %%mm3)
00820 "pxor %%mm1, %%mm7 \n\t"
00821
// apply: row3 and row4 are moved in opposite directions by mm4; the pxor
// with the sign mask mm1 before/after selects the direction per byte
00822 "pand %%mm7, %%mm4 \n\t"
00823 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00824 "movq (%0, %1, 4), %%mm2 \n\t"
00825 "pxor %%mm1, %%mm0 \n\t"
00826 "pxor %%mm1, %%mm2 \n\t"
00827 "paddb %%mm4, %%mm0 \n\t"
00828 "psubb %%mm4, %%mm2 \n\t"
00829 "pxor %%mm1, %%mm0 \n\t"
00830 "pxor %%mm1, %%mm2 \n\t"
00831 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
00832 "movq %%mm2, (%0, %1, 4) \n\t"
00833
00834 :
00835 : "r" (src), "r" ((long)stride), "m" (c->pQPb)
00836 : "%"REG_a, "%"REG_c
00837 );
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903 #elif defined (HAVE_MMX)
// Plain MMX implementation using 16 bit words: every row is split into a low
// half (even numbered register of a pair) and a high half (odd numbered one).
// LN below means row N relative to src + 4*stride.
00904 src+= stride*4;
00905 asm volatile(
00906 "pxor %%mm7, %%mm7 \n\t"
// REG_c = 8 byte aligned scratch area starting at least 40 bytes below the
// stack pointer; 32 bytes of it are used.
// NOTE(review): this memory lies below the stack pointer and is not
// reserved by the compiler - confirm this is safe on the targeted ABIs.
00907 "lea -40(%%"REG_SP"), %%"REG_c" \n\t"
00908 "and "ALIGN_MASK", %%"REG_c" \n\t"
00909
00910
00911
00912
// mm0/mm1 = L0
00913 "movq (%0), %%mm0 \n\t"
00914 "movq %%mm0, %%mm1 \n\t"
00915 "punpcklbw %%mm7, %%mm0 \n\t"
00916 "punpckhbw %%mm7, %%mm1 \n\t"
00917
// mm2/mm3 = L1, REG_a = row2
00918 "movq (%0, %1), %%mm2 \n\t"
00919 "lea (%0, %1, 2), %%"REG_a" \n\t"
00920 "movq %%mm2, %%mm3 \n\t"
00921 "punpcklbw %%mm7, %%mm2 \n\t"
00922 "punpckhbw %%mm7, %%mm3 \n\t"
00923
// mm4/mm5 = L2
00924 "movq (%%"REG_a"), %%mm4 \n\t"
00925 "movq %%mm4, %%mm5 \n\t"
00926 "punpcklbw %%mm7, %%mm4 \n\t"
00927 "punpckhbw %%mm7, %%mm5 \n\t"
00928
// mm0/mm1 = 2*L0 - 5*L1 + 5*L2
00929 "paddw %%mm0, %%mm0 \n\t"
00930 "paddw %%mm1, %%mm1 \n\t"
00931 "psubw %%mm4, %%mm2 \n\t"
00932 "psubw %%mm5, %%mm3 \n\t"
00933 "psubw %%mm2, %%mm0 \n\t"
00934 "psubw %%mm3, %%mm1 \n\t"
00935
00936 "psllw $2, %%mm2 \n\t"
00937 "psllw $2, %%mm3 \n\t"
00938 "psubw %%mm2, %%mm0 \n\t"
00939 "psubw %%mm3, %%mm1 \n\t"
00940
// mm2/mm3 = L3
00941 "movq (%%"REG_a", %1), %%mm2 \n\t"
00942 "movq %%mm2, %%mm3 \n\t"
00943 "punpcklbw %%mm7, %%mm2 \n\t"
00944 "punpckhbw %%mm7, %%mm3 \n\t"
00945
// left energy = 2*L0 - 5*L1 + 5*L2 - 2*L3, saved in scratch[0..15]
00946 "psubw %%mm2, %%mm0 \n\t"
00947 "psubw %%mm3, %%mm1 \n\t"
00948 "psubw %%mm2, %%mm0 \n\t"
00949 "psubw %%mm3, %%mm1 \n\t"
00950 "movq %%mm0, (%%"REG_c") \n\t"
00951 "movq %%mm1, 8(%%"REG_c") \n\t"
00952
// mm0/mm1 = L4
00953 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
00954 "movq %%mm0, %%mm1 \n\t"
00955 "punpcklbw %%mm7, %%mm0 \n\t"
00956 "punpckhbw %%mm7, %%mm1 \n\t"
00957
// L3 - L4 is saved in scratch[16..31]; mm4/mm5 = 2*L2 - (L3 - L4)
00958 "psubw %%mm0, %%mm2 \n\t"
00959 "psubw %%mm1, %%mm3 \n\t"
00960 "movq %%mm2, 16(%%"REG_c") \n\t"
00961 "movq %%mm3, 24(%%"REG_c") \n\t"
00962 "paddw %%mm4, %%mm4 \n\t"
00963 "paddw %%mm5, %%mm5 \n\t"
00964 "psubw %%mm2, %%mm4 \n\t"
00965 "psubw %%mm3, %%mm5 \n\t"
00966
// %0 = row3 from here on (src is an in/out operand for this reason)
00967 "lea (%%"REG_a", %1), %0 \n\t"
00968 "psllw $2, %%mm2 \n\t"
00969 "psllw $2, %%mm3 \n\t"
00970 "psubw %%mm2, %%mm4 \n\t"
00971 "psubw %%mm3, %%mm5 \n\t"
00972
// mm2/mm3 = L5; middle energy = 2*L2 - 5*L3 + 5*L4 - 2*L5 in mm4/mm5
00973 "movq (%0, %1, 2), %%mm2 \n\t"
00974 "movq %%mm2, %%mm3 \n\t"
00975 "punpcklbw %%mm7, %%mm2 \n\t"
00976 "punpckhbw %%mm7, %%mm3 \n\t"
00977 "psubw %%mm2, %%mm4 \n\t"
00978 "psubw %%mm3, %%mm5 \n\t"
00979 "psubw %%mm2, %%mm4 \n\t"
00980 "psubw %%mm3, %%mm5 \n\t"
00981
// mm2/mm3 = L5 - L6 (L6 is loaded twice because no register is free)
00982 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
00983 "punpcklbw %%mm7, %%mm6 \n\t"
00984 "psubw %%mm6, %%mm2 \n\t"
00985 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
00986 "punpckhbw %%mm7, %%mm6 \n\t"
00987 "psubw %%mm6, %%mm3 \n\t"
00988
// mm0/mm1 = 2*L4 - 5*L5 + 5*L6
00989 "paddw %%mm0, %%mm0 \n\t"
00990 "paddw %%mm1, %%mm1 \n\t"
00991 "psubw %%mm2, %%mm0 \n\t"
00992 "psubw %%mm3, %%mm1 \n\t"
00993
00994 "psllw $2, %%mm2 \n\t"
00995 "psllw $2, %%mm3 \n\t"
00996 "psubw %%mm2, %%mm0 \n\t"
00997 "psubw %%mm3, %%mm1 \n\t"
00998
// mm2/mm3 = L7
00999 "movq (%0, %1, 4), %%mm2 \n\t"
01000 "movq %%mm2, %%mm3 \n\t"
01001 "punpcklbw %%mm7, %%mm2 \n\t"
01002 "punpckhbw %%mm7, %%mm3 \n\t"
01003
// right energy = 2*L4 - 5*L5 + 5*L6 - 2*L7 in mm0/mm1
01004 "paddw %%mm2, %%mm2 \n\t"
01005 "paddw %%mm3, %%mm3 \n\t"
01006 "psubw %%mm2, %%mm0 \n\t"
01007 "psubw %%mm3, %%mm1 \n\t"
01008
// reload the left energy into mm2/mm3
01009 "movq (%%"REG_c"), %%mm2 \n\t"
01010 "movq 8(%%"REG_c"), %%mm3 \n\t"
01011
// absolute values of right (mm0/mm1) and left (mm2/mm3) energy
01012 #ifdef HAVE_MMX2
01013 "movq %%mm7, %%mm6 \n\t"
01014 "psubw %%mm0, %%mm6 \n\t"
01015 "pmaxsw %%mm6, %%mm0 \n\t"
01016 "movq %%mm7, %%mm6 \n\t"
01017 "psubw %%mm1, %%mm6 \n\t"
01018 "pmaxsw %%mm6, %%mm1 \n\t"
01019 "movq %%mm7, %%mm6 \n\t"
01020 "psubw %%mm2, %%mm6 \n\t"
01021 "pmaxsw %%mm6, %%mm2 \n\t"
01022 "movq %%mm7, %%mm6 \n\t"
01023 "psubw %%mm3, %%mm6 \n\t"
01024 "pmaxsw %%mm6, %%mm3 \n\t"
01025 #else
01026 "movq %%mm7, %%mm6 \n\t"
01027 "pcmpgtw %%mm0, %%mm6 \n\t"
01028 "pxor %%mm6, %%mm0 \n\t"
01029 "psubw %%mm6, %%mm0 \n\t"
01030 "movq %%mm7, %%mm6 \n\t"
01031 "pcmpgtw %%mm1, %%mm6 \n\t"
01032 "pxor %%mm6, %%mm1 \n\t"
01033 "psubw %%mm6, %%mm1 \n\t"
01034 "movq %%mm7, %%mm6 \n\t"
01035 "pcmpgtw %%mm2, %%mm6 \n\t"
01036 "pxor %%mm6, %%mm2 \n\t"
01037 "psubw %%mm6, %%mm2 \n\t"
01038 "movq %%mm7, %%mm6 \n\t"
01039 "pcmpgtw %%mm3, %%mm6 \n\t"
01040 "pxor %%mm6, %%mm3 \n\t"
01041 "psubw %%mm6, %%mm3 \n\t"
01042 #endif
01043
// mm0/mm1 = min(|left energy|, |right energy|)
01044 #ifdef HAVE_MMX2
01045 "pminsw %%mm2, %%mm0 \n\t"
01046 "pminsw %%mm3, %%mm1 \n\t"
01047 #else
01048 "movq %%mm0, %%mm6 \n\t"
01049 "psubusw %%mm2, %%mm6 \n\t"
01050 "psubw %%mm6, %%mm0 \n\t"
01051 "movq %%mm1, %%mm6 \n\t"
01052 "psubusw %%mm3, %%mm6 \n\t"
01053 "psubw %%mm6, %%mm1 \n\t"
01054 #endif
01055
// mm2 = the low 4 bytes of pQPb widened to words
01056 "movd %2, %%mm2 \n\t"
01057 "punpcklbw %%mm7, %%mm2 \n\t"
01058
// mm6/mm7 = sign masks of the middle energy, mm4/mm5 = its absolute value
// (mm7 stops being the zero register here)
01059 "movq %%mm7, %%mm6 \n\t"
01060 "pcmpgtw %%mm4, %%mm6 \n\t"
01061 "pxor %%mm6, %%mm4 \n\t"
01062 "psubw %%mm6, %%mm4 \n\t"
01063 "pcmpgtw %%mm5, %%mm7 \n\t"
01064 "pxor %%mm7, %%mm5 \n\t"
01065 "psubw %%mm7, %%mm5 \n\t"
01066
// zero the words where |middle energy| is not below 8*QP
01067 "psllw $3, %%mm2 \n\t"
01068 "movq %%mm2, %%mm3 \n\t"
01069 "pcmpgtw %%mm4, %%mm2 \n\t"
01070 "pcmpgtw %%mm5, %%mm3 \n\t"
01071 "pand %%mm2, %%mm4 \n\t"
01072 "pand %%mm3, %%mm5 \n\t"
01073
01074
// d = max(|middle energy| - min(|left|, |right|), 0)
01075 "psubusw %%mm0, %%mm4 \n\t"
01076 "psubusw %%mm1, %%mm5 \n\t"
01077
01078
// d = (d * w05 + w20) >> 6; presumably (5*d + 32) >> 6 as in the C path -
// the constants are defined outside this view
01079 "movq "MANGLE(w05)", %%mm2 \n\t"
01080 "pmullw %%mm2, %%mm4 \n\t"
01081 "pmullw %%mm2, %%mm5 \n\t"
01082 "movq "MANGLE(w20)", %%mm2 \n\t"
01083 "paddw %%mm2, %%mm4 \n\t"
01084 "paddw %%mm2, %%mm5 \n\t"
01085 "psrlw $6, %%mm4 \n\t"
01086 "psrlw $6, %%mm5 \n\t"
01087
// reload L3 - L4 from the scratch area
01088 "movq 16(%%"REG_c"), %%mm0 \n\t"
01089 "movq 24(%%"REG_c"), %%mm1 \n\t"
01090
01091 "pxor %%mm2, %%mm2 \n\t"
01092 "pxor %%mm3, %%mm3 \n\t"
01093
// mm2/mm3 = sign masks of (L3 - L4), mm0/mm1 = |L3 - L4| / 2
01094 "pcmpgtw %%mm0, %%mm2 \n\t"
01095 "pcmpgtw %%mm1, %%mm3 \n\t"
01096 "pxor %%mm2, %%mm0 \n\t"
01097 "pxor %%mm3, %%mm1 \n\t"
01098 "psubw %%mm2, %%mm0 \n\t"
01099 "psubw %%mm3, %%mm1 \n\t"
01100 "psrlw $1, %%mm0 \n\t"
01101 "psrlw $1, %%mm1 \n\t"
01102
// keep d only where the signs of (L3 - L4) and the middle energy differ
01103 "pxor %%mm6, %%mm2 \n\t"
01104 "pxor %%mm7, %%mm3 \n\t"
01105 "pand %%mm2, %%mm4 \n\t"
01106 "pand %%mm3, %%mm5 \n\t"
01107
// d = min(d, |L3 - L4| / 2)
01108 #ifdef HAVE_MMX2
01109 "pminsw %%mm0, %%mm4 \n\t"
01110 "pminsw %%mm1, %%mm5 \n\t"
01111 #else
01112 "movq %%mm4, %%mm2 \n\t"
01113 "psubusw %%mm0, %%mm2 \n\t"
01114 "psubw %%mm2, %%mm4 \n\t"
01115 "movq %%mm5, %%mm2 \n\t"
01116 "psubusw %%mm1, %%mm2 \n\t"
01117 "psubw %%mm2, %%mm5 \n\t"
01118 #endif
// give d the sign of the middle energy, pack back to bytes and apply:
// L3 += d, L4 -= d (%0 points to row3 here)
01119 "pxor %%mm6, %%mm4 \n\t"
01120 "pxor %%mm7, %%mm5 \n\t"
01121 "psubw %%mm6, %%mm4 \n\t"
01122 "psubw %%mm7, %%mm5 \n\t"
01123 "packsswb %%mm5, %%mm4 \n\t"
01124 "movq (%0), %%mm0 \n\t"
01125 "paddb %%mm4, %%mm0 \n\t"
01126 "movq %%mm0, (%0) \n\t"
01127 "movq (%0, %1), %%mm0 \n\t"
01128 "psubb %%mm4, %%mm0 \n\t"
01129 "movq %%mm0, (%0, %1) \n\t"
01130
01131 : "+r" (src)
01132 : "r" ((long)stride), "m" (c->pQPb)
01133 : "%"REG_a, "%"REG_c
01134 );
01135 #else
// Offsets of rows 1..8 relative to src (after src has been advanced below).
01136 const int l1= stride;
01137 const int l2= stride + l1;
01138 const int l3= stride + l2;
01139 const int l4= stride + l3;
01140 const int l5= stride + l4;
01141 const int l6= stride + l5;
01142 const int l7= stride + l6;
01143 const int l8= stride + l7;
01144
01145 int x;
01146 src+= stride*3;
01147 for(x=0; x<BLOCK_SIZE; x++)
01148 {
// weighted second-difference measure across the l4/l5 boundary
01149 const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
01150 if(ABS(middleEnergy) < 8*c->QP)
01151 {
// q bounds the correction to half of the step between l4 and l5
01152 const int q=(src[l4] - src[l5])/2;
// the same measure, taken 2 rows above and 2 rows below the boundary
01153 const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
01154 const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
01155
01156 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
01157 d= MAX(d, 0);
01158
// scale by 5/64 with rounding, then point it against middleEnergy
01159 d= (5*d + 32) >> 6;
01160 d*= SIGN(-middleEnergy);
01161
// clamp d to the range between 0 and q (q may be negative)
01162 if(q>0)
01163 {
01164 d= d<0 ? 0 : d;
01165 d= d>q ? q : d;
01166 }
01167 else
01168 {
01169 d= d>0 ? 0 : d;
01170 d= d<q ? q : d;
01171 }
01172
01173 src[l4]-= d;
01174 src[l5]+= d;
01175 }
// next column
01176 src++;
01177 }
01178 #endif
01179 }
01180 #endif //HAVE_ALTIVEC
01181
01182 #ifndef HAVE_ALTIVEC
01183 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
01184 {
01185 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01186 asm volatile(
01187 "pxor %%mm6, %%mm6 \n\t"
01188 "pcmpeqb %%mm7, %%mm7 \n\t"
01189 "movq %2, %%mm0 \n\t"
01190 "punpcklbw %%mm6, %%mm0 \n\t"
01191 "psrlw $1, %%mm0 \n\t"
01192 "psubw %%mm7, %%mm0 \n\t"
01193 "packuswb %%mm0, %%mm0 \n\t"
01194 "movq %%mm0, %3 \n\t"
01195
01196 "lea (%0, %1), %%"REG_a" \n\t"
01197 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01198
01199
01200
01201
01202 #undef FIND_MIN_MAX
01203 #ifdef HAVE_MMX2
01204 #define REAL_FIND_MIN_MAX(addr)\
01205 "movq " #addr ", %%mm0 \n\t"\
01206 "pminub %%mm0, %%mm7 \n\t"\
01207 "pmaxub %%mm0, %%mm6 \n\t"
01208 #else
01209 #define REAL_FIND_MIN_MAX(addr)\
01210 "movq " #addr ", %%mm0 \n\t"\
01211 "movq %%mm7, %%mm1 \n\t"\
01212 "psubusb %%mm0, %%mm6 \n\t"\
01213 "paddb %%mm0, %%mm6 \n\t"\
01214 "psubusb %%mm0, %%mm1 \n\t"\
01215 "psubb %%mm1, %%mm7 \n\t"
01216 #endif
01217 #define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr)
01218
01219 FIND_MIN_MAX((%%REGa))
01220 FIND_MIN_MAX((%%REGa, %1))
01221 FIND_MIN_MAX((%%REGa, %1, 2))
01222 FIND_MIN_MAX((%0, %1, 4))
01223 FIND_MIN_MAX((%%REGd))
01224 FIND_MIN_MAX((%%REGd, %1))
01225 FIND_MIN_MAX((%%REGd, %1, 2))
01226 FIND_MIN_MAX((%0, %1, 8))
01227
01228 "movq %%mm7, %%mm4 \n\t"
01229 "psrlq $8, %%mm7 \n\t"
01230 #ifdef HAVE_MMX2
01231 "pminub %%mm4, %%mm7 \n\t"
01232 "pshufw $0xF9, %%mm7, %%mm4 \n\t"
01233 "pminub %%mm4, %%mm7 \n\t"
01234 "pshufw $0xFE, %%mm7, %%mm4 \n\t"
01235 "pminub %%mm4, %%mm7 \n\t"
01236 #else
01237 "movq %%mm7, %%mm1 \n\t"
01238 "psubusb %%mm4, %%mm1 \n\t"
01239 "psubb %%mm1, %%mm7 \n\t"
01240 "movq %%mm7, %%mm4 \n\t"
01241 "psrlq $16, %%mm7 \n\t"
01242 "movq %%mm7, %%mm1 \n\t"
01243 "psubusb %%mm4, %%mm1 \n\t"
01244 "psubb %%mm1, %%mm7 \n\t"
01245 "movq %%mm7, %%mm4 \n\t"
01246 "psrlq $32, %%mm7 \n\t"
01247 "movq %%mm7, %%mm1 \n\t"
01248 "psubusb %%mm4, %%mm1 \n\t"
01249 "psubb %%mm1, %%mm7 \n\t"
01250 #endif
01251
01252
01253 "movq %%mm6, %%mm4 \n\t"
01254 "psrlq $8, %%mm6 \n\t"
01255 #ifdef HAVE_MMX2
01256 "pmaxub %%mm4, %%mm6 \n\t"
01257 "pshufw $0xF9, %%mm6, %%mm4 \n\t"
01258 "pmaxub %%mm4, %%mm6 \n\t"
01259 "pshufw $0xFE, %%mm6, %%mm4 \n\t"
01260 "pmaxub %%mm4, %%mm6 \n\t"
01261 #else
01262 "psubusb %%mm4, %%mm6 \n\t"
01263 "paddb %%mm4, %%mm6 \n\t"
01264 "movq %%mm6, %%mm4 \n\t"
01265 "psrlq $16, %%mm6 \n\t"
01266 "psubusb %%mm4, %%mm6 \n\t"
01267 "paddb %%mm4, %%mm6 \n\t"
01268 "movq %%mm6, %%mm4 \n\t"
01269 "psrlq $32, %%mm6 \n\t"
01270 "psubusb %%mm4, %%mm6 \n\t"
01271 "paddb %%mm4, %%mm6 \n\t"
01272 #endif
01273 "movq %%mm6, %%mm0 \n\t"
01274 "psubb %%mm7, %%mm6 \n\t"
01275 "movd %%mm6, %%ecx \n\t"
01276 "cmpb "MANGLE(deringThreshold)", %%cl \n\t"
01277 " jb 1f \n\t"
01278 "lea -24(%%"REG_SP"), %%"REG_c" \n\t"
01279 "and "ALIGN_MASK", %%"REG_c" \n\t"
01280 PAVGB(%%mm0, %%mm7)
01281 "punpcklbw %%mm7, %%mm7 \n\t"
01282 "punpcklbw %%mm7, %%mm7 \n\t"
01283 "punpcklbw %%mm7, %%mm7 \n\t"
01284 "movq %%mm7, (%%"REG_c") \n\t"
01285
01286 "movq (%0), %%mm0 \n\t"
01287 "movq %%mm0, %%mm1 \n\t"
01288 "movq %%mm0, %%mm2 \n\t"
01289 "psllq $8, %%mm1 \n\t"
01290 "psrlq $8, %%mm2 \n\t"
01291 "movd -4(%0), %%mm3 \n\t"
01292 "movd 8(%0), %%mm4 \n\t"
01293 "psrlq $24, %%mm3 \n\t"
01294 "psllq $56, %%mm4 \n\t"
01295 "por %%mm3, %%mm1 \n\t"
01296 "por %%mm4, %%mm2 \n\t"
01297 "movq %%mm1, %%mm3 \n\t"
01298 PAVGB(%%mm2, %%mm1)
01299 PAVGB(%%mm0, %%mm1)
01300 "psubusb %%mm7, %%mm0 \n\t"
01301 "psubusb %%mm7, %%mm2 \n\t"
01302 "psubusb %%mm7, %%mm3 \n\t"
01303 "pcmpeqb "MANGLE(b00)", %%mm0 \n\t"
01304 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t"
01305 "pcmpeqb "MANGLE(b00)", %%mm3 \n\t"
01306 "paddb %%mm2, %%mm0 \n\t"
01307 "paddb %%mm3, %%mm0 \n\t"
01308
01309 "movq (%%"REG_a"), %%mm2 \n\t"
01310 "movq %%mm2, %%mm3 \n\t"
01311 "movq %%mm2, %%mm4 \n\t"
01312 "psllq $8, %%mm3 \n\t"
01313 "psrlq $8, %%mm4 \n\t"
01314 "movd -4(%%"REG_a"), %%mm5 \n\t"
01315 "movd 8(%%"REG_a"), %%mm6 \n\t"
01316 "psrlq $24, %%mm5 \n\t"
01317 "psllq $56, %%mm6 \n\t"
01318 "por %%mm5, %%mm3 \n\t"
01319 "por %%mm6, %%mm4 \n\t"
01320 "movq %%mm3, %%mm5 \n\t"
01321 PAVGB(%%mm4, %%mm3)
01322 PAVGB(%%mm2, %%mm3)
01323 "psubusb %%mm7, %%mm2 \n\t"
01324 "psubusb %%mm7, %%mm4 \n\t"
01325 "psubusb %%mm7, %%mm5 \n\t"
01326 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t"
01327 "pcmpeqb "MANGLE(b00)", %%mm4 \n\t"
01328 "pcmpeqb "MANGLE(b00)", %%mm5 \n\t"
01329 "paddb %%mm4, %%mm2 \n\t"
01330 "paddb %%mm5, %%mm2 \n\t"
01331
01332 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01333 "movq " #src ", " #sx " \n\t" \
01334 "movq " #sx ", " #lx " \n\t" \
01335 "movq " #sx ", " #t0 " \n\t" \
01336 "psllq $8, " #lx " \n\t"\
01337 "psrlq $8, " #t0 " \n\t"\
01338 "movd -4" #src ", " #t1 " \n\t"\
01339 "psrlq $24, " #t1 " \n\t"\
01340 "por " #t1 ", " #lx " \n\t" \
01341 "movd 8" #src ", " #t1 " \n\t"\
01342 "psllq $56, " #t1 " \n\t"\
01343 "por " #t1 ", " #t0 " \n\t" \
01344 "movq " #lx ", " #t1 " \n\t" \
01345 PAVGB(t0, lx) \
01346 PAVGB(sx, lx) \
01347 PAVGB(lx, pplx) \
01348 "movq " #lx ", 8(%%"REG_c") \n\t"\
01349 "movq (%%"REG_c"), " #lx " \n\t"\
01350 "psubusb " #lx ", " #t1 " \n\t"\
01351 "psubusb " #lx ", " #t0 " \n\t"\
01352 "psubusb " #lx ", " #sx " \n\t"\
01353 "movq "MANGLE(b00)", " #lx " \n\t"\
01354 "pcmpeqb " #lx ", " #t1 " \n\t" \
01355 "pcmpeqb " #lx ", " #t0 " \n\t" \
01356 "pcmpeqb " #lx ", " #sx " \n\t" \
01357 "paddb " #t1 ", " #t0 " \n\t"\
01358 "paddb " #t0 ", " #sx " \n\t"\
01359 \
01360 PAVGB(plx, pplx) \
01361 "movq " #dst ", " #t0 " \n\t" \
01362 "movq " #t0 ", " #t1 " \n\t" \
01363 "psubusb %3, " #t0 " \n\t"\
01364 "paddusb %3, " #t1 " \n\t"\
01365 PMAXUB(t0, pplx)\
01366 PMINUB(t1, pplx, t0)\
01367 "paddb " #sx ", " #ppsx " \n\t"\
01368 "paddb " #psx ", " #ppsx " \n\t"\
01369 "#paddb "MANGLE(b02)", " #ppsx " \n\t"\
01370 "pand "MANGLE(b08)", " #ppsx " \n\t"\
01371 "pcmpeqb " #lx ", " #ppsx " \n\t"\
01372 "pand " #ppsx ", " #pplx " \n\t"\
01373 "pandn " #dst ", " #ppsx " \n\t"\
01374 "por " #pplx ", " #ppsx " \n\t"\
01375 "movq " #ppsx ", " #dst " \n\t"\
01376 "movq 8(%%"REG_c"), " #lx " \n\t"
01377
01378 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
01379 REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
01380
01381
01382
01383
01384
01385
01386
01387
01388
01389
01390
01391
01392
01393
01394
01395
01396 DERING_CORE((%%REGa),(%%REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01397 DERING_CORE((%%REGa, %1),(%%REGa, %1, 2) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01398 DERING_CORE((%%REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01399 DERING_CORE((%0, %1, 4),(%%REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01400 DERING_CORE((%%REGd),(%%REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01401 DERING_CORE((%%REGd, %1), (%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
01402 DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
01403 DERING_CORE((%0, %1, 8),(%%REGd, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
01404
01405 "1: \n\t"
01406 : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2)
01407 : "%"REG_a, "%"REG_d, "%"REG_c
01408 );
01409 #else
01410 int y;
01411 int min=255;
01412 int max=0;
01413 int avg;
01414 uint8_t *p;
01415 int s[10];
01416 const int QP2= c->QP/2 + 1;
01417
01418 for(y=1; y<9; y++)
01419 {
01420 int x;
01421 p= src + stride*y;
01422 for(x=1; x<9; x++)
01423 {
01424 p++;
01425 if(*p > max) max= *p;
01426 if(*p < min) min= *p;
01427 }
01428 }
01429 avg= (min + max + 1)>>1;
01430
01431 if(max - min <deringThreshold) return;
01432
01433 for(y=0; y<10; y++)
01434 {
01435 int t = 0;
01436
01437 if(src[stride*y + 0] > avg) t+= 1;
01438 if(src[stride*y + 1] > avg) t+= 2;
01439 if(src[stride*y + 2] > avg) t+= 4;
01440 if(src[stride*y + 3] > avg) t+= 8;
01441 if(src[stride*y + 4] > avg) t+= 16;
01442 if(src[stride*y + 5] > avg) t+= 32;
01443 if(src[stride*y + 6] > avg) t+= 64;
01444 if(src[stride*y + 7] > avg) t+= 128;
01445 if(src[stride*y + 8] > avg) t+= 256;
01446 if(src[stride*y + 9] > avg) t+= 512;
01447
01448 t |= (~t)<<16;
01449 t &= (t<<1) & (t>>1);
01450 s[y] = t;
01451 }
01452
01453 for(y=1; y<9; y++)
01454 {
01455 int t = s[y-1] & s[y] & s[y+1];
01456 t|= t>>16;
01457 s[y-1]= t;
01458 }
01459
01460 for(y=1; y<9; y++)
01461 {
01462 int x;
01463 int t = s[y-1];
01464
01465 p= src + stride*y;
01466 for(x=1; x<9; x++)
01467 {
01468 p++;
01469 if(t & (1<<x))
01470 {
01471 int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1))
01472 +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1))
01473 +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1));
01474 f= (f + 8)>>4;
01475
01476 #ifdef DEBUG_DERING_THRESHOLD
01477 asm volatile("emms\n\t":);
01478 {
01479 static long long numPixels=0;
01480 if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
01481
01482
01483
01484 if(max-min < 20)
01485 {
01486 static int numSkiped=0;
01487 static int errorSum=0;
01488 static int worstQP=0;
01489 static int worstRange=0;
01490 static int worstDiff=0;
01491 int diff= (f - *p);
01492 int absDiff= ABS(diff);
01493 int error= diff*diff;
01494
01495 if(x==1 || x==8 || y==1 || y==8) continue;
01496
01497 numSkiped++;
01498 if(absDiff > worstDiff)
01499 {
01500 worstDiff= absDiff;
01501 worstQP= QP;
01502 worstRange= max-min;
01503 }
01504 errorSum+= error;
01505
01506 if(1024LL*1024LL*1024LL % numSkiped == 0)
01507 {
01508 printf( "sum:%1.3f, skip:%d, wQP:%d, "
01509 "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
01510 (float)errorSum/numSkiped, numSkiped, worstQP, worstRange,
01511 worstDiff, (float)numSkiped/numPixels);
01512 }
01513 }
01514 }
01515 #endif
01516 if (*p + QP2 < f) *p= *p + QP2;
01517 else if(*p - QP2 > f) *p= *p - QP2;
01518 else *p=f;
01519 }
01520 }
01521 }
01522 #ifdef DEBUG_DERING_THRESHOLD
01523 if(max-min < 20)
01524 {
01525 for(y=1; y<9; y++)
01526 {
01527 int x;
01528 int t = 0;
01529 p= src + stride*y;
01530 for(x=1; x<9; x++)
01531 {
01532 p++;
01533 *p = MIN(*p + 20, 255);
01534 }
01535 }
01536
01537 }
01538 #endif
01539 #endif
01540 }
01541 #endif //HAVE_ALTIVEC
01542
/**
 * Deinterlaces an 8-byte-wide group of rows by linear interpolation.
 *
 * After src has been advanced by 4*stride, rows 1, 3, 5 and 7 are overwritten
 * with the byte-wise average of the rows directly above and below them
 * (rows 0/2, 2/4, 4/6 and 6/8). Rows 0, 2, 4, 6 and 8 are only read.
 *
 * @param src    start of the block; processing begins 4 rows below this pointer
 * @param stride distance in bytes between two consecutive rows
 */
01549 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
01550 {
01551 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01552 src+= 4*stride;
01553 asm volatile(
/* REG_a = row 1, REG_c = row 5 (relative to the advanced src) */
01554 "lea (%0, %1), %%"REG_a" \n\t"
01555 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
01556
01557
01558
/* row 1 = avg(row 0, row 2) */
01559 "movq (%0), %%mm0 \n\t"
01560 "movq (%%"REG_a", %1), %%mm1 \n\t"
01561 PAVGB(%%mm1, %%mm0)
01562 "movq %%mm0, (%%"REG_a") \n\t"
/* row 3 = avg(row 2, row 4) */
01563 "movq (%0, %1, 4), %%mm0 \n\t"
01564 PAVGB(%%mm0, %%mm1)
01565 "movq %%mm1, (%%"REG_a", %1, 2) \n\t"
/* row 5 = avg(row 4, row 6) */
01566 "movq (%%"REG_c", %1), %%mm1 \n\t"
01567 PAVGB(%%mm1, %%mm0)
01568 "movq %%mm0, (%%"REG_c") \n\t"
/* row 7 = avg(row 6, row 8) */
01569 "movq (%0, %1, 8), %%mm0 \n\t"
01570 PAVGB(%%mm0, %%mm1)
01571 "movq %%mm1, (%%"REG_c", %1, 2) \n\t"
01572
01573 : : "r" (src), "r" ((long)stride)
01574 : "%"REG_a, "%"REG_c
01575 );
01576 #else
/* Portable path: two passes of four packed bytes each cover the 8 columns. */
01577 int a, b, x;
01578 src+= 4*stride;
01579
01580 for(x=0; x<2; x++){
/* (a|b) - (((a^b)&0xFEFEFEFE)>>1) is the per-byte average of a and b,
   rounded up, evaluated on four packed bytes at once. */
01581 a= *(uint32_t*)&src[stride*0];
01582 b= *(uint32_t*)&src[stride*2];
01583 *(uint32_t*)&src[stride*1]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01584 a= *(uint32_t*)&src[stride*4];
01585 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01586 b= *(uint32_t*)&src[stride*6];
01587 *(uint32_t*)&src[stride*5]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01588 a= *(uint32_t*)&src[stride*8];
01589 *(uint32_t*)&src[stride*7]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01590 src += 4;
01591 }
01592 #endif
01593 }
01594
/**
 * Deinterlaces an 8-byte-wide group of rows by cubic interpolation.
 *
 * After src has been advanced by 3*stride, rows 3, 5, 7 and 9 are replaced by
 * (-a + 9*b + 9*d - e) / 16, where b and d are the rows directly above and
 * below the output row and a and e are the rows three above and three below.
 * Even rows 0..12 are read and left untouched.
 *
 * @param src    start of the block; processing begins 3 rows below this pointer
 * @param stride distance in bytes between two consecutive rows
 */
01602 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
01603 {
01604 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01605 src+= stride*3;
01606 asm volatile(
/* REG_a = row 1, REG_d = row 5, REG_c = row 10; mm7 = 0 for byte->word unpacking */
01607 "lea (%0, %1), %%"REG_a" \n\t"
01608 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01609 "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t"
01610 "add %1, %%"REG_c" \n\t"
01611 "pxor %%mm7, %%mm7 \n\t"
01612
01613
01614
/* DEINT_CUBIC(a,b,c,d,e): c = avg(b,d) - ((avg(a,e) - avg(b,d)) >> 3),
   computed in 16-bit lanes and packed back with unsigned saturation. */
01615 #define REAL_DEINT_CUBIC(a,b,c,d,e)\
01616 "movq " #a ", %%mm0 \n\t"\
01617 "movq " #b ", %%mm1 \n\t"\
01618 "movq " #d ", %%mm2 \n\t"\
01619 "movq " #e ", %%mm3 \n\t"\
01620 PAVGB(%%mm2, %%mm1) \
01621 PAVGB(%%mm3, %%mm0) \
01622 "movq %%mm0, %%mm2 \n\t"\
01623 "punpcklbw %%mm7, %%mm0 \n\t"\
01624 "punpckhbw %%mm7, %%mm2 \n\t"\
01625 "movq %%mm1, %%mm3 \n\t"\
01626 "punpcklbw %%mm7, %%mm1 \n\t"\
01627 "punpckhbw %%mm7, %%mm3 \n\t"\
01628 "psubw %%mm1, %%mm0 \n\t" \
01629 "psubw %%mm3, %%mm2 \n\t" \
01630 "psraw $3, %%mm0 \n\t" \
01631 "psraw $3, %%mm2 \n\t" \
01632 "psubw %%mm0, %%mm1 \n\t" \
01633 "psubw %%mm2, %%mm3 \n\t" \
01634 "packuswb %%mm3, %%mm1 \n\t"\
01635 "movq %%mm1, " #c " \n\t"
01636 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e)
01637
/* Output rows 3, 5, 7, 9 from input rows (0,2,4,6), (2,4,6,8), (4,6,8,10), (6,8,10,12). */
01638 DEINT_CUBIC((%0), (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4), (%%REGd, %1))
01639 DEINT_CUBIC((%%REGa, %1), (%0, %1, 4), (%%REGd), (%%REGd, %1), (%0, %1, 8))
01640 DEINT_CUBIC((%0, %1, 4), (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8), (%%REGc))
01641 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8), (%%REGd, %1, 4), (%%REGc), (%%REGc, %1, 2))
01642
01643 : : "r" (src), "r" ((long)stride)
01644 : "%"REG_a, "%"REG_d, "%"REG_c
01645 );
01646 #else
/* Portable path: one column per iteration, 8 columns in total. */
01647 int x;
01648 src+= stride*3;
01649 for(x=0; x<8; x++)
01650 {
01651 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
01652 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
01653 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
01654 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
01655 src++;
01656 }
01657 #endif
01658 }
01659
/**
 * Deinterlaces an 8-byte-wide group of rows by filtering every second row
 * with a vertical (-1 4 2 4 -1)/8 kernel.
 *
 * After src has been advanced by 4*stride, rows 1, 3, 5 and 7 are rewritten;
 * rows 0..9 are read. The outer (-1) taps fall on rows that this function
 * itself rewrites, so the unfiltered value of each centre row is kept and
 * reused as the upper tap of the next output row.
 *
 * @param src    start of the block; processing begins 4 rows below this pointer
 * @param stride distance in bytes between two consecutive rows
 * @param tmp    8 bytes used as the row above row 0 (upper tap of output
 *               row 1); overwritten with the unfiltered row 7 on return
 *               (presumably consumed by the call for the block below -
 *               confirm against the caller)
 */
01667 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
01668 {
01669 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01670 src+= stride*4;
01671 asm volatile(
/* REG_a = row 1, REG_d = row 5; mm7 = 0 for unpacking; mm0 = saved row from tmp */
01672 "lea (%0, %1), %%"REG_a" \n\t"
01673 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01674 "pxor %%mm7, %%mm7 \n\t"
01675 "movq (%2), %%mm0 \n\t"
01676
01677
01678
/* DEINT_FF(a,b,c,d): b = (4*avg(a,c) - avg(mm0,d) + b) >> 2, then mm0 = old b. */
01679 #define REAL_DEINT_FF(a,b,c,d)\
01680 "movq " #a ", %%mm1 \n\t"\
01681 "movq " #b ", %%mm2 \n\t"\
01682 "movq " #c ", %%mm3 \n\t"\
01683 "movq " #d ", %%mm4 \n\t"\
01684 PAVGB(%%mm3, %%mm1) \
01685 PAVGB(%%mm4, %%mm0) \
01686 "movq %%mm0, %%mm3 \n\t"\
01687 "punpcklbw %%mm7, %%mm0 \n\t"\
01688 "punpckhbw %%mm7, %%mm3 \n\t"\
01689 "movq %%mm1, %%mm4 \n\t"\
01690 "punpcklbw %%mm7, %%mm1 \n\t"\
01691 "punpckhbw %%mm7, %%mm4 \n\t"\
01692 "psllw $2, %%mm1 \n\t"\
01693 "psllw $2, %%mm4 \n\t"\
01694 "psubw %%mm0, %%mm1 \n\t"\
01695 "psubw %%mm3, %%mm4 \n\t"\
01696 "movq %%mm2, %%mm5 \n\t"\
01697 "movq %%mm2, %%mm0 \n\t"\
01698 "punpcklbw %%mm7, %%mm2 \n\t"\
01699 "punpckhbw %%mm7, %%mm5 \n\t"\
01700 "paddw %%mm2, %%mm1 \n\t"\
01701 "paddw %%mm5, %%mm4 \n\t"\
01702 "psraw $2, %%mm1 \n\t"\
01703 "psraw $2, %%mm4 \n\t"\
01704 "packuswb %%mm4, %%mm1 \n\t"\
01705 "movq %%mm1, " #b " \n\t"\
01706
01707 #define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d)
01708
/* Output rows 1, 3, 5, 7; arguments are (row above, centre, row below, two below). */
01709 DEINT_FF((%0) , (%%REGa) , (%%REGa, %1), (%%REGa, %1, 2))
01710 DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4), (%%REGd) )
01711 DEINT_FF((%0, %1, 4), (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2))
01712 DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8), (%%REGd, %1, 4))
01713
/* mm0 now holds the unfiltered row 7 */
01714 "movq %%mm0, (%2) \n\t"
01715 : : "r" (src), "r" ((long)stride), "r"(tmp)
01716 : "%"REG_a, "%"REG_d
01717 );
01718 #else
/* Portable path: one column per iteration, 8 columns in total.
   t1/t2 alternately hold the unfiltered centre row of the current output and
   of the previous output. The centre rows are 1, 3, 5 and 7, so those are the
   rows that must be saved before being overwritten; loading rows 4, 6 and 8
   here made this path diverge from the assembly above and stored the wrong
   row in tmp. */
01719 int x;
01720 src+= stride*4;
01721 for(x=0; x<8; x++)
01722 {
01723 int t1= tmp[x];
01724 int t2= src[stride*1];
01725
01726 src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
01727 t1= src[stride*3];
01728 src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
01729 t2= src[stride*5];
01730 src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
01731 t1= src[stride*7];
01732 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
01733 tmp[x]= t1;
01734
01735 src++;
01736 }
01737 #endif
01738 }
01739
/**
 * Deinterlaces an 8-byte-wide group of rows by filtering every row with a
 * vertical (-1 2 6 2 -1)/8 kernel.
 *
 * After src has been advanced by 4*stride, rows 0..7 are rewritten and rows
 * 0..9 are read. Because rows are overwritten in place, the unfiltered values
 * of the two most recently processed rows are carried along and used as the
 * upper taps of the following rows.
 *
 * @param src    start of the block; processing begins 4 rows below this pointer
 * @param stride distance in bytes between two consecutive rows
 * @param tmp    8 bytes used as the row two above row 0; overwritten with the
 *               unfiltered row 6 on return
 * @param tmp2   8 bytes used as the row directly above row 0; overwritten with
 *               the unfiltered row 7 on return
 */
01747 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
01748 {
01749 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01750 src+= stride*4;
01751 asm volatile(
/* REG_a = row 1, REG_d = row 5; mm7 = 0 for unpacking; mm0 = tmp, mm1 = tmp2 */
01752 "lea (%0, %1), %%"REG_a" \n\t"
01753 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01754 "pxor %%mm7, %%mm7 \n\t"
01755 "movq (%2), %%mm0 \n\t"
01756 "movq (%3), %%mm1 \n\t"
01757
01758
01759
/* DEINT_L5(t1,t2,a,b,c): a = (3*a + 2*avg(t2,b) - avg(t1,c)) >> 2, and the
   unfiltered a is left in t1. t1/t2 swap roles between consecutive calls. */
01760 #define REAL_DEINT_L5(t1,t2,a,b,c)\
01761 "movq " #a ", %%mm2 \n\t"\
01762 "movq " #b ", %%mm3 \n\t"\
01763 "movq " #c ", %%mm4 \n\t"\
01764 PAVGB(t2, %%mm3) \
01765 PAVGB(t1, %%mm4) \
01766 "movq %%mm2, %%mm5 \n\t"\
01767 "movq %%mm2, " #t1 " \n\t"\
01768 "punpcklbw %%mm7, %%mm2 \n\t"\
01769 "punpckhbw %%mm7, %%mm5 \n\t"\
01770 "movq %%mm2, %%mm6 \n\t"\
01771 "paddw %%mm2, %%mm2 \n\t"\
01772 "paddw %%mm6, %%mm2 \n\t"\
01773 "movq %%mm5, %%mm6 \n\t"\
01774 "paddw %%mm5, %%mm5 \n\t"\
01775 "paddw %%mm6, %%mm5 \n\t"\
01776 "movq %%mm3, %%mm6 \n\t"\
01777 "punpcklbw %%mm7, %%mm3 \n\t"\
01778 "punpckhbw %%mm7, %%mm6 \n\t"\
01779 "paddw %%mm3, %%mm3 \n\t"\
01780 "paddw %%mm6, %%mm6 \n\t"\
01781 "paddw %%mm3, %%mm2 \n\t"\
01782 "paddw %%mm6, %%mm5 \n\t"\
01783 "movq %%mm4, %%mm6 \n\t"\
01784 "punpcklbw %%mm7, %%mm4 \n\t"\
01785 "punpckhbw %%mm7, %%mm6 \n\t"\
01786 "psubw %%mm4, %%mm2 \n\t"\
01787 "psubw %%mm6, %%mm5 \n\t"\
01788 "psraw $2, %%mm2 \n\t"\
01789 "psraw $2, %%mm5 \n\t"\
01790 "packuswb %%mm5, %%mm2 \n\t"\
01791 "movq %%mm2, " #a " \n\t"\
01792
01793 #define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c)
01794
/* Output rows 0..7 in order; the last three arguments are rows n, n+1, n+2. */
01795 DEINT_L5(%%mm0, %%mm1, (%0) , (%%REGa) , (%%REGa, %1) )
01796 DEINT_L5(%%mm1, %%mm0, (%%REGa) , (%%REGa, %1) , (%%REGa, %1, 2))
01797 DEINT_L5(%%mm0, %%mm1, (%%REGa, %1) , (%%REGa, %1, 2), (%0, %1, 4) )
01798 DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) )
01799 DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%REGd) , (%%REGd, %1) )
01800 DEINT_L5(%%mm1, %%mm0, (%%REGd) , (%%REGd, %1) , (%%REGd, %1, 2))
01801 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) )
01802 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
01803
/* mm0 = unfiltered row 6, mm1 = unfiltered row 7 */
01804 "movq %%mm0, (%2) \n\t"
01805 "movq %%mm1, (%3) \n\t"
01806 : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2)
01807 : "%"REG_a, "%"REG_d
01808 );
01809 #else
/* Portable path: one column per iteration, 8 columns in total.
   t1, t2, t3 rotate through the unfiltered values of the three most recent
   rows so that already overwritten rows are never re-read. */
01810 int x;
01811 src+= stride*4;
01812 for(x=0; x<8; x++)
01813 {
01814 int t1= tmp[x];
01815 int t2= tmp2[x];
01816 int t3= src[0];
01817
01818 src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
01819 t1= src[stride*1];
01820 src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
01821 t2= src[stride*2];
01822 src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
01823 t3= src[stride*3];
01824 src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
01825 t1= src[stride*4];
01826 src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
01827 t2= src[stride*5];
01828 src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
01829 t3= src[stride*6];
01830 src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
01831 t1= src[stride*7];
01832 src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
01833
/* hand the unfiltered rows 6 and 7 back through tmp and tmp2 */
01834 tmp[x]= t3;
01835 tmp2[x]= t1;
01836
01837 src++;
01838 }
01839 #endif
01840 }
01841
/**
 * Deinterlaces an 8-byte-wide group of rows by blending every row with its
 * neighbours using a vertical (1 2 1)/4 kernel.
 *
 * After src has been advanced by 4*stride, rows 0..7 are rewritten and rows
 * 0..8 are read. The blend is built from two byte averages: first the rows
 * above and below are averaged, then that result is averaged with the centre.
 *
 * @param src    start of the block; processing begins 4 rows below this pointer
 * @param stride distance in bytes between two consecutive rows
 * @param tmp    8 bytes used as the row above row 0; overwritten with the
 *               unfiltered row 7 on return
 */
01849 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
01850 {
01851 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
01852 src+= 4*stride;
01853 asm volatile(
/* REG_a = row 1, REG_d = row 5 (relative to the advanced src) */
01854 "lea (%0, %1), %%"REG_a" \n\t"
01855 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01856
01857
01858
/* mm0, mm1, mm2 rotate through the unfiltered rows n-1, n, n+1 so that a row
   is never re-read after it has been overwritten. */
/* row 0 = avg(avg(tmp, row 1), row 0) */
01859 "movq (%2), %%mm0 \n\t"
01860 "movq (%%"REG_a"), %%mm1 \n\t"
01861 PAVGB(%%mm1, %%mm0)
01862 "movq (%0), %%mm2 \n\t"
01863 PAVGB(%%mm2, %%mm0)
01864 "movq %%mm0, (%0) \n\t"
/* row 1 */
01865 "movq (%%"REG_a", %1), %%mm0 \n\t"
01866 PAVGB(%%mm0, %%mm2)
01867 PAVGB(%%mm1, %%mm2)
01868 "movq %%mm2, (%%"REG_a") \n\t"
/* row 2 */
01869 "movq (%%"REG_a", %1, 2), %%mm2 \n\t"
01870 PAVGB(%%mm2, %%mm1)
01871 PAVGB(%%mm0, %%mm1)
01872 "movq %%mm1, (%%"REG_a", %1) \n\t"
/* row 3 */
01873 "movq (%0, %1, 4), %%mm1 \n\t"
01874 PAVGB(%%mm1, %%mm0)
01875 PAVGB(%%mm2, %%mm0)
01876 "movq %%mm0, (%%"REG_a", %1, 2) \n\t"
/* row 4 */
01877 "movq (%%"REG_d"), %%mm0 \n\t"
01878 PAVGB(%%mm0, %%mm2)
01879 PAVGB(%%mm1, %%mm2)
01880 "movq %%mm2, (%0, %1, 4) \n\t"
/* row 5 */
01881 "movq (%%"REG_d", %1), %%mm2 \n\t"
01882 PAVGB(%%mm2, %%mm1)
01883 PAVGB(%%mm0, %%mm1)
01884 "movq %%mm1, (%%"REG_d") \n\t"
/* row 6 */
01885 "movq (%%"REG_d", %1, 2), %%mm1 \n\t"
01886 PAVGB(%%mm1, %%mm0)
01887 PAVGB(%%mm2, %%mm0)
01888 "movq %%mm0, (%%"REG_d", %1) \n\t"
/* row 7, then save the unfiltered row 7 (still in mm1) to tmp */
01889 "movq (%0, %1, 8), %%mm0 \n\t"
01890 PAVGB(%%mm0, %%mm2)
01891 PAVGB(%%mm1, %%mm2)
01892 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
01893 "movq %%mm1, (%2) \n\t"
01894
01895 : : "r" (src), "r" ((long)stride), "r" (tmp)
01896 : "%"REG_a, "%"REG_d
01897 );
01898 #else
/* Portable path: two passes of four packed bytes each cover the 8 columns.
   (p&q) + (((p^q)&0xFEFEFEFE)>>1) is the per-byte average rounded down,
   (p|q) - (((p^q)&0xFEFEFEFE)>>1) is the per-byte average rounded up.
   a, b, c rotate through the unfiltered neighbouring rows. */
01899 int a, b, c, x;
01900 src+= 4*stride;
01901
01902 for(x=0; x<2; x++){
01903 a= *(uint32_t*)&tmp[stride*0];
01904 b= *(uint32_t*)&src[stride*0];
01905 c= *(uint32_t*)&src[stride*1];
01906 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01907 *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01908
01909 a= *(uint32_t*)&src[stride*2];
01910 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01911 *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01912
01913 b= *(uint32_t*)&src[stride*3];
01914 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01915 *(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01916
01917 c= *(uint32_t*)&src[stride*4];
01918 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01919 *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01920
01921 a= *(uint32_t*)&src[stride*5];
01922 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01923 *(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01924
01925 b= *(uint32_t*)&src[stride*6];
01926 c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
01927 *(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
01928
01929 c= *(uint32_t*)&src[stride*7];
01930 a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
01931 *(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
01932
01933 a= *(uint32_t*)&src[stride*8];
01934 b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
01935 *(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
01936
/* c still holds the unfiltered row 7 */
01937 *(uint32_t*)&tmp[stride*0]= c;
01938 src += 4;
01939 tmp += 4;
01940 }
01941 #endif
01942 }
01943
/**
 * Deinterlaces an 8-byte-wide group of rows with a vertical 3-tap median.
 *
 * After src has been advanced by 4*stride, rows 1, 3, 5 and 7 are replaced by
 * the byte-wise median of the row itself and the rows directly above and
 * below it. Rows 0, 2, 4, 6 and 8 are only read.
 *
 * Three implementations are selected at compile time: MMX2 (pminub/pmaxub),
 * plain MMX (compare masks), and portable C.
 *
 * @param src    start of the block; processing begins 4 rows below this pointer
 * @param stride distance in bytes between two consecutive rows
 */
01950 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
01951 {
01952 #ifdef HAVE_MMX
01953 src+= 4*stride;
01954 #ifdef HAVE_MMX2
01955 asm volatile(
/* REG_a = row 1, REG_d = row 5 (relative to the advanced src) */
01956 "lea (%0, %1), %%"REG_a" \n\t"
01957 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
01958
01959
01960
/* Each group computes median(x,y,z) = min(max(x,y), max(min(x,y), z)). */
/* row 1 = median(row 0, row 1, row 2); row 2 stays in mm2 for the next group */
01961 "movq (%0), %%mm0 \n\t"
01962 "movq (%%"REG_a", %1), %%mm2 \n\t"
01963 "movq (%%"REG_a"), %%mm1 \n\t"
01964 "movq %%mm0, %%mm3 \n\t"
01965 "pmaxub %%mm1, %%mm0 \n\t"
01966 "pminub %%mm3, %%mm1 \n\t"
01967 "pmaxub %%mm2, %%mm1 \n\t"
01968 "pminub %%mm1, %%mm0 \n\t"
01969 "movq %%mm0, (%%"REG_a") \n\t"
01970
/* row 3 = median(row 2, row 3, row 4); row 4 stays in mm0 */
01971 "movq (%0, %1, 4), %%mm0 \n\t"
01972 "movq (%%"REG_a", %1, 2), %%mm1 \n\t"
01973 "movq %%mm2, %%mm3 \n\t"
01974 "pmaxub %%mm1, %%mm2 \n\t"
01975 "pminub %%mm3, %%mm1 \n\t"
01976 "pmaxub %%mm0, %%mm1 \n\t"
01977 "pminub %%mm1, %%mm2 \n\t"
01978 "movq %%mm2, (%%"REG_a", %1, 2) \n\t"
01979
/* row 5 = median(row 4, row 5, row 6); row 6 stays in mm1 */
01980 "movq (%%"REG_d"), %%mm2 \n\t"
01981 "movq (%%"REG_d", %1), %%mm1 \n\t"
01982 "movq %%mm2, %%mm3 \n\t"
01983 "pmaxub %%mm0, %%mm2 \n\t"
01984 "pminub %%mm3, %%mm0 \n\t"
01985 "pmaxub %%mm1, %%mm0 \n\t"
01986 "pminub %%mm0, %%mm2 \n\t"
01987 "movq %%mm2, (%%"REG_d") \n\t"
01988
/* row 7 = median(row 6, row 7, row 8) */
01989 "movq (%%"REG_d", %1, 2), %%mm2 \n\t"
01990 "movq (%0, %1, 8), %%mm0 \n\t"
01991 "movq %%mm2, %%mm3 \n\t"
01992 "pmaxub %%mm0, %%mm2 \n\t"
01993 "pminub %%mm3, %%mm0 \n\t"
01994 "pmaxub %%mm1, %%mm0 \n\t"
01995 "pminub %%mm0, %%mm2 \n\t"
01996 "movq %%mm2, (%%"REG_d", %1, 2) \n\t"
01997
01998
01999 : : "r" (src), "r" ((long)stride)
02000 : "%"REG_a, "%"REG_d
02001 );
02002
02003 #else // MMX without MMX2
02004 asm volatile(
02005 "lea (%0, %1), %%"REG_a" \n\t"
02006 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
02007
02008
/* mm7 = 0, the reference value for the pcmpeqb tests below */
02009 "pxor %%mm7, %%mm7 \n\t"
02010
/* MEDIAN(a,b,c): b = median(a,b,c) without pminub/pmaxub. Saturating
   subtractions followed by pcmpeqb against zero produce three "<=" byte
   masks; XOR-ing them pairwise yields, per input, a mask that is OR-ed into
   it, and the final AND of the three inputs leaves the median. */
02011 #define REAL_MEDIAN(a,b,c)\
02012 "movq " #a ", %%mm0 \n\t"\
02013 "movq " #b ", %%mm2 \n\t"\
02014 "movq " #c ", %%mm1 \n\t"\
02015 "movq %%mm0, %%mm3 \n\t"\
02016 "movq %%mm1, %%mm4 \n\t"\
02017 "movq %%mm2, %%mm5 \n\t"\
02018 "psubusb %%mm1, %%mm3 \n\t"\
02019 "psubusb %%mm2, %%mm4 \n\t"\
02020 "psubusb %%mm0, %%mm5 \n\t"\
02021 "pcmpeqb %%mm7, %%mm3 \n\t"\
02022 "pcmpeqb %%mm7, %%mm4 \n\t"\
02023 "pcmpeqb %%mm7, %%mm5 \n\t"\
02024 "movq %%mm3, %%mm6 \n\t"\
02025 "pxor %%mm4, %%mm3 \n\t"\
02026 "pxor %%mm5, %%mm4 \n\t"\
02027 "pxor %%mm6, %%mm5 \n\t"\
02028 "por %%mm3, %%mm1 \n\t"\
02029 "por %%mm4, %%mm2 \n\t"\
02030 "por %%mm5, %%mm0 \n\t"\
02031 "pand %%mm2, %%mm0 \n\t"\
02032 "pand %%mm1, %%mm0 \n\t"\
02033 "movq %%mm0, " #b " \n\t"
02034 #define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c)
02035
/* Output rows 1, 3, 5, 7 */
02036 MEDIAN((%0), (%%REGa), (%%REGa, %1))
02037 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
02038 MEDIAN((%0, %1, 4), (%%REGd), (%%REGd, %1))
02039 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
02040
02041 : : "r" (src), "r" ((long)stride)
02042 : "%"REG_a, "%"REG_d
02043 );
02044 #endif // HAVE_MMX2
02045 #else
/* Portable path: 8 columns, 4 output rows per column. */
02046 int x, y;
02047 src+= 4*stride;
02048
02049 for(x=0; x<8; x++)
02050 {
02051 uint8_t *colsrc = src;
02052 for (y=0; y<4; y++)
02053 {
/* d, e, f become all-ones when a<b, b<c, c<a respectively (this relies on
   a 32-bit int and an arithmetic right shift of negative values). The value
   whose two flags agree is kept; the other two are OR-ed to all-ones, so
   the AND of the three terms is the median. */
02054 int a, b, c, d, e, f;
02055 a = colsrc[0 ];
02056 b = colsrc[stride ];
02057 c = colsrc[stride*2];
02058 d = (a-b)>>31;
02059 e = (b-c)>>31;
02060 f = (c-a)>>31;
02061 colsrc[stride ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
02062 colsrc += stride*2;
02063 }
02064 src++;
02065 }
02066 #endif
02067 }
02068
02069 #ifdef HAVE_MMX
02070
/**
 * Transposes an 8x8 byte block from src into two destination buffers.
 *
 * Eight rows of eight bytes are read from src (srcStride bytes apart). Each
 * source column becomes one 8-byte destination row; consecutive destination
 * rows are 16 bytes apart. Source columns 0..4 are written to dst1 at byte
 * offsets 128, 144, 160, 176, 192, and source columns 3..7 are written to
 * dst2 at byte offsets 48, 64, 80, 96, 112 (columns 3 and 4 go to both).
 *
 * NOTE(review): the asm stores through dst1/dst2 but declares no "memory"
 * clobber; confirm callers do not rely on the compiler seeing these writes.
 *
 * @param dst1      first destination buffer (written at offsets 128..199)
 * @param dst2      second destination buffer (written at offsets 48..119)
 * @param src       top-left byte of the 8x8 source block
 * @param srcStride distance in bytes between two consecutive source rows
 */
02073 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
02074 {
02075 asm(
/* REG_a = source row 1 */
02076 "lea (%0, %1), %%"REG_a" \n\t"
02077
02078
/* Source rows 0..3: interleave bytes of rows 0/1 and rows 2/3 ... */
02079 "movq (%0), %%mm0 \n\t"
02080 "movq (%%"REG_a"), %%mm1 \n\t"
02081 "movq %%mm0, %%mm2 \n\t"
02082 "punpcklbw %%mm1, %%mm0 \n\t"
02083 "punpckhbw %%mm1, %%mm2 \n\t"
02084
02085 "movq (%%"REG_a", %1), %%mm1 \n\t"
02086 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
02087 "movq %%mm1, %%mm4 \n\t"
02088 "punpcklbw %%mm3, %%mm1 \n\t"
02089 "punpckhbw %%mm3, %%mm4 \n\t"
02090
/* ... then interleave words, so that each 32-bit half holds one source
   column across rows 0..3: mm0 = columns 0|1, mm3 = 2|3, mm2 = 4|5, mm1 = 6|7 */
02091 "movq %%mm0, %%mm3 \n\t"
02092 "punpcklwd %%mm1, %%mm0 \n\t"
02093 "punpckhwd %%mm1, %%mm3 \n\t"
02094 "movq %%mm2, %%mm1 \n\t"
02095 "punpcklwd %%mm4, %%mm2 \n\t"
02096 "punpckhwd %%mm4, %%mm1 \n\t"
02097
/* store the 4-byte column pieces into bytes 0..3 of each destination row */
02098 "movd %%mm0, 128(%2) \n\t"
02099 "psrlq $32, %%mm0 \n\t"
02100 "movd %%mm0, 144(%2) \n\t"
02101 "movd %%mm3, 160(%2) \n\t"
02102 "psrlq $32, %%mm3 \n\t"
02103 "movd %%mm3, 176(%2) \n\t"
02104 "movd %%mm3, 48(%3) \n\t"
02105 "movd %%mm2, 192(%2) \n\t"
02106 "movd %%mm2, 64(%3) \n\t"
02107 "psrlq $32, %%mm2 \n\t"
02108 "movd %%mm2, 80(%3) \n\t"
02109 "movd %%mm1, 96(%3) \n\t"
02110 "psrlq $32, %%mm1 \n\t"
02111 "movd %%mm1, 112(%3) \n\t"
02112
/* REG_a = source row 5; repeat for source rows 4..7 */
02113 "lea (%%"REG_a", %1, 4), %%"REG_a" \n\t"
02114
02115 "movq (%0, %1, 4), %%mm0 \n\t"
02116 "movq (%%"REG_a"), %%mm1 \n\t"
02117 "movq %%mm0, %%mm2 \n\t"
02118 "punpcklbw %%mm1, %%mm0 \n\t"
02119 "punpckhbw %%mm1, %%mm2 \n\t"
02120
02121 "movq (%%"REG_a", %1), %%mm1 \n\t"
02122 "movq (%%"REG_a", %1, 2), %%mm3 \n\t"
02123 "movq %%mm1, %%mm4 \n\t"
02124 "punpcklbw %%mm3, %%mm1 \n\t"
02125 "punpckhbw %%mm3, %%mm4 \n\t"
02126
02127 "movq %%mm0, %%mm3 \n\t"
02128 "punpcklwd %%mm1, %%mm0 \n\t"
02129 "punpckhwd %%mm1, %%mm3 \n\t"
02130 "movq %%mm2, %%mm1 \n\t"
02131 "punpcklwd %%mm4, %%mm2 \n\t"
02132 "punpckhwd %%mm4, %%mm1 \n\t"
02133
/* same destinations shifted by 4: bytes 4..7 of each destination row */
02134 "movd %%mm0, 132(%2) \n\t"
02135 "psrlq $32, %%mm0 \n\t"
02136 "movd %%mm0, 148(%2) \n\t"
02137 "movd %%mm3, 164(%2) \n\t"
02138 "psrlq $32, %%mm3 \n\t"
02139 "movd %%mm3, 180(%2) \n\t"
02140 "movd %%mm3, 52(%3) \n\t"
02141 "movd %%mm2, 196(%2) \n\t"
02142 "movd %%mm2, 68(%3) \n\t"
02143 "psrlq $32, %%mm2 \n\t"
02144 "movd %%mm2, 84(%3) \n\t"
02145 "movd %%mm1, 100(%3) \n\t"
02146 "psrlq $32, %%mm1 \n\t"
02147 "movd %%mm1, 116(%3) \n\t"
02148
02149
02150 :: "r" (src), "r" ((long)srcStride), "r" (dst1), "r" (dst2)
02151 : "%"REG_a
02152 );
02153 }
02154
/**
 * Transposes an 8x8 byte block stored with a 16-byte row pitch into dst.
 *
 * Eight rows of eight bytes are read from src at byte offsets 0, 16, ..., 112.
 * Source column n is written to destination row n (dst + n*dstStride):
 * source rows 0..3 fill bytes 0..3 and source rows 4..7 fill bytes 4..7.
 *
 * NOTE(review): the asm stores through dst but declares no "memory" clobber;
 * confirm callers do not rely on the compiler seeing these writes.
 *
 * @param dst       top-left byte of the 8x8 destination block
 * @param dstStride distance in bytes between two consecutive destination rows
 * @param src       source buffer holding the block with a 16-byte row pitch
 */
02158 static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
02159 {
02160 asm(
/* REG_a = destination row 1, REG_d = destination row 5 */
02161 "lea (%0, %1), %%"REG_a" \n\t"
02162 "lea (%%"REG_a",%1,4), %%"REG_d"\n\t"
02163
02164
/* Source rows 0..3: interleave bytes of rows 0/1 and rows 2/3 ... */
02165 "movq (%2), %%mm0 \n\t"
02166 "movq 16(%2), %%mm1 \n\t"
02167 "movq %%mm0, %%mm2 \n\t"
02168 "punpcklbw %%mm1, %%mm0 \n\t"
02169 "punpckhbw %%mm1, %%mm2 \n\t"
02170
02171 "movq 32(%2), %%mm1 \n\t"
02172 "movq 48(%2), %%mm3 \n\t"
02173 "movq %%mm1, %%mm4 \n\t"
02174 "punpcklbw %%mm3, %%mm1 \n\t"
02175 "punpckhbw %%mm3, %%mm4 \n\t"
02176
/* ... then interleave words, so that each 32-bit half holds one source
   column across rows 0..3: mm0 = columns 0|1, mm3 = 2|3, mm2 = 4|5, mm1 = 6|7 */
02177 "movq %%mm0, %%mm3 \n\t"
02178 "punpcklwd %%mm1, %%mm0 \n\t"
02179 "punpckhwd %%mm1, %%mm3 \n\t"
02180 "movq %%mm2, %%mm1 \n\t"
02181 "punpcklwd %%mm4, %%mm2 \n\t"
02182 "punpckhwd %%mm4, %%mm1 \n\t"
02183
/* bytes 0..3 of destination rows 0..7 */
02184 "movd %%mm0, (%0) \n\t"
02185 "psrlq $32, %%mm0 \n\t"
02186 "movd %%mm0, (%%"REG_a") \n\t"
02187 "movd %%mm3, (%%"REG_a", %1) \n\t"
02188 "psrlq $32, %%mm3 \n\t"
02189 "movd %%mm3, (%%"REG_a", %1, 2) \n\t"
02190 "movd %%mm2, (%0, %1, 4) \n\t"
02191 "psrlq $32, %%mm2 \n\t"
02192 "movd %%mm2, (%%"REG_d") \n\t"
02193 "movd %%mm1, (%%"REG_d", %1) \n\t"
02194 "psrlq $32, %%mm1 \n\t"
02195 "movd %%mm1, (%%"REG_d", %1, 2) \n\t"
02196
02197
/* Source rows 4..7, handled the same way */
02198 "movq 64(%2), %%mm0 \n\t"
02199 "movq 80(%2), %%mm1 \n\t"
02200 "movq %%mm0, %%mm2 \n\t"
02201 "punpcklbw %%mm1, %%mm0 \n\t"
02202 "punpckhbw %%mm1, %%mm2 \n\t"
02203
02204 "movq 96(%2), %%mm1 \n\t"
02205 "movq 112(%2), %%mm3 \n\t"
02206 "movq %%mm1, %%mm4 \n\t"
02207 "punpcklbw %%mm3, %%mm1 \n\t"
02208 "punpckhbw %%mm3, %%mm4 \n\t"
02209
02210 "movq %%mm0, %%mm3 \n\t"
02211 "punpcklwd %%mm1, %%mm0 \n\t"
02212 "punpckhwd %%mm1, %%mm3 \n\t"
02213 "movq %%mm2, %%mm1 \n\t"
02214 "punpcklwd %%mm4, %%mm2 \n\t"
02215 "punpckhwd %%mm4, %%mm1 \n\t"
02216
/* bytes 4..7 of destination rows 0..7 */
02217 "movd %%mm0, 4(%0) \n\t"
02218 "psrlq $32, %%mm0 \n\t"
02219 "movd %%mm0, 4(%%"REG_a") \n\t"
02220 "movd %%mm3, 4(%%"REG_a", %1) \n\t"
02221 "psrlq $32, %%mm3 \n\t"
02222 "movd %%mm3, 4(%%"REG_a", %1, 2) \n\t"
02223 "movd %%mm2, 4(%0, %1, 4) \n\t"
02224 "psrlq $32, %%mm2 \n\t"
02225 "movd %%mm2, 4(%%"REG_d") \n\t"
02226 "movd %%mm1, 4(%%"REG_d", %1) \n\t"
02227 "psrlq $32, %%mm1 \n\t"
02228 "movd %%mm1, 4(%%"REG_d", %1, 2) \n\t"
02229
02230 :: "r" (dst), "r" ((long)dstStride), "r" (src)
02231 : "%"REG_a, "%"REG_d
02232 );
02233 }
02234 #endif
02235
02236
02237 #ifndef HAVE_ALTIVEC
02238 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
02239 uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
02240 {
02241
02242 tempBluredPast[127]= maxNoise[0];
02243 tempBluredPast[128]= maxNoise[1];
02244 tempBluredPast[129]= maxNoise[2];
02245
02246 #define FAST_L2_DIFF
02247
02248 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
02249 asm volatile(
02250 "lea (%2, %2, 2), %%"REG_a" \n\t"
02251 "lea (%2, %2, 4), %%"REG_d" \n\t"
02252 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02253
02254
02255
02256 #ifdef L1_DIFF //needs mmx2
02257 "movq (%0), %%mm0 \n\t"
02258 "psadbw (%1), %%mm0 \n\t"
02259 "movq (%0, %2), %%mm1 \n\t"
02260 "psadbw (%1, %2), %%mm1 \n\t"
02261 "movq (%0, %2, 2), %%mm2 \n\t"
02262 "psadbw (%1, %2, 2), %%mm2 \n\t"
02263 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02264 "psadbw (%1, %%"REG_a"), %%mm3 \n\t"
02265
02266 "movq (%0, %2, 4), %%mm4 \n\t"
02267 "paddw %%mm1, %%mm0 \n\t"
02268 "psadbw (%1, %2, 4), %%mm4 \n\t"
02269 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02270 "paddw %%mm2, %%mm0 \n\t"
02271 "psadbw (%1, %%"REG_d"), %%mm5 \n\t"
02272 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02273 "paddw %%mm3, %%mm0 \n\t"
02274 "psadbw (%1, %%"REG_a", 2), %%mm6 \n\t"
02275 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02276 "paddw %%mm4, %%mm0 \n\t"
02277 "psadbw (%1, %%"REG_c"), %%mm7 \n\t"
02278 "paddw %%mm5, %%mm6 \n\t"
02279 "paddw %%mm7, %%mm6 \n\t"
02280 "paddw %%mm6, %%mm0 \n\t"
02281 #else
02282 #if defined (FAST_L2_DIFF)
02283 "pcmpeqb %%mm7, %%mm7 \n\t"
02284 "movq "MANGLE(b80)", %%mm6 \n\t"
02285 "pxor %%mm0, %%mm0 \n\t"
02286 #define REAL_L2_DIFF_CORE(a, b)\
02287 "movq " #a ", %%mm5 \n\t"\
02288 "movq " #b ", %%mm2 \n\t"\
02289 "pxor %%mm7, %%mm2 \n\t"\
02290 PAVGB(%%mm2, %%mm5)\
02291 "paddb %%mm6, %%mm5 \n\t"\
02292 "movq %%mm5, %%mm2 \n\t"\
02293 "psllw $8, %%mm5 \n\t"\
02294 "pmaddwd %%mm5, %%mm5 \n\t"\
02295 "pmaddwd %%mm2, %%mm2 \n\t"\
02296 "paddd %%mm2, %%mm5 \n\t"\
02297 "psrld $14, %%mm5 \n\t"\
02298 "paddd %%mm5, %%mm0 \n\t"
02299
02300 #else
02301 "pxor %%mm7, %%mm7 \n\t"
02302 "pxor %%mm0, %%mm0 \n\t"
02303 #define REAL_L2_DIFF_CORE(a, b)\
02304 "movq " #a ", %%mm5 \n\t"\
02305 "movq " #b ", %%mm2 \n\t"\
02306 "movq %%mm5, %%mm1 \n\t"\
02307 "movq %%mm2, %%mm3 \n\t"\
02308 "punpcklbw %%mm7, %%mm5 \n\t"\
02309 "punpckhbw %%mm7, %%mm1 \n\t"\
02310 "punpcklbw %%mm7, %%mm2 \n\t"\
02311 "punpckhbw %%mm7, %%mm3 \n\t"\
02312 "psubw %%mm2, %%mm5 \n\t"\
02313 "psubw %%mm3, %%mm1 \n\t"\
02314 "pmaddwd %%mm5, %%mm5 \n\t"\
02315 "pmaddwd %%mm1, %%mm1 \n\t"\
02316 "paddd %%mm1, %%mm5 \n\t"\
02317 "paddd %%mm5, %%mm0 \n\t"
02318
02319 #endif
02320
02321 #define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b)
02322
02323 L2_DIFF_CORE((%0), (%1))
02324 L2_DIFF_CORE((%0, %2), (%1, %2))
02325 L2_DIFF_CORE((%0, %2, 2), (%1, %2, 2))
02326 L2_DIFF_CORE((%0, %%REGa), (%1, %%REGa))
02327 L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4))
02328 L2_DIFF_CORE((%0, %%REGd), (%1, %%REGd))
02329 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
02330 L2_DIFF_CORE((%0, %%REGc), (%1, %%REGc))
02331
02332 #endif
02333
02334 "movq %%mm0, %%mm4 \n\t"
02335 "psrlq $32, %%mm0 \n\t"
02336 "paddd %%mm0, %%mm4 \n\t"
02337 "movd %%mm4, %%ecx \n\t"
02338 "shll $2, %%ecx \n\t"
02339 "mov %3, %%"REG_d" \n\t"
02340 "addl -4(%%"REG_d"), %%ecx \n\t"
02341 "addl 4(%%"REG_d"), %%ecx \n\t"
02342 "addl -1024(%%"REG_d"), %%ecx \n\t"
02343 "addl $4, %%ecx \n\t"
02344 "addl 1024(%%"REG_d"), %%ecx \n\t"
02345 "shrl $3, %%ecx \n\t"
02346 "movl %%ecx, (%%"REG_d") \n\t"
02347
02348
02349
02350
02351 "cmpl 512(%%"REG_d"), %%ecx \n\t"
02352 " jb 2f \n\t"
02353 "cmpl 516(%%"REG_d"), %%ecx \n\t"
02354 " jb 1f \n\t"
02355
02356 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02357 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02358 "movq (%0), %%mm0 \n\t"
02359 "movq (%0, %2), %%mm1 \n\t"
02360 "movq (%0, %2, 2), %%mm2 \n\t"
02361 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02362 "movq (%0, %2, 4), %%mm4 \n\t"
02363 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02364 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02365 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02366 "movq %%mm0, (%1) \n\t"
02367 "movq %%mm1, (%1, %2) \n\t"
02368 "movq %%mm2, (%1, %2, 2) \n\t"
02369 "movq %%mm3, (%1, %%"REG_a") \n\t"
02370 "movq %%mm4, (%1, %2, 4) \n\t"
02371 "movq %%mm5, (%1, %%"REG_d") \n\t"
02372 "movq %%mm6, (%1, %%"REG_a", 2) \n\t"
02373 "movq %%mm7, (%1, %%"REG_c") \n\t"
02374 "jmp 4f \n\t"
02375
02376 "1: \n\t"
02377 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02378 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02379 "movq (%0), %%mm0 \n\t"
02380 PAVGB((%1), %%mm0)
02381 "movq (%0, %2), %%mm1 \n\t"
02382 PAVGB((%1, %2), %%mm1)
02383 "movq (%0, %2, 2), %%mm2 \n\t"
02384 PAVGB((%1, %2, 2), %%mm2)
02385 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02386 PAVGB((%1, %%REGa), %%mm3)
02387 "movq (%0, %2, 4), %%mm4 \n\t"
02388 PAVGB((%1, %2, 4), %%mm4)
02389 "movq (%0, %%"REG_d"), %%mm5 \n\t"
02390 PAVGB((%1, %%REGd), %%mm5)
02391 "movq (%0, %%"REG_a", 2), %%mm6 \n\t"
02392 PAVGB((%1, %%REGa, 2), %%mm6)
02393 "movq (%0, %%"REG_c"), %%mm7 \n\t"
02394 PAVGB((%1, %%REGc), %%mm7)
02395 "movq %%mm0, (%1) \n\t"
02396 "movq %%mm1, (%1, %2) \n\t"
02397 "movq %%mm2, (%1, %2, 2) \n\t"
02398 "movq %%mm3, (%1, %%"REG_a") \n\t"
02399 "movq %%mm4, (%1, %2, 4) \n\t"
02400 "movq %%mm5, (%1, %%"REG_d") \n\t"
02401 "movq %%mm6, (%1, %%"REG_a", 2) \n\t"
02402 "movq %%mm7, (%1, %%"REG_c") \n\t"
02403 "movq %%mm0, (%0) \n\t"
02404 "movq %%mm1, (%0, %2) \n\t"
02405 "movq %%mm2, (%0, %2, 2) \n\t"
02406 "movq %%mm3, (%0, %%"REG_a") \n\t"
02407 "movq %%mm4, (%0, %2, 4) \n\t"
02408 "movq %%mm5, (%0, %%"REG_d") \n\t"
02409 "movq %%mm6, (%0, %%"REG_a", 2) \n\t"
02410 "movq %%mm7, (%0, %%"REG_c") \n\t"
02411 "jmp 4f \n\t"
02412
02413 "2: \n\t"
02414 "cmpl 508(%%"REG_d"), %%ecx \n\t"
02415 " jb 3f \n\t"
02416
02417 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02418 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02419 "movq (%0), %%mm0 \n\t"
02420 "movq (%0, %2), %%mm1 \n\t"
02421 "movq (%0, %2, 2), %%mm2 \n\t"
02422 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02423 "movq (%1), %%mm4 \n\t"
02424 "movq (%1, %2), %%mm5 \n\t"
02425 "movq (%1, %2, 2), %%mm6 \n\t"
02426 "movq (%1, %%"REG_a"), %%mm7 \n\t"
02427 PAVGB(%%mm4, %%mm0)
02428 PAVGB(%%mm5, %%mm1)
02429 PAVGB(%%mm6, %%mm2)
02430 PAVGB(%%mm7, %%mm3)
02431 PAVGB(%%mm4, %%mm0)
02432 PAVGB(%%mm5, %%mm1)
02433 PAVGB(%%mm6, %%mm2)
02434 PAVGB(%%mm7, %%mm3)
02435 "movq %%mm0, (%1) \n\t"
02436 "movq %%mm1, (%1, %2) \n\t"
02437 "movq %%mm2, (%1, %2, 2) \n\t"
02438 "movq %%mm3, (%1, %%"REG_a") \n\t"
02439 "movq %%mm0, (%0) \n\t"
02440 "movq %%mm1, (%0, %2) \n\t"
02441 "movq %%mm2, (%0, %2, 2) \n\t"
02442 "movq %%mm3, (%0, %%"REG_a") \n\t"
02443
02444 "movq (%0, %2, 4), %%mm0 \n\t"
02445 "movq (%0, %%"REG_d"), %%mm1 \n\t"
02446 "movq (%0, %%"REG_a", 2), %%mm2 \n\t"
02447 "movq (%0, %%"REG_c"), %%mm3 \n\t"
02448 "movq (%1, %2, 4), %%mm4 \n\t"
02449 "movq (%1, %%"REG_d"), %%mm5 \n\t"
02450 "movq (%1, %%"REG_a", 2), %%mm6 \n\t"
02451 "movq (%1, %%"REG_c"), %%mm7 \n\t"
02452 PAVGB(%%mm4, %%mm0)
02453 PAVGB(%%mm5, %%mm1)
02454 PAVGB(%%mm6, %%mm2)
02455 PAVGB(%%mm7, %%mm3)
02456 PAVGB(%%mm4, %%mm0)
02457 PAVGB(%%mm5, %%mm1)
02458 PAVGB(%%mm6, %%mm2)
02459 PAVGB(%%mm7, %%mm3)
02460 "movq %%mm0, (%1, %2, 4) \n\t"
02461 "movq %%mm1, (%1, %%"REG_d") \n\t"
02462 "movq %%mm2, (%1, %%"REG_a", 2) \n\t"
02463 "movq %%mm3, (%1, %%"REG_c") \n\t"
02464 "movq %%mm0, (%0, %2, 4) \n\t"
02465 "movq %%mm1, (%0, %%"REG_d") \n\t"
02466 "movq %%mm2, (%0, %%"REG_a", 2) \n\t"
02467 "movq %%mm3, (%0, %%"REG_c") \n\t"
02468 "jmp 4f \n\t"
02469
02470 "3: \n\t"
02471 "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t"
02472 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t"
02473 "movq (%0), %%mm0 \n\t"
02474 "movq (%0, %2), %%mm1 \n\t"
02475 "movq (%0, %2, 2), %%mm2 \n\t"
02476 "movq (%0, %%"REG_a"), %%mm3 \n\t"
02477 "movq (%1), %%mm4 \n\t"
02478 "movq (%1, %2), %%mm5 \n\t"
02479 "movq (%1, %2, 2), %%mm6 \n\t"
02480 "movq (%1, %%"REG_a"), %%mm7 \n\t"
02481 PAVGB(%%mm4, %%mm0)
02482 PAVGB(%%mm5, %%mm1)
02483 PAVGB(%%mm6, %%mm2)
02484 PAVGB(%%mm7, %%mm3)
02485 PAVGB(%%mm4, %%mm0)
02486 PAVGB(%%mm5, %%mm1)
02487 PAVGB(%%mm6, %%mm2)
02488 PAVGB(%%mm7, %%mm3)
02489 PAVGB(%%mm4, %%mm0)
02490 PAVGB(%%mm5, %%mm1)
02491 PAVGB(%%mm6, %%mm2)
02492 PAVGB(%%mm7, %%mm3)
02493 "movq %%mm0, (%1) \n\t"
02494 "movq %%mm1, (%1, %2) \n\t"
02495 "movq %%mm2, (%1, %2, 2) \n\t"
02496 "movq %%mm3, (%1, %%"REG_a") \n\t"
02497 "movq %%mm0, (%0) \n\t"
02498 "movq %%mm1, (%0, %2) \n\t"
02499 "movq %%mm2, (%0, %2, 2) \n\t"
02500 "movq %%mm3, (%0, %%"REG_a") \n\t"
02501
02502 "movq (%0, %2, 4), %%mm0 \n\t"
02503 "movq (%0, %%"REG_d"), %%mm1 \n\t"
02504 "movq (%0, %%"REG_a", 2), %%mm2 \n\t"
02505 "movq (%0, %%"REG_c"), %%mm3 \n\t"
02506 "movq (%1, %2, 4), %%mm4 \n\t"
02507 "movq (%1, %%"REG_d"), %%mm5 \n\t"
02508 "movq (%1, %%"REG_a", 2), %%mm6 \n\t"
02509 "movq (%1, %%"REG_c"), %%mm7 \n\t"
02510 PAVGB(%%mm4, %%mm0)
02511 PAVGB(%%mm5, %%mm1)
02512 PAVGB(%%mm6, %%mm2)
02513 PAVGB(%%mm7, %%mm3)
02514 PAVGB(%%mm4, %%mm0)
02515 PAVGB(%%mm5, %%mm1)
02516 PAVGB(%%mm6, %%mm2)
02517 PAVGB(%%mm7, %%mm3)
02518 PAVGB(%%mm4, %%mm0)
02519 PAVGB(%%mm5, %%mm1)
02520 PAVGB(%%mm6, %%mm2)
02521 PAVGB(%%mm7, %%mm3)
02522 "movq %%mm0, (%1, %2, 4) \n\t"
02523 "movq %%mm1, (%1, %%"REG_d") \n\t"
02524 "movq %%mm2, (%1, %%"REG_a", 2) \n\t"
02525 "movq %%mm3, (%1, %%"REG_c") \n\t"
02526 "movq %%mm0, (%0, %2, 4) \n\t"
02527 "movq %%mm1, (%0, %%"REG_d") \n\t"
02528 "movq %%mm2, (%0, %%"REG_a", 2) \n\t"
02529 "movq %%mm3, (%0, %%"REG_c") \n\t"
02530
02531 "4: \n\t"
02532
02533 :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast)
02534 : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
02535 );
02536
02537 #else
02538 {
02539 int y;
02540 int d=0;
02541
02542 int i;
02543
02544 for(y=0; y<8; y++)
02545 {
02546 int x;
02547 for(x=0; x<8; x++)
02548 {
02549 int ref= tempBlured[ x + y*stride ];
02550 int cur= src[ x + y*stride ];
02551 int d1=ref - cur;
02552
02553
02554
02555 d+= d1*d1;
02556
02557 }
02558 }
02559 i=d;
02560 d= (
02561 4*d
02562 +(*(tempBluredPast-256))
02563 +(*(tempBluredPast-1))+ (*(tempBluredPast+1))
02564 +(*(tempBluredPast+256))
02565 +4)>>3;
02566 *tempBluredPast=i;
02567
02568
02569
02570
02571
02572
02573
02574
02575
02576
02577 if(d > maxNoise[1])
02578 {
02579 if(d < maxNoise[2])
02580 {
02581 for(y=0; y<8; y++)
02582 {
02583 int x;
02584 for(x=0; x<8; x++)
02585 {
02586 int ref= tempBlured[ x + y*stride ];
02587 int cur= src[ x + y*stride ];
02588 tempBlured[ x + y*stride ]=
02589 src[ x + y*stride ]=
02590 (ref + cur + 1)>>1;
02591 }
02592 }
02593 }
02594 else
02595 {
02596 for(y=0; y<8; y++)
02597 {
02598 int x;
02599 for(x=0; x<8; x++)
02600 {
02601 tempBlured[ x + y*stride ]= src[ x + y*stride ];
02602 }
02603 }
02604 }
02605 }
02606 else
02607 {
02608 if(d < maxNoise[0])
02609 {
02610 for(y=0; y<8; y++)
02611 {
02612 int x;
02613 for(x=0; x<8; x++)
02614 {
02615 int ref= tempBlured[ x + y*stride ];
02616 int cur= src[ x + y*stride ];
02617 tempBlured[ x + y*stride ]=
02618 src[ x + y*stride ]=
02619 (ref*7 + cur + 4)>>3;
02620 }
02621 }
02622 }
02623 else
02624 {
02625 for(y=0; y<8; y++)
02626 {
02627 int x;
02628 for(x=0; x<8; x++)
02629 {
02630 int ref= tempBlured[ x + y*stride ];
02631 int cur= src[ x + y*stride ];
02632 tempBlured[ x + y*stride ]=
02633 src[ x + y*stride ]=
02634 (ref*3 + cur + 2)>>2;
02635 }
02636 }
02637 }
02638 }
02639 }
02640 #endif
02641 }
02642 #endif //HAVE_ALTIVEC
02643
02644 #ifdef HAVE_MMX
02645
02648 static always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){
02649 int64_t dc_mask, eq_mask, both_masks;
02650 int64_t sums[10*8*2];
02651 src+= step*3;
02652
02653 asm volatile(
02654 "movq %0, %%mm7 \n\t"
02655 "movq %1, %%mm6 \n\t"
02656 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
02657 );
02658
02659 asm volatile(
02660 "lea (%2, %3), %%"REG_a" \n\t"
02661
02662
02663
02664 "movq (%2), %%mm0 \n\t"
02665 "movq (%%"REG_a"), %%mm1 \n\t"
02666 "movq %%mm1, %%mm3 \n\t"
02667 "movq %%mm1, %%mm4 \n\t"
02668 "psubb %%mm1, %%mm0 \n\t"
02669 "paddb %%mm7, %%mm0 \n\t"
02670 "pcmpgtb %%mm6, %%mm0 \n\t"
02671
02672 "movq (%%"REG_a",%3), %%mm2 \n\t"
02673 PMAXUB(%%mm2, %%mm4)
02674 PMINUB(%%mm2, %%mm3, %%mm5)
02675 "psubb %%mm2, %%mm1 \n\t"
02676 "paddb %%mm7, %%mm1 \n\t"
02677 "pcmpgtb %%mm6, %%mm1 \n\t"
02678 "paddb %%mm1, %%mm0 \n\t"
02679
02680 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
02681 PMAXUB(%%mm1, %%mm4)
02682 PMINUB(%%mm1, %%mm3, %%mm5)
02683 "psubb %%mm1, %%mm2 \n\t"
02684 "paddb %%mm7, %%mm2 \n\t"
02685 "pcmpgtb %%mm6, %%mm2 \n\t"
02686 "paddb %%mm2, %%mm0 \n\t"
02687
02688 "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t"
02689
02690 "movq (%2, %3, 4), %%mm2 \n\t"
02691 PMAXUB(%%mm2, %%mm4)
02692 PMINUB(%%mm2, %%mm3, %%mm5)
02693 "psubb %%mm2, %%mm1 \n\t"
02694 "paddb %%mm7, %%mm1 \n\t"
02695 "pcmpgtb %%mm6, %%mm1 \n\t"
02696 "paddb %%mm1, %%mm0 \n\t"
02697
02698 "movq (%%"REG_a"), %%mm1 \n\t"
02699 PMAXUB(%%mm1, %%mm4)
02700 PMINUB(%%mm1, %%mm3, %%mm5)
02701 "psubb %%mm1, %%mm2 \n\t"
02702 "paddb %%mm7, %%mm2 \n\t"
02703 "pcmpgtb %%mm6, %%mm2 \n\t"
02704 "paddb %%mm2, %%mm0 \n\t"
02705
02706 "movq (%%"REG_a", %3), %%mm2 \n\t"
02707 PMAXUB(%%mm2, %%mm4)
02708 PMINUB(%%mm2, %%mm3, %%mm5)
02709 "psubb %%mm2, %%mm1 \n\t"
02710 "paddb %%mm7, %%mm1 \n\t"
02711 "pcmpgtb %%mm6, %%mm1 \n\t"
02712 "paddb %%mm1, %%mm0 \n\t"
02713
02714 "movq (%%"REG_a", %3, 2), %%mm1 \n\t"
02715 PMAXUB(%%mm1, %%mm4)
02716 PMINUB(%%mm1, %%mm3, %%mm5)
02717 "psubb %%mm1, %%mm2 \n\t"
02718 "paddb %%mm7, %%mm2 \n\t"
02719 "pcmpgtb %%mm6, %%mm2 \n\t"
02720 "paddb %%mm2, %%mm0 \n\t"
02721
02722 "movq (%2, %3, 8), %%mm2 \n\t"
02723 PMAXUB(%%mm2, %%mm4)
02724 PMINUB(%%mm2, %%mm3, %%mm5)
02725 "psubb %%mm2, %%mm1 \n\t"
02726 "paddb %%mm7, %%mm1 \n\t"
02727 "pcmpgtb %%mm6, %%mm1 \n\t"
02728 "paddb %%mm1, %%mm0 \n\t"
02729
02730 "movq (%%"REG_a", %3, 4), %%mm1 \n\t"
02731 "psubb %%mm1, %%mm2 \n\t"
02732 "paddb %%mm7, %%mm2 \n\t"
02733 "pcmpgtb %%mm6, %%mm2 \n\t"
02734 "paddb %%mm2, %%mm0 \n\t"
02735 "psubusb %%mm3, %%mm4 \n\t"
02736
02737 "pxor %%mm6, %%mm6 \n\t"
02738 "movq %4, %%mm7 \n\t"
02739 "paddusb %%mm7, %%mm7 \n\t"
02740 "psubusb %%mm4, %%mm7 \n\t"
02741 "pcmpeqb %%mm6, %%mm7 \n\t"
02742 "pcmpeqb %%mm6, %%mm7 \n\t"
02743 "movq %%mm7, %1 \n\t"
02744
02745 "movq %5, %%mm7 \n\t"
02746 "punpcklbw %%mm7, %%mm7 \n\t"
02747 "punpcklbw %%mm7, %%mm7 \n\t"
02748 "punpcklbw %%mm7, %%mm7 \n\t"
02749 "psubb %%mm0, %%mm6 \n\t"
02750 "pcmpgtb %%mm7, %%mm6 \n\t"
02751 "movq %%mm6, %0 \n\t"
02752
02753 : "=m" (eq_mask), "=m" (dc_mask)
02754 : "r" (src), "r" ((long)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
02755 : "%"REG_a
02756 );
02757
02758 both_masks = dc_mask & eq_mask;
02759
02760 if(both_masks){
02761 long offset= -8*step;
02762 int64_t *temp_sums= sums;
02763
02764 asm volatile(
02765 "movq %2, %%mm0 \n\t"
02766 "pxor %%mm4, %%mm4 \n\t"
02767
02768 "movq (%0), %%mm6 \n\t"
02769 "movq (%0, %1), %%mm5 \n\t"
02770 "movq %%mm5, %%mm1 \n\t"
02771 "movq %%mm6, %%mm2 \n\t"
02772 "psubusb %%mm6, %%mm5 \n\t"
02773 "psubusb %%mm1, %%mm2 \n\t"
02774 "por %%mm5, %%mm2 \n\t"
02775 "psubusb %%mm2, %%mm0 \n\t"
02776 "pcmpeqb %%mm4, %%mm0 \n\t"
02777
02778 "pxor %%mm6, %%mm1 \n\t"
02779 "pand %%mm0, %%mm1 \n\t"
02780 "pxor %%mm1, %%mm6 \n\t"
02781
02782
02783 "movq (%0, %1, 8), %%mm5 \n\t"
02784 "add %1, %0 \n\t"
02785 "movq (%0, %1, 8), %%mm7 \n\t"
02786 "movq %%mm5, %%mm1 \n\t"
02787 "movq %%mm7, %%mm2 \n\t"
02788 "psubusb %%mm7, %%mm5 \n\t"
02789 "psubusb %%mm1, %%mm2 \n\t"
02790 "por %%mm5, %%mm2 \n\t"
02791 "movq %2, %%mm0 \n\t"
02792 "psubusb %%mm2, %%mm0 \n\t"
02793 "pcmpeqb %%mm4, %%mm0 \n\t"
02794
02795 "pxor %%mm7, %%mm1 \n\t"
02796 "pand %%mm0, %%mm1 \n\t"
02797 "pxor %%mm1, %%mm7 \n\t"
02798
02799 "movq %%mm6, %%mm5 \n\t"
02800 "punpckhbw %%mm4, %%mm6 \n\t"
02801 "punpcklbw %%mm4, %%mm5 \n\t"
02802
02803
02804 "movq %%mm5, %%mm0 \n\t"
02805 "movq %%mm6, %%mm1 \n\t"
02806 "psllw $2, %%mm0 \n\t"
02807 "psllw $2, %%mm1 \n\t"
02808 "paddw "MANGLE(w04)", %%mm0 \n\t"
02809 "paddw "MANGLE(w04)", %%mm1 \n\t"
02810
02811 #define NEXT\
02812 "movq (%0), %%mm2 \n\t"\
02813 "movq (%0), %%mm3 \n\t"\
02814 "add %1, %0 \n\t"\
02815 "punpcklbw %%mm4, %%mm2 \n\t"\
02816 "punpckhbw %%mm4, %%mm3 \n\t"\
02817 "paddw %%mm2, %%mm0 \n\t"\
02818 "paddw %%mm3, %%mm1 \n\t"
02819
02820 #define PREV\
02821 "movq (%0), %%mm2 \n\t"\
02822 "movq (%0), %%mm3 \n\t"\
02823 "add %1, %0 \n\t"\
02824 "punpcklbw %%mm4, %%mm2 \n\t"\
02825 "punpckhbw %%mm4, %%mm3 \n\t"\
02826 "psubw %%mm2, %%mm0 \n\t"\
02827 "psubw %%mm3, %%mm1 \n\t"
02828
02829
02830 NEXT
02831 NEXT
02832 NEXT
02833 "movq %%mm0, (%3) \n\t"
02834 "movq %%mm1, 8(%3) \n\t"
02835
02836 NEXT
02837 "psubw %%mm5, %%mm0 \n\t"
02838 "psubw %%mm6, %%mm1 \n\t"
02839 "movq %%mm0, 16(%3) \n\t"
02840 "movq %%mm1, 24(%3) \n\t"
02841
02842 NEXT
02843 "psubw %%mm5, %%mm0 \n\t"
02844 "psubw %%mm6, %%mm1 \n\t"
02845 "movq %%mm0, 32(%3) \n\t"
02846 "movq %%mm1, 40(%3) \n\t"
02847
02848 NEXT
02849 "psubw %%mm5, %%mm0 \n\t"
02850 "psubw %%mm6, %%mm1 \n\t"
02851 "movq %%mm0, 48(%3) \n\t"
02852 "movq %%mm1, 56(%3) \n\t"
02853
02854 NEXT
02855 "psubw %%mm5, %%mm0 \n\t"
02856 "psubw %%mm6, %%mm1 \n\t"
02857 "movq %%mm0, 64(%3) \n\t"
02858 "movq %%mm1, 72(%3) \n\t"
02859
02860 "movq %%mm7, %%mm6 \n\t"
02861 "punpckhbw %%mm4, %%mm7 \n\t"
02862 "punpcklbw %%mm4, %%mm6 \n\t"
02863
02864 NEXT
02865 "mov %4, %0 \n\t"
02866 "add %1, %0 \n\t"
02867 PREV
02868 "movq %%mm0, 80(%3) \n\t"
02869 "movq %%mm1, 88(%3) \n\t"
02870
02871 PREV
02872 "paddw %%mm6, %%mm0 \n\t"
02873 "paddw %%mm7, %%mm1 \n\t"
02874 "movq %%mm0, 96(%3) \n\t"
02875 "movq %%mm1, 104(%3) \n\t"
02876
02877 PREV
02878 "paddw %%mm6, %%mm0 \n\t"
02879 "paddw %%mm7, %%mm1 \n\t"
02880 "movq %%mm0, 112(%3) \n\t"
02881 "movq %%mm1, 120(%3) \n\t"
02882
02883 PREV
02884 "paddw %%mm6, %%mm0 \n\t"
02885 "paddw %%mm7, %%mm1 \n\t"
02886 "movq %%mm0, 128(%3) \n\t"
02887 "movq %%mm1, 136(%3) \n\t"
02888
02889 PREV
02890 "paddw %%mm6, %%mm0 \n\t"
02891 "paddw %%mm7, %%mm1 \n\t"
02892 "movq %%mm0, 144(%3) \n\t"
02893 "movq %%mm1, 152(%3) \n\t"
02894
02895 "mov %4, %0 \n\t"
02896
02897 : "+&r"(src)
02898 : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src)
02899 );
02900
02901 src+= step;
02902
02903 asm volatile(
02904 "movq %4, %%mm6 \n\t"
02905 "pcmpeqb %%mm5, %%mm5 \n\t"
02906 "pxor %%mm6, %%mm5 \n\t"
02907 "pxor %%mm7, %%mm7 \n\t"
02908
02909 "1: \n\t"
02910 "movq (%1), %%mm0 \n\t"
02911 "movq 8(%1), %%mm1 \n\t"
02912 "paddw 32(%1), %%mm0 \n\t"
02913 "paddw 40(%1), %%mm1 \n\t"
02914 "movq (%0, %3), %%mm2 \n\t"
02915 "movq %%mm2, %%mm3 \n\t"
02916 "movq %%mm2, %%mm4 \n\t"
02917 "punpcklbw %%mm7, %%mm2 \n\t"
02918 "punpckhbw %%mm7, %%mm3 \n\t"
02919 "paddw %%mm2, %%mm0 \n\t"
02920 "paddw %%mm3, %%mm1 \n\t"
02921 "paddw %%mm2, %%mm0 \n\t"
02922 "paddw %%mm3, %%mm1 \n\t"
02923 "psrlw $4, %%mm0 \n\t"
02924 "psrlw $4, %%mm1 \n\t"
02925 "packuswb %%mm1, %%mm0 \n\t"
02926 "pand %%mm6, %%mm0 \n\t"
02927 "pand %%mm5, %%mm4 \n\t"
02928 "por %%mm4, %%mm0 \n\t"
02929 "movq %%mm0, (%0, %3) \n\t"
02930 "add $16, %1 \n\t"
02931 "add %2, %0 \n\t"
02932 " js 1b \n\t"
02933
02934 : "+r"(offset), "+r"(temp_sums)
02935 : "r" ((long)step), "r"(src - offset), "m"(both_masks)
02936 );
02937 }else
02938 src+= step;
02939
02940 if(eq_mask != -1LL){
02941 uint8_t *temp_src= src;
02942 asm volatile(
02943 "pxor %%mm7, %%mm7 \n\t"
02944 "lea -40(%%"REG_SP"), %%"REG_c" \n\t"
02945 "and "ALIGN_MASK", %%"REG_c" \n\t"
02946
02947
02948
02949 "movq (%0), %%mm0 \n\t"
02950 "movq %%mm0, %%mm1 \n\t"
02951 "punpcklbw %%mm7, %%mm0 \n\t"
02952 "punpckhbw %%mm7, %%mm1 \n\t"
02953
02954 "movq (%0, %1), %%mm2 \n\t"
02955 "lea (%0, %1, 2), %%"REG_a" \n\t"
02956 "movq %%mm2, %%mm3 \n\t"
02957 "punpcklbw %%mm7, %%mm2 \n\t"
02958 "punpckhbw %%mm7, %%mm3 \n\t"
02959
02960 "movq (%%"REG_a"), %%mm4 \n\t"
02961 "movq %%mm4, %%mm5 \n\t"
02962 "punpcklbw %%mm7, %%mm4 \n\t"
02963 "punpckhbw %%mm7, %%mm5 \n\t"
02964
02965 "paddw %%mm0, %%mm0 \n\t"
02966 "paddw %%mm1, %%mm1 \n\t"
02967 "psubw %%mm4, %%mm2 \n\t"
02968 "psubw %%mm5, %%mm3 \n\t"
02969 "psubw %%mm2, %%mm0 \n\t"
02970 "psubw %%mm3, %%mm1 \n\t"
02971
02972 "psllw $2, %%mm2 \n\t"
02973 "psllw $2, %%mm3 \n\t"
02974 "psubw %%mm2, %%mm0 \n\t"
02975 "psubw %%mm3, %%mm1 \n\t"
02976
02977 "movq (%%"REG_a", %1), %%mm2 \n\t"
02978 "movq %%mm2, %%mm3 \n\t"
02979 "punpcklbw %%mm7, %%mm2 \n\t"
02980 "punpckhbw %%mm7, %%mm3 \n\t"
02981
02982 "psubw %%mm2, %%mm0 \n\t"
02983 "psubw %%mm3, %%mm1 \n\t"
02984 "psubw %%mm2, %%mm0 \n\t"
02985 "psubw %%mm3, %%mm1 \n\t"
02986 "movq %%mm0, (%%"REG_c") \n\t"
02987 "movq %%mm1, 8(%%"REG_c") \n\t"
02988
02989 "movq (%%"REG_a", %1, 2), %%mm0 \n\t"
02990 "movq %%mm0, %%mm1 \n\t"
02991 "punpcklbw %%mm7, %%mm0 \n\t"
02992 "punpckhbw %%mm7, %%mm1 \n\t"
02993
02994 "psubw %%mm0, %%mm2 \n\t"
02995 "psubw %%mm1, %%mm3 \n\t"
02996 "movq %%mm2, 16(%%"REG_c") \n\t"
02997 "movq %%mm3, 24(%%"REG_c") \n\t"
02998 "paddw %%mm4, %%mm4 \n\t"
02999 "paddw %%mm5, %%mm5 \n\t"
03000 "psubw %%mm2, %%mm4 \n\t"
03001 "psubw %%mm3, %%mm5 \n\t"
03002
03003 "lea (%%"REG_a", %1), %0 \n\t"
03004 "psllw $2, %%mm2 \n\t"
03005 "psllw $2, %%mm3 \n\t"
03006 "psubw %%mm2, %%mm4 \n\t"
03007 "psubw %%mm3, %%mm5 \n\t"
03008
03009 "movq (%0, %1, 2), %%mm2 \n\t"
03010 "movq %%mm2, %%mm3 \n\t"
03011 "punpcklbw %%mm7, %%mm2 \n\t"
03012 "punpckhbw %%mm7, %%mm3 \n\t"
03013 "psubw %%mm2, %%mm4 \n\t"
03014 "psubw %%mm3, %%mm5 \n\t"
03015 "psubw %%mm2, %%mm4 \n\t"
03016 "psubw %%mm3, %%mm5 \n\t"
03017
03018 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
03019 "punpcklbw %%mm7, %%mm6 \n\t"
03020 "psubw %%mm6, %%mm2 \n\t"
03021 "movq (%%"REG_a", %1, 4), %%mm6 \n\t"
03022 "punpckhbw %%mm7, %%mm6 \n\t"
03023 "psubw %%mm6, %%mm3 \n\t"
03024
03025 "paddw %%mm0, %%mm0 \n\t"
03026 "paddw %%mm1, %%mm1 \n\t"
03027 "psubw %%mm2, %%mm0 \n\t"
03028 "psubw %%mm3, %%mm1 \n\t"
03029
03030 "psllw $2, %%mm2 \n\t"
03031 "psllw $2, %%mm3 \n\t"
03032 "psubw %%mm2, %%mm0 \n\t"
03033 "psubw %%mm3, %%mm1 \n\t"
03034
03035 "movq (%0, %1, 4), %%mm2 \n\t"
03036 "movq %%mm2, %%mm3 \n\t"
03037 "punpcklbw %%mm7, %%mm2 \n\t"
03038 "punpckhbw %%mm7, %%mm3 \n\t"
03039
03040 "paddw %%mm2, %%mm2 \n\t"
03041 "paddw %%mm3, %%mm3 \n\t"
03042 "psubw %%mm2, %%mm0 \n\t"
03043 "psubw %%mm3, %%mm1 \n\t"
03044
03045 "movq (%%"REG_c"), %%mm2 \n\t"
03046 "movq 8(%%"REG_c"), %%mm3 \n\t"
03047
03048 #ifdef HAVE_MMX2
03049 "movq %%mm7, %%mm6 \n\t"
03050 "psubw %%mm0, %%mm6 \n\t"
03051 "pmaxsw %%mm6, %%mm0 \n\t"
03052 "movq %%mm7, %%mm6 \n\t"
03053 "psubw %%mm1, %%mm6 \n\t"
03054 "pmaxsw %%mm6, %%mm1 \n\t"
03055 "movq %%mm7, %%mm6 \n\t"
03056 "psubw %%mm2, %%mm6 \n\t"
03057 "pmaxsw %%mm6, %%mm2 \n\t"
03058 "movq %%mm7, %%mm6 \n\t"
03059 "psubw %%mm3, %%mm6 \n\t"
03060 "pmaxsw %%mm6, %%mm3 \n\t"
03061 #else
03062 "movq %%mm7, %%mm6 \n\t"
03063 "pcmpgtw %%mm0, %%mm6 \n\t"
03064 "pxor %%mm6, %%mm0 \n\t"
03065 "psubw %%mm6, %%mm0 \n\t"
03066 "movq %%mm7, %%mm6 \n\t"
03067 "pcmpgtw %%mm1, %%mm6 \n\t"
03068 "pxor %%mm6, %%mm1 \n\t"
03069 "psubw %%mm6, %%mm1 \n\t"
03070 "movq %%mm7, %%mm6 \n\t"
03071 "pcmpgtw %%mm2, %%mm6 \n\t"
03072 "pxor %%mm6, %%mm2 \n\t"
03073 "psubw %%mm6, %%mm2 \n\t"
03074 "movq %%mm7, %%mm6 \n\t"
03075 "pcmpgtw %%mm3, %%mm6 \n\t"
03076 "pxor %%mm6, %%mm3 \n\t"
03077 "psubw %%mm6, %%mm3 \n\t"
03078 #endif
03079
03080 #ifdef HAVE_MMX2
03081 "pminsw %%mm2, %%mm0 \n\t"
03082 "pminsw %%mm3, %%mm1 \n\t"
03083 #else
03084 "movq %%mm0, %%mm6 \n\t"
03085 "psubusw %%mm2, %%mm6 \n\t"
03086 "psubw %%mm6, %%mm0 \n\t"
03087 "movq %%mm1, %%mm6 \n\t"
03088 "psubusw %%mm3, %%mm6 \n\t"
03089 "psubw %%mm6, %%mm1 \n\t"
03090 #endif
03091
03092 "movd %2, %%mm2 \n\t"
03093 "punpcklbw %%mm7, %%mm2 \n\t"
03094
03095 "movq %%mm7, %%mm6 \n\t"
03096 "pcmpgtw %%mm4, %%mm6 \n\t"
03097 "pxor %%mm6, %%mm4 \n\t"
03098 "psubw %%mm6, %%mm4 \n\t"
03099 "pcmpgtw %%mm5, %%mm7 \n\t"
03100 "pxor %%mm7, %%mm5 \n\t"
03101 "psubw %%mm7, %%mm5 \n\t"
03102
03103 "psllw $3, %%mm2 \n\t"
03104 "movq %%mm2, %%mm3 \n\t"
03105 "pcmpgtw %%mm4, %%mm2 \n\t"
03106 "pcmpgtw %%mm5, %%mm3 \n\t"
03107 "pand %%mm2, %%mm4 \n\t"
03108 "pand %%mm3, %%mm5 \n\t"
03109
03110
03111 "psubusw %%mm0, %%mm4 \n\t"
03112 "psubusw %%mm1, %%mm5 \n\t"
03113
03114
03115 "movq "MANGLE(w05)", %%mm2 \n\t"
03116 "pmullw %%mm2, %%mm4 \n\t"
03117 "pmullw %%mm2, %%mm5 \n\t"
03118 "movq "MANGLE(w20)", %%mm2 \n\t"
03119 "paddw %%mm2, %%mm4 \n\t"
03120 "paddw %%mm2, %%mm5 \n\t"
03121 "psrlw $6, %%mm4 \n\t"
03122 "psrlw $6, %%mm5 \n\t"
03123
03124 "movq 16(%%"REG_c"), %%mm0 \n\t"
03125 "movq 24(%%"REG_c"), %%mm1 \n\t"
03126
03127 "pxor %%mm2, %%mm2 \n\t"
03128 "pxor %%mm3, %%mm3 \n\t"
03129
03130 "pcmpgtw %%mm0, %%mm2 \n\t"
03131 "pcmpgtw %%mm1, %%mm3 \n\t"
03132 "pxor %%mm2, %%mm0 \n\t"
03133 "pxor %%mm3, %%mm1 \n\t"
03134 "psubw %%mm2, %%mm0 \n\t"
03135 "psubw %%mm3, %%mm1 \n\t"
03136 "psrlw $1, %%mm0 \n\t"
03137 "psrlw $1, %%mm1 \n\t"
03138
03139 "pxor %%mm6, %%mm2 \n\t"
03140 "pxor %%mm7, %%mm3 \n\t"
03141 "pand %%mm2, %%mm4 \n\t"
03142 "pand %%mm3, %%mm5 \n\t"
03143
03144 #ifdef HAVE_MMX2
03145 "pminsw %%mm0, %%mm4 \n\t"
03146 "pminsw %%mm1, %%mm5 \n\t"
03147 #else
03148 "movq %%mm4, %%mm2 \n\t"
03149 "psubusw %%mm0, %%mm2 \n\t"
03150 "psubw %%mm2, %%mm4 \n\t"
03151 "movq %%mm5, %%mm2 \n\t"
03152 "psubusw %%mm1, %%mm2 \n\t"
03153 "psubw %%mm2, %%mm5 \n\t"
03154 #endif
03155 "pxor %%mm6, %%mm4 \n\t"
03156 "pxor %%mm7, %%mm5 \n\t"
03157 "psubw %%mm6, %%mm4 \n\t"
03158 "psubw %%mm7, %%mm5 \n\t"
03159 "packsswb %%mm5, %%mm4 \n\t"
03160 "movq %3, %%mm1 \n\t"
03161 "pandn %%mm4, %%mm1 \n\t"
03162 "movq (%0), %%mm0 \n\t"
03163 "paddb %%mm1, %%mm0 \n\t"
03164 "movq %%mm0, (%0) \n\t"
03165 "movq (%0, %1), %%mm0 \n\t"
03166 "psubb %%mm1, %%mm0 \n\t"
03167 "movq %%mm0, (%0, %1) \n\t"
03168
03169 : "+r" (temp_src)
03170 : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask)
03171 : "%"REG_a, "%"REG_c
03172 );
03173 }
03174
03175
03176
03177
03178
03179 }
03180 #endif //HAVE_MMX
03181
03182 static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03183 QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
03184
03189 #undef SCALED_CPY
03190
03191 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, uint8_t src[], int srcStride,
03192 int levelFix, int64_t *packedOffsetAndScale)
03193 {
03194 #ifndef HAVE_MMX
03195 int i;
03196 #endif
03197 if(levelFix)
03198 {
03199 #ifdef HAVE_MMX
03200 asm volatile(
03201 "movq (%%"REG_a"), %%mm2 \n\t"
03202 "movq 8(%%"REG_a"), %%mm3 \n\t"
03203 "lea (%2,%4), %%"REG_a" \n\t"
03204 "lea (%3,%5), %%"REG_d" \n\t"
03205 "pxor %%mm4, %%mm4 \n\t"
03206 #ifdef HAVE_MMX2
03207 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
03208 "movq " #src1 ", %%mm0 \n\t"\
03209 "movq " #src1 ", %%mm5 \n\t"\
03210 "movq " #src2 ", %%mm1 \n\t"\
03211 "movq " #src2 ", %%mm6 \n\t"\
03212 "punpcklbw %%mm0, %%mm0 \n\t"\
03213 "punpckhbw %%mm5, %%mm5 \n\t"\
03214 "punpcklbw %%mm1, %%mm1 \n\t"\
03215 "punpckhbw %%mm6, %%mm6 \n\t"\
03216 "pmulhuw %%mm3, %%mm0 \n\t"\
03217 "pmulhuw %%mm3, %%mm5 \n\t"\
03218 "pmulhuw %%mm3, %%mm1 \n\t"\
03219 "pmulhuw %%mm3, %%mm6 \n\t"\
03220 "psubw %%mm2, %%mm0 \n\t"\
03221 "psubw %%mm2, %%mm5 \n\t"\
03222 "psubw %%mm2, %%mm1 \n\t"\
03223 "psubw %%mm2, %%mm6 \n\t"\
03224 "packuswb %%mm5, %%mm0 \n\t"\
03225 "packuswb %%mm6, %%mm1 \n\t"\
03226 "movq %%mm0, " #dst1 " \n\t"\
03227 "movq %%mm1, " #dst2 " \n\t"\
03228
03229 #else //HAVE_MMX2
03230 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
03231 "movq " #src1 ", %%mm0 \n\t"\
03232 "movq " #src1 ", %%mm5 \n\t"\
03233 "punpcklbw %%mm4, %%mm0 \n\t"\
03234 "punpckhbw %%mm4, %%mm5 \n\t"\
03235 "psubw %%mm2, %%mm0 \n\t"\
03236 "psubw %%mm2, %%mm5 \n\t"\
03237 "movq " #src2 ", %%mm1 \n\t"\
03238 "psllw $6, %%mm0 \n\t"\
03239 "psllw $6, %%mm5 \n\t"\
03240 "pmulhw %%mm3, %%mm0 \n\t"\
03241 "movq " #src2 ", %%mm6 \n\t"\
03242 "pmulhw %%mm3, %%mm5 \n\t"\
03243 "punpcklbw %%mm4, %%mm1 \n\t"\
03244 "punpckhbw %%mm4, %%mm6 \n\t"\
03245 "psubw %%mm2, %%mm1 \n\t"\
03246 "psubw %%mm2, %%mm6 \n\t"\
03247 "psllw $6, %%mm1 \n\t"\
03248 "psllw $6, %%mm6 \n\t"\
03249 "pmulhw %%mm3, %%mm1 \n\t"\
03250 "pmulhw %%mm3, %%mm6 \n\t"\
03251 "packuswb %%mm5, %%mm0 \n\t"\
03252 "packuswb %%mm6, %%mm1 \n\t"\
03253 "movq %%mm0, " #dst1 " \n\t"\
03254 "movq %%mm1, " #dst2 " \n\t"\
03255
03256 #endif
03257 #define SCALED_CPY(src1, src2, dst1, dst2)\
03258 REAL_SCALED_CPY(src1, src2, dst1, dst2)
03259
03260 SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5))
03261 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
03262 SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
03263 "lea (%%"REG_a",%4,4), %%"REG_a" \n\t"
03264 "lea (%%"REG_d",%5,4), %%"REG_d" \n\t"
03265 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
03266
03267
03268 : "=&a" (packedOffsetAndScale)
03269 : "0" (packedOffsetAndScale),
03270 "r"(src),
03271 "r"(dst),
03272 "r" ((long)srcStride),
03273 "r" ((long)dstStride)
03274 : "%"REG_d
03275 );
03276 #else
03277 for(i=0; i<8; i++)
03278 memcpy( &(dst[dstStride*i]),
03279 &(src[srcStride*i]), BLOCK_SIZE);
03280 #endif
03281 }
03282 else
03283 {
03284 #ifdef HAVE_MMX
03285 asm volatile(
03286 "lea (%0,%2), %%"REG_a" \n\t"
03287 "lea (%1,%3), %%"REG_d" \n\t"
03288
03289 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \
03290 "movq " #src1 ", %%mm0 \n\t"\
03291 "movq " #src2 ", %%mm1 \n\t"\
03292 "movq %%mm0, " #dst1 " \n\t"\
03293 "movq %%mm1, " #dst2 " \n\t"\
03294
03295 #define SIMPLE_CPY(src1, src2, dst1, dst2)\
03296 REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
03297
03298 SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3))
03299 SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
03300 SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
03301 "lea (%%"REG_a",%2,4), %%"REG_a" \n\t"
03302 "lea (%%"REG_d",%3,4), %%"REG_d" \n\t"
03303 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
03304
03305 : : "r" (src),
03306 "r" (dst),
03307 "r" ((long)srcStride),
03308 "r" ((long)dstStride)
03309 : "%"REG_a, "%"REG_d
03310 );
03311 #else
03312 for(i=0; i<8; i++)
03313 memcpy( &(dst[dstStride*i]),
03314 &(src[srcStride*i]), BLOCK_SIZE);
03315 #endif
03316 }
03317 }
03318
03322 static inline void RENAME(duplicate)(uint8_t src[], int stride)
03323 {
03324 #ifdef HAVE_MMX
03325 asm volatile(
03326 "movq (%0), %%mm0 \n\t"
03327 "add %1, %0 \n\t"
03328 "movq %%mm0, (%0) \n\t"
03329 "movq %%mm0, (%0, %1) \n\t"
03330 "movq %%mm0, (%0, %1, 2) \n\t"
03331 : "+r" (src)
03332 : "r" ((long)-stride)
03333 );
03334 #else
03335 int i;
03336 uint8_t *p=src;
03337 for(i=0; i<3; i++)
03338 {
03339 p-= stride;
03340 memcpy(p, src, 8);
03341 }
03342 #endif
03343 }
03344
03348 static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
03349 QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2)
03350 {
03351 PPContext __attribute__((aligned(8))) c= *c2;
03352 int x,y;
03353 #ifdef COMPILE_TIME_MODE
03354 const int mode= COMPILE_TIME_MODE;
03355 #else
03356 const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
03357 #endif
03358 int black=0, white=255;
03359 int QPCorrecture= 256*256;
03360
03361 int copyAhead;
03362 #ifdef HAVE_MMX
03363 int i;
03364 #endif
03365
03366 const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
03367 const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
03368
03369
03370 uint64_t * const yHistogram= c.yHistogram;
03371 uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
03372 uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
03373
03374
03375 #ifdef HAVE_MMX
03376 for(i=0; i<57; i++){
03377 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
03378 int threshold= offset*2 + 1;
03379 c.mmxDcOffset[i]= 0x7F - offset;
03380 c.mmxDcThreshold[i]= 0x7F - threshold;
03381 c.mmxDcOffset[i]*= 0x0101010101010101LL;
03382 c.mmxDcThreshold[i]*= 0x0101010101010101LL;
03383 }
03384 #endif
03385
03386 if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
03387 else if( (mode & LINEAR_BLEND_DEINT_FILTER)
03388 || (mode & FFMPEG_DEINT_FILTER)
03389 || (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
03390 else if( (mode & V_DEBLOCK)
03391 || (mode & LINEAR_IPOL_DEINT_FILTER)
03392 || (mode & MEDIAN_DEINT_FILTER)
03393 || (mode & V_A_DEBLOCK)) copyAhead=13;
03394 else if(mode & V_X1_FILTER) copyAhead=11;
03395
03396 else if(mode & DERING) copyAhead=9;
03397 else copyAhead=8;
03398
03399 copyAhead-= 8;
03400
03401 if(!isColor)
03402 {
03403 uint64_t sum= 0;
03404 int i;
03405 uint64_t maxClipped;
03406 uint64_t clipped;
03407 double scale;
03408
03409 c.frameNum++;
03410
03411 if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256;
03412
03413 for(i=0; i<256; i++)
03414 {
03415 sum+= yHistogram[i];
03416
03417 }
03418
03419
03420
03421 maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
03422
03423 clipped= sum;
03424 for(black=255; black>0; black--)
03425 {
03426 if(clipped < maxClipped) break;
03427 clipped-= yHistogram[black];
03428 }
03429
03430 clipped= sum;
03431 for(white=0; white<256; white++)
03432 {
03433 if(clipped < maxClipped) break;
03434 clipped-= yHistogram[white];
03435 }
03436
03437 scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
03438
03439 #ifdef HAVE_MMX2
03440 c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
03441 c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
03442 #else
03443 c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
03444 c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
03445 #endif
03446
03447 c.packedYOffset|= c.packedYOffset<<32;
03448 c.packedYOffset|= c.packedYOffset<<16;
03449
03450 c.packedYScale|= c.packedYScale<<32;
03451 c.packedYScale|= c.packedYScale<<16;
03452
03453 if(mode & LEVEL_FIX) QPCorrecture= (int)(scale*256*256 + 0.5);
03454 else QPCorrecture= 256*256;
03455 }
03456 else
03457 {
03458 c.packedYScale= 0x0100010001000100LL;
03459 c.packedYOffset= 0;
03460 QPCorrecture= 256*256;
03461 }
03462
03463
03464 y=-BLOCK_SIZE;
03465 {
03466 uint8_t *srcBlock= &(src[y*srcStride]);
03467 uint8_t *dstBlock= tempDst + dstStride;
03468
03469
03470
03471
03472 for(x=0; x<width; x+=BLOCK_SIZE)
03473 {
03474
03475 #ifdef HAVE_MMX2
03476
03477
03478
03479
03480
03481
03482
03483 asm(
03484 "mov %4, %%"REG_a" \n\t"
03485 "shr $2, %%"REG_a" \n\t"
03486 "and $6, %%"REG_a" \n\t"
03487 "add %5, %%"REG_a" \n\t"
03488 "mov %%"REG_a", %%"REG_d" \n\t"
03489 "imul %1, %%"REG_a" \n\t"
03490 "imul %3, %%"REG_d" \n\t"
03491 "prefetchnta 32(%%"REG_a", %0) \n\t"
03492 "prefetcht0 32(%%"REG_d", %2) \n\t"
03493 "add %1, %%"REG_a" \n\t"
03494 "add %3, %%"REG_d" \n\t"
03495 "prefetchnta 32(%%"REG_a", %0) \n\t"
03496 "prefetcht0 32(%%"REG_d", %2) \n\t"
03497 :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride),
03498 "g" ((long)x), "g" ((long)copyAhead)
03499 : "%"REG_a, "%"REG_d
03500 );
03501
03502 #elif defined(HAVE_3DNOW)
03503
03504
03505
03506
03507
03508
03509 #endif
03510
03511 RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
03512 srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03513
03514 RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
03515
03516 if(mode & LINEAR_IPOL_DEINT_FILTER)
03517 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03518 else if(mode & LINEAR_BLEND_DEINT_FILTER)
03519 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03520 else if(mode & MEDIAN_DEINT_FILTER)
03521 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03522 else if(mode & CUBIC_IPOL_DEINT_FILTER)
03523 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03524 else if(mode & FFMPEG_DEINT_FILTER)
03525 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03526 else if(mode & LOWPASS5_DEINT_FILTER)
03527 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03528
03529
03530
03531 dstBlock+=8;
03532 srcBlock+=8;
03533 }
03534 if(width==ABS(dstStride))
03535 linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
03536 else
03537 {
03538 int i;
03539 for(i=0; i<copyAhead; i++)
03540 {
03541 memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
03542 }
03543 }
03544 }
03545
03546
03547 for(y=0; y<height; y+=BLOCK_SIZE)
03548 {
03549
03550 uint8_t *srcBlock= &(src[y*srcStride]);
03551 uint8_t *dstBlock= &(dst[y*dstStride]);
03552 #ifdef HAVE_MMX
03553 uint8_t *tempBlock1= c.tempBlocks;
03554 uint8_t *tempBlock2= c.tempBlocks + 8;
03555 #endif
03556 int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
03557 int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*ABS(QPStride)];
03558 int QP=0;
03559
03560
03561 if(y+15 >= height)
03562 {
03563 int i;
03564
03565
03566 linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
03567 MAX(height-y-copyAhead, 0), srcStride);
03568
03569
03570 for(i=MAX(height-y, 8); i<copyAhead+8; i++)
03571 memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), ABS(srcStride));
03572
03573
03574 linecpy(tempDst, dstBlock - dstStride, MIN(height-y+1, copyAhead+1), dstStride);
03575
03576
03577 for(i=height-y+1; i<=copyAhead; i++)
03578 memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), ABS(dstStride));
03579
03580 dstBlock= tempDst + dstStride;
03581 srcBlock= tempSrc;
03582 }
03583
03584
03585
03586
03587
03588 for(x=0; x<width; x+=BLOCK_SIZE)
03589 {
03590 const int stride= dstStride;
03591 #ifdef HAVE_MMX
03592 uint8_t *tmpXchg;
03593 #endif
03594 if(isColor)
03595 {
03596 QP= QPptr[x>>qpHShift];
03597 c.nonBQP= nonBQPptr[x>>qpHShift];
03598 }
03599 else
03600 {
03601 QP= QPptr[x>>4];
03602 QP= (QP* QPCorrecture + 256*128)>>16;
03603 c.nonBQP= nonBQPptr[x>>4];
03604 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
03605 yHistogram[ srcBlock[srcStride*12 + 4] ]++;
03606 }
03607 c.QP= QP;
03608 #ifdef HAVE_MMX
03609 asm volatile(
03610 "movd %1, %%mm7 \n\t"
03611 "packuswb %%mm7, %%mm7 \n\t"
03612 "packuswb %%mm7, %%mm7 \n\t"
03613 "packuswb %%mm7, %%mm7 \n\t"
03614 "movq %%mm7, %0 \n\t"
03615 : "=m" (c.pQPb)
03616 : "r" (QP)
03617 );
03618 #endif
03619
03620
03621 #ifdef HAVE_MMX2
03622
03623
03624
03625
03626
03627
03628
03629 asm(
03630 "mov %4, %%"REG_a" \n\t"
03631 "shr $2, %%"REG_a" \n\t"
03632 "and $6, %%"REG_a" \n\t"
03633 "add %5, %%"REG_a" \n\t"
03634 "mov %%"REG_a", %%"REG_d" \n\t"
03635 "imul %1, %%"REG_a" \n\t"
03636 "imul %3, %%"REG_d" \n\t"
03637 "prefetchnta 32(%%"REG_a", %0) \n\t"
03638 "prefetcht0 32(%%"REG_d", %2) \n\t"
03639 "add %1, %%"REG_a" \n\t"
03640 "add %3, %%"REG_d" \n\t"
03641 "prefetchnta 32(%%"REG_a", %0) \n\t"
03642 "prefetcht0 32(%%"REG_d", %2) \n\t"
03643 :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride),
03644 "g" ((long)x), "g" ((long)copyAhead)
03645 : "%"REG_a, "%"REG_d
03646 );
03647
03648 #elif defined(HAVE_3DNOW)
03649
03650
03651
03652
03653
03654
03655 #endif
03656
03657 RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
03658 srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
03659
03660 if(mode & LINEAR_IPOL_DEINT_FILTER)
03661 RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
03662 else if(mode & LINEAR_BLEND_DEINT_FILTER)
03663 RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
03664 else if(mode & MEDIAN_DEINT_FILTER)
03665 RENAME(deInterlaceMedian)(dstBlock, dstStride);
03666 else if(mode & CUBIC_IPOL_DEINT_FILTER)
03667 RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
03668 else if(mode & FFMPEG_DEINT_FILTER)
03669 RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
03670 else if(mode & LOWPASS5_DEINT_FILTER)
03671 RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
03672
03673
03674
03675
03676
03677 if(y + 8 < height)
03678 {
03679 if(mode & V_X1_FILTER)
03680 RENAME(vertX1Filter)(dstBlock, stride, &c);
03681 else if(mode & V_DEBLOCK)
03682 {
03683 const int t= RENAME(vertClassify)(dstBlock, stride, &c);
03684
03685 if(t==1)
03686 RENAME(doVertLowPass)(dstBlock, stride, &c);
03687 else if(t==2)
03688 RENAME(doVertDefFilter)(dstBlock, stride, &c);
03689 }else if(mode & V_A_DEBLOCK){
03690 RENAME(do_a_deblock)(dstBlock, stride, 1, &c);
03691 }
03692 }
03693
03694 #ifdef HAVE_MMX
03695 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
03696 #endif
03697
03698 if(x - 8 >= 0)
03699 {
03700 #ifdef HAVE_MMX
03701 if(mode & H_X1_FILTER)
03702 RENAME(vertX1Filter)(tempBlock1, 16, &c);
03703 else if(mode & H_DEBLOCK)
03704 {
03705
03706 const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
03707
03708 if(t==1)
03709 RENAME(doVertLowPass)(tempBlock1, 16, &c);
03710 else if(t==2)
03711 RENAME(doVertDefFilter)(tempBlock1, 16, &c);
03712 }else if(mode & H_A_DEBLOCK){
03713 RENAME(do_a_deblock)(tempBlock1, 16, 1, &c);
03714 }
03715
03716 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
03717
03718 #else
03719 if(mode & H_X1_FILTER)
03720 horizX1Filter(dstBlock-4, stride, QP);
03721 else if(mode & H_DEBLOCK)
03722 {
03723 #ifdef HAVE_ALTIVEC
03724 unsigned char __attribute__ ((aligned(16))) tempBlock[272];
03725 transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
03726
03727 const int t=vertClassify_altivec(tempBlock-48, 16, &c);
03728 if(t==1) {
03729 doVertLowPass_altivec(tempBlock-48, 16, &c);
03730 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03731 }
03732 else if(t==2) {
03733 doVertDefFilter_altivec(tempBlock-48, 16, &c);
03734 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
03735 }
03736 #else
03737 const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
03738
03739 if(t==1)
03740 RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
03741 else if(t==2)
03742 RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
03743 #endif
03744 }else if(mode & H_A_DEBLOCK){
03745 RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
03746 }
03747 #endif
03748 if(mode & DERING)
03749 {
03750
03751 if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
03752 }
03753
03754 if(mode & TEMP_NOISE_FILTER)
03755 {
03756 RENAME(tempNoiseReducer)(dstBlock-8, stride,
03757 c.tempBlured[isColor] + y*dstStride + x,
03758 c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
03759 c.ppMode.maxTmpNoise);
03760 }
03761 }
03762
03763 dstBlock+=8;
03764 srcBlock+=8;
03765
03766 #ifdef HAVE_MMX
03767 tmpXchg= tempBlock1;
03768 tempBlock1= tempBlock2;
03769 tempBlock2 = tmpXchg;
03770 #endif
03771 }
03772
03773 if(mode & DERING)
03774 {
03775 if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
03776 }
03777
03778 if((mode & TEMP_NOISE_FILTER))
03779 {
03780 RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
03781 c.tempBlured[isColor] + y*dstStride + x,
03782 c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
03783 c.ppMode.maxTmpNoise);
03784 }
03785
03786
03787 if(y+15 >= height)
03788 {
03789 uint8_t *dstBlock= &(dst[y*dstStride]);
03790 if(width==ABS(dstStride))
03791 linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
03792 else
03793 {
03794 int i;
03795 for(i=0; i<height-y; i++)
03796 {
03797 memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
03798 }
03799 }
03800 }
03801
03802
03803
03804
03805
03806
03807
03808
03809
03810
03811 }
03812 #ifdef HAVE_3DNOW
03813 asm volatile("femms");
03814 #elif defined (HAVE_MMX)
03815 asm volatile("emms");
03816 #endif
03817
03818 #ifdef DEBUG_BRIGHTNESS
03819 if(!isColor)
03820 {
03821 int max=1;
03822 int i;
03823 for(i=0; i<256; i++)
03824 if(yHistogram[i] > max) max=yHistogram[i];
03825
03826 for(i=1; i<256; i++)
03827 {
03828 int x;
03829 int start=yHistogram[i-1]/(max/256+1);
03830 int end=yHistogram[i]/(max/256+1);
03831 int inc= end > start ? 1 : -1;
03832 for(x=start; x!=end+inc; x+=inc)
03833 dst[ i*dstStride + x]+=128;
03834 }
03835
03836 for(i=0; i<100; i+=2)
03837 {
03838 dst[ (white)*dstStride + i]+=128;
03839 dst[ (black)*dstStride + i]+=128;
03840 }
03841
03842 }
03843 #endif
03844
03845 *c2= c;
03846
03847 }