00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074 #include "config.h"
00075 #include <inttypes.h>
00076 #include <stdio.h>
00077 #include <stdlib.h>
00078 #include <string.h>
00079 #ifdef HAVE_MALLOC_H
00080 #include <malloc.h>
00081 #endif
00082
00083
00084
00085
00086
00087 #ifdef USE_FASTMEMCPY
00088 #include "fastmemcpy.h"
00089 #endif
00090 #include "postprocess.h"
00091 #include "postprocess_internal.h"
00092
00093 #include "mangle.h"
00094
00095 #ifdef HAVE_ALTIVEC_H
00096 #include <altivec.h>
00097 #endif
00098
00099 #ifndef HAVE_MEMALIGN
00100 #define memalign(a,b) malloc(b)
00101 #endif
00102
/*
 * Small arithmetic helpers.
 * Each argument may be evaluated more than once, so never pass an
 * expression with side effects (e.g. MIN(i++, j)).
 */
#define MIN(a,b) ((b) < (a) ? (b) : (a))
#define MAX(a,b) ((b) > (a) ? (b) : (a))
#define ABS(a) (0 < (a) ? (a) : (-(a)))
/* Note: SIGN(0) yields -1, there is no zero result. */
#define SIGN(a) (0 < (a) ? 1 : -1)
00107
00108 #define GET_MODE_BUFFER_SIZE 500
00109 #define OPTIONS_ARRAY_SIZE 10
00110 #define BLOCK_SIZE 8
00111 #define TEMP_STRIDE 8
00112
00113
00114 #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
00115 # define attribute_used __attribute__((used))
00116 # define always_inline __attribute__((always_inline)) inline
00117 #else
00118 # define attribute_used
00119 # define always_inline inline
00120 #endif
00121
00122 #if defined(ARCH_X86) || defined(ARCH_X86_64)
00123 static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
00124 static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
00125 static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
00126 static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
00127 static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
00128 static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
00129 static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
00130 static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
00131 #endif
00132
00133 static uint8_t clip_table[3*256];
00134 static uint8_t * const clip_tab= clip_table + 256;
00135
00136 static const int verbose= 0;
00137
00138 static const int attribute_used deringThreshold= 20;
00139
00140
00141 static struct PPFilter filters[]=
00142 {
00143 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
00144 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
00145
00146
00147 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
00148 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
00149 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
00150 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
00151 {"dr", "dering", 1, 5, 6, DERING},
00152 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
00153 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00154 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00155 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00156 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
00157 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
00158 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
00159 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
00160 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
00161 {NULL, NULL,0,0,0,0}
00162 };
00163
/**
 * Filter-name aliases, stored as consecutive (alias, expansion) string
 * pairs and terminated by a single NULL. The mode parser splices the
 * expansion into the filter string in place of the alias.
 */
static char *replaceTable[]=
{
    /* alias      expansion */
    "default",    "hdeblock:a,vdeblock:a,dering:a",
    "de",         "hdeblock:a,vdeblock:a,dering:a",
    "fast",       "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",         "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",         "ha:a:128:7,va:a,dering:a",
    NULL
};
00173
00174
00175 #if defined(ARCH_X86) || defined(ARCH_X86_64)
/**
 * Issue the x86 PREFETCHNTA hint for the address p.
 * Only a hint instruction is emitted; the memory at p is not written.
 *
 * The __asm__/__volatile__ spellings are used instead of plain asm/volatile
 * because GCC does not recognize the bare `asm` keyword in strict ISO modes
 * (-std=c99, -std=c11).
 *
 * @param p address to prefetch
 */
static inline void prefetchnta(void *p)
{
    __asm__ __volatile__(
        "prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
00182
/**
 * Issue the x86 PREFETCHT0 hint for the address p.
 * Only a hint instruction is emitted; the memory at p is not written.
 *
 * The __asm__/__volatile__ spellings are used instead of plain asm/volatile
 * because GCC does not recognize the bare `asm` keyword in strict ISO modes
 * (-std=c99, -std=c11).
 *
 * @param p address to prefetch
 */
static inline void prefetcht0(void *p)
{
    __asm__ __volatile__(
        "prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
00189
/**
 * Issue the x86 PREFETCHT1 hint for the address p.
 * Only a hint instruction is emitted; the memory at p is not written.
 *
 * The __asm__/__volatile__ spellings are used instead of plain asm/volatile
 * because GCC does not recognize the bare `asm` keyword in strict ISO modes
 * (-std=c99, -std=c11).
 *
 * @param p address to prefetch
 */
static inline void prefetcht1(void *p)
{
    __asm__ __volatile__(
        "prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
00196
/**
 * Issue the x86 PREFETCHT2 hint for the address p.
 * Only a hint instruction is emitted; the memory at p is not written.
 *
 * The __asm__/__volatile__ spellings are used instead of plain asm/volatile
 * because GCC does not recognize the bare `asm` keyword in strict ISO modes
 * (-std=c99, -std=c11).
 *
 * @param p address to prefetch
 */
static inline void prefetcht2(void *p)
{
    __asm__ __volatile__(
        "prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
00203 #endif
00204
00205
00206
00210 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00211 {
00212 int numEq= 0;
00213 int y;
00214 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00215 const int dcThreshold= dcOffset*2 + 1;
00216
00217 for(y=0; y<BLOCK_SIZE; y++)
00218 {
00219 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00220 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00221 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00222 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00223 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00224 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00225 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00226 src+= stride;
00227 }
00228 return numEq > c->ppMode.flatnessThreshold;
00229 }
00230
00234 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
00235 int numEq= 0;
00236 int y;
00237 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00238 const int dcThreshold= dcOffset*2 + 1;
00239
00240 src+= stride*4;
00241 for(y=0; y<BLOCK_SIZE-1; y++)
00242 {
00243 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00244 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00245 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00246 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00247 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00248 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00249 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00250 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00251 src+= stride;
00252 }
00253 return numEq > c->ppMode.flatnessThreshold;
00254 }
00255
/**
 * Cheap horizontal range check on 8 rows.
 *
 * Instead of computing a real min/max, each row compares one pair of
 * columns; the pairs cycle through (0,5), (2,7), (4,1), (6,3) every four
 * rows. The check fails as soon as one pair differs by more than 2*QP.
 *
 * @param src    first pixel of the first row
 * @param stride distance between two rows
 * @param QP     quantizer; the permitted difference is +/- 2*QP
 * @return 1 if every probed pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t probe[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for(row=0; row<8; row++)
    {
        const uint8_t *cols= probe[row & 3];

        /* unsigned compare: true when the difference is outside [-2*QP, 2*QP] */
        if((unsigned)(src[cols[0]] - src[cols[1]] + 2*QP) > 4*QP)
            return 0;
        src += stride;
    }
    return 1;
}
00278
00279 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00280 {
00281 #if 1
00282 #if 1
00283 int x;
00284 src+= stride*4;
00285 for(x=0; x<BLOCK_SIZE; x+=4)
00286 {
00287 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
00288 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00289 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00290 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00291 }
00292 #else
00293 int x;
00294 src+= stride*3;
00295 for(x=0; x<BLOCK_SIZE; x++)
00296 {
00297 if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
00298 }
00299 #endif
00300 return 1;
00301 #else
00302 int x;
00303 src+= stride*4;
00304 for(x=0; x<BLOCK_SIZE; x++)
00305 {
00306 int min=255;
00307 int max=0;
00308 int y;
00309 for(y=0; y<8; y++){
00310 int v= src[x + y*stride];
00311 if(v>max) max=v;
00312 if(v<min) min=v;
00313 }
00314 if(max-min > 2*QP) return 0;
00315 }
00316 return 1;
00317 #endif
00318 }
00319
00320 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
00321 if( isHorizDC_C(src, stride, c) ){
00322 if( isHorizMinMaxOk_C(src, stride, c->QP) )
00323 return 1;
00324 else
00325 return 0;
00326 }else{
00327 return 2;
00328 }
00329 }
00330
00331 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
00332 if( isVertDC_C(src, stride, c) ){
00333 if( isVertMinMaxOk_C(src, stride, c->QP) )
00334 return 1;
00335 else
00336 return 0;
00337 }else{
00338 return 2;
00339 }
00340 }
00341
00342 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00343 {
00344 int y;
00345 for(y=0; y<BLOCK_SIZE; y++)
00346 {
00347 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00348
00349 if(ABS(middleEnergy) < 8*c->QP)
00350 {
00351 const int q=(dst[3] - dst[4])/2;
00352 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00353 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00354
00355 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
00356 d= MAX(d, 0);
00357
00358 d= (5*d + 32) >> 6;
00359 d*= SIGN(-middleEnergy);
00360
00361 if(q>0)
00362 {
00363 d= d<0 ? 0 : d;
00364 d= d>q ? q : d;
00365 }
00366 else
00367 {
00368 d= d>0 ? 0 : d;
00369 d= d<q ? q : d;
00370 }
00371
00372 dst[3]-= d;
00373 dst[4]+= d;
00374 }
00375 dst+= stride;
00376 }
00377 }
00378
00383 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00384 {
00385 int y;
00386 for(y=0; y<BLOCK_SIZE; y++)
00387 {
00388 const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00389 const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00390
00391 int sums[10];
00392 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00393 sums[1] = sums[0] - first + dst[3];
00394 sums[2] = sums[1] - first + dst[4];
00395 sums[3] = sums[2] - first + dst[5];
00396 sums[4] = sums[3] - first + dst[6];
00397 sums[5] = sums[4] - dst[0] + dst[7];
00398 sums[6] = sums[5] - dst[1] + last;
00399 sums[7] = sums[6] - dst[2] + last;
00400 sums[8] = sums[7] - dst[3] + last;
00401 sums[9] = sums[8] - dst[4] + last;
00402
00403 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00404 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00405 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00406 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00407 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00408 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00409 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00410 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00411
00412 dst+= stride;
00413 }
00414 }
00415
00424 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00425 {
00426 int y;
00427 static uint64_t *lut= NULL;
00428 if(lut==NULL)
00429 {
00430 int i;
00431 lut= (uint64_t*)memalign(8, 256*8);
00432 for(i=0; i<256; i++)
00433 {
00434 int v= i < 128 ? 2*i : 2*(i-256);
00435
00436
00437
00438
00439
00440
00441
00442
00443 uint64_t a= (v/16) & 0xFF;
00444 uint64_t b= (v*3/16) & 0xFF;
00445 uint64_t c= (v*5/16) & 0xFF;
00446 uint64_t d= (7*v/16) & 0xFF;
00447 uint64_t A= (0x100 - a)&0xFF;
00448 uint64_t B= (0x100 - b)&0xFF;
00449 uint64_t C= (0x100 - c)&0xFF;
00450 uint64_t D= (0x100 - c)&0xFF;
00451
00452 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00453 (D<<24) | (C<<16) | (B<<8) | (A);
00454
00455 }
00456 }
00457
00458 for(y=0; y<BLOCK_SIZE; y++)
00459 {
00460 int a= src[1] - src[2];
00461 int b= src[3] - src[4];
00462 int c= src[5] - src[6];
00463
00464 int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
00465
00466 if(d < QP)
00467 {
00468 int v = d * SIGN(-b);
00469
00470 src[1] +=v/8;
00471 src[2] +=v/4;
00472 src[3] +=3*v/8;
00473 src[4] -=3*v/8;
00474 src[5] -=v/4;
00475 src[6] -=v/8;
00476
00477 }
00478 src+=stride;
00479 }
00480 }
00481
00485 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00486 int y;
00487 const int QP= c->QP;
00488 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00489 const int dcThreshold= dcOffset*2 + 1;
00490
00491 src+= step*4;
00492 for(y=0; y<8; y++){
00493 int numEq= 0;
00494
00495 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00496 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00497 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00498 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00499 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00500 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00501 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00502 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00503 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00504 if(numEq > c->ppMode.flatnessThreshold){
00505 int min, max, x;
00506
00507 if(src[0] > src[step]){
00508 max= src[0];
00509 min= src[step];
00510 }else{
00511 max= src[step];
00512 min= src[0];
00513 }
00514 for(x=2; x<8; x+=2){
00515 if(src[x*step] > src[(x+1)*step]){
00516 if(src[x *step] > max) max= src[ x *step];
00517 if(src[(x+1)*step] < min) min= src[(x+1)*step];
00518 }else{
00519 if(src[(x+1)*step] > max) max= src[(x+1)*step];
00520 if(src[ x *step] < min) min= src[ x *step];
00521 }
00522 }
00523 if(max-min < 2*QP){
00524 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00525 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00526
00527 int sums[10];
00528 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00529 sums[1] = sums[0] - first + src[3*step];
00530 sums[2] = sums[1] - first + src[4*step];
00531 sums[3] = sums[2] - first + src[5*step];
00532 sums[4] = sums[3] - first + src[6*step];
00533 sums[5] = sums[4] - src[0*step] + src[7*step];
00534 sums[6] = sums[5] - src[1*step] + last;
00535 sums[7] = sums[6] - src[2*step] + last;
00536 sums[8] = sums[7] - src[3*step] + last;
00537 sums[9] = sums[8] - src[4*step] + last;
00538
00539 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00540 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00541 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00542 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00543 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00544 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00545 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00546 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00547 }
00548 }else{
00549 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00550
00551 if(ABS(middleEnergy) < 8*QP)
00552 {
00553 const int q=(src[3*step] - src[4*step])/2;
00554 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00555 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00556
00557 int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
00558 d= MAX(d, 0);
00559
00560 d= (5*d + 32) >> 6;
00561 d*= SIGN(-middleEnergy);
00562
00563 if(q>0)
00564 {
00565 d= d<0 ? 0 : d;
00566 d= d>q ? q : d;
00567 }
00568 else
00569 {
00570 d= d>0 ? 0 : d;
00571 d= d<q ? q : d;
00572 }
00573
00574 src[3*step]-= d;
00575 src[4*step]+= d;
00576 }
00577 }
00578
00579 src += stride;
00580 }
00581
00582
00583
00584
00585
00586 }
00587
00588
00589
00590 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
00591 #define COMPILE_C
00592 #endif
00593
00594 #ifdef ARCH_POWERPC
00595 #ifdef HAVE_ALTIVEC
00596 #define COMPILE_ALTIVEC
00597 #endif //HAVE_ALTIVEC
00598 #endif //ARCH_POWERPC
00599
00600 #if defined(ARCH_X86) || defined(ARCH_X86_64)
00601
00602 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
00603 #define COMPILE_MMX
00604 #endif
00605
00606 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
00607 #define COMPILE_MMX2
00608 #endif
00609
00610 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
00611 #define COMPILE_3DNOW
00612 #endif
00613 #endif //ARCH_X86
00614
00615 #undef HAVE_MMX
00616 #undef HAVE_MMX2
00617 #undef HAVE_3DNOW
00618 #undef HAVE_ALTIVEC
00619
00620 #ifdef COMPILE_C
00621 #undef HAVE_MMX
00622 #undef HAVE_MMX2
00623 #undef HAVE_3DNOW
00624 #define RENAME(a) a ## _C
00625 #include "postprocess_template.c"
00626 #endif
00627
00628 #ifdef ARCH_POWERPC
00629 #ifdef COMPILE_ALTIVEC
00630 #undef RENAME
00631 #define HAVE_ALTIVEC
00632 #define RENAME(a) a ## _altivec
00633 #include "postprocess_altivec_template.c"
00634 #include "postprocess_template.c"
00635 #endif
00636 #endif //ARCH_POWERPC
00637
00638
00639 #ifdef COMPILE_MMX
00640 #undef RENAME
00641 #define HAVE_MMX
00642 #undef HAVE_MMX2
00643 #undef HAVE_3DNOW
00644 #define RENAME(a) a ## _MMX
00645 #include "postprocess_template.c"
00646 #endif
00647
00648
00649 #ifdef COMPILE_MMX2
00650 #undef RENAME
00651 #define HAVE_MMX
00652 #define HAVE_MMX2
00653 #undef HAVE_3DNOW
00654 #define RENAME(a) a ## _MMX2
00655 #include "postprocess_template.c"
00656 #endif
00657
00658
00659 #ifdef COMPILE_3DNOW
00660 #undef RENAME
00661 #define HAVE_MMX
00662 #undef HAVE_MMX2
00663 #define HAVE_3DNOW
00664 #define RENAME(a) a ## _3DNow
00665 #include "postprocess_template.c"
00666 #endif
00667
00668
00669
00670 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00671 QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
00672 {
00673 PPContext *c= (PPContext *)vc;
00674 PPMode *ppMode= (PPMode *)vm;
00675 c->ppMode= *ppMode;
00676
00677
00678
00679
00680 #ifdef RUNTIME_CPUDETECT
00681 #if defined(ARCH_X86) || defined(ARCH_X86_64)
00682
00683 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00684 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00685 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00686 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00687 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00688 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00689 else
00690 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00691 #else
00692 #ifdef ARCH_POWERPC
00693 #ifdef HAVE_ALTIVEC
00694 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00695 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00696 else
00697 #endif
00698 #endif
00699 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00700 #endif
00701 #else //RUNTIME_CPUDETECT
00702 #ifdef HAVE_MMX2
00703 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00704 #elif defined (HAVE_3DNOW)
00705 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00706 #elif defined (HAVE_MMX)
00707 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00708 #elif defined (HAVE_ALTIVEC)
00709 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00710 #else
00711 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00712 #endif
00713 #endif
00714 }
00715
00716
00717
00718
00719
00720
/**
 * Help text describing the filter string syntax.
 * The names listed here must match the entries of the filter table that the
 * mode parser compares against with a case-sensitive strcmp(): the long
 * names of "ha"/"va" are "ahdeblock"/"avdeblock" and the long name of "fq"
 * is "forcequant".
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
;
00763
00764 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
00765 {
00766 char temp[GET_MODE_BUFFER_SIZE];
00767 char *p= temp;
00768 char *filterDelimiters= ",/";
00769 char *optionDelimiters= ":";
00770 struct PPMode *ppMode;
00771 char *filterToken;
00772
00773 ppMode= memalign(8, sizeof(PPMode));
00774
00775 ppMode->lumMode= 0;
00776 ppMode->chromMode= 0;
00777 ppMode->maxTmpNoise[0]= 700;
00778 ppMode->maxTmpNoise[1]= 1500;
00779 ppMode->maxTmpNoise[2]= 3000;
00780 ppMode->maxAllowedY= 234;
00781 ppMode->minAllowedY= 16;
00782 ppMode->baseDcDiff= 256/8;
00783 ppMode->flatnessThreshold= 56-16-1;
00784 ppMode->maxClippedThreshold= 0.01;
00785 ppMode->error=0;
00786
00787 strncpy(temp, name, GET_MODE_BUFFER_SIZE);
00788
00789 if(verbose>1) printf("pp: %s\n", name);
00790
00791 for(;;){
00792 char *filterName;
00793 int q= 1000000;
00794 int chrom=-1;
00795 int luma=-1;
00796 char *option;
00797 char *options[OPTIONS_ARRAY_SIZE];
00798 int i;
00799 int filterNameOk=0;
00800 int numOfUnknownOptions=0;
00801 int enable=1;
00802
00803 filterToken= strtok(p, filterDelimiters);
00804 if(filterToken == NULL) break;
00805 p+= strlen(filterToken) + 1;
00806 filterName= strtok(filterToken, optionDelimiters);
00807 if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
00808
00809 if(*filterName == '-')
00810 {
00811 enable=0;
00812 filterName++;
00813 }
00814
00815 for(;;){
00816 option= strtok(NULL, optionDelimiters);
00817 if(option == NULL) break;
00818
00819 if(verbose>1) printf("pp: option: %s\n", option);
00820 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00821 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00822 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00823 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00824 else
00825 {
00826 options[numOfUnknownOptions] = option;
00827 numOfUnknownOptions++;
00828 }
00829 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00830 }
00831 options[numOfUnknownOptions] = NULL;
00832
00833
00834 for(i=0; replaceTable[2*i]!=NULL; i++)
00835 {
00836 if(!strcmp(replaceTable[2*i], filterName))
00837 {
00838 int newlen= strlen(replaceTable[2*i + 1]);
00839 int plen;
00840 int spaceLeft;
00841
00842 if(p==NULL) p= temp, *p=0;
00843 else p--, *p=',';
00844
00845 plen= strlen(p);
00846 spaceLeft= p - temp + plen;
00847 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
00848 {
00849 ppMode->error++;
00850 break;
00851 }
00852 memmove(p + newlen, p, plen+1);
00853 memcpy(p, replaceTable[2*i + 1], newlen);
00854 filterNameOk=1;
00855 }
00856 }
00857
00858 for(i=0; filters[i].shortName!=NULL; i++)
00859 {
00860
00861 if( !strcmp(filters[i].longName, filterName)
00862 || !strcmp(filters[i].shortName, filterName))
00863 {
00864 ppMode->lumMode &= ~filters[i].mask;
00865 ppMode->chromMode &= ~filters[i].mask;
00866
00867 filterNameOk=1;
00868 if(!enable) break;
00869
00870 if(q >= filters[i].minLumQuality && luma)
00871 ppMode->lumMode|= filters[i].mask;
00872 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00873 if(q >= filters[i].minChromQuality)
00874 ppMode->chromMode|= filters[i].mask;
00875
00876 if(filters[i].mask == LEVEL_FIX)
00877 {
00878 int o;
00879 ppMode->minAllowedY= 16;
00880 ppMode->maxAllowedY= 234;
00881 for(o=0; options[o]!=NULL; o++)
00882 {
00883 if( !strcmp(options[o],"fullyrange")
00884 ||!strcmp(options[o],"f"))
00885 {
00886 ppMode->minAllowedY= 0;
00887 ppMode->maxAllowedY= 255;
00888 numOfUnknownOptions--;
00889 }
00890 }
00891 }
00892 else if(filters[i].mask == TEMP_NOISE_FILTER)
00893 {
00894 int o;
00895 int numOfNoises=0;
00896
00897 for(o=0; options[o]!=NULL; o++)
00898 {
00899 char *tail;
00900 ppMode->maxTmpNoise[numOfNoises]=
00901 strtol(options[o], &tail, 0);
00902 if(tail!=options[o])
00903 {
00904 numOfNoises++;
00905 numOfUnknownOptions--;
00906 if(numOfNoises >= 3) break;
00907 }
00908 }
00909 }
00910 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
00911 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
00912 {
00913 int o;
00914
00915 for(o=0; options[o]!=NULL && o<2; o++)
00916 {
00917 char *tail;
00918 int val= strtol(options[o], &tail, 0);
00919 if(tail==options[o]) break;
00920
00921 numOfUnknownOptions--;
00922 if(o==0) ppMode->baseDcDiff= val;
00923 else ppMode->flatnessThreshold= val;
00924 }
00925 }
00926 else if(filters[i].mask == FORCE_QUANT)
00927 {
00928 int o;
00929 ppMode->forcedQuant= 15;
00930
00931 for(o=0; options[o]!=NULL && o<1; o++)
00932 {
00933 char *tail;
00934 int val= strtol(options[o], &tail, 0);
00935 if(tail==options[o]) break;
00936
00937 numOfUnknownOptions--;
00938 ppMode->forcedQuant= val;
00939 }
00940 }
00941 }
00942 }
00943 if(!filterNameOk) ppMode->error++;
00944 ppMode->error += numOfUnknownOptions;
00945 }
00946
00947 if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00948 if(ppMode->error)
00949 {
00950 fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00951 free(ppMode);
00952 return NULL;
00953 }
00954 return ppMode;
00955 }
00956
00957 void pp_free_mode(pp_mode_t *mode){
00958 if(mode) free(mode);
00959 }
00960
/**
 * Replace *p by a fresh, zero-filled, aligned buffer.
 *
 * The old buffer (if any) is freed; its contents are NOT preserved.
 * If the allocation fails, *p is left NULL and nothing is written, so the
 * caller can detect the failure instead of crashing inside memset().
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment requested alignment in bytes
 * @param size      requested size in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
    free(*p); /* free(NULL) is a no-op */
    *p= memalign(alignment, size);
    if(*p != NULL)
        memset(*p, 0, size);
}
00966
00967 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00968 int mbWidth = (width+15)>>4;
00969 int mbHeight= (height+15)>>4;
00970 int i;
00971
00972 c->stride= stride;
00973 c->qpStride= qpStride;
00974
00975 reallocAlign((void **)&c->tempDst, 8, stride*24);
00976 reallocAlign((void **)&c->tempSrc, 8, stride*24);
00977 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00978 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00979 for(i=0; i<256; i++)
00980 c->yHistogram[i]= width*height/64*15/256;
00981
00982 for(i=0; i<3; i++)
00983 {
00984
00985 reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
00986 reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);
00987 }
00988
00989 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00990 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00991 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00992 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00993 }
00994
00995 static void global_init(void){
00996 int i;
00997 memset(clip_table, 0, 256);
00998 for(i=256; i<512; i++)
00999 clip_table[i]= i;
01000 memset(clip_table+512, 0, 256);
01001 }
01002
01003 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
01004 PPContext *c= memalign(32, sizeof(PPContext));
01005 int stride= (width+15)&(~15);
01006 int qpStride= (width+15)/16 + 2;
01007
01008 global_init();
01009
01010 memset(c, 0, sizeof(PPContext));
01011 c->cpuCaps= cpuCaps;
01012 if(cpuCaps&PP_FORMAT){
01013 c->hChromaSubSample= cpuCaps&0x3;
01014 c->vChromaSubSample= (cpuCaps>>4)&0x3;
01015 }else{
01016 c->hChromaSubSample= 1;
01017 c->vChromaSubSample= 1;
01018 }
01019
01020 reallocBuffers(c, width, height, stride, qpStride);
01021
01022 c->frameNum=-1;
01023
01024 return c;
01025 }
01026
01027 void pp_free_context(void *vc){
01028 PPContext *c = (PPContext*)vc;
01029 int i;
01030
01031 for(i=0; i<3; i++) free(c->tempBlured[i]);
01032 for(i=0; i<3; i++) free(c->tempBluredPast[i]);
01033
01034 free(c->tempBlocks);
01035 free(c->yHistogram);
01036 free(c->tempDst);
01037 free(c->tempSrc);
01038 free(c->deintTemp);
01039 free(c->stdQPTable);
01040 free(c->nonBQPTable);
01041 free(c->forcedQPTable);
01042
01043 memset(c, 0, sizeof(PPContext));
01044
01045 free(c);
01046 }
01047
01048 void pp_postprocess(uint8_t * src[3], int srcStride[3],
01049 uint8_t * dst[3], int dstStride[3],
01050 int width, int height,
01051 QP_STORE_T *QP_store, int QPStride,
01052 pp_mode_t *vm, void *vc, int pict_type)
01053 {
01054 int mbWidth = (width+15)>>4;
01055 int mbHeight= (height+15)>>4;
01056 PPMode *mode = (PPMode*)vm;
01057 PPContext *c = (PPContext*)vc;
01058 int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
01059 int absQPStride = ABS(QPStride);
01060
01061
01062 if(c->stride < minStride || c->qpStride < absQPStride)
01063 reallocBuffers(c, width, height,
01064 MAX(minStride, c->stride),
01065 MAX(c->qpStride, absQPStride));
01066
01067 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
01068 {
01069 int i;
01070 QP_store= c->forcedQPTable;
01071 absQPStride = QPStride = 0;
01072 if(mode->lumMode & FORCE_QUANT)
01073 for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
01074 else
01075 for(i=0; i<mbWidth; i++) QP_store[i]= 1;
01076 }
01077
01078
01079 if(pict_type & PP_PICT_TYPE_QP2){
01080 int i;
01081 const int count= mbHeight * absQPStride;
01082 for(i=0; i<(count>>2); i++){
01083 ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01084 }
01085 for(i<<=2; i<count; i++){
01086 c->stdQPTable[i] = QP_store[i]>>1;
01087 }
01088 QP_store= c->stdQPTable;
01089 QPStride= absQPStride;
01090 }
01091
01092 if(0){
01093 int x,y;
01094 for(y=0; y<mbHeight; y++){
01095 for(x=0; x<mbWidth; x++){
01096 printf("%2d ", QP_store[x + y*QPStride]);
01097 }
01098 printf("\n");
01099 }
01100 printf("\n");
01101 }
01102
01103 if((pict_type&7)!=3)
01104 {
01105 if (QPStride >= 0) {
01106 int i;
01107 const int count= mbHeight * QPStride;
01108 for(i=0; i<(count>>2); i++){
01109 ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01110 }
01111 for(i<<=2; i<count; i++){
01112 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01113 }
01114 } else {
01115 int i,j;
01116 for(i=0; i<mbHeight; i++) {
01117 for(j=0; j<absQPStride; j++) {
01118 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01119 }
01120 }
01121 }
01122 }
01123
01124 if(verbose>2)
01125 {
01126 printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
01127 }
01128
01129 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01130 width, height, QP_store, QPStride, 0, mode, c);
01131
01132 width = (width )>>c->hChromaSubSample;
01133 height = (height)>>c->vChromaSubSample;
01134
01135 if(mode->chromMode)
01136 {
01137 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01138 width, height, QP_store, QPStride, 1, mode, c);
01139 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01140 width, height, QP_store, QPStride, 2, mode, c);
01141 }
01142 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
01143 {
01144 linecpy(dst[1], src[1], height, srcStride[1]);
01145 linecpy(dst[2], src[2], height, srcStride[2]);
01146 }
01147 else
01148 {
01149 int y;
01150 for(y=0; y<height; y++)
01151 {
01152 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01153 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01154 }
01155 }
01156 }
01157