00001
00007 #include <stdlib.h>
00008 #include <stdio.h>
00009 #include <string.h>
00010 #include <sys/time.h>
00011 #include <unistd.h>
00012
00013 #include "dsputil.h"
00014
00015 #include "i386/mmx.h"
00016 #include "simple_idct.h"
00017 #include "faandct.h"
00018
/*
 * Larger of two values.  Defined only when no earlier header supplied MAX.
 * Either argument may be evaluated twice, so pass side-effect-free operands.
 */
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
00022
00023
00024 extern void fdct(DCTELEM *block);
00025 extern void idct(DCTELEM *block);
00026 extern void init_fdct();
00027
00028 extern void j_rev_dct(DCTELEM *data);
00029 extern void ff_mmx_idct(DCTELEM *data);
00030 extern void ff_mmxext_idct(DCTELEM *data);
00031
00032 extern void odivx_idct_c (short *block);
00033
/*
 * Per-coefficient scale table, 8x8 in row-major order.  dct_error() divides
 * a fixed-point constant by these entries to rescale the output of
 * fdct_ifast (and of ff_faandct unless FAAN_POSTSCALE is defined) before
 * comparing it with the reference.  The table is symmetric and its first
 * entry is 16384 (1 << 14).
 */
#define AANSCALE_BITS 12
static const unsigned short aanscales[64] = {
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
00046
00047 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
00048
/*
 * Current wall-clock time as reported by gettimeofday(), expressed in
 * microseconds.  The return value of gettimeofday() is not checked.
 */
int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* Widen the seconds before scaling so the product is computed in 64 bits. */
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
00055
/* Number of random blocks pushed through each transform by the accuracy tests. */
#define NB_ITS 20000
/* Number of transform calls per timing batch; batches repeat until 1 s has elapsed. */
#define NB_ITS_SPEED 50000
00058
/*
 * Input permutation applied by dct_error() for ff_mmx_idct, ff_mmxext_idct
 * and j_rev_dct: coefficient i is stored at index idct_mmx_perm[i].
 * Computed at start-up by idct_mmx_init().
 */
static short idct_mmx_perm[64];

/*
 * Input permutation applied by dct_error() for ff_simple_idct_mmx:
 * coefficient i is stored at index idct_simple_mmx_perm[i].
 */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F
};
00071
00072 void idct_mmx_init(void)
00073 {
00074 int i;
00075
00076
00077 for (i = 0; i < 64; i++) {
00078 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00079
00080 }
00081 }
00082
00083 static DCTELEM block[64] __attribute__ ((aligned (8)));
00084 static DCTELEM block1[64] __attribute__ ((aligned (8)));
00085 static DCTELEM block_org[64] __attribute__ ((aligned (8)));
00086
00087 void dct_error(const char *name, int is_idct,
00088 void (*fdct_func)(DCTELEM *block),
00089 void (*fdct_ref)(DCTELEM *block), int test)
00090 {
00091 int it, i, scale;
00092 int err_inf, v;
00093 int64_t err2, ti, ti1, it1;
00094 int64_t sysErr[64], sysErrMax=0;
00095 int maxout=0;
00096 int blockSumErrMax=0, blockSumErr;
00097
00098 srandom(0);
00099
00100 err_inf = 0;
00101 err2 = 0;
00102 for(i=0; i<64; i++) sysErr[i]=0;
00103 for(it=0;it<NB_ITS;it++) {
00104 for(i=0;i<64;i++)
00105 block1[i] = 0;
00106 switch(test){
00107 case 0:
00108 for(i=0;i<64;i++)
00109 block1[i] = (random() % 512) -256;
00110 if (is_idct){
00111 fdct(block1);
00112
00113 for(i=0;i<64;i++)
00114 block1[i]>>=3;
00115 }
00116 break;
00117 case 1:{
00118 int num= (random()%10)+1;
00119 for(i=0;i<num;i++)
00120 block1[random()%64] = (random() % 512) -256;
00121 }break;
00122 case 2:
00123 block1[0]= (random()%4096)-2048;
00124 block1[63]= (block1[0]&1)^1;
00125 break;
00126 }
00127
00128 #if 0 // simulate mismatch control
00129 { int sum=0;
00130 for(i=0;i<64;i++)
00131 sum+=block1[i];
00132
00133 if((sum&1)==0) block1[63]^=1;
00134 }
00135 #endif
00136
00137 for(i=0; i<64; i++)
00138 block_org[i]= block1[i];
00139
00140 if (fdct_func == ff_mmx_idct ||
00141 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
00142 for(i=0;i<64;i++)
00143 block[idct_mmx_perm[i]] = block1[i];
00144 } else if(fdct_func == ff_simple_idct_mmx ) {
00145 for(i=0;i<64;i++)
00146 block[idct_simple_mmx_perm[i]] = block1[i];
00147
00148 } else {
00149 for(i=0; i<64; i++)
00150 block[i]= block1[i];
00151 }
00152 #if 0 // simulate mismatch control for tested IDCT but not the ref
00153 { int sum=0;
00154 for(i=0;i<64;i++)
00155 sum+=block[i];
00156
00157 if((sum&1)==0) block[63]^=1;
00158 }
00159 #endif
00160
00161 fdct_func(block);
00162 emms();
00163
00164 if (fdct_func == fdct_ifast
00165 #ifndef FAAN_POSTSCALE
00166 || fdct_func == ff_faandct
00167 #endif
00168 ) {
00169 for(i=0; i<64; i++) {
00170 scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
00171 block[i] = (block[i] * scale ) >> AANSCALE_BITS;
00172 }
00173 }
00174
00175 fdct_ref(block1);
00176
00177 blockSumErr=0;
00178 for(i=0;i<64;i++) {
00179 v = abs(block[i] - block1[i]);
00180 if (v > err_inf)
00181 err_inf = v;
00182 err2 += v * v;
00183 sysErr[i] += block[i] - block1[i];
00184 blockSumErr += v;
00185 if( abs(block[i])>maxout) maxout=abs(block[i]);
00186 }
00187 if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
00188 #if 0 // print different matrix pairs
00189 if(blockSumErr){
00190 printf("\n");
00191 for(i=0; i<64; i++){
00192 if((i&7)==0) printf("\n");
00193 printf("%4d ", block_org[i]);
00194 }
00195 for(i=0; i<64; i++){
00196 if((i&7)==0) printf("\n");
00197 printf("%4d ", block[i] - block1[i]);
00198 }
00199 }
00200 #endif
00201 }
00202 for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, ABS(sysErr[i]));
00203
00204 #if 1 // dump systematic errors
00205 for(i=0; i<64; i++){
00206 if(i%8==0) printf("\n");
00207 printf("%5d ", (int)sysErr[i]);
00208 }
00209 printf("\n");
00210 #endif
00211
00212 printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
00213 is_idct ? "IDCT" : "DCT",
00214 name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
00215 #if 1 //Speed test
00216
00217 for(i=0;i<64;i++)
00218 block1[i] = 0;
00219 switch(test){
00220 case 0:
00221 for(i=0;i<64;i++)
00222 block1[i] = (random() % 512) -256;
00223 if (is_idct){
00224 fdct(block1);
00225
00226 for(i=0;i<64;i++)
00227 block1[i]>>=3;
00228 }
00229 break;
00230 case 1:{
00231 case 2:
00232 block1[0] = (random() % 512) -256;
00233 block1[1] = (random() % 512) -256;
00234 block1[2] = (random() % 512) -256;
00235 block1[3] = (random() % 512) -256;
00236 }break;
00237 }
00238
00239 if (fdct_func == ff_mmx_idct ||
00240 fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) {
00241 for(i=0;i<64;i++)
00242 block[idct_mmx_perm[i]] = block1[i];
00243 } else if(fdct_func == ff_simple_idct_mmx ) {
00244 for(i=0;i<64;i++)
00245 block[idct_simple_mmx_perm[i]] = block1[i];
00246 } else {
00247 for(i=0; i<64; i++)
00248 block[i]= block1[i];
00249 }
00250
00251 ti = gettime();
00252 it1 = 0;
00253 do {
00254 for(it=0;it<NB_ITS_SPEED;it++) {
00255 for(i=0; i<64; i++)
00256 block[i]= block1[i];
00257
00258
00259 fdct_func(block);
00260 }
00261 it1 += NB_ITS_SPEED;
00262 ti1 = gettime() - ti;
00263 } while (ti1 < 1000000);
00264 emms();
00265
00266 printf("%s %s: %0.1f kdct/s\n",
00267 is_idct ? "IDCT" : "DCT",
00268 name, (double)it1 * 1000.0 / (double)ti1);
00269 #endif
00270 }
00271
/*
 * 8x8 pixel outputs used by idct248_error(): img_dest receives the output of
 * the implementation under test, img_dest1 the output of idct248_ref().
 */
static uint8_t img_dest[64]  __attribute__((aligned(8)));
static uint8_t img_dest1[64] __attribute__((aligned(8)));
00274
00275 void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
00276 {
00277 static int init;
00278 static double c8[8][8];
00279 static double c4[4][4];
00280 double block1[64], block2[64], block3[64];
00281 double s, sum, v;
00282 int i, j, k;
00283
00284 if (!init) {
00285 init = 1;
00286
00287 for(i=0;i<8;i++) {
00288 sum = 0;
00289 for(j=0;j<8;j++) {
00290 s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
00291 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
00292 sum += c8[i][j] * c8[i][j];
00293 }
00294 }
00295
00296 for(i=0;i<4;i++) {
00297 sum = 0;
00298 for(j=0;j<4;j++) {
00299 s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
00300 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
00301 sum += c4[i][j] * c4[i][j];
00302 }
00303 }
00304 }
00305
00306
00307 s = 0.5 * sqrt(2.0);
00308 for(i=0;i<4;i++) {
00309 for(j=0;j<8;j++) {
00310 block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
00311 block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
00312 }
00313 }
00314
00315
00316 for(i=0;i<8;i++) {
00317 for(j=0;j<8;j++) {
00318 sum = 0;
00319 for(k=0;k<8;k++)
00320 sum += c8[k][j] * block1[8*i+k];
00321 block2[8*i+j] = sum;
00322 }
00323 }
00324
00325
00326 for(i=0;i<8;i++) {
00327 for(j=0;j<4;j++) {
00328
00329 sum = 0;
00330 for(k=0;k<4;k++)
00331 sum += c4[k][j] * block2[8*(2*k)+i];
00332 block3[8*(2*j)+i] = sum;
00333
00334
00335 sum = 0;
00336 for(k=0;k<4;k++)
00337 sum += c4[k][j] * block2[8*(2*k+1)+i];
00338 block3[8*(2*j+1)+i] = sum;
00339 }
00340 }
00341
00342
00343 for(i=0;i<8;i++) {
00344 for(j=0;j<8;j++) {
00345 v = block3[8*i+j];
00346 if (v < 0)
00347 v = 0;
00348 else if (v > 255)
00349 v = 255;
00350 dest[i * linesize + j] = (int)rint(v);
00351 }
00352 }
00353 }
00354
00355 void idct248_error(const char *name,
00356 void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
00357 {
00358 int it, i, it1, ti, ti1, err_max, v;
00359
00360 srandom(0);
00361
00362
00363
00364 err_max = 0;
00365 for(it=0;it<NB_ITS;it++) {
00366
00367
00368 for(i=0;i<64;i++)
00369 block1[i] = (random() % 256) - 128;
00370 block1[0] += 1024;
00371
00372 for(i=0; i<64; i++)
00373 block[i]= block1[i];
00374 idct248_ref(img_dest1, 8, block);
00375
00376 for(i=0; i<64; i++)
00377 block[i]= block1[i];
00378 idct248_put(img_dest, 8, block);
00379
00380 for(i=0;i<64;i++) {
00381 v = abs((int)img_dest[i] - (int)img_dest1[i]);
00382 if (v == 255)
00383 printf("%d %d\n", img_dest[i], img_dest1[i]);
00384 if (v > err_max)
00385 err_max = v;
00386 }
00387 #if 0
00388 printf("ref=\n");
00389 for(i=0;i<8;i++) {
00390 int j;
00391 for(j=0;j<8;j++) {
00392 printf(" %3d", img_dest1[i*8+j]);
00393 }
00394 printf("\n");
00395 }
00396
00397 printf("out=\n");
00398 for(i=0;i<8;i++) {
00399 int j;
00400 for(j=0;j<8;j++) {
00401 printf(" %3d", img_dest[i*8+j]);
00402 }
00403 printf("\n");
00404 }
00405 #endif
00406 }
00407 printf("%s %s: err_inf=%d\n",
00408 1 ? "IDCT248" : "DCT248",
00409 name, err_max);
00410
00411 ti = gettime();
00412 it1 = 0;
00413 do {
00414 for(it=0;it<NB_ITS_SPEED;it++) {
00415 for(i=0; i<64; i++)
00416 block[i]= block1[i];
00417
00418
00419 idct248_put(img_dest, 8, block);
00420 }
00421 it1 += NB_ITS_SPEED;
00422 ti1 = gettime() - ti;
00423 } while (ti1 < 1000000);
00424 emms();
00425
00426 printf("%s %s: %0.1f kdct/s\n",
00427 1 ? "IDCT248" : "DCT248",
00428 name, (double)it1 * 1000.0 / (double)ti1);
00429 }
00430
/* Print the usage text on stdout and terminate the program with status 1. */
void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        "            1 -> test with random sparse matrixes\n"
        "            2 -> do 3. test from mpeg4 std\n"
        "-i          test IDCT implementations\n"
        "-4          test IDCT248 implementations\n";

    fputs(usage, stdout);
    exit(1);
}
00441
00442 int main(int argc, char **argv)
00443 {
00444 int test_idct = 0, test_248_dct = 0;
00445 int c,i;
00446 int test=1;
00447
00448 init_fdct();
00449 idct_mmx_init();
00450
00451 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
00452 for(i=0;i<MAX_NEG_CROP;i++) {
00453 cropTbl[i] = 0;
00454 cropTbl[i + MAX_NEG_CROP + 256] = 255;
00455 }
00456
00457 for(;;) {
00458 c = getopt(argc, argv, "ih4");
00459 if (c == -1)
00460 break;
00461 switch(c) {
00462 case 'i':
00463 test_idct = 1;
00464 break;
00465 case '4':
00466 test_248_dct = 1;
00467 break;
00468 default :
00469 case 'h':
00470 help();
00471 break;
00472 }
00473 }
00474
00475 if(optind <argc) test= atoi(argv[optind]);
00476
00477 printf("ffmpeg DCT/IDCT test\n");
00478
00479 if (test_248_dct) {
00480 idct248_error("SIMPLE-C", simple_idct248_put);
00481 } else {
00482 if (!test_idct) {
00483 dct_error("REF-DBL", 0, fdct, fdct, test);
00484 dct_error("IJG-AAN-INT", 0, fdct_ifast, fdct, test);
00485 dct_error("IJG-LLM-INT", 0, ff_jpeg_fdct_islow, fdct, test);
00486 dct_error("MMX", 0, ff_fdct_mmx, fdct, test);
00487 dct_error("MMX2", 0, ff_fdct_mmx2, fdct, test);
00488 dct_error("FAAN", 0, ff_faandct, fdct, test);
00489 } else {
00490 dct_error("REF-DBL", 1, idct, idct, test);
00491 dct_error("INT", 1, j_rev_dct, idct, test);
00492 dct_error("LIBMPEG2-MMX", 1, ff_mmx_idct, idct, test);
00493 dct_error("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct, idct, test);
00494 dct_error("SIMPLE-C", 1, simple_idct, idct, test);
00495 dct_error("SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, test);
00496
00497
00498
00499
00500
00501
00502
00503
00504
00505 }
00506 }
00507 return 0;
00508 }