00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00026
00027
00028
00029
00030 #include "avcodec.h"
00031 #include "dsputil.h"
00032 #include "simple_idct.h"
00033
/*
 * Fixed-point multipliers for the 8-point IDCT and the shifts that remove
 * their scaling after the row pass and the column pass.
 *
 * The active set is Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, truncated.
 * NOTE(review): for i == 4 that formula gives 16384, while W4 here is 16383;
 * presumably deliberate -- confirm before "correcting" it.
 *
 * The second set is a disabled lower-precision alternative.
 */
#if 1
#define W1 22725
#define W2 21407
#define W3 19266
#define W4 16383
#define W5 12873
#define W6 8867
#define W7 4520
#define ROW_SHIFT 11
#define COL_SHIFT 20 // 6
#else
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565
#define ROW_SHIFT 8
#define COL_SHIFT 17
#endif
00055
/*
 * Multiply and multiply-accumulate primitives used by the IDCT kernels.
 *
 *   MUL16(rt, ra, rb):  rt  = ra * rb
 *   MAC16(rt, ra, rb):  rt += ra * rb
 *
 * On ARCH_POWERPC_405 they are emitted as single inline-asm instructions
 * (mullhw / maclhw); everywhere else they are plain C expressions.
 */
#if !defined(ARCH_POWERPC_405)

#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#else

/* The asm forms already end in ';', so they are statements, not expressions. */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));

#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));

#endif
00075
00076 static inline void idctRowCondDC (DCTELEM * row)
00077 {
00078 int a0, a1, a2, a3, b0, b1, b2, b3;
00079 #ifdef FAST_64BIT
00080 uint64_t temp;
00081 #else
00082 uint32_t temp;
00083 #endif
00084
00085 #ifdef FAST_64BIT
00086 #ifdef WORDS_BIGENDIAN
00087 #define ROW0_MASK 0xffff000000000000LL
00088 #else
00089 #define ROW0_MASK 0xffffLL
00090 #endif
00091 if(sizeof(DCTELEM)==2){
00092 if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
00093 ((uint64_t *)row)[1]) == 0) {
00094 temp = (row[0] << 3) & 0xffff;
00095 temp += temp << 16;
00096 temp += temp << 32;
00097 ((uint64_t *)row)[0] = temp;
00098 ((uint64_t *)row)[1] = temp;
00099 return;
00100 }
00101 }else{
00102 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
00103 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
00104 return;
00105 }
00106 }
00107 #else
00108 if(sizeof(DCTELEM)==2){
00109 if (!(((uint32_t*)row)[1] |
00110 ((uint32_t*)row)[2] |
00111 ((uint32_t*)row)[3] |
00112 row[1])) {
00113 temp = (row[0] << 3) & 0xffff;
00114 temp += temp << 16;
00115 ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
00116 ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
00117 return;
00118 }
00119 }else{
00120 if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
00121 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
00122 return;
00123 }
00124 }
00125 #endif
00126
00127 a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
00128 a1 = a0;
00129 a2 = a0;
00130 a3 = a0;
00131
00132
00133 a0 += W2 * row[2];
00134 a1 += W6 * row[2];
00135 a2 -= W6 * row[2];
00136 a3 -= W2 * row[2];
00137
00138 MUL16(b0, W1, row[1]);
00139 MAC16(b0, W3, row[3]);
00140 MUL16(b1, W3, row[1]);
00141 MAC16(b1, -W7, row[3]);
00142 MUL16(b2, W5, row[1]);
00143 MAC16(b2, -W1, row[3]);
00144 MUL16(b3, W7, row[1]);
00145 MAC16(b3, -W5, row[3]);
00146
00147 #ifdef FAST_64BIT
00148 temp = ((uint64_t*)row)[1];
00149 #else
00150 temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
00151 #endif
00152 if (temp != 0) {
00153 a0 += W4*row[4] + W6*row[6];
00154 a1 += - W4*row[4] - W2*row[6];
00155 a2 += - W4*row[4] + W2*row[6];
00156 a3 += W4*row[4] - W6*row[6];
00157
00158 MAC16(b0, W5, row[5]);
00159 MAC16(b0, W7, row[7]);
00160
00161 MAC16(b1, -W1, row[5]);
00162 MAC16(b1, -W5, row[7]);
00163
00164 MAC16(b2, W7, row[5]);
00165 MAC16(b2, W3, row[7]);
00166
00167 MAC16(b3, W3, row[5]);
00168 MAC16(b3, -W1, row[7]);
00169 }
00170
00171 row[0] = (a0 + b0) >> ROW_SHIFT;
00172 row[7] = (a0 - b0) >> ROW_SHIFT;
00173 row[1] = (a1 + b1) >> ROW_SHIFT;
00174 row[6] = (a1 - b1) >> ROW_SHIFT;
00175 row[2] = (a2 + b2) >> ROW_SHIFT;
00176 row[5] = (a2 - b2) >> ROW_SHIFT;
00177 row[3] = (a3 + b3) >> ROW_SHIFT;
00178 row[4] = (a3 - b3) >> ROW_SHIFT;
00179 }
00180
00181 static inline void idctSparseColPut (uint8_t *dest, int line_size,
00182 DCTELEM * col)
00183 {
00184 int a0, a1, a2, a3, b0, b1, b2, b3;
00185 uint8_t *cm = cropTbl + MAX_NEG_CROP;
00186
00187
00188 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
00189 a1 = a0;
00190 a2 = a0;
00191 a3 = a0;
00192
00193 a0 += + W2*col[8*2];
00194 a1 += + W6*col[8*2];
00195 a2 += - W6*col[8*2];
00196 a3 += - W2*col[8*2];
00197
00198 MUL16(b0, W1, col[8*1]);
00199 MUL16(b1, W3, col[8*1]);
00200 MUL16(b2, W5, col[8*1]);
00201 MUL16(b3, W7, col[8*1]);
00202
00203 MAC16(b0, + W3, col[8*3]);
00204 MAC16(b1, - W7, col[8*3]);
00205 MAC16(b2, - W1, col[8*3]);
00206 MAC16(b3, - W5, col[8*3]);
00207
00208 if(col[8*4]){
00209 a0 += + W4*col[8*4];
00210 a1 += - W4*col[8*4];
00211 a2 += - W4*col[8*4];
00212 a3 += + W4*col[8*4];
00213 }
00214
00215 if (col[8*5]) {
00216 MAC16(b0, + W5, col[8*5]);
00217 MAC16(b1, - W1, col[8*5]);
00218 MAC16(b2, + W7, col[8*5]);
00219 MAC16(b3, + W3, col[8*5]);
00220 }
00221
00222 if(col[8*6]){
00223 a0 += + W6*col[8*6];
00224 a1 += - W2*col[8*6];
00225 a2 += + W2*col[8*6];
00226 a3 += - W6*col[8*6];
00227 }
00228
00229 if (col[8*7]) {
00230 MAC16(b0, + W7, col[8*7]);
00231 MAC16(b1, - W5, col[8*7]);
00232 MAC16(b2, + W3, col[8*7]);
00233 MAC16(b3, - W1, col[8*7]);
00234 }
00235
00236 dest[0] = cm[(a0 + b0) >> COL_SHIFT];
00237 dest += line_size;
00238 dest[0] = cm[(a1 + b1) >> COL_SHIFT];
00239 dest += line_size;
00240 dest[0] = cm[(a2 + b2) >> COL_SHIFT];
00241 dest += line_size;
00242 dest[0] = cm[(a3 + b3) >> COL_SHIFT];
00243 dest += line_size;
00244 dest[0] = cm[(a3 - b3) >> COL_SHIFT];
00245 dest += line_size;
00246 dest[0] = cm[(a2 - b2) >> COL_SHIFT];
00247 dest += line_size;
00248 dest[0] = cm[(a1 - b1) >> COL_SHIFT];
00249 dest += line_size;
00250 dest[0] = cm[(a0 - b0) >> COL_SHIFT];
00251 }
00252
00253 static inline void idctSparseColAdd (uint8_t *dest, int line_size,
00254 DCTELEM * col)
00255 {
00256 int a0, a1, a2, a3, b0, b1, b2, b3;
00257 uint8_t *cm = cropTbl + MAX_NEG_CROP;
00258
00259
00260 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
00261 a1 = a0;
00262 a2 = a0;
00263 a3 = a0;
00264
00265 a0 += + W2*col[8*2];
00266 a1 += + W6*col[8*2];
00267 a2 += - W6*col[8*2];
00268 a3 += - W2*col[8*2];
00269
00270 MUL16(b0, W1, col[8*1]);
00271 MUL16(b1, W3, col[8*1]);
00272 MUL16(b2, W5, col[8*1]);
00273 MUL16(b3, W7, col[8*1]);
00274
00275 MAC16(b0, + W3, col[8*3]);
00276 MAC16(b1, - W7, col[8*3]);
00277 MAC16(b2, - W1, col[8*3]);
00278 MAC16(b3, - W5, col[8*3]);
00279
00280 if(col[8*4]){
00281 a0 += + W4*col[8*4];
00282 a1 += - W4*col[8*4];
00283 a2 += - W4*col[8*4];
00284 a3 += + W4*col[8*4];
00285 }
00286
00287 if (col[8*5]) {
00288 MAC16(b0, + W5, col[8*5]);
00289 MAC16(b1, - W1, col[8*5]);
00290 MAC16(b2, + W7, col[8*5]);
00291 MAC16(b3, + W3, col[8*5]);
00292 }
00293
00294 if(col[8*6]){
00295 a0 += + W6*col[8*6];
00296 a1 += - W2*col[8*6];
00297 a2 += + W2*col[8*6];
00298 a3 += - W6*col[8*6];
00299 }
00300
00301 if (col[8*7]) {
00302 MAC16(b0, + W7, col[8*7]);
00303 MAC16(b1, - W5, col[8*7]);
00304 MAC16(b2, + W3, col[8*7]);
00305 MAC16(b3, - W1, col[8*7]);
00306 }
00307
00308 dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
00309 dest += line_size;
00310 dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];
00311 dest += line_size;
00312 dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];
00313 dest += line_size;
00314 dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];
00315 dest += line_size;
00316 dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];
00317 dest += line_size;
00318 dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];
00319 dest += line_size;
00320 dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
00321 dest += line_size;
00322 dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
00323 }
00324
00325 static inline void idctSparseCol (DCTELEM * col)
00326 {
00327 int a0, a1, a2, a3, b0, b1, b2, b3;
00328
00329
00330 a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
00331 a1 = a0;
00332 a2 = a0;
00333 a3 = a0;
00334
00335 a0 += + W2*col[8*2];
00336 a1 += + W6*col[8*2];
00337 a2 += - W6*col[8*2];
00338 a3 += - W2*col[8*2];
00339
00340 MUL16(b0, W1, col[8*1]);
00341 MUL16(b1, W3, col[8*1]);
00342 MUL16(b2, W5, col[8*1]);
00343 MUL16(b3, W7, col[8*1]);
00344
00345 MAC16(b0, + W3, col[8*3]);
00346 MAC16(b1, - W7, col[8*3]);
00347 MAC16(b2, - W1, col[8*3]);
00348 MAC16(b3, - W5, col[8*3]);
00349
00350 if(col[8*4]){
00351 a0 += + W4*col[8*4];
00352 a1 += - W4*col[8*4];
00353 a2 += - W4*col[8*4];
00354 a3 += + W4*col[8*4];
00355 }
00356
00357 if (col[8*5]) {
00358 MAC16(b0, + W5, col[8*5]);
00359 MAC16(b1, - W1, col[8*5]);
00360 MAC16(b2, + W7, col[8*5]);
00361 MAC16(b3, + W3, col[8*5]);
00362 }
00363
00364 if(col[8*6]){
00365 a0 += + W6*col[8*6];
00366 a1 += - W2*col[8*6];
00367 a2 += + W2*col[8*6];
00368 a3 += - W6*col[8*6];
00369 }
00370
00371 if (col[8*7]) {
00372 MAC16(b0, + W7, col[8*7]);
00373 MAC16(b1, - W5, col[8*7]);
00374 MAC16(b2, + W3, col[8*7]);
00375 MAC16(b3, - W1, col[8*7]);
00376 }
00377
00378 col[0 ] = ((a0 + b0) >> COL_SHIFT);
00379 col[8 ] = ((a1 + b1) >> COL_SHIFT);
00380 col[16] = ((a2 + b2) >> COL_SHIFT);
00381 col[24] = ((a3 + b3) >> COL_SHIFT);
00382 col[32] = ((a3 - b3) >> COL_SHIFT);
00383 col[40] = ((a2 - b2) >> COL_SHIFT);
00384 col[48] = ((a1 - b1) >> COL_SHIFT);
00385 col[56] = ((a0 - b0) >> COL_SHIFT);
00386 }
00387
00388 void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
00389 {
00390 int i;
00391 for(i=0; i<8; i++)
00392 idctRowCondDC(block + i*8);
00393
00394 for(i=0; i<8; i++)
00395 idctSparseColPut(dest + i, line_size, block + i);
00396 }
00397
00398 void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
00399 {
00400 int i;
00401 for(i=0; i<8; i++)
00402 idctRowCondDC(block + i*8);
00403
00404 for(i=0; i<8; i++)
00405 idctSparseColAdd(dest + i, line_size, block + i);
00406 }
00407
00408 void simple_idct(DCTELEM *block)
00409 {
00410 int i;
00411 for(i=0; i<8; i++)
00412 idctRowCondDC(block + i*8);
00413
00414 for(i=0; i<8; i++)
00415 idctSparseCol(block + i);
00416 }
00417
00418
00419
/* Fixed-point setup for the 4-point column transform used by idct4col. */

/* number of fractional bits in C1 / C2 */
#define CN_SHIFT 12
/* convert a real constant to CN_SHIFT-bit fixed point, rounding to nearest */
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)

/* total right shift applied to the 4-point column results */
#define C_SHIFT (4+1+12)
00428
00429 static inline void idct4col(uint8_t *dest, int line_size, const DCTELEM *col)
00430 {
00431 int c0, c1, c2, c3, a0, a1, a2, a3;
00432 const uint8_t *cm = cropTbl + MAX_NEG_CROP;
00433
00434 a0 = col[8*0];
00435 a1 = col[8*2];
00436 a2 = col[8*4];
00437 a3 = col[8*6];
00438 c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
00439 c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1));
00440 c1 = a1 * C1 + a3 * C2;
00441 c3 = a1 * C2 - a3 * C1;
00442 dest[0] = cm[(c0 + c1) >> C_SHIFT];
00443 dest += line_size;
00444 dest[0] = cm[(c2 + c3) >> C_SHIFT];
00445 dest += line_size;
00446 dest[0] = cm[(c2 - c3) >> C_SHIFT];
00447 dest += line_size;
00448 dest[0] = cm[(c0 - c1) >> C_SHIFT];
00449 }
00450
/*
 * Butterfly on element k of two consecutive 8-element rows:
 *   ptr[k]     <- ptr[k] + ptr[8 + k]
 *   ptr[8 + k] <- ptr[k] - ptr[8 + k]   (using the old ptr[k])
 * Expects a pointer variable named `ptr` to be in scope at the use site.
 */
#define BF(k) \
{\
    int upper, lower;\
    upper = ptr[(k)];\
    lower = ptr[8 + (k)];\
    ptr[(k)] = upper + lower;\
    ptr[8 + (k)] = upper - lower;\
}
00459
00460
00461
00462
00463
00464
00465
00466 void simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
00467 {
00468 int i;
00469 DCTELEM *ptr;
00470
00471
00472 ptr = block;
00473 for(i=0;i<4;i++) {
00474 BF(0);
00475 BF(1);
00476 BF(2);
00477 BF(3);
00478 BF(4);
00479 BF(5);
00480 BF(6);
00481 BF(7);
00482 ptr += 2 * 8;
00483 }
00484
00485
00486 for(i=0; i<8; i++) {
00487 idctRowCondDC(block + i*8);
00488 }
00489
00490
00491 for(i=0;i<8;i++) {
00492 idct4col(dest + i, 2 * line_size, block + i);
00493 idct4col(dest + line_size + i, 2 * line_size, block + 8 + i);
00494 }
00495 }
00496
00497
/*
 * Replace the previous C_* fixed-point constants with a set that carries an
 * extra factor of 1.414213562 (sqrt(2)), used by idct4col_add.
 */
#undef CN_SHIFT
#undef C_SHIFT
#undef C_FIX
#undef C1
#undef C2

/* number of fractional bits in C1 / C2 / C3 */
#define CN_SHIFT 12
/* real constant -> CN_SHIFT-bit fixed point, scaled by sqrt(2), rounded */
#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)
#define C3 C_FIX(0.5)
/* total right shift applied to the 4-point column results */
#define C_SHIFT (4+1+12)
00509 static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
00510 {
00511 int c0, c1, c2, c3, a0, a1, a2, a3;
00512 const uint8_t *cm = cropTbl + MAX_NEG_CROP;
00513
00514 a0 = col[8*0];
00515 a1 = col[8*1];
00516 a2 = col[8*2];
00517 a3 = col[8*3];
00518 c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1));
00519 c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));
00520 c1 = a1 * C1 + a3 * C2;
00521 c3 = a1 * C2 - a3 * C1;
00522 dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];
00523 dest += line_size;
00524 dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];
00525 dest += line_size;
00526 dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];
00527 dest += line_size;
00528 dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];
00529 }
00530
/* Fixed-point setup for the 4-point row transform used by idct4row. */

/* number of fractional bits in R1 / R2 / R3 */
#define RN_SHIFT 15
/* real constant -> RN_SHIFT-bit fixed point, scaled by sqrt(2), rounded */
#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
#define R1 R_FIX(0.6532814824)
#define R2 R_FIX(0.2705980501)
#define R3 R_FIX(0.5)
/* right shift applied to the 4-point row results */
#define R_SHIFT 11
00537 static inline void idct4row(DCTELEM *row)
00538 {
00539 int c0, c1, c2, c3, a0, a1, a2, a3;
00540
00541
00542 a0 = row[0];
00543 a1 = row[1];
00544 a2 = row[2];
00545 a3 = row[3];
00546 c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));
00547 c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
00548 c1 = a1 * R1 + a3 * R2;
00549 c3 = a1 * R2 - a3 * R1;
00550 row[0]= (c0 + c1) >> R_SHIFT;
00551 row[1]= (c2 + c3) >> R_SHIFT;
00552 row[2]= (c2 - c3) >> R_SHIFT;
00553 row[3]= (c0 - c1) >> R_SHIFT;
00554 }
00555
00556 void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
00557 {
00558 int i;
00559
00560
00561 for(i=0; i<4; i++) {
00562 idctRowCondDC(block + i*8);
00563 }
00564
00565
00566 for(i=0;i<8;i++) {
00567 idct4col_add(dest + i, line_size, block + i);
00568 }
00569 }
00570
00571 void simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
00572 {
00573 int i;
00574
00575
00576 for(i=0; i<8; i++) {
00577 idct4row(block + i*8);
00578 }
00579
00580
00581 for(i=0; i<4; i++){
00582 idctSparseColAdd(dest + i, line_size, block + i);
00583 }
00584 }
00585