00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00025 #include "common.h"
00026 #include "avcodec.h"
00027 #include "dsputil.h"
00028 #include "vp3data.h"
00029
/*
 * Cosine table for the IDCT in 16-bit fixed point:
 *   xCkS(8-k) == round(65536 * cos(k * pi / 16))
 * which is the same value as round(65536 * sin((8 - k) * pi / 16)).
 * idct() multiplies by these and shifts the product right by 16.
 */
#define xC1S7 64277 /* cos(1*pi/16) */
#define xC2S6 60547 /* cos(2*pi/16) */
#define xC3S5 54491 /* cos(3*pi/16) */
#define xC4S4 46341 /* cos(4*pi/16) */
#define xC5S3 36410 /* cos(5*pi/16) */
#define xC6S2 25080 /* cos(6*pi/16) */
#define xC7S1 12785 /* cos(7*pi/16) */

/*
 * Rounding bias for the column pass of idct(): added ahead of the final
 * 4-bit right shift (8 is half of 1 << 4).
 */
#define IdctAdjustBeforeShift 8
00038
00039 static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
00040 {
00041 int16_t *ip = input;
00042 uint8_t *cm = cropTbl + MAX_NEG_CROP;
00043
00044 int A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_;
00045 int _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
00046 int t1, t2;
00047
00048 int i, j;
00049
00050
00051 for (i = 0; i < 8; i++) {
00052
00053 if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
00054 t1 = (int32_t)(xC1S7 * ip[1]);
00055 t2 = (int32_t)(xC7S1 * ip[7]);
00056 t1 >>= 16;
00057 t2 >>= 16;
00058 A_ = t1 + t2;
00059
00060 t1 = (int32_t)(xC7S1 * ip[1]);
00061 t2 = (int32_t)(xC1S7 * ip[7]);
00062 t1 >>= 16;
00063 t2 >>= 16;
00064 B_ = t1 - t2;
00065
00066 t1 = (int32_t)(xC3S5 * ip[3]);
00067 t2 = (int32_t)(xC5S3 * ip[5]);
00068 t1 >>= 16;
00069 t2 >>= 16;
00070 C_ = t1 + t2;
00071
00072 t1 = (int32_t)(xC3S5 * ip[5]);
00073 t2 = (int32_t)(xC5S3 * ip[3]);
00074 t1 >>= 16;
00075 t2 >>= 16;
00076 D_ = t1 - t2;
00077
00078
00079 t1 = (int32_t)(xC4S4 * (A_ - C_));
00080 t1 >>= 16;
00081 _Ad = t1;
00082
00083 t1 = (int32_t)(xC4S4 * (B_ - D_));
00084 t1 >>= 16;
00085 _Bd = t1;
00086
00087
00088 _Cd = A_ + C_;
00089 _Dd = B_ + D_;
00090
00091 t1 = (int32_t)(xC4S4 * (ip[0] + ip[4]));
00092 t1 >>= 16;
00093 E_ = t1;
00094
00095 t1 = (int32_t)(xC4S4 * (ip[0] - ip[4]));
00096 t1 >>= 16;
00097 F_ = t1;
00098
00099 t1 = (int32_t)(xC2S6 * ip[2]);
00100 t2 = (int32_t)(xC6S2 * ip[6]);
00101 t1 >>= 16;
00102 t2 >>= 16;
00103 G_ = t1 + t2;
00104
00105 t1 = (int32_t)(xC6S2 * ip[2]);
00106 t2 = (int32_t)(xC2S6 * ip[6]);
00107 t1 >>= 16;
00108 t2 >>= 16;
00109 H_ = t1 - t2;
00110
00111
00112 _Ed = E_ - G_;
00113 _Gd = E_ + G_;
00114
00115 _Add = F_ + _Ad;
00116 _Bdd = _Bd - H_;
00117
00118 _Fd = F_ - _Ad;
00119 _Hd = _Bd + H_;
00120
00121
00122 ip[0] = _Gd + _Cd ;
00123 ip[7] = _Gd - _Cd ;
00124
00125 ip[1] = _Add + _Hd;
00126 ip[2] = _Add - _Hd;
00127
00128 ip[3] = _Ed + _Dd ;
00129 ip[4] = _Ed - _Dd ;
00130
00131 ip[5] = _Fd + _Bdd;
00132 ip[6] = _Fd - _Bdd;
00133
00134 }
00135
00136 ip += 8;
00137 }
00138
00139 ip = input;
00140
00141 for ( i = 0; i < 8; i++) {
00142
00143 if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
00144 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
00145
00146 t1 = (int32_t)(xC1S7 * ip[1*8]);
00147 t2 = (int32_t)(xC7S1 * ip[7*8]);
00148 t1 >>= 16;
00149 t2 >>= 16;
00150 A_ = t1 + t2;
00151
00152 t1 = (int32_t)(xC7S1 * ip[1*8]);
00153 t2 = (int32_t)(xC1S7 * ip[7*8]);
00154 t1 >>= 16;
00155 t2 >>= 16;
00156 B_ = t1 - t2;
00157
00158 t1 = (int32_t)(xC3S5 * ip[3*8]);
00159 t2 = (int32_t)(xC5S3 * ip[5*8]);
00160 t1 >>= 16;
00161 t2 >>= 16;
00162 C_ = t1 + t2;
00163
00164 t1 = (int32_t)(xC3S5 * ip[5*8]);
00165 t2 = (int32_t)(xC5S3 * ip[3*8]);
00166 t1 >>= 16;
00167 t2 >>= 16;
00168 D_ = t1 - t2;
00169
00170
00171 t1 = (int32_t)(xC4S4 * (A_ - C_));
00172 t1 >>= 16;
00173 _Ad = t1;
00174
00175 t1 = (int32_t)(xC4S4 * (B_ - D_));
00176 t1 >>= 16;
00177 _Bd = t1;
00178
00179
00180 _Cd = A_ + C_;
00181 _Dd = B_ + D_;
00182
00183 t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8]));
00184 t1 >>= 16;
00185 E_ = t1;
00186
00187 t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8]));
00188 t1 >>= 16;
00189 F_ = t1;
00190
00191 t1 = (int32_t)(xC2S6 * ip[2*8]);
00192 t2 = (int32_t)(xC6S2 * ip[6*8]);
00193 t1 >>= 16;
00194 t2 >>= 16;
00195 G_ = t1 + t2;
00196
00197 t1 = (int32_t)(xC6S2 * ip[2*8]);
00198 t2 = (int32_t)(xC2S6 * ip[6*8]);
00199 t1 >>= 16;
00200 t2 >>= 16;
00201 H_ = t1 - t2;
00202
00203
00204 _Ed = E_ - G_;
00205 _Gd = E_ + G_;
00206
00207 _Add = F_ + _Ad;
00208 _Bdd = _Bd - H_;
00209
00210 _Fd = F_ - _Ad;
00211 _Hd = _Bd + H_;
00212
00213 if(type==1){
00214 _Gd += 16*128;
00215 _Add+= 16*128;
00216 _Ed += 16*128;
00217 _Fd += 16*128;
00218 }
00219 _Gd += IdctAdjustBeforeShift;
00220 _Add += IdctAdjustBeforeShift;
00221 _Ed += IdctAdjustBeforeShift;
00222 _Fd += IdctAdjustBeforeShift;
00223
00224
00225 if(type==0){
00226 ip[0*8] = (_Gd + _Cd ) >> 4;
00227 ip[7*8] = (_Gd - _Cd ) >> 4;
00228
00229 ip[1*8] = (_Add + _Hd ) >> 4;
00230 ip[2*8] = (_Add - _Hd ) >> 4;
00231
00232 ip[3*8] = (_Ed + _Dd ) >> 4;
00233 ip[4*8] = (_Ed - _Dd ) >> 4;
00234
00235 ip[5*8] = (_Fd + _Bdd ) >> 4;
00236 ip[6*8] = (_Fd - _Bdd ) >> 4;
00237 }else if(type==1){
00238 dst[0*stride] = cm[(_Gd + _Cd ) >> 4];
00239 dst[7*stride] = cm[(_Gd - _Cd ) >> 4];
00240
00241 dst[1*stride] = cm[(_Add + _Hd ) >> 4];
00242 dst[2*stride] = cm[(_Add - _Hd ) >> 4];
00243
00244 dst[3*stride] = cm[(_Ed + _Dd ) >> 4];
00245 dst[4*stride] = cm[(_Ed - _Dd ) >> 4];
00246
00247 dst[5*stride] = cm[(_Fd + _Bdd ) >> 4];
00248 dst[6*stride] = cm[(_Fd - _Bdd ) >> 4];
00249 }else{
00250 dst[0*stride] = cm[dst[0*stride] + ((_Gd + _Cd ) >> 4)];
00251 dst[7*stride] = cm[dst[7*stride] + ((_Gd - _Cd ) >> 4)];
00252
00253 dst[1*stride] = cm[dst[1*stride] + ((_Add + _Hd ) >> 4)];
00254 dst[2*stride] = cm[dst[2*stride] + ((_Add - _Hd ) >> 4)];
00255
00256 dst[3*stride] = cm[dst[3*stride] + ((_Ed + _Dd ) >> 4)];
00257 dst[4*stride] = cm[dst[4*stride] + ((_Ed - _Dd ) >> 4)];
00258
00259 dst[5*stride] = cm[dst[5*stride] + ((_Fd + _Bdd ) >> 4)];
00260 dst[6*stride] = cm[dst[6*stride] + ((_Fd - _Bdd ) >> 4)];
00261 }
00262
00263 } else {
00264 if(type==0){
00265 ip[0*8] =
00266 ip[1*8] =
00267 ip[2*8] =
00268 ip[3*8] =
00269 ip[4*8] =
00270 ip[5*8] =
00271 ip[6*8] =
00272 ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
00273 }else if(type==1){
00274 dst[0*stride]=
00275 dst[1*stride]=
00276 dst[2*stride]=
00277 dst[3*stride]=
00278 dst[4*stride]=
00279 dst[5*stride]=
00280 dst[6*stride]=
00281 dst[7*stride]= 128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
00282 }else{
00283 if(ip[0*8]){
00284 int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
00285 dst[0*stride] = cm[dst[0*stride] + v];
00286 dst[1*stride] = cm[dst[1*stride] + v];
00287 dst[2*stride] = cm[dst[2*stride] + v];
00288 dst[3*stride] = cm[dst[3*stride] + v];
00289 dst[4*stride] = cm[dst[4*stride] + v];
00290 dst[5*stride] = cm[dst[5*stride] + v];
00291 dst[6*stride] = cm[dst[6*stride] + v];
00292 dst[7*stride] = cm[dst[7*stride] + v];
00293 }
00294 }
00295 }
00296
00297 ip++;
00298 dst++;
00299 }
00300 }
00301
00302 void ff_vp3_idct_c(DCTELEM *block){
00303 idct(NULL, 0, block, 0);
00304 }
00305
00306 void ff_vp3_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block){
00307 idct(dest, line_size, block, 1);
00308 }
00309
00310 void ff_vp3_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block){
00311 idct(dest, line_size, block, 2);
00312 }