00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "../dsputil.h"
00021 #include "../simple_idct.h"
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
/*
 * Fixed-point cosine weights: Ci = cos(i*M_PI/16)*sqrt(2)*(1<<14), rounded as
 * noted on each line.
 */
00033 #define C0 23170 // cos(0*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00034 #define C1 22725 // cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00035 #define C2 21407 // cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00036 #define C3 19266 // cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00037 #if 0
00038 #define C4 16384 // cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 (round-to-nearest value; this branch is disabled)
00039 #else
00040 #define C4 16383 // cos(4*M_PI/16)*sqrt(2)*(1<<14) - 0.5 (one less than the rounded value; NOTE(review): the reason is not recorded in this file)
00041 #endif
00042 #define C5 12873 // cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00043 #define C6 8867 // cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00044 #define C7 4520 // cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
00045
/*
 * Right-shift amounts applied after the row and column passes. The rounder
 * rows of coeffs[] are built from ROW_SHIFT. The inline-asm macro
 * invocations in idct() pass the literals 11 and 20 instead of these macros,
 * so the two have to be kept in sync by hand.
 */
00046 #define ROW_SHIFT 11 // shift after the row pass
00047 #define COL_SHIFT 20 // shift after the column pass; original note here was just "6" (NOTE(review): its meaning is not evident from this file)
00048
/*
 * 64-bit constants referenced by name from the inline asm through MANGLE().
 * NOTE(review): attribute_used is defined outside this view; presumably it
 * keeps the compiler from discarding objects that C code never references --
 * confirm.
 *
 * wm1010: bits 16-31 and 48-63 set. DC_COND_IDCT ANDs it with the first
 *         source quadword before OR-ing in the other three, so bits 0-15 and
 *         32-47 of that first quadword are left out of the "is everything
 *         else zero?" test.
 * d40000: 0x40000 in the low 32 bits only. The DC-only branch of
 *         DC_COND_IDCT adds it between "pslld $16" and "psrad $13"
 *         (0x40000 >> 13 == 32).
 */
00049 static const uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL;
00050 static const uint64_t attribute_used __attribute__((aligned(8))) d40000= 0x0000000000040000ULL;
00051
/*
 * Constant table for the MMX IDCT. Every initializer row below is four
 * int16_t, i.e. one 8-byte quadword; the trailing comments give the byte
 * offset of each quadword from the start of the table.
 *
 * NOTE(review): the asm in idct() reads constants at displacements 0..104
 * from operand %2. The operand list is outside this view; the offsets match,
 * so %2 is presumably this table -- confirm. Reordering or inserting rows
 * here would silently break those hard-coded displacements.
 */
00052 static const int16_t __attribute__((aligned(8))) coeffs[]= {
00053 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0, // offset 0: rounder, passed to the asm macros as "paddd (%2)"
00054
00055
00056 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0, // offset 8: rounder with an extra 1 in the second word, passed as "paddd 8(%2)" for the first row
00057
00058
00059
00060
00061 C4, C4, C4, C4, // offset 16
00062 C4, -C4, C4, -C4, // offset 24
00063
00064 C2, C6, C2, C6, // offset 32
00065 C6, -C2, C6, -C2, // offset 40
00066
00067 C1, C3, C1, C3, // offset 48
00068 C5, C7, C5, C7, // offset 56
00069
00070 C3, -C7, C3, -C7, // offset 64
00071 -C1, -C5, -C1, -C5, // offset 72
00072
00073 C5, -C1, C5, -C1, // offset 80
00074 C7, C3, C7, C3, // offset 88
00075
00076 C7, -C5, C7, -C5, // offset 96
00077 C3, -C1, C3, -C1 // offset 104
00078 };
00079
00080 #if 0
00081 static void unused_var_killer(){
00082 int a= wm1010 + d40000;
00083 temp[0]=a;
00084 }
00085
00086 static void inline idctCol (int16_t * col, int16_t *input)
00087 {
00088 #undef C0
00089 #undef C1
00090 #undef C2
00091 #undef C3
00092 #undef C4
00093 #undef C5
00094 #undef C6
00095 #undef C7
00096 int a0, a1, a2, a3, b0, b1, b2, b3;
00097 const int C0 = 23170;
00098 const int C1 = 22725;
00099 const int C2 = 21407;
00100 const int C3 = 19266;
00101 const int C4 = 16383;
00102 const int C5 = 12873;
00103 const int C6 = 8867;
00104 const int C7 = 4520;
00105
00106
00107
00108
00109
00110
00111
00112 col[8*0] = input[8*0 + 0];
00113 col[8*1] = input[8*2 + 0];
00114 col[8*2] = input[8*0 + 1];
00115 col[8*3] = input[8*2 + 1];
00116 col[8*4] = input[8*4 + 0];
00117 col[8*5] = input[8*6 + 0];
00118 col[8*6] = input[8*4 + 1];
00119 col[8*7] = input[8*6 + 1];
00120
00121 a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6] + (1<<(COL_SHIFT-1));
00122 a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6] + (1<<(COL_SHIFT-1));
00123 a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6] + (1<<(COL_SHIFT-1));
00124 a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6] + (1<<(COL_SHIFT-1));
00125
00126 b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7];
00127 b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7];
00128 b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7];
00129 b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7];
00130
00131 col[8*0] = (a0 + b0) >> COL_SHIFT;
00132 col[8*1] = (a1 + b1) >> COL_SHIFT;
00133 col[8*2] = (a2 + b2) >> COL_SHIFT;
00134 col[8*3] = (a3 + b3) >> COL_SHIFT;
00135 col[8*4] = (a3 - b3) >> COL_SHIFT;
00136 col[8*5] = (a2 - b2) >> COL_SHIFT;
00137 col[8*6] = (a1 - b1) >> COL_SHIFT;
00138 col[8*7] = (a0 - b0) >> COL_SHIFT;
00139 }
00140
00141 static void inline idctRow (int16_t * output, int16_t * input)
00142 {
00143 int16_t row[8];
00144
00145 int a0, a1, a2, a3, b0, b1, b2, b3;
00146 const int C0 = 23170;
00147 const int C1 = 22725;
00148 const int C2 = 21407;
00149 const int C3 = 19266;
00150 const int C4 = 16383;
00151 const int C5 = 12873;
00152 const int C6 = 8867;
00153 const int C7 = 4520;
00154
00155 row[0] = input[0];
00156 row[2] = input[1];
00157 row[4] = input[4];
00158 row[6] = input[5];
00159 row[1] = input[8];
00160 row[3] = input[9];
00161 row[5] = input[12];
00162 row[7] = input[13];
00163
00164 if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7]) ) {
00165 row[0] = row[1] = row[2] = row[3] = row[4] =
00166 row[5] = row[6] = row[7] = row[0]<<3;
00167 output[0] = row[0];
00168 output[2] = row[1];
00169 output[4] = row[2];
00170 output[6] = row[3];
00171 output[8] = row[4];
00172 output[10] = row[5];
00173 output[12] = row[6];
00174 output[14] = row[7];
00175 return;
00176 }
00177
00178 a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1));
00179 a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1));
00180 a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1));
00181 a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1));
00182
00183 b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
00184 b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
00185 b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
00186 b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
00187
00188 row[0] = (a0 + b0) >> ROW_SHIFT;
00189 row[1] = (a1 + b1) >> ROW_SHIFT;
00190 row[2] = (a2 + b2) >> ROW_SHIFT;
00191 row[3] = (a3 + b3) >> ROW_SHIFT;
00192 row[4] = (a3 - b3) >> ROW_SHIFT;
00193 row[5] = (a2 - b2) >> ROW_SHIFT;
00194 row[6] = (a1 - b1) >> ROW_SHIFT;
00195 row[7] = (a0 - b0) >> ROW_SHIFT;
00196
00197 output[0] = row[0];
00198 output[2] = row[1];
00199 output[4] = row[2];
00200 output[6] = row[3];
00201 output[8] = row[4];
00202 output[10] = row[5];
00203 output[12] = row[6];
00204 output[14] = row[7];
00205 }
00206 #endif
00207
00208 static inline void idct(int16_t *block)
00209 {
00210 int64_t __attribute__((aligned(8))) align_tmp[16];
00211 int16_t * const temp= (int16_t*)align_tmp;
00212
00213 asm volatile(
00214 #if 0 //Alternative, simpler variant
00215
00216 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00217 "movq " #src0 ", %%mm0 \n\t" \
00218 "movq " #src4 ", %%mm1 \n\t" \
00219 "movq " #src1 ", %%mm2 \n\t" \
00220 "movq " #src5 ", %%mm3 \n\t" \
00221 "movq 16(%2), %%mm4 \n\t" \
00222 "pmaddwd %%mm0, %%mm4 \n\t" \
00223 "movq 24(%2), %%mm5 \n\t" \
00224 "pmaddwd %%mm5, %%mm0 \n\t" \
00225 "movq 32(%2), %%mm5 \n\t" \
00226 "pmaddwd %%mm1, %%mm5 \n\t" \
00227 "movq 40(%2), %%mm6 \n\t" \
00228 "pmaddwd %%mm6, %%mm1 \n\t" \
00229 "movq 48(%2), %%mm7 \n\t" \
00230 "pmaddwd %%mm2, %%mm7 \n\t" \
00231 #rounder ", %%mm4 \n\t"\
00232 "movq %%mm4, %%mm6 \n\t" \
00233 "paddd %%mm5, %%mm4 \n\t" \
00234 "psubd %%mm5, %%mm6 \n\t" \
00235 "movq 56(%2), %%mm5 \n\t" \
00236 "pmaddwd %%mm3, %%mm5 \n\t" \
00237 #rounder ", %%mm0 \n\t"\
00238 "paddd %%mm0, %%mm1 \n\t" \
00239 "paddd %%mm0, %%mm0 \n\t" \
00240 "psubd %%mm1, %%mm0 \n\t" \
00241 "pmaddwd 64(%2), %%mm2 \n\t" \
00242 "paddd %%mm5, %%mm7 \n\t" \
00243 "movq 72(%2), %%mm5 \n\t" \
00244 "pmaddwd %%mm3, %%mm5 \n\t" \
00245 "paddd %%mm4, %%mm7 \n\t" \
00246 "paddd %%mm4, %%mm4 \n\t" \
00247 "psubd %%mm7, %%mm4 \n\t" \
00248 "paddd %%mm2, %%mm5 \n\t" \
00249 "psrad $" #shift ", %%mm7 \n\t"\
00250 "psrad $" #shift ", %%mm4 \n\t"\
00251 "movq %%mm1, %%mm2 \n\t" \
00252 "paddd %%mm5, %%mm1 \n\t" \
00253 "psubd %%mm5, %%mm2 \n\t" \
00254 "psrad $" #shift ", %%mm1 \n\t"\
00255 "psrad $" #shift ", %%mm2 \n\t"\
00256 "packssdw %%mm1, %%mm7 \n\t" \
00257 "packssdw %%mm4, %%mm2 \n\t" \
00258 "movq %%mm7, " #dst " \n\t"\
00259 "movq " #src1 ", %%mm1 \n\t" \
00260 "movq 80(%2), %%mm4 \n\t" \
00261 "movq %%mm2, 24+" #dst " \n\t"\
00262 "pmaddwd %%mm1, %%mm4 \n\t" \
00263 "movq 88(%2), %%mm7 \n\t" \
00264 "pmaddwd 96(%2), %%mm1 \n\t" \
00265 "pmaddwd %%mm3, %%mm7 \n\t" \
00266 "movq %%mm0, %%mm2 \n\t" \
00267 "pmaddwd 104(%2), %%mm3 \n\t" \
00268 "paddd %%mm7, %%mm4 \n\t" \
00269 "paddd %%mm4, %%mm2 \n\t" \
00270 "psubd %%mm4, %%mm0 \n\t" \
00271 "psrad $" #shift ", %%mm2 \n\t"\
00272 "psrad $" #shift ", %%mm0 \n\t"\
00273 "movq %%mm6, %%mm4 \n\t" \
00274 "paddd %%mm1, %%mm3 \n\t" \
00275 "paddd %%mm3, %%mm6 \n\t" \
00276 "psubd %%mm3, %%mm4 \n\t" \
00277 "psrad $" #shift ", %%mm6 \n\t"\
00278 "packssdw %%mm6, %%mm2 \n\t" \
00279 "movq %%mm2, 8+" #dst " \n\t"\
00280 "psrad $" #shift ", %%mm4 \n\t"\
00281 "packssdw %%mm0, %%mm4 \n\t" \
00282 "movq %%mm4, 16+" #dst " \n\t"\
00283
00284 #define COL_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00285 "movq " #src0 ", %%mm0 \n\t" \
00286 "movq " #src4 ", %%mm1 \n\t" \
00287 "movq " #src1 ", %%mm2 \n\t" \
00288 "movq " #src5 ", %%mm3 \n\t" \
00289 "movq 16(%2), %%mm4 \n\t" \
00290 "pmaddwd %%mm0, %%mm4 \n\t" \
00291 "movq 24(%2), %%mm5 \n\t" \
00292 "pmaddwd %%mm5, %%mm0 \n\t" \
00293 "movq 32(%2), %%mm5 \n\t" \
00294 "pmaddwd %%mm1, %%mm5 \n\t" \
00295 "movq 40(%2), %%mm6 \n\t" \
00296 "pmaddwd %%mm6, %%mm1 \n\t" \
00297 #rounder ", %%mm4 \n\t"\
00298 "movq %%mm4, %%mm6 \n\t" \
00299 "movq 48(%2), %%mm7 \n\t" \
00300 #rounder ", %%mm0 \n\t"\
00301 "pmaddwd %%mm2, %%mm7 \n\t" \
00302 "paddd %%mm5, %%mm4 \n\t" \
00303 "psubd %%mm5, %%mm6 \n\t" \
00304 "movq %%mm0, %%mm5 \n\t" \
00305 "paddd %%mm1, %%mm0 \n\t" \
00306 "psubd %%mm1, %%mm5 \n\t" \
00307 "movq 56(%2), %%mm1 \n\t" \
00308 "pmaddwd %%mm3, %%mm1 \n\t" \
00309 "pmaddwd 64(%2), %%mm2 \n\t" \
00310 "paddd %%mm1, %%mm7 \n\t" \
00311 "movq 72(%2), %%mm1 \n\t" \
00312 "pmaddwd %%mm3, %%mm1 \n\t" \
00313 "paddd %%mm4, %%mm7 \n\t" \
00314 "paddd %%mm4, %%mm4 \n\t" \
00315 "psubd %%mm7, %%mm4 \n\t" \
00316 "paddd %%mm2, %%mm1 \n\t" \
00317 "psrad $" #shift ", %%mm7 \n\t"\
00318 "psrad $" #shift ", %%mm4 \n\t"\
00319 "movq %%mm0, %%mm2 \n\t" \
00320 "paddd %%mm1, %%mm0 \n\t" \
00321 "psubd %%mm1, %%mm2 \n\t" \
00322 "psrad $" #shift ", %%mm0 \n\t"\
00323 "psrad $" #shift ", %%mm2 \n\t"\
00324 "packssdw %%mm7, %%mm7 \n\t" \
00325 "movd %%mm7, " #dst " \n\t"\
00326 "packssdw %%mm0, %%mm0 \n\t" \
00327 "movd %%mm0, 16+" #dst " \n\t"\
00328 "packssdw %%mm2, %%mm2 \n\t" \
00329 "movd %%mm2, 96+" #dst " \n\t"\
00330 "packssdw %%mm4, %%mm4 \n\t" \
00331 "movd %%mm4, 112+" #dst " \n\t"\
00332 "movq " #src1 ", %%mm0 \n\t" \
00333 "movq 80(%2), %%mm4 \n\t" \
00334 "pmaddwd %%mm0, %%mm4 \n\t" \
00335 "movq 88(%2), %%mm7 \n\t" \
00336 "pmaddwd 96(%2), %%mm0 \n\t" \
00337 "pmaddwd %%mm3, %%mm7 \n\t" \
00338 "movq %%mm5, %%mm2 \n\t" \
00339 "pmaddwd 104(%2), %%mm3 \n\t" \
00340 "paddd %%mm7, %%mm4 \n\t" \
00341 "paddd %%mm4, %%mm2 \n\t" \
00342 "psubd %%mm4, %%mm5 \n\t" \
00343 "psrad $" #shift ", %%mm2 \n\t"\
00344 "psrad $" #shift ", %%mm5 \n\t"\
00345 "movq %%mm6, %%mm4 \n\t" \
00346 "paddd %%mm0, %%mm3 \n\t" \
00347 "paddd %%mm3, %%mm6 \n\t" \
00348 "psubd %%mm3, %%mm4 \n\t" \
00349 "psrad $" #shift ", %%mm6 \n\t"\
00350 "psrad $" #shift ", %%mm4 \n\t"\
00351 "packssdw %%mm2, %%mm2 \n\t" \
00352 "packssdw %%mm6, %%mm6 \n\t" \
00353 "movd %%mm2, 32+" #dst " \n\t"\
00354 "packssdw %%mm4, %%mm4 \n\t" \
00355 "packssdw %%mm5, %%mm5 \n\t" \
00356 "movd %%mm6, 48+" #dst " \n\t"\
00357 "movd %%mm4, 64+" #dst " \n\t"\
00358 "movd %%mm5, 80+" #dst " \n\t"\
00359
00360
00361 #define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00362 "movq " #src0 ", %%mm0 \n\t" \
00363 "movq " #src4 ", %%mm1 \n\t" \
00364 "movq " #src1 ", %%mm2 \n\t" \
00365 "movq " #src5 ", %%mm3 \n\t" \
00366 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
00367 "pand %%mm0, %%mm4 \n\t"\
00368 "por %%mm1, %%mm4 \n\t"\
00369 "por %%mm2, %%mm4 \n\t"\
00370 "por %%mm3, %%mm4 \n\t"\
00371 "packssdw %%mm4,%%mm4 \n\t"\
00372 "movd %%mm4, %%eax \n\t"\
00373 "orl %%eax, %%eax \n\t"\
00374 "jz 1f \n\t"\
00375 "movq 16(%2), %%mm4 \n\t" \
00376 "pmaddwd %%mm0, %%mm4 \n\t" \
00377 "movq 24(%2), %%mm5 \n\t" \
00378 "pmaddwd %%mm5, %%mm0 \n\t" \
00379 "movq 32(%2), %%mm5 \n\t" \
00380 "pmaddwd %%mm1, %%mm5 \n\t" \
00381 "movq 40(%2), %%mm6 \n\t" \
00382 "pmaddwd %%mm6, %%mm1 \n\t" \
00383 "movq 48(%2), %%mm7 \n\t" \
00384 "pmaddwd %%mm2, %%mm7 \n\t" \
00385 #rounder ", %%mm4 \n\t"\
00386 "movq %%mm4, %%mm6 \n\t" \
00387 "paddd %%mm5, %%mm4 \n\t" \
00388 "psubd %%mm5, %%mm6 \n\t" \
00389 "movq 56(%2), %%mm5 \n\t" \
00390 "pmaddwd %%mm3, %%mm5 \n\t" \
00391 #rounder ", %%mm0 \n\t"\
00392 "paddd %%mm0, %%mm1 \n\t" \
00393 "paddd %%mm0, %%mm0 \n\t" \
00394 "psubd %%mm1, %%mm0 \n\t" \
00395 "pmaddwd 64(%2), %%mm2 \n\t" \
00396 "paddd %%mm5, %%mm7 \n\t" \
00397 "movq 72(%2), %%mm5 \n\t" \
00398 "pmaddwd %%mm3, %%mm5 \n\t" \
00399 "paddd %%mm4, %%mm7 \n\t" \
00400 "paddd %%mm4, %%mm4 \n\t" \
00401 "psubd %%mm7, %%mm4 \n\t" \
00402 "paddd %%mm2, %%mm5 \n\t" \
00403 "psrad $" #shift ", %%mm7 \n\t"\
00404 "psrad $" #shift ", %%mm4 \n\t"\
00405 "movq %%mm1, %%mm2 \n\t" \
00406 "paddd %%mm5, %%mm1 \n\t" \
00407 "psubd %%mm5, %%mm2 \n\t" \
00408 "psrad $" #shift ", %%mm1 \n\t"\
00409 "psrad $" #shift ", %%mm2 \n\t"\
00410 "packssdw %%mm1, %%mm7 \n\t" \
00411 "packssdw %%mm4, %%mm2 \n\t" \
00412 "movq %%mm7, " #dst " \n\t"\
00413 "movq " #src1 ", %%mm1 \n\t" \
00414 "movq 80(%2), %%mm4 \n\t" \
00415 "movq %%mm2, 24+" #dst " \n\t"\
00416 "pmaddwd %%mm1, %%mm4 \n\t" \
00417 "movq 88(%2), %%mm7 \n\t" \
00418 "pmaddwd 96(%2), %%mm1 \n\t" \
00419 "pmaddwd %%mm3, %%mm7 \n\t" \
00420 "movq %%mm0, %%mm2 \n\t" \
00421 "pmaddwd 104(%2), %%mm3 \n\t" \
00422 "paddd %%mm7, %%mm4 \n\t" \
00423 "paddd %%mm4, %%mm2 \n\t" \
00424 "psubd %%mm4, %%mm0 \n\t" \
00425 "psrad $" #shift ", %%mm2 \n\t"\
00426 "psrad $" #shift ", %%mm0 \n\t"\
00427 "movq %%mm6, %%mm4 \n\t" \
00428 "paddd %%mm1, %%mm3 \n\t" \
00429 "paddd %%mm3, %%mm6 \n\t" \
00430 "psubd %%mm3, %%mm4 \n\t" \
00431 "psrad $" #shift ", %%mm6 \n\t"\
00432 "packssdw %%mm6, %%mm2 \n\t" \
00433 "movq %%mm2, 8+" #dst " \n\t"\
00434 "psrad $" #shift ", %%mm4 \n\t"\
00435 "packssdw %%mm0, %%mm4 \n\t" \
00436 "movq %%mm4, 16+" #dst " \n\t"\
00437 "jmp 2f \n\t"\
00438 "1: \n\t"\
00439 "pslld $16, %%mm0 \n\t"\
00440 "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
00441 "psrad $13, %%mm0 \n\t"\
00442 "packssdw %%mm0, %%mm0 \n\t"\
00443 "movq %%mm0, " #dst " \n\t"\
00444 "movq %%mm0, 8+" #dst " \n\t"\
00445 "movq %%mm0, 16+" #dst " \n\t"\
00446 "movq %%mm0, 24+" #dst " \n\t"\
00447 "2: \n\t"
00448
00449
00450
00451 ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
00452
00453
00454
00455
00456 DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
00457 DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
00458 DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
00459
00460
00461
00462 COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
00463 COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
00464 COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
00465 COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
00466
00467 #else
00468
00469 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00470 "movq " #src0 ", %%mm0 \n\t" \
00471 "movq " #src4 ", %%mm1 \n\t" \
00472 "movq " #src1 ", %%mm2 \n\t" \
00473 "movq " #src5 ", %%mm3 \n\t" \
00474 "movq "MANGLE(wm1010)", %%mm4 \n\t"\
00475 "pand %%mm0, %%mm4 \n\t"\
00476 "por %%mm1, %%mm4 \n\t"\
00477 "por %%mm2, %%mm4 \n\t"\
00478 "por %%mm3, %%mm4 \n\t"\
00479 "packssdw %%mm4,%%mm4 \n\t"\
00480 "movd %%mm4, %%eax \n\t"\
00481 "orl %%eax, %%eax \n\t"\
00482 "jz 1f \n\t"\
00483 "movq 16(%2), %%mm4 \n\t" \
00484 "pmaddwd %%mm0, %%mm4 \n\t" \
00485 "movq 24(%2), %%mm5 \n\t" \
00486 "pmaddwd %%mm5, %%mm0 \n\t" \
00487 "movq 32(%2), %%mm5 \n\t" \
00488 "pmaddwd %%mm1, %%mm5 \n\t" \
00489 "movq 40(%2), %%mm6 \n\t" \
00490 "pmaddwd %%mm6, %%mm1 \n\t" \
00491 "movq 48(%2), %%mm7 \n\t" \
00492 "pmaddwd %%mm2, %%mm7 \n\t" \
00493 #rounder ", %%mm4 \n\t"\
00494 "movq %%mm4, %%mm6 \n\t" \
00495 "paddd %%mm5, %%mm4 \n\t" \
00496 "psubd %%mm5, %%mm6 \n\t" \
00497 "movq 56(%2), %%mm5 \n\t" \
00498 "pmaddwd %%mm3, %%mm5 \n\t" \
00499 #rounder ", %%mm0 \n\t"\
00500 "paddd %%mm0, %%mm1 \n\t" \
00501 "paddd %%mm0, %%mm0 \n\t" \
00502 "psubd %%mm1, %%mm0 \n\t" \
00503 "pmaddwd 64(%2), %%mm2 \n\t" \
00504 "paddd %%mm5, %%mm7 \n\t" \
00505 "movq 72(%2), %%mm5 \n\t" \
00506 "pmaddwd %%mm3, %%mm5 \n\t" \
00507 "paddd %%mm4, %%mm7 \n\t" \
00508 "paddd %%mm4, %%mm4 \n\t" \
00509 "psubd %%mm7, %%mm4 \n\t" \
00510 "paddd %%mm2, %%mm5 \n\t" \
00511 "psrad $" #shift ", %%mm7 \n\t"\
00512 "psrad $" #shift ", %%mm4 \n\t"\
00513 "movq %%mm1, %%mm2 \n\t" \
00514 "paddd %%mm5, %%mm1 \n\t" \
00515 "psubd %%mm5, %%mm2 \n\t" \
00516 "psrad $" #shift ", %%mm1 \n\t"\
00517 "psrad $" #shift ", %%mm2 \n\t"\
00518 "packssdw %%mm1, %%mm7 \n\t" \
00519 "packssdw %%mm4, %%mm2 \n\t" \
00520 "movq %%mm7, " #dst " \n\t"\
00521 "movq " #src1 ", %%mm1 \n\t" \
00522 "movq 80(%2), %%mm4 \n\t" \
00523 "movq %%mm2, 24+" #dst " \n\t"\
00524 "pmaddwd %%mm1, %%mm4 \n\t" \
00525 "movq 88(%2), %%mm7 \n\t" \
00526 "pmaddwd 96(%2), %%mm1 \n\t" \
00527 "pmaddwd %%mm3, %%mm7 \n\t" \
00528 "movq %%mm0, %%mm2 \n\t" \
00529 "pmaddwd 104(%2), %%mm3 \n\t" \
00530 "paddd %%mm7, %%mm4 \n\t" \
00531 "paddd %%mm4, %%mm2 \n\t" \
00532 "psubd %%mm4, %%mm0 \n\t" \
00533 "psrad $" #shift ", %%mm2 \n\t"\
00534 "psrad $" #shift ", %%mm0 \n\t"\
00535 "movq %%mm6, %%mm4 \n\t" \
00536 "paddd %%mm1, %%mm3 \n\t" \
00537 "paddd %%mm3, %%mm6 \n\t" \
00538 "psubd %%mm3, %%mm4 \n\t" \
00539 "psrad $" #shift ", %%mm6 \n\t"\
00540 "packssdw %%mm6, %%mm2 \n\t" \
00541 "movq %%mm2, 8+" #dst " \n\t"\
00542 "psrad $" #shift ", %%mm4 \n\t"\
00543 "packssdw %%mm0, %%mm4 \n\t" \
00544 "movq %%mm4, 16+" #dst " \n\t"\
00545 "jmp 2f \n\t"\
00546 "1: \n\t"\
00547 "pslld $16, %%mm0 \n\t"\
00548 "paddd "MANGLE(d40000)", %%mm0 \n\t"\
00549 "psrad $13, %%mm0 \n\t"\
00550 "packssdw %%mm0, %%mm0 \n\t"\
00551 "movq %%mm0, " #dst " \n\t"\
00552 "movq %%mm0, 8+" #dst " \n\t"\
00553 "movq %%mm0, 16+" #dst " \n\t"\
00554 "movq %%mm0, 24+" #dst " \n\t"\
00555 "2: \n\t"
00556
00557 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
00558 "movq " #src0 ", %%mm0 \n\t" \
00559 "movq " #src4 ", %%mm1 \n\t" \
00560 "movq " #src1 ", %%mm2 \n\t" \
00561 "movq " #src5 ", %%mm3 \n\t" \
00562 "movq %%mm0, %%mm4 \n\t"\
00563 "por %%mm1, %%mm4 \n\t"\
00564 "por %%mm2, %%mm4 \n\t"\
00565 "por %%mm3, %%mm4 \n\t"\
00566 "packssdw %%mm4,%%mm4 \n\t"\
00567 "movd %%mm4, %%eax \n\t"\
00568 "orl %%eax, %%eax \n\t"\
00569 "jz " #bt " \n\t"\
00570 "movq 16(%2), %%mm4 \n\t" \
00571 "pmaddwd %%mm0, %%mm4 \n\t" \
00572 "movq 24(%2), %%mm5 \n\t" \
00573 "pmaddwd %%mm5, %%mm0 \n\t" \
00574 "movq 32(%2), %%mm5 \n\t" \
00575 "pmaddwd %%mm1, %%mm5 \n\t" \
00576 "movq 40(%2), %%mm6 \n\t" \
00577 "pmaddwd %%mm6, %%mm1 \n\t" \
00578 "movq 48(%2), %%mm7 \n\t" \
00579 "pmaddwd %%mm2, %%mm7 \n\t" \
00580 #rounder ", %%mm4 \n\t"\
00581 "movq %%mm4, %%mm6 \n\t" \
00582 "paddd %%mm5, %%mm4 \n\t" \
00583 "psubd %%mm5, %%mm6 \n\t" \
00584 "movq 56(%2), %%mm5 \n\t" \
00585 "pmaddwd %%mm3, %%mm5 \n\t" \
00586 #rounder ", %%mm0 \n\t"\
00587 "paddd %%mm0, %%mm1 \n\t" \
00588 "paddd %%mm0, %%mm0 \n\t" \
00589 "psubd %%mm1, %%mm0 \n\t" \
00590 "pmaddwd 64(%2), %%mm2 \n\t" \
00591 "paddd %%mm5, %%mm7 \n\t" \
00592 "movq 72(%2), %%mm5 \n\t" \
00593 "pmaddwd %%mm3, %%mm5 \n\t" \
00594 "paddd %%mm4, %%mm7 \n\t" \
00595 "paddd %%mm4, %%mm4 \n\t" \
00596 "psubd %%mm7, %%mm4 \n\t" \
00597 "paddd %%mm2, %%mm5 \n\t" \
00598 "psrad $" #shift ", %%mm7 \n\t"\
00599 "psrad $" #shift ", %%mm4 \n\t"\
00600 "movq %%mm1, %%mm2 \n\t" \
00601 "paddd %%mm5, %%mm1 \n\t" \
00602 "psubd %%mm5, %%mm2 \n\t" \
00603 "psrad $" #shift ", %%mm1 \n\t"\
00604 "psrad $" #shift ", %%mm2 \n\t"\
00605 "packssdw %%mm1, %%mm7 \n\t" \
00606 "packssdw %%mm4, %%mm2 \n\t" \
00607 "movq %%mm7, " #dst " \n\t"\
00608 "movq " #src1 ", %%mm1 \n\t" \
00609 "movq 80(%2), %%mm4 \n\t" \
00610 "movq %%mm2, 24+" #dst " \n\t"\
00611 "pmaddwd %%mm1, %%mm4 \n\t" \
00612 "movq 88(%2), %%mm7 \n\t" \
00613 "pmaddwd 96(%2), %%mm1 \n\t" \
00614 "pmaddwd %%mm3, %%mm7 \n\t" \
00615 "movq %%mm0, %%mm2 \n\t" \
00616 "pmaddwd 104(%2), %%mm3 \n\t" \
00617 "paddd %%mm7, %%mm4 \n\t" \
00618 "paddd %%mm4, %%mm2 \n\t" \
00619 "psubd %%mm4, %%mm0 \n\t" \
00620 "psrad $" #shift ", %%mm2 \n\t"\
00621 "psrad $" #shift ", %%mm0 \n\t"\
00622 "movq %%mm6, %%mm4 \n\t" \
00623 "paddd %%mm1, %%mm3 \n\t" \
00624 "paddd %%mm3, %%mm6 \n\t" \
00625 "psubd %%mm3, %%mm4 \n\t" \
00626 "psrad $" #shift ", %%mm6 \n\t"\
00627 "packssdw %%mm6, %%mm2 \n\t" \
00628 "movq %%mm2, 8+" #dst " \n\t"\
00629 "psrad $" #shift ", %%mm4 \n\t"\
00630 "packssdw %%mm0, %%mm4 \n\t" \
00631 "movq %%mm4, 16+" #dst " \n\t"\
00632
00633 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00634 "movq " #src0 ", %%mm0 \n\t" \
00635 "movq " #src4 ", %%mm1 \n\t" \
00636 "movq " #src1 ", %%mm2 \n\t" \
00637 "movq " #src5 ", %%mm3 \n\t" \
00638 "movq 16(%2), %%mm4 \n\t" \
00639 "pmaddwd %%mm0, %%mm4 \n\t" \
00640 "movq 24(%2), %%mm5 \n\t" \
00641 "pmaddwd %%mm5, %%mm0 \n\t" \
00642 "movq 32(%2), %%mm5 \n\t" \
00643 "pmaddwd %%mm1, %%mm5 \n\t" \
00644 "movq 40(%2), %%mm6 \n\t" \
00645 "pmaddwd %%mm6, %%mm1 \n\t" \
00646 "movq 48(%2), %%mm7 \n\t" \
00647 "pmaddwd %%mm2, %%mm7 \n\t" \
00648 #rounder ", %%mm4 \n\t"\
00649 "movq %%mm4, %%mm6 \n\t" \
00650 "paddd %%mm5, %%mm4 \n\t" \
00651 "psubd %%mm5, %%mm6 \n\t" \
00652 "movq 56(%2), %%mm5 \n\t" \
00653 "pmaddwd %%mm3, %%mm5 \n\t" \
00654 #rounder ", %%mm0 \n\t"\
00655 "paddd %%mm0, %%mm1 \n\t" \
00656 "paddd %%mm0, %%mm0 \n\t" \
00657 "psubd %%mm1, %%mm0 \n\t" \
00658 "pmaddwd 64(%2), %%mm2 \n\t" \
00659 "paddd %%mm5, %%mm7 \n\t" \
00660 "movq 72(%2), %%mm5 \n\t" \
00661 "pmaddwd %%mm3, %%mm5 \n\t" \
00662 "paddd %%mm4, %%mm7 \n\t" \
00663 "paddd %%mm4, %%mm4 \n\t" \
00664 "psubd %%mm7, %%mm4 \n\t" \
00665 "paddd %%mm2, %%mm5 \n\t" \
00666 "psrad $" #shift ", %%mm7 \n\t"\
00667 "psrad $" #shift ", %%mm4 \n\t"\
00668 "movq %%mm1, %%mm2 \n\t" \
00669 "paddd %%mm5, %%mm1 \n\t" \
00670 "psubd %%mm5, %%mm2 \n\t" \
00671 "psrad $" #shift ", %%mm1 \n\t"\
00672 "psrad $" #shift ", %%mm2 \n\t"\
00673 "packssdw %%mm1, %%mm7 \n\t" \
00674 "packssdw %%mm4, %%mm2 \n\t" \
00675 "movq %%mm7, " #dst " \n\t"\
00676 "movq " #src1 ", %%mm1 \n\t" \
00677 "movq 80(%2), %%mm4 \n\t" \
00678 "movq %%mm2, 24+" #dst " \n\t"\
00679 "pmaddwd %%mm1, %%mm4 \n\t" \
00680 "movq 88(%2), %%mm7 \n\t" \
00681 "pmaddwd 96(%2), %%mm1 \n\t" \
00682 "pmaddwd %%mm3, %%mm7 \n\t" \
00683 "movq %%mm0, %%mm2 \n\t" \
00684 "pmaddwd 104(%2), %%mm3 \n\t" \
00685 "paddd %%mm7, %%mm4 \n\t" \
00686 "paddd %%mm4, %%mm2 \n\t" \
00687 "psubd %%mm4, %%mm0 \n\t" \
00688 "psrad $" #shift ", %%mm2 \n\t"\
00689 "psrad $" #shift ", %%mm0 \n\t"\
00690 "movq %%mm6, %%mm4 \n\t" \
00691 "paddd %%mm1, %%mm3 \n\t" \
00692 "paddd %%mm3, %%mm6 \n\t" \
00693 "psubd %%mm3, %%mm4 \n\t" \
00694 "psrad $" #shift ", %%mm6 \n\t"\
00695 "packssdw %%mm6, %%mm2 \n\t" \
00696 "movq %%mm2, 8+" #dst " \n\t"\
00697 "psrad $" #shift ", %%mm4 \n\t"\
00698 "packssdw %%mm0, %%mm4 \n\t" \
00699 "movq %%mm4, 16+" #dst " \n\t"\
00700
00701
00702 DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
00703 Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
00704 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
00705 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
00706
00707 #undef IDCT
00708 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00709 "movq " #src0 ", %%mm0 \n\t" \
00710 "movq " #src4 ", %%mm1 \n\t" \
00711 "movq " #src1 ", %%mm2 \n\t" \
00712 "movq " #src5 ", %%mm3 \n\t" \
00713 "movq 16(%2), %%mm4 \n\t" \
00714 "pmaddwd %%mm0, %%mm4 \n\t" \
00715 "movq 24(%2), %%mm5 \n\t" \
00716 "pmaddwd %%mm5, %%mm0 \n\t" \
00717 "movq 32(%2), %%mm5 \n\t" \
00718 "pmaddwd %%mm1, %%mm5 \n\t" \
00719 "movq 40(%2), %%mm6 \n\t" \
00720 "pmaddwd %%mm6, %%mm1 \n\t" \
00721 #rounder ", %%mm4 \n\t"\
00722 "movq %%mm4, %%mm6 \n\t" \
00723 "movq 48(%2), %%mm7 \n\t" \
00724 #rounder ", %%mm0 \n\t"\
00725 "pmaddwd %%mm2, %%mm7 \n\t" \
00726 "paddd %%mm5, %%mm4 \n\t" \
00727 "psubd %%mm5, %%mm6 \n\t" \
00728 "movq %%mm0, %%mm5 \n\t" \
00729 "paddd %%mm1, %%mm0 \n\t" \
00730 "psubd %%mm1, %%mm5 \n\t" \
00731 "movq 56(%2), %%mm1 \n\t" \
00732 "pmaddwd %%mm3, %%mm1 \n\t" \
00733 "pmaddwd 64(%2), %%mm2 \n\t" \
00734 "paddd %%mm1, %%mm7 \n\t" \
00735 "movq 72(%2), %%mm1 \n\t" \
00736 "pmaddwd %%mm3, %%mm1 \n\t" \
00737 "paddd %%mm4, %%mm7 \n\t" \
00738 "paddd %%mm4, %%mm4 \n\t" \
00739 "psubd %%mm7, %%mm4 \n\t" \
00740 "paddd %%mm2, %%mm1 \n\t" \
00741 "psrad $" #shift ", %%mm7 \n\t"\
00742 "psrad $" #shift ", %%mm4 \n\t"\
00743 "movq %%mm0, %%mm2 \n\t" \
00744 "paddd %%mm1, %%mm0 \n\t" \
00745 "psubd %%mm1, %%mm2 \n\t" \
00746 "psrad $" #shift ", %%mm0 \n\t"\
00747 "psrad $" #shift ", %%mm2 \n\t"\
00748 "packssdw %%mm7, %%mm7 \n\t" \
00749 "movd %%mm7, " #dst " \n\t"\
00750 "packssdw %%mm0, %%mm0 \n\t" \
00751 "movd %%mm0, 16+" #dst " \n\t"\
00752 "packssdw %%mm2, %%mm2 \n\t" \
00753 "movd %%mm2, 96+" #dst " \n\t"\
00754 "packssdw %%mm4, %%mm4 \n\t" \
00755 "movd %%mm4, 112+" #dst " \n\t"\
00756 "movq " #src1 ", %%mm0 \n\t" \
00757 "movq 80(%2), %%mm4 \n\t" \
00758 "pmaddwd %%mm0, %%mm4 \n\t" \
00759 "movq 88(%2), %%mm7 \n\t" \
00760 "pmaddwd 96(%2), %%mm0 \n\t" \
00761 "pmaddwd %%mm3, %%mm7 \n\t" \
00762 "movq %%mm5, %%mm2 \n\t" \
00763 "pmaddwd 104(%2), %%mm3 \n\t" \
00764 "paddd %%mm7, %%mm4 \n\t" \
00765 "paddd %%mm4, %%mm2 \n\t" \
00766 "psubd %%mm4, %%mm5 \n\t" \
00767 "psrad $" #shift ", %%mm2 \n\t"\
00768 "psrad $" #shift ", %%mm5 \n\t"\
00769 "movq %%mm6, %%mm4 \n\t" \
00770 "paddd %%mm0, %%mm3 \n\t" \
00771 "paddd %%mm3, %%mm6 \n\t" \
00772 "psubd %%mm3, %%mm4 \n\t" \
00773 "psrad $" #shift ", %%mm6 \n\t"\
00774 "psrad $" #shift ", %%mm4 \n\t"\
00775 "packssdw %%mm2, %%mm2 \n\t" \
00776 "packssdw %%mm6, %%mm6 \n\t" \
00777 "movd %%mm2, 32+" #dst " \n\t"\
00778 "packssdw %%mm4, %%mm4 \n\t" \
00779 "packssdw %%mm5, %%mm5 \n\t" \
00780 "movd %%mm6, 48+" #dst " \n\t"\
00781 "movd %%mm4, 64+" #dst " \n\t"\
00782 "movd %%mm5, 80+" #dst " \n\t"
00783
00784
00785
00786 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
00787 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
00788 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
00789 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
00790 "jmp 9f \n\t"
00791
00792 "#.balign 16 \n\t"\
00793 "4: \n\t"
00794 Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
00795 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
00796
00797 #undef IDCT
00798 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00799 "movq " #src0 ", %%mm0 \n\t" \
00800 "movq " #src4 ", %%mm1 \n\t" \
00801 "movq " #src5 ", %%mm3 \n\t" \
00802 "movq 16(%2), %%mm4 \n\t" \
00803 "pmaddwd %%mm0, %%mm4 \n\t" \
00804 "movq 24(%2), %%mm5 \n\t" \
00805 "pmaddwd %%mm5, %%mm0 \n\t" \
00806 "movq 32(%2), %%mm5 \n\t" \
00807 "pmaddwd %%mm1, %%mm5 \n\t" \
00808 "movq 40(%2), %%mm6 \n\t" \
00809 "pmaddwd %%mm6, %%mm1 \n\t" \
00810 #rounder ", %%mm4 \n\t"\
00811 "movq %%mm4, %%mm6 \n\t" \
00812 #rounder ", %%mm0 \n\t"\
00813 "paddd %%mm5, %%mm4 \n\t" \
00814 "psubd %%mm5, %%mm6 \n\t" \
00815 "movq %%mm0, %%mm5 \n\t" \
00816 "paddd %%mm1, %%mm0 \n\t" \
00817 "psubd %%mm1, %%mm5 \n\t" \
00818 "movq 56(%2), %%mm1 \n\t" \
00819 "pmaddwd %%mm3, %%mm1 \n\t" \
00820 "movq 72(%2), %%mm7 \n\t" \
00821 "pmaddwd %%mm3, %%mm7 \n\t" \
00822 "paddd %%mm4, %%mm1 \n\t" \
00823 "paddd %%mm4, %%mm4 \n\t" \
00824 "psubd %%mm1, %%mm4 \n\t" \
00825 "psrad $" #shift ", %%mm1 \n\t"\
00826 "psrad $" #shift ", %%mm4 \n\t"\
00827 "movq %%mm0, %%mm2 \n\t" \
00828 "paddd %%mm7, %%mm0 \n\t" \
00829 "psubd %%mm7, %%mm2 \n\t" \
00830 "psrad $" #shift ", %%mm0 \n\t"\
00831 "psrad $" #shift ", %%mm2 \n\t"\
00832 "packssdw %%mm1, %%mm1 \n\t" \
00833 "movd %%mm1, " #dst " \n\t"\
00834 "packssdw %%mm0, %%mm0 \n\t" \
00835 "movd %%mm0, 16+" #dst " \n\t"\
00836 "packssdw %%mm2, %%mm2 \n\t" \
00837 "movd %%mm2, 96+" #dst " \n\t"\
00838 "packssdw %%mm4, %%mm4 \n\t" \
00839 "movd %%mm4, 112+" #dst " \n\t"\
00840 "movq 88(%2), %%mm1 \n\t" \
00841 "pmaddwd %%mm3, %%mm1 \n\t" \
00842 "movq %%mm5, %%mm2 \n\t" \
00843 "pmaddwd 104(%2), %%mm3 \n\t" \
00844 "paddd %%mm1, %%mm2 \n\t" \
00845 "psubd %%mm1, %%mm5 \n\t" \
00846 "psrad $" #shift ", %%mm2 \n\t"\
00847 "psrad $" #shift ", %%mm5 \n\t"\
00848 "movq %%mm6, %%mm1 \n\t" \
00849 "paddd %%mm3, %%mm6 \n\t" \
00850 "psubd %%mm3, %%mm1 \n\t" \
00851 "psrad $" #shift ", %%mm6 \n\t"\
00852 "psrad $" #shift ", %%mm1 \n\t"\
00853 "packssdw %%mm2, %%mm2 \n\t" \
00854 "packssdw %%mm6, %%mm6 \n\t" \
00855 "movd %%mm2, 32+" #dst " \n\t"\
00856 "packssdw %%mm1, %%mm1 \n\t" \
00857 "packssdw %%mm5, %%mm5 \n\t" \
00858 "movd %%mm6, 48+" #dst " \n\t"\
00859 "movd %%mm1, 64+" #dst " \n\t"\
00860 "movd %%mm5, 80+" #dst " \n\t"
00861
00862
00863 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
00864 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
00865 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
00866 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
00867 "jmp 9f \n\t"
00868
00869 "#.balign 16 \n\t"\
00870 "6: \n\t"
00871 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
00872
00873 #undef IDCT
00874 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00875 "movq " #src0 ", %%mm0 \n\t" \
00876 "movq " #src5 ", %%mm3 \n\t" \
00877 "movq 16(%2), %%mm4 \n\t" \
00878 "pmaddwd %%mm0, %%mm4 \n\t" \
00879 "movq 24(%2), %%mm5 \n\t" \
00880 "pmaddwd %%mm5, %%mm0 \n\t" \
00881 #rounder ", %%mm4 \n\t"\
00882 "movq %%mm4, %%mm6 \n\t" \
00883 #rounder ", %%mm0 \n\t"\
00884 "movq %%mm0, %%mm5 \n\t" \
00885 "movq 56(%2), %%mm1 \n\t" \
00886 "pmaddwd %%mm3, %%mm1 \n\t" \
00887 "movq 72(%2), %%mm7 \n\t" \
00888 "pmaddwd %%mm3, %%mm7 \n\t" \
00889 "paddd %%mm4, %%mm1 \n\t" \
00890 "paddd %%mm4, %%mm4 \n\t" \
00891 "psubd %%mm1, %%mm4 \n\t" \
00892 "psrad $" #shift ", %%mm1 \n\t"\
00893 "psrad $" #shift ", %%mm4 \n\t"\
00894 "movq %%mm0, %%mm2 \n\t" \
00895 "paddd %%mm7, %%mm0 \n\t" \
00896 "psubd %%mm7, %%mm2 \n\t" \
00897 "psrad $" #shift ", %%mm0 \n\t"\
00898 "psrad $" #shift ", %%mm2 \n\t"\
00899 "packssdw %%mm1, %%mm1 \n\t" \
00900 "movd %%mm1, " #dst " \n\t"\
00901 "packssdw %%mm0, %%mm0 \n\t" \
00902 "movd %%mm0, 16+" #dst " \n\t"\
00903 "packssdw %%mm2, %%mm2 \n\t" \
00904 "movd %%mm2, 96+" #dst " \n\t"\
00905 "packssdw %%mm4, %%mm4 \n\t" \
00906 "movd %%mm4, 112+" #dst " \n\t"\
00907 "movq 88(%2), %%mm1 \n\t" \
00908 "pmaddwd %%mm3, %%mm1 \n\t" \
00909 "movq %%mm5, %%mm2 \n\t" \
00910 "pmaddwd 104(%2), %%mm3 \n\t" \
00911 "paddd %%mm1, %%mm2 \n\t" \
00912 "psubd %%mm1, %%mm5 \n\t" \
00913 "psrad $" #shift ", %%mm2 \n\t"\
00914 "psrad $" #shift ", %%mm5 \n\t"\
00915 "movq %%mm6, %%mm1 \n\t" \
00916 "paddd %%mm3, %%mm6 \n\t" \
00917 "psubd %%mm3, %%mm1 \n\t" \
00918 "psrad $" #shift ", %%mm6 \n\t"\
00919 "psrad $" #shift ", %%mm1 \n\t"\
00920 "packssdw %%mm2, %%mm2 \n\t" \
00921 "packssdw %%mm6, %%mm6 \n\t" \
00922 "movd %%mm2, 32+" #dst " \n\t"\
00923 "packssdw %%mm1, %%mm1 \n\t" \
00924 "packssdw %%mm5, %%mm5 \n\t" \
00925 "movd %%mm6, 48+" #dst " \n\t"\
00926 "movd %%mm1, 64+" #dst " \n\t"\
00927 "movd %%mm5, 80+" #dst " \n\t"
00928
00929
00930
00931 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
00932 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
00933 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
00934 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
00935 "jmp 9f \n\t"
00936
00937 "#.balign 16 \n\t"\
00938 "2: \n\t"
00939 Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
00940
/*
 * IDCT variant used by the code that follows label "2:".
 *
 * Inputs: reads src0, src1 and src5. The src4 parameter is accepted for
 * signature compatibility but is never referenced in this body.
 * Coefficients come from the table passed as asm operand %2 (coeffs):
 *   - src0 is multiplied (pmaddwd) by the rows at 16(%2) and 24(%2),
 *     i.e. {C4,C4,C4,C4} and {C4,-C4,C4,-C4};
 *   - src1 and src5 are multiplied by the rows at 48..104(%2).
 * The C2/C6 rows at 32(%2) and 40(%2) are not used here.
 *
 * rounder is pasted as raw text in front of ", %%mm4" and ", %%mm0".
 * NOTE(review): the call sites below pass "/nop"; presumably the assembler
 * treats a line starting with '/' as a comment, so no rounding instruction
 * is emitted -- confirm against the assembler in use.
 *
 * Outputs: every sum/difference is arithmetically shifted right by `shift`
 * (psrad), packed to 16 bit with signed saturation (packssdw) and stored
 * with movd (4 bytes) at dst, 16+dst, 32+dst, ... 112+dst.
 */
00941 #undef IDCT
00942 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
00943 "movq " #src0 ", %%mm0 \n\t" \
00944 "movq " #src1 ", %%mm2 \n\t" \
00945 "movq " #src5 ", %%mm3 \n\t" \
00946 "movq 16(%2), %%mm4 \n\t" \
00947 "pmaddwd %%mm0, %%mm4 \n\t" \
00948 "movq 24(%2), %%mm5 \n\t" \
00949 "pmaddwd %%mm5, %%mm0 \n\t" \
00950 #rounder ", %%mm4 \n\t"\
00951 "movq %%mm4, %%mm6 \n\t" \
00952 "movq 48(%2), %%mm7 \n\t" \
00953 #rounder ", %%mm0 \n\t"\
00954 "pmaddwd %%mm2, %%mm7 \n\t" \
00955 "movq %%mm0, %%mm5 \n\t" \
00956 "movq 56(%2), %%mm1 \n\t" \
00957 "pmaddwd %%mm3, %%mm1 \n\t" \
00958 "pmaddwd 64(%2), %%mm2 \n\t" \
00959 "paddd %%mm1, %%mm7 \n\t" \
00960 "movq 72(%2), %%mm1 \n\t" \
00961 "pmaddwd %%mm3, %%mm1 \n\t" \
00962 "paddd %%mm4, %%mm7 \n\t" \
00963 "paddd %%mm4, %%mm4 \n\t" \
00964 "psubd %%mm7, %%mm4 \n\t" \
00965 "paddd %%mm2, %%mm1 \n\t" \
00966 "psrad $" #shift ", %%mm7 \n\t"\
00967 "psrad $" #shift ", %%mm4 \n\t"\
00968 "movq %%mm0, %%mm2 \n\t" \
00969 "paddd %%mm1, %%mm0 \n\t" \
00970 "psubd %%mm1, %%mm2 \n\t" \
00971 "psrad $" #shift ", %%mm0 \n\t"\
00972 "psrad $" #shift ", %%mm2 \n\t"\
00973 "packssdw %%mm7, %%mm7 \n\t" \
00974 "movd %%mm7, " #dst " \n\t"\
00975 "packssdw %%mm0, %%mm0 \n\t" \
00976 "movd %%mm0, 16+" #dst " \n\t"\
00977 "packssdw %%mm2, %%mm2 \n\t" \
00978 "movd %%mm2, 96+" #dst " \n\t"\
00979 "packssdw %%mm4, %%mm4 \n\t" \
00980 "movd %%mm4, 112+" #dst " \n\t"\
00981 "movq " #src1 ", %%mm0 \n\t" \
00982 "movq 80(%2), %%mm4 \n\t" \
00983 "pmaddwd %%mm0, %%mm4 \n\t" \
00984 "movq 88(%2), %%mm7 \n\t" \
00985 "pmaddwd 96(%2), %%mm0 \n\t" \
00986 "pmaddwd %%mm3, %%mm7 \n\t" \
00987 "movq %%mm5, %%mm2 \n\t" \
00988 "pmaddwd 104(%2), %%mm3 \n\t" \
00989 "paddd %%mm7, %%mm4 \n\t" \
00990 "paddd %%mm4, %%mm2 \n\t" \
00991 "psubd %%mm4, %%mm5 \n\t" \
00992 "psrad $" #shift ", %%mm2 \n\t"\
00993 "psrad $" #shift ", %%mm5 \n\t"\
00994 "movq %%mm6, %%mm4 \n\t" \
00995 "paddd %%mm0, %%mm3 \n\t" \
00996 "paddd %%mm3, %%mm6 \n\t" \
00997 "psubd %%mm3, %%mm4 \n\t" \
00998 "psrad $" #shift ", %%mm6 \n\t"\
00999 "psrad $" #shift ", %%mm4 \n\t"\
01000 "packssdw %%mm2, %%mm2 \n\t" \
01001 "packssdw %%mm6, %%mm6 \n\t" \
01002 "movd %%mm2, 32+" #dst " \n\t"\
01003 "packssdw %%mm4, %%mm4 \n\t" \
01004 "packssdw %%mm5, %%mm5 \n\t" \
01005 "movd %%mm6, 48+" #dst " \n\t"\
01006 "movd %%mm4, 64+" #dst " \n\t"\
01007 "movd %%mm5, 80+" #dst " \n\t"
01008
01009
01010 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
01011 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
01012 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
01013 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
01014 "jmp 9f \n\t"
01015
01016 "#.balign 16 \n\t"\
01017 "3: \n\t"
/*
 * IDCT variant used by the code that follows label "3:".
 *
 * Inputs: reads only src0 and src1; the src4 and src5 parameters are
 * accepted but never referenced in this body.
 * Coefficients come from the table passed as asm operand %2 (coeffs):
 *   - src0 is multiplied (pmaddwd) by the rows at 16(%2) and 24(%2),
 *     i.e. {C4,C4,C4,C4} and {C4,-C4,C4,-C4};
 *   - src1 is multiplied by the rows at 48(%2), 64(%2), 80(%2), 96(%2).
 *
 * rounder is pasted as raw text in front of ", %%mm4" and ", %%mm0".
 * NOTE(review): the call sites below pass "/nop"; presumably the assembler
 * treats a line starting with '/' as a comment, so no rounding instruction
 * is emitted -- confirm against the assembler in use.
 *
 * Outputs: every sum/difference is arithmetically shifted right by `shift`
 * (psrad), packed to 16 bit with signed saturation (packssdw) and stored
 * with movd (4 bytes) at dst, 16+dst, 32+dst, ... 112+dst.
 */
01018 #undef IDCT
01019 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
01020 "movq " #src0 ", %%mm0 \n\t" \
01021 "movq " #src1 ", %%mm2 \n\t" \
01022 "movq 16(%2), %%mm4 \n\t" \
01023 "pmaddwd %%mm0, %%mm4 \n\t" \
01024 "movq 24(%2), %%mm5 \n\t" \
01025 "pmaddwd %%mm5, %%mm0 \n\t" \
01026 #rounder ", %%mm4 \n\t"\
01027 "movq %%mm4, %%mm6 \n\t" \
01028 "movq 48(%2), %%mm7 \n\t" \
01029 #rounder ", %%mm0 \n\t"\
01030 "pmaddwd %%mm2, %%mm7 \n\t" \
01031 "movq %%mm0, %%mm5 \n\t" \
01032 "movq 64(%2), %%mm3 \n\t"\
01033 "pmaddwd %%mm2, %%mm3 \n\t" \
01034 "paddd %%mm4, %%mm7 \n\t" \
01035 "paddd %%mm4, %%mm4 \n\t" \
01036 "psubd %%mm7, %%mm4 \n\t" \
01037 "psrad $" #shift ", %%mm7 \n\t"\
01038 "psrad $" #shift ", %%mm4 \n\t"\
01039 "movq %%mm0, %%mm1 \n\t" \
01040 "paddd %%mm3, %%mm0 \n\t" \
01041 "psubd %%mm3, %%mm1 \n\t" \
01042 "psrad $" #shift ", %%mm0 \n\t"\
01043 "psrad $" #shift ", %%mm1 \n\t"\
01044 "packssdw %%mm7, %%mm7 \n\t" \
01045 "movd %%mm7, " #dst " \n\t"\
01046 "packssdw %%mm0, %%mm0 \n\t" \
01047 "movd %%mm0, 16+" #dst " \n\t"\
01048 "packssdw %%mm1, %%mm1 \n\t" \
01049 "movd %%mm1, 96+" #dst " \n\t"\
01050 "packssdw %%mm4, %%mm4 \n\t" \
01051 "movd %%mm4, 112+" #dst " \n\t"\
01052 "movq 80(%2), %%mm4 \n\t" \
01053 "pmaddwd %%mm2, %%mm4 \n\t" \
01054 "pmaddwd 96(%2), %%mm2 \n\t" \
01055 "movq %%mm5, %%mm1 \n\t" \
01056 "paddd %%mm4, %%mm1 \n\t" \
01057 "psubd %%mm4, %%mm5 \n\t" \
01058 "psrad $" #shift ", %%mm1 \n\t"\
01059 "psrad $" #shift ", %%mm5 \n\t"\
01060 "movq %%mm6, %%mm4 \n\t" \
01061 "paddd %%mm2, %%mm6 \n\t" \
01062 "psubd %%mm2, %%mm4 \n\t" \
01063 "psrad $" #shift ", %%mm6 \n\t"\
01064 "psrad $" #shift ", %%mm4 \n\t"\
01065 "packssdw %%mm1, %%mm1 \n\t" \
01066 "packssdw %%mm6, %%mm6 \n\t" \
01067 "movd %%mm1, 32+" #dst " \n\t"\
01068 "packssdw %%mm4, %%mm4 \n\t" \
01069 "packssdw %%mm5, %%mm5 \n\t" \
01070 "movd %%mm6, 48+" #dst " \n\t"\
01071 "movd %%mm4, 64+" #dst " \n\t"\
01072 "movd %%mm5, 80+" #dst " \n\t"
01073
01074
01075
01076 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
01077 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
01078 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
01079 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
01080 "jmp 9f \n\t"
01081
01082 "#.balign 16 \n\t"\
01083 "5: \n\t"
/*
 * IDCT variant used by the code that follows label "5:".
 *
 * Inputs: reads src0 and src4, plus the quadwords 8 bytes after each of
 * them (8+src0, 8+src4), so one expansion handles two adjacent quadwords.
 * The src1 and src5 parameters are accepted but never referenced.
 * Coefficients come from the table passed as asm operand %2 (coeffs):
 * only the rows at 16(%2), 24(%2), 32(%2) and 40(%2) are used, i.e.
 * {C4,C4,C4,C4}, {C4,-C4,C4,-C4}, {C2,C6,C2,C6} and {C6,-C2,C6,-C2}.
 *
 * rounder is pasted as raw text in front of ", %%mm4", ", %%mm0",
 * ", %%mm1" and ", %%mm2".
 * NOTE(review): the call sites below pass "/nop"; presumably the assembler
 * treats a line starting with '/' as a comment, so no rounding instruction
 * is emitted -- confirm against the assembler in use.
 *
 * Outputs: results are shifted right by `shift` (psrad), two registers are
 * packed together with signed saturation (packssdw) and stored with movq
 * (8 bytes). Each packed register is written to two destinations:
 * mm4 -> dst and 112+dst, mm0 -> 16+dst and 96+dst,
 * mm5 -> 32+dst and 80+dst, mm6 -> 48+dst and 64+dst.
 */
01084 #undef IDCT
01085 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
01086 "movq " #src0 ", %%mm0 \n\t" \
01087 "movq " #src4 ", %%mm1 \n\t" \
01088 "movq 16(%2), %%mm4 \n\t" \
01089 "pmaddwd %%mm0, %%mm4 \n\t" \
01090 "movq 24(%2), %%mm5 \n\t" \
01091 "pmaddwd %%mm5, %%mm0 \n\t" \
01092 "movq 32(%2), %%mm5 \n\t" \
01093 "pmaddwd %%mm1, %%mm5 \n\t" \
01094 "movq 40(%2), %%mm6 \n\t" \
01095 "pmaddwd %%mm6, %%mm1 \n\t" \
01096 #rounder ", %%mm4 \n\t"\
01097 "movq %%mm4, %%mm6 \n\t" \
01098 "paddd %%mm5, %%mm4 \n\t" \
01099 #rounder ", %%mm0 \n\t"\
01100 "psubd %%mm5, %%mm6 \n\t" \
01101 "movq %%mm0, %%mm5 \n\t" \
01102 "paddd %%mm1, %%mm0 \n\t" \
01103 "psubd %%mm1, %%mm5 \n\t" \
01104 "movq 8+" #src0 ", %%mm2 \n\t" \
01105 "movq 8+" #src4 ", %%mm3 \n\t" \
01106 "movq 16(%2), %%mm1 \n\t" \
01107 "pmaddwd %%mm2, %%mm1 \n\t" \
01108 "movq 24(%2), %%mm7 \n\t" \
01109 "pmaddwd %%mm7, %%mm2 \n\t" \
01110 "movq 32(%2), %%mm7 \n\t" \
01111 "pmaddwd %%mm3, %%mm7 \n\t" \
01112 "pmaddwd 40(%2), %%mm3 \n\t" \
01113 #rounder ", %%mm1 \n\t"\
01114 "paddd %%mm1, %%mm7 \n\t" \
01115 "paddd %%mm1, %%mm1 \n\t" \
01116 #rounder ", %%mm2 \n\t"\
01117 "psubd %%mm7, %%mm1 \n\t" \
01118 "paddd %%mm2, %%mm3 \n\t" \
01119 "paddd %%mm2, %%mm2 \n\t" \
01120 "psubd %%mm3, %%mm2 \n\t" \
01121 "psrad $" #shift ", %%mm4 \n\t"\
01122 "psrad $" #shift ", %%mm7 \n\t"\
01123 "psrad $" #shift ", %%mm3 \n\t"\
01124 "packssdw %%mm7, %%mm4 \n\t" \
01125 "movq %%mm4, " #dst " \n\t"\
01126 "psrad $" #shift ", %%mm0 \n\t"\
01127 "packssdw %%mm3, %%mm0 \n\t" \
01128 "movq %%mm0, 16+" #dst " \n\t"\
01129 "movq %%mm0, 96+" #dst " \n\t"\
01130 "movq %%mm4, 112+" #dst " \n\t"\
01131 "psrad $" #shift ", %%mm5 \n\t"\
01132 "psrad $" #shift ", %%mm6 \n\t"\
01133 "psrad $" #shift ", %%mm2 \n\t"\
01134 "packssdw %%mm2, %%mm5 \n\t" \
01135 "movq %%mm5, 32+" #dst " \n\t"\
01136 "psrad $" #shift ", %%mm1 \n\t"\
01137 "packssdw %%mm1, %%mm6 \n\t" \
01138 "movq %%mm6, 48+" #dst " \n\t"\
01139 "movq %%mm6, 64+" #dst " \n\t"\
01140 "movq %%mm5, 80+" #dst " \n\t"
01141
01142
01143
01144 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
01145
01146 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
01147
01148 "jmp 9f \n\t"
01149
01150
01151 "#.balign 16 \n\t"\
01152 "1: \n\t"
/*
 * IDCT variant used by the code that follows label "1:".
 *
 * Inputs: reads src0, src4 and src1; the src5 parameter is accepted but
 * never referenced in this body.
 * Coefficients come from the table passed as asm operand %2 (coeffs):
 *   - src0 is multiplied (pmaddwd) by the rows at 16(%2) and 24(%2),
 *     i.e. {C4,C4,C4,C4} and {C4,-C4,C4,-C4};
 *   - src4 by the rows at 32(%2) and 40(%2),
 *     i.e. {C2,C6,C2,C6} and {C6,-C2,C6,-C2};
 *   - src1 by the rows at 48(%2), 64(%2), 80(%2) and 96(%2).
 *
 * rounder is pasted as raw text in front of ", %%mm4" and ", %%mm0".
 * NOTE(review): the call sites below pass "/nop"; presumably the assembler
 * treats a line starting with '/' as a comment, so no rounding instruction
 * is emitted -- confirm against the assembler in use.
 *
 * Outputs: every sum/difference is arithmetically shifted right by `shift`
 * (psrad), packed to 16 bit with signed saturation (packssdw) and stored
 * with movd (4 bytes) at dst, 16+dst, 32+dst, ... 112+dst.
 */
01153 #undef IDCT
01154 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
01155 "movq " #src0 ", %%mm0 \n\t" \
01156 "movq " #src4 ", %%mm1 \n\t" \
01157 "movq " #src1 ", %%mm2 \n\t" \
01158 "movq 16(%2), %%mm4 \n\t" \
01159 "pmaddwd %%mm0, %%mm4 \n\t" \
01160 "movq 24(%2), %%mm5 \n\t" \
01161 "pmaddwd %%mm5, %%mm0 \n\t" \
01162 "movq 32(%2), %%mm5 \n\t" \
01163 "pmaddwd %%mm1, %%mm5 \n\t" \
01164 "movq 40(%2), %%mm6 \n\t" \
01165 "pmaddwd %%mm6, %%mm1 \n\t" \
01166 #rounder ", %%mm4 \n\t"\
01167 "movq %%mm4, %%mm6 \n\t" \
01168 "movq 48(%2), %%mm7 \n\t" \
01169 #rounder ", %%mm0 \n\t"\
01170 "pmaddwd %%mm2, %%mm7 \n\t" \
01171 "paddd %%mm5, %%mm4 \n\t" \
01172 "psubd %%mm5, %%mm6 \n\t" \
01173 "movq %%mm0, %%mm5 \n\t" \
01174 "paddd %%mm1, %%mm0 \n\t" \
01175 "psubd %%mm1, %%mm5 \n\t" \
01176 "movq 64(%2), %%mm1 \n\t"\
01177 "pmaddwd %%mm2, %%mm1 \n\t" \
01178 "paddd %%mm4, %%mm7 \n\t" \
01179 "paddd %%mm4, %%mm4 \n\t" \
01180 "psubd %%mm7, %%mm4 \n\t" \
01181 "psrad $" #shift ", %%mm7 \n\t"\
01182 "psrad $" #shift ", %%mm4 \n\t"\
01183 "movq %%mm0, %%mm3 \n\t" \
01184 "paddd %%mm1, %%mm0 \n\t" \
01185 "psubd %%mm1, %%mm3 \n\t" \
01186 "psrad $" #shift ", %%mm0 \n\t"\
01187 "psrad $" #shift ", %%mm3 \n\t"\
01188 "packssdw %%mm7, %%mm7 \n\t" \
01189 "movd %%mm7, " #dst " \n\t"\
01190 "packssdw %%mm0, %%mm0 \n\t" \
01191 "movd %%mm0, 16+" #dst " \n\t"\
01192 "packssdw %%mm3, %%mm3 \n\t" \
01193 "movd %%mm3, 96+" #dst " \n\t"\
01194 "packssdw %%mm4, %%mm4 \n\t" \
01195 "movd %%mm4, 112+" #dst " \n\t"\
01196 "movq 80(%2), %%mm4 \n\t" \
01197 "pmaddwd %%mm2, %%mm4 \n\t" \
01198 "pmaddwd 96(%2), %%mm2 \n\t" \
01199 "movq %%mm5, %%mm3 \n\t" \
01200 "paddd %%mm4, %%mm3 \n\t" \
01201 "psubd %%mm4, %%mm5 \n\t" \
01202 "psrad $" #shift ", %%mm3 \n\t"\
01203 "psrad $" #shift ", %%mm5 \n\t"\
01204 "movq %%mm6, %%mm4 \n\t" \
01205 "paddd %%mm2, %%mm6 \n\t" \
01206 "psubd %%mm2, %%mm4 \n\t" \
01207 "psrad $" #shift ", %%mm6 \n\t"\
01208 "packssdw %%mm3, %%mm3 \n\t" \
01209 "movd %%mm3, 32+" #dst " \n\t"\
01210 "psrad $" #shift ", %%mm4 \n\t"\
01211 "packssdw %%mm6, %%mm6 \n\t" \
01212 "movd %%mm6, 48+" #dst " \n\t"\
01213 "packssdw %%mm4, %%mm4 \n\t" \
01214 "packssdw %%mm5, %%mm5 \n\t" \
01215 "movd %%mm4, 64+" #dst " \n\t"\
01216 "movd %%mm5, 80+" #dst " \n\t"
01217
01218
01219
01220 IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
01221 IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20)
01222 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
01223 IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
01224 "jmp 9f \n\t"
01225
01226
01227 "#.balign 16 \n\t"
01228 "7: \n\t"
/*
 * IDCT variant used by the code that follows label "7:".
 *
 * Inputs: reads only src0 and the quadword 8 bytes after it (8+src0).
 * The src4, src1 and src5 parameters are accepted but never referenced.
 * Both quadwords are multiplied (pmaddwd) by the rows at 16(%2) and
 * 24(%2) of the table passed as asm operand %2 (coeffs), i.e.
 * {C4,C4,C4,C4} and {C4,-C4,C4,-C4}.
 * The row at 32(%2) is loaded into mm7, but mm7 is not read again inside
 * this macro.
 *
 * rounder is pasted as raw text in front of ", %%mm4", ", %%mm0",
 * ", %%mm1" and ", %%mm2".
 * NOTE(review): the call sites below pass "/nop"; presumably the assembler
 * treats a line starting with '/' as a comment, so no rounding instruction
 * is emitted -- confirm against the assembler in use.
 *
 * Outputs: the products are shifted right by `shift` (psrad), packed with
 * signed saturation (packssdw) into mm4 and mm0, and stored with movq
 * (8 bytes). Only two distinct values are written:
 * mm4 -> dst, 48+dst, 64+dst and 112+dst;
 * mm0 -> 16+dst, 32+dst, 80+dst and 96+dst.
 */
01229 #undef IDCT
01230 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
01231 "movq " #src0 ", %%mm0 \n\t" \
01232 "movq 16(%2), %%mm4 \n\t" \
01233 "pmaddwd %%mm0, %%mm4 \n\t" \
01234 "movq 24(%2), %%mm5 \n\t" \
01235 "pmaddwd %%mm5, %%mm0 \n\t" \
01236 #rounder ", %%mm4 \n\t"\
01237 #rounder ", %%mm0 \n\t"\
01238 "psrad $" #shift ", %%mm4 \n\t"\
01239 "psrad $" #shift ", %%mm0 \n\t"\
01240 "movq 8+" #src0 ", %%mm2 \n\t" \
01241 "movq 16(%2), %%mm1 \n\t" \
01242 "pmaddwd %%mm2, %%mm1 \n\t" \
01243 "movq 24(%2), %%mm7 \n\t" \
01244 "pmaddwd %%mm7, %%mm2 \n\t" \
01245 "movq 32(%2), %%mm7 \n\t" \
01246 #rounder ", %%mm1 \n\t"\
01247 #rounder ", %%mm2 \n\t"\
01248 "psrad $" #shift ", %%mm1 \n\t"\
01249 "packssdw %%mm1, %%mm4 \n\t" \
01250 "movq %%mm4, " #dst " \n\t"\
01251 "psrad $" #shift ", %%mm2 \n\t"\
01252 "packssdw %%mm2, %%mm0 \n\t" \
01253 "movq %%mm0, 16+" #dst " \n\t"\
01254 "movq %%mm0, 96+" #dst " \n\t"\
01255 "movq %%mm4, 112+" #dst " \n\t"\
01256 "movq %%mm0, 32+" #dst " \n\t"\
01257 "movq %%mm4, 48+" #dst " \n\t"\
01258 "movq %%mm4, 64+" #dst " \n\t"\
01259 "movq %%mm0, 80+" #dst " \n\t"
01260
01261
01262 IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20)
01263
01264 IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20)
01265
01266
01267
01268 #endif
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292 "9: \n\t"
01293 :: "r" (block), "r" (temp), "r" (coeffs)
01294 : "%eax"
01295 );
01296 }
01297
/**
 * Public entry point that runs this file's idct() on a coefficient block.
 *
 * @param block pointer handed unchanged to idct(); the asm in idct() uses it
 *              as the base address of its final stores, so the block is
 *              overwritten with the result.
 *              NOTE(review): presumably 64 int16_t coefficients (8x8) --
 *              confirm against the callers.
 */
01298 void ff_simple_idct_mmx(int16_t *block)
01299 {
01300 idct(block);
01301 }
01302
01303
01304
/**
 * Runs idct() on block, then passes the transformed block to
 * put_pixels_clamped_mmx() together with dest and line_size.
 *
 * @param dest      destination pixel pointer, forwarded as-is.
 * @param line_size forwarded as-is; presumably the byte stride between
 *                  rows of dest -- confirm against put_pixels_clamped_mmx().
 * @param block     coefficient block; modified by idct() before being
 *                  forwarded.
 */
01305 void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
01306 {
01307 idct(block);
01308 put_pixels_clamped_mmx(block, dest, line_size);
01309 }
/**
 * Runs idct() on block, then passes the transformed block to
 * add_pixels_clamped_mmx() together with dest and line_size.
 *
 * @param dest      destination pixel pointer, forwarded as-is.
 * @param line_size forwarded as-is; presumably the byte stride between
 *                  rows of dest -- confirm against add_pixels_clamped_mmx().
 * @param block     coefficient block; modified by idct() before being
 *                  forwarded.
 */
01310 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
01311 {
01312 idct(block);
01313 add_pixels_clamped_mmx(block, dest, line_size);
01314 }