00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00024 #include "../dsputil.h"
00025 #include "mmx.h"
00026
/*
 * Bias stored in the last four words of idct_constants[] by
 * ff_vp3_dsp_init_mmx() and added (through the Eight pointer) by
 * ColumnIDCT() immediately before its arithmetic right shift by 4,
 * i.e. half of 1 << 4.
 */
00027 #define IdctAdjustBeforeShift 8
00028
00029
00030
00031
00032
00033
/*
 * Run-time constant table: (4 + 7 + 1) groups of four 16-bit words.
 *   words  0..15  four groups that no code visible in this file writes
 *                 (static storage, so they start out zero)
 *   words 16..43  seven groups, each holding four copies of one entry of
 *                 idct_cosine_table[] (filled by ff_vp3_dsp_init_mmx();
 *                 addressed through the C(x) macro)
 *   words 44..47  four copies of IdctAdjustBeforeShift (addressed through
 *                 the Eight macro)
 * Every value is replicated four times so one 64-bit load yields the same
 * constant in all four 16-bit lanes.
 */
00034 static uint16_t idct_constants[(4 + 7 + 1) * 4];
/*
 * The seven values equal round(65536 * cos(k * pi / 16)) for k = 1..7.
 * The first five exceed 0x7FFF; BeginIDCT() compensates for that when it
 * multiplies by them.
 */
00035 static const uint16_t idct_cosine_table[7] = {
00036 64277, 60547, 54491, 46341, 36410, 25080, 12785
00037 };
00038
/*
 * Short aliases r0..r7 for the register names mm0..mm7 that are passed to
 * the instruction macros (movq_m2r, pmulhw_r2r, ...) used below.
 * NOTE(review): those macros are presumably provided by "mmx.h" and wrap
 * the same-named MMX instructions -- confirm against that header.
 */
00039 #define r0 mm0
00040 #define r1 mm1
00041 #define r2 mm2
00042 #define r3 mm3
00043 #define r4 mm4
00044 #define r5 mm5
00045 #define r6 mm6
00046 #define r7 mm7
00047
00048
/*
 * BeginIDCT(): shared first part of the 1-D transform used by both
 * RowIDCT() and ColumnIDCT().
 *
 * The expansion site must define:
 *   I(k)  address of input k for k = 0..3
 *   J(k)  address of input k for k = 4..7
 *   C(k)  address of the replicated cosine constant k (1..7)
 *
 * Reads I(0)..I(3), J(4)..J(7) and C(1)..C(7).  Uses I(1) and I(2) as
 * scratch: an intermediate is spilled to I(1) and reloaded into r0 at the
 * very end, and another intermediate is left in I(2) for the caller macro
 * to pick up.  All other intermediates are left in r0..r7.
 *
 * Argument order of the instruction macros is (source, destination).
 *
 * NOTE(review): assuming pmulhw is the signed high-word multiply of the
 * MMX instruction set, a constant c > 0x7FFF acts as c - 65536, so the
 * product comes out as (c * x >> 16) - x.  That matches the code: every
 * pmulhw by C(1)..C(5) is followed by a paddw of the original multiplicand,
 * whereas products with C(6) and C(7) (both < 0x8000) get no such add.
 *
 * The instruction order is hand-interleaved; do not reorder.
 */
00049 #define BeginIDCT() { \
00050 movq_m2r(*I(3), r2); /* r2 = input 3 */ \
00051 movq_m2r(*C(3), r6); /* r6 = C(3) */ \
00052 movq_r2r(r2, r4); \
00053 movq_m2r(*J(5), r7); /* r7 = input 5 */ \
00054 pmulhw_r2r(r6, r4); \
00055 movq_m2r(*C(5), r1); /* r1 = C(5) */ \
00056 pmulhw_r2r(r7, r6); \
00057 movq_r2r(r1, r5); \
00058 pmulhw_r2r(r2, r1); \
00059 movq_m2r(*I(1), r3); /* r3 = input 1 */ \
00060 pmulhw_r2r(r7, r5); \
00061 movq_m2r(*C(1), r0); /* r0 = C(1) */ \
00062 paddw_r2r(r2, r4); /* add multiplicand back (C(3) product) */ \
00063 paddw_r2r(r7, r6); /* add multiplicand back (C(3) product) */ \
00064 paddw_r2r(r1, r2); /* add multiplicand back (C(5) product) */ \
00065 movq_m2r(*J(7), r1); /* r1 = input 7 */ \
00066 paddw_r2r(r5, r7); /* add multiplicand back (C(5) product) */ \
00067 movq_r2r(r0, r5); \
00068 pmulhw_r2r(r3, r0); \
00069 paddsw_r2r(r7, r4); \
00070 pmulhw_r2r(r1, r5); \
00071 movq_m2r(*C(7), r7); /* r7 = C(7) */ \
00072 psubsw_r2r(r2, r6); \
00073 paddw_r2r(r3, r0); /* add multiplicand back (C(1) product) */ \
00074 pmulhw_r2r(r7, r3); \
00075 movq_m2r(*I(2), r2); /* r2 = input 2 */ \
00076 pmulhw_r2r(r1, r7); \
00077 paddw_r2r(r1, r5); /* add multiplicand back (C(1) product) */ \
00078 movq_r2r(r2, r1); \
00079 pmulhw_m2r(*C(2), r2); \
00080 psubsw_r2r(r5, r3); \
00081 movq_m2r(*J(6), r5); /* r5 = input 6 */ \
00082 paddsw_r2r(r7, r0); \
00083 movq_r2r(r5, r7); \
00084 psubsw_r2r(r4, r0); \
00085 pmulhw_m2r(*C(2), r5); \
00086 paddw_r2r(r1, r2); /* add multiplicand back (C(2) product) */ \
00087 pmulhw_m2r(*C(6), r1); \
00088 paddsw_r2r(r4, r4); \
00089 paddsw_r2r(r0, r4); \
00090 psubsw_r2r(r6, r3); \
00091 paddw_r2r(r7, r5); /* add multiplicand back (C(2) product) */ \
00092 paddsw_r2r(r6, r6); \
00093 pmulhw_m2r(*C(6), r7); \
00094 paddsw_r2r(r3, r6); \
00095 movq_r2m(r4, *I(1)); /* spill r4; input 1 was already consumed */ \
00096 psubsw_r2r(r5, r1); \
00097 movq_m2r(*C(4), r4); /* r4 = C(4) */ \
00098 movq_r2r(r3, r5); \
00099 pmulhw_r2r(r4, r3); \
00100 paddsw_r2r(r2, r7); \
00101 movq_r2m(r6, *I(2)); /* spill r6; read back by RowIDCT/ColumnIDCT */ \
00102 movq_r2r(r0, r2); \
00103 movq_m2r(*I(0), r6); /* r6 = input 0 */ \
00104 pmulhw_r2r(r4, r0); \
00105 paddw_r2r(r3, r5); /* add multiplicand back (C(4) product) */ \
00106 movq_m2r(*J(4), r3); /* r3 = input 4 */ \
00107 psubsw_r2r(r1, r5); \
00108 paddw_r2r(r0, r2); /* add multiplicand back (C(4) product) */ \
00109 psubsw_r2r(r3, r6); \
00110 movq_r2r(r6, r0); \
00111 pmulhw_r2r(r4, r6); \
00112 paddsw_r2r(r3, r3); \
00113 paddsw_r2r(r1, r1); \
00114 paddsw_r2r(r0, r3); \
00115 paddsw_r2r(r5, r1); \
00116 pmulhw_r2r(r3, r4); \
00117 paddsw_r2r(r0, r6); /* add multiplicand back (C(4) product) */ \
00118 psubsw_r2r(r2, r6); \
00119 paddsw_r2r(r2, r2); \
00120 movq_m2r(*I(1), r0); /* reload the value spilled to I(1) above */ \
00121 paddsw_r2r(r6, r2); \
00122 paddw_r2r(r3, r4); /* add multiplicand back (C(4) product) */ \
00123 psubsw_r2r(r1, r2); \
00124 }
00125
00126
/*
 * RowIDCT(): first-pass 1-D transform.  Runs BeginIDCT() and then finishes
 * the remaining sum/difference pairs with saturating adds and subtracts.
 * No bias is added and no shift is applied in this pass.
 *
 * Results are not written back to their final locations: seven of them
 * stay in r0 and r2..r7, and r1 is stored to I(1).  That is exactly the
 * state Transpose() consumes, so RowIDCT() is meant to be followed by
 * Transpose() with the same I()/J() definitions.
 */
00127 #define RowIDCT() { \
00128 \
00129 BeginIDCT(); \
00130 \
00131 movq_m2r(*I(2), r3); /* fetch the intermediate BeginIDCT left in I(2) */ \
00132 psubsw_r2r(r7, r4); \
00133 paddsw_r2r(r1, r1); \
00134 paddsw_r2r(r7, r7); \
00135 paddsw_r2r(r2, r1); \
00136 paddsw_r2r(r4, r7); \
00137 psubsw_r2r(r3, r4); \
00138 paddsw_r2r(r3, r3); \
00139 psubsw_r2r(r5, r6); \
00140 paddsw_r2r(r5, r5); \
00141 paddsw_r2r(r4, r3); \
00142 paddsw_r2r(r6, r5); \
00143 psubsw_r2r(r0, r7); \
00144 paddsw_r2r(r0, r0); \
00145 movq_r2m(r1, *I(1)); /* r1 goes to memory; Transpose() reloads it */ \
00146 paddsw_r2r(r7, r0); \
00147 }
00148
00149
/*
 * ColumnIDCT(): second-pass 1-D transform.  Runs BeginIDCT(), finishes the
 * sum/difference pairs, and for every output adds the bias at *Eight and
 * shifts right arithmetically by 4 before storing.
 *
 * The bias is added once per pair, to the difference term; the matching
 * sum is then formed from that already-biased difference, so both outputs
 * of the pair carry it.
 *
 * Requires I(), J(), C() and Eight at the expansion site.  Writes all eight
 * outputs: I(0)..I(3) and J(4)..J(7).
 */
00150 #define ColumnIDCT() { \
00151 \
00152 BeginIDCT(); \
00153 \
00154 paddsw_m2r(*Eight, r2); /* bias before the shift */ \
00155 paddsw_r2r(r1, r1); \
00156 paddsw_r2r(r2, r1); \
00157 psraw_i2r(4, r2); \
00158 psubsw_r2r(r7, r4); \
00159 psraw_i2r(4, r1); \
00160 movq_m2r(*I(2), r3); /* fetch BeginIDCT's spill before I(2) is overwritten */ \
00161 paddsw_r2r(r7, r7); \
00162 movq_r2m(r2, *I(2)); /* output 2 */ \
00163 paddsw_r2r(r4, r7); \
00164 movq_r2m(r1, *I(1)); /* output 1 */ \
00165 psubsw_r2r(r3, r4); \
00166 paddsw_m2r(*Eight, r4); /* bias before the shift */ \
00167 paddsw_r2r(r3, r3); \
00168 paddsw_r2r(r4, r3); \
00169 psraw_i2r(4, r4); \
00170 psubsw_r2r(r5, r6); \
00171 psraw_i2r(4, r3); \
00172 paddsw_m2r(*Eight, r6); /* bias before the shift */ \
00173 paddsw_r2r(r5, r5); \
00174 paddsw_r2r(r6, r5); \
00175 psraw_i2r(4, r6); \
00176 movq_r2m(r4, *J(4)); /* output 4 */ \
00177 psraw_i2r(4, r5); \
00178 movq_r2m(r3, *I(3)); /* output 3 */ \
00179 psubsw_r2r(r0, r7); \
00180 paddsw_m2r(*Eight, r7); /* bias before the shift */ \
00181 paddsw_r2r(r0, r0); \
00182 paddsw_r2r(r7, r0); \
00183 psraw_i2r(4, r7); \
00184 movq_r2m(r6, *J(6)); /* output 6 */ \
00185 psraw_i2r(4, r0); \
00186 movq_r2m(r5, *J(5)); /* output 5 */ \
00187 movq_r2m(r7, *J(7)); /* output 7 */ \
00188 movq_r2m(r0, *I(0)); /* output 0 */ \
00189 }
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
/*
 * Transpose(): transposes two 4x4 blocks of 16-bit words and stores them.
 *
 * Input state (as left by RowIDCT()):
 *   block A rows: r0, the quadword stored at I(1), r2, r3
 *   block B rows: r4, r5, r6, r7
 * Output:
 *   transposed block B -> J(4), J(5), J(6), J(7)
 *   transposed block A -> I(0), I(1), I(2), I(3)
 *
 * Only eight registers are available, so r0 is parked in I(0) while block B
 * is processed and reloaded afterwards; I(0) is later overwritten with the
 * final result.  All of r0..r7 are clobbered.
 *
 * Each 4x4 transpose is the usual two-step unpack: interleave words of row
 * pairs (punpck{l,h}wd), then interleave doublewords of those results
 * (punpck{l,h}dq).
 */
00222 #define Transpose() { \
00223 movq_r2r(r4, r1); \
00224 punpcklwd_r2r(r5, r4); \
00225 movq_r2m(r0, *I(0)); /* park block A row 0 to free a register */ \
00226 punpckhwd_r2r(r5, r1); \
00227 movq_r2r(r6, r0); \
00228 punpcklwd_r2r(r7, r6); \
00229 movq_r2r(r4, r5); \
00230 punpckldq_r2r(r6, r4); \
00231 punpckhdq_r2r(r6, r5); \
00232 movq_r2r(r1, r6); \
00233 movq_r2m(r4, *J(4)); /* block B, transposed row 0 */ \
00234 punpckhwd_r2r(r7, r0); \
00235 movq_r2m(r5, *J(5)); /* block B, transposed row 1 */ \
00236 punpckhdq_r2r(r0, r6); \
00237 movq_m2r(*I(0), r4); /* reload block A row 0 */ \
00238 punpckldq_r2r(r0, r1); \
00239 movq_m2r(*I(1), r5); /* load block A row 1 */ \
00240 movq_r2r(r4, r0); \
00241 movq_r2m(r6, *J(7)); /* block B, transposed row 3 */ \
00242 punpcklwd_r2r(r5, r0); \
00243 movq_r2m(r1, *J(6)); /* block B, transposed row 2 */ \
00244 punpckhwd_r2r(r5, r4); \
00245 movq_r2r(r2, r5); \
00246 punpcklwd_r2r(r3, r2); \
00247 movq_r2r(r0, r1); \
00248 punpckldq_r2r(r2, r0); \
00249 punpckhdq_r2r(r2, r1); \
00250 movq_r2r(r4, r2); \
00251 movq_r2m(r0, *I(0)); /* block A, transposed row 0 */ \
00252 punpckhwd_r2r(r3, r5); \
00253 movq_r2m(r1, *I(1)); /* block A, transposed row 1 */ \
00254 punpckhdq_r2r(r5, r4); \
00255 punpckldq_r2r(r5, r2); \
00256 movq_r2m(r4, *I(3)); /* block A, transposed row 3 */ \
00257 movq_r2m(r2, *I(2)); /* block A, transposed row 2 */ \
00258 }
00259
00260 void ff_vp3_dsp_init_mmx(void)
00261 {
00262 int j = 16;
00263 uint16_t *p;
00264
00265 j = 1;
00266 do {
00267 p = idct_constants + ((j + 3) << 2);
00268 p[0] = p[1] = p[2] = p[3] = idct_cosine_table[j - 1];
00269 } while (++j <= 7);
00270
00271 idct_constants[44] = idct_constants[45] =
00272 idct_constants[46] = idct_constants[47] = IdctAdjustBeforeShift;
00273 }
00274
/**
 * In-place 8x8 inverse DCT built from the MMX macros above.
 *
 * @param output_data  buffer of 64 int16_t values (words 0..63 are read and
 *                     written); it holds the input on entry and the result
 *                     on return.
 *
 * ff_vp3_dsp_init_mmx() must have been called first so that
 * idct_constants[] is populated.
 *
 * The I()/J() address macros are redefined before each stage to select
 * which quadwords (groups of four int16_t) the stage works on:
 *   1. RowIDCT + Transpose on words 0..31
 *   2. RowIDCT + Transpose on words 32..63
 *   3. ColumnIDCT on the first four words of each 8-word row
 *   4. ColumnIDCT on the last four words of each 8-word row
 *
 * NOTE(review): the coefficient order expected in output_data on entry is
 * not documented in the visible code; in stages 1 and 2 inputs 4..7 are
 * taken from the second half of the same four rows -- confirm against the
 * caller.
 * NOTE(review): no emms is issued here; presumably the caller is
 * responsible for restoring the FPU state.
 *
 * C() and Eight are intentionally left defined on exit, as before.
 */
void ff_vp3_idct_mmx(int16_t *output_data)
{
    /* replicated cosine constant x (1..7) and the pre-shift bias */
#define C(x) (idct_constants + 16 + ((x) - 1) * 4)
#define Eight (idct_constants + 44)

    /* stage 1: inputs 0..3 at row K, words 0..3; inputs 4..7 at row K-4, words 4..7 */
#define I(K) (output_data + (K) * 8)
#define J(K) (output_data + (((K) - 4) * 8) + 4)

    RowIDCT();
    Transpose();

    /* stage 2: same layout, shifted by 32 words */
#undef I
#undef J
#define I(K) (output_data + ((K) * 8) + 32)
#define J(K) (output_data + (((K) - 4) * 8) + 36)

    RowIDCT();
    Transpose();

    /* stage 3: words 0..3 of every 8-word row */
#undef I
#undef J
#define I(K) (output_data + (K) * 8)
#define J(K) (output_data + (K) * 8)

    ColumnIDCT();

    /* stage 4: words 4..7 of every 8-word row */
#undef I
#undef J
#define I(K) (output_data + ((K) * 8) + 4)
#define J(K) (output_data + ((K) * 8) + 4)

    ColumnIDCT();

#undef I
#undef J

}