00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include "../common.h"
00015 #include "../dsputil.h"
00016 #include "mmi.h"
00017
/*
 * Fixed-point precision of the two passes.  SHIFT_INV_ROW is the shift count
 * handed to the psraw steps that finish DCT_8_INV_ROW1; SHIFT_INV_COL is the
 * shift count handed to the psrah steps in DCT_8_INV_COL8_STORE and
 * DCT_8_INV_COL8_PMS.  Both are derived from BITS_INV_ACC.
 */
00018 #define BITS_INV_ACC 5 // 4 or 5 for IEEE
00019 #define SHIFT_INV_ROW (16 - BITS_INV_ACC)
00020 #define SHIFT_INV_COL (1 + BITS_INV_ACC)
00021
/*
 * Multipliers that consttable stores eight times each for the column pass.
 * Numerically they equal tan(pi/16), tan(2*pi/16), tan(3*pi/16) and
 * cos(pi/4), each multiplied by 2^15 and rounded to an integer.
 */
00022 #define TG1 6518
00023 #define TG2 13573
00024 #define TG3 21895
00025 #define CS4 23170
00026
/*
 * Byte offsets into consttable.  They are used as displacements from $24,
 * which every entry point loads with the address of consttable[0].
 * The numbers assume 16 bytes per 8-entry table row, i.e. a 2-byte short.
 */
/* The first two rows of the table: rounding constants for the row pass. */
00027 #define ROUNDER_0 0
00028 #define ROUNDER_1 16
00029
/*
 * Four 64-byte (four-row) coefficient tables for DCT_8_INV_ROW1.  The entry
 * points pass TAB_i_04 for block rows 0 and 4, TAB_i_17 for rows 1 and 7,
 * TAB_i_26 for rows 2 and 6 and TAB_i_35 for rows 3 and 5.
 */
00030 #define TAB_i_04 (32+0)
00031 #define TAB_i_17 (32+64)
00032 #define TAB_i_26 (32+128)
00033 #define TAB_i_35 (32+192)
00034
/* The rows holding TG1, TG2, TG3 and CS4, loaded by DCT_8_INV_COL8. */
00035 #define TG_1_16 (32+256+0)
00036 #define TG_2_16 (32+256+16)
00037 #define TG_3_16 (32+256+32)
00038 #define COS_4_16 (32+256+48)
00039
/* The final row (eight copies of 255), loaded into $11 before PUT/ADD. */
00040 #define CLIPMAX (32+256+64+0)
00041
/*
 * Constant pool for the IDCT, addressed from assembly through $24 plus one of
 * the byte offsets defined above (ROUNDER_x, TAB_i_xx, TG_x_16, COS_4_16,
 * CLIPMAX).  Every source line of the initializer is one 8-entry row; the
 * order and count of rows must stay in step with those offsets.  align16
 * comes from a project header that is not visible here.
 */
00042 static short consttable[] align16 = {
00043 /* offset ROUNDER_0: rounding constant used for block row 0 */
00044 0x3ff, 1, 0x3ff, 1, 0x3ff, 1, 0x3ff, 1,
00045 /* offset ROUNDER_1: rounding constant used for block rows 1..7 */
00046 0x3ff, 0, 0x3ff, 0, 0x3ff, 0, 0x3ff, 0,
00047 /* offset TAB_i_04: coefficients for block rows 0 and 4 */
00048 16384, 21407, -16384, -21407, 22725, 19266, -22725, -12873,
00049 8867, 16384, 8867, 16384, 4520, 12873, -4520, 19266,
00050 16384, -8867, 16384, -8867, 12873, -22725, 19266, -22725,
00051 21407, -16384, -21407, 16384, 19266, 4520, -12873, 4520,
00052 /* offset TAB_i_17: coefficients for block rows 1 and 7 */
00053 22725, 29692, -22725, -29692, 31521, 26722, -31521, -17855,
00054 12299, 22725, 12299, 22725, 6270, 17855, -6270, 26722,
00055 22725, -12299, 22725, -12299, 17855, -31521, 26722, -31521,
00056 29692, -22725, -29692, 22725, 26722, 6270, -17855, 6270,
00057 /* offset TAB_i_26: coefficients for block rows 2 and 6 */
00058 21407, 27969, -21407, -27969, 29692, 25172, -29692, -16819,
00059 11585, 21407, 11585, 21407, 5906, 16819, -5906, 25172,
00060 21407, -11585, 21407, -11585, 16819, -29692, 25172, -29692,
00061 27969, -21407, -27969, 21407, 25172, 5906, -16819, 5906,
00062 /* offset TAB_i_35: coefficients for block rows 3 and 5 */
00063 19266, 25172, -19266, -25172, 26722, 22654, -26722, -15137,
00064 10426, 19266, 10426, 19266, 5315, 15137, -5315, 22654,
00065 19266, -10426, 19266, -10426, 15137, -26722, 22654, -26722,
00066 25172, -19266, -25172, 19266, 22654, 5315, -15137, 5315,
00067 /* offsets TG_1_16, TG_2_16, TG_3_16, COS_4_16: column-pass multipliers */
00068 TG1, TG1, TG1, TG1, TG1, TG1, TG1, TG1,
00069 TG2, TG2, TG2, TG2, TG2, TG2, TG2, TG2,
00070 TG3, TG3, TG3, TG3, TG3, TG3, TG3, TG3,
00071 CS4, CS4, CS4, CS4, CS4, CS4, CS4, CS4,
00072 /* offset CLIPMAX: upper clamp applied by PUT and ADD */
00073 255, 255, 255, 255, 255, 255, 255, 255
00074 };
00075
00076
/*
 * Row pass for one 16-byte row of the coefficient block.
 *
 *   blk     register holding the base address of the block
 *   rowoff  byte offset of the row inside the block
 *   taboff  byte offset (from $24) of the 64-byte coefficient table to use,
 *           one of TAB_i_04 / TAB_i_17 / TAB_i_26 / TAB_i_35
 *   rnd     register holding the rounding constant
 *   outreg  register that receives the finished row
 *
 * Preconditions: $24 holds the address of consttable.
 * Scratch registers: $2 and $16..$20.
 * rnd may name the same register as outreg (the first call in each entry
 * point does so): rnd is read before outreg is written for the first time.
 *
 * NOTE(review): lq, prevh, phmadh, paddw, psubw, pcpyld, pcpyud, psraw and
 * ppach are wrappers from mmi.h, which is not visible here.  The comments
 * below assume they emit the EE instruction of the same name with the
 * destination as the last argument -- confirm against mmi.h.
 */
00077 #define DCT_8_INV_ROW1(blk, rowoff, taboff, rnd, outreg) { \
00078 lq(blk, rowoff, $16); /* fetch the input row */ \
00079 \
00080 lq($24, 0+taboff, $17); /* $17..$20 receive the four rows of the table */ \
00081 \
00082 lq($24, 16+taboff, $18); \
00083 prevh($16, $2); /* $2: second view of the input row, derived from $16 */ \
00084 lq($24, 32+taboff, $19); \
00085 phmadh($17, $16, $17); /* table rows 0 and 2 are combined with $16 ... */ \
00086 lq($24, 48+taboff, $20); \
00087 phmadh($18, $2, $18); /* ... table rows 1 and 3 with $2 */ \
00088 phmadh($19, $16, $19); \
00089 phmadh($20, $2, $20); \
00090 paddw($17, $18, $17); \
00091 paddw($19, $20, $19); \
00092 pcpyld($19, $17, $18); \
00093 pcpyud($17, $19, $20); \
00094 paddw($18, rnd, $18); /* rounding constant goes in before the shifts */ \
00095 paddw($18, $20, $17); /* sum and difference of the two partial results */ \
00096 psubw($18, $20, $20); \
00097 psraw($17, SHIFT_INV_ROW, $17); /* shift both by SHIFT_INV_ROW */ \
00098 psraw($20, SHIFT_INV_ROW, $20); \
00099 ppach($20, $17, outreg); /* first write to outreg */ \
00100 \
00101 prevh(outreg, $2); /* the last three steps rearrange outreg via $2 */ \
00102 pcpyud($2, $2, $2); \
00103 pcpyld($2, outreg, outreg); \
00104 }
00105
00106
/*
 * Column pass, first half: combines the eight row results into eight
 * intermediate registers.
 *
 * Inputs:   $8..$15, the outputs of the eight DCT_8_INV_ROW1 invocations for
 *           block rows 0..7 (in that order), and $24 = address of consttable.
 * Outputs:  $16, $17, $18, $19 and $20, $21, $22, $23.  DCT_8_INV_COL8_STORE
 *           and DCT_8_INV_COL8_PMS consume them pairwise ($16 with $20,
 *           $17 with $21, $18 with $22, $19 with $23).
 * Scratch:  $2 (current multiplier row) and $3.
 * $8..$15 are only read, never written, by this macro.
 *
 * The five-step group pmulth / psraw 15 / pmfhl_uw / psraw 15 / pinteh occurs
 * eight times.  NOTE(review): it appears to build "(a * b) >> 15" for all
 * eight 16-bit lanes, with half of the products delivered in the pmulth
 * destination, the other half fetched with pmfhl_uw and the two halves merged
 * by pinteh -- confirm against the EE instruction documentation and mmi.h.
 *
 * Unlike DCT_8_INV_ROW1 this macro is not wrapped in braces; it expands to a
 * plain sequence of statements.
 */
00107 #define DCT_8_INV_COL8() \
00108 \
00109 lq($24, TG_3_16, $2); /* $2 = row of TG3 */ \
00110 \
00111 pmulth($11, $2, $17); /* $17 = product($11, TG3) - $13 */ \
00112 psraw($17, 15, $17); \
00113 pmfhl_uw($3); \
00114 psraw($3, 15, $3); \
00115 pinteh($3, $17, $17); \
00116 psubh($17, $13, $17); \
00117 \
00118 pmulth($13, $2, $18); /* $18 = product($13, TG3) + $11 */ \
00119 psraw($18, 15, $18); \
00120 pmfhl_uw($3); \
00121 psraw($3, 15, $3); \
00122 pinteh($3, $18, $18); \
00123 paddh($18, $11, $18); \
00124 \
00125 lq($24, TG_1_16, $2); /* $2 = row of TG1 */ \
00126 \
00127 pmulth($15, $2, $19); /* $19 = product($15, TG1) + $9 */ \
00128 psraw($19, 15, $19); \
00129 pmfhl_uw($3); \
00130 psraw($3, 15, $3); \
00131 pinteh($3, $19, $19); \
00132 paddh($19, $9, $19); \
00133 \
00134 pmulth($9, $2, $20); /* $20 = product($9, TG1) - $15 */ \
00135 psraw($20, 15, $20); \
00136 pmfhl_uw($3); \
00137 psraw($3, 15, $3); \
00138 pinteh($3, $20, $20); \
00139 psubh($20, $15, $20); \
00140 \
00141 psubh($19, $18, $3); /* butterflies on the four values just computed */ \
00142 paddh($20, $17, $16); \
00143 psubh($20, $17, $23); /* $23 is final from here on */ \
00144 paddh($19, $18, $20); /* $20 is final from here on */ \
00145 \
00146 lq($24, COS_4_16, $2); /* $2 = row of CS4 */ \
00147 \
00148 paddh($3, $16, $21); \
00149 psubh($3, $16, $22); \
00150 \
00151 pmulth($21, $2, $21); /* $21 = product($21, CS4), final */ \
00152 psraw($21, 15, $21); \
00153 pmfhl_uw($3); \
00154 psraw($3, 15, $3); \
00155 pinteh($3, $21, $21); \
00156 \
00157 pmulth($22, $2, $22); /* $22 = product($22, CS4), final */ \
00158 psraw($22, 15, $22); \
00159 pmfhl_uw($3); \
00160 psraw($3, 15, $3); \
00161 pinteh($3, $22, $22); \
00162 \
00163 lq($24, TG_2_16, $2); /* $2 = row of TG2 */ \
00164 \
00165 pmulth($10, $2, $17); /* $17 = product($10, TG2) - $14 */ \
00166 psraw($17, 15, $17); \
00167 pmfhl_uw($3); \
00168 psraw($3, 15, $3); \
00169 pinteh($3, $17, $17); \
00170 psubh($17, $14, $17); \
00171 \
00172 pmulth($14, $2, $18); /* $18 = product($14, TG2) + $10 */ \
00173 psraw($18, 15, $18); \
00174 pmfhl_uw($3); \
00175 psraw($3, 15, $3); \
00176 pinteh($3, $18, $18); \
00177 paddh($18, $10, $18); \
00178 \
00179 paddh($8, $12, $2); /* sum and difference of rows 0 and 4 */ \
00180 psubh($8, $12, $3); \
00181 \
00182 paddh($2, $18, $16); /* final $16, $19, $18, $17 */ \
00183 psubh($2, $18, $19); \
00184 psubh($3, $17, $18); \
00185 paddh($3, $17, $17);
00186
00187
/*
 * Column pass, second half, writing the result to memory.
 *
 *   blk  register holding the base address of the destination block
 *
 * Consumes $16..$23 as left by DCT_8_INV_COL8.  For each pair
 * ($16,$20), ($17,$21), ($18,$22), ($19,$23) the sum and the difference are
 * shifted by SHIFT_INV_COL and stored as 16-byte rows:
 *
 *   sums        -> blk + 0,   16, 32, 48
 *   differences -> blk + 112, 96, 80, 64
 *
 * $2 and $3 are used as scratch; $16..$19 are overwritten.
 * Expands to a plain statement sequence (no enclosing braces).
 */
00188 #define DCT_8_INV_COL8_STORE(blk) \
00189 \
00190 paddh($16, $20, $2); /* rows at offsets 0 and 112 */ \
00191 psubh($16, $20, $16); \
00192 psrah($2, SHIFT_INV_COL, $2); \
00193 psrah($16, SHIFT_INV_COL, $16); \
00194 sq($2, 0, blk); \
00195 sq($16, 112, blk); \
00196 \
00197 paddh($17, $21, $3); /* rows at offsets 16 and 96 */ \
00198 psubh($17, $21, $17); \
00199 psrah($3, SHIFT_INV_COL, $3); \
00200 psrah($17, SHIFT_INV_COL, $17); \
00201 sq($3, 16, blk); \
00202 sq($17, 96, blk); \
00203 \
00204 paddh($18, $22, $2); /* rows at offsets 32 and 80 */ \
00205 psubh($18, $22, $18); \
00206 psrah($2, SHIFT_INV_COL, $2); \
00207 psrah($18, SHIFT_INV_COL, $18); \
00208 sq($2, 32, blk); \
00209 sq($18, 80, blk); \
00210 \
00211 paddh($19, $23, $3); /* rows at offsets 48 and 64 */ \
00212 psubh($19, $23, $19); \
00213 psrah($3, SHIFT_INV_COL, $3); \
00214 psrah($19, SHIFT_INV_COL, $19); \
00215 sq($3, 48, blk); \
00216 sq($19, 64, blk);
00217
00218
00219
/*
 * Column pass, second half, keeping the result in registers.
 *
 * Same sums, differences and SHIFT_INV_COL shifts as DCT_8_INV_COL8_STORE,
 * but nothing is written to memory.  On exit:
 *
 *   $16, $17, $18, $19  hold the shifted sums        (STORE offsets 0..48)
 *   $20, $21, $22, $23  hold the shifted differences (STORE offsets 112..64)
 *
 * so the rows in memory order are $16, $17, $18, $19, $23, $22, $21, $20,
 * which is the order DCT_8_INV_COL8_PUT and DCT_8_INV_COL8_ADD walk them in.
 * $2 and $3 are used as scratch.  $11 is not touched, which lets the callers
 * load the clamp row into $11 before invoking this macro.
 */
00220 #define DCT_8_INV_COL8_PMS() \
00221 paddh($16, $20, $2); \
00222 psubh($16, $20, $20); \
00223 psrah($2, SHIFT_INV_COL, $16); \
00224 psrah($20, SHIFT_INV_COL, $20); \
00225 \
00226 paddh($17, $21, $3); \
00227 psubh($17, $21, $21); \
00228 psrah($3, SHIFT_INV_COL, $17); \
00229 psrah($21, SHIFT_INV_COL, $21); \
00230 \
00231 paddh($18, $22, $2); \
00232 psubh($18, $22, $22); \
00233 psrah($2, SHIFT_INV_COL, $18); \
00234 psrah($22, SHIFT_INV_COL, $22); \
00235 \
00236 paddh($19, $23, $3); \
00237 psubh($19, $23, $23); \
00238 psrah($3, SHIFT_INV_COL, $19); \
00239 psrah($23, SHIFT_INV_COL, $23);
00240
/*
 * Clamp one row of results and write it out as bytes.
 *
 *   rs  register holding the row to output
 *
 * The row is limited from above by $11 (the callers load the CLIPMAX row,
 * i.e. 255, into it) and from below by $0, packed with ppacb and stored
 * through sd3(2, 0, 4); afterwards $4 is advanced by $5.  $2 is scratch.
 *
 * NOTE(review): $4 and $5 are never assigned in this file.  The code
 * presumably relies on them still holding the first two function arguments
 * (dest and line_size) under the MIPS calling convention -- confirm.
 * NOTE(review): sd3/ld3 take bare register numbers and come from mmi.h;
 * sd3(2, 0, 4) is presumably an 8-byte store of $2 to 0($4) -- confirm.
 * NOTE(review): "add" is the overflow-trapping form; "addu" is the usual
 * choice for address arithmetic on MIPS.
 */
00241 #define PUT(rs) \
00242 pminh(rs, $11, $2); /* upper clamp */ \
00243 pmaxh($2, $0, $2); /* lower clamp against the zero register */ \
00244 ppacb($0, $2, $2); \
00245 sd3(2, 0, 4); \
00246 __asm__ __volatile__ ("add $4, $5, $4");
00247
/*
 * Emit all eight result rows with PUT.  The register order
 * $16, $17, $18, $19, $23, $22, $21, $20 is the row order in which
 * DCT_8_INV_COL8_PMS leaves its results; each PUT advances $4 by $5.
 * Requires $11 to hold the clamp row.
 */
00248 #define DCT_8_INV_COL8_PUT() \
00249 PUT($16); \
00250 PUT($17); \
00251 PUT($18); \
00252 PUT($19); \
00253 PUT($23); \
00254 PUT($22); \
00255 PUT($21); \
00256 PUT($20);
00257
/*
 * Add one row of results to the bytes already at the destination.
 *
 *   rs  register holding the row to add
 *
 * Reads through ld3(4, 0, 2), expands the value with pextlb against $0, adds
 * rs, then clamps, packs, stores and advances $4 by $5 exactly as PUT does.
 * $2 is scratch; $11 must hold the clamp row.
 *
 * NOTE(review): ld3(4, 0, 2) is presumably an 8-byte load from 0($4) into $2
 * and pextlb($0, $2, $2) presumably widens those bytes to 16-bit lanes --
 * confirm against mmi.h.  As in PUT, $4/$5 are presumably still the dest and
 * line_size arguments -- confirm.
 */
00258 #define ADD(rs) \
00259 ld3(4, 0, 2); \
00260 pextlb($0, $2, $2); \
00261 paddh($2, rs, $2); \
00262 pminh($2, $11, $2); /* upper clamp */ \
00263 pmaxh($2, $0, $2); /* lower clamp against the zero register */ \
00264 ppacb($0, $2, $2); \
00265 sd3(2, 0, 4); \
00266 __asm__ __volatile__ ("add $4, $5, $4");
00267
00268
/*
 * Accumulate all eight result rows into the destination with ADD.  The
 * register order $16, $17, $18, $19, $23, $22, $21, $20 is the row order in
 * which DCT_8_INV_COL8_PMS leaves its results; each ADD advances $4 by $5.
 * Requires $11 to hold the clamp row.
 */
00269 #define DCT_8_INV_COL8_ADD() \
00270 ADD($16); \
00271 ADD($17); \
00272 ADD($18); \
00273 ADD($19); \
00274 ADD($23); \
00275 ADD($22); \
00276 ADD($21); \
00277 ADD($20);
00278
00279
/*
 * In-place 8x8 inverse DCT.
 *
 * All eight 16-byte rows are read from the address in $4 by the row pass and
 * kept in $8..$15; the column pass then writes the eight result rows back to
 * the same address.
 *
 * `block` is never referenced by name: the body uses register $4 directly,
 * presumably relying on the first argument arriving there under the MIPS
 * calling convention -- NOTE(review): confirm for the target ABI.
 * NOTE(review): none of the asm statements visible in this file declares
 * $2, $3, $7..$15 or $24 as clobbered; only $16..$23 are listed, at the end.
 * NOTE(review): lq/sq presumably require `block` to be 16-byte aligned.
 */
00280 void ff_mmi_idct(int16_t * block)
00281 {
00282
/* $24 = base address for every table access below */
00283 __asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
/* $8 = rounder for row 0 (overwritten by that row's result), $7 = rounder for rows 1..7 */
00284 lq($24, ROUNDER_0, $8);
00285 lq($24, ROUNDER_1, $7);
/* row pass: rows 0..7 end up in $8..$15; rows k and 8-k share a table */
00286 DCT_8_INV_ROW1($4, 0, TAB_i_04, $8, $8);
00287 DCT_8_INV_ROW1($4, 16, TAB_i_17, $7, $9);
00288 DCT_8_INV_ROW1($4, 32, TAB_i_26, $7, $10);
00289 DCT_8_INV_ROW1($4, 48, TAB_i_35, $7, $11);
00290 DCT_8_INV_ROW1($4, 64, TAB_i_04, $7, $12);
00291 DCT_8_INV_ROW1($4, 80, TAB_i_35, $7, $13);
00292 DCT_8_INV_ROW1($4, 96, TAB_i_26, $7, $14);
00293 DCT_8_INV_ROW1($4, 112, TAB_i_17, $7, $15);
/* column pass, then store the eight rows back over the input */
00294 DCT_8_INV_COL8();
00295 DCT_8_INV_COL8_STORE($4);
00296
00297
/* empty asm naming $16..$23 as clobbered; presumably makes the compiler save and restore them */
00298 __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
00299 }
00300
00301
/*
 * 8x8 inverse DCT of the coefficients, with the clamped result written as
 * bytes to eight consecutive destination rows.
 *
 * Rows are read from the address in $6; each output row is clamped to the
 * range bounded by $0 and the CLIPMAX row (255) and stored through $4, which
 * is advanced by $5 after every row (see PUT).
 *
 * None of the parameters is referenced by name.  The body uses $4, $5 and $6
 * directly, presumably relying on dest, line_size and block arriving in those
 * registers under the MIPS calling convention -- NOTE(review): confirm.
 * NOTE(review): none of the asm statements visible in this file declares
 * $2..$5, $7..$15 or $24 as clobbered; only $16..$23 are listed, at the end.
 */
00302 void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
00303 {
00304
/* $24 = base address for every table access below */
00305 __asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
/* $8 = rounder for row 0 (overwritten by that row's result), $7 = rounder for rows 1..7 */
00306 lq($24, ROUNDER_0, $8);
00307 lq($24, ROUNDER_1, $7);
/* row pass: rows 0..7 end up in $8..$15 */
00308 DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
00309 DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
00310 DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
00311 DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
00312 DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
00313 DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
00314 DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
00315 DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
00316 DCT_8_INV_COL8();
/* row 3 in $11 is no longer needed after the column pass: reuse $11 for the clamp row */
00317 lq($24, CLIPMAX, $11);
00318 DCT_8_INV_COL8_PMS();
00319 DCT_8_INV_COL8_PUT();
00320
00321
/* empty asm naming $16..$23 as clobbered; presumably makes the compiler save and restore them */
00322 __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
00323 }
00324
00325
/*
 * 8x8 inverse DCT of the coefficients, with the result added to the bytes
 * already present in eight consecutive destination rows.
 *
 * Rows are read from the address in $6; for each output row the existing
 * destination bytes are loaded through $4, the result is added, the sum is
 * clamped to the range bounded by $0 and the CLIPMAX row (255) and stored
 * back, and $4 is advanced by $5 (see ADD).
 *
 * None of the parameters is referenced by name.  The body uses $4, $5 and $6
 * directly, presumably relying on dest, line_size and block arriving in those
 * registers under the MIPS calling convention -- NOTE(review): confirm.
 * NOTE(review): none of the asm statements visible in this file declares
 * $2..$5, $7..$15 or $24 as clobbered; only $16..$23 are listed, at the end.
 */
00326 void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
00327 {
00328
/* $24 = base address for every table access below */
00329 __asm__ __volatile__("la $24, %0"::"m"(consttable[0]));
/* $8 = rounder for row 0 (overwritten by that row's result), $7 = rounder for rows 1..7 */
00330 lq($24, ROUNDER_0, $8);
00331 lq($24, ROUNDER_1, $7);
/* row pass: rows 0..7 end up in $8..$15 */
00332 DCT_8_INV_ROW1($6, 0, TAB_i_04, $8, $8);
00333 DCT_8_INV_ROW1($6, 16, TAB_i_17, $7, $9);
00334 DCT_8_INV_ROW1($6, 32, TAB_i_26, $7, $10);
00335 DCT_8_INV_ROW1($6, 48, TAB_i_35, $7, $11);
00336 DCT_8_INV_ROW1($6, 64, TAB_i_04, $7, $12);
00337 DCT_8_INV_ROW1($6, 80, TAB_i_35, $7, $13);
00338 DCT_8_INV_ROW1($6, 96, TAB_i_26, $7, $14);
00339 DCT_8_INV_ROW1($6, 112, TAB_i_17, $7, $15);
00340 DCT_8_INV_COL8();
/* row 3 in $11 is no longer needed after the column pass: reuse $11 for the clamp row */
00341 lq($24, CLIPMAX, $11);
00342 DCT_8_INV_COL8_PMS();
00343 DCT_8_INV_COL8_ADD();
00344
00345
/* empty asm naming $16..$23 as clobbered; presumably makes the compiler save and restore them */
00346 __asm__ __volatile__(" ":::"$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23");
00347 }
00348