00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "asm.h"
00028 #include "../dsputil.h"
00029
/*
 * Function pointers defined in another translation unit.  The put/add IDCT
 * entry points in this file call through them, passing the transformed block,
 * the destination pointer and the line size.
 * NOTE(review): judging by the names they store / add the block into 8-bit
 * pixels with clamping -- confirm at the definition.
 */
00030 extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
00031 int line_size);
00032 extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
00033 int line_size);
00034
00035
00036
00037
/*
 * Fixed-point transform weights.  For k = 1, 2, 3, 5, 6, 7 the value of Wk
 * equals cos(k * pi / 16) * sqrt(2) * (1 << 14), rounded to the nearest
 * integer.  They are typed int_fast32_t so that every product with a
 * coefficient is evaluated in that type.
 * NOTE(review): the same formula gives exactly 16384 for W4, yet 16383 is
 * used here; presumably deliberate -- confirm before "correcting" it.
 */
00038 #define W1 ((int_fast32_t) 22725)
00039 #define W2 ((int_fast32_t) 21407)
00040 #define W3 ((int_fast32_t) 19266)
00041 #define W4 ((int_fast32_t) 16383)
00042 #define W5 ((int_fast32_t) 12873)
00043 #define W6 ((int_fast32_t) 8867)
00044 #define W7 ((int_fast32_t) 4520)
/* Right shifts applied to the accumulated sums after the row pass and the
 * column pass respectively. */
00045 #define ROW_SHIFT 11
00046 #define COL_SHIFT 20
00047
00048
00049 static inline int idct_row(DCTELEM *row)
00050 {
00051 int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3, t;
00052 uint64_t l, r, t2;
00053 l = ldq(row);
00054 r = ldq(row + 4);
00055
00056 if (l == 0 && r == 0)
00057 return 0;
00058
00059 a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
00060
00061 if (((l & ~0xffffUL) | r) == 0) {
00062 a0 >>= ROW_SHIFT;
00063 t2 = (uint16_t) a0;
00064 t2 |= t2 << 16;
00065 t2 |= t2 << 32;
00066
00067 stq(t2, row);
00068 stq(t2, row + 4);
00069 return 1;
00070 }
00071
00072 a1 = a0;
00073 a2 = a0;
00074 a3 = a0;
00075
00076 t = extwl(l, 4);
00077 if (t != 0) {
00078 t = sextw(t);
00079 a0 += W2 * t;
00080 a1 += W6 * t;
00081 a2 -= W6 * t;
00082 a3 -= W2 * t;
00083 }
00084
00085 t = extwl(r, 0);
00086 if (t != 0) {
00087 t = sextw(t);
00088 a0 += W4 * t;
00089 a1 -= W4 * t;
00090 a2 -= W4 * t;
00091 a3 += W4 * t;
00092 }
00093
00094 t = extwl(r, 4);
00095 if (t != 0) {
00096 t = sextw(t);
00097 a0 += W6 * t;
00098 a1 -= W2 * t;
00099 a2 += W2 * t;
00100 a3 -= W6 * t;
00101 }
00102
00103 t = extwl(l, 2);
00104 if (t != 0) {
00105 t = sextw(t);
00106 b0 = W1 * t;
00107 b1 = W3 * t;
00108 b2 = W5 * t;
00109 b3 = W7 * t;
00110 } else {
00111 b0 = 0;
00112 b1 = 0;
00113 b2 = 0;
00114 b3 = 0;
00115 }
00116
00117 t = extwl(l, 6);
00118 if (t) {
00119 t = sextw(t);
00120 b0 += W3 * t;
00121 b1 -= W7 * t;
00122 b2 -= W1 * t;
00123 b3 -= W5 * t;
00124 }
00125
00126
00127 t = extwl(r, 2);
00128 if (t) {
00129 t = sextw(t);
00130 b0 += W5 * t;
00131 b1 -= W1 * t;
00132 b2 += W7 * t;
00133 b3 += W3 * t;
00134 }
00135
00136 t = extwl(r, 6);
00137 if (t) {
00138 t = sextw(t);
00139 b0 += W7 * t;
00140 b1 -= W5 * t;
00141 b2 += W3 * t;
00142 b3 -= W1 * t;
00143 }
00144
00145 row[0] = (a0 + b0) >> ROW_SHIFT;
00146 row[1] = (a1 + b1) >> ROW_SHIFT;
00147 row[2] = (a2 + b2) >> ROW_SHIFT;
00148 row[3] = (a3 + b3) >> ROW_SHIFT;
00149 row[4] = (a3 - b3) >> ROW_SHIFT;
00150 row[5] = (a2 - b2) >> ROW_SHIFT;
00151 row[6] = (a1 - b1) >> ROW_SHIFT;
00152 row[7] = (a0 - b0) >> ROW_SHIFT;
00153
00154 return 2;
00155 }
00156
00157 static inline void idct_col(DCTELEM *col)
00158 {
00159 int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
00160
00161 col[0] += (1 << (COL_SHIFT - 1)) / W4;
00162
00163 a0 = W4 * col[8 * 0];
00164 a1 = W4 * col[8 * 0];
00165 a2 = W4 * col[8 * 0];
00166 a3 = W4 * col[8 * 0];
00167
00168 if (col[8 * 2]) {
00169 a0 += W2 * col[8 * 2];
00170 a1 += W6 * col[8 * 2];
00171 a2 -= W6 * col[8 * 2];
00172 a3 -= W2 * col[8 * 2];
00173 }
00174
00175 if (col[8 * 4]) {
00176 a0 += W4 * col[8 * 4];
00177 a1 -= W4 * col[8 * 4];
00178 a2 -= W4 * col[8 * 4];
00179 a3 += W4 * col[8 * 4];
00180 }
00181
00182 if (col[8 * 6]) {
00183 a0 += W6 * col[8 * 6];
00184 a1 -= W2 * col[8 * 6];
00185 a2 += W2 * col[8 * 6];
00186 a3 -= W6 * col[8 * 6];
00187 }
00188
00189 if (col[8 * 1]) {
00190 b0 = W1 * col[8 * 1];
00191 b1 = W3 * col[8 * 1];
00192 b2 = W5 * col[8 * 1];
00193 b3 = W7 * col[8 * 1];
00194 } else {
00195 b0 = 0;
00196 b1 = 0;
00197 b2 = 0;
00198 b3 = 0;
00199 }
00200
00201 if (col[8 * 3]) {
00202 b0 += W3 * col[8 * 3];
00203 b1 -= W7 * col[8 * 3];
00204 b2 -= W1 * col[8 * 3];
00205 b3 -= W5 * col[8 * 3];
00206 }
00207
00208 if (col[8 * 5]) {
00209 b0 += W5 * col[8 * 5];
00210 b1 -= W1 * col[8 * 5];
00211 b2 += W7 * col[8 * 5];
00212 b3 += W3 * col[8 * 5];
00213 }
00214
00215 if (col[8 * 7]) {
00216 b0 += W7 * col[8 * 7];
00217 b1 -= W5 * col[8 * 7];
00218 b2 += W3 * col[8 * 7];
00219 b3 -= W1 * col[8 * 7];
00220 }
00221
00222 col[8 * 0] = (a0 + b0) >> COL_SHIFT;
00223 col[8 * 7] = (a0 - b0) >> COL_SHIFT;
00224 col[8 * 1] = (a1 + b1) >> COL_SHIFT;
00225 col[8 * 6] = (a1 - b1) >> COL_SHIFT;
00226 col[8 * 2] = (a2 + b2) >> COL_SHIFT;
00227 col[8 * 5] = (a2 - b2) >> COL_SHIFT;
00228 col[8 * 3] = (a3 + b3) >> COL_SHIFT;
00229 col[8 * 4] = (a3 - b3) >> COL_SHIFT;
00230 }
00231
00232
00233
00234 static inline void idct_col2(DCTELEM *col)
00235 {
00236 int i;
00237 uint64_t l, r;
00238
00239 for (i = 0; i < 8; ++i) {
00240 int_fast32_t a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
00241
00242 a0 *= W4;
00243 col[i] = a0 >> COL_SHIFT;
00244 }
00245
00246 l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
00247 stq(l, col + 2 * 4); stq(r, col + 3 * 4);
00248 stq(l, col + 4 * 4); stq(r, col + 5 * 4);
00249 stq(l, col + 6 * 4); stq(r, col + 7 * 4);
00250 stq(l, col + 8 * 4); stq(r, col + 9 * 4);
00251 stq(l, col + 10 * 4); stq(r, col + 11 * 4);
00252 stq(l, col + 12 * 4); stq(r, col + 13 * 4);
00253 stq(l, col + 14 * 4); stq(r, col + 15 * 4);
00254 }
00255
00256 void simple_idct_axp(DCTELEM *block)
00257 {
00258
00259 int i;
00260 int rowsZero = 1;
00261 int rowsConstant = 1;
00262
00263 for (i = 0; i < 8; i++) {
00264 int sparseness = idct_row(block + 8 * i);
00265
00266 if (i > 0 && sparseness > 0)
00267 rowsZero = 0;
00268 if (sparseness == 2)
00269 rowsConstant = 0;
00270 }
00271
00272 if (rowsZero) {
00273 idct_col2(block);
00274 } else if (rowsConstant) {
00275 idct_col(block);
00276 for (i = 0; i < 8; i += 2) {
00277 uint64_t v = (uint16_t) block[0];
00278 uint64_t w = (uint16_t) block[8];
00279
00280 v |= v << 16;
00281 w |= w << 16;
00282 v |= v << 32;
00283 w |= w << 32;
00284 stq(v, block + 0 * 4);
00285 stq(v, block + 1 * 4);
00286 stq(w, block + 2 * 4);
00287 stq(w, block + 3 * 4);
00288 block += 4 * 4;
00289 }
00290 } else {
00291 for (i = 0; i < 8; i++)
00292 idct_col(block + i);
00293 }
00294 }
00295
/*
 * Runs simple_idct_axp() on block (transforming it in place), then passes the
 * result to put_pixels_clamped_axp_p together with dest and line_size.
 */
00296 void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
00297 {
00298 simple_idct_axp(block);
00299 put_pixels_clamped_axp_p(block, dest, line_size);
00300 }
00301
/*
 * Runs simple_idct_axp() on block (transforming it in place), then passes the
 * result to add_pixels_clamped_axp_p together with dest and line_size.
 */
00302 void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
00303 {
00304 simple_idct_axp(block);
00305 add_pixels_clamped_axp_p(block, dest, line_size);
00306 }