00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "../dsputil.h"
/*
 * Transform coefficients, cN = sqrt(2) * cos(N * pi / 16).
 * The names are only needed while the tables are being built and are
 * removed again right after them.
 */
#define c1      1.38703984532214752434
#define c2      1.30656296487637657577
#define c3      1.17587560241935884520
#define c4      1.00000000000000000000
#define c5      0.78569495838710234903
#define c6      0.54119610014619712324
#define c7      0.27589937928294311353

/*
 * 4x4 matrix applied to the even-indexed inputs (0, 2, 4, 6).
 * Each line of four values holds the weights with which one input
 * contributes to the four outputs (see the indexing in ftrv_()).
 * Aligned to 8 bytes because the SH4 loader reads the table two floats
 * at a time.
 */
static const float even_table[16] __attribute__ ((aligned(8))) = {
     c4,  c4,  c4,  c4,
     c2,  c6, -c6, -c2,
     c4, -c4, -c4,  c4,
     c6, -c2,  c2, -c6
};

/*
 * 4x4 matrix applied to the odd-indexed inputs (1, 3, 5, 7); same layout
 * and alignment as even_table.
 */
static const float odd_table[16] __attribute__ ((aligned(8))) = {
     c1,  c3,  c5,  c7,
     c3, -c7, -c1, -c5,
     c5, -c1,  c7,  c3,
     c7, -c5,  c3, -c1
};

#undef  c1
#undef  c2
#undef  c3
#undef  c4
#undef  c5
#undef  c6
#undef  c7
00051
00052 #if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
00053
/*
 * load_matrix(table): copy 16 consecutive floats from `table` into the
 * SH4 extended register bank (xd0..xd14), i.e. the matrix that the
 * "ftrv xmtrx,fv0" instruction in ftrv() multiplies by.
 *
 * The two fschg instructions bracket the copy so that each fmov
 * transfers a register pair; this is why the tables are 8-byte aligned.
 *
 * The fmov instructions post-increment the address register, so the
 * pointer is handed to the asm as a read-write ("+r") operand on a local
 * copy.  Passing it as a plain input and naming "0" in the clobber list
 * (as was done before) does not tell the compiler that the register
 * changes, and "0" is not a valid clobber name.
 */
#define load_matrix(table) \
    do { \
        const float *load_matrix_src_ = (table); \
        __asm__ volatile( \
        "       fschg\n" \
        "       fmov    @%0+,xd0\n" \
        "       fmov    @%0+,xd2\n" \
        "       fmov    @%0+,xd4\n" \
        "       fmov    @%0+,xd6\n" \
        "       fmov    @%0+,xd8\n" \
        "       fmov    @%0+,xd10\n" \
        "       fmov    @%0+,xd12\n" \
        "       fmov    @%0+,xd14\n" \
        "       fschg\n" \
        : "+r"(load_matrix_src_) \
        ); \
    } while (0)

/*
 * ftrv(): multiply the vector held in fr0..fr3 by the matrix previously
 * loaded with load_matrix(); the product replaces fr0..fr3.
 * Each output operand is tied to the matching input so the values stay
 * in the same registers.  The macro expands to a single expression
 * statement without a trailing semicolon, like the portable fallback,
 * so it is always written as "ftrv();".
 */
#define ftrv() \
    __asm__ volatile("ftrv xmtrx,fv0" \
        : "=f"(fr0), "=f"(fr1), "=f"(fr2), "=f"(fr3) \
        :  "0"(fr0),  "1"(fr1),  "2"(fr2),  "3"(fr3))

/*
 * DEFREG: declare the four working floats and pin each one to the
 * hardware register of the same name, which is where ftrv() expects its
 * operand vector.  Use as "DEFREG;" at the top of a function body.
 */
#define DEFREG \
    register float fr0 __asm__("fr0"); \
    register float fr1 __asm__("fr1"); \
    register float fr2 __asm__("fr2"); \
    register float fr3 __asm__("fr3")
00081
00082 #else
00083
00084
00085
/*
 * Portable stand-in for the SH4 "ftrv" instruction.
 *
 * Multiplies the 4x4 matrix `xf` by the 4-element vector `fv` and stores
 * the product back into `fv`.  The matrix is indexed as xf[row + 4*col],
 * i.e. four consecutive floats are the weights of one input element.
 *
 * xf: 16 matrix coefficients (read only).
 * fv: 4 input values on entry, 4 output values on return.
 */
static void ftrv_(const float xf[],float fv[])
{
    /* Snapshot the inputs first: fv is overwritten while we go. */
    const float in0 = fv[0];
    const float in1 = fv[1];
    const float in2 = fv[2];
    const float in3 = fv[3];
    int row;

    for (row = 0; row < 4; row++) {
        const float *w = xf + row;

        /* Same left-to-right summation order for every output. */
        fv[row] = w[0]*in0 + w[4]*in1 + w[8]*in2 + w[12]*in3;
    }
}
00098
/*
 * Portable stand-in for the SH4 matrix load: copies the 16 coefficients
 * of `table` into the working matrix `xf`.
 *
 * xf:    destination, room for 16 floats.
 * table: source, 16 floats.
 */
static void load_matrix_(float xf[],const float table[])
{
    const float *src = table;
    const float *const stop = table + 16;
    float *dst = xf;

    while (src != stop)
        *dst++ = *src++;
}
00104
/*
 * Portable versions of the helper macros.  A function that uses them
 * starts with "DEFREG;", which provides the working vector fv[] and the
 * working matrix xf[] that the two call macros pass along implicitly.
 */
#define DEFREG \
    float fv[4]; \
    float xf[16]

/* Copy a 16-float coefficient table into the working matrix. */
#define load_matrix(table)  load_matrix_(xf, (table))

/* fv = xf * fv */
#define ftrv()              ftrv_(xf, fv)

/* The four vector elements, under the names used by the SH4 variant. */
#define fr0     (fv[0])
#define fr1     (fv[1])
#define fr2     (fv[2])
#define fr3     (fv[3])
00115
00116 #endif
00117
/*
 * DESCALE(x, n): scale x down by 2^n.
 *
 * Active variant:   floating-point multiply by 1/2^n; no rounding is
 *                   applied by the macro itself.
 * Disabled variant: integer shift with a rounding bias of 2^(n-1).
 *
 * Both expansions are fully parenthesized so the macro can be used as an
 * operand of any surrounding expression (e.g. a / DESCALE(b, n)).
 */
#if 1
#define DESCALE(x,n)    ((x)*(1.0f/(1<<(n))))
#else
#define DESCALE(x,n)    (((int)(x)+(1<<((n)-1)))>>(n))
#endif
00123
00124
00125
00126
00127 #if 1
00128
00129
00130 void idct_sh4(DCTELEM *block)
00131 {
00132 DEFREG;
00133
00134 int i;
00135 float tblock[8*8],*fblock;
00136 int ofs1,ofs2,ofs3;
00137
00138 #if defined(__SH4__)
00139 #error "FIXME!! change to single float"
00140 #endif
00141
00142
00143
00144
00145 load_matrix(even_table);
00146
00147 fblock = tblock+4;
00148 i = 8;
00149 do {
00150 fr0 = block[0];
00151 fr1 = block[2];
00152 fr2 = block[4];
00153 fr3 = block[6];
00154 block+=8;
00155 ftrv();
00156 *--fblock = fr3;
00157 *--fblock = fr2;
00158 *--fblock = fr1;
00159 *--fblock = fr0;
00160 fblock+=8+4;
00161 } while(--i);
00162 block-=8*8;
00163 fblock-=8*8+4;
00164
00165 load_matrix(odd_table);
00166
00167 i = 8;
00168
00169
00170
00171
00172
00173 do {
00174 float t0,t1,t2,t3;
00175 fr0 = block[1];
00176 fr1 = block[3];
00177 fr2 = block[5];
00178 fr3 = block[7];
00179 block+=8;
00180 ftrv();
00181 t0 = *fblock++;
00182 t1 = *fblock++;
00183 t2 = *fblock++;
00184 t3 = *fblock++;
00185 fblock+=4;
00186 *--fblock = t0 - fr0;
00187 *--fblock = t1 - fr1;
00188 *--fblock = t2 - fr2;
00189 *--fblock = t3 - fr3;
00190 *--fblock = t3 + fr3;
00191 *--fblock = t2 + fr2;
00192 *--fblock = t1 + fr1;
00193 *--fblock = t0 + fr0;
00194 fblock+=8;
00195 } while(--i);
00196 block-=8*8;
00197 fblock-=8*8;
00198
00199
00200
00201
00202 load_matrix(even_table);
00203
00204 ofs1 = sizeof(float)*2*8;
00205 ofs2 = sizeof(float)*4*8;
00206 ofs3 = sizeof(float)*6*8;
00207
00208 i = 8;
00209
00210 #define OA(fblock,ofs) *(float*)((char*)fblock + ofs)
00211
00212 do {
00213 fr0 = OA(fblock, 0);
00214 fr1 = OA(fblock,ofs1);
00215 fr2 = OA(fblock,ofs2);
00216 fr3 = OA(fblock,ofs3);
00217 ftrv();
00218 OA(fblock,0 ) = fr0;
00219 OA(fblock,ofs1) = fr1;
00220 OA(fblock,ofs2) = fr2;
00221 OA(fblock,ofs3) = fr3;
00222 fblock++;
00223 } while(--i);
00224 fblock-=8;
00225
00226 load_matrix(odd_table);
00227
00228 i=8;
00229 do {
00230 float t0,t1,t2,t3;
00231 t0 = OA(fblock, 0);
00232 t1 = OA(fblock,ofs1);
00233 t2 = OA(fblock,ofs2);
00234 t3 = OA(fblock,ofs3);
00235 fblock+=8;
00236 fr0 = OA(fblock, 0);
00237 fr1 = OA(fblock,ofs1);
00238 fr2 = OA(fblock,ofs2);
00239 fr3 = OA(fblock,ofs3);
00240 fblock+=-8+1;
00241 ftrv();
00242 block[8*0] = DESCALE(t0 + fr0,3);
00243 block[8*7] = DESCALE(t0 - fr0,3);
00244 block[8*1] = DESCALE(t1 + fr1,3);
00245 block[8*6] = DESCALE(t1 - fr1,3);
00246 block[8*2] = DESCALE(t2 + fr2,3);
00247 block[8*5] = DESCALE(t2 - fr2,3);
00248 block[8*3] = DESCALE(t3 + fr3,3);
00249 block[8*4] = DESCALE(t3 - fr3,3);
00250 block++;
00251 } while(--i);
00252
00253 #if defined(__SH4__)
00254 #error "FIXME!! change to double"
00255 #endif
00256 }
00257 #else
00258 void idct_sh4(DCTELEM *block)
00259 {
00260 DEFREG;
00261
00262 int i;
00263 float tblock[8*8],*fblock;
00264
00265
00266
00267
00268 load_matrix(even_table);
00269
00270 fblock = tblock;
00271 i = 8;
00272 do {
00273 fr0 = block[0];
00274 fr1 = block[2];
00275 fr2 = block[4];
00276 fr3 = block[6];
00277 block+=8;
00278 ftrv();
00279 fblock[0] = fr0;
00280 fblock[2] = fr1;
00281 fblock[4] = fr2;
00282 fblock[6] = fr3;
00283 fblock+=8;
00284 } while(--i);
00285 block-=8*8;
00286 fblock-=8*8;
00287
00288 load_matrix(odd_table);
00289
00290 i = 8;
00291
00292 do {
00293 float t0,t1,t2,t3;
00294 fr0 = block[1];
00295 fr1 = block[3];
00296 fr2 = block[5];
00297 fr3 = block[7];
00298 block+=8;
00299 ftrv();
00300 t0 = fblock[0];
00301 t1 = fblock[2];
00302 t2 = fblock[4];
00303 t3 = fblock[6];
00304 fblock[0] = t0 + fr0;
00305 fblock[7] = t0 - fr0;
00306 fblock[1] = t1 + fr1;
00307 fblock[6] = t1 - fr1;
00308 fblock[2] = t2 + fr2;
00309 fblock[5] = t2 - fr2;
00310 fblock[3] = t3 + fr3;
00311 fblock[4] = t3 - fr3;
00312 fblock+=8;
00313 } while(--i);
00314 block-=8*8;
00315 fblock-=8*8;
00316
00317
00318
00319
00320 load_matrix(even_table);
00321
00322 i = 8;
00323
00324 do {
00325 fr0 = fblock[8*0];
00326 fr1 = fblock[8*2];
00327 fr2 = fblock[8*4];
00328 fr3 = fblock[8*6];
00329 ftrv();
00330 fblock[8*0] = fr0;
00331 fblock[8*2] = fr1;
00332 fblock[8*4] = fr2;
00333 fblock[8*6] = fr3;
00334 fblock++;
00335 } while(--i);
00336 fblock-=8;
00337
00338 load_matrix(odd_table);
00339
00340 i=8;
00341 do {
00342 float t0,t1,t2,t3;
00343 fr0 = fblock[8*1];
00344 fr1 = fblock[8*3];
00345 fr2 = fblock[8*5];
00346 fr3 = fblock[8*7];
00347 ftrv();
00348 t0 = fblock[8*0];
00349 t1 = fblock[8*2];
00350 t2 = fblock[8*4];
00351 t3 = fblock[8*6];
00352 fblock++;
00353 block[8*0] = DESCALE(t0 + fr0,3);
00354 block[8*7] = DESCALE(t0 - fr0,3);
00355 block[8*1] = DESCALE(t1 + fr1,3);
00356 block[8*6] = DESCALE(t1 - fr1,3);
00357 block[8*2] = DESCALE(t2 + fr2,3);
00358 block[8*5] = DESCALE(t2 - fr2,3);
00359 block[8*3] = DESCALE(t3 + fr3,3);
00360 block[8*4] = DESCALE(t3 - fr3,3);
00361 block++;
00362 } while(--i);
00363 }
00364 #endif