00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "a52.h"
00028 #include "a52_internal.h"
00029 #include "mm_accel.h"
00030
00031 typedef struct complex_s {
00032 sample_t real;
00033 sample_t imag;
00034 } complex_t;
00035
00036 static uint8_t fftorder[] = {
00037 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
00038 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
00039 4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180,
00040 252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172,
00041 2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178,
00042 10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90,
00043 254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174,
00044 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
00045 };
00046
00047
00048 static sample_t roots16[3];
00049 static sample_t roots32[7];
00050 static sample_t roots64[15];
00051 static sample_t roots128[31];
00052
00053
00054 static complex_t pre1[128];
00055 static complex_t post1[64];
00056 static complex_t pre2[64];
00057 static complex_t post2[32];
00058
00059 static sample_t a52_imdct_window[256];
00060
00061 static void (* ifft128) (complex_t * buf);
00062 static void (* ifft64) (complex_t * buf);
00063
00064 static inline void ifft2 (complex_t * buf)
00065 {
00066 sample_t r, i;
00067
00068 r = buf[0].real;
00069 i = buf[0].imag;
00070 buf[0].real += buf[1].real;
00071 buf[0].imag += buf[1].imag;
00072 buf[1].real = r - buf[1].real;
00073 buf[1].imag = i - buf[1].imag;
00074 }
00075
00076 static inline void ifft4 (complex_t * buf)
00077 {
00078 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
00079
00080 tmp1 = buf[0].real + buf[1].real;
00081 tmp2 = buf[3].real + buf[2].real;
00082 tmp3 = buf[0].imag + buf[1].imag;
00083 tmp4 = buf[2].imag + buf[3].imag;
00084 tmp5 = buf[0].real - buf[1].real;
00085 tmp6 = buf[0].imag - buf[1].imag;
00086 tmp7 = buf[2].imag - buf[3].imag;
00087 tmp8 = buf[3].real - buf[2].real;
00088
00089 buf[0].real = tmp1 + tmp2;
00090 buf[0].imag = tmp3 + tmp4;
00091 buf[2].real = tmp1 - tmp2;
00092 buf[2].imag = tmp3 - tmp4;
00093 buf[1].real = tmp5 + tmp7;
00094 buf[1].imag = tmp6 + tmp8;
00095 buf[3].real = tmp5 - tmp7;
00096 buf[3].imag = tmp6 - tmp8;
00097 }
00098
00099
00100
00101 #define BUTTERFLY_0(t0,t1,W0,W1,d0,d1) do { \
00102 t0 = MUL (W1, d1) + MUL (W0, d0); \
00103 t1 = MUL (W0, d1) - MUL (W1, d0); \
00104 } while (0)
00105
00106
00107
00108 #define BUTTERFLY_B(t0,t1,W0,W1,d0,d1) do { \
00109 t0 = BIAS (MUL (d1, W1) + MUL (d0, W0)); \
00110 t1 = BIAS (MUL (d1, W0) - MUL (d0, W1)); \
00111 } while (0)
00112
00113
00114
00115 #define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \
00116 BUTTERFLY_0 (tmp5, tmp6, wr, wi, a2.real, a2.imag); \
00117 BUTTERFLY_0 (tmp8, tmp7, wr, wi, a3.imag, a3.real); \
00118 tmp1 = tmp5 + tmp7; \
00119 tmp2 = tmp6 + tmp8; \
00120 tmp3 = tmp6 - tmp8; \
00121 tmp4 = tmp7 - tmp5; \
00122 a2.real = a0.real - tmp1; \
00123 a2.imag = a0.imag - tmp2; \
00124 a3.real = a1.real - tmp3; \
00125 a3.imag = a1.imag - tmp4; \
00126 a0.real += tmp1; \
00127 a0.imag += tmp2; \
00128 a1.real += tmp3; \
00129 a1.imag += tmp4; \
00130 } while (0)
00131
00132
00133
00134 #define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \
00135 tmp1 = a2.real + a3.real; \
00136 tmp2 = a2.imag + a3.imag; \
00137 tmp3 = a2.imag - a3.imag; \
00138 tmp4 = a3.real - a2.real; \
00139 a2.real = a0.real - tmp1; \
00140 a2.imag = a0.imag - tmp2; \
00141 a3.real = a1.real - tmp3; \
00142 a3.imag = a1.imag - tmp4; \
00143 a0.real += tmp1; \
00144 a0.imag += tmp2; \
00145 a1.real += tmp3; \
00146 a1.imag += tmp4; \
00147 } while (0)
00148
00149
00150
00151 #define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \
00152 tmp5 = MUL (a2.real + a2.imag, w); \
00153 tmp6 = MUL (a2.imag - a2.real, w); \
00154 tmp7 = MUL (a3.real - a3.imag, w); \
00155 tmp8 = MUL (a3.imag + a3.real, w); \
00156 tmp1 = tmp5 + tmp7; \
00157 tmp2 = tmp6 + tmp8; \
00158 tmp3 = tmp6 - tmp8; \
00159 tmp4 = tmp7 - tmp5; \
00160 a2.real = a0.real - tmp1; \
00161 a2.imag = a0.imag - tmp2; \
00162 a3.real = a1.real - tmp3; \
00163 a3.imag = a1.imag - tmp4; \
00164 a0.real += tmp1; \
00165 a0.imag += tmp2; \
00166 a1.real += tmp3; \
00167 a1.imag += tmp4; \
00168 } while (0)
00169
00170 static inline void ifft8 (complex_t * buf)
00171 {
00172 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
00173
00174 ifft4 (buf);
00175 ifft2 (buf + 4);
00176 ifft2 (buf + 6);
00177 BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]);
00178 BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]);
00179 }
00180
00181 static void ifft_pass (complex_t * buf, sample_t * weight, int n)
00182 {
00183 complex_t * buf1;
00184 complex_t * buf2;
00185 complex_t * buf3;
00186 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
00187 int i;
00188
00189 buf++;
00190 buf1 = buf + n;
00191 buf2 = buf + 2 * n;
00192 buf3 = buf + 3 * n;
00193
00194 BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]);
00195
00196 i = n - 1;
00197
00198 do {
00199 BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0],
00200 weight[0], weight[2*i-n]);
00201 buf++;
00202 buf1++;
00203 buf2++;
00204 buf3++;
00205 weight++;
00206 } while (--i);
00207 }
00208
00209 static void ifft16 (complex_t * buf)
00210 {
00211 ifft8 (buf);
00212 ifft4 (buf + 8);
00213 ifft4 (buf + 12);
00214 ifft_pass (buf, roots16, 4);
00215 }
00216
00217 static void ifft32 (complex_t * buf)
00218 {
00219 ifft16 (buf);
00220 ifft8 (buf + 16);
00221 ifft8 (buf + 24);
00222 ifft_pass (buf, roots32, 8);
00223 }
00224
00225 static void ifft64_c (complex_t * buf)
00226 {
00227 ifft32 (buf);
00228 ifft16 (buf + 32);
00229 ifft16 (buf + 48);
00230 ifft_pass (buf, roots64, 16);
00231 }
00232
00233 static void ifft128_c (complex_t * buf)
00234 {
00235 ifft32 (buf);
00236 ifft16 (buf + 32);
00237 ifft16 (buf + 48);
00238 ifft_pass (buf, roots64, 16);
00239
00240 ifft32 (buf + 64);
00241 ifft32 (buf + 96);
00242 ifft_pass (buf, roots128, 32);
00243 }
00244
00245 void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
00246 {
00247 int i, k;
00248 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
00249 const sample_t * window = a52_imdct_window;
00250 complex_t buf[128];
00251
00252 for (i = 0; i < 128; i++) {
00253 k = fftorder[i];
00254 t_r = pre1[i].real;
00255 t_i = pre1[i].imag;
00256 BUTTERFLY_0 (buf[i].real, buf[i].imag, t_r, t_i, data[k], data[255-k]);
00257 }
00258
00259 ifft128 (buf);
00260
00261
00262
00263 for (i = 0; i < 64; i++) {
00264
00265 t_r = post1[i].real;
00266 t_i = post1[i].imag;
00267 BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf[i].imag, buf[i].real);
00268 BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf[127-i].imag, buf[127-i].real);
00269
00270 w_1 = window[2*i];
00271 w_2 = window[255-2*i];
00272 BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]);
00273 delay[2*i] = a_i;
00274
00275 w_1 = window[2*i+1];
00276 w_2 = window[254-2*i];
00277 BUTTERFLY_B (data[2*i+1], data[254-2*i], w_1, w_2, b_r, delay[2*i+1]);
00278 delay[2*i+1] = b_i;
00279 }
00280 }
00281
00282 void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias)
00283 {
00284 int i, k;
00285 sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2;
00286 const sample_t * window = a52_imdct_window;
00287 complex_t buf1[64], buf2[64];
00288
00289
00290 for (i = 0; i < 64; i++) {
00291 k = fftorder[i];
00292 t_r = pre2[i].real;
00293 t_i = pre2[i].imag;
00294 BUTTERFLY_0 (buf1[i].real, buf1[i].imag, t_r, t_i, data[k], data[254-k]);
00295 BUTTERFLY_0 (buf2[i].real, buf2[i].imag, t_r, t_i, data[k+1], data[255-k]);
00296 }
00297
00298 ifft64 (buf1);
00299 ifft64 (buf2);
00300
00301
00302
00303 for (i = 0; i < 32; i++) {
00304
00305 t_r = post2[i].real;
00306 t_i = post2[i].imag;
00307 BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf1[i].imag, buf1[i].real);
00308 BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf1[63-i].imag, buf1[63-i].real);
00309 BUTTERFLY_0 (c_r, c_i, t_i, t_r, buf2[i].imag, buf2[i].real);
00310 BUTTERFLY_0 (d_r, d_i, t_r, t_i, buf2[63-i].imag, buf2[63-i].real);
00311
00312 w_1 = window[2*i];
00313 w_2 = window[255-2*i];
00314 BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]);
00315 delay[2*i] = c_i;
00316
00317 w_1 = window[128+2*i];
00318 w_2 = window[127-2*i];
00319 BUTTERFLY_B (data[128+2*i], data[127-2*i], w_1, w_2, a_i, delay[127-2*i]);
00320 delay[127-2*i] = c_r;
00321
00322 w_1 = window[2*i+1];
00323 w_2 = window[254-2*i];
00324 BUTTERFLY_B (data[254-2*i], data[2*i+1], w_2, w_1, b_i, delay[2*i+1]);
00325 delay[2*i+1] = d_r;
00326
00327 w_1 = window[129+2*i];
00328 w_2 = window[126-2*i];
00329 BUTTERFLY_B (data[129+2*i], data[126-2*i], w_1, w_2, b_r, delay[126-2*i]);
00330 delay[126-2*i] = d_i;
00331 }
00332 }
00333
00334 static double besselI0 (double x)
00335 {
00336 double bessel = 1;
00337 int i = 100;
00338
00339 do
00340 bessel = bessel * x / (i * i) + 1;
00341 while (--i);
00342 return bessel;
00343 }
00344
00345 void a52_imdct_init (uint32_t mm_accel)
00346 {
00347 int i, k;
00348 double sum;
00349 double local_imdct_window[256];
00350
00351
00352 sum = 0;
00353 for (i = 0; i < 256; i++) {
00354 sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256));
00355 local_imdct_window[i] = sum;
00356 }
00357 sum++;
00358 for (i = 0; i < 256; i++)
00359 a52_imdct_window[i] = SAMPLE (sqrt (local_imdct_window[i] / sum));
00360
00361 for (i = 0; i < 3; i++)
00362 roots16[i] = SAMPLE (cos ((M_PI / 8) * (i + 1)));
00363
00364 for (i = 0; i < 7; i++)
00365 roots32[i] = SAMPLE (cos ((M_PI / 16) * (i + 1)));
00366
00367 for (i = 0; i < 15; i++)
00368 roots64[i] = SAMPLE (cos ((M_PI / 32) * (i + 1)));
00369
00370 for (i = 0; i < 31; i++)
00371 roots128[i] = SAMPLE (cos ((M_PI / 64) * (i + 1)));
00372
00373 for (i = 0; i < 64; i++) {
00374 k = fftorder[i] / 2 + 64;
00375 pre1[i].real = SAMPLE (cos ((M_PI / 256) * (k - 0.25)));
00376 pre1[i].imag = SAMPLE (sin ((M_PI / 256) * (k - 0.25)));
00377 }
00378
00379 for (i = 64; i < 128; i++) {
00380 k = fftorder[i] / 2 + 64;
00381 pre1[i].real = SAMPLE (-cos ((M_PI / 256) * (k - 0.25)));
00382 pre1[i].imag = SAMPLE (-sin ((M_PI / 256) * (k - 0.25)));
00383 }
00384
00385 for (i = 0; i < 64; i++) {
00386 post1[i].real = SAMPLE (cos ((M_PI / 256) * (i + 0.5)));
00387 post1[i].imag = SAMPLE (sin ((M_PI / 256) * (i + 0.5)));
00388 }
00389
00390 for (i = 0; i < 64; i++) {
00391 k = fftorder[i] / 4;
00392 pre2[i].real = SAMPLE (cos ((M_PI / 128) * (k - 0.25)));
00393 pre2[i].imag = SAMPLE (sin ((M_PI / 128) * (k - 0.25)));
00394 }
00395
00396 for (i = 0; i < 32; i++) {
00397 post2[i].real = SAMPLE (cos ((M_PI / 128) * (i + 0.5)));
00398 post2[i].imag = SAMPLE (sin ((M_PI / 128) * (i + 0.5)));
00399 }
00400
00401 #ifdef LIBA52_DJBFFT
00402 if (mm_accel & MM_ACCEL_DJBFFT) {
00403 ifft128 = (void (*) (complex_t *)) fftc4_un128;
00404 ifft64 = (void (*) (complex_t *)) fftc4_un64;
00405 } else
00406 #endif
00407 {
00408 ifft128 = ifft128_c;
00409 ifft64 = ifft64_c;
00410 }
00411 }