• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files

hvirtual/quicktime/ffmpeg/libavcodec/mpegaudiodec.c

Go to the documentation of this file.
00001 /*
00002  * MPEG Audio decoder
00003  * Copyright (c) 2001, 2002 Fabrice Bellard.
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Lesser General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2 of the License, or (at your option) any later version.
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Lesser General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Lesser General Public
00016  * License along with this library; if not, write to the Free Software
00017  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00018  */
00019 
00025 //#define DEBUG
00026 #include "avcodec.h"
00027 #include "bitstream.h"
00028 #include "mpegaudio.h"
00029 #include "dsputil.h"
00030 
00031 /*
00032  * TODO:
00033  *  - in low precision mode, use more 16 bit multiplies in synth filter
00034  *  - test lsf / mpeg25 extensively.
00035  */
00036 
00037 /* define USE_HIGHPRECISION to have a bit exact (but slower) mpeg
00038    audio decoder */
00039 #ifdef CONFIG_MPEGAUDIO_HP
00040 #define USE_HIGHPRECISION
00041 #endif
00042 
00043 #ifdef USE_HIGHPRECISION
00044 #define FRAC_BITS   23   /* fractional bits for sb_samples and dct */
00045 #define WFRAC_BITS  16   /* fractional bits for window */
00046 #else
00047 #define FRAC_BITS   15   /* fractional bits for sb_samples and dct */
00048 #define WFRAC_BITS  14   /* fractional bits for window */
00049 #endif
00050 
00051 #if defined(USE_HIGHPRECISION) && defined(CONFIG_AUDIO_NONSHORT)
00052 typedef int32_t OUT_INT;
00053 #define OUT_MAX INT32_MAX
00054 #define OUT_MIN INT32_MIN
00055 #define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 31)
00056 #else
00057 typedef int16_t OUT_INT;
00058 #define OUT_MAX INT16_MAX
00059 #define OUT_MIN INT16_MIN
00060 #define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
00061 #endif
00062 
00063 #define FRAC_ONE    (1 << FRAC_BITS)
00064 
00065 #define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
00066 #define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
00067 #define FIX(a)   ((int)((a) * FRAC_ONE))
00068 /* WARNING: only correct for posititive numbers */
00069 #define FIXR(a)   ((int)((a) * FRAC_ONE + 0.5))
00070 #define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS)
00071 
00072 #define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))
00073 //#define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) //gcc 3.4 creates an incredibly bloated mess out of this
00074 static always_inline int MULH(int a, int b){
00075     return ((int64_t)(a) * (int64_t)(b))>>32;
00076 }
00077 
00078 #if FRAC_BITS <= 15
00079 typedef int16_t MPA_INT;
00080 #else
00081 typedef int32_t MPA_INT;
00082 #endif
00083 
00084 /****************/
00085 
00086 #define HEADER_SIZE 4
00087 #define BACKSTEP_SIZE 512
00088 
00089 struct GranuleDef;
00090 
00091 typedef struct MPADecodeContext {
00092     uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE];        /* input buffer */
00093     int inbuf_index;
00094     uint8_t *inbuf_ptr, *inbuf;
00095     int frame_size;
00096     int free_format_frame_size; /* frame size in case of free format
00097                                    (zero if currently unknown) */
00098     /* next header (used in free format parsing) */
00099     uint32_t free_format_next_header; 
00100     int error_protection;
00101     int layer;
00102     int sample_rate;
00103     int sample_rate_index; /* between 0 and 8 */
00104     int bit_rate;
00105     int old_frame_size;
00106     GetBitContext gb;
00107     int nb_channels;
00108     int mode;
00109     int mode_ext;
00110     int lsf;
00111     MPA_INT synth_buf[MPA_MAX_CHANNELS][512 * 2] __attribute__((aligned(16)));
00112     int synth_buf_offset[MPA_MAX_CHANNELS];
00113     int32_t sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT] __attribute__((aligned(16)));
00114     int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
00115 #ifdef DEBUG
00116     int frame_count;
00117 #endif
00118     void (*compute_antialias)(struct MPADecodeContext *s, struct GranuleDef *g);
00119     int adu_mode; 
00120     unsigned int dither_state;
00121 } MPADecodeContext;
00122 
00126 typedef struct MP3On4DecodeContext {
00127     int frames;   
00128     int chan_cfg; 
00129     MPADecodeContext *mp3decctx[5]; 
00130 } MP3On4DecodeContext;
00131 
00132 /* layer 3 "granule" */
00133 typedef struct GranuleDef {
00134     uint8_t scfsi;
00135     int part2_3_length;
00136     int big_values;
00137     int global_gain;
00138     int scalefac_compress;
00139     uint8_t block_type;
00140     uint8_t switch_point;
00141     int table_select[3];
00142     int subblock_gain[3];
00143     uint8_t scalefac_scale;
00144     uint8_t count1table_select;
00145     int region_size[3]; /* number of huffman codes in each region */
00146     int preflag;
00147     int short_start, long_end; /* long/short band indexes */
00148     uint8_t scale_factors[40];
00149     int32_t sb_hybrid[SBLIMIT * 18]; /* 576 samples */
00150 } GranuleDef;
00151 
00152 #define MODE_EXT_MS_STEREO 2
00153 #define MODE_EXT_I_STEREO  1
00154 
00155 /* layer 3 huffman tables */
00156 typedef struct HuffTable {
00157     int xsize;
00158     const uint8_t *bits;
00159     const uint16_t *codes;
00160 } HuffTable;
00161 
00162 #include "mpegaudiodectab.h"
00163 
00164 static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g);
00165 static void compute_antialias_float(MPADecodeContext *s, GranuleDef *g);
00166 
00167 /* vlc structure for decoding layer 3 huffman tables */
00168 static VLC huff_vlc[16]; 
00169 static uint8_t *huff_code_table[16];
00170 static VLC huff_quad_vlc[2];
00171 /* computed from band_size_long */
00172 static uint16_t band_index_long[9][23];
00173 /* XXX: free when all decoders are closed */
00174 #define TABLE_4_3_SIZE (8191 + 16)*4
00175 static int8_t  *table_4_3_exp;
00176 static uint32_t *table_4_3_value;
00177 /* intensity stereo coef table */
00178 static int32_t is_table[2][16];
00179 static int32_t is_table_lsf[2][2][16];
00180 static int32_t csa_table[8][4];
00181 static float csa_table_float[8][4];
00182 static int32_t mdct_win[8][36];
00183 
00184 /* lower 2 bits: modulo 3, higher bits: shift */
00185 static uint16_t scale_factor_modshift[64];
00186 /* [i][j]:  2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */
00187 static int32_t scale_factor_mult[15][3];
00188 /* mult table for layer 2 group quantization */
00189 
00190 #define SCALE_GEN(v) \
00191 { FIXR(1.0 * (v)), FIXR(0.7937005259 * (v)), FIXR(0.6299605249 * (v)) }
00192 
00193 static const int32_t scale_factor_mult2[3][3] = {
00194     SCALE_GEN(4.0 / 3.0), /* 3 steps */
00195     SCALE_GEN(4.0 / 5.0), /* 5 steps */
00196     SCALE_GEN(4.0 / 9.0), /* 9 steps */
00197 };
00198 
00199 void ff_mpa_synth_init(MPA_INT *window);
00200 static MPA_INT window[512] __attribute__((aligned(16)));
00201     
00202 /* layer 1 unscaling */
00203 /* n = number of bits of the mantissa minus 1 */
00204 static inline int l1_unscale(int n, int mant, int scale_factor)
00205 {
00206     int shift, mod;
00207     int64_t val;
00208 
00209     shift = scale_factor_modshift[scale_factor];
00210     mod = shift & 3;
00211     shift >>= 2;
00212     val = MUL64(mant + (-1 << n) + 1, scale_factor_mult[n-1][mod]);
00213     shift += n;
00214     /* NOTE: at this point, 1 <= shift >= 21 + 15 */
00215     return (int)((val + (1LL << (shift - 1))) >> shift);
00216 }
00217 
00218 static inline int l2_unscale_group(int steps, int mant, int scale_factor)
00219 {
00220     int shift, mod, val;
00221 
00222     shift = scale_factor_modshift[scale_factor];
00223     mod = shift & 3;
00224     shift >>= 2;
00225 
00226     val = (mant - (steps >> 1)) * scale_factor_mult2[steps >> 2][mod];
00227     /* NOTE: at this point, 0 <= shift <= 21 */
00228     if (shift > 0)
00229         val = (val + (1 << (shift - 1))) >> shift;
00230     return val;
00231 }
00232 
00233 /* compute value^(4/3) * 2^(exponent/4). It normalized to FRAC_BITS */
00234 static inline int l3_unscale(int value, int exponent)
00235 {
00236     unsigned int m;
00237     int e;
00238 
00239     e = table_4_3_exp  [4*value + (exponent&3)];
00240     m = table_4_3_value[4*value + (exponent&3)];
00241     e -= (exponent >> 2);
00242     assert(e>=1);
00243     if (e > 31)
00244         return 0;
00245     m = (m + (1 << (e-1))) >> e;
00246 
00247     return m;
00248 }
00249 
00250 /* all integer n^(4/3) computation code */
00251 #define DEV_ORDER 13
00252 
00253 #define POW_FRAC_BITS 24
00254 #define POW_FRAC_ONE    (1 << POW_FRAC_BITS)
00255 #define POW_FIX(a)   ((int)((a) * POW_FRAC_ONE))
00256 #define POW_MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> POW_FRAC_BITS)
00257 
00258 static int dev_4_3_coefs[DEV_ORDER];
00259 
00260 #if 0 /* unused */
00261 static int pow_mult3[3] = {
00262     POW_FIX(1.0),
00263     POW_FIX(1.25992104989487316476),
00264     POW_FIX(1.58740105196819947474),
00265 };
00266 #endif
00267 
00268 static void int_pow_init(void)
00269 {
00270     int i, a;
00271 
00272     a = POW_FIX(1.0);
00273     for(i=0;i<DEV_ORDER;i++) {
00274         a = POW_MULL(a, POW_FIX(4.0 / 3.0) - i * POW_FIX(1.0)) / (i + 1);
00275         dev_4_3_coefs[i] = a;
00276     }
00277 }
00278 
00279 #if 0 /* unused, remove? */
00280 /* return the mantissa and the binary exponent */
00281 static int int_pow(int i, int *exp_ptr)
00282 {
00283     int e, er, eq, j;
00284     int a, a1;
00285     
00286     /* renormalize */
00287     a = i;
00288     e = POW_FRAC_BITS;
00289     while (a < (1 << (POW_FRAC_BITS - 1))) {
00290         a = a << 1;
00291         e--;
00292     }
00293     a -= (1 << POW_FRAC_BITS);
00294     a1 = 0;
00295     for(j = DEV_ORDER - 1; j >= 0; j--)
00296         a1 = POW_MULL(a, dev_4_3_coefs[j] + a1);
00297     a = (1 << POW_FRAC_BITS) + a1;
00298     /* exponent compute (exact) */
00299     e = e * 4;
00300     er = e % 3;
00301     eq = e / 3;
00302     a = POW_MULL(a, pow_mult3[er]);
00303     while (a >= 2 * POW_FRAC_ONE) {
00304         a = a >> 1;
00305         eq++;
00306     }
00307     /* convert to float */
00308     while (a < POW_FRAC_ONE) {
00309         a = a << 1;
00310         eq--;
00311     }
00312     /* now POW_FRAC_ONE <= a < 2 * POW_FRAC_ONE */
00313 #if POW_FRAC_BITS > FRAC_BITS
00314     a = (a + (1 << (POW_FRAC_BITS - FRAC_BITS - 1))) >> (POW_FRAC_BITS - FRAC_BITS);
00315     /* correct overflow */
00316     if (a >= 2 * (1 << FRAC_BITS)) {
00317         a = a >> 1;
00318         eq++;
00319     }
00320 #endif
00321     *exp_ptr = eq;
00322     return a;
00323 }
00324 #endif
00325 
00326 static int decode_init(AVCodecContext * avctx)
00327 {
00328     MPADecodeContext *s = avctx->priv_data;
00329     static int init=0;
00330     int i, j, k;
00331 
00332 #if defined(USE_HIGHPRECISION) && defined(CONFIG_AUDIO_NONSHORT)
00333     avctx->sample_fmt= SAMPLE_FMT_S32;
00334 #else
00335     avctx->sample_fmt= SAMPLE_FMT_S16;
00336 #endif    
00337     
00338     if(avctx->antialias_algo != FF_AA_FLOAT)
00339         s->compute_antialias= compute_antialias_integer;
00340     else
00341         s->compute_antialias= compute_antialias_float;
00342 
00343     if (!init && !avctx->parse_only) {
00344         /* scale factors table for layer 1/2 */
00345         for(i=0;i<64;i++) {
00346             int shift, mod;
00347             /* 1.0 (i = 3) is normalized to 2 ^ FRAC_BITS */
00348             shift = (i / 3);
00349             mod = i % 3;
00350             scale_factor_modshift[i] = mod | (shift << 2);
00351         }
00352 
00353         /* scale factor multiply for layer 1 */
00354         for(i=0;i<15;i++) {
00355             int n, norm;
00356             n = i + 2;
00357             norm = ((int64_t_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
00358             scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm);
00359             scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm);
00360             scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm);
00361             dprintf("%d: norm=%x s=%x %x %x\n",
00362                     i, norm, 
00363                     scale_factor_mult[i][0],
00364                     scale_factor_mult[i][1],
00365                     scale_factor_mult[i][2]);
00366         }
00367         
00368         ff_mpa_synth_init(window);
00369         
00370         /* huffman decode tables */
00371         huff_code_table[0] = NULL;
00372         for(i=1;i<16;i++) {
00373             const HuffTable *h = &mpa_huff_tables[i];
00374             int xsize, x, y;
00375             unsigned int n;
00376             uint8_t *code_table;
00377 
00378             xsize = h->xsize;
00379             n = xsize * xsize;
00380             /* XXX: fail test */
00381             init_vlc(&huff_vlc[i], 8, n, 
00382                      h->bits, 1, 1, h->codes, 2, 2, 1);
00383             
00384             code_table = av_mallocz(n);
00385             j = 0;
00386             for(x=0;x<xsize;x++) {
00387                 for(y=0;y<xsize;y++)
00388                     code_table[j++] = (x << 4) | y;
00389             }
00390             huff_code_table[i] = code_table;
00391         }
00392         for(i=0;i<2;i++) {
00393             init_vlc(&huff_quad_vlc[i], i == 0 ? 7 : 4, 16, 
00394                      mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1, 1);
00395         }
00396 
00397         for(i=0;i<9;i++) {
00398             k = 0;
00399             for(j=0;j<22;j++) {
00400                 band_index_long[i][j] = k;
00401                 k += band_size_long[i][j];
00402             }
00403             band_index_long[i][22] = k;
00404         }
00405 
00406         /* compute n ^ (4/3) and store it in mantissa/exp format */
00407         table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0]));
00408         if(!table_4_3_exp)
00409             return -1;
00410         table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0]));
00411         if(!table_4_3_value)
00412             return -1;
00413         
00414         int_pow_init();
00415         for(i=1;i<TABLE_4_3_SIZE;i++) {
00416             double f, fm;
00417             int e, m;
00418             f = pow((double)(i/4), 4.0 / 3.0) * pow(2, (i&3)*0.25);
00419             fm = frexp(f, &e);
00420             m = (uint32_t)(fm*(1LL<<31) + 0.5);
00421             e+= FRAC_BITS - 31 + 5;
00422 
00423             /* normalized to FRAC_BITS */
00424             table_4_3_value[i] = m;
00425 //            av_log(NULL, AV_LOG_DEBUG, "%d %d %f\n", i, m, pow((double)i, 4.0 / 3.0));
00426             table_4_3_exp[i] = -e;
00427         }
00428         
00429         for(i=0;i<7;i++) {
00430             float f;
00431             int v;
00432             if (i != 6) {
00433                 f = tan((double)i * M_PI / 12.0);
00434                 v = FIXR(f / (1.0 + f));
00435             } else {
00436                 v = FIXR(1.0);
00437             }
00438             is_table[0][i] = v;
00439             is_table[1][6 - i] = v;
00440         }
00441         /* invalid values */
00442         for(i=7;i<16;i++)
00443             is_table[0][i] = is_table[1][i] = 0.0;
00444 
00445         for(i=0;i<16;i++) {
00446             double f;
00447             int e, k;
00448 
00449             for(j=0;j<2;j++) {
00450                 e = -(j + 1) * ((i + 1) >> 1);
00451                 f = pow(2.0, e / 4.0);
00452                 k = i & 1;
00453                 is_table_lsf[j][k ^ 1][i] = FIXR(f);
00454                 is_table_lsf[j][k][i] = FIXR(1.0);
00455                 dprintf("is_table_lsf %d %d: %x %x\n", 
00456                         i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]);
00457             }
00458         }
00459 
00460         for(i=0;i<8;i++) {
00461             float ci, cs, ca;
00462             ci = ci_table[i];
00463             cs = 1.0 / sqrt(1.0 + ci * ci);
00464             ca = cs * ci;
00465             csa_table[i][0] = FIXHR(cs/4);
00466             csa_table[i][1] = FIXHR(ca/4);
00467             csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
00468             csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4); 
00469             csa_table_float[i][0] = cs;
00470             csa_table_float[i][1] = ca;
00471             csa_table_float[i][2] = ca + cs;
00472             csa_table_float[i][3] = ca - cs; 
00473 //            printf("%d %d %d %d\n", FIX(cs), FIX(cs-1), FIX(ca), FIX(cs)-FIX(ca));
00474 //            av_log(NULL, AV_LOG_DEBUG,"%f %f %f %f\n", cs, ca, ca+cs, ca-cs);
00475         }
00476 
00477         /* compute mdct windows */
00478         for(i=0;i<36;i++) {
00479             for(j=0; j<4; j++){
00480                 double d;
00481                 
00482                 if(j==2 && i%3 != 1)
00483                     continue;
00484                 
00485                 d= sin(M_PI * (i + 0.5) / 36.0);
00486                 if(j==1){
00487                     if     (i>=30) d= 0;
00488                     else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0);
00489                     else if(i>=18) d= 1;
00490                 }else if(j==3){
00491                     if     (i<  6) d= 0;
00492                     else if(i< 12) d= sin(M_PI * (i -  6 + 0.5) / 12.0);
00493                     else if(i< 18) d= 1;
00494                 }
00495                 //merge last stage of imdct into the window coefficients
00496                 d*= 0.5 / cos(M_PI*(2*i + 19)/72);
00497 
00498                 if(j==2)
00499                     mdct_win[j][i/3] = FIXHR((d / (1<<5)));
00500                 else
00501                     mdct_win[j][i  ] = FIXHR((d / (1<<5)));
00502 //                av_log(NULL, AV_LOG_DEBUG, "%2d %d %f\n", i,j,d / (1<<5));
00503             }
00504         }
00505 
00506         /* NOTE: we do frequency inversion adter the MDCT by changing
00507            the sign of the right window coefs */
00508         for(j=0;j<4;j++) {
00509             for(i=0;i<36;i+=2) {
00510                 mdct_win[j + 4][i] = mdct_win[j][i];
00511                 mdct_win[j + 4][i + 1] = -mdct_win[j][i + 1];
00512             }
00513         }
00514 
00515 #if defined(DEBUG)
00516         for(j=0;j<8;j++) {
00517             printf("win%d=\n", j);
00518             for(i=0;i<36;i++)
00519                 printf("%f, ", (double)mdct_win[j][i] / FRAC_ONE);
00520             printf("\n");
00521         }
00522 #endif
00523         init = 1;
00524     }
00525 
00526     s->inbuf_index = 0;
00527     s->inbuf = &s->inbuf1[s->inbuf_index][BACKSTEP_SIZE];
00528     s->inbuf_ptr = s->inbuf;
00529 #ifdef DEBUG
00530     s->frame_count = 0;
00531 #endif
00532     if (avctx->codec_id == CODEC_ID_MP3ADU)
00533         s->adu_mode = 1;
00534     return 0;
00535 }
00536 
00537 /* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
00538 
00539 /* cos(i*pi/64) */
00540 
00541 #define COS0_0  FIXR(0.50060299823519630134)
00542 #define COS0_1  FIXR(0.50547095989754365998)
00543 #define COS0_2  FIXR(0.51544730992262454697)
00544 #define COS0_3  FIXR(0.53104259108978417447)
00545 #define COS0_4  FIXR(0.55310389603444452782)
00546 #define COS0_5  FIXR(0.58293496820613387367)
00547 #define COS0_6  FIXR(0.62250412303566481615)
00548 #define COS0_7  FIXR(0.67480834145500574602)
00549 #define COS0_8  FIXR(0.74453627100229844977)
00550 #define COS0_9  FIXR(0.83934964541552703873)
00551 #define COS0_10 FIXR(0.97256823786196069369)
00552 #define COS0_11 FIXR(1.16943993343288495515)
00553 #define COS0_12 FIXR(1.48416461631416627724)
00554 #define COS0_13 FIXR(2.05778100995341155085)
00555 #define COS0_14 FIXR(3.40760841846871878570)
00556 #define COS0_15 FIXR(10.19000812354805681150)
00557 
00558 #define COS1_0 FIXR(0.50241928618815570551)
00559 #define COS1_1 FIXR(0.52249861493968888062)
00560 #define COS1_2 FIXR(0.56694403481635770368)
00561 #define COS1_3 FIXR(0.64682178335999012954)
00562 #define COS1_4 FIXR(0.78815462345125022473)
00563 #define COS1_5 FIXR(1.06067768599034747134)
00564 #define COS1_6 FIXR(1.72244709823833392782)
00565 #define COS1_7 FIXR(5.10114861868916385802)
00566 
00567 #define COS2_0 FIXR(0.50979557910415916894)
00568 #define COS2_1 FIXR(0.60134488693504528054)
00569 #define COS2_2 FIXR(0.89997622313641570463)
00570 #define COS2_3 FIXR(2.56291544774150617881)
00571 
00572 #define COS3_0 FIXR(0.54119610014619698439)
00573 #define COS3_1 FIXR(1.30656296487637652785)
00574 
00575 #define COS4_0 FIXR(0.70710678118654752439)
00576 
00577 /* butterfly operator */
00578 #define BF(a, b, c)\
00579 {\
00580     tmp0 = tab[a] + tab[b];\
00581     tmp1 = tab[a] - tab[b];\
00582     tab[a] = tmp0;\
00583     tab[b] = MULL(tmp1, c);\
00584 }
00585 
00586 #define BF1(a, b, c, d)\
00587 {\
00588     BF(a, b, COS4_0);\
00589     BF(c, d, -COS4_0);\
00590     tab[c] += tab[d];\
00591 }
00592 
00593 #define BF2(a, b, c, d)\
00594 {\
00595     BF(a, b, COS4_0);\
00596     BF(c, d, -COS4_0);\
00597     tab[c] += tab[d];\
00598     tab[a] += tab[c];\
00599     tab[c] += tab[b];\
00600     tab[b] += tab[d];\
00601 }
00602 
00603 #define ADD(a, b) tab[a] += tab[b]
00604 
00605 /* DCT32 without 1/sqrt(2) coef zero scaling. */
00606 static void dct32(int32_t *out, int32_t *tab)
00607 {
00608     int tmp0, tmp1;
00609 
00610     /* pass 1 */
00611     BF(0, 31, COS0_0);
00612     BF(1, 30, COS0_1);
00613     BF(2, 29, COS0_2);
00614     BF(3, 28, COS0_3);
00615     BF(4, 27, COS0_4);
00616     BF(5, 26, COS0_5);
00617     BF(6, 25, COS0_6);
00618     BF(7, 24, COS0_7);
00619     BF(8, 23, COS0_8);
00620     BF(9, 22, COS0_9);
00621     BF(10, 21, COS0_10);
00622     BF(11, 20, COS0_11);
00623     BF(12, 19, COS0_12);
00624     BF(13, 18, COS0_13);
00625     BF(14, 17, COS0_14);
00626     BF(15, 16, COS0_15);
00627 
00628     /* pass 2 */
00629     BF(0, 15, COS1_0);
00630     BF(1, 14, COS1_1);
00631     BF(2, 13, COS1_2);
00632     BF(3, 12, COS1_3);
00633     BF(4, 11, COS1_4);
00634     BF(5, 10, COS1_5);
00635     BF(6,  9, COS1_6);
00636     BF(7,  8, COS1_7);
00637     
00638     BF(16, 31, -COS1_0);
00639     BF(17, 30, -COS1_1);
00640     BF(18, 29, -COS1_2);
00641     BF(19, 28, -COS1_3);
00642     BF(20, 27, -COS1_4);
00643     BF(21, 26, -COS1_5);
00644     BF(22, 25, -COS1_6);
00645     BF(23, 24, -COS1_7);
00646     
00647     /* pass 3 */
00648     BF(0, 7, COS2_0);
00649     BF(1, 6, COS2_1);
00650     BF(2, 5, COS2_2);
00651     BF(3, 4, COS2_3);
00652     
00653     BF(8, 15, -COS2_0);
00654     BF(9, 14, -COS2_1);
00655     BF(10, 13, -COS2_2);
00656     BF(11, 12, -COS2_3);
00657     
00658     BF(16, 23, COS2_0);
00659     BF(17, 22, COS2_1);
00660     BF(18, 21, COS2_2);
00661     BF(19, 20, COS2_3);
00662     
00663     BF(24, 31, -COS2_0);
00664     BF(25, 30, -COS2_1);
00665     BF(26, 29, -COS2_2);
00666     BF(27, 28, -COS2_3);
00667 
00668     /* pass 4 */
00669     BF(0, 3, COS3_0);
00670     BF(1, 2, COS3_1);
00671     
00672     BF(4, 7, -COS3_0);
00673     BF(5, 6, -COS3_1);
00674     
00675     BF(8, 11, COS3_0);
00676     BF(9, 10, COS3_1);
00677     
00678     BF(12, 15, -COS3_0);
00679     BF(13, 14, -COS3_1);
00680     
00681     BF(16, 19, COS3_0);
00682     BF(17, 18, COS3_1);
00683     
00684     BF(20, 23, -COS3_0);
00685     BF(21, 22, -COS3_1);
00686     
00687     BF(24, 27, COS3_0);
00688     BF(25, 26, COS3_1);
00689     
00690     BF(28, 31, -COS3_0);
00691     BF(29, 30, -COS3_1);
00692     
00693     /* pass 5 */
00694     BF1(0, 1, 2, 3);
00695     BF2(4, 5, 6, 7);
00696     BF1(8, 9, 10, 11);
00697     BF2(12, 13, 14, 15);
00698     BF1(16, 17, 18, 19);
00699     BF2(20, 21, 22, 23);
00700     BF1(24, 25, 26, 27);
00701     BF2(28, 29, 30, 31);
00702     
00703     /* pass 6 */
00704     
00705     ADD( 8, 12);
00706     ADD(12, 10);
00707     ADD(10, 14);
00708     ADD(14,  9);
00709     ADD( 9, 13);
00710     ADD(13, 11);
00711     ADD(11, 15);
00712 
00713     out[ 0] = tab[0];
00714     out[16] = tab[1];
00715     out[ 8] = tab[2];
00716     out[24] = tab[3];
00717     out[ 4] = tab[4];
00718     out[20] = tab[5];
00719     out[12] = tab[6];
00720     out[28] = tab[7];
00721     out[ 2] = tab[8];
00722     out[18] = tab[9];
00723     out[10] = tab[10];
00724     out[26] = tab[11];
00725     out[ 6] = tab[12];
00726     out[22] = tab[13];
00727     out[14] = tab[14];
00728     out[30] = tab[15];
00729     
00730     ADD(24, 28);
00731     ADD(28, 26);
00732     ADD(26, 30);
00733     ADD(30, 25);
00734     ADD(25, 29);
00735     ADD(29, 27);
00736     ADD(27, 31);
00737 
00738     out[ 1] = tab[16] + tab[24];
00739     out[17] = tab[17] + tab[25];
00740     out[ 9] = tab[18] + tab[26];
00741     out[25] = tab[19] + tab[27];
00742     out[ 5] = tab[20] + tab[28];
00743     out[21] = tab[21] + tab[29];
00744     out[13] = tab[22] + tab[30];
00745     out[29] = tab[23] + tab[31];
00746     out[ 3] = tab[24] + tab[20];
00747     out[19] = tab[25] + tab[21];
00748     out[11] = tab[26] + tab[22];
00749     out[27] = tab[27] + tab[23];
00750     out[ 7] = tab[28] + tab[18];
00751     out[23] = tab[29] + tab[19];
00752     out[15] = tab[30] + tab[17];
00753     out[31] = tab[31];
00754 }
00755 
00756 #if FRAC_BITS <= 15
00757 
00758 static inline int round_sample(int *sum)
00759 {
00760     int sum1;
00761     sum1 = (*sum) >> OUT_SHIFT;
00762     *sum &= (1<<OUT_SHIFT)-1;
00763     if (sum1 < OUT_MIN)
00764         sum1 = OUT_MIN;
00765     else if (sum1 > OUT_MAX)
00766         sum1 = OUT_MAX;
00767     return sum1;
00768 }
00769 
00770 #if defined(ARCH_POWERPC_405)
00771 
00772 /* signed 16x16 -> 32 multiply add accumulate */
00773 #define MACS(rt, ra, rb) \
00774     asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
00775 
00776 /* signed 16x16 -> 32 multiply */
00777 #define MULS(ra, rb) \
00778     ({ int __rt; asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); __rt; })
00779 
00780 #else
00781 
00782 /* signed 16x16 -> 32 multiply add accumulate */
00783 #define MACS(rt, ra, rb) rt += (ra) * (rb)
00784 
00785 /* signed 16x16 -> 32 multiply */
00786 #define MULS(ra, rb) ((ra) * (rb))
00787 
00788 #endif
00789 
00790 #else
00791 
00792 static inline int round_sample(int64_t *sum) 
00793 {
00794     int sum1;
00795     sum1 = (int)((*sum) >> OUT_SHIFT);
00796     *sum &= (1<<OUT_SHIFT)-1;
00797     if (sum1 < OUT_MIN)
00798         sum1 = OUT_MIN;
00799     else if (sum1 > OUT_MAX)
00800         sum1 = OUT_MAX;
00801     return sum1;
00802 }
00803 
00804 #define MULS(ra, rb) MUL64(ra, rb)
00805 
00806 #endif
00807 
00808 #define SUM8(sum, op, w, p) \
00809 {                                               \
00810     sum op MULS((w)[0 * 64], p[0 * 64]);\
00811     sum op MULS((w)[1 * 64], p[1 * 64]);\
00812     sum op MULS((w)[2 * 64], p[2 * 64]);\
00813     sum op MULS((w)[3 * 64], p[3 * 64]);\
00814     sum op MULS((w)[4 * 64], p[4 * 64]);\
00815     sum op MULS((w)[5 * 64], p[5 * 64]);\
00816     sum op MULS((w)[6 * 64], p[6 * 64]);\
00817     sum op MULS((w)[7 * 64], p[7 * 64]);\
00818 }
00819 
00820 #define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
00821 {                                               \
00822     int tmp;\
00823     tmp = p[0 * 64];\
00824     sum1 op1 MULS((w1)[0 * 64], tmp);\
00825     sum2 op2 MULS((w2)[0 * 64], tmp);\
00826     tmp = p[1 * 64];\
00827     sum1 op1 MULS((w1)[1 * 64], tmp);\
00828     sum2 op2 MULS((w2)[1 * 64], tmp);\
00829     tmp = p[2 * 64];\
00830     sum1 op1 MULS((w1)[2 * 64], tmp);\
00831     sum2 op2 MULS((w2)[2 * 64], tmp);\
00832     tmp = p[3 * 64];\
00833     sum1 op1 MULS((w1)[3 * 64], tmp);\
00834     sum2 op2 MULS((w2)[3 * 64], tmp);\
00835     tmp = p[4 * 64];\
00836     sum1 op1 MULS((w1)[4 * 64], tmp);\
00837     sum2 op2 MULS((w2)[4 * 64], tmp);\
00838     tmp = p[5 * 64];\
00839     sum1 op1 MULS((w1)[5 * 64], tmp);\
00840     sum2 op2 MULS((w2)[5 * 64], tmp);\
00841     tmp = p[6 * 64];\
00842     sum1 op1 MULS((w1)[6 * 64], tmp);\
00843     sum2 op2 MULS((w2)[6 * 64], tmp);\
00844     tmp = p[7 * 64];\
00845     sum1 op1 MULS((w1)[7 * 64], tmp);\
00846     sum2 op2 MULS((w2)[7 * 64], tmp);\
00847 }
00848 
00849 void ff_mpa_synth_init(MPA_INT *window)
00850 {
00851     int i;
00852 
00853     /* max = 18760, max sum over all 16 coefs : 44736 */
00854     for(i=0;i<257;i++) {
00855         int v;
00856         v = mpa_enwindow[i];
00857 #if WFRAC_BITS < 16
00858         v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS);
00859 #endif
00860         window[i] = v;
00861         if ((i & 63) != 0)
00862             v = -v;
00863         if (i != 0)
00864             window[512 - i] = v;
00865     }   
00866 }
00867 
00868 /* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
00869    32 samples. */
00870 /* XXX: optimize by avoiding ring buffer usage */
00871 void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
00872                          MPA_INT *window, int *dither_state,
00873                          OUT_INT *samples, int incr, 
00874                          int32_t sb_samples[SBLIMIT])
00875 {
00876     int32_t tmp[32];
00877     register MPA_INT *synth_buf;
00878     register const MPA_INT *w, *w2, *p;
00879     int j, offset, v;
00880     OUT_INT *samples2;
00881 #if FRAC_BITS <= 15
00882     int sum, sum2;
00883 #else
00884     int64_t sum, sum2;
00885 #endif
00886 
00887     dct32(tmp, sb_samples);
00888     
00889     offset = *synth_buf_offset;
00890     synth_buf = synth_buf_ptr + offset;
00891 
00892     for(j=0;j<32;j++) {
00893         v = tmp[j];
00894 #if FRAC_BITS <= 15
00895         /* NOTE: can cause a loss in precision if very high amplitude
00896            sound */
00897         if (v > 32767)
00898             v = 32767;
00899         else if (v < -32768)
00900             v = -32768;
00901 #endif
00902         synth_buf[j] = v;
00903     }
00904     /* copy to avoid wrap */
00905     memcpy(synth_buf + 512, synth_buf, 32 * sizeof(MPA_INT));
00906 
00907     samples2 = samples + 31 * incr;
00908     w = window;
00909     w2 = window + 31;
00910 
00911     sum = *dither_state;
00912     p = synth_buf + 16;
00913     SUM8(sum, +=, w, p);
00914     p = synth_buf + 48;
00915     SUM8(sum, -=, w + 32, p);
00916     *samples = round_sample(&sum);
00917     samples += incr;
00918     w++;
00919 
00920     /* we calculate two samples at the same time to avoid one memory
00921        access per two sample */
00922     for(j=1;j<16;j++) {
00923         sum2 = 0;
00924         p = synth_buf + 16 + j;
00925         SUM8P2(sum, +=, sum2, -=, w, w2, p);
00926         p = synth_buf + 48 - j;
00927         SUM8P2(sum, -=, sum2, -=, w + 32, w2 + 32, p);
00928 
00929         *samples = round_sample(&sum);
00930         samples += incr;
00931         sum += sum2;
00932         *samples2 = round_sample(&sum);
00933         samples2 -= incr;
00934         w++;
00935         w2--;
00936     }
00937     
00938     p = synth_buf + 32;
00939     SUM8(sum, -=, w + 32, p);
00940     *samples = round_sample(&sum);
00941     *dither_state= sum;
00942 
00943     offset = (offset - 32) & 511;
00944     *synth_buf_offset = offset;
00945 }
00946 
00947 #define C3 FIXHR(0.86602540378443864676/2)
00948 
00949 /* 0.5 / cos(pi*(2*i+1)/36) */
00950 static const int icos36[9] = {
00951     FIXR(0.50190991877167369479),
00952     FIXR(0.51763809020504152469), //0
00953     FIXR(0.55168895948124587824),
00954     FIXR(0.61038729438072803416),
00955     FIXR(0.70710678118654752439), //1
00956     FIXR(0.87172339781054900991),
00957     FIXR(1.18310079157624925896),
00958     FIXR(1.93185165257813657349), //2
00959     FIXR(5.73685662283492756461),
00960 };
00961 
00962 /* 12 points IMDCT. We compute it "by hand" by factorizing obvious
00963    cases. */
00964 static void imdct12(int *out, int *in)
00965 {
00966     int in0, in1, in2, in3, in4, in5, t1, t2;
00967 
00968     in0= in[0*3];
00969     in1= in[1*3] + in[0*3];
00970     in2= in[2*3] + in[1*3];
00971     in3= in[3*3] + in[2*3];
00972     in4= in[4*3] + in[3*3];
00973     in5= in[5*3] + in[4*3];
00974     in5 += in3;
00975     in3 += in1;
00976 
00977     in2= MULH(2*in2, C3);
00978     in3= MULH(2*in3, C3);
00979     
00980     t1 = in0 - in4;
00981     t2 = MULL(in1 - in5, icos36[4]);
00982 
00983     out[ 7]= 
00984     out[10]= t1 + t2;
00985     out[ 1]=
00986     out[ 4]= t1 - t2;
00987 
00988     in0 += in4>>1;
00989     in4 = in0 + in2;
00990     in1 += in5>>1;
00991     in5 = MULL(in1 + in3, icos36[1]);    
00992     out[ 8]= 
00993     out[ 9]= in4 + in5;
00994     out[ 2]=
00995     out[ 3]= in4 - in5;
00996     
00997     in0 -= in2;
00998     in1 = MULL(in1 - in3, icos36[7]);
00999     out[ 0]=
01000     out[ 5]= in0 - in1;
01001     out[ 6]=
01002     out[11]= in0 + in1;    
01003 }
01004 
01005 /* cos(pi*i/18) */
01006 #define C1 FIXHR(0.98480775301220805936/2)
01007 #define C2 FIXHR(0.93969262078590838405/2)
01008 #define C3 FIXHR(0.86602540378443864676/2)
01009 #define C4 FIXHR(0.76604444311897803520/2)
01010 #define C5 FIXHR(0.64278760968653932632/2)
01011 #define C6 FIXHR(0.5/2)
01012 #define C7 FIXHR(0.34202014332566873304/2)
01013 #define C8 FIXHR(0.17364817766693034885/2)
01014 
01015 
01016 /* using Lee like decomposition followed by hand coded 9 points DCT */
01017 static void imdct36(int *out, int *buf, int *in, int *win)
01018 {
01019     int i, j, t0, t1, t2, t3, s0, s1, s2, s3;
01020     int tmp[18], *tmp1, *in1;
01021 
01022     for(i=17;i>=1;i--)
01023         in[i] += in[i-1];
01024     for(i=17;i>=3;i-=2)
01025         in[i] += in[i-2];
01026 
01027     for(j=0;j<2;j++) {
01028         tmp1 = tmp + j;
01029         in1 = in + j;
01030 #if 0
01031 //more accurate but slower
01032         int64_t t0, t1, t2, t3;
01033         t2 = in1[2*4] + in1[2*8] - in1[2*2];
01034         
01035         t3 = (in1[2*0] + (int64_t)(in1[2*6]>>1))<<32;
01036         t1 = in1[2*0] - in1[2*6];
01037         tmp1[ 6] = t1 - (t2>>1);
01038         tmp1[16] = t1 + t2;
01039 
01040         t0 = MUL64(2*(in1[2*2] + in1[2*4]),    C2);
01041         t1 = MUL64(   in1[2*4] - in1[2*8] , -2*C8);
01042         t2 = MUL64(2*(in1[2*2] + in1[2*8]),   -C4);
01043         
01044         tmp1[10] = (t3 - t0 - t2) >> 32;
01045         tmp1[ 2] = (t3 + t0 + t1) >> 32;
01046         tmp1[14] = (t3 + t2 - t1) >> 32;
01047         
01048         tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3);
01049         t2 = MUL64(2*(in1[2*1] + in1[2*5]),    C1);
01050         t3 = MUL64(   in1[2*5] - in1[2*7] , -2*C7);
01051         t0 = MUL64(2*in1[2*3], C3);
01052 
01053         t1 =