00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00025 #include "dsputil.h"
00026
00031 int ff_fft_init(FFTContext *s, int nbits, int inverse)
00032 {
00033 int i, j, m, n;
00034 float alpha, c1, s1, s2;
00035
00036 s->nbits = nbits;
00037 n = 1 << nbits;
00038
00039 s->exptab = av_malloc((n / 2) * sizeof(FFTComplex));
00040 if (!s->exptab)
00041 goto fail;
00042 s->revtab = av_malloc(n * sizeof(uint16_t));
00043 if (!s->revtab)
00044 goto fail;
00045 s->inverse = inverse;
00046
00047 s2 = inverse ? 1.0 : -1.0;
00048
00049 for(i=0;i<(n/2);i++) {
00050 alpha = 2 * M_PI * (float)i / (float)n;
00051 c1 = cos(alpha);
00052 s1 = sin(alpha) * s2;
00053 s->exptab[i].re = c1;
00054 s->exptab[i].im = s1;
00055 }
00056 s->fft_calc = ff_fft_calc_c;
00057 s->exptab1 = NULL;
00058
00059
00060 #if (defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR)) || defined(HAVE_ALTIVEC)
00061 {
00062 int has_vectors = 0;
00063
00064 #if defined(HAVE_MMX)
00065 has_vectors = mm_support() & MM_SSE;
00066 #endif
00067 #if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE)
00068 has_vectors = mm_support() & MM_ALTIVEC;
00069 #endif
00070 if (has_vectors) {
00071 int np, nblocks, np2, l;
00072 FFTComplex *q;
00073
00074 np = 1 << nbits;
00075 nblocks = np >> 3;
00076 np2 = np >> 1;
00077 s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
00078 if (!s->exptab1)
00079 goto fail;
00080 q = s->exptab1;
00081 do {
00082 for(l = 0; l < np2; l += 2 * nblocks) {
00083 *q++ = s->exptab[l];
00084 *q++ = s->exptab[l + nblocks];
00085
00086 q->re = -s->exptab[l].im;
00087 q->im = s->exptab[l].re;
00088 q++;
00089 q->re = -s->exptab[l + nblocks].im;
00090 q->im = s->exptab[l + nblocks].re;
00091 q++;
00092 }
00093 nblocks = nblocks >> 1;
00094 } while (nblocks != 0);
00095 av_freep(&s->exptab);
00096 #if defined(HAVE_MMX)
00097 s->fft_calc = ff_fft_calc_sse;
00098 #else
00099 s->fft_calc = ff_fft_calc_altivec;
00100 #endif
00101 }
00102 }
00103 #endif
00104
00105
00106
00107 for(i=0;i<n;i++) {
00108 m=0;
00109 for(j=0;j<nbits;j++) {
00110 m |= ((i >> j) & 1) << (nbits-j-1);
00111 }
00112 s->revtab[i]=m;
00113 }
00114 return 0;
00115 fail:
00116 av_freep(&s->revtab);
00117 av_freep(&s->exptab);
00118 av_freep(&s->exptab1);
00119 return -1;
00120 }
00121
00122
00123 #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
00124 {\
00125 FFTSample ax, ay, bx, by;\
00126 bx=pre1;\
00127 by=pim1;\
00128 ax=qre1;\
00129 ay=qim1;\
00130 pre = (bx + ax);\
00131 pim = (by + ay);\
00132 qre = (bx - ax);\
00133 qim = (by - ay);\
00134 }
00135
00136 #define MUL16(a,b) ((a) * (b))
00137
00138 #define CMUL(pre, pim, are, aim, bre, bim) \
00139 {\
00140 pre = (MUL16(are, bre) - MUL16(aim, bim));\
00141 pim = (MUL16(are, bim) + MUL16(bre, aim));\
00142 }
00143
00149 void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
00150 {
00151 int ln = s->nbits;
00152 int j, np, np2;
00153 int nblocks, nloops;
00154 register FFTComplex *p, *q;
00155 FFTComplex *exptab = s->exptab;
00156 int l;
00157 FFTSample tmp_re, tmp_im;
00158
00159 np = 1 << ln;
00160
00161
00162
00163 p=&z[0];
00164 j=(np >> 1);
00165 do {
00166 BF(p[0].re, p[0].im, p[1].re, p[1].im,
00167 p[0].re, p[0].im, p[1].re, p[1].im);
00168 p+=2;
00169 } while (--j != 0);
00170
00171
00172
00173
00174 p=&z[0];
00175 j=np >> 2;
00176 if (s->inverse) {
00177 do {
00178 BF(p[0].re, p[0].im, p[2].re, p[2].im,
00179 p[0].re, p[0].im, p[2].re, p[2].im);
00180 BF(p[1].re, p[1].im, p[3].re, p[3].im,
00181 p[1].re, p[1].im, -p[3].im, p[3].re);
00182 p+=4;
00183 } while (--j != 0);
00184 } else {
00185 do {
00186 BF(p[0].re, p[0].im, p[2].re, p[2].im,
00187 p[0].re, p[0].im, p[2].re, p[2].im);
00188 BF(p[1].re, p[1].im, p[3].re, p[3].im,
00189 p[1].re, p[1].im, p[3].im, -p[3].re);
00190 p+=4;
00191 } while (--j != 0);
00192 }
00193
00194
00195 nblocks = np >> 3;
00196 nloops = 1 << 2;
00197 np2 = np >> 1;
00198 do {
00199 p = z;
00200 q = z + nloops;
00201 for (j = 0; j < nblocks; ++j) {
00202 BF(p->re, p->im, q->re, q->im,
00203 p->re, p->im, q->re, q->im);
00204
00205 p++;
00206 q++;
00207 for(l = nblocks; l < np2; l += nblocks) {
00208 CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
00209 BF(p->re, p->im, q->re, q->im,
00210 p->re, p->im, tmp_re, tmp_im);
00211 p++;
00212 q++;
00213 }
00214
00215 p += nloops;
00216 q += nloops;
00217 }
00218 nblocks = nblocks >> 1;
00219 nloops = nloops << 1;
00220 } while (nblocks != 0);
00221 }
00222
00226 void ff_fft_permute(FFTContext *s, FFTComplex *z)
00227 {
00228 int j, k, np;
00229 FFTComplex tmp;
00230 const uint16_t *revtab = s->revtab;
00231
00232
00233 np = 1 << s->nbits;
00234 for(j=0;j<np;j++) {
00235 k = revtab[j];
00236 if (k < j) {
00237 tmp = z[k];
00238 z[k] = z[j];
00239 z[j] = tmp;
00240 }
00241 }
00242 }
00243
00244 void ff_fft_end(FFTContext *s)
00245 {
00246 av_freep(&s->revtab);
00247 av_freep(&s->exptab);
00248 av_freep(&s->exptab1);
00249 }
00250