00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <math.h>
00004 #include <string.h>
00005 #include "common.h"
00006 #include "options.h"
00007 #include "encoder.h"
00008 #include "mem.h"
00009 #include "fft.h"
00010 #include "ath.h"
00011 #define OLDTHRESHx
00012 #include "psycho_3.h"
00013 #include "psycho_3priv.h"
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #define DBTAB 1000
00025 static double dbtable[DBTAB];
00026
00027 #define CRITBANDMAX 32
00028 int cbands=0;
00029 int cbandindex[CRITBANDMAX];
00030
00031
00032 #define SUBSIZE 136
00033 int freq_subset[SUBSIZE];
00034 FLOAT bark[HBLKSIZE], ath[HBLKSIZE];
00035
00036 int *numlines;
00037 FLOAT *cbval;
00038 int partition[HBLKSIZE];
00039 static D1408 *fft_buf;
00040
00041 frame_header *header;
00042
00043
00044 INLINE double psycho_3_add_db (double a, double b)
00045 {
00046
00047
00048
00049
00050
00051
00052 FLOAT fdiff;
00053 int idiff;
00054 fdiff = (10.0 * (a - b));
00055
00056 if (fdiff > 990.0) {
00057 return a;
00058 }
00059 if (fdiff < -990.0) {
00060 return (b);
00061 }
00062
00063 idiff = (int) fdiff;
00064 if (idiff >= 0) {
00065 return (a + dbtable[idiff]);
00066 }
00067
00068 return (b + dbtable[-idiff]);
00069 }
00070
00071 void psycho_3 (short buffer[2][1152], double scale[2][SBLIMIT],
00072 double ltmin[2][SBLIMIT], frame_info * frame, options *glopts)
00073 {
00074 int nch = frame->nch;
00075 int sblimit = frame->sblimit;
00076 int k, i;
00077 static char init = 0;
00078 static int off[2] = { 256, 256 };
00079 FLOAT sample[BLKSIZE];
00080
00081 FLOAT energy[BLKSIZE];
00082 FLOAT power[HBLKSIZE];
00083 FLOAT Xtm[HBLKSIZE], Xnm[HBLKSIZE];
00084 int tonelabel[HBLKSIZE], noiselabel[HBLKSIZE];
00085 FLOAT LTg[HBLKSIZE];
00086 double Lsb[SBLIMIT];
00087
00088 header = frame->header;
00089
00090 if (init==0) {
00091 psycho_3_init(glopts);
00092 init++;
00093 }
00094
00095
00096 for (k = 0; k < nch; k++) {
00097 int ok = off[k] % 1408;
00098 for (i = 0; i < 1152; i++) {
00099 fft_buf[k][ok++] = (FLOAT) buffer[k][i] / SCALE;
00100 if (ok >= 1408)
00101 ok = 0;
00102 }
00103 ok = (off[k] + 1216) % 1408;
00104 for (i = 0; i < BLKSIZE; i++) {
00105 sample[i] = fft_buf[k][ok++];
00106 if (ok >= 1408)
00107 ok = 0;
00108 }
00109
00110 off[k] += 1152;
00111 off[k] %= 1408;
00112
00113 psycho_3_fft(sample, energy);
00114 psycho_3_powerdensityspectrum(energy, power);
00115 psycho_3_spl(Lsb, power, &scale[k][0]);
00116 psycho_3_tonal_label (power, tonelabel, Xtm);
00117 psycho_3_noise_label (power, energy, tonelabel, noiselabel, Xnm);
00118 if (glopts->verbosity > 20)
00119 psycho_3_dump(tonelabel, Xtm, noiselabel, Xnm);
00120 psycho_3_decimation(ath, tonelabel, Xtm, noiselabel, Xnm, bark);
00121 psycho_3_threshold(LTg, tonelabel, Xtm, noiselabel, Xnm, bark, ath, bitrate[header->version][header->bitrate_index] / nch, freq_subset);
00122 psycho_3_minimummasking(LTg, <min[k][0], freq_subset);
00123 psycho_3_smr(<min[k][0], Lsb);
00124 }
00125 }
00126
00127
00128 void psycho_3_fft(FLOAT sample[BLKSIZE], FLOAT energy[BLKSIZE])
00129 {
00130 FLOAT x_real[BLKSIZE];
00131 int i;
00132 static int init = 0;
00133 static FLOAT *window;
00134
00135 if (!init) {
00136 window = (FLOAT *) mem_alloc (sizeof (DFFT), "window");
00137 register FLOAT sqrt_8_over_3 = pow (8.0 / 3.0, 0.5);
00138 for (i = 0; i < BLKSIZE; i++) {
00139 window[i] = sqrt_8_over_3 * 0.5 * (1 - cos (2.0 * PI * i / (BLKSIZE))) / BLKSIZE;
00140 }
00141 init++;
00142 }
00143
00144
00145 for (i = 0; i < BLKSIZE; i++)
00146 x_real[i] = (FLOAT) (sample[i] * window[i]);
00147
00148 psycho_1_fft (x_real, energy, BLKSIZE);
00149 }
00150
00151
00152 void psycho_3_powerdensityspectrum(FLOAT energy[BLKSIZE], FLOAT power[HBLKSIZE]) {
00153 int i;
00154 for (i=1;i<HBLKSIZE;i++) {
00155 if (energy[i] < 1E-20)
00156 power[i] = -200.0 + POWERNORM;
00157 else
00158 power[i] = 10 * log10 (energy[i]) + POWERNORM;
00159 }
00160 }
00161
00162
00163 void psycho_3_spl(double *Lsb, FLOAT *power, double *scale) {
00164 int i;
00165 FLOAT Xmax[SBLIMIT];
00166
00167 for (i=0;i<SBLIMIT;i++) {
00168 Xmax[i] = DBMIN;
00169 }
00170
00171 for (i=1;i<HBLKSIZE;i++) {
00172 int index = i>>4;
00173 if (Xmax[index] < power[i])
00174 Xmax[index] = power[i];
00175 }
00176
00177
00178
00179 for (i=0;i<SBLIMIT;i++) {
00180 double val = 20 * log10 (scale[i] * 32768) - 10;
00181 Lsb[i] = MAX(Xmax[i], val);
00182 }
00183 }
00184
00185
00186 void psycho_3_tonal_label (FLOAT power[HBLKSIZE], int *tonelabel, FLOAT Xtm[HBLKSIZE])
00187 {
00188 int i;
00189 int maxima[HBLKSIZE];
00190
00191
00192 maxima[0]=maxima[HBLKSIZE-1]=0;
00193 tonelabel[0]=tonelabel[HBLKSIZE-1]=0;
00194 Xtm[0] = Xtm[HBLKSIZE-1] = DBMIN;
00195 for (i=1;i<HBLKSIZE-1;i++) {
00196 tonelabel[i] = 0;
00197 Xtm[i] = DBMIN;
00198 if (power[i]>power[i-1] && power[i]>power[i+1])
00199 maxima[i]=1;
00200 else
00201 maxima[i]=0;
00202 }
00203
00204 {
00205
00206
00207
00208
00209
00210
00211
00212 psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 2, 63, 2);
00213 psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 63,127,3);
00214 psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 127,255,6);
00215 psycho_3_tonal_label_range(power, tonelabel, maxima, Xtm, 255,500,12);
00216
00217 }
00218 }
00219
00220
00221
00222
00223 void psycho_3_tonal_label_range(FLOAT *power, int *tonelabel, int *maxima, FLOAT *Xtm, int start, int end, int srange) {
00224 int j,k;
00225
00226 for (k=start;k<end;k++)
00227 if (maxima[k] == 1) {
00228 tonelabel[k] = TONE;
00229 for (j=-srange;j<=+srange;j++)
00230 if (abs(j) > 1)
00231 if ((power[k] - power[k+j]) < 7.0)
00232 tonelabel[k] = 0;
00233 if (tonelabel[k] == TONE) {
00234
00235
00236
00237
00238 double temp = psycho_3_add_db(power[k-1], power[k]);
00239 Xtm[k] = psycho_3_add_db(temp, power[k+1]);
00240
00241
00242
00243 for (j=-srange;j<=+srange;j++)
00244 power[k+j] = DBMIN;
00245 }
00246 }
00247 }
00248
00249 void psycho_3_init_add_db (void)
00250 {
00251 int i;
00252 double x;
00253 for (i = 0; i < DBTAB; i++) {
00254 x = (double) i / 10.0;
00255 dbtable[i] = 10 * log10 (1 + pow (10.0, x / 10.0)) - x;
00256 }
00257 }
00258
00259
00260
00261
00262
00263
00264 void psycho_3_noise_label (FLOAT power[HBLKSIZE], FLOAT energy[BLKSIZE], int *tonelabel, int *noiselabel, FLOAT Xnm[HBLKSIZE]) {
00265 int i,j;
00266
00267 Xnm[0] = DBMIN;
00268 for (i=0;i<cbands;i++) {
00269
00270 double sum = DBMIN;
00271 double esum=0;
00272 double centreweight = 0;
00273 int centre;
00274 for (j=cbandindex[i]; j<cbandindex[i+1]; j++) {
00275 Xnm[j] = DBMIN;
00276
00277
00278 if (power[j] != DBMIN) {
00279
00280 sum = psycho_3_add_db(power[j], sum);
00281
00282
00283
00284
00285
00286
00287
00288 esum += energy[j];
00289 centreweight += (j - cbandindex[i]) * energy[j];
00290 }
00291 }
00292
00293 if (sum<=DBMIN)
00294
00295
00296 centre = (cbandindex[i] + cbandindex[i+1])/2;
00297 else
00298
00299 centre = cbandindex[i] + (int)(centreweight/esum);
00300
00301 Xnm[centre] = sum;
00302 noiselabel[centre] = NOISE;
00303 }
00304 }
00305
00306
00307
00308
00309 void psycho_3_decimation(FLOAT *ath, int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm, FLOAT *bark) {
00310 int i;
00311
00312
00313 for (i=1;i<HBLKSIZE;i++) {
00314 if (noiselabel[i]==NOISE) {
00315 if (Xnm[i] < ath[i]) {
00316
00317 Xnm[i] = DBMIN;
00318 noiselabel[i]=0;
00319 }
00320 }
00321 if (tonelabel[i] == TONE) {
00322 if (Xtm[i] < ath[i]) {
00323 Xtm[i] = DBMIN;
00324 tonelabel[i]=0;
00325 }
00326 }
00327 }
00328
00329
00330
00331 }
00332
00333
00334
00335
00336
00337
00338
00339 void psycho_3_threshold(FLOAT *LTg, int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm, FLOAT *bark, FLOAT *ath, int bit_rate, int *freq_subset) {
00340 int i,j,k;
00341 FLOAT LTtm[SUBSIZE];
00342 FLOAT LTnm[SUBSIZE];
00343
00344 for (i=0;i<SUBSIZE;i++) {
00345 LTtm[i] = DBMIN;
00346 LTnm[i] = DBMIN;
00347 }
00348
00349
00350
00351 for (k=1;k<HBLKSIZE;k++) {
00352
00353 if (tonelabel[k]==TONE) {
00354 for (j=0;j<SUBSIZE;j++) {
00355
00356 FLOAT dz = bark[freq_subset[j]] - bark[k];
00357 if (dz >= -3.0 && dz < 8.0) {
00358 FLOAT vf;
00359 FLOAT av = -1.525 - 0.275 * bark[k] - 4.5 + Xtm[k];
00360
00361 if (dz < -1)
00362 vf = 17 * (dz + 1) - (0.4 * Xtm[k] + 6);
00363 else if (dz < 0)
00364 vf = (0.4 * Xtm[k] + 6) * dz;
00365 else if (dz < 1)
00366 vf = (-17 * dz);
00367 else
00368 vf = -(dz - 1) * (17 - 0.15 * Xtm[k]) - 17;
00369 LTtm[j] = psycho_3_add_db (LTtm[j], av + vf);
00370 }
00371 }
00372 }
00373
00374
00375 if (noiselabel[k]==NOISE) {
00376 for (j=0;j<SUBSIZE;j++) {
00377
00378 FLOAT dz = bark[freq_subset[j]] - bark[k];
00379 if (dz >= -3.0 && dz < 8.0) {
00380 FLOAT vf;
00381 FLOAT av = -1.525 - 0.175 * bark[k] - 0.5 + Xnm[k];
00382
00383 if (dz < -1)
00384 vf = 17 * (dz + 1) - (0.4 * Xnm[k] + 6);
00385 else if (dz < 0)
00386 vf = (0.4 * Xnm[k] + 6) * dz;
00387 else if (dz < 1)
00388 vf = (-17 * dz);
00389 else
00390 vf = -(dz - 1) * (17 - 0.15 * Xnm[k]) - 17;
00391 LTnm[j] = psycho_3_add_db (LTnm[j], av + vf);
00392 }
00393 }
00394 }
00395 }
00396
00397
00398
00399 for (i=0;i<SUBSIZE;i++) {
00400 LTg[i] = psycho_3_add_db(LTnm[i], LTtm[i]);
00401 if (bit_rate < 96)
00402 LTg[i] = psycho_3_add_db(ath[freq_subset[i]], LTg[i]);
00403 else
00404 LTg[i] = psycho_3_add_db(ath[freq_subset[i]]-12.0, LTg[i]);
00405 }
00406 }
00407
00408
00409 void psycho_3_minimummasking(FLOAT *LTg, double *LTmin, int *freq_subset) {
00410 int i;
00411
00412 for (i=0;i<SBLIMIT;i++)
00413 LTmin[i] = 999999.9;
00414
00415 for (i=0;i<SUBSIZE;i++) {
00416 int index = freq_subset[i]>>4;
00417 if (LTmin[index] > LTg[i]) {
00418 LTmin[index] = LTg[i];
00419 }
00420 }
00421 }
00422
00423
00424
00425
00426
00427 void psycho_3_smr(double *LTmin, double *Lsb) {
00428 int i;
00429 for (i=0;i<SBLIMIT;i++) {
00430 LTmin[i] = Lsb[i] - LTmin[i];
00431 }
00432 }
00433
00434 void psycho_3_init(options *glopts) {
00435 int i;
00436 int cbase = 0;
00437
00438 fft_buf = (D1408 *) mem_alloc ((long) sizeof (D1408) * 2, "fft_buf");
00439
00440
00441 psycho_3_init_add_db();
00442
00443
00444 FLOAT sfreq = (FLOAT) s_freq[header->version][header->sampling_frequency] * 1000;
00445 for (i=1;i<HBLKSIZE; i++) {
00446 FLOAT freq = i * sfreq/BLKSIZE;
00447 bark[i] = toolame_freq2bark(freq);
00448 ath[i] = ATH_dB(freq,glopts->athlevel);
00449 }
00450
00451 {
00452
00453
00454
00455
00456 numlines = (int *)calloc(HBLKSIZE, sizeof(int));
00457 cbval = (float *)calloc(HBLKSIZE, sizeof(float));
00458 cbandindex[0] = 1;
00459 for (i=1;i<HBLKSIZE;i++) {
00460 if ((bark[i] - bark[cbase]) > 1.0) {
00461
00462
00463
00464 cbase = i;
00465 cbands++;
00466 cbandindex[cbands] = cbase;
00467 }
00468
00469 partition[i] = cbands;
00470
00471 numlines[cbands]++;
00472 }
00473
00474 cbands++;
00475 cbandindex[cbands] = 513;
00476
00477
00478
00479 for (i=1;i<HBLKSIZE;i++)
00480 cbval[partition[i]] += bark[i];
00481 for (i=1;i<CBANDS;i++) {
00482 if (numlines[i] != 0)
00483 cbval[i] /= numlines[i];
00484 else {
00485 cbval[i]=0;
00486 }
00487 }
00488 }
00489
00490 {
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503 int freq_index=0;
00504 for (i=1;i<(3*16)+1;i++)
00505 freq_subset[freq_index++] = i;
00506 for (;i<(6*16)+1;i+=2)
00507 freq_subset[freq_index++] = i;
00508 for (;i<(12*16)+1;i+=4)
00509 freq_subset[freq_index++] = i;
00510 for (;i<(32*16)+1;i+=8)
00511 freq_subset[freq_index++] = i;
00512 }
00513
00514 if (glopts->verbosity > 4) {
00515 fprintf(stdout,"%i critical bands\n",cbands);
00516 for (i=0;i<cbands;i++)
00517 fprintf(stdout,"cband %i spectral line index %i\n",i,cbandindex[i]);
00518 fprintf(stdout,"%i Subsampled spectral lines\n",SUBSIZE);
00519 for (i=0;i<SUBSIZE;i++)
00520 fprintf(stdout,"%i Spectral line %i Bark %.2f\n",i,freq_subset[i], bark[freq_subset[i]]);
00521 }
00522 }
00523
00524 void psycho_3_dump(int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm) {
00525 int i;
00526 fprintf(stdout,"3 Ton:");
00527 for (i=1;i<HAN_SIZE;i++) {
00528 if (tonelabel[i] == TONE)
00529 fprintf(stdout,"[%i] %3.0f ",i,Xtm[i]);
00530 }
00531 fprintf(stdout,"\n");
00532
00533 fprintf(stdout,"3 Nos:");
00534 for (i=1;i<HAN_SIZE;i++) {
00535 if (noiselabel[i] == NOISE)
00536 fprintf(stdout,"[%i] %3.0f ",i,Xnm[i]);
00537 }
00538 fprintf(stdout,"\n");
00539 }