00001 #include <stdio.h>
00002 #include <math.h>
00003 #include "common.h"
00004 #include "encoder.h"
00005 #include "mem.h"
00006 #include "fft.h"
00007 #include "psycho_1.h"
00008 #include "psycho_1_priv.h"
00009
/* Number of entries in dbtable[]; psycho_1_init_add_db() fills one entry per
 * 0.1 dB step (entry i corresponds to a level difference of i / 10.0). */
00010 #define DBTAB 1000
/* Lookup table written by psycho_1_init_add_db() and read by add_db(). */
00011 double dbtable[DBTAB];
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 void psycho_1 (short buffer[2][1152], double scale[2][SBLIMIT],
00023 double ltmin[2][SBLIMIT], frame_info * frame)
00024 {
00025 frame_header *header = frame->header;
00026 int nch = frame->nch;
00027 int sblimit = frame->sblimit;
00028 int k, i, tone = 0, noise = 0;
00029 static char init = 0;
00030 static int off[2] = { 256, 256 };
00031 double sample[FFT_SIZE];
00032 double spike[2][SBLIMIT];
00033 static D1408 *fft_buf;
00034 static mask_ptr power;
00035 static g_ptr ltg;
00036 FLOAT energy[FFT_SIZE];
00037
00038
00039 if (!init) {
00040 fft_buf = (D1408 *) mem_alloc ((long) sizeof (D1408) * 2, "fft_buf");
00041 power = (mask_ptr) mem_alloc (sizeof (mask) * HAN_SIZE, "power");
00042 if (header->version == MPEG_AUDIO_ID) {
00043 psycho_1_read_cbound (header->lay, header->sampling_frequency);
00044 psycho_1_read_freq_band (<g, header->lay, header->sampling_frequency);
00045 } else {
00046 psycho_1_read_cbound (header->lay, header->sampling_frequency + 4);
00047 psycho_1_read_freq_band (<g, header->lay, header->sampling_frequency + 4);
00048 }
00049 psycho_1_make_map (power, ltg);
00050 for (i = 0; i < 1408; i++)
00051 fft_buf[0][i] = fft_buf[1][i] = 0;
00052
00053 psycho_1_init_add_db ();
00054
00055 init = 1;
00056 }
00057 for (k = 0; k < nch; k++) {
00058
00059
00060
00061 int ok = off[k] % 1408;
00062 for (i = 0; i < 1152; i++) {
00063 fft_buf[k][ok++] = (double) buffer[k][i] / SCALE;
00064 if (ok >= 1408)
00065 ok = 0;
00066 }
00067 ok = (off[k] + 1216) % 1408;
00068 for (i = 0; i < FFT_SIZE; i++) {
00069 sample[i] = fft_buf[k][ok++];
00070 if (ok >= 1408)
00071 ok = 0;
00072 }
00073 off[k] += 1152;
00074 off[k] %= 1408;
00075
00076 psycho_1_hann_fft_pickmax (sample, power, &spike[k][0], energy);
00077 psycho_1_tonal_label (power, &tone);
00078 psycho_1_noise_label (power, &noise, ltg, energy);
00079
00080 psycho_1_subsampling (power, ltg, &tone, &noise);
00081 psycho_1_threshold (power, ltg, &tone, &noise,
00082 bitrate[header->version][header->bitrate_index] / nch);
00083 psycho_1_minimum_mask (ltg, <min[k][0], sblimit);
00084 psycho_1_smr (<min[k][0], &spike[k][0], &scale[k][0], sblimit);
00085 }
00086
00087 }
00088
00089
/* Number of entries in cbound[]; set by psycho_1_read_cbound(). */
00090 int crit_band;
/* Table allocated and filled by psycho_1_read_cbound().  Consecutive entries
 * delimit the index ranges scanned by psycho_1_noise_label(). */
00091 int *cbound;
/* Number of entries in the table built by psycho_1_read_freq_band(),
 * counting the extra entry 0 that function prepends. */
00092 int sub_size;
00093
00094 void psycho_1_read_cbound (int lay, int freq)
00095
00096 {
00097
00098 #include "critband.h"
00099
00100
00101 int i, k;
00102
00103 if ((lay < 1) || (lay > 2)) {
00104 printf ("Internal error (read_cbound())\n");
00105 return;
00106 }
00107 if ((freq < 0) || (freq > 6) || (freq == 3)) {
00108 printf ("Internal error (read_cbound())\n");
00109 return;
00110 }
00111
00112 crit_band = SecondCriticalBand[freq][0];
00113 cbound = (int *) mem_alloc (sizeof (int) * crit_band, "cbound");
00114 for (i = 0; i < crit_band; i++) {
00115 k = SecondCriticalBand[freq][i + 1];
00116 if (k != 0) {
00117 cbound[i] = k;
00118 } else {
00119 printf ("Internal error (read_cbound())\n");
00120 return;
00121 }
00122 }
00123 }
00124
00125 void psycho_1_read_freq_band (ltg, lay, freq)
00126 int lay, freq;
00127 g_ptr *ltg;
00128 {
00129
00130 #include "freqtable.h"
00131
00132 int i, k;
00133
00134 if ((freq < 0) || (freq > 6) || (freq == 3)) {
00135 printf ("Internal error (read_freq_band())\n");
00136 return;
00137 }
00138
00139
00140
00141 sub_size = SecondFreqEntries[freq] + 1;
00142 *ltg = (g_ptr) mem_alloc (sizeof (g_thres) * sub_size, "ltg");
00143 (*ltg)[0].line = 0;
00144 (*ltg)[0].bark = 0.0;
00145 (*ltg)[0].hear = 0.0;
00146 for (i = 1; i < sub_size; i++) {
00147 k = SecondFreqSubband[freq][i - 1].line;
00148 if (k != 0) {
00149 (*ltg)[i].line = k;
00150 (*ltg)[i].bark = SecondFreqSubband[freq][i - 1].bark;
00151 (*ltg)[i].hear = SecondFreqSubband[freq][i - 1].hear;
00152 } else {
00153 printf ("Internal error (read_freq_band())\n");
00154 return;
00155 }
00156 }
00157 }
00158
00159
00160 void psycho_1_make_map (mask power[HAN_SIZE], g_thres * ltg)
00161
00162 {
00163 int i, j;
00164
00165 for (i = 1; i < sub_size; i++)
00166 for (j = ltg[i - 1].line; j <= ltg[i].line; j++)
00167 power[j].map = i;
00168 }
00169
00170 void psycho_1_init_add_db (void)
00171 {
00172 int i;
00173 double x;
00174 for (i = 0; i < DBTAB; i++) {
00175 x = (double) i / 10.0;
00176 dbtable[i] = 10 * log10 (1 + pow (10.0, x / 10.0)) - x;
00177 }
00178 }
00179
00180 INLINE double add_db (double a, double b)
00181 {
00182
00183
00184
00185
00186
00187
00188 FLOAT fdiff;
00189 int idiff;
00190 fdiff = (10.0 * (a - b));
00191
00192 if (fdiff > 990.0) {
00193 return a;
00194 }
00195 if (fdiff < -990.0) {
00196 return (b);
00197 }
00198
00199 idiff = (int) fdiff;
00200 if (idiff >= 0) {
00201 return (a + dbtable[idiff]);
00202 }
00203
00204 return (b + dbtable[-idiff]);
00205 }
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215 void psycho_1_hann_fft_pickmax (double sample[FFT_SIZE], mask power[HAN_SIZE],
00216 double spike[SBLIMIT], FLOAT energy[FFT_SIZE])
00217 {
00218 FLOAT x_real[FFT_SIZE];
00219 register int i, j;
00220 register double sqrt_8_over_3;
00221 static int init = 0;
00222 static double *window;
00223 double sum;
00224
00225 if (!init) {
00226 window = (double *) mem_alloc (sizeof (DFFT), "window");
00227 sqrt_8_over_3 = pow (8.0 / 3.0, 0.5);
00228 for (i = 0; i < FFT_SIZE; i++) {
00229
00230 window[i] =
00231 sqrt_8_over_3 * 0.5 * (1 -
00232 cos (2.0 * PI * i / (FFT_SIZE))) / FFT_SIZE;
00233 }
00234 init = 1;
00235 }
00236 for (i = 0; i < FFT_SIZE; i++)
00237 x_real[i] = (FLOAT) (sample[i] * window[i]);
00238
00239 psycho_1_fft (x_real, energy, FFT_SIZE);
00240
00241 for (i = 0; i < HAN_SIZE; i++) {
00242 if (energy[i] < 1E-20)
00243 power[i].x = -200.0 + POWERNORM;
00244 else
00245 power[i].x = 10 * log10 (energy[i]) + POWERNORM;
00246 power[i].next = STOP;
00247 power[i].type = FALSE;
00248 }
00249
00250
00251
00252 #define CF 1073741824
00253 #define DBM 1E-20
00254 for (i = 0; i < HAN_SIZE; spike[i >> 4] = 10.0 * log10 (sum), i += 16) {
00255 for (j = 0, sum = DBM; j < 16; j++)
00256 sum += CF * energy[i + j];
00257 }
00258 }
00259
00260
00261
00262
00263
00264
00265
00266
00267 void psycho_1_tonal_label (mask power[HAN_SIZE], int *tone)
00268
00269 {
00270 int i, j, last = LAST, first, run, last_but_one = LAST;
00271 double max;
00272
00273 *tone = LAST;
00274 for (i = 2; i < HAN_SIZE - 12; i++) {
00275 if (power[i].x > power[i - 1].x && power[i].x >= power[i + 1].x) {
00276 power[i].type = TONE;
00277 power[i].next = LAST;
00278 if (last != LAST)
00279 power[last].next = i;
00280 else
00281 first = *tone = i;
00282 last = i;
00283 }
00284 }
00285 last = LAST;
00286 first = *tone;
00287 *tone = LAST;
00288 while ((first != LAST) && (first != STOP)) {
00289 if (first < 3 || first > 500)
00290 run = 0;
00291 else if (first < 63)
00292 run = 2;
00293 else if (first < 127)
00294 run = 3;
00295 else if (first < 255)
00296 run = 6;
00297 else
00298 run = 12;
00299 max = power[first].x - 7;
00300 for (j = 2; j <= run; j++)
00301 if (max < power[first - j].x || max < power[first + j].x) {
00302 power[first].type = FALSE;
00303 break;
00304 }
00305 if (power[first].type == TONE) {
00306 int help = first;
00307 if (*tone == LAST)
00308 *tone = first;
00309 while ((power[help].next != LAST) && (power[help].next - first) <= run)
00310 help = power[help].next;
00311 help = power[help].next;
00312 power[first].next = help;
00313 if ((first - last) <= run) {
00314 if (last_but_one != LAST)
00315 power[last_but_one].next = first;
00316 }
00317 if (first > 1 && first < 500) {
00318 double tmp;
00319 tmp = add_db (power[first - 1].x, power[first + 1].x);
00320 power[first].x = add_db (power[first].x, tmp);
00321 }
00322 for (j = 1; j <= run; j++) {
00323 power[first - j].x = power[first + j].x = DBMIN;
00324 power[first - j].next = power[first + j].next = STOP;
00325 power[first - j].type = power[first + j].type = FALSE;
00326 }
00327 last_but_one = last;
00328 last = first;
00329 first = power[first].next;
00330 } else {
00331 int ll;
00332 if (last == LAST);
00333 else
00334 power[last].next = power[first].next;
00335 ll = first;
00336 first = power[first].next;
00337 power[ll].next = STOP;
00338 }
00339 }
00340 }
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350 void psycho_1_noise_label (mask * power, int *noise, g_thres * ltg,
00351 FLOAT energy[FFT_SIZE])
00352 {
00353 int i, j, centre, last = LAST;
00354 double index, weight, sum;
00355
00356 for (i = 0; i < crit_band - 1; i++) {
00357 for (j = cbound[i], weight = 0.0, sum = DBMIN; j < cbound[i + 1]; j++) {
00358 if (power[j].type != TONE) {
00359 if (power[j].x != DBMIN) {
00360 sum = add_db (power[j].x, sum);
00361
00362 weight += CF * energy[j] * (double) (j - cbound[i]) / (double) (cbound[i + 1] - cbound[i]);
00363 power[j].x = DBMIN;
00364 }
00365 }
00366 }
00367
00368
00369 if (sum <= DBMIN)
00370 centre = (cbound[i + 1] + cbound[i]) / 2;
00371 else {
00372
00373 index = weight * pow (10.0, -0.1 * sum);
00374 centre =
00375 cbound[i] + (int) (index * (double) (cbound[i + 1] - cbound[i]));
00376 }
00377
00378
00379
00380
00381
00382
00383 if (power[centre].type == TONE) {
00384 if (power[centre + 1].type == TONE) {
00385 centre++;
00386 } else
00387 centre--;
00388 }
00389
00390 if (last == LAST)
00391 *noise = centre;
00392 else {
00393 power[centre].next = LAST;
00394 power[last].next = centre;
00395 }
00396 power[centre].x = sum;
00397 power[centre].type = NOISE;
00398 last = centre;
00399 }
00400 }
00401
00402
00403
00404
00405
00406
00407
00408
00409 void psycho_1_subsampling (mask power[HAN_SIZE], g_thres * ltg, int *tone, int *noise)
00410 {
00411 int i, old;
00412
00413 i = *tone;
00414 old = STOP;
00415
00416 while ((i != LAST) && (i != STOP))
00417 {
00418 if (power[i].x < ltg[power[i].map].hear) {
00419 power[i].type = FALSE;
00420 power[i].x = DBMIN;
00421 if (old == STOP)
00422 *tone = power[i].next;
00423 else
00424 power[old].next = power[i].next;
00425 } else
00426 old = i;
00427 i = power[i].next;
00428 }
00429 i = *noise;
00430 old = STOP;
00431 while ((i != LAST) && (i != STOP)) {
00432 if (power[i].x < ltg[power[i].map].hear) {
00433 power[i].type = FALSE;
00434 power[i].x = DBMIN;
00435 if (old == STOP)
00436 *noise = power[i].next;
00437 else
00438 power[old].next = power[i].next;
00439 } else
00440 old = i;
00441 i = power[i].next;
00442 }
00443 i = *tone;
00444 old = STOP;
00445 while ((i != LAST) && (i != STOP))
00446 {
00447 if (power[i].next == LAST)
00448 break;
00449 if (ltg[power[power[i].next].map].bark -
00450 ltg[power[i].map].bark < 0.5) {
00451 if (power[power[i].next].x > power[i].x) {
00452 if (old == STOP)
00453 *tone = power[i].next;
00454 else
00455 power[old].next = power[i].next;
00456 power[i].type = FALSE;
00457 power[i].x = DBMIN;
00458 i = power[i].next;
00459 } else {
00460 power[power[i].next].type = FALSE;
00461 power[power[i].next].x = DBMIN;
00462 power[i].next = power[power[i].next].next;
00463 old = i;
00464 }
00465 } else {
00466 old = i;
00467 i = power[i].next;
00468 }
00469 }
00470 }
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480 void psycho_1_threshold (mask power[HAN_SIZE], g_thres * ltg, int *tone, int *noise,
00481 int bit_rate)
00482 {
00483 int k, t;
00484 double dz, tmps, vf;
00485
00486 for (k = 1; k < sub_size; k++) {
00487 ltg[k].x = DBMIN;
00488 t = *tone;
00489 while ((t != LAST) && (t != STOP))
00490 {
00491 dz = ltg[k].bark - ltg[power[t].map].bark;
00492 if (dz >= -3.0 && dz < 8.0) {
00493 tmps = -1.525 - 0.275 * ltg[power[t].map].bark - 4.5 + power[t].x;
00494
00495 if (dz < -1)
00496 vf = 17 * (dz + 1) - (0.4 * power[t].x + 6);
00497 else if (dz < 0)
00498 vf = (0.4 * power[t].x + 6) * dz;
00499 else if (dz < 1)
00500 vf = (-17 * dz);
00501 else
00502 vf = -(dz - 1) * (17 - 0.15 * power[t].x) - 17;
00503 ltg[k].x = add_db (ltg[k].x, tmps + vf);
00504 }
00505 t = power[t].next;
00506 }
00507
00508 t = *noise;
00509 while ((t != LAST) && (t != STOP)) {
00510 dz = ltg[k].bark - ltg[power[t].map].bark;
00511 if (dz >= -3.0 && dz < 8.0) {
00512 tmps = -1.525 - 0.175 * ltg[power[t].map].bark - 0.5 + power[t].x;
00513
00514 if (dz < -1)
00515 vf = 17 * (dz + 1) - (0.4 * power[t].x + 6);
00516 else if (dz < 0)
00517 vf = (0.4 * power[t].x + 6) * dz;
00518 else if (dz < 1)
00519 vf = (-17 * dz);
00520 else
00521 vf = -(dz - 1) * (17 - 0.15 * power[t].x) - 17;
00522 ltg[k].x = add_db (ltg[k].x, tmps + vf);
00523 }
00524 t = power[t].next;
00525 }
00526 if (bit_rate < 96)
00527 ltg[k].x = add_db (ltg[k].hear, ltg[k].x);
00528 else
00529 ltg[k].x = add_db (ltg[k].hear - 12.0, ltg[k].x);
00530 }
00531
00532 }
00533
00534
00535
00536
00537
00538
00539
00540
00541 void psycho_1_minimum_mask (g_thres * ltg, double ltmin[SBLIMIT], int sblimit)
00542 {
00543 double min;
00544 int i, j;
00545
00546 j = 1;
00547 for (i = 0; i < sblimit; i++)
00548 if (j >= sub_size - 1)
00549 ltmin[i] = ltg[sub_size - 1].hear;
00550 else {
00551 min = ltg[j].x;
00552 while (ltg[j].line >> 4 == i && j < sub_size) {
00553 if (min > ltg[j].x)
00554 min = ltg[j].x;
00555 j++;
00556 }
00557 ltmin[i] = min;
00558 }
00559 }
00560
00561
00562
00563
00564
00565
00566
00567
00568 void psycho_1_smr (double ltmin[SBLIMIT], double spike[SBLIMIT], double scale[SBLIMIT],
00569 int sblimit)
00570 {
00571 int i;
00572 double max;
00573
00574 for (i = 0; i < sblimit; i++) {
00575 max = 20 * log10 (scale[i] * 32768) - 10;
00576 if (spike[i] > max)
00577 max = spike[i];
00578 max -= ltmin[i];
00579 ltmin[i] = max;
00580 }
00581 }
00582
00583 void psycho_1_dump(mask power[HAN_SIZE], int *tone, int *noise) {
00584 int t;
00585
00586 fprintf(stdout,"1 Ton: ");
00587 t=*tone;
00588 while (t!=LAST && t!=STOP) {
00589 fprintf(stdout,"[%i] %3.0f ",t, power[t].x);
00590 t = power[t].next;
00591 }
00592 fprintf(stdout,"\n");
00593
00594 fprintf(stdout,"1 Nos: ");
00595 t=*noise;
00596 while (t!=LAST && t!=STOP) {
00597 fprintf(stdout,"[%i] %3.0f ",t, power[t].x);
00598 t = power[t].next;
00599 }
00600 fprintf(stdout,"\n");
00601 }