Home | History | Annotate | Download | only in audioquality
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 
     17 // An amplitude-normalized spectrum comparison method.
     18 
     19 #include "Fft.h"
     20 #include "Window.h"
     21 
     22 #include <stdio.h>
     23 #include <stdlib.h>
     24 #include <math.h>
     25 
     26 /* Find the endpoints of the signal stored in data such that the rms of
     27    the found segment exceeds signalOnRms.  data is of length n.  The
     28    RMS calculations used to find the endpoints use a window of length
     29    step and advance step samples.  The approximate, conservative
     30    endpoint sample indices are returned in start and end. */
     31 static void findEndpoints(short* data, int n, int step, float signalOnRms,
     32                           int* start, int* end) {
     33     int size = step;
     34     *start = *end = 0;
     35     int last_frame = n - size;
     36     for (int frame = 0; frame < last_frame; frame += step) {
     37         double sum = 0.0;
     38         for (int i=0; i < size; ++i) {
     39             float val = data[i + frame];
     40             sum += (val * val);
     41         }
     42         float rms = sqrt(sum / size);
     43         if (! *start) {
     44             if (rms >= signalOnRms) {
     45                 *start = frame + size;
     46             }
     47             continue;
     48         } else {
     49             if (rms < signalOnRms) {
     50                 *end = frame - size;
     51                 // fprintf(stderr, "n:%d onset:%d offset:%d\n", n, *start, *end);
     52                 return;
     53             }
     54         }
     55     }
     56     // Handle the case where the signal does not drop below threshold
     57     // after onset.
     58     if ((*start > 0) && (! *end)) {
     59         *end = n - size - 1;
     60     }
     61 }
     62 
     63 // Sum the magnitude squared spectra.
     64 static void accumulateMagnitude(float* im, float* re, int size, double* mag) {
     65     for (int i = 0; i < size; ++i) {
     66         mag[i] += ((re[i] * re[i]) + (im[i] * im[i]));
     67     }
     68 }
     69 
     70 /* Return a pointer to 1+(fftSize/2) spectrum magnitude values
     71    averaged over all of the numSamples in pcm.  It is the
     72    responsibility of the caller to free this magnitude array.  Return
     73    NULL on failure.  Use 50% overlap on the spectra. An FFT of
     74    fftSize points is used to compute the spectra, The overall signal
     75    rms over all frequencies between lowestBin and highestBin is
     76    returned as a scalar in rms. */
     77 double* getAverageSpectrum(short* pcm, int numSamples, int fftSize,
     78                            int lowestBin, int highestBin, float* rms) {
     79     if (numSamples < fftSize) return NULL;
     80     int numFrames = 1 + ((2 * (numSamples - fftSize)) / fftSize);
     81     int numMag = 1 + (fftSize / 2);
     82     float* re = new float[fftSize];
     83     float* im = new float[fftSize];
     84     double* mag = new double[numMag];
     85     for (int i = 0; i < numMag; ++i) {
     86         mag[i] = 0.0;
     87     }
     88     Window wind(fftSize);
     89     Fft ft;
     90     int pow2 = ft.fftPow2FromWindowSize(fftSize);
     91     ft.fftInit(pow2);
     92     int input_p = 0;
     93     for (int i = 0; i < numFrames; ++i) {
     94         wind.window(pcm + input_p, re, 0.0);
     95         for (int j = 0; j < fftSize; ++j) {
     96             im[j] = 0.0;
     97         }
     98         ft.fft(re,im);
     99         accumulateMagnitude(im, re, numMag, mag);
    100         input_p += fftSize / 2;
    101     }
    102     double averageEnergy = 0.0; // per frame average energy
    103     for (int i = 0; i < numMag; ++i) {
    104         double e = mag[i]/numFrames;
    105         if ((i >= lowestBin) && (i <= highestBin))
    106             averageEnergy += e;
    107         mag[i] = sqrt(e);
    108     }
    109     *rms = sqrt(averageEnergy / (highestBin - lowestBin + 1));
    110     delete [] re;
    111     delete [] im;
    112     return mag;
    113 }
    114 
    115 /* Compare the average magnitude spectra of the signals in pcm and
    116    refPcm, which are of length numSamples and numRefSamples,
    117    respectively; both sampled at sample_rate.  The maximum deviation
    118    between average spectra, expressed in dB, is returned in
    119    maxDeviation, and the rms of all dB variations is returned in
    120    rmsDeviation.  Note that a lower limit is set on the frequencies that
    121    are compared so as to ignore irrelevant DC and rumble components.  If
    122    the measurement fails for some reason, return 0; else return 1, for
    123    success.  Causes for failure include the amplitude of one or both of
    124    the signals being too low, or the duration of the signals being too
    125    short.
    126 
    127    Note that the expected signal collection scenario is that the phone
    128    would be stimulated with a broadband signal as in a recognition
    129    attempt, so that there will be some "silence" regions at the start and
    130    end of the pcm signals.  The preferred stimulus would be pink noise,
    131    but any broadband signal should work. */
    132 int compareSpectra(short* pcm, int numSamples, short* refPcm,
    133                    int numRefSamples, float sample_rate,
    134                    float* maxDeviation, float* rmsDeviation) {
    135     int fftSize = 512;           // must be a power of 2
    136     float lowestFreq = 100.0;    // don't count DC, room rumble, etc.
    137     float highestFreq = 3500.0;  // ignore most effects of sloppy anti alias filters.
    138     int lowestBin = int(0.5 + (lowestFreq * fftSize / sample_rate));
    139     int highestBin = int(0.5 + (highestFreq * fftSize / sample_rate));
    140     float signalOnRms = 1000.0; // endpointer RMS on/off threshold
    141     int endpointStepSize = int(0.5 + (sample_rate * 0.02)); // 20ms setp
    142     float rms1 = 0.0;
    143     float rms2 = 0.0;
    144     int onset = 0;
    145     int offset = 0;
    146     findEndpoints(refPcm, numRefSamples, endpointStepSize, signalOnRms,
    147                    &onset, &offset);
    148     double* spect1 = getAverageSpectrum(refPcm + onset, offset - onset,
    149                                         fftSize, lowestBin, highestBin, &rms1);
    150     findEndpoints(pcm, numSamples, endpointStepSize, signalOnRms,
    151                   &onset, &offset);
    152     double* spect2 = getAverageSpectrum(pcm + onset, offset - onset,
    153                                         fftSize, lowestBin, highestBin, &rms2);
    154     int magSize = 1 + (fftSize/2);
    155     if ((rms1 <= 0.0) || (rms2 <= 0.0))
    156         return 0; // failure because one or both signals are too short or
    157                   // too low in amplitude.
    158     float rmsNorm = rms2 / rms1; // compensate for overall gain differences
    159     // fprintf(stderr, "Level normalization: %f dB\n", 20.0 * log10(rmsNorm));
    160     *maxDeviation = 0.0;
    161     float sumDevSquared = 0.0;
    162     for (int i = lowestBin; i <= highestBin; ++i) {
    163         double val = 1.0;
    164         if ((spect1[i] > 0.0) && (spect2[i] > 0.0)) {
    165             val = 20.0 * log10(rmsNorm * spect1[i] / spect2[i]);
    166         }
    167         sumDevSquared += val * val;
    168         if (fabs(val) > fabs(*maxDeviation)) {
    169             *maxDeviation = val;
    170         }
    171         // fprintf(stderr, "%d %f\n", i, val);
    172     }
    173     *rmsDeviation = sqrt(sumDevSquared / (highestBin - lowestBin + 1));
    174     delete [] spect1;
    175     delete [] spect2;
    176     return 1; // success
    177 }
    178 
    179 
    180