1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 // An amplitude-normalized spectrum comparison method. 18 19 #include "Fft.h" 20 #include "Window.h" 21 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <math.h> 25 26 /* Find the endpoints of the signal stored in data such that the rms of 27 the found segment exceeds signalOnRms. data is of length n. The 28 RMS calculations used to find the endpoints use a window of length 29 step and advance step samples. The approximate, conservative 30 endpoint sample indices are returned in start and end. */ 31 static void findEndpoints(short* data, int n, int step, float signalOnRms, 32 int* start, int* end) { 33 int size = step; 34 *start = *end = 0; 35 int last_frame = n - size; 36 for (int frame = 0; frame < last_frame; frame += step) { 37 double sum = 0.0; 38 for (int i=0; i < size; ++i) { 39 float val = data[i + frame]; 40 sum += (val * val); 41 } 42 float rms = sqrt(sum / size); 43 if (! *start) { 44 if (rms >= signalOnRms) { 45 *start = frame + size; 46 } 47 continue; 48 } else { 49 if (rms < signalOnRms) { 50 *end = frame - size; 51 // fprintf(stderr, "n:%d onset:%d offset:%d\n", n, *start, *end); 52 return; 53 } 54 } 55 } 56 // Handle the case where the signal does not drop below threshold 57 // after onset. 58 if ((*start > 0) && (! *end)) { 59 *end = n - size - 1; 60 } 61 } 62 63 // Sum the magnitude squared spectra. 64 static void accumulateMagnitude(float* im, float* re, int size, double* mag) { 65 for (int i = 0; i < size; ++i) { 66 mag[i] += ((re[i] * re[i]) + (im[i] * im[i])); 67 } 68 } 69 70 /* Return a pointer to 1+(fftSize/2) spectrum magnitude values 71 averaged over all of the numSamples in pcm. It is the 72 responsibility of the caller to free this magnitude array. Return 73 NULL on failure. Use 50% overlap on the spectra. An FFT of 74 fftSize points is used to compute the spectra, The overall signal 75 rms over all frequencies between lowestBin and highestBin is 76 returned as a scalar in rms. */ 77 double* getAverageSpectrum(short* pcm, int numSamples, int fftSize, 78 int lowestBin, int highestBin, float* rms) { 79 if (numSamples < fftSize) return NULL; 80 int numFrames = 1 + ((2 * (numSamples - fftSize)) / fftSize); 81 int numMag = 1 + (fftSize / 2); 82 float* re = new float[fftSize]; 83 float* im = new float[fftSize]; 84 double* mag = new double[numMag]; 85 for (int i = 0; i < numMag; ++i) { 86 mag[i] = 0.0; 87 } 88 Window wind(fftSize); 89 Fft ft; 90 int pow2 = ft.fftPow2FromWindowSize(fftSize); 91 ft.fftInit(pow2); 92 int input_p = 0; 93 for (int i = 0; i < numFrames; ++i) { 94 wind.window(pcm + input_p, re, 0.0); 95 for (int j = 0; j < fftSize; ++j) { 96 im[j] = 0.0; 97 } 98 ft.fft(re,im); 99 accumulateMagnitude(im, re, numMag, mag); 100 input_p += fftSize / 2; 101 } 102 double averageEnergy = 0.0; // per frame average energy 103 for (int i = 0; i < numMag; ++i) { 104 double e = mag[i]/numFrames; 105 if ((i >= lowestBin) && (i <= highestBin)) 106 averageEnergy += e; 107 mag[i] = sqrt(e); 108 } 109 *rms = sqrt(averageEnergy / (highestBin - lowestBin + 1)); 110 delete [] re; 111 delete [] im; 112 return mag; 113 } 114 115 /* Compare the average magnitude spectra of the signals in pcm and 116 refPcm, which are of length numSamples and numRefSamples, 117 respectively; both sampled at sample_rate. The maximum deviation 118 between average spectra, expressed in dB, is returned in 119 maxDeviation, and the rms of all dB variations is returned in 120 rmsDeviation. Note that a lower limit is set on the frequencies that 121 are compared so as to ignore irrelevant DC and rumble components. If 122 the measurement fails for some reason, return 0; else return 1, for 123 success. Causes for failure include the amplitude of one or both of 124 the signals being too low, or the duration of the signals being too 125 short. 126 127 Note that the expected signal collection scenario is that the phone 128 would be stimulated with a broadband signal as in a recognition 129 attempt, so that there will be some "silence" regions at the start and 130 end of the pcm signals. The preferred stimulus would be pink noise, 131 but any broadband signal should work. */ 132 int compareSpectra(short* pcm, int numSamples, short* refPcm, 133 int numRefSamples, float sample_rate, 134 float* maxDeviation, float* rmsDeviation) { 135 int fftSize = 512; // must be a power of 2 136 float lowestFreq = 100.0; // don't count DC, room rumble, etc. 137 float highestFreq = 3500.0; // ignore most effects of sloppy anti alias filters. 138 int lowestBin = int(0.5 + (lowestFreq * fftSize / sample_rate)); 139 int highestBin = int(0.5 + (highestFreq * fftSize / sample_rate)); 140 float signalOnRms = 1000.0; // endpointer RMS on/off threshold 141 int endpointStepSize = int(0.5 + (sample_rate * 0.02)); // 20ms setp 142 float rms1 = 0.0; 143 float rms2 = 0.0; 144 int onset = 0; 145 int offset = 0; 146 findEndpoints(refPcm, numRefSamples, endpointStepSize, signalOnRms, 147 &onset, &offset); 148 double* spect1 = getAverageSpectrum(refPcm + onset, offset - onset, 149 fftSize, lowestBin, highestBin, &rms1); 150 findEndpoints(pcm, numSamples, endpointStepSize, signalOnRms, 151 &onset, &offset); 152 double* spect2 = getAverageSpectrum(pcm + onset, offset - onset, 153 fftSize, lowestBin, highestBin, &rms2); 154 int magSize = 1 + (fftSize/2); 155 if ((rms1 <= 0.0) || (rms2 <= 0.0)) 156 return 0; // failure because one or both signals are too short or 157 // too low in amplitude. 158 float rmsNorm = rms2 / rms1; // compensate for overall gain differences 159 // fprintf(stderr, "Level normalization: %f dB\n", 20.0 * log10(rmsNorm)); 160 *maxDeviation = 0.0; 161 float sumDevSquared = 0.0; 162 for (int i = lowestBin; i <= highestBin; ++i) { 163 double val = 1.0; 164 if ((spect1[i] > 0.0) && (spect2[i] > 0.0)) { 165 val = 20.0 * log10(rmsNorm * spect1[i] / spect2[i]); 166 } 167 sumDevSquared += val * val; 168 if (fabs(val) > fabs(*maxDeviation)) { 169 *maxDeviation = val; 170 } 171 // fprintf(stderr, "%d %f\n", i, val); 172 } 173 *rmsDeviation = sqrt(sumDevSquared / (highestBin - lowestBin + 1)); 174 delete [] spect1; 175 delete [] spect2; 176 return 1; // success 177 } 178 179 180