Home | History | Annotate | Download | only in audio
      1 # Copyright 2016 The Chromium OS Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """This module provides utilities to detect some artifacts and measure the
      6     quality of audio."""
      7 
      8 import logging
      9 import math
     10 import numpy
     11 
     12 # Normal autotest environment.
     13 try:
     14     import common
     15     from autotest_lib.client.cros.audio import audio_analysis
     16 # Standalone execution without autotest environment.
     17 except ImportError:
     18     import audio_analysis
     19 
     20 
     21 # The input signal should be one sine wave with fixed frequency which
     22 # can have silence before and/or after sine wave.
     23 # For example:
     24 #   silence      sine wave      silence
     25 #  -----------|VVVVVVVVVVVVV|-----------
     26 #     (a)           (b)           (c)
     27 # This module detects these artifacts:
     28 #   1. Detect noise in (a) and (c).
     29 #   2. Detect delay in (b).
     30 #   3. Detect burst in (b).
     31 # Assume the transitions between (a)(b) and (b)(c) are smooth and
     32 # amplitude increases/decreases linearly.
     33 # This module will detect artifacts in the sine wave.
     34 # This module also estimates the equivalent noise level by teager operator.
     35 # This module also detects volume changes in the sine wave. However, volume
     36 # changes may be affected by delay or burst.
     37 # Some artifacts may cause each other.
     38 
     39 # In this module, amplitude and frequency are derived from Hilbert transform.
     40 # Both amplitude and frequency are a function of time.
     41 
     42 # To detect each artifact, each point will be compared with
     43 # average amplitude of its block. The block size will be 1.5 ms.
     44 # Using average amplitude can mitigate the error caused by
     45 # Hilbert transform and noise.
     46 # In some case, for more accuracy, the block size may be modified
     47 # to other values.
     48 DEFAULT_BLOCK_SIZE_SECS = 0.0015
     49 
     50 # If the difference between average frequency of this block and
     51 # dominant frequency of full signal is less than 0.5 times of
     52 # dominant frequency, this block is considered to be within the
     53 # sine wave. In most cases, if there is no sine wave(only noise),
     54 # average frequency will be much greater than 5 times of
     55 # dominant frequency.
     56 # Also, for delay during playback, the frequency will be about 0
     57 # in perfect situation or much greater than 5 times of dominant
     58 # frequency if it's noised.
     59 DEFAULT_FREQUENCY_ERROR = 0.5
     60 
     61 # If the amplitude of some sample is less than 0.6 times of the
     62 # average amplitude of its left/right block, it will be considered
     63 # as a delay during playing.
     64 DEFAULT_DELAY_AMPLITUDE_THRESHOLD = 0.6
     65 
     66 # If the average amplitude of the block before or after playing
     67 # is more than 0.5 times to the average amplitude of the wave,
     68 # it will be considered as a noise artifact.
     69 DEFAULT_NOISE_AMPLITUDE_THRESHOLD = 0.5
     70 
     71 # In the sine wave, if the amplitude is more than 1.4 times of
     72 # its left side and its right side, it will be considered as
     73 # a burst.
     74 DEFAULT_BURST_AMPLITUDE_THRESHOLD = 1.4
     75 
     76 # When detecting burst, if the amplitude is lower than 0.5 times
     77 # average amplitude, we ignore it.
     78 DEFAULT_BURST_TOO_SMALL = 0.5
     79 
     80 # For a signal which is the combination of sine wave with fixed frequency f and
     81 # amplitude 1 and standard noise with amplitude k, the average teager value is
     82 # nearly linear to the noise level k.
     83 # Given frequency f, we simulate a sine wave with default noise level and
     84 # calculate its average teager value. Then, we can estimate the equivalent
     85 # noise level of input signal by the average teager value of input signal.
     86 DEFAULT_STANDARD_NOISE = 0.005
     87 
     88 # For delay, burst, volume increasing/decreasing, if two delay(
     89 # burst, volume increasing/decreasing) happen within
     90 # DEFAULT_SAME_EVENT_SECS seconds, we consider they are the
     91 # same event.
     92 DEFAULT_SAME_EVENT_SECS = 0.001
     93 
     94 # When detecting increasing/decreasing volume of signal, if the amplitude
     95 # is lower than 0.1 times average amplitude, we ignore it.
     96 DEFAULT_VOLUME_CHANGE_TOO_SMALL = 0.1
     97 
     98 # If average amplitude of right block is less/more than average
     99 # amplitude of left block times DEFAULT_VOLUME_CHANGE_AMPLITUDE, it will be
    100 # considered as decreasing/increasing on volume.
    101 DEFAULT_VOLUME_CHANGE_AMPLITUDE = 0.1
    102 
    103 # If the increasing/decreasing volume event is too close to the start or the end
    104 # of sine wave, we consider its volume change as part of rising/falling phase in
    105 # the start/end.
    106 NEAR_START_OR_END_SECS = 0.01
    107 
    108 # After applying Hilbert transform, the resulting amplitude and frequency may be
    109 # extremely large in the start and/or the end part. Thus, we will append zeros
    110 # before and after the whole wave for 0.1 secs.
    111 APPEND_ZEROS_SECS = 0.1
    112 
    113 # If the noise event is too close to the start or the end of the data, we
    114 # consider its noise as part of artifacts caused by edge effect of Hilbert
    115 # transform.
    116 # For example, originally, the data duration is 10 seconds.
    117 # We append 0.1 seconds of zeros in the beginning and the end of the data, so
    118 # the data becomes 10.2 seocnds long.
    119 # Then, we apply Hilbert transform to 10.2 seconds of data.
    120 # Near 0.1 seconds and 10.1 seconds, there will be edge effect of Hilbert
    121 # transform. We do not want these be treated as noise.
    122 # If NEAR_DATA_START_OR_END_SECS is set to 0.01, then the noise happened
    123 # at [0, 0.11] and [10.09, 10.1] will be ignored.
    124 NEAR_DATA_START_OR_END_SECS = 0.01
    125 
    126 # If the noise event is too close to the start or the end of the sine wave in
    127 # the data, we consider its noise as part of artifacts caused by edge effect of
    128 # Hilbert transform.
    129 # A |-------------|vvvvvvvvvvvvvvvvvvvvvvv|-------------|
    130 # B |ooooooooo| d |                       | d |ooooooooo|
    131 #
    132 # A is full signal. It contains a sine wave and silence before and after sine
    133 # wave.
    134 # In B, |oooo| shows the parts that we are going to check for noise before/after
    135 # sine wave. | d | is determined by NEAR_SINE_START_OR_END_SECS.
    136 NEAR_SINE_START_OR_END_SECS = 0.01
    137 
    138 
    139 class SineWaveNotFound(Exception):
    140     """Error when there's no sine wave found in the signal"""
    141     pass
    142 
    143 
    144 def hilbert(x):
    145     """Hilbert transform copied from scipy.
    146 
    147     More information can be found here:
    148     http://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.hilbert.html
    149 
    150     @param x: Real signal data to transform.
    151 
    152     @returns: Analytic signal of x, we can further extract amplitude and
    153               frequency from it.
    154 
    155     """
    156     x = numpy.asarray(x)
    157     if numpy.iscomplexobj(x):
    158         raise ValueError("x must be real.")
    159     axis = -1
    160     N = x.shape[axis]
    161     if N <= 0:
    162         raise ValueError("N must be positive.")
    163 
    164     Xf = numpy.fft.fft(x, N, axis=axis)
    165     h = numpy.zeros(N)
    166     if N % 2 == 0:
    167         h[0] = h[N // 2] = 1
    168         h[1:N // 2] = 2
    169     else:
    170         h[0] = 1
    171         h[1:(N + 1) // 2] = 2
    172 
    173     if len(x.shape) > 1:
    174         ind = [newaxis] * x.ndim
    175         ind[axis] = slice(None)
    176         h = h[ind]
    177     x = numpy.fft.ifft(Xf * h, axis=axis)
    178     return x
    179 
    180 
    181 def noised_sine_wave(frequency, rate, noise_level):
    182     """Generates a sine wave of 2 second with specified noise level.
    183 
    184     @param frequency: Frequency of sine wave.
    185     @param rate: Sampling rate.
    186     @param noise_level: Required noise level.
    187 
    188     @returns: A sine wave with specified noise level.
    189 
    190     """
    191     wave = []
    192     for index in xrange(0, rate * 2):
    193         sample = 2.0 * math.pi * frequency * float(index) / float(rate)
    194         sine_wave = math.sin(sample)
    195         noise = noise_level * numpy.random.standard_normal()
    196         wave.append(sine_wave + noise)
    197     return wave
    198 
    199 
    200 def average_teager_value(wave, amplitude):
    201     """Computes the normalized average teager value.
    202 
    203     After averaging the teager value, we will normalize the value by
    204     dividing square of amplitude.
    205 
    206     @param wave: Wave to apply teager operator.
    207     @param amplitude: Average amplitude of given wave.
    208 
    209     @returns: Average teager value.
    210 
    211     """
    212     teager_value, length = 0, len(wave)
    213     for i in range(1, length - 1):
    214         ith_teager_value = abs(wave[i] * wave[i] - wave[i - 1] * wave[i + 1])
    215         ith_teager_value *= max(1, abs(wave[i]))
    216         teager_value += ith_teager_value
    217     teager_value = (float(teager_value) / length) / (amplitude ** 2)
    218     return teager_value
    219 
    220 
    221 def noise_level(amplitude, frequency, rate, teager_value_of_input):
    222     """Computes the noise level compared with standard_noise.
    223 
    224     For a signal which is the combination of sine wave with fixed frequency f
    225     and amplitude 1 and standard noise with amplitude k, the average teager
    226     value is nearly linear to the noise level k.
    227     Thus, we can compute the average teager value of a sine wave with
    228     standard_noise. Then, we can estimate the noise level of given input.
    229 
    230     @param amplitude: Amplitude of input audio.
    231     @param frequency: Dominant frequency of input audio.
    232     @param rate: Sampling rate.
    233     @param teager_value_of_input: Average teager value of input audio.
    234 
    235     @returns: A float value denotes the audio is equivalent to have
    236               how many times of noise compared with its amplitude.
    237               For example, 0.02 denotes that the wave has a noise which
    238               has standard distribution with standard deviation being
    239               0.02 times the amplitude of the wave.
    240 
    241     """
    242     standard_noise = DEFAULT_STANDARD_NOISE
    243 
    244     # Generates the standard sine wave with stdandard_noise level of noise.
    245     standard_wave = noised_sine_wave(frequency, rate, standard_noise)
    246 
    247     # Calculates the average teager value.
    248     teager_value_of_std_wave = average_teager_value(standard_wave, amplitude)
    249 
    250     return (teager_value_of_input / teager_value_of_std_wave) * standard_noise
    251 
    252 
    253 def error(f1, f2):
    254     """Calculates the relative error between f1 and f2.
    255 
    256     @param f1: Exact value.
    257     @param f2: Test value.
    258 
    259     @returns: Relative error between f1 and f2.
    260 
    261     """
    262     return abs(float(f1) - float(f2)) / float(f1)
    263 
    264 
    265 def hilbert_analysis(signal, rate, block_size):
    266     """Finds amplitude and frequency of each time of signal by Hilbert transform.
    267 
    268     @param signal: The wave to analyze.
    269     @param rate: Sampling rate.
    270     @param block_size: The size of block to transform.
    271 
    272     @returns: A tuple of list: (amplitude, frequency) composed of
    273               amplitude and frequency of each time.
    274 
    275     """
    276     # To apply Hilbert transform, the wave will be transformed
    277     # segment by segment. For each segment, its size will be
    278     # block_size and we will only take middle part of it.
    279     # Thus, each segment looks like: |-----|=====|=====|-----|.
    280     # "=...=" part will be taken while "-...-" part will be ignored.
    281     #
    282     # The whole size of taken part will be half of block_size
    283     # which will be hilbert_block.
    284     # The size of each ignored part will be half of hilbert_block
    285     # which will be half_hilbert_block.
    286     hilbert_block = block_size // 2
    287     half_hilbert_block = hilbert_block // 2
    288     # As mentioned above, for each block, we will only take middle
    289     # part of it. Thus, the whole transformation will be completed as:
    290     # |=====|=====|-----|           |-----|=====|=====|-----|
    291     #       |-----|=====|=====|-----|           |-----|=====|=====|
    292     #                   |-----|=====|=====|-----|
    293     # Specially, beginning and ending part may not have ignored part.
    294     length = len(signal)
    295     result = []
    296     for left_border in xrange(0, length, hilbert_block):
    297         right_border = min(length, left_border + hilbert_block)
    298         temp_left_border = max(0, left_border - half_hilbert_block)
    299         temp_right_border = min(length, right_border + half_hilbert_block)
    300         temp = hilbert(signal[temp_left_border:temp_right_border])
    301         for index in xrange(left_border, right_border):
    302             result.append(temp[index - temp_left_border])
    303     result = numpy.asarray(result)
    304     amplitude = numpy.abs(result)
    305     phase = numpy.unwrap(numpy.angle(result))
    306     frequency = numpy.diff(phase) / (2.0 * numpy.pi) * rate
    307     #frequency.append(frequency[len(frequency)-1])
    308     frequecny = numpy.append(frequency, frequency[len(frequency) - 1])
    309     return (amplitude, frequency)
    310 
    311 
    312 def find_block_average_value(arr, side_block_size, block_size):
    313     """For each index, finds average value of its block, left block, right block.
    314 
    315     It will find average value for each index in the range.
    316 
    317     For each index, the range of its block is
    318         [max(0, index - block_size / 2), min(length - 1, index + block_size / 2)]
    319     For each index, the range of its left block is
    320         [max(0, index - size_block_size), index]
    321     For each index, the range of its right block is
    322         [index, min(length - 1, index + side_block_size)]
    323 
    324     @param arr: The array to be computed.
    325     @param side_block_size: the size of the left_block and right_block.
    326     @param block_size: the size of the block.
    327 
    328     @returns: A tuple of lists: (left_block_average_array,
    329                                  right_block_average_array,
    330                                  block_average_array)
    331     """
    332     length = len(arr)
    333     left_border, right_border = 0, 1
    334     left_block_sum = arr[0]
    335     right_block_sum = arr[0]
    336     left_average_array = numpy.zeros(length)
    337     right_average_array = numpy.zeros(length)
    338     block_average_array = numpy.zeros(length)
    339     for index in xrange(0, length):
    340         while left_border < index - side_block_size:
    341             left_block_sum -= arr[left_border]
    342             left_border += 1
    343         while right_border < min(length, index + side_block_size):
    344             right_block_sum += arr[right_border]
    345             right_border += 1
    346 
    347         left_average_value = float(left_block_sum) / (index - left_border + 1)
    348         right_average_value = float(right_block_sum) / (right_border - index)
    349         left_average_array[index] = left_average_value
    350         right_average_array[index] = right_average_value
    351 
    352         if index + 1 < length:
    353             left_block_sum += arr[index + 1]
    354         right_block_sum -= arr[index]
    355     left_border, right_border = 0, 1
    356     block_sum = 0
    357     for index in xrange(0, length):
    358         while left_border < index - block_size / 2:
    359             block_sum -= arr[left_border]
    360             left_border += 1
    361         while right_border < min(length, index + block_size / 2):
    362             block_sum += arr[right_border]
    363             right_border += 1
    364 
    365         average_value = float(block_sum) / (right_border - left_border)
    366         block_average_array[index] = average_value
    367     return (left_average_array, right_average_array, block_average_array)
    368 
    369 
    370 def find_start_end_index(dominant_frequency,
    371                          block_frequency_delta,
    372                          block_size,
    373                          frequency_error_threshold):
    374     """Finds start and end index of sine wave.
    375 
    376     For each block with size of block_size, we check that whether its frequency
    377     is close enough to the dominant_frequency. If yes, we will consider this
    378     block to be within the sine wave.
    379     Then, it will return the start and end index of sine wave indicating that
    380     sine wave is between [start_index, end_index)
    381     It's okay if the whole signal only contains sine wave.
    382 
    383     @param dominant_frequency: Dominant frequency of signal.
    384     @param block_frequency_delta: Average absolute difference between dominant
    385                                   frequency and frequency of each block. For
    386                                   each index, its block is
    387                                   [max(0, index - block_size / 2),
    388                                    min(length - 1, index + block_size / 2)]
    389     @param block_size: Block size in samples.
    390 
    391     @returns: A tuple composed of (start_index, end_index)
    392 
    393     """
    394     length = len(block_frequency_delta)
    395 
    396     # Finds the start/end time index of playing based on dominant frequency
    397     start_index, end_index = length - 1 , 0
    398     for index in xrange(0, length):
    399         left_border = max(0, index - block_size / 2)
    400         right_border = min(length - 1, index + block_size / 2)
    401         frequency_error = block_frequency_delta[index] / dominant_frequency
    402         if frequency_error < frequency_error_threshold:
    403             start_index = min(start_index, left_border)
    404             end_index = max(end_index, right_border + 1)
    405     return (start_index, end_index)
    406 
    407 
    408 def noise_detection(start_index, end_index,
    409                     block_amplitude, average_amplitude,
    410                     rate, noise_amplitude_threshold):
    411     """Detects noise before/after sine wave.
    412 
    413     If average amplitude of some sample's block before start of wave or after
    414     end of wave is more than average_amplitude times noise_amplitude_threshold,
    415     it will be considered as a noise.
    416 
    417     @param start_index: Start index of sine wave.
    418     @param end_index: End index of sine wave.
    419     @param block_amplitude: An array for average amplitude of each block, where
    420                             amplitude is computed from Hilbert transform.
    421     @param average_amplitude: Average amplitude of sine wave.
    422     @param rate: Sampling rate
    423     @param noise_amplitude_threshold: If the average amplitude of a block is
    424                         higher than average amplitude of the wave times
    425                         noise_amplitude_threshold, it will be considered as
    426                         noise before/after playback.
    427 
    428     @returns: A tuple of lists indicating the time that noise happens:
    429             (noise_before_playing, noise_after_playing).
    430 
    431     """
    432     length = len(block_amplitude)
    433     amplitude_threshold = average_amplitude * noise_amplitude_threshold
    434     same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
    435 
    436     # Detects noise before playing.
    437     noise_time_point = []
    438     last_noise_end_time_point = []
    439     previous_noise_index = None
    440     times = 0
    441     for index in xrange(0, length):
    442         # Ignore noise too close to the beginning or the end of sine wave.
    443         # Check the docstring of NEAR_SINE_START_OR_END_SECS.
    444         if ((start_index - rate * NEAR_SINE_START_OR_END_SECS) <= index and
    445             (index < end_index + rate * NEAR_SINE_START_OR_END_SECS)):
    446             continue
    447 
    448         # Ignore noise too close to the beginning or the end of original data.
    449         # Check the docstring of NEAR_DATA_START_OR_END_SECS.
    450         if (float(index) / rate <=
    451             NEAR_DATA_START_OR_END_SECS + APPEND_ZEROS_SECS):
    452             continue
    453         if (float(length - index) / rate <=
    454             NEAR_DATA_START_OR_END_SECS + APPEND_ZEROS_SECS):
    455             continue
    456         if block_amplitude[index] > amplitude_threshold:
    457             same_event = False
    458             if previous_noise_index:
    459                 same_event = (index - previous_noise_index) < same_event_samples
    460             if not same_event:
    461                 index_start_sec = float(index) / rate - APPEND_ZEROS_SECS
    462                 index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
    463                 noise_time_point.append(index_start_sec)
    464                 last_noise_end_time_point.append(index_end_sec)
    465                 times += 1
    466             index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
    467             last_noise_end_time_point[times - 1] = index_end_sec
    468             previous_noise_index = index
    469 
    470     noise_before_playing, noise_after_playing = [], []
    471     for i in xrange(times):
    472         duration = last_noise_end_time_point[i] - noise_time_point[i]
    473         if noise_time_point[i] < float(start_index) / rate - APPEND_ZEROS_SECS:
    474             noise_before_playing.append((noise_time_point[i], duration))
    475         else:
    476             noise_after_playing.append((noise_time_point[i], duration))
    477 
    478     return (noise_before_playing, noise_after_playing)
    479 
    480 
    481 def delay_detection(start_index, end_index,
    482                     block_amplitude, average_amplitude,
    483                     dominant_frequency,
    484                     rate,
    485                     left_block_amplitude,
    486                     right_block_amplitude,
    487                     block_frequency_delta,
    488                     delay_amplitude_threshold,
    489                     frequency_error_threshold):
    490     """Detects delay during playing.
    491 
    492     For each sample, we will check whether the average amplitude of its block
    493     is less than average amplitude of its left block and its right block times
    494     delay_amplitude_threshold. Also, we will check whether the frequency of
    495     its block is far from the dominant frequency.
    496     If at least one constraint fulfilled, it will be considered as a delay.
    497 
    498     @param start_index: Start index of sine wave.
    499     @param end_index: End index of sine wave.
    500     @param block_amplitude: An array for average amplitude of each block, where
    501                             amplitude is computed from Hilbert transform.
    502     @param average_amplitude: Average amplitude of sine wave.
    503     @param dominant_frequency: Dominant frequency of signal.
    504     @param rate: Sampling rate
    505     @param left_block_amplitude: Average amplitude of left block of each index.
    506                                 Ref to find_block_average_value function.
    507     @param right_block_amplitude: Average amplitude of right block of each index.
    508                                 Ref to find_block_average_value function.
    509     @param block_frequency_delta: Average absolute difference frequency to
    510                                 dominant frequency of block of each index.
    511                                 Ref to find_block_average_value function.
    512     @param delay_amplitude_threshold: If the average amplitude of a block is
    513                         lower than average amplitude of the wave times
    514                         delay_amplitude_threshold, it will be considered
    515                         as delay.
    516     @param frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR
    517 
    518     @returns: List of delay occurrence:
    519                 [(time_1, duration_1), (time_2, duration_2), ...],
    520               where time and duration are in seconds.
    521 
    522     """
    523     delay_time_points = []
    524     last_delay_end_time_points = []
    525     previous_delay_index = None
    526     times = 0
    527     same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
    528     start_time = float(start_index) / rate - APPEND_ZEROS_SECS
    529     end_time = float(end_index) / rate - APPEND_ZEROS_SECS
    530     for index in xrange(start_index, end_index):
    531         if block_amplitude[index] > average_amplitude * delay_amplitude_threshold:
    532             continue
    533         now_time = float(index) / rate - APPEND_ZEROS_SECS
    534         if abs(now_time - start_time) < NEAR_START_OR_END_SECS:
    535             continue
    536         if abs(now_time - end_time) < NEAR_START_OR_END_SECS:
    537             continue
    538         # If amplitude less than its left/right side and small enough,
    539         # it will be considered as a delay.
    540         amp_threshold = average_amplitude * delay_amplitude_threshold
    541         left_threshold = delay_amplitude_threshold * left_block_amplitude[index]
    542         amp_threshold = min(amp_threshold, left_threshold)
    543         right_threshold = delay_amplitude_threshold * right_block_amplitude[index]
    544         amp_threshold = min(amp_threshold, right_threshold)
    545 
    546         frequency_error = block_frequency_delta[index] / dominant_frequency
    547 
    548         amplitude_too_small = block_amplitude[index] < amp_threshold
    549         frequency_not_match = frequency_error > frequency_error_threshold
    550 
    551         if amplitude_too_small or frequency_not_match:
    552             same_event = False
    553             if previous_delay_index:
    554                 same_event = (index - previous_delay_index) < same_event_samples
    555             if not same_event:
    556                 index_start_sec = float(index) / rate - APPEND_ZEROS_SECS
    557                 index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
    558                 delay_time_points.append(index_start_sec)
    559                 last_delay_end_time_points.append(index_end_sec)
    560                 times += 1
    561             previous_delay_index = index
    562             index_end_sec = float(index + 1) / rate - APPEND_ZEROS_SECS
    563             last_delay_end_time_points[times - 1] = index_end_sec
    564 
    565     delay_list = []
    566     for i in xrange(len(delay_time_points)):
    567         duration = last_delay_end_time_points[i] - delay_time_points[i]
    568         delay_list.append( (delay_time_points[i], duration) )
    569     return delay_list
    570 
    571 
    572 def burst_detection(start_index, end_index,
    573                     block_amplitude, average_amplitude,
    574                     dominant_frequency,
    575                     rate,
    576                     left_block_amplitude,
    577                     right_block_amplitude,
    578                     block_frequency_delta,
    579                     burst_amplitude_threshold,
    580                     frequency_error_threshold):
    581     """Detects burst during playing.
    582 
    583     For each sample, we will check whether the average amplitude of its block is
    584     more than average amplitude of its left block and its right block times
    585     burst_amplitude_threshold. Also, we will check whether the frequency of
    586     its block is not compatible to the dominant frequency.
    587     If at least one constraint fulfilled, it will be considered as a burst.
    588 
    589     @param start_index: Start index of sine wave.
    590     @param end_index: End index of sine wave.
    591     @param block_amplitude: An array for average amplitude of each block, where
    592                             amplitude is computed from Hilbert transform.
    593     @param average_amplitude: Average amplitude of sine wave.
    594     @param dominant_frequency: Dominant frequency of signal.
    595     @param rate: Sampling rate
    596     @param left_block_amplitude: Average amplitude of left block of each index.
    597                                 Ref to find_block_average_value function.
    598     @param right_block_amplitude: Average amplitude of right block of each index.
    599                                 Ref to find_block_average_value function.
    600     @param block_frequency_delta: Average absolute difference frequency to
    601                                 dominant frequency of block of each index.
    602     @param burst_amplitude_threshold: If the amplitude is higher than average
    603                             amplitude of its left block and its right block
    604                             times burst_amplitude_threshold. It will be
    605                             considered as a burst.
    606     @param frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR
    607 
    608     @returns: List of burst occurence: [time_1, time_2, ...],
    609               where time is in seconds.
    610 
    611     """
    612     burst_time_points = []
    613     previous_burst_index = None
    614     same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
    615     for index in xrange(start_index, end_index):
    616         # If amplitude higher than its left/right side and large enough,
    617         # it will be considered as a burst.
    618         if block_amplitude[index] <= average_amplitude * DEFAULT_BURST_TOO_SMALL:
    619             continue
    620         if abs(index - start_index) < rate * NEAR_START_OR_END_SECS:
    621             continue
    622         if abs(index - end_index) < rate * NEAR_START_OR_END_SECS:
    623             continue
    624         amp_threshold = average_amplitude * DEFAULT_BURST_TOO_SMALL
    625         left_threshold = burst_amplitude_threshold * left_block_amplitude[index]
    626         amp_threshold = max(amp_threshold, left_threshold)
    627         right_threshold = burst_amplitude_threshold * right_block_amplitude[index]
    628         amp_threshold = max(amp_threshold, right_threshold)
    629 
    630         frequency_error = block_frequency_delta[index] / dominant_frequency
    631 
    632         amplitude_too_large = block_amplitude[index] > amp_threshold
    633         frequency_not_match = frequency_error > frequency_error_threshold
    634 
    635         if amplitude_too_large or frequency_not_match:
    636             same_event = False
    637             if previous_burst_index:
    638                 same_event = index - previous_burst_index < same_event_samples
    639             if not same_event:
    640                 burst_time_points.append(float(index) / rate - APPEND_ZEROS_SECS)
    641             previous_burst_index = index
    642 
    643     return burst_time_points
    644 
    645 
    646 def changing_volume_detection(start_index, end_index,
    647                               average_amplitude,
    648                               rate,
    649                               left_block_amplitude,
    650                               right_block_amplitude,
    651                               volume_changing_amplitude_threshold):
    652     """Finds volume changing during playback.
    653 
    654     For each index, we will compare average amplitude of its left block and its
    655     right block. If average amplitude of right block is more than average
    656     amplitude of left block times (1 + DEFAULT_VOLUME_CHANGE_AMPLITUDE), it will
    657     be considered as an increasing volume. If the one of right block is less
    658     than that of left block times (1 - DEFAULT_VOLUME_CHANGE_AMPLITUDE), it will
    659     be considered as a decreasing volume.
    660 
    661     @param start_index: Start index of sine wave.
    662     @param end_index: End index of sine wave.
    663     @param average_amplitude: Average amplitude of sine wave.
    664     @param rate: Sampling rate
    665     @param left_block_amplitude: Average amplitude of left block of each index.
    666                                 Ref to find_block_average_value function.
    667     @param right_block_amplitude: Average amplitude of right block of each index.
    668                                 Ref to find_block_average_value function.
    669     @param volume_changing_amplitude_threshold: If the average amplitude of right
    670                                                 block is higher or lower than
    671                                                 that of left one times this
    672                                                 value, it will be considered as
    673                                                 a volume change.
    674                                                 Also refer to
    675                                                 DEFAULT_VOLUME_CHANGE_AMPLITUDE
    676 
    677     @returns: List of volume changing composed of 1 for increasing and
    678               -1 for decreasing.
    679 
    680     """
    681     length = len(left_block_amplitude)
    682 
    683     # Detects rising and/or falling volume.
    684     previous_rising_index, previous_falling_index = None, None
    685     changing_time = []
    686     changing_events = []
    687     amplitude_threshold = average_amplitude * DEFAULT_VOLUME_CHANGE_TOO_SMALL
    688     same_event_samples = rate * DEFAULT_SAME_EVENT_SECS
    689     for index in xrange(start_index, end_index):
    690         # Skips if amplitude is too small.
    691         if left_block_amplitude[index] < amplitude_threshold:
    692             continue
    693         if right_block_amplitude[index] < amplitude_threshold:
    694             continue
    695         # Skips if changing is from start or end time
    696         if float(abs(start_index - index)) / rate < NEAR_START_OR_END_SECS:
    697             continue
    698         if float(abs(end_index   - index)) / rate < NEAR_START_OR_END_SECS:
    699             continue
    700 
    701         delta_margin = volume_changing_amplitude_threshold
    702         if left_block_amplitude[index] > 0:
    703             delta_margin *= left_block_amplitude[index]
    704 
    705         increasing_threshold = left_block_amplitude[index] + delta_margin
    706         decreasing_threshold = left_block_amplitude[index] - delta_margin
    707 
    708         if right_block_amplitude[index] > increasing_threshold:
    709             same_event = False
    710             if previous_rising_index:
    711                 same_event = index - previous_rising_index < same_event_samples
    712             if not same_event:
    713                 changing_time.append(float(index) / rate - APPEND_ZEROS_SECS)
    714                 changing_events.append(+1)
    715             previous_rising_index = index
    716         if right_block_amplitude[index] < decreasing_threshold:
    717             same_event = False
    718             if previous_falling_index:
    719                 same_event = index - previous_falling_index < same_event_samples
    720             if not same_event:
    721                 changing_time.append(float(index) / rate - APPEND_ZEROS_SECS)
    722                 changing_events.append(-1)
    723             previous_falling_index = index
    724 
    725     # Combines consecutive increasing/decreasing event.
    726     combined_changing_events, prev = [], 0
    727     for i in xrange(len(changing_events)):
    728         if changing_events[i] == prev:
    729             continue
    730         combined_changing_events.append((changing_time[i], changing_events[i]))
    731         prev = changing_events[i]
    732     return combined_changing_events
    733 
    734 
    735 def quality_measurement(
    736         signal, rate,
    737         dominant_frequency=None,
    738         block_size_secs=DEFAULT_BLOCK_SIZE_SECS,
    739         frequency_error_threshold=DEFAULT_FREQUENCY_ERROR,
    740         delay_amplitude_threshold=DEFAULT_DELAY_AMPLITUDE_THRESHOLD,
    741         noise_amplitude_threshold=DEFAULT_NOISE_AMPLITUDE_THRESHOLD,
    742         burst_amplitude_threshold=DEFAULT_BURST_AMPLITUDE_THRESHOLD,
    743         volume_changing_amplitude_threshold=DEFAULT_VOLUME_CHANGE_AMPLITUDE):
    744     """Detects several artifacts and estimates the noise level.
    745 
    746     This method detects artifact before playing, after playing, and delay
    747     during playing. Also, it estimates the noise level of the signal.
    748     To avoid the influence of noise, it calculates amplitude and frequency
    749     block by block.
    750 
    751     @param signal: A list of numbers for one-channel PCM data. The data should
    752                    be normalized to [-1,1].
    753     @param rate: Sampling rate
    754     @param dominant_frequency: Dominant frequency of signal. Set None to
    755                                recalculate the frequency in this function.
    756     @param block_size_secs: Block size in seconds. The measurement will be done
    757                             block-by-block using average amplitude and frequency
    758                             in each block to avoid noise.
    759     @param frequency_error_threshold: Ref to DEFAULT_FREQUENCY_ERROR.
    760     @param delay_amplitude_threshold: If the average amplitude of a block is
    761                                       lower than average amplitude of the wave
    762                                       times delay_amplitude_threshold, it will
    763                                       be considered as delay.
    764                                       Also refer to delay_detection and
    765                                       DEFAULT_DELAY_AMPLITUDE_THRESHOLD.
    766     @param noise_amplitude_threshold: If the average amplitude of a block is
    767                                       higher than average amplitude of the wave
    768                                       times noise_amplitude_threshold, it will
    769                                       be considered as noise before/after
    770                                       playback.
    771                                       Also refer to noise_detection and
    772                                       DEFAULT_NOISE_AMPLITUDE_THRESHOLD.
    773     @param burst_amplitude_threshold: If the average amplitude of a block is
    774                                       higher than average amplitude of its left
    775                                       block and its right block times
    776                                       burst_amplitude_threshold. It will be
    777                                       considered as a burst.
    778                                       Also refer to burst_detection and
    779                                       DEFAULT_BURST_AMPLITUDE_THRESHOLD.
    780     @param volume_changing_amplitude_threshold: If the average amplitude of right
    781                                                 block is higher or lower than
    782                                                 that of left one times this
    783                                                 value, it will be considered as
    784                                                 a volume change.
    785                                                 Also refer to
    786                                                 changing_volume_detection and
    787                                                 DEFAULT_VOLUME_CHANGE_AMPLITUDE
    788 
    789     @returns: A dictoinary of detection/estimation:
    790               {'artifacts':
    791                 {'noise_before_playback':
    792                     [(time_1, duration_1), (time_2, duration_2), ...],
    793                  'noise_after_playback':
    794                     [(time_1, duration_1), (time_2, duration_2), ...],
    795                  'delay_during_playback':
    796                     [(time_1, duration_1), (time_2, duration_2), ...],
    797                  'burst_during_playback':
    798                     [time_1, time_2, ...]
    799                 },
    800                'volume_changes':
    801                  [(time_1, flag_1), (time_2, flag_2), ...],
    802                'equivalent_noise_level': level
    803               }
    804               where durations and time points are in seconds. And,
    805               equivalence_noise_level is the quotient of noise and
    806               wave which refers to DEFAULT_STANDARD_NOISE.
    807               volume_changes is a list of tuples containing time
    808               stamps and decreasing/increasing flags for volume
    809               change events.
    810 
    811     """
    812     # Calculates the block size, from seconds to samples.
    813     block_size = int(block_size_secs * rate)
    814 
    815     signal = numpy.concatenate((numpy.zeros(int(rate * APPEND_ZEROS_SECS)),
    816                                 signal,
    817                                 numpy.zeros(int(rate * APPEND_ZEROS_SECS))))
    818     signal = numpy.array(signal, dtype=float)
    819     length = len(signal)
    820 
    821     # Calculates the amplitude and frequency.
    822     amplitude, frequency = hilbert_analysis(signal, rate, block_size)
    823 
    824     # Finds the dominant frequency.
    825     if not dominant_frequency:
    826         dominant_frequency = audio_analysis.spectral_analysis(signal, rate)[0][0]
    827 
    828     # Finds the array which contains absolute difference between dominant
    829     # frequency and frequency at each time point.
    830     frequency_delta = abs(frequency - dominant_frequency)
    831 
    832     # Computes average amplitude of each type of block
    833     res = find_block_average_value(amplitude, block_size * 2, block_size)
    834     left_block_amplitude, right_block_amplitude, block_amplitude = res
    835 
    836     # Computes average absolute difference of frequency and dominant frequency
    837     # of the block of each index
    838     _, _, block_frequency_delta = find_block_average_value(frequency_delta,
    839                                                            block_size * 2,
    840                                                            block_size)
    841 
    842     # Finds start and end index of sine wave.
    843     start_index, end_index = find_start_end_index(dominant_frequency,
    844                                                   block_frequency_delta,
    845                                                   block_size,
    846                                                   frequency_error_threshold)
    847 
    848     if start_index > end_index:
    849         raise SineWaveNotFound('No sine wave found in signal')
    850 
    851     logging.debug('Found sine wave: start: %s, end: %s',
    852                   float(start_index) / rate - APPEND_ZEROS_SECS,
    853                   float(end_index) / rate - APPEND_ZEROS_SECS)
    854 
    855     sum_of_amplitude = float(sum(amplitude[start_index:end_index]))
    856     # Finds average amplitude of sine wave.
    857     average_amplitude = sum_of_amplitude / (end_index - start_index)
    858 
    859     # Finds noise before and/or after playback.
    860     noise_before_playing, noise_after_playing = noise_detection(
    861             start_index, end_index,
    862             block_amplitude, average_amplitude,
    863             rate,
    864             noise_amplitude_threshold)
    865 
    866     # Finds delay during playback.
    867     delays = delay_detection(start_index, end_index,
    868                              block_amplitude, average_amplitude,
    869                              dominant_frequency,
    870                              rate,
    871                              left_block_amplitude,
    872                              right_block_amplitude,
    873                              block_frequency_delta,
    874                              delay_amplitude_threshold,
    875                              frequency_error_threshold)
    876 
    877     # Finds burst during playback.
    878     burst_time_points = burst_detection(start_index, end_index,
    879                                         block_amplitude, average_amplitude,
    880                                         dominant_frequency,
    881                                         rate,
    882                                         left_block_amplitude,
    883                                         right_block_amplitude,
    884                                         block_frequency_delta,
    885                                         burst_amplitude_threshold,
    886                                         frequency_error_threshold)
    887 
    888     # Finds volume changing during playback.
    889     volume_changes = changing_volume_detection(
    890             start_index, end_index,
    891             average_amplitude,
    892             rate,
    893             left_block_amplitude,
    894             right_block_amplitude,
    895             volume_changing_amplitude_threshold)
    896 
    897     # Calculates the average teager value.
    898     teager_value = average_teager_value(signal[start_index:end_index],
    899                                         average_amplitude)
    900 
    901     # Finds out the noise level.
    902     noise = noise_level(average_amplitude, dominant_frequency,
    903                         rate,
    904                         teager_value)
    905 
    906     return {'artifacts':
    907             {'noise_before_playback': noise_before_playing,
    908              'noise_after_playback': noise_after_playing,
    909              'delay_during_playback': delays,
    910              'burst_during_playback': burst_time_points
    911             },
    912             'volume_changes': volume_changes,
    913             'equivalent_noise_level': noise
    914            }
    915