1 /*---------------------------------------------------------------------------* 2 * voicing.c * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 21 22 #include <stdlib.h> 23 #include <string.h> 24 #include <math.h> 25 #include <assert.h> 26 27 #include <limits.h> 28 #ifndef _RTT 29 #include <stdio.h> 30 #endif 31 32 #include "all_defs.h" 33 #include "voicing.h" 34 35 #include "portable.h" 36 37 #include "../cfront/sh_down.h" 38 39 #define DEBUG 0 40 41 42 static const char voicing[] = "$Id: voicing.c,v 1.1.10.5 2007/10/15 18:06:24 dahan Exp $"; 43 44 45 void init_voicing_analysis(voicing_info *chan) 46 { 47 chan->count = -1; 48 chan->sil_count = 0; 49 chan->speech_count = 0; 50 chan->fast_count = 0; 51 #if DEBUG 52 log_report("U: 255 255 255 -1 -1 -1 -1\n"); 53 #endif 54 return; 55 } 56 57 long voicing_analysis(voicing_info *chan, voicedata enval , int* log) 58 { 59 long retval; 60 int threshold; 61 62 if (chan->count < 0) 63 { 64 chan->b1 = SHIFT_UP(enval, 8); 65 chan->b0 = SHIFT_UP(enval, 8); 66 chan->count = -1; 67 } 68 69 /* background level 70 */ 71 if (chan->b0 > SHIFT_UP(enval, 8)) 72 { 73 chan->b0 = SHIFT_UP(enval, 8); 74 chan->count = 0; 75 } 76 if (chan->count > B0_HANG2) 77 chan->b0 += B0_RATE2; 78 else if (chan->count > B0_HANG1) 79 chan->b0 += B0_RATE1; 80 81 chan->count++; 82 83 /* the second background level 84 */ 85 if ((enval - chan->quiet_margin) < (chan->b0 >> 8)) 86 chan->b1 += SHIFT_DOWN(B1_RATE * (SHIFT_UP(enval, 8) - chan->b1), 8); 87 88 /* speech level 89 */ 90 if (chan->s0 < SHIFT_UP(enval, 8)) 91 chan->s0 = SHIFT_UP(enval, 8); 92 else 93 chan->s0 -= B0_RATE1; 94 95 /* increase the range by 25% */ 96 threshold = (chan->b1 + (SHIFT_DOWN( 97 MAX(chan->s0 - chan->b0 - DYNAMIC_RANGE, 0), 2))) >> 8; 98 99 /* Is it speech? 100 */ 101 if (enval > (threshold + chan->margin)) 102 chan->speech_count++; 103 else 104 chan->speech_count = 0; 105 106 /* Is it Fast-match speech 107 */ 108 if (enval > (threshold + chan->fast_margin)) 109 chan->fast_count++; 110 else 111 chan->fast_count = 0; 112 113 if (enval <= (threshold + chan->quiet_margin)) 114 chan->sil_count++; 115 else 116 chan->sil_count = 0; 117 118 /******************* 119 * Returning flags * 120 *******************/ 121 122 retval = 0L; 123 124 if (chan->fast_count > chan->voice_duration) 125 retval = FAST_VOICE_BIT; 126 else if (chan->sil_count > chan->quiet_duration) 127 retval = QUIET_BIT; 128 129 if (chan->speech_count > chan->voice_duration) 130 retval |= VOICE_BIT; 131 132 if (chan->sil_count > 0) 133 retval |= BELOW_THRESHOLD_BIT; 134 135 chan->voice_status = retval; 136 #if DEBUG 137 log_report("U: %d %.1f %.1f, %d %d %d %d\n", (int) enval, 138 chan->b0 / 256.0, chan->b1 / 256.0, 139 chan->speech_count, chan->fast_count, 140 chan->sil_count, chan->count); 141 #endif 142 return (retval); 143 } 144