Home | History | Annotate | Download | only in clib
      1 /*---------------------------------------------------------------------------*
      2  *  swicms.c                                                                 *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include"swicms.h"
#include"srec_sizes.h"
#include"prelib.h"

#include "passert.h"
#include "ESR_Session.h"
#include "ESR_SessionType.h"
#include "IntArrayList.h"
#include "portable.h"
     30 
     31 #define printf_vector(HEAD, FMT, PTR, NN) { int i; LCHAR buffer[256]; sprintf(buffer, HEAD); sprintf(buffer + LSTRLEN(buffer), " %x", (int)PTR); for (i=0; i<(NN); ++i) sprintf(buffer + LSTRLEN(buffer), FMT, PTR[i]); PLogMessage(buffer); }
     32 
     33 /* Cross-utterance CMN calculation:
     34    We try to normalize the speech frames before they get to the recognizer.
     35    The speech frames are LDA-processed mfcc-with-dynamic feature vectors.
     36    We collect these speech frames during recognition. At the end of
     37    recognition we exclude the silence frames from the collected data, and
     38    generate a new channel average based on the previous average and the new
     39    data, using an exponential decay formula.
     40 
     41    In-utterance CMN calculation:
     42    A new short-term average mechanism was introduced, with faster update,
     43    to improve recognition on the very first recognition after init or reset.
     44    We wait for a minimum number of new data frames to apply this. We also
     45    disable the fast updater after some frames, because we assume the
     46    cross-utterance estimator to be more reliable, particularly in its
     47    ability to exclude silence frames from the calculation.
     48 */
     49 
     50 /* default settings for cross-utterance cms */
     51 #define SWICMS_FORGET_FACTOR_DEFAULT        400 /* effective frms of history */
     52 #define SWICMS_SBINDEX_DEFAULT              100 /* use speech frames only */
     53 /* #define SWICMS_CACHE_RESOLUTION_DEFAULT  see swicms.h */
     54 /* #define SWICMS_CACHE_SIZE_DEFAULT        see swicms.h */
     55 
     56 /* default settings for in-utterance cms */
     57 #define SWICMS_INUTT_FORGET_FACTOR2_DISABLE 65535 /* any large number */
     58 #define SWICMS_INUTT_FORGET_FACTOR2_DEFAULT SWICMS_INUTT_FORGET_FACTOR2_DISABLE
     59 /* disable this when cross-utt become more reliable */
     60 #define SWICMS_INUTT_DISABLE_AFTER_FRAMES   200
     61 /* wait while the estimate is poor */
     62 #define SWICMS_INUTT_ENABLE_AFTER_FRAMES    10
     63 
/**
 * Logging Stuff
 *
 * LOG_LEVEL and MODULE_NAME are defined here but are not referenced by the
 * code visible in this file (the MTAG line that used MODULE_NAME is
 * commented out).
 */
#define LOG_LEVEL 2
#define MODULE_NAME L("swicms.c")
//static const char* MTAG = MODULE_NAME;

/* Revision-control id string. The never-taken `0 ? ... &rcsid` arm makes the
   initializer mention rcsid itself -- presumably to silence "unused static"
   warnings; the value is always the "$Id...$" literal. */
static const char *rcsid = 0 ? (const char *) &rcsid :
                           "$Id: swicms.c,v 1.21.6.16 2008/06/05 19:00:55 stever Exp $";

/* File-wide switch for the extra diagnostic dumps in swicms_update() and
   swicms_lda_process(); swicms_init() may overwrite it from the session
   property CREC.Frontend.swicms.debug. */
static ESR_BOOL SWICMS_DEBUG = ESR_FALSE;
     75 
/* these are good values from cmn/tmn files */
/* Built-in defaults copied into swicms->cmn / swicms->tmn by swicms_init():
   the *_8 tables are used when the sample rate is 8000, the *_11 tables for
   every other rate. Each initializer lists 36 values; in every table the
   last 24 are the constant 127. */

/* default channel mean, 8000 Hz */
static const imeldata gswicms_cmn1_8 [MAX_CHAN_DIM] =
  {
    158, 141,  99, 125, 101, 162, 113, 138, 128, 143, 123, 141,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
  };

/* default channel mean, all other sample rates */
static const imeldata gswicms_cmn1_11 [MAX_CHAN_DIM] =
  {
    163, 121, 120, 114, 124, 139, 144, 108, 150, 119, 146, 124,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
  };

/* default target mean, 8000 Hz */
static const imeldata gswicms_tmn1_8 [MAX_CHAN_DIM] =
  {
    108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
  };

/* default target mean, all other sample rates (same values as the 8000 Hz table) */
static const imeldata gswicms_tmn1_11 [MAX_CHAN_DIM] =
  {
    108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
  };
    104 
    105 static ESR_ReturnCode GetSomeIntsIfAny( const LCHAR* parname, imeldata* parvalue, size_t reqSize)
    106 {
    107   size_t i, size;
    108   ESR_ReturnCode rc;
    109   ESR_BOOL exists;
    110   IntArrayList* intList = 0;
    111 
    112   CHKLOG(rc, ESR_SessionContains(parname, &exists));
    113   if (exists) {
    114     rc = ESR_SessionGetProperty(parname, (void**)&intList, TYPES_INTARRAYLIST);
    115     if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
    116       /* no match will revert to default data already in static array */
    117       PLogError(L("Error reading %s from session: %s"), parname, ESR_rc2str(rc));
    118       return ESR_FATAL_ERROR;
    119     }
    120     else if (rc == ESR_SUCCESS) {
    121       CHKLOG(rc, IntArrayListGetSize(intList, &size));
    122       if(size != reqSize) {
    123 	PLogError(L("Error reading %s from session, expected len %d: %s"), parname, reqSize, ESR_rc2str(rc));
    124 	return ESR_FATAL_ERROR;
    125       }
    126       if(reqSize == 1)
    127 	CHKLOG(rc, IntArrayListGet(intList, 0, parvalue));
    128       else {
    129 	for (i=0; i<size; ++i)
    130 	  CHKLOG(rc, IntArrayListGet(intList, i, &parvalue[i]));
    131       }
    132     }
    133   }
    134   return ESR_SUCCESS;
    135  CLEANUP:
    136   return rc;
    137 }
    138 
    139 int swicms_init(swicms_norm_info* swicms)
    140 {
    141   ESR_ReturnCode    rc = ESR_SUCCESS;
    142   size_t            i;
    143   ESR_BOOL          exists, sessionExists;
    144   size_t 	    sample_rate;
    145 
    146   /* defaults */
    147   swicms->sbindex          = SWICMS_SBINDEX_DEFAULT;
    148   swicms->cached_num_frames = 0;
    149   swicms->forget_factor    = SWICMS_FORGET_FACTOR_DEFAULT;
    150   swicms->cache_resolution = SWICMS_CACHE_RESOLUTION_DEFAULT;
    151   swicms->num_frames_in_cmn = 0;
    152 
    153   CHKLOG(rc, ESR_SessionExists(&sessionExists));
    154 
    155   if (sessionExists)
    156   {  /* We'll assume this rate is valid or someone else will be complaining.   SteveR */
    157     rc = ESR_SessionGetSize_t ( L ( "CREC.Frontend.samplerate" ), &sample_rate );
    158 
    159     if ( rc != ESR_SUCCESS )
    160       return ( rc );
    161   }
    162   else
    163     sample_rate = 11025;
    164 
    165   /* init the data structures by copying the static data so that we can have a copy if we need to reset */
    166   if ( sample_rate == 8000 )
    167   {
    168     for ( i = 0; i < MAX_CHAN_DIM; i++ )
    169     {
    170       swicms->cmn [i] = gswicms_cmn1_8 [i];
    171       swicms->tmn [i] = gswicms_tmn1_8 [i];
    172 // _lda_*mn below are OK, but are recalculated in swicms_lda_process()
    173       swicms->lda_cmn [i] = 0; /* calculated by swicms_lda_process() */
    174       swicms->lda_tmn [i] = 0; /* calculated by swicms_lda_process() */
    175     }
    176   }
    177   else
    178   {
    179     for ( i = 0; i < MAX_CHAN_DIM; i++ )
    180     {
    181       swicms->cmn [i] = gswicms_cmn1_11 [i];
    182       swicms->tmn [i] = gswicms_tmn1_11 [i];
    183 // _lda_*mn below are OK, but are recalculated in swicms_lda_process()
    184       swicms->lda_cmn [i] = 0; /* calculated by swicms_lda_process() */
    185       swicms->lda_tmn [i] = 0; /* calculated by swicms_lda_process() */
    186     }
    187   }
    188   CHKLOG(rc, ESR_SessionExists(&sessionExists));
    189 
    190   if (sessionExists)
    191   {
    192     const LCHAR* parname = L("CREC.Frontend.swicms.debug");
    193     CHKLOG(rc, ESR_SessionContains(parname, &exists));
    194     if (exists) {
    195       rc = ESR_SessionGetBool(parname, &SWICMS_DEBUG);
    196       if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
    197         PLOG_DBG_ERROR((L("Error reading %s from session: %s"), parname, ESR_rc2str(rc)));
    198         return rc;
    199       }
    200     }
    201 
    202     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.forget_factor"),
    203 			   &swicms->forget_factor, 1);
    204     if(rc != ESR_SUCCESS) return rc;
    205 
    206     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.sbindex"),
    207 			   &swicms->sbindex, 1);
    208     if(rc != ESR_SUCCESS) return rc;
    209 
    210     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn"),
    211 			   &swicms->cmn[0], MAX_CHAN_DIM);
    212     if(rc != ESR_SUCCESS) return rc;
    213 
    214     if ( sample_rate == 8000 )
    215     {
    216       rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn8"), &swicms->cmn[0], MAX_CHAN_DIM);
    217 
    218       if(rc != ESR_SUCCESS)
    219         return rc;
    220     }
    221     else
    222     {
    223       rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn11"), &swicms->cmn[0], MAX_CHAN_DIM);
    224 
    225       if(rc != ESR_SUCCESS)
    226         return rc;
    227     }
    228 
    229     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.tmn"),
    230 			   &swicms->tmn[0], MAX_CHAN_DIM);
    231     if(rc != ESR_SUCCESS) return rc;
    232   }
    233 
    234   swicms->is_valid = 0;
    235   for (i = 0; i < MAX_CHAN_DIM; i++)
    236     swicms->adjust[i] = 255;
    237 
    238 #ifdef SREC_ENGINE_VERBOSE_LOGGING
    239   PLogMessage("swicms->forget_factor    = %d\n", swicms->forget_factor);
    240   PLogMessage("swicms->cache_resolution = %d\n", swicms->cache_resolution);
    241   PLogMessage("swicms->sbindex          = %d\n", swicms->sbindex);
    242 #endif
    243 
    244   /* in-utt cms parameters */
    245   swicms->inutt.forget_factor2 = SWICMS_INUTT_FORGET_FACTOR2_DEFAULT;
    246   swicms->inutt.disable_after  = 200;
    247   swicms->inutt.enable_after   = 10;    /* in-utt is less reliable       */
    248   swicms->inutt.num_bou_frames_to_skip = 20; /* silence frames! see windback */
    249   swicms->inutt.num_frames_since_bou = 0;
    250   swicms->inutt.num_frames_in_accum = 0;
    251   for(i=0; i<MAX_CHAN_DIM; i++) swicms->inutt.accum[i] = 0;
    252 
    253   if (sessionExists) {
    254     rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.forget_factor2"),
    255 			  &swicms->inutt.forget_factor2, 1);
    256     if(rc != ESR_SUCCESS) return rc;
    257 
    258     rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.disable_after"),
    259 			  &swicms->inutt.disable_after, 1);
    260     if(rc != ESR_SUCCESS) return rc;
    261 
    262     rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.enable_after"),
    263 			  &swicms->inutt.enable_after, 1);
    264     if(rc != ESR_SUCCESS) return rc;
    265 
    266     /* we need to estimate the in-utt cmn from speech frames only! so let's
    267        make sure to skip some frames before collecting data, */
    268     ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists);
    269     if (exists) {
    270       ESR_BOOL do_skip_even_frames = ESR_TRUE;
    271       ESR_SessionGetBool(L("CREC.Frontend.do_skip_even_frames"), &do_skip_even_frames);
    272       ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &swicms->inutt.num_bou_frames_to_skip);
    273       if( do_skip_even_frames)
    274 	swicms->inutt.num_bou_frames_to_skip /= 2;
    275       swicms->inutt.num_bou_frames_to_skip -= 5; /* ensure spch frames only */
    276     }
    277   }
    278 
    279   return 0;
    280  CLEANUP:
    281   return rc;
    282 }
    283 
    284 
    285 ESR_ReturnCode swicms_get_cmn ( swicms_norm_info* swicms, LCHAR *cmn_params, size_t* len )
    286 {
    287   int dim_count;
    288   int i;
    289   imeldata temp[MAX_CHAN_DIM];
    290   const size_t INT_LENGTH = 12;
    291 
    292   if (  swicms->_prep != NULL )	/* lda exists give them transformed lda. */
    293   {
    294     for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
    295       temp [dim_count] = swicms->lda_cmn [dim_count];
    296     inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
    297   }
    298   else	/* lda does not exist give them raw cmn values */
    299   {
    300     for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
    301       temp [dim_count] = swicms->cmn [dim_count];
    302   }
    303 
    304   for ( dim_count = 0, i = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
    305   {
    306     i += sprintf( cmn_params + i, dim_count==0 ? "%d" : ",%d", temp [dim_count] );
    307     if (i + INT_LENGTH >= *len) {
    308         *len = MAX_CHAN_DIM * (INT_LENGTH + 2) * sizeof(LCHAR);
    309         return ESR_BUFFER_OVERFLOW;
    310     }
    311   }
    312 
    313   return ESR_SUCCESS;
    314 }
    315 
    316 
    317 ESR_ReturnCode swicms_set_cmn ( swicms_norm_info* swicms, const char *cmn_params )
    318 {
    319   ESR_ReturnCode    set_status;
    320   int               length_of_params;
    321   int               dim_count;
    322   int               got_word;
    323   int               current_position;
    324   char              *copy_of_params;
    325   char              *parsed_strings [MAX_CHAN_DIM];
    326   int               temp_cmn [MAX_CHAN_DIM];
    327 
    328   length_of_params = strlen ( cmn_params ) + 1;
    329   copy_of_params = (char*)MALLOC ( length_of_params, NULL );
    330 
    331   if ( copy_of_params != NULL )
    332   {
    333     set_status = ESR_SUCCESS;
    334     memcpy ( copy_of_params, cmn_params, length_of_params );
    335     dim_count = 0;
    336     current_position = 0;
    337     got_word = 0;
    338     parsed_strings [dim_count] = copy_of_params + current_position;
    339 
    340     while ( ( dim_count < MAX_CHAN_DIM ) && ( set_status == ESR_SUCCESS ) )
    341     {
    342       switch ( *( copy_of_params + current_position ) )
    343       {
    344         case '\0':
    345           if ( got_word == 1 )
    346           {
    347             if ( dim_count == ( MAX_CHAN_DIM - 1 ) )
    348               dim_count++;
    349             else
    350             {
    351               PLogError ( "Channel Normalization : Missing Params Must Contain %d Params\n", MAX_CHAN_DIM );
    352               set_status = ESR_INVALID_ARGUMENT;
    353             }
    354           }
    355           else
    356           {
    357             PLogError ( "Channel Normalization : Missing Params Mus Contain %d Params\n", MAX_CHAN_DIM );
    358             set_status = ESR_INVALID_ARGUMENT;
    359           }
    360           break;
    361 
    362         case ',':
    363           if ( got_word == 1 )
    364           {
    365             if ( dim_count < ( MAX_CHAN_DIM - 1 ) )
    366             {
    367               dim_count++;
    368               *( copy_of_params + current_position) = '\0';
    369               current_position++;
    370 
    371               if ( current_position == length_of_params )
    372               {
    373                 PLogError ( "Channel Normalization : Delimiter At End Of Param String\n" );
    374                 set_status = ESR_INVALID_ARGUMENT;
    375               }
    376               parsed_strings [dim_count] = copy_of_params + current_position;
    377               got_word = 0;
    378             }
    379             else
    380             {
    381               PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
    382               set_status = ESR_INVALID_ARGUMENT;
    383             }
    384           }
    385           else
    386           {
    387             PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
    388             set_status = ESR_INVALID_ARGUMENT;
    389           }
    390           break;
    391 
    392         case '0':
    393         case '1':
    394         case '2':
    395         case '3':
    396         case '4':
    397         case '5':
    398         case '6':
    399         case '7':
    400         case '8':
    401         case '9':
    402           got_word = 1;
    403           current_position++;
    404 
    405           if ( current_position == length_of_params )
    406           {
    407             PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
    408             set_status = ESR_INVALID_ARGUMENT;
    409           }
    410           break;
    411 
    412         default:
    413           PLogError ( "Channel Normalization : Invalid Param : %c : Params Must Contain Only Digits\n" );
    414           set_status = ESR_INVALID_ARGUMENT;
    415           break;
    416       }
    417     }
    418     if ( set_status == ESR_SUCCESS )
    419     {
    420       dim_count = 0;
    421 
    422       while ( ( dim_count < MAX_CHAN_DIM ) && (  set_status == ESR_SUCCESS ) )
    423       {
    424         temp_cmn [dim_count] = atoi ( parsed_strings [dim_count] );
    425 
    426         if ( ( temp_cmn [dim_count] < 0 ) || ( temp_cmn [dim_count] > 255 ) )
    427         {
    428           set_status = ESR_INVALID_ARGUMENT;
    429         }
    430       }
    431       if ( set_status == ESR_SUCCESS )
    432       {
    433         for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
    434           swicms->cmn [dim_count] = temp_cmn [dim_count];
    435         if ( swicms->_prep != NULL )	/* Set now if NULL it will automatically be set on first utterance */
    436           linear_transform_frame(swicms->_prep, swicms->lda_cmn, 1 /*do_shift*/);
    437       }
    438     }
    439     FREE ( copy_of_params );
    440   }
    441   else
    442   {
    443     PLogError ( "Channel Normalization Out Of Memory Error\n" );
    444     set_status = ESR_OUT_OF_MEMORY;
    445   }
    446   swicms->num_frames_in_cmn = 0;
    447   return ( set_status );
    448 }
    449 
    450 
    451 int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen)
    452 {
    453   int i;
    454   imeldata *pcache, *pframe;
    455 
    456   ASSERT(dimen == MAX_CHAN_DIM);
    457   i = swicms->cached_num_frames / swicms->cache_resolution;
    458   if (i < SWICMS_CACHE_SIZE_DEFAULT)
    459   {
    460     pcache = swicms->cached_sections[ i];
    461     if (swicms->cached_num_frames % swicms->cache_resolution == 0)
    462     {
    463       for (i = 0; i < MAX_CHAN_DIM; i++) *pcache++ = 0;
    464       pcache -= MAX_CHAN_DIM;
    465     }
    466     pframe = frame;
    467     for (i = 0; i < MAX_CHAN_DIM; i++) *pcache++ += *pframe++;
    468     swicms->cached_num_frames++;
    469   }
    470 
    471   return 0;
    472 }
    473 
    474 int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
    475     imeldata* oframe,
    476     imeldata* iframe, int dimen)
    477 {
    478   int ii;
    479   ASSERT(dimen == MAX_CHAN_DIM);
    480 
    481   /* IF inutt is activated at all */
    482   if(swicms->inutt.forget_factor2 != SWICMS_INUTT_FORGET_FACTOR2_DISABLE) {
    483     /* AND IF we have not disabled it (due to x-utt more reliable) */
    484     if(swicms->inutt.num_frames_in_accum < swicms->inutt.disable_after) {
    485       /* AND IF we have skipped past the silence frames */
    486       if( swicms->inutt.num_frames_since_bou >= swicms->inutt.num_bou_frames_to_skip){
    487 	swicms->inutt.num_frames_in_accum++;
    488 	for(ii=0;ii<dimen;ii++) swicms->inutt.accum[ii] += iframe[ii];
    489 	/* AND IF we've already seen at least 10 frames (presumably) of speech */
    490 	if(swicms->inutt.num_frames_in_accum>swicms->inutt.enable_after) {
    491 	  /* THEN we update the adjustment in-line with the current utterance! */
    492 	  for(ii=0;ii<dimen;ii++) {
    493 	    imeldata denom = ( swicms->inutt.forget_factor2
    494 			       + swicms->inutt.num_frames_in_accum );
    495 	    /* tmp: weighted average of the old lda_cmn and the new accum */
    496 	    imeldata tmp=(swicms->lda_cmn[ii]*swicms->inutt.forget_factor2
    497 			  + swicms->inutt.accum[ii] + denom/2) / denom;
    498 	    swicms->adjust[ii] = swicms->lda_tmn[ii] - tmp;
    499 	  }
    500 	  //printf_vector("swicms->adjust2 "," %d",swicms->adjust, dimen);
    501 	}
    502       }
    503     }
    504     swicms->inutt.num_frames_since_bou++;
    505   }
    506 
    507   for (ii = 0; ii < dimen; ii++)
    508     oframe[ii] = MAKEBYTE(iframe[ii] + swicms->adjust[ii]);
    509   return 0;
    510 }
    511 
    512 int swicms_update(swicms_norm_info* swicms, int speech_start, int speech_end)
    513 {
    514   int i, j;
    515   asr_int32_t speech_avg[MAX_CHAN_DIM], backgr_avg[MAX_CHAN_DIM], avg[MAX_CHAN_DIM];
    516   int ff;
    517   int nn, speech_nn, backgr_nn;
    518   int num_frames = swicms->cached_num_frames;
    519   int cache_start, cache_end, backgr_cache_end;
    520   int sbindex = swicms->sbindex;
    521 
    522   /* init for utterance */
    523   swicms->inutt.num_frames_since_bou = 0;
    524 
    525   swicms->cached_num_frames = 0;
    526   cache_start = speech_start;
    527   cache_start -= (cache_start % swicms->cache_resolution);
    528   cache_start /= swicms->cache_resolution;
    529 
    530   if (speech_end == MAXframeID)
    531   {
    532     cache_end = SWICMS_CACHE_SIZE_DEFAULT;
    533   }
    534   else
    535   {
    536     if (speech_end < num_frames)
    537       cache_end = speech_end;
    538     else
    539       cache_end = num_frames;
    540     cache_end -= (cache_end % swicms->cache_resolution);
    541     cache_end /= swicms->cache_resolution;
    542   }
    543 
    544   if (num_frames == 0 || speech_end == 0 || speech_start == speech_end || speech_end == MAXframeID)
    545   {
    546     if (speech_end != 0 || speech_start != 0)
    547       PLogError("Warning: speech_bounds (%d,%d) swicms->cached_num_frames (%d)\n",
    548                 speech_start, speech_end, num_frames);
    549 	if (SWICMS_DEBUG) {
    550       //printf_vector("swicms->adjust.rep", " %d", swicms->adjust, MAX_CHAN_DIM);
    551     }
    552     return 1;
    553   }
    554 
    555   backgr_cache_end = (num_frames - num_frames % swicms->cache_resolution) / swicms->cache_resolution;
    556 
    557   speech_nn = (cache_end - cache_start) * swicms->cache_resolution;
    558   backgr_nn = backgr_cache_end * swicms->cache_resolution - speech_nn;
    559 
    560   for (i = 0; i < MAX_CHAN_DIM; i++)
    561   {
    562     speech_avg[i] = 0;
    563     backgr_avg[i] = 0;
    564     for (j = cache_start; j < cache_end; j++)
    565       speech_avg[i] += swicms->cached_sections[j][i];
    566     for (j = 0; j < cache_start; j++)
    567       backgr_avg[i] += swicms->cached_sections[j][i];
    568     for (j = cache_end; j < backgr_cache_end; j++)
    569       backgr_avg[i] += swicms->cached_sections[j][i];
    570     if (speech_nn == 0 && backgr_nn > 0)
    571     {
    572       backgr_avg[i] /= backgr_nn;
    573       speech_avg[i] = backgr_avg[i];
    574       speech_nn = backgr_nn;
    575     }
    576     else if (speech_nn > 0 && backgr_nn == 0)
    577     {
    578       speech_avg[i] /= speech_nn;
    579       backgr_avg[i] = speech_avg[i];
    580       backgr_nn = speech_nn;
    581     }
    582     else if (speech_nn > 0 && backgr_nn > 0)
    583     {
    584       speech_avg[i] /= speech_nn;
    585       backgr_avg[i] /= backgr_nn;
    586     }
    587     else
    588     {
    589       return 0;
    590     }
    591 
    592     avg[i] = (sbindex * speech_avg[i] + (100 - sbindex) * backgr_avg[i] + 50) / 100;
    593   }
    594   nn = (sbindex * speech_nn + (100 - sbindex) * backgr_nn + 50) / 100;
    595 
    596   for (i = 0, ff = 0; i < MAX_CHAN_DIM; i++)
    597   {
    598     ff += (swicms->lda_tmn[i] - avg[i]);
    599   }
    600   ff /= MAX_CHAN_DIM; /* sum is now the average offset from TMN */
    601   if (ff > 5)
    602   {
    603     PLogError("Warning: bad utt mean during swicms_update() (moffs=%d)\n", ff);
    604     //printf_vector("swicms->adjust.rep", " %d", swicms->adjust, MAX_CHAN_DIM);
    605     return 1;
    606   }
    607   ff = swicms->forget_factor;
    608   if (ff < 9999)
    609   {
    610     for (i = 0; i < MAX_CHAN_DIM; i++)
    611     {
    612       swicms->lda_cmn[i] = (swicms->lda_cmn[i] * ff + avg[i] * nn + (ff + nn) / 2)  / (ff + nn);
    613       swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
    614     }
    615   }
    616 
    617   if (SWICMS_DEBUG)
    618     {
    619       imeldata temp[MAX_CHAN_DIM];
    620       PLogMessage("swicms_update() used %d frames (%d-%d)", nn, speech_start, speech_end);
    621 
    622       for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_cmn[i];
    623       inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
    624       /* use this dump, to put back into CREC.Frontend.swicms.cmn */
    625       printf_vector("swicms.cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
    626 
    627       //printf_vector("swicms.lda_cmn   ", " %d", &swicms.lda_cmn [0], MAX_CHAN_DIM);
    628       //printf_vector("swicms.lda_tmn   ", " %d", &swicms.lda_tmn [0], MAX_CHAN_DIM);
    629       //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
    630       //printf_vector("avg.speech    ", " %d", avg, MAX_CHAN_DIM);
    631     }
    632   else
    633     {
    634 #ifndef NDEBUG
    635       //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
    636 #endif
    637     }
    638   swicms->num_frames_in_cmn += nn;
    639   return 0;
    640 }
    641 
    642 int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep)
    643 {
    644   int i;
    645 
    646   for (i = 0; i < MAX_CHAN_DIM; i++) swicms->lda_tmn[i] = swicms->tmn[i];
    647   for (i = 0; i < MAX_CHAN_DIM; i++) swicms->lda_cmn[i] = swicms->cmn[i];
    648   linear_transform_frame(prep, swicms->lda_tmn, 1 /*do_shift*/);
    649   linear_transform_frame(prep, swicms->lda_cmn, 1 /*do_shift*/);
    650 
    651   for (i = 0; i < MAX_CHAN_DIM; i++)
    652   {
    653     swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
    654   }
    655 
    656 #ifndef NDEBUG
    657   //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
    658 #endif
    659   swicms->is_valid = 1;
    660   swicms->_prep = prep;
    661 
    662   if(SWICMS_DEBUG) {
    663     imeldata temp[MAX_CHAN_DIM];
    664     printf_vector("swicms->cmn     ", " %d", swicms->cmn,     MAX_CHAN_DIM);
    665     printf_vector("swicms->lda_cmn ", " %d", swicms->lda_cmn, MAX_CHAN_DIM);
    666     //printf_vector("swicms->tmn     ", " %d", swicms->tmn,     MAX_CHAN_DIM);
    667     //printf_vector("swicms->lda_tmn ", " %d", swicms->lda_tmn, MAX_CHAN_DIM);
    668     //printf_vector("swicms->adjust  ", " %d", swicms->adjust,  MAX_CHAN_DIM);
    669 
    670     //for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_tmn[i];
    671     //inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
    672     //printf_vector("swicms->tmn(r)  ", " %d", temp, MAX_CHAN_DIM);
    673 
    674     for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_cmn[i];
    675     inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
    676     printf_vector("swicms->cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
    677   }
    678   return 0;
    679 }
    680 
    681 
    682 
    683