Home | History | Annotate | Download | only in clib
      1 /*---------------------------------------------------------------------------*
      2  *  swicms.c                                                                 *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #include <string.h>
     21 #include"swicms.h"
     22 #include"srec_sizes.h"
     23 #include"prelib.h"
     24 
     25 #include "passert.h"
     26 #include "ESR_Session.h"
     27 #include "ESR_SessionType.h"
     28 #include "IntArrayList.h"
     29 #include "portable.h"
     30 
     /*
      * printf_vector(HEAD, FMT, PTR, NN)
      *
      * Debug helper: emits a single PLogMessage() line made of the text HEAD,
      * the address of PTR, and the first NN elements of PTR, each rendered with
      * the printf-style format FMT (e.g. " %d").
      *
      * The line is assembled in a fixed 256-element LCHAR buffer.  Every write
      * is bounded, so output that does not fit is truncated rather than written
      * past the end of the buffer.  HEAD is printed as data and the finished
      * buffer is logged through "%s", so neither is interpreted as a format.
      * Wrapped in do { } while (0) so it behaves as one statement; the pv_
      * locals are prefixed to avoid capturing names used in the arguments.
      */
     #define printf_vector(HEAD, FMT, PTR, NN) \
       do { \
         LCHAR pv_buf[256]; \
         const size_t pv_cap = sizeof(pv_buf) / sizeof(pv_buf[0]); \
         size_t pv_len; \
         int pv_idx; \
         int pv_rc = snprintf(pv_buf, pv_cap, "%s %p", (HEAD), (void *)(PTR)); \
         if (pv_rc < 0) { pv_buf[0] = 0; pv_rc = 0; } \
         pv_len = ((size_t)pv_rc < pv_cap) ? (size_t)pv_rc : pv_cap - 1; \
         for (pv_idx = 0; pv_idx < (NN) && pv_len + 1 < pv_cap; ++pv_idx) { \
           pv_rc = snprintf(pv_buf + pv_len, pv_cap - pv_len, FMT, (PTR)[pv_idx]); \
           if (pv_rc < 0) { pv_buf[pv_len] = 0; break; } \
           pv_len = ((size_t)pv_rc < pv_cap - pv_len) ? pv_len + (size_t)pv_rc : pv_cap - 1; \
         } \
         PLogMessage("%s", pv_buf); \
       } while (0)
     32 
     33 /* Cross-utterance CMN calculation:
     34    We try to normalize the speech frames before they get to the recognizer.
     35    The speech frames are LDA-processed mfcc-with-dynamic feature vectors.
     36    We collect these speech frames during recognition. At the end of
     37    recognition we exclude the silence frames from the collected data, and
     38    generate a new channel average based on the previous average and the new
     39    data, using an exponential decay formula.
     40 
     41    In-utterance CMN calculation:
     42    A new short-term average mechanism was introduced, with faster update,
     43    to improve recognition on the very first recognition after init or reset.
     44    We wait for a minimum number of new data frames to apply this. We also
     45    disable the fast updater after some frames, because we assume the
     46    cross-utterance estimator to be more reliable, particularly in its
     47    ability to exclude silence frames from the calculation.
     48 */
     49 
     /*
      * Default settings for cross-utterance cms.
      * (SWICMS_CACHE_RESOLUTION_DEFAULT and SWICMS_CACHE_SIZE_DEFAULT are
      * defined in swicms.h.)
      */
     /* forget factor: effective number of frames of history */
     #define SWICMS_FORGET_FACTOR_DEFAULT        (400)
     /* speech/background index: 100 means use speech frames only */
     #define SWICMS_SBINDEX_DEFAULT              (100)

     /*
      * Default settings for in-utterance cms.
      */
     /* sentinel forget factor that switches in-utterance cms off (any large number) */
     #define SWICMS_INUTT_FORGET_FACTOR2_DISABLE (65535)
     /* in-utterance cms is off by default */
     #define SWICMS_INUTT_FORGET_FACTOR2_DEFAULT (SWICMS_INUTT_FORGET_FACTOR2_DISABLE)
     /* stop in-utterance updates after this many frames, once the
        cross-utterance estimate has become more reliable */
     #define SWICMS_INUTT_DISABLE_AFTER_FRAMES   (200)
     /* hold off in-utterance updates for this many frames, while the
        estimate is still poor */
     #define SWICMS_INUTT_ENABLE_AFTER_FRAMES    (10)
     63 
     64 /**
     65  * Logging Stuff
     66  */
     67 #define LOG_LEVEL 2
     68 #define MODULE_NAME L("swicms.c")
     69 //static const char* MTAG = MODULE_NAME;
     70 
     71 static const char *rcsid = 0 ? (const char *) &rcsid :
     72                            "$Id: swicms.c,v 1.21.6.16 2008/06/05 19:00:55 stever Exp $";
     73 
     74 static ESR_BOOL SWICMS_DEBUG = ESR_FALSE;
     75 
     76 /* these are good values from cmn/tmn files */
     77 static const imeldata gswicms_cmn1_8 [MAX_CHAN_DIM] =
     78   {
     79     158, 141,  99, 125, 101, 162, 113, 138, 128, 143, 123, 141,
     80     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
     81     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
     82   };
     83 
     84 static const imeldata gswicms_cmn1_11 [MAX_CHAN_DIM] =
     85   {
     86     163, 121, 120, 114, 124, 139, 144, 108, 150, 119, 146, 124,
     87     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
     88     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
     89   };
     90 
     91 static const imeldata gswicms_tmn1_8 [MAX_CHAN_DIM] =
     92   {
     93     108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
     94     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
     95     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
     96   };
     97 
     98 static const imeldata gswicms_tmn1_11 [MAX_CHAN_DIM] =
     99   {
    100     108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
    101     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
    102     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
    103   };
    104 
    105 static ESR_ReturnCode GetSomeIntsIfAny( const LCHAR* parname, imeldata* parvalue, size_t reqSize)
    106 {
    107   size_t i, size;
    108   ESR_ReturnCode rc;
    109   ESR_BOOL exists;
    110   IntArrayList* intList = 0;
    111 
    112   CHKLOG(rc, ESR_SessionContains(parname, &exists));
    113   if (exists) {
    114     rc = ESR_SessionGetProperty(parname, (void**)&intList, TYPES_INTARRAYLIST);
    115     if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
    116       /* no match will revert to default data already in static array */
    117       PLogError(L("Error reading %s from session: %s"), parname, ESR_rc2str(rc));
    118       return ESR_FATAL_ERROR;
    119     }
    120     else if (rc == ESR_SUCCESS) {
    121       CHKLOG(rc, IntArrayListGetSize(intList, &size));
    122       if(size != reqSize) {
    123 	PLogError(L("Error reading %s from session, expected len %d: %s"), parname, reqSize, ESR_rc2str(rc));
    124 	return ESR_FATAL_ERROR;
    125       }
    126       if(reqSize == 1)
    127 	CHKLOG(rc, IntArrayListGet(intList, 0, parvalue));
    128       else {
    129 	for (i=0; i<size; ++i)
    130 	  CHKLOG(rc, IntArrayListGet(intList, i, &parvalue[i]));
    131       }
    132     }
    133   }
    134   return ESR_SUCCESS;
    135  CLEANUP:
    136   return rc;
    137 }
    138 
    139 int swicms_init(swicms_norm_info* swicms)
    140 {
    141   ESR_ReturnCode    rc = ESR_SUCCESS;
    142   size_t            i;
    143   ESR_BOOL          exists, sessionExists;
    144   size_t 	    sample_rate;
    145 
    146   /* defaults */
    147   swicms->sbindex          = SWICMS_SBINDEX_DEFAULT;
    148   swicms->cached_num_frames = 0;
    149   swicms->forget_factor    = SWICMS_FORGET_FACTOR_DEFAULT;
    150   swicms->cache_resolution = SWICMS_CACHE_RESOLUTION_DEFAULT;
    151   swicms->num_frames_in_cmn = 0;
    152 
    153   CHKLOG(rc, ESR_SessionExists(&sessionExists));
    154 
    155   if (sessionExists)
    156   {  /* We'll assume this rate is valid or someone else will be complaining.   SteveR */
    157     rc = ESR_SessionGetSize_t ( L ( "CREC.Frontend.samplerate" ), &sample_rate );
    158 
    159     if ( rc != ESR_SUCCESS )
    160       return ( rc );
    161   }
    162   else
    163     sample_rate = 11025;
    164 
    165   /* init the data structures by copying the static data so that we can have a copy if we need to reset */
    166   if ( sample_rate == 8000 )
    167   {
    168     for ( i = 0; i < MAX_CHAN_DIM; i++ )
    169     {
    170       swicms->cmn [i] = gswicms_cmn1_8 [i];
    171       swicms->tmn [i] = gswicms_tmn1_8 [i];
    172 // _lda_*mn below are OK, but are recalculated in swicms_lda_process()
    173       swicms->lda_cmn [i] = 0; /* calculated by swicms_lda_process() */
    174       swicms->lda_tmn [i] = 0; /* calculated by swicms_lda_process() */
    175     }
    176   }
    177   else
    178   {
    179     for ( i = 0; i < MAX_CHAN_DIM; i++ )
    180     {
    181       swicms->cmn [i] = gswicms_cmn1_11 [i];
    182       swicms->tmn [i] = gswicms_tmn1_11 [i];
    183 // _lda_*mn below are OK, but are recalculated in swicms_lda_process()
    184       swicms->lda_cmn [i] = 0; /* calculated by swicms_lda_process() */
    185       swicms->lda_tmn [i] = 0; /* calculated by swicms_lda_process() */
    186     }
    187   }
    188   CHKLOG(rc, ESR_SessionExists(&sessionExists));
    189 
    190   if (sessionExists)
    191   {
    192     const LCHAR* parname = L("CREC.Frontend.swicms.debug");
    193     CHKLOG(rc, ESR_SessionContains(parname, &exists));
    194     if (exists) {
    195       rc = ESR_SessionGetBool(parname, &SWICMS_DEBUG);
    196       if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
    197         PLOG_DBG_ERROR((L("Error reading %s from session: %s"), parname, ESR_rc2str(rc)));
    198         return rc;
    199       }
    200     }
    201 
    202     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.forget_factor"),
    203 			   &swicms->forget_factor, 1);
    204     if(rc != ESR_SUCCESS) return rc;
    205 
    206     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.sbindex"),
    207 			   &swicms->sbindex, 1);
    208     if(rc != ESR_SUCCESS) return rc;
    209 
    210     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn"),
    211 			   &swicms->cmn[0], MAX_CHAN_DIM);
    212     if(rc != ESR_SUCCESS) return rc;
    213 
    214     if ( sample_rate == 8000 )
    215     {
    216       rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn8"), &swicms->cmn[0], MAX_CHAN_DIM);
    217 
    218       if(rc != ESR_SUCCESS)
    219         return rc;
    220     }
    221     else
    222     {
    223       rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn11"), &swicms->cmn[0], MAX_CHAN_DIM);
    224 
    225       if(rc != ESR_SUCCESS)
    226         return rc;
    227     }
    228 
    229     rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.tmn"),
    230 			   &swicms->tmn[0], MAX_CHAN_DIM);
    231     if(rc != ESR_SUCCESS) return rc;
    232   }
    233 
    234   swicms->is_valid = 0;
    235   for (i = 0; i < MAX_CHAN_DIM; i++)
    236     swicms->adjust[i] = 255;
    237 
    238 #ifdef SREC_ENGINE_VERBOSE_LOGGING
    239   PLogMessage("swicms->forget_factor    = %d\n", swicms->forget_factor);
    240   PLogMessage("swicms->cache_resolution = %d\n", swicms->cache_resolution);
    241   PLogMessage("swicms->sbindex          = %d\n", swicms->sbindex);
    242 #endif
    243 
    244   /* in-utt cms parameters */
    245   swicms->inutt.forget_factor2 = SWICMS_INUTT_FORGET_FACTOR2_DEFAULT;
    246   swicms->inutt.disable_after  = 200;
    247   swicms->inutt.enable_after   = 10;    /* in-utt is less reliable       */
    248   swicms->inutt.num_bou_frames_to_skip = 20; /* silence frames! see windback */
    249   swicms->inutt.num_frames_since_bou = 0;
    250   swicms->inutt.num_frames_in_accum = 0;
    251   for(i=0; i<MAX_CHAN_DIM; i++) swicms->inutt.accum[i] = 0;
    252 
    253   if (sessionExists) {
    254     rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.forget_factor2"),
    255 			  &swicms->inutt.forget_factor2, 1);
    256     if(rc != ESR_SUCCESS) return rc;
    257 
    258     rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.disable_after"),
    259 			  &swicms->inutt.disable_after, 1);
    260     if(rc != ESR_SUCCESS) return rc;
    261 
    262     rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.enable_after"),
    263 			  &swicms->inutt.enable_after, 1);
    264     if(rc != ESR_SUCCESS) return rc;
    265 
    266     /* we need to estimate the in-utt cmn from speech frames only! so let's
    267        make sure to skip some frames before collecting data, */
    268     ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists);
    269     if (exists) {
    270       ESR_BOOL do_skip_even_frames = ESR_TRUE;
    271       ESR_SessionGetBool(L("CREC.Frontend.do_skip_even_frames"), &do_skip_even_frames);
    272       ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &swicms->inutt.num_bou_frames_to_skip);
    273       if( do_skip_even_frames)
    274 	swicms->inutt.num_bou_frames_to_skip /= 2;
    275       swicms->inutt.num_bou_frames_to_skip -= 5; /* ensure spch frames only */
    276     }
    277   }
    278 
    279   return 0;
    280  CLEANUP:
    281   return rc;
    282 }
    283 
    284 
    285 ESR_ReturnCode swicms_get_cmn ( swicms_norm_info* swicms, LCHAR *cmn_params, size_t* len )
    286 {
    287   int dim_count;
    288   int i;
    289   imeldata temp[MAX_CHAN_DIM];
    290   const size_t INT_LENGTH = 12;
    291 
    292   if (  swicms->_prep != NULL )	/* lda exists give them transformed lda. */
    293   {
    294     for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
    295       temp [dim_count] = swicms->lda_cmn [dim_count];
    296     inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
    297   }
    298   else	/* lda does not exist give them raw cmn values */
    299   {
    300     for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
    301       temp [dim_count] = swicms->cmn [dim_count];
    302   }
    303 
    304   for ( dim_count = 0, i = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
    305   {
    306     i += sprintf( cmn_params + i, dim_count==0 ? "%d" : ",%d", temp [dim_count] );
    307     if (i + INT_LENGTH >= *len) {
    308         *len = MAX_CHAN_DIM * (INT_LENGTH + 2) * sizeof(LCHAR);
    309         return ESR_BUFFER_OVERFLOW;
    310     }
    311   }
    312 
    313   return ESR_SUCCESS;
    314 }
    315 
    316 
    317 ESR_ReturnCode swicms_set_cmn ( swicms_norm_info* swicms, const char *cmn_params )
    318 {
    319   ESR_ReturnCode    set_status;
    320   int               length_of_params;
    321   int               dim_count;
    322   int               got_word;
    323   int               current_position;
    324   char              *copy_of_params;
    325   char              *parsed_strings [MAX_CHAN_DIM];
    326   int               temp_cmn [MAX_CHAN_DIM];
    327 
    328   length_of_params = strlen ( cmn_params ) + 1;
    329   copy_of_params = (char*)MALLOC ( length_of_params, NULL );
    330 
    331   if ( copy_of_params != NULL )
    332   {
    333     set_status = ESR_SUCCESS;
    334     memcpy ( copy_of_params, cmn_params, length_of_params );
    335     dim_count = 0;
    336     current_position = 0;
    337     got_word = 0;
    338     parsed_strings [dim_count] = copy_of_params + current_position;
    339 
    340     while ( ( dim_count < MAX_CHAN_DIM ) && ( set_status == ESR_SUCCESS ) )
    341     {
    342       switch ( *( copy_of_params + current_position ) )
    343       {
    344         case '\0':
    345           if ( got_word == 1 )
    346           {
    347             if ( dim_count == ( MAX_CHAN_DIM - 1 ) )
    348               dim_count++;
    349             else
    350             {
    351               PLogError ( "Channel Normalization : Missing Params Must Contain %d Params\n", MAX_CHAN_DIM );
    352               set_status = ESR_INVALID_ARGUMENT;
    353             }
    354           }
    355           else
    356           {
    357             PLogError ( "Channel Normalization : Missing Params Mus Contain %d Params\n", MAX_CHAN_DIM );
    358             set_status = ESR_INVALID_ARGUMENT;
    359           }
    360           break;
    361 
    362         case ',':
    363           if ( got_word == 1 )
    364           {
    365             if ( dim_count < ( MAX_CHAN_DIM - 1 ) )
    366             {
    367               dim_count++;
    368               *( copy_of_params + current_position) = '\0';
    369               current_position++;
    370 
    371               if ( current_position == length_of_params )
    372               {
    373                 PLogError ( "Channel Normalization : Delimiter At End Of Param String\n" );
    374                 set_status = ESR_INVALID_ARGUMENT;
    375               }
    376               parsed_strings [dim_count] = copy_of_params + current_position;
    377               got_word = 0;
    378             }
    379             else
    380             {
    381               PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
    382               set_status = ESR_INVALID_ARGUMENT;
    383             }
    384           }
    385           else
    386           {
    387             PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
    388             set_status = ESR_INVALID_ARGUMENT;
    389           }
    390           break;
    391 
    392         case '0':
    393         case '1':
    394         case '2':
    395         case '3':
    396         case '4':
    397         case '5':
    398         case '6':
    399         case '7':
    400         case '8':
    401         case '9':
    402           got_word = 1;
    403           current_position++;
    404 
    405           if ( current_position == length_of_params )
    406           {
    407             PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
    408             set_status = ESR_INVALID_ARGUMENT;
    409           }
    410           break;
    411 
    412         default:
    413           PLogError ( "Channel Normalization : Invalid Param : %c : Params Must Contain Only Digits\n" );
    414           set_status = ESR_INVALID_ARGUMENT;
    415           break;
    416       }
    417     }
    418     if ( set_status == ESR_SUCCESS )
    419     {
    420       dim_count = 0;
    421 
    422       while ( ( dim_count < MAX_CHAN_DIM ) && (  set_status == ESR_SUCCESS ) )
    423       {
    424         temp_cmn [dim_count] = atoi ( parsed_strings [dim_count] );
    425 
    426         if ( ( temp_cmn [dim_count] < 0 ) || ( temp_cmn [dim_count] > 255 ) )
    427         {
    428           set_status = ESR_INVALID_ARGUMENT;
    429         }
    430 
    431         dim_count++;
    432       }
    433       if ( set_status == ESR_SUCCESS )
    434       {
    435         for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
    436           swicms->cmn [dim_count] = temp_cmn [dim_count];
    437         if ( swicms->_prep != NULL )	/* Set now if NULL it will automatically be set on first utterance */
    438           linear_transform_frame(swicms->_prep, swicms->lda_cmn, 1 /*do_shift*/);
    439       }
    440     }
    441     FREE ( copy_of_params );
    442   }
    443   else
    444   {
    445     PLogError ( "Channel Normalization Out Of Memory Error\n" );
    446     set_status = ESR_OUT_OF_MEMORY;
    447   }
    448   swicms->num_frames_in_cmn = 0;
    449   return ( set_status );
    450 }
    451 
    452 
    453 int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen)
    454 {
    455   int i;
    456   imeldata *pcache, *pframe;
    457 
    458   ASSERT(dimen == MAX_CHAN_DIM);
    459   i = swicms->cached_num_frames / swicms->cache_resolution;
    460   if (i < SWICMS_CACHE_SIZE_DEFAULT)
    461   {
    462     pcache = swicms->cached_sections[ i];
    463     if (swicms->cached_num_frames % swicms->cache_resolution == 0)
    464     {
    465       for (i = 0; i < MAX_CHAN_DIM; i++) *pcache++ = 0;
    466       pcache -= MAX_CHAN_DIM;
    467     }
    468     pframe = frame;
    469     for (i = 0; i < MAX_CHAN_DIM; i++) *pcache++ += *pframe++;
    470     swicms->cached_num_frames++;
    471   }
    472 
    473   return 0;
    474 }
    475 
    476 int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
    477     imeldata* oframe,
    478     imeldata* iframe, int dimen)
    479 {
    480   int ii;
    481   ASSERT(dimen == MAX_CHAN_DIM);
    482 
    483   /* IF inutt is activated at all */
    484   if(swicms->inutt.forget_factor2 != SWICMS_INUTT_FORGET_FACTOR2_DISABLE) {
    485     /* AND IF we have not disabled it (due to x-utt more reliable) */
    486     if(swicms->inutt.num_frames_in_accum < swicms->inutt.disable_after) {
    487       /* AND IF we have skipped past the silence frames */
    488       if( swicms->inutt.num_frames_since_bou >= swicms->inutt.num_bou_frames_to_skip){
    489 	swicms->inutt.num_frames_in_accum++;
    490 	for(ii=0;ii<dimen;ii++) swicms->inutt.accum[ii] += iframe[ii];
    491 	/* AND IF we've already seen at least 10 frames (presumably) of speech */
    492 	if(swicms->inutt.num_frames_in_accum>swicms->inutt.enable_after) {
    493 	  /* THEN we update the adjustment in-line with the current utterance! */
    494 	  for(ii=0;ii<dimen;ii++) {
    495 	    imeldata denom = ( swicms->inutt.forget_factor2
    496 			       + swicms->inutt.num_frames_in_accum );
    497 	    /* tmp: weighted average of the old lda_cmn and the new accum */
    498 	    imeldata tmp=(swicms->lda_cmn[ii]*swicms->inutt.forget_factor2
    499 			  + swicms->inutt.accum[ii] + denom/2) / denom;
    500 	    swicms->adjust[ii] = swicms->lda_tmn[ii] - tmp;
    501 	  }
    502 	  //printf_vector("swicms->adjust2 "," %d",swicms->adjust, dimen);
    503 	}
    504       }
    505     }
    506     swicms->inutt.num_frames_since_bou++;
    507   }
    508 
    509   for (ii = 0; ii < dimen; ii++)
    510     oframe[ii] = MAKEBYTE(iframe[ii] + swicms->adjust[ii]);
    511   return 0;
    512 }
    513 
    514 int swicms_update(swicms_norm_info* swicms, int speech_start, int speech_end)
    515 {
    516   int i, j;
    517   asr_int32_t speech_avg[MAX_CHAN_DIM], backgr_avg[MAX_CHAN_DIM], avg[MAX_CHAN_DIM];
    518   int ff;
    519   int nn, speech_nn, backgr_nn;
    520   int num_frames = swicms->cached_num_frames;
    521   int cache_start, cache_end, backgr_cache_end;
    522   int sbindex = swicms->sbindex;
    523 
    524   /* init for utterance */
    525   swicms->inutt.num_frames_since_bou = 0;
    526 
    527   swicms->cached_num_frames = 0;
    528   cache_start = speech_start;
    529   cache_start -= (cache_start % swicms->cache_resolution);
    530   cache_start /= swicms->cache_resolution;
    531 
    532   if (speech_end == MAXframeID)
    533   {
    534     cache_end = SWICMS_CACHE_SIZE_DEFAULT;
    535   }
    536   else
    537   {
    538     if (speech_end < num_frames)
    539       cache_end = speech_end;
    540     else
    541       cache_end = num_frames;
    542     cache_end -= (cache_end % swicms->cache_resolution);
    543     cache_end /= swicms->cache_resolution;
    544   }
    545 
    546   if (num_frames == 0 || speech_end == 0 || speech_start == speech_end || speech_end == MAXframeID)
    547   {
    548     if (speech_end != 0 || speech_start != 0)
    549       PLogError("Warning: speech_bounds (%d,%d) swicms->cached_num_frames (%d)\n",
    550                 speech_start, speech_end, num_frames);
    551 	if (SWICMS_DEBUG) {
    552       //printf_vector("swicms->adjust.rep", " %d", swicms->adjust, MAX_CHAN_DIM);
    553     }
    554     return 1;
    555   }
    556 
    557   backgr_cache_end = (num_frames - num_frames % swicms->cache_resolution) / swicms->cache_resolution;
    558 
    559   speech_nn = (cache_end - cache_start) * swicms->cache_resolution;
    560   backgr_nn = backgr_cache_end * swicms->cache_resolution - speech_nn;
    561 
    562   for (i = 0; i < MAX_CHAN_DIM; i++)
    563   {
    564     speech_avg[i] = 0;
    565     backgr_avg[i] = 0;
    566     for (j = cache_start; j < cache_end; j++)
    567       speech_avg[i] += swicms->cached_sections[j][i];
    568     for (j = 0; j < cache_start; j++)
    569       backgr_avg[i] += swicms->cached_sections[j][i];
    570     for (j = cache_end; j < backgr_cache_end; j++)
    571       backgr_avg[i] += swicms->cached_sections[j][i];
    572     if (speech_nn == 0 && backgr_nn > 0)
    573     {
    574       backgr_avg[i] /= backgr_nn;
    575       speech_avg[i] = backgr_avg[i];
    576       speech_nn = backgr_nn;
    577     }
    578     else if (speech_nn > 0 && backgr_nn == 0)
    579     {
    580       speech_avg[i] /= speech_nn;
    581       backgr_avg[i] = speech_avg[i];
    582       backgr_nn = speech_nn;
    583     }
    584     else if (speech_nn > 0 && backgr_nn > 0)
    585     {
    586       speech_avg[i] /= speech_nn;
    587       backgr_avg[i] /= backgr_nn;
    588     }
    589     else
    590     {
    591       return 0;
    592     }
    593 
    594     avg[i] = (sbindex * speech_avg[i] + (100 - sbindex) * backgr_avg[i] + 50) / 100;
    595   }
    596   nn = (sbindex * speech_nn + (100 - sbindex) * backgr_nn + 50) / 100;
    597 
    598   for (i = 0, ff = 0; i < MAX_CHAN_DIM; i++)
    599   {
    600     ff += (swicms->lda_tmn[i] - avg[i]);
    601   }
    602   ff /= MAX_CHAN_DIM; /* sum is now the average offset from TMN */
    603   if (ff > 5)
    604   {
    605     PLogError("Warning: bad utt mean during swicms_update() (moffs=%d)\n", ff);
    606     //printf_vector("swicms->adjust.rep", " %d", swicms->adjust, MAX_CHAN_DIM);
    607     return 1;
    608   }
    609   ff = swicms->forget_factor;
    610   if (ff < 9999)
    611   {
    612     for (i = 0; i < MAX_CHAN_DIM; i++)
    613     {
    614       swicms->lda_cmn[i] = (swicms->lda_cmn[i] * ff + avg[i] * nn + (ff + nn) / 2)  / (ff + nn);
    615       swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
    616     }
    617   }
    618 
    619   if (SWICMS_DEBUG)
    620     {
    621       imeldata temp[MAX_CHAN_DIM];
    622       PLogMessage("swicms_update() used %d frames (%d-%d)", nn, speech_start, speech_end);
    623 
    624       for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_cmn[i];
    625       inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
    626       /* use this dump, to put back into CREC.Frontend.swicms.cmn */
    627       printf_vector("swicms.cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
    628 
    629       //printf_vector("swicms.lda_cmn   ", " %d", &swicms.lda_cmn [0], MAX_CHAN_DIM);
    630       //printf_vector("swicms.lda_tmn   ", " %d", &swicms.lda_tmn [0], MAX_CHAN_DIM);
    631       //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
    632       //printf_vector("avg.speech    ", " %d", avg, MAX_CHAN_DIM);
    633     }
    634   else
    635     {
    636 #ifndef NDEBUG
    637       //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
    638 #endif
    639     }
    640   swicms->num_frames_in_cmn += nn;
    641   return 0;
    642 }
    643 
    644 int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep)
    645 {
    646   int i;
    647 
    648   for (i = 0; i < MAX_CHAN_DIM; i++) swicms->lda_tmn[i] = swicms->tmn[i];
    649   for (i = 0; i < MAX_CHAN_DIM; i++) swicms->lda_cmn[i] = swicms->cmn[i];
    650   linear_transform_frame(prep, swicms->lda_tmn, 1 /*do_shift*/);
    651   linear_transform_frame(prep, swicms->lda_cmn, 1 /*do_shift*/);
    652 
    653   for (i = 0; i < MAX_CHAN_DIM; i++)
    654   {
    655     swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
    656   }
    657 
    658 #ifndef NDEBUG
    659   //printf_vector("swicms->adjust", " %d", swicms->adjust, MAX_CHAN_DIM);
    660 #endif
    661   swicms->is_valid = 1;
    662   swicms->_prep = prep;
    663 
    664   if(SWICMS_DEBUG) {
    665     imeldata temp[MAX_CHAN_DIM];
    666     printf_vector("swicms->cmn     ", " %d", swicms->cmn,     MAX_CHAN_DIM);
    667     printf_vector("swicms->lda_cmn ", " %d", swicms->lda_cmn, MAX_CHAN_DIM);
    668     //printf_vector("swicms->tmn     ", " %d", swicms->tmn,     MAX_CHAN_DIM);
    669     //printf_vector("swicms->lda_tmn ", " %d", swicms->lda_tmn, MAX_CHAN_DIM);
    670     //printf_vector("swicms->adjust  ", " %d", swicms->adjust,  MAX_CHAN_DIM);
    671 
    672     //for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_tmn[i];
    673     //inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
    674     //printf_vector("swicms->tmn(r)  ", " %d", temp, MAX_CHAN_DIM);
    675 
    676     for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_cmn[i];
    677     inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
    678     printf_vector("swicms->cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
    679   }
    680   return 0;
    681 }
    682 
    683 
    684 
    685