Home | History | Annotate | Download | only in lib
      1 /*
      2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 /**
     17  * @file picosa.c
     18  *
     19  * sentence analysis - POS disambiguation
     20  *
     21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
     22  * All rights reserved.
     23  *
     24  * History:
     25  * - 2009-04-20 -- initial version
     26  *
     27  */
     28 
     29 #include "picoos.h"
     30 #include "picodbg.h"
     31 #include "picobase.h"
     32 #include "picokdt.h"
     33 #include "picoklex.h"
     34 #include "picoktab.h"
     35 #include "picokfst.h"
     36 #include "picotrns.h"
     37 #include "picodata.h"
     38 #include "picosa.h"
     39 
     40 #ifdef __cplusplus
     41 extern "C" {
     42 #endif
     43 #if 0
     44 }
     45 #endif
     46 
     47 
     48 /* PU saStep states */
     49 #define SA_STEPSTATE_COLLECT       0
     50 #define SA_STEPSTATE_PROCESS_POSD 10
     51 #define SA_STEPSTATE_PROCESS_WPHO 11
     52 #define SA_STEPSTATE_PROCESS_TRNS_PARSE 12
     53 #define SA_STEPSTATE_PROCESS_TRNS_FST 13
     54 #define SA_STEPSTATE_FEED          2
     55 
     56 #define SA_MAX_ALTDESC_SIZE (30*(PICOTRNS_MAX_NUM_POSSYM + 2))
     57 
     58 #define SA_MSGSTR_SIZE 32
     59 
     60 /*  subobject    : SentAnaUnit
     61  *  shortcut     : sa
     62  *  context size : one phrase, max. 30 non-PUNC items, for non-processed items
     63  *                 one item if internal input empty
     64  */
     65 
     66 /** @addtogroup picosa
     67 
     68   internal buffers:
     69 
     70   - headx: array for extended item heads of fixed size (head plus
     71     index for content, plus two fields for boundary strength/type)
     72 
     73   - cbuf1, cbuf2: buffers for item contents (referenced by index in
     74     headx). Future: replace these two buffers by a single double-sided
     75     buffer (double shrink-grow type)
     76 
     77   0. bottom up filling of items in headx and cbuf1
     78 
     79   1. POS disambiguation (right-to-left, top-to-bottom):
     80   - number and sequence of items unchanged
     81   - item content can only get smaller (reducing nr of results in WORDINDEX)
     82   -> info stays in "headx, cbuf1" and changed in place                      \n
     83      WORDGRAPH(POSes,NA)graph             -> WORDGRAPH(POS,NA)graph         \n
     84      WORDINDEX(POSes,NA)POS1ind1...POSNindN  -> WORDINDEX(POS,NA)POS|ind    \n
     85 
     86   2. lex-index lookup and G2P (both directions possible, left-to-right done):
     87   - number and sequence of items unchanged, item head info and content
     88     changes
     89   -> headx changed in place; cbuf1 to cbuf2                                 \n
     90      WORDGRAPH(POS,NA)graph    -> WORDPHON(POS,NA)phon                      \n
     91      WORDINDEX(POS,NA)POS|ind  -> WORDPHON(POS,NA)phon                      \n
     92 
     93   3. phrasing (right-to-left):
     94 
     95      Previous (before introducing SBEG)\n
     96      ----------------------------------
     97                                            1|          2|             3|    4|    \n
     98      e.g. from      WP WP WP       WP WP PUNC  WP WP PUNC  WP WP WP PUNC FLUSH    \n
     99      e.g. to  BINIT WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND WP WP WP BSEND BTERM   \n
    100               |1                         |2          |3             |4            \n
    101 
    102      3-level bound state: to keep track of bound strength from end of
    103      previous punc-phrase, then BOUND item output as first item
    104      (strength from prev punc-phrase and type from current
    105      punc-phrase).
    106 
    107      trailing PUNC item       bound states
    108                               INIT         SEND         PHR1
    109        PUNC(SENTEND, T)       B(I,T)>SEND  B(S,T)>SEND  B(P1,T)>SEND
    110        PUNC(SENTEND, Q)       B(I,Q)>SEND  B(S,Q)>SEND  B(P1,Q)>SEND
    111        PUNC(SENTEND, E)       B(I,E)>SEND  B(S,E)>SEND  B(P1,E)>SEND
    112        PUNC(PHRASEEND, P)     B(I,P)>PHR1  B(S,P)>PHR1  B(P1,P)>PHR1
    113        PUNC(PHRASEEND, FORC)  B(I,P)>PHR1  B(S,P)>PHR1  B(P1,P)>PHR1
    114        PUNC(FLUSH, T)         B(I,T)..     B(S,T)..     B(P1,T)..
    115                                 B(T,NA)      B(T,NA)      B(T,NA)
    116                                 >INIT        >INIT        >INIT
    117 
    118      PHR2/3 case:
    119      trailing PUNC item       bound states
    120                           INIT              SEND              PHR1
    121        PUNC(SENTEND, T)   B(I,P)B(P,T)>SEND B(S,P)B(P,T)>SEND B(P1,P)B(P,T)>SEND
    122        PUNC(SENTEND, Q)   B(I,P)B(P,Q)>SEND B(S,P)B(P,Q)>SEND B(P1,P)B(P,Q)>SEND
    123        PUNC(SENTEND, E)   B(I,P)B(P,E)>SEND B(S,P)B(P,E)>SEND B(P1,P)B(P,E)>SEND
    124        PUNC(PHRASEEND, P) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
    125        PUNC(PHREND, FORC) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1
    126        PUNC(FLUSH, T)     B(I,P)B(P,T)..    B(S,T)B(P,T)..    B(P1,T)B(P,T)..
    127                             B(T,NA)             B(T,NA)             B(T,NA)
    128                             >INIT               >INIT               >INIT
    129 
    130      Current
    131      --------
    132      e.g. from      WP WP WP       WP WP PUNC  WP WP PUNC        WP WP WP PUNC  FLUSH
    133      e.g. to  BSBEG WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND BSBEG WP WP WP BSEND BTERM
    134               |1                         |2                |3                   |4
    135 
    136      2-level bound state: The internal buffer contains one primary phrase (sometimes forced, if buffer
    137      almost full), with the trailing PUNCT item included (last item).
    138      If the trailing PUNC is a primary phrase separator, the
    139        item is not output, but instead, the bound state is set to PPHR, so that the correct BOUND can
    140        be output at the start of the next primary phrase.
    141      Otherwise,
    142        the item is converted to the corresponding BOUND and output. the bound state is set to SSEP,
    143        so that a BOUND of type SBEG is output at the start of the next primary phrase.
    144 
    145      trailing PUNC item       bound states
    146                               SSEP           PPHR
    147        PUNC(SENTEND, X)       B(B,X)>SSEP    B(P1,X)>SSEP  (X = T | Q | E)
    148        PUNC(FLUSH, T)         B(B,T)>SSEP*    B(P1,T)>SSEP
    149        PUNC(PHRASEEND, P)     B(B,P)>PPHR    B(P1,P)>PPHR
    150        PUNC(PHRASEEND, FORC)  B(B,P)>PPHR    B(P1,P)>PPHR
    151 
    152 *    If more than one sentence separators follow each other (e.g. SEND-FLUSH, SEND-SEND) then
    153      all but the first will be treated as an (empty) phrase containing just this item.
    154      If this (single) item is a flush, creation of SBEG is suppressed.
    155 
    156 
    157   - dtphr phrasing tree (rather subphrasing tree it should be called)
    158     determines
    159       BOUND_PHR2
    160       BOUND_PHR3
    161   - boundary strengths are determined for every word (except the
    162     first one) from right-to-left. The boundary types mark the phrase
    163     type of the phrase following the boundary.
    164   - number of items actually changed (new BOUND items added): because
    165     of fixed size without content, two fields are contained in headx
    166     to indicate if a BOUND needs to be added to the LEFT of the item.
    167     -> headx further extended with boundary strength and type info to
    168     indicate that to the left of the headx ele a BOUND needs to be
    169     inserted when outputting.
    170 
    171   4. accentuation:
    172   - number of items unchanged, content unchanged, only head info changes
    173   -> changed in place in headx
    174 */
    175 
    176 
    177 typedef struct {
    178     picodata_itemhead_t head;
    179     picoos_uint16 cind;
    180 } picosa_headx_t;
    181 
    182 
    183 typedef struct sa_subobj {
    184     picoos_uint8 procState; /* for next processing step decision */
    185 
    186     picoos_uint8 inspaceok;      /* flag: headx/cbuf1 has space for an item */
    187     picoos_uint8 needsmoreitems; /* flag: need more items */
    188     picoos_uint8 phonesTransduced; /* flag: */
    189 
    190     picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE];  /* tmp. location for an item */
    191 
    192     picosa_headx_t headx[PICOSA_MAXNR_HEADX];
    193     picoos_uint16 headxBottom; /* bottom */
    194     picoos_uint16 headxLen;    /* length, 0 if empty */
    195 
    196     picoos_uint8 cbuf1[PICOSA_MAXSIZE_CBUF];
    197     picoos_uint16 cbuf1BufSize; /* actually allocated size */
    198     picoos_uint16 cbuf1Len;     /* length, 0 if empty */
    199 
    200     picoos_uint8 cbuf2[PICOSA_MAXSIZE_CBUF];
    201     picoos_uint16 cbuf2BufSize; /* actually allocated size */
    202     picoos_uint16 cbuf2Len;     /* length, 0 if empty */
    203 
    204     picotrns_possym_t phonBufA[PICOTRNS_MAX_NUM_POSSYM+1];
    205     picotrns_possym_t phonBufB[PICOTRNS_MAX_NUM_POSSYM+1];
    206     picotrns_possym_t * phonBuf;
    207     picotrns_possym_t * phonBufOut;
    208     picoos_uint16 phonReadPos, phonWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */
    209     picoos_uint16 nextReadPos; /* position of (potential) next item to read from */
    210 
    211 
    212     /* buffer for internal calculation of transducer */
    213     picotrns_AltDesc altDescBuf;
    214     /* the number of AltDesc in the buffer */
    215     picoos_uint16 maxAltDescLen;
    216 
    217     /* tab knowledge base */
    218     picoktab_Graphs tabgraphs;
    219     picoktab_Phones tabphones;
    220     picoktab_Pos tabpos;
    221     picoktab_FixedIds fixedIds;
    222 
    223     /* dtposd knowledge base */
    224     picokdt_DtPosD dtposd;
    225 
    226     /* dtg2p knowledge base */
    227     picokdt_DtG2P dtg2p;
    228 
    229     /* lex knowledge base */
    230     picoklex_Lex lex;
    231 
    232     /* ulex knowledge bases */
    233     picoos_uint8 numUlex;
    234     picoklex_Lex ulex[PICOKNOW_MAX_NUM_ULEX];
    235 
    236     /* fst knowledge bases */
    237     picoos_uint8 numFsts;
    238     picokfst_FST fst[PICOKNOW_MAX_NUM_WPHO_FSTS];
    239     picoos_uint8 curFst; /* the fst to be applied next */
    240 
    241 
    242 } sa_subobj_t;
    243 
    244 
    245 static pico_status_t saInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode) {
    246     sa_subobj_t * sa;
    247     picoos_uint16 i;
    248     picokfst_FST fst;
    249     picoknow_kb_id_t fstKbIds[PICOKNOW_MAX_NUM_WPHO_FSTS] = PICOKNOW_KBID_WPHO_ARRAY;
    250     picoklex_Lex ulex;
    251     picoknow_kb_id_t ulexKbIds[PICOKNOW_MAX_NUM_ULEX] = PICOKNOW_KBID_ULEX_ARRAY;
    252 
    253     PICODBG_DEBUG(("calling"));
    254 
    255     if (NULL == this || NULL == this->subObj) {
    256         return picoos_emRaiseException(this->common->em,
    257                                        PICO_ERR_NULLPTR_ACCESS, NULL, NULL);
    258     }
    259     sa = (sa_subobj_t *) this->subObj;
    260 
    261     /*  sa->common = this->common; */
    262 
    263     sa->procState = SA_STEPSTATE_COLLECT;
    264 
    265     sa->inspaceok = TRUE;
    266     sa->needsmoreitems = TRUE;
    267 
    268     sa->headxBottom = 0;
    269     sa->headxLen = 0;
    270     sa->cbuf1BufSize = PICOSA_MAXSIZE_CBUF;
    271     sa->cbuf2BufSize = PICOSA_MAXSIZE_CBUF;
    272     sa->cbuf1Len = 0;
    273     sa->cbuf2Len = 0;
    274 
    275     /* init headx, cbuf1, cbuf2 */
    276     for (i = 0; i < PICOSA_MAXNR_HEADX; i++){
    277         sa->headx[i].head.type = 0;
    278         sa->headx[i].head.info1 = PICODATA_ITEMINFO1_NA;
    279         sa->headx[i].head.info2 = PICODATA_ITEMINFO2_NA;
    280         sa->headx[i].head.len = 0;
    281         sa->headx[i].cind = 0;
    282     }
    283     for (i = 0; i < PICOSA_MAXSIZE_CBUF; i++) {
    284         sa->cbuf1[i] = 0;
    285         sa->cbuf2[i] = 0;
    286     }
    287 
    288 
    289     /* possym buffer */
    290     sa->phonesTransduced = FALSE;
    291     sa->phonBuf = sa->phonBufA;
    292     sa->phonBufOut = sa->phonBufB;
    293     sa->phonReadPos = 0;
    294     sa->phonWritePos = 0;
    295     sa->nextReadPos = 0;
    296 
    297     if (resetMode == PICO_RESET_SOFT) {
    298         /*following initializations needed only at startup or after a full reset*/
    299         return PICO_OK;
    300     }
    301 
    302     /* kb fst[] */
    303     sa->numFsts = 0;
    304     for (i = 0; i<PICOKNOW_MAX_NUM_WPHO_FSTS; i++) {
    305         fst = picokfst_getFST(this->voice->kbArray[fstKbIds[i]]);
    306         if (NULL != fst) {
    307             sa->fst[sa->numFsts++] = fst;
    308         }
    309     }
    310     sa->curFst = 0;
    311     PICODBG_DEBUG(("got %i fsts", sa->numFsts));
    312     /* kb fixedIds */
    313     sa->fixedIds = picoktab_getFixedIds(this->voice->kbArray[PICOKNOW_KBID_FIXED_IDS]);
    314 
    315     /* kb tabgraphs */
    316     sa->tabgraphs =
    317         picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);
    318     if (sa->tabgraphs == NULL) {
    319         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
    320                                        NULL, NULL);
    321     }
    322     PICODBG_DEBUG(("got tabgraphs"));
    323 
    324     /* kb tabphones */
    325     sa->tabphones =
    326         picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]);
    327     if (sa->tabphones == NULL) {
    328         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
    329                                        NULL, NULL);
    330     }
    331     PICODBG_DEBUG(("got tabphones"));
    332 
    333 #ifdef PICO_DEBU
    334     {
    335         picoos_uint16 itmp;
    336         for (itmp = 0; itmp < 256; itmp++) {
    337             if (picoktab_hasVowelProp(sa->tabphones, itmp)) {
    338                 PICODBG_DEBUG(("tabphones hasVowel: %d", itmp));
    339             }
    340             if (picoktab_hasDiphthProp(sa->tabphones, itmp)) {
    341                 PICODBG_DEBUG(("tabphones hasDiphth: %d", itmp));
    342             }
    343             if (picoktab_hasGlottProp(sa->tabphones, itmp)) {
    344                 PICODBG_DEBUG(("tabphones hasGlott: %d", itmp));
    345             }
    346             if (picoktab_hasNonsyllvowelProp(sa->tabphones, itmp)) {
    347                 PICODBG_DEBUG(("tabphones hasNonsyllvowel: %d", itmp));
    348             }
    349             if (picoktab_hasSyllconsProp(sa->tabphones, itmp)) {
    350                 PICODBG_DEBUG(("tabphones hasSyllcons: %d", itmp));
    351             }
    352             if (picoktab_isPrimstress(sa->tabphones, itmp)) {
    353                 PICODBG_DEBUG(("tabphones isPrimstress: %d", itmp));
    354             }
    355             if (picoktab_isSecstress(sa->tabphones, itmp)) {
    356                 PICODBG_DEBUG(("tabphones isSecstress: %d", itmp));
    357             }
    358             if (picoktab_isSyllbound(sa->tabphones, itmp)) {
    359                 PICODBG_DEBUG(("tabphones isSyllbound: %d", itmp));
    360             }
    361             if (picoktab_isPause(sa->tabphones, itmp)) {
    362                 PICODBG_DEBUG(("tabphones isPause: %d", itmp));
    363             }
    364         }
    365 
    366         PICODBG_DEBUG(("tabphones primstressID: %d",
    367                        picoktab_getPrimstressID(sa->tabphones)));
    368         PICODBG_DEBUG(("tabphones secstressID: %d",
    369                        picoktab_getSecstressID(sa->tabphones)));
    370         PICODBG_DEBUG(("tabphones syllboundID: %d",
    371                        picoktab_getSyllboundID(sa->tabphones)));
    372         PICODBG_DEBUG(("tabphones pauseID: %d",
    373                        picoktab_getPauseID(sa->tabphones)));
    374     }
    375 #endif
    376 
    377     /* kb tabpos */
    378     sa->tabpos =
    379         picoktab_getPos(this->voice->kbArray[PICOKNOW_KBID_TAB_POS]);
    380     if (sa->tabpos == NULL) {
    381         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
    382                                        NULL, NULL);
    383     }
    384     PICODBG_DEBUG(("got tabpos"));
    385 
    386     /* kb dtposd */
    387     sa->dtposd = picokdt_getDtPosD(this->voice->kbArray[PICOKNOW_KBID_DT_POSD]);
    388     if (sa->dtposd == NULL) {
    389         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
    390                                        NULL, NULL);
    391     }
    392     PICODBG_DEBUG(("got dtposd"));
    393 
    394     /* kb dtg2p */
    395     sa->dtg2p = picokdt_getDtG2P(this->voice->kbArray[PICOKNOW_KBID_DT_G2P]);
    396     if (sa->dtg2p == NULL) {
    397         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
    398                                        NULL, NULL);
    399     }
    400     PICODBG_DEBUG(("got dtg2p"));
    401 
    402     /* kb lex */
    403     sa->lex = picoklex_getLex(this->voice->kbArray[PICOKNOW_KBID_LEX_MAIN]);
    404     if (sa->lex == NULL) {
    405         return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING,
    406                                        NULL, NULL);
    407     }
    408     PICODBG_DEBUG(("got lex"));
    409 
    410     /* kb ulex[] */
    411     sa->numUlex = 0;
    412     for (i = 0; i<PICOKNOW_MAX_NUM_ULEX; i++) {
    413         ulex = picoklex_getLex(this->voice->kbArray[ulexKbIds[i]]);
    414         if (NULL != ulex) {
    415             sa->ulex[sa->numUlex++] = ulex;
    416         }
    417     }
    418     PICODBG_DEBUG(("got %i user lexica", sa->numUlex));
    419 
    420     return PICO_OK;
    421 }
    422 
    423 static picodata_step_result_t saStep(register picodata_ProcessingUnit this,
    424                                      picoos_int16 mode,
    425                                      picoos_uint16 *numBytesOutput);
    426 
    427 static pico_status_t saTerminate(register picodata_ProcessingUnit this) {
    428     return PICO_OK;
    429 }
    430 
    431 static pico_status_t saSubObjDeallocate(register picodata_ProcessingUnit this,
    432                                         picoos_MemoryManager mm) {
    433     sa_subobj_t * sa;
    434     if (NULL != this) {
    435         sa = (sa_subobj_t *) this->subObj;
    436         picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
    437         picoos_deallocate(mm, (void *) &this->subObj);
    438     }
    439     return PICO_OK;
    440 }
    441 
    442 
    443 picodata_ProcessingUnit picosa_newSentAnaUnit(picoos_MemoryManager mm,
    444                                               picoos_Common common,
    445                                               picodata_CharBuffer cbIn,
    446                                               picodata_CharBuffer cbOut,
    447                                               picorsrc_Voice voice) {
    448     picodata_ProcessingUnit this;
    449     sa_subobj_t * sa;
    450     this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
    451     if (this == NULL) {
    452         return NULL;
    453     }
    454 
    455     this->initialize = saInitialize;
    456     PICODBG_DEBUG(("set this->step to saStep"));
    457     this->step = saStep;
    458     this->terminate = saTerminate;
    459     this->subDeallocate = saSubObjDeallocate;
    460 
    461     this->subObj = picoos_allocate(mm, sizeof(sa_subobj_t));
    462     if (this->subObj == NULL) {
    463         picoos_deallocate(mm, (void *)&this);
    464         picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL);
    465         return NULL;
    466     }
    467 
    468     sa = (sa_subobj_t *) this->subObj;
    469 
    470     sa->altDescBuf = picotrns_allocate_alt_desc_buf(mm, SA_MAX_ALTDESC_SIZE, &sa->maxAltDescLen);
    471     if (NULL == sa->altDescBuf) {
    472         picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf);
    473         picoos_deallocate(mm, (void *)&sa);
    474         picoos_deallocate(mm, (void *)&this);
    475         picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM, NULL, NULL);
    476     }
    477 
    478 
    479     saInitialize(this, PICO_RESET_FULL);
    480     return this;
    481 }
    482 
    483 
    484 /* ***********************************************************************/
    485 /* PROCESS_POSD disambiguation functions */
    486 /* ***********************************************************************/
    487 
    488 /* find next POS to the right of 'ind' and return its POS and index */
    489 static picoos_uint8 saPosDItemSeqGetPosRight(register picodata_ProcessingUnit this,
    490                                             register sa_subobj_t *sa,
    491                                             const picoos_uint16 ind,
    492                                             const picoos_uint16 top,
    493                                             picoos_uint16 *rightind) {
    494     picoos_uint8 val;
    495     picoos_int32 i;
    496 
    497     val = PICOKDT_EPSILON;
    498     for (i = ind + 1; ((val == PICOKDT_EPSILON) && (i < top)); i++) {
    499         if ((sa->headx[i].head.type == PICODATA_ITEM_WORDGRAPH) ||
    500                 (sa->headx[i].head.type == PICODATA_ITEM_WORDINDEX)  ||
    501                 (sa->headx[i].head.type == PICODATA_ITEM_WORDPHON) ) {
    502             val = sa->headx[i].head.info1;
    503         }
    504     }
    505     *rightind = i - 1;
    506     return val;
    507 }
    508 
    509 
    510 /* left-to-right, for each WORDGRAPH/WORDINDEX/WORDPHON do posd */
    511 static pico_status_t saDisambPos(register picodata_ProcessingUnit this,
    512                                  register sa_subobj_t *sa) {
    513     picokdt_classify_result_t dtres;
    514     picoos_uint8 half_nratt_posd = PICOKDT_NRATT_POSD >> 1;
    515     picoos_uint16 valbuf[PICOKDT_NRATT_POSD]; /* only [0..half_nratt_posd] can be >2^8 */
    516     picoos_uint16 prevout;   /* direct dt output (hist.) or POS of prev word */
    517     picoos_uint16 lastprev3; /* last index of POS(es) found to the left */
    518     picoos_uint16 curPOS;     /* POS(es) of current word */
    519     picoos_int32 first;    /* index of first item with POS(es) */
    520     picoos_int32 ci;
    521     picoos_uint8 okay;       /* two uses: processing okay and lexind resovled */
    522     picoos_uint8 i;
    523     picoos_uint16 inval;
    524     picoos_uint16 fallback;
    525 
    526     /* set initial values */
    527     okay = TRUE;
    528     prevout = PICOKDT_HISTORY_ZERO;
    529     curPOS = PICODATA_ITEMINFO1_ERR;
    530     first = 0;
    531 
    532     while ((first < sa->headxLen) &&
    533            (sa->headx[first].head.type != PICODATA_ITEM_WORDGRAPH) &&
    534            (sa->headx[first].head.type != PICODATA_ITEM_WORDINDEX) &&
    535            (sa->headx[first].head.type != PICODATA_ITEM_WORDPHON)) {
    536         first++;
    537     }
    538     if (first >= sa->headxLen) {
    539         /* phrase not containing an item with POSes info, e.g. single flush */
    540         PICODBG_DEBUG(("no item with POSes found"));
    541         return PICO_OK;
    542     }
    543 
    544     lastprev3 = first;
    545 
    546     for (i = 0; i <= half_nratt_posd; i++) {
    547         valbuf[i] = PICOKDT_HISTORY_ZERO;
    548     }
    549     /* set POS(es) of current word, will be shifted afterwards */
    550     valbuf[half_nratt_posd+1] = sa->headx[first].head.info1;
    551     for (i = half_nratt_posd+2; i < PICOKDT_NRATT_POSD; i++) {
    552     /* find next POS to the right and set valbuf[i] */
    553         valbuf[i] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
    554     }
    555 
    556     PICODBG_TRACE(("headxLen: %d", sa->headxLen));
    557 
    558     /* process from left to right all items in headx */
    559     for (ci = first; ci < sa->headxLen; ci++) {
    560         okay = TRUE;
    561 
    562         PICODBG_TRACE(("iter: %d, type: %c", ci, sa->headx[ci].head.type));
    563 
    564         /* if not (WORDGRAPH or WORDINDEX) */
    565         if ((sa->headx[ci].head.type != PICODATA_ITEM_WORDGRAPH) &&
    566                 (sa->headx[ci].head.type != PICODATA_ITEM_WORDINDEX)  &&
    567                 (sa->headx[ci].head.type != PICODATA_ITEM_WORDPHON)) {
    568             continue;
    569         }
    570 
    571         PICODBG_TRACE(("iter: %d, curPOS: %d", ci, sa->headx[ci].head.info1));
    572 
    573         /* no continue so far => at [ci] we have a WORDGRAPH / WORDINDEX item */
    574         /* shift all elements one position to the left */
    575         /* shift predicted values (history) */
    576         for (i=1; i<half_nratt_posd; i++) {
    577             valbuf[i-1] = valbuf[i];
    578         }
    579         /* insert previously predicted value (now history) */
    580         valbuf[half_nratt_posd-1] = prevout;
    581         /* shift not yet predicted values */
    582         for (i=half_nratt_posd+1; i<PICOKDT_NRATT_POSD; i++) {
    583             valbuf[i-1] = valbuf[i];
    584         }
    585         /* find next POS to the right and set valbuf[PICOKDT_NRATT_POSD-1] */
    586         valbuf[PICOKDT_NRATT_POSD-1] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3);
    587 
    588         /* just to be on the safe side; the following should never happen */
    589         if (sa->headx[ci].head.info1 != valbuf[half_nratt_posd]) {
    590             PICODBG_WARN(("syncing POS"));
    591             picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
    592                                   NULL, NULL);
    593             valbuf[half_nratt_posd] = sa->headx[ci].head.info1;
    594         }
    595 
    596         curPOS = valbuf[half_nratt_posd];
    597 
    598         /* Check if POS disambiguation not needed */
    599         if (picoktab_isUniquePos(sa->tabpos, (picoos_uint8) curPOS)) {
    600             /* not needed */
    601             inval = 0;
    602             fallback = 0;
    603             if (!picokdt_dtPosDreverseMapOutFixed(sa->dtposd, curPOS,
    604                                        &prevout, &fallback)) {
    605                 if (fallback) {
    606                     prevout = fallback;
    607 
    608                 } else {
    609                     PICODBG_ERROR(("problem doing reverse output mapping"));
    610                     prevout = curPOS;
    611                 }
    612             }
    613             PICODBG_DEBUG(("keeping: %d", sa->headx[ci].head.info1));
    614             continue;
    615         }
    616 
    617         /* assuming PICOKDT_NRATT_POSD == 7 */
    618         PICODBG_DEBUG(("%d: [%d %d %d %d %d %d %d]",
    619                        ci, valbuf[0], valbuf[1], valbuf[2],
    620                        valbuf[3], valbuf[4], valbuf[5], valbuf[6]));
    621 
    622         /* no continue so far => POS disambiguation needed */
    623         /* construct input vector, which is set in dtposd */
    624         if (!picokdt_dtPosDconstructInVec(sa->dtposd, valbuf)) {
    625             /* error constructing invec */
    626             PICODBG_WARN(("problem with invec"));
    627             picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
    628                                   NULL, NULL);
    629             okay = FALSE;
    630         }
    631         /* classify */
    632         if (okay && (!picokdt_dtPosDclassify(sa->dtposd, &prevout))) {
    633             /* error doing classification */
    634             PICODBG_WARN(("problem classifying"));
    635             picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
    636                                   NULL, NULL);
    637             okay = FALSE;
    638         }
    639         /* decompose */
    640         if (okay && (!picokdt_dtPosDdecomposeOutClass(sa->dtposd, &dtres))) {
    641             /* error decomposing */
    642             PICODBG_WARN(("problem decomposing"));
    643             picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
    644                                   NULL, NULL);
    645             okay = FALSE;
    646         }
    647         if (okay && dtres.set) {
    648             PICODBG_DEBUG(("in: %d, out: %d", valbuf[3], dtres.class));
    649         } else {
    650             PICODBG_WARN(("problem disambiguating POS"));
    651             dtres.class = PICODATA_ITEMINFO1_ERR;
    652         }
    653 
    654         if (dtres.class > 255) {
    655             PICODBG_WARN(("dt result outside valid range, setting pos to ERR"));
    656             dtres.class = PICODATA_ITEMINFO1_ERR;
    657         }
    658 
    659         sa->headx[ci].head.info1 = (picoos_uint8)dtres.class;
    660         if (sa->headx[ci].head.type == PICODATA_ITEM_WORDINDEX) {
    661             /* find pos/ind entry in cbuf matching unique,
    662                disambiguated POS, adapt current headx cind/len
    663                accordingly */
    664             PICODBG_DEBUG(("select phon based on POS disambiguation"));
    665             okay = FALSE;
    666             for (i = 0; i < sa->headx[ci].head.len; i += PICOKLEX_POSIND_SIZE) {
    667                 PICODBG_DEBUG(("comparing POS at cind + %d", i));
    668                 if (picoktab_isPartOfPosGroup(sa->tabpos,
    669                             (picoos_uint8)dtres.class,
    670                             sa->cbuf1[sa->headx[ci].cind + i])) {
    671                     PICODBG_DEBUG(("found match for entry %d",
    672                                    i/PICOKLEX_POSIND_SIZE + 1));
    673                     sa->headx[ci].cind += i;
    674                     okay = TRUE;
    675                     break;
    676                 }
    677             }
    678             /* not finding a match is possible if posd predicts a POS that
    679                is not part of any of the input POSes -> no warning */
    680 #if defined(PICO_DEBUG)
    681             if (!okay) {
    682                 PICODBG_DEBUG(("no match found, selecting 1st entry"));
    683             }
    684 #endif
    685             sa->headx[ci].head.len = PICOKLEX_POSIND_SIZE;
    686         }
    687     }
    688     return PICO_OK;
    689 }
    690 
    691 
    692 /* ***********************************************************************/
    693 /* PROCESS_WPHO functions, copy, lexindex, and g2p */
    694 /* ***********************************************************************/
    695 
    696 /* ************** copy ***************/
    697 
    698 static pico_status_t saCopyItemContent1to2(register picodata_ProcessingUnit this,
    699                                            register sa_subobj_t *sa,
    700                                            picoos_uint16 ind) {
    701     picoos_uint16 i;
    702     picoos_uint16 cind1;
    703 
    704     /* set headx.cind, and copy content, head unchanged */
    705     cind1 = sa->headx[ind].cind;
    706     sa->headx[ind].cind = sa->cbuf2Len;
    707 
    708     /* check cbufLen */
    709     if (sa->headx[ind].head.len > (sa->cbuf2BufSize - sa->cbuf2Len)) {
    710         sa->headx[ind].head.len = sa->cbuf2BufSize - sa->cbuf2Len;
    711         PICODBG_WARN(("phones skipped"));
    712         picoos_emRaiseWarning(this->common->em,
    713                               PICO_WARN_INCOMPLETE, NULL, NULL);
    714         if (sa->headx[ind].head.len == 0) {
    715             sa->headx[ind].cind = 0;
    716         }
    717     }
    718 
    719     for (i = 0; i < sa->headx[ind].head.len; i++) {
    720         sa->cbuf2[sa->cbuf2Len] = sa->cbuf1[cind1 + i];
    721         sa->cbuf2Len++;
    722     }
    723 
    724     PICODBG_DEBUG(("%c item, len: %d",
    725                    sa->headx[ind].head.type, sa->headx[ind].head.len));
    726 
    727     return PICO_OK;
    728 }
    729 
    730 
    731 /* ************** lexindex ***************/
    732 
    733 static pico_status_t saLexIndLookup(register picodata_ProcessingUnit this,
    734                                     register sa_subobj_t *sa,
    735                                     picoklex_Lex lex,
    736                                     picoos_uint16 ind) {
    737     picoos_uint8 pos;
    738     picoos_uint8 *phones;
    739     picoos_uint8 plen;
    740     picoos_uint16 i;
    741 
    742     if (picoklex_lexIndLookup(lex, &(sa->cbuf1[sa->headx[ind].cind + 1]),
    743                               PICOKLEX_IND_SIZE, &pos, &phones, &plen)) {
    744         sa->headx[ind].cind = sa->cbuf2Len;
    745 
    746         /* check cbufLen */
    747         if (plen > (sa->cbuf2BufSize - sa->cbuf2Len)) {
    748             plen = sa->cbuf2BufSize - sa->cbuf2Len;
    749             PICODBG_WARN(("phones skipped"));
    750             picoos_emRaiseWarning(this->common->em,
    751                                   PICO_WARN_INCOMPLETE, NULL, NULL);
    752             if (plen == 0) {
    753                 sa->headx[ind].cind = 0;
    754             }
    755         }
    756 
    757         /* set item head, info1, info2 unchanged */
    758         sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
    759         sa->headx[ind].head.len = plen;
    760 
    761         for (i = 0; i < plen; i++) {
    762             sa->cbuf2[sa->cbuf2Len] = phones[i];
    763             sa->cbuf2Len++;
    764         }
    765 
    766         PICODBG_DEBUG(("%c item, pos: %d, plen: %d",
    767                        PICODATA_ITEM_WORDPHON, pos, plen));
    768 
    769     } else {
    770         PICODBG_WARN(("lexIndLookup problem"));
    771         picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
    772                               NULL, NULL);
    773     }
    774     return PICO_OK;
    775 }
    776 
    777 
    778 
    779 /* ************** g2p ***************/
    780 
    781 
    782 /* Name    :   saGetNvowel
    783    Function:   returns vowel info in a word or word seq
    784    Input   :   sInChar         the grapheme string to be converted in phoneme
    785                inLen           number of bytes in grapheme buffer
    786                inPos           start position of current grapheme (0..inLen-1)
    787    Output  :   nVow            number of vowels in the word
    788                nVord           vowel order in the word
    789    Returns :   TRUE: processing successful;  FALSE: errors
    790 */
    791 static picoos_uint8 saGetNrVowel(register picodata_ProcessingUnit this,
    792                                  register sa_subobj_t *sa,
    793                                  const picoos_uint8 *sInChar,
    794                                  const picoos_uint16 inLen,
    795                                  const picoos_uint8 inPos,
    796                                  picoos_uint8 *nVow,
    797                                  picoos_uint8 *nVord) {
    798     picoos_uint32 nCount;
    799     picoos_uint32 pos;
    800     picoos_uint8 cstr[PICOBASE_UTF8_MAXLEN + 1];
    801 
    802     /*defaults*/
    803     *nVow = 0;
    804     *nVord = 0;
    805     /*1:check wether the current char is a vowel*/
    806     pos = inPos;
    807     if (!picobase_get_next_utf8char(sInChar, inLen, &pos, cstr) ||
    808         !picoktab_hasVowellikeProp(sa->tabgraphs, cstr, PICOBASE_UTF8_MAXLEN)) {
    809         return FALSE;
    810     }
    811     /*2:count number of vowels in current word and find vowel order*/
    812     for (nCount = 0; nCount < inLen; ) {
    813       if (!picobase_get_next_utf8char(sInChar, inLen, &nCount, cstr)) {
    814             return FALSE;
    815       }
    816         if (picoktab_hasVowellikeProp(sa->tabgraphs, cstr,
    817                                       PICOBASE_UTF8_MAXLEN)) {
    818             (*nVow)++;
    819             if (nCount == pos) {
    820                 (*nVord) = (*nVow);
    821         }
    822         }
    823     }
    824     return TRUE;
    825 }
    826 
    827 
    828 /* do g2p for a full word, right-to-left */
    829 static picoos_uint8 saDoG2P(register picodata_ProcessingUnit this,
    830                             register sa_subobj_t *sa,
    831                             const picoos_uint8 *graph,
    832                             const picoos_uint8 graphlen,
    833                             const picoos_uint8 pos,
    834                             picoos_uint8 *phones,
    835                             const picoos_uint16 phonesmaxlen,
    836                             picoos_uint16 *plen) {
    837     picoos_uint16 outNp1Ch; /*last 3 outputs produced*/
    838     picoos_uint16 outNp2Ch;
    839     picoos_uint16 outNp3Ch;
    840     picoos_uint8 nPrimary;
    841     picoos_uint8 nCount;
    842     picoos_uint32 utfpos;
    843     picoos_uint16 nOutVal;
    844     picoos_uint8 okay;
    845     picoos_uint16 phonesind;
    846     picoos_uint8 nrvow;
    847     picoos_uint8 ordvow;
    848     picokdt_classify_vecresult_t dtresv;
    849     picoos_uint16 i;
    850 
    851     *plen = 0;
    852     okay = TRUE;
    853 
    854     /* use sa->tmpbuf[PICOSA_MAXITEMSIZE] to temporarly store the
    855        phones which are predicted in reverse order. Once all are
    856        available put them in phones in usuable order. phonesind is
    857        used to fille item in reverse order starting at the end of
    858        tmpbuf. */
    859     phonesind = PICOSA_MAXITEMSIZE - 1;
    860 
    861     /* prepare the data for loop operations */
    862     outNp1Ch = PICOKDT_HISTORY_ZERO;
    863     outNp2Ch = PICOKDT_HISTORY_ZERO;
    864     outNp3Ch = PICOKDT_HISTORY_ZERO;
    865 
    866     /* inner loop */
    867     nPrimary = 0;
    868 
    869     /* ************************************************/
    870     /* go backward grapheme by grapheme, it's utf8... */
    871     /* ************************************************/
    872 
    873     /* set start nCount to position of start of last utfchar */
    874     /* ! watch out! somethimes starting at 1, sometimes at 0,
    875        ! sometimes counting per byte, sometimes per UTF8 char */
    876     /* nCount is (start position + 1) of utf8 char */
    877     utfpos = graphlen;
    878     if (picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
    879         nCount = utfpos + 1;
    880     } else {
    881         /* should not occurr */
    882         PICODBG_ERROR(("invalid utf8 string, graphlen: %d", graphlen));
    883         return FALSE;
    884     }
    885 
    886     while (nCount > 0) {
    887         PICODBG_TRACE(("right-to-left g2p, count: %d", nCount));
    888         okay = TRUE;
    889 
    890         if (!saGetNrVowel(this, sa, graph, graphlen, nCount-1, &nrvow,
    891                           &ordvow)) {
    892             nrvow = 0;
    893             ordvow = 0;
    894         }
    895 
    896         /* prepare input vector, set inside tree object invec,
    897          * g2pBuildVector will call the constructInVec tree method */
    898         if (!picokdt_dtG2PconstructInVec(sa->dtg2p,
    899                                          graph, /*grapheme start*/
    900                                          graphlen, /*grapheme length*/
    901                                          nCount-1, /*grapheme current position*/
    902                                          pos, /*Word POS*/
    903                                          nrvow, /*nr vowels if vowel, 0 else */
    904                                          ordvow, /*ord of vowel if vowel, 0 el*/
    905                                          &nPrimary,  /*primary stress flag*/
    906                                          outNp1Ch, /*Right phoneme context +1*/
    907                                          outNp2Ch, /*Right phoneme context +2*/
    908                                          outNp3Ch)) { /*Right phon context +3*/
    909             /*Errors in preparing the input vector : skip processing*/
    910             PICODBG_WARN(("problem with invec"));
    911             picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR,
    912                                   NULL, NULL);
    913             okay = FALSE;
    914         }
    915 
    916         /* classify using the invec in the tree object and save the direct
    917            tree output also in the tree object */
    918         if (okay && (!picokdt_dtG2Pclassify(sa->dtg2p, &nOutVal))) {
    919             /* error doing classification */
    920             PICODBG_WARN(("problem classifying"));
    921             picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION,
    922                                   NULL, NULL);
    923             okay = FALSE;
    924         }
    925 
    926         /* decompose the invec in the tree object and return result in dtresv */
    927         if (okay && (!picokdt_dtG2PdecomposeOutClass(sa->dtg2p, &dtresv))) {
    928             /* error decomposing */
    929             PICODBG_WARN(("problem decomposing"));
    930             picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR,
    931                                   NULL, NULL);
    932             okay = FALSE;
    933         }
    934 
    935         if (okay) {
    936             if ((dtresv.nr == 0) || (dtresv.classvec[0] == PICOKDT_EPSILON)) {
    937                 /* no phones to be added */
    938                 PICODBG_TRACE(("epsilon, no phone added %c", graph[nCount-1]));
    939                 ;
    940             } else {
    941                 /* add decomposed output to tmpbuf, reverse order */
    942                 for (i = dtresv.nr; ((((PICOSA_MAXITEMSIZE - 1) -
    943                                        phonesind)<phonesmaxlen) &&
    944                                      (i > 0)); ) {
    945                     i--;
    946                     PICODBG_TRACE(("%c %d",graph[nCount-1],dtresv.classvec[i]));
    947                     if (dtresv.classvec[i] > 255) {
    948                         PICODBG_WARN(("dt result outside valid range, "
    949                                       "skipping phone"));
    950                         continue;
    951                     }
    952                     sa->tmpbuf[phonesind--] = (picoos_uint8)dtresv.classvec[i];
    953                     if (!nPrimary) {
    954                         if (picoktab_isPrimstress(sa->tabphones,
    955                           (picoos_uint8)dtresv.classvec[i])) {
    956                             nPrimary = 1;
    957             }
    958                     }
    959                     (*plen)++;
    960                 }
    961                 if (i > 0) {
    962                     PICODBG_WARN(("phones skipped"));
    963                     picoos_emRaiseWarning(this->common->em,
    964                                           PICO_WARN_INCOMPLETE, NULL, NULL);
    965                 }
    966             }
    967         }
    968 
    969         /*shift tree output history and update*/
    970         outNp3Ch = outNp2Ch;
    971         outNp2Ch = outNp1Ch;
    972         outNp1Ch = nOutVal;
    973 
    974         /* go backward one utf8 char */
    975         /* nCount is in +1 domain */
    976         if (nCount <= 1) {
    977             /* end of str */
    978             nCount = 0;
    979         } else {
    980             utfpos = nCount - 1;
    981             if (!picobase_get_prev_utf8charpos(graph, 0, &utfpos)) {
    982                 /* should not occur */
    983                 PICODBG_ERROR(("invalid utf8 string, utfpos: %d", utfpos));
    984                 return FALSE;
    985             } else {
    986                 nCount = utfpos + 1;
    987             }
    988         }
    989     }
    990 
    991     /* a must be: (PICOSA_MAXITEMSIZE-1) - phonesind == *plen */
    992     /* now that we have all phone IDs, copy in correct order to phones */
    993     /* phonesind point to next free slot in the reverse domainn,
    994        ie. inc first */
    995     phonesind++;
    996     for (i = 0; i < *plen; i++, phonesind++) {
    997         phones[i] = sa->tmpbuf[phonesind];
    998     }
    999     return TRUE;
   1000 }
   1001 
   1002 
   1003 /* item in headx[ind]/cbuf1, out: modified headx and cbuf2 */
   1004 
   1005 static pico_status_t saGraphemeToPhoneme(register picodata_ProcessingUnit this,
   1006                                          register sa_subobj_t *sa,
   1007                                          picoos_uint16 ind) {
   1008     picoos_uint16 plen;
   1009 
   1010     PICODBG_TRACE(("starting g2p"));
   1011 
   1012     if (saDoG2P(this, sa, &(sa->cbuf1[sa->headx[ind].cind]),
   1013                 sa->headx[ind].head.len, sa->headx[ind].head.info1,
   1014                 &(sa->cbuf2[sa->cbuf2Len]), (sa->cbuf2BufSize - sa->cbuf2Len),
   1015                 &plen)) {
   1016 
   1017         /* check of cbuf2Len done in saDoG2P, phones skipped if needed */
   1018         if (plen > 255) {
   1019             PICODBG_WARN(("maximum number of phones exceeded (%d), skipping",
   1020                           plen));
   1021             plen = 255;
   1022         }
   1023 
   1024         /* set item head, info1, info2 unchanged */
   1025         sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON;
   1026         sa->headx[ind].head.len = (picoos_uint8)plen;
   1027         sa->headx[ind].cind = sa->cbuf2Len;
   1028         sa->cbuf2Len += plen;
   1029         PICODBG_DEBUG(("%c item, plen: %d",
   1030                        PICODATA_ITEM_WORDPHON, plen));
   1031     } else {
   1032         PICODBG_WARN(("problem doing g2p"));
   1033         picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM,
   1034                               NULL, NULL);
   1035     }
   1036     return PICO_OK;
   1037 }
   1038 
   1039 
   1040 /* ***********************************************************************/
   1041 /*                          extract phonemes of an item into a phonBuf   */
   1042 /* ***********************************************************************/
   1043 
   1044 static pico_status_t saAddPhoneme(register sa_subobj_t *sa, picoos_uint16 pos, picoos_uint16 sym) {
   1045     /* picoos_uint8 plane, unshifted; */
   1046 
   1047     /* just for debuging */
   1048     /*
   1049     unshifted = picotrns_unplane(sym,&plane);
   1050     PICODBG_DEBUG(("adding %i/%i (%c on plane %i) at phonBuf[%i]",pos,sym,unshifted,plane,sa->phonWritePos));
   1051     */
   1052     if (PICOTRNS_MAX_NUM_POSSYM <= sa->phonWritePos) {
   1053         /* not an error! */
   1054         PICODBG_DEBUG(("couldn't add because phon buffer full"));
   1055         return PICO_EXC_BUF_OVERFLOW;
   1056     } else {
   1057         sa->phonBuf[sa->phonWritePos].pos = pos;
   1058         sa->phonBuf[sa->phonWritePos].sym = sym;
   1059         sa->phonWritePos++;
   1060         return PICO_OK;
   1061     }
   1062 }
   1063 
   1064 /*
   1065 static pico_status_t saAddStartPhoneme(register sa_subobj_t *sa) {
   1066     return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
   1067             (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
   1068 }
   1069 
   1070 
   1071 static pico_status_t saAddTermPhoneme(register sa_subobj_t *sa) {
   1072     return saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
   1073             (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
   1074 }
   1075 
   1076 */
   1077 
   1078 static pico_status_t saExtractPhonemes(register picodata_ProcessingUnit this,
   1079         register sa_subobj_t *sa, picoos_uint16 pos,
   1080         picodata_itemhead_t* head, const picoos_uint8* content)
   1081 {
   1082     pico_status_t rv= PICO_OK;
   1083     picoos_uint8 i;
   1084     picoos_int16 fstSymbol;
   1085 #if defined(PICO_DEBUG)
   1086     picoos_char msgstr[SA_MSGSTR_SIZE];
   1087 #endif
   1088 
   1089     PICODBG_TRACE(("doing item %s",
   1090                     picodata_head_to_string(head,msgstr,SA_MSGSTR_SIZE)));
   1091     /*
   1092      Items  considered in a transduction are WORDPHON item. its starting offset within the inBuf is given as
   1093      'pos'.
   1094      Elements that go into the transduction receive "their" position in the buffer.
   1095      */
   1096     sa->phonWritePos = 0;
   1097     /* WORDPHON(POS,WACC)phon */
   1098     rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
   1099                 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId);
   1100     for (i = 0; i < head->len; i++) {
   1101         fstSymbol = /* (PICOKFST_PLANE_PHONEMES << 8) + */content[i];
   1102         /*  */
   1103         PICODBG_TRACE(("adding phoneme %c",fstSymbol));
   1104         rv = saAddPhoneme(sa, pos+PICODATA_ITEM_HEADSIZE+i, fstSymbol);
   1105     }
   1106     rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE,
   1107                 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId);
   1108     sa->nextReadPos = pos + PICODATA_ITEM_HEADSIZE +  head->len;
   1109     return rv;
   1110 }
   1111 
   1112 
   1113 #define SA_POSSYM_OK           0
   1114 #define SA_POSSYM_OUT_OF_RANGE 1
   1115 #define SA_POSSYM_END          2
   1116 #define SA_POSSYM_INVALID     -3
   1117 /* *readPos is the next position in phonBuf to be read, and *writePos is the first position not to be read (may be outside
   1118  * buf).
   1119  * 'rangeEnd' is the first possym position outside the desired range.
   1120  * Possible return values:
   1121  * SA_POSSYM_OK            : 'pos' and 'sym' are set to the read possym, *readPos is advanced
   1122  * SA_POSSYM_OUT_OF_RANGE  : pos is out of range. 'pos' is set to that of the read possym, 'sym' is undefined
   1123  * SA_POSSYM_UNDERFLOW     : no more data in buf. 'pos' is set to PICOTRNS_POS_INVALID,    'sym' is undefined
   1124  * SA_POSSYM_INVALID       : "strange" pos.       'pos' is set to PICOTRNS_POS_INVALID,    'sym' is undefined
   1125  */
   1126 static pico_status_t getNextPosSym(sa_subobj_t * sa, picoos_int16 * pos, picoos_int16 * sym,
   1127         picoos_int16 rangeEnd) {
   1128     /* skip POS_IGNORE */
   1129     while ((sa->phonReadPos < sa->phonWritePos) && (PICOTRNS_POS_IGNORE == sa->phonBuf[sa->phonReadPos].pos))  {
   1130         PICODBG_DEBUG(("ignoring phone at sa->phonBuf[%i] because it has pos==IGNORE",sa->phonReadPos));
   1131         sa->phonReadPos++;
   1132     }
   1133     if ((sa->phonReadPos < sa->phonWritePos)) {
   1134         *pos = sa->phonBuf[sa->phonReadPos].pos;
   1135         if ((PICOTRNS_POS_INSERT == *pos) || ((0 <= *pos) && (*pos < rangeEnd))) {
   1136             *sym = sa->phonBuf[sa->phonReadPos++].sym;
   1137             return SA_POSSYM_OK;
   1138         } else if (*pos < 0){ /* *pos is "strange" (e.g. POS_INVALID) */
   1139             return SA_POSSYM_INVALID;
   1140         } else {
   1141             return SA_POSSYM_OUT_OF_RANGE;
   1142         }
   1143     } else {
   1144         /* no more possyms to read */
   1145         *pos = PICOTRNS_POS_INVALID;
   1146         return SA_POSSYM_END;
   1147     }
   1148 }
   1149 
   1150 
   1151 
   1152 
   1153 /* ***********************************************************************/
   1154 /*                          saStep function                              */
   1155 /* ***********************************************************************/
   1156 
   1157 /*
   1158 complete phrase processed in one step, if not fast enough -> rework
   1159 
   1160 init, collect into internal buffer, process, and then feed to
   1161 output buffer
   1162 
   1163 init state: INIT ext           ext
   1164 state trans:     in hc1  hc2   out
   1165 
   1166 INIT | putItem   =  0    0    +1      | BUSY  -> COLL (put B-SBEG item,
   1167                                                    set do-init to false)
   1168 
   1169                                     inspace-ok-hc1
   1170                                   needs-more-items-(phrase-or-flush)
   1171 COLL1 |getItems -n +n             0 1 | ATOMIC -> PPOSD     (got items,
   1172                                                       if flush set do-init)
   1173 COLL2 |getItems -n +n             1 0 | ATOMIC -> PPOSD (got items, forced)
   1174 COLL3 |getItems -n +n             1 1 | IDLE          (got items, need more)
   1175 COLL4 |getItems  =  =             1 1 | IDLE             (got no items)
   1176 
   1177 PPOSD | posd     = ~n~n               | BUSY     -> PWP     (posd done)
   1178 PWP   | lex/g2p  = ~n-n  0+n          | BUSY     -> PPHR    (lex/g2p done)
   1179 PPHR  | phr      = -n 0 +m=n          | BUSY     -> PACC    (phr done, m>=n)
   1180 PACC  | acc      =  0 0 ~m=n          | BUSY     -> FEED    (acc done)
   1181 
   1182                                   doinit-flag
   1183 FEED | putItems  0  0 0 -m-n  +m  0   | BUSY -> COLL    (put items)
   1184 FEED | putItems  0  0 0 -m-n  +m  1   | BUSY -> INIT    (put items)
   1185 FEED | putItems  0  0 0 -d-d  +d      | OUT_FULL        (put some items)
   1186 */
   1187 
   1188 static picodata_step_result_t saStep(register picodata_ProcessingUnit this,
   1189                                      picoos_int16 mode,
   1190                                      picoos_uint16 *numBytesOutput) {
   1191     register sa_subobj_t *sa;
   1192     pico_status_t rv = PICO_OK;
   1193     pico_status_t rvP = PICO_OK;
   1194     picoos_uint16 blen = 0;
   1195     picoos_uint16 clen = 0;
   1196     picoos_uint16 i;
   1197     picoklex_Lex lex;
   1198 
   1199 
   1200     if (NULL == this || NULL == this->subObj) {
   1201         return PICODATA_PU_ERROR;
   1202     }
   1203     sa = (sa_subobj_t *) this->subObj;
   1204     mode = mode;        /* avoid warning "var not used in this function"*/
   1205     *numBytesOutput = 0;
   1206     while (1) { /* exit via return */
   1207         PICODBG_DEBUG(("doing state %i, hLen|c1Len|c2Len: %d|%d|%d",
   1208                        sa->procState, sa->headxLen, sa->cbuf1Len,
   1209                        sa->cbuf2Len));
   1210 
   1211         switch (sa->procState) {
   1212 
   1213             /* *********************************************************/
   1214             /* collect state: get item(s) from charBuf and store in
   1215              * internal buffers, need a complete punctuation-phrase
   1216              */
   1217             case SA_STEPSTATE_COLLECT:
   1218 
   1219                 while (sa->inspaceok && sa->needsmoreitems
   1220                        && (PICO_OK ==
   1221                            (rv = picodata_cbGetItem(this->cbIn, sa->tmpbuf,
   1222                                             PICOSA_MAXITEMSIZE, &blen)))) {
   1223                     rvP = picodata_get_itemparts(sa->tmpbuf,
   1224                                             PICOSA_MAXITEMSIZE,
   1225                                             &(sa->headx[sa->headxLen].head),
   1226                                             &(sa->cbuf1[sa->cbuf1Len]),
   1227                                             sa->cbuf1BufSize-sa->cbuf1Len,
   1228                                             &clen);
   1229                     if (rvP != PICO_OK) {
   1230                         PICODBG_ERROR(("problem getting item parts"));
   1231                         picoos_emRaiseException(this->common->em, rvP,
   1232                                                 NULL, NULL);
   1233                         return PICODATA_PU_ERROR;
   1234                     }
   1235 
   1236                     /* if CMD(...FLUSH...) -> PUNC(...FLUSH...),
   1237                        construct PUNC-FLUSH item in headx */
   1238                     if ((sa->headx[sa->headxLen].head.type ==
   1239                          PICODATA_ITEM_CMD) &&
   1240                         (sa->headx[sa->headxLen].head.info1 ==
   1241                          PICODATA_ITEMINFO1_CMD_FLUSH)) {
   1242                         sa->headx[sa->headxLen].head.type =
   1243                             PICODATA_ITEM_PUNC;
   1244                         sa->headx[sa->headxLen].head.info1 =
   1245                             PICODATA_ITEMINFO1_PUNC_FLUSH;
   1246                         sa->headx[sa->headxLen].head.info2 =
   1247                             PICODATA_ITEMINFO2_PUNC_SENT_T;
   1248                         sa->headx[sa->headxLen].head.len = 0;
   1249                     }
   1250 
   1251                     /* convert opening phoneme command to WORDPHON
   1252                      * and assign user-POS XX to it (Bug 432) */
   1253                     sa->headx[sa->headxLen].cind = sa->cbuf1Len;
   1254                     /* maybe overwritten later */
   1255                     if ((sa->headx[sa->headxLen].head.type ==
   1256                         PICODATA_ITEM_CMD) &&
   1257                        (sa->headx[sa->headxLen].head.info1 ==
   1258                         PICODATA_ITEMINFO1_CMD_PHONEME)&&
   1259                         (sa->headx[sa->headxLen].head.info2 ==
   1260                          PICODATA_ITEMINFO2_CMD_START)) {
   1261                         picoos_uint8 i;
   1262                         picoos_uint8 wordsep = picoktab_getWordboundID(sa->tabphones);
   1263                         PICODBG_INFO(("wordsep id is %i",wordsep));
   1264                         sa->headx[sa->headxLen].head.type = PICODATA_ITEM_WORDPHON;
   1265                         sa->headx[sa->headxLen].head.info1 = PICODATA_POS_XX;
   1266                         sa->headx[sa->headxLen].head.info2 = PICODATA_ITEMINFO2_NA;
   1267                         /* cut off additional words */
   1268                         i = 0;
   1269                         while ((i < sa->headx[sa->headxLen].head.len) && (wordsep != sa->cbuf1[sa->headx[sa->headxLen].cind+i])) {
   1270                             PICODBG_INFO(("accepting phoneme %i",sa->cbuf1[sa->headx[sa->headxLen].cind+i]));
   1271 
   1272                             i++;
   1273                         }
   1274                         if (i < sa->headx[sa->headxLen].head.len) {
   1275                             PICODBG_INFO(("cutting off superfluous phonetic words at %i",i));
   1276                             sa->headx[sa->headxLen].head.len = i;
   1277                         }
   1278                     }
   1279 
   1280                     /* check/set needsmoreitems */
   1281                     if (sa->headx[sa->headxLen].head.type ==
   1282                         PICODATA_ITEM_PUNC) {
   1283                         sa->needsmoreitems = FALSE;
   1284                     }
   1285 
   1286                     /* check/set inspaceok, keep spare slot for forcing */
   1287                     if ((sa->headxLen >= (PICOSA_MAXNR_HEADX - 2)) ||
   1288                         ((sa->cbuf1BufSize - sa->cbuf1Len) <
   1289                          PICOSA_MAXITEMSIZE)) {
   1290                         sa->inspaceok = FALSE;
   1291                     }
   1292 
   1293                     if (clen > 0) {
   1294                         sa->headx[sa->headxLen].cind = sa->cbuf1Len;
   1295                         sa->cbuf1Len += clen;
   1296                     } else {
   1297                         sa->headx[sa->headxLen].cind = 0;
   1298                     }
   1299                     sa->headxLen++;
   1300                 }
   1301 
   1302                 if (!sa->needsmoreitems) {
   1303                     /* 1, phrase buffered */
   1304                     sa->procState = SA_STEPSTATE_PROCESS_POSD;
   1305                     return PICODATA_PU_ATOMIC;
   1306                 } else if (!sa->inspaceok) {
   1307                     /* 2, forced phrase end */
   1308                     /* at least one slot is still free, use it to
   1309                        force a trailing PUNC item */
   1310                     sa->headx[sa->headxLen].head.type = PICODATA_ITEM_PUNC;
   1311                     sa->headx[sa->headxLen].head.info1 =
   1312                         PICODATA_ITEMINFO1_PUNC_PHRASEEND;
   1313                     sa->headx[sa->headxLen].head.info2 =
   1314                         PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED;
   1315                     sa->headx[sa->headxLen].head.len = 0;
   1316                     sa->needsmoreitems = FALSE; /* not really needed for now */
   1317                     sa->headxLen++;
   1318                     PICODBG_WARN(("forcing phrase end, added PUNC_PHRASEEND"));
   1319                     picoos_emRaiseWarning(this->common->em,
   1320                                           PICO_WARN_FALLBACK, NULL,
   1321                                           (picoos_char *)"forced phrase end");
   1322                     sa->procState = SA_STEPSTATE_PROCESS_POSD;
   1323                     return PICODATA_PU_ATOMIC;
   1324                 } else if (rv == PICO_EOF) {
   1325                     /* 3, 4 */
   1326                     return PICODATA_PU_IDLE;
   1327                 } else if ((rv == PICO_EXC_BUF_UNDERFLOW) ||
   1328                            (rv == PICO_EXC_BUF_OVERFLOW)) {
   1329                     /* error, no valid item in cb (UNDER) */
   1330                     /*        or tmpbuf not large enough, not possible (OVER) */
   1331                     /* no exception raised, left for ctrl to handle */
   1332                     PICODBG_ERROR(("buffer under/overflow, rv: %d", rv));
   1333                     return PICODATA_PU_ERROR;
   1334                 } else {
   1335                     /* error, only possible if cbGetItem implementation
   1336                        changes without this function being adapted*/
   1337                     PICODBG_ERROR(("untreated return value, rv: %d", rv));
   1338                     return PICODATA_PU_ERROR;
   1339                 }
   1340                 break;
   1341 
   1342 
   1343             /* *********************************************************/
   1344             /* process posd state: process items in headx/cbuf1
   1345              * and change in place
   1346              */
   1347             case SA_STEPSTATE_PROCESS_POSD:
   1348                 /* ensure there is an item in inBuf */
   1349                 if (sa->headxLen > 0) {
   1350                     /* we have a phrase in headx, cbuf1 (can be
   1351                        single PUNC item without POS), do pos disamb */
   1352                     if (PICO_OK != saDisambPos(this, sa)) {
   1353                         picoos_emRaiseException(this->common->em,
   1354                                                 PICO_ERR_OTHER, NULL, NULL);
   1355                         return PICODATA_PU_ERROR;
   1356                     }
   1357                     sa->procState = SA_STEPSTATE_PROCESS_WPHO;
   1358 
   1359                 } else if (sa->headxLen == 0) {    /* no items in inBuf */
   1360                     PICODBG_WARN(("no items in inBuf"));
   1361                     sa->procState = SA_STEPSTATE_COLLECT;
   1362                     return PICODATA_PU_BUSY;
   1363                 }
   1364 
   1365 #if defined (PICO_DEBUG)
   1366                 if (1) {
   1367                     picoos_uint8 i, j, ittype;
   1368                     for (i = 0; i < sa->headxLen; i++) {
   1369                         ittype = sa->headx[i].head.type;
   1370                         PICODBG_INFO_CTX();
   1371                         PICODBG_INFO_MSG(("sa-d: ("));
   1372                         PICODBG_INFO_MSG(("'%c',", ittype));
   1373                         if ((32 <= sa->headx[i].head.info1) &&
   1374                             (sa->headx[i].head.info1 < 127) &&
   1375                             (ittype != PICODATA_ITEM_WORDGRAPH) &&
   1376                             (ittype != PICODATA_ITEM_WORDINDEX)) {
   1377                             PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
   1378                         } else {
   1379                             PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
   1380                         }
   1381                         if ((32 <= sa->headx[i].head.info2) &&
   1382                             (sa->headx[i].head.info2 < 127)) {
   1383                             PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
   1384                         } else {
   1385                             PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
   1386                         }
   1387                         PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
   1388 
   1389                         for (j = 0; j < sa->headx[i].head.len; j++) {
   1390                             if ((ittype == PICODATA_ITEM_WORDGRAPH) ||
   1391                                 (ittype == PICODATA_ITEM_CMD)) {
   1392                                 PICODBG_INFO_MSG(("%c",
   1393                                         sa->cbuf1[sa->headx[i].cind+j]));
   1394                             } else {
   1395                                 PICODBG_INFO_MSG(("%4d",
   1396                                         sa->cbuf1[sa->headx[i].cind+j]));
   1397                             }
   1398                         }
   1399                         PICODBG_INFO_MSG(("\n"));
   1400                     }
   1401                 }
   1402 #endif
   1403 
   1404                 break;
   1405 
   1406 
   1407             /* *********************************************************/
   1408             /* process wpho state: process items in headx/cbuf1 and modify
   1409              * headx in place and fill cbuf2
   1410              */
   1411             case SA_STEPSTATE_PROCESS_WPHO:
   1412                 /* ensure there is an item in inBuf */
   1413                 if (sa->headxLen > 0) {
   1414                     /* we have a phrase in headx, cbuf1 (can be single
   1415                        PUNC item), do lex lookup, g2p, or copy */
   1416 
   1417                     /* check if cbuf2 is empty as it should be */
   1418                     if (sa->cbuf2Len > 0) {
   1419                         /* enforce emptiness */
   1420                         PICODBG_WARN(("forcing empty cbuf2, discarding buf"));
   1421                         picoos_emRaiseWarning(this->common->em,
   1422                                               PICO_WARN_PU_DISCARD_BUF,
   1423                                               NULL, NULL);
   1424                     }
   1425 
   1426                     /* cbuf2 overflow avoided in saGrapheme*, saLexInd*,
   1427                        saCopyItem*, phones skipped if needed */
   1428                     for (i = 0; i < sa->headxLen; i++) {
   1429                         switch (sa->headx[i].head.type) {
   1430                             case PICODATA_ITEM_WORDGRAPH:
   1431                                 if (PICO_OK != saGraphemeToPhoneme(this, sa,
   1432                                                                    i)) {
   1433                                     /* not possible, phones skipped if needed */
   1434                                     picoos_emRaiseException(this->common->em,
   1435                                                             PICO_ERR_OTHER,
   1436                                                             NULL, NULL);
   1437                                     return PICODATA_PU_ERROR;
   1438                                 }
   1439                                 break;
   1440                             case PICODATA_ITEM_WORDINDEX:
   1441                                 if (0 == sa->headx[i].head.info2) {
   1442                                   lex = sa->lex;
   1443                                 } else {
   1444                                     lex = sa->ulex[sa->headx[i].head.info2-1];
   1445                                 }
   1446                                 if (PICO_OK != saLexIndLookup(this, sa, lex, i)) {
   1447                                     /* not possible, phones skipped if needed */
   1448                                     picoos_emRaiseException(this->common->em,
   1449                                                             PICO_ERR_OTHER,
   1450                                                             NULL, NULL);
   1451                                     return PICODATA_PU_ERROR;
   1452                                 }
   1453                                 break;
   1454                             default:
   1455                                 /* copy item unmodified, ie. headx untouched,
   1456                                    content from cbuf1 to cbuf2 */
   1457                                 if (PICO_OK != saCopyItemContent1to2(this, sa,
   1458                                                                      i)) {
   1459                                     /* not possible, phones skipped if needed */
   1460                                     picoos_emRaiseException(this->common->em,
   1461                                                             PICO_ERR_OTHER,
   1462                                                             NULL, NULL);
   1463                                     return PICODATA_PU_ERROR;
   1464                                 }
   1465                                 break;
   1466                         }
   1467                     }
   1468                     /* set cbuf1 to empty */
   1469                     sa->cbuf1Len = 0;
   1470                     sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
   1471 
   1472                 } else if (sa->headxLen == 0) {    /* no items in inBuf */
   1473                     PICODBG_WARN(("no items in inBuf"));
   1474                     sa->procState = SA_STEPSTATE_COLLECT;
   1475                     return PICODATA_PU_BUSY;
   1476                 }
   1477 
   1478 #if defined (PICO_DEBUG)
   1479                 if (1) {
   1480                     picoos_uint8 i, j, ittype;
   1481                     for (i = 0; i < sa->headxLen; i++) {
   1482                         ittype = sa->headx[i].head.type;
   1483                         PICODBG_INFO_CTX();
   1484                         PICODBG_INFO_MSG(("sa-g: ("));
   1485                         PICODBG_INFO_MSG(("'%c',", ittype));
   1486                         if ((32 <= sa->headx[i].head.info1) &&
   1487                             (sa->headx[i].head.info1 < 127) &&
   1488                             (ittype != PICODATA_ITEM_WORDPHON)) {
   1489                             PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1));
   1490                         } else {
   1491                             PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1));
   1492                         }
   1493                         if ((32 <= sa->headx[i].head.info2) &&
   1494                             (sa->headx[i].head.info2 < 127)) {
   1495                             PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2));
   1496                         } else {
   1497                             PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2));
   1498                         }
   1499                         PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len));
   1500 
   1501                         for (j = 0; j < sa->headx[i].head.len; j++) {
   1502                             if ((ittype == PICODATA_ITEM_CMD)) {
   1503                                 PICODBG_INFO_MSG(("%c",
   1504                                         sa->cbuf2[sa->headx[i].cind+j]));
   1505                             } else {
   1506                                 PICODBG_INFO_MSG(("%4d",
   1507                                         sa->cbuf2[sa->headx[i].cind+j]));
   1508                             }
   1509                         }
   1510                         PICODBG_INFO_MSG(("\n"));
   1511                     }
   1512                 }
   1513 #endif
   1514 
   1515                 break;
   1516 
   1517 
   1518                 /* *********************************************************/
   1519                 /* transduction parse state: extract phonemes of item in internal outBuf */
   1520            case SA_STEPSTATE_PROCESS_TRNS_PARSE:
   1521 
   1522                 PICODBG_DEBUG(("transduce item (bot, remain): (%d, %d)",
   1523                                 sa->headxBottom, sa->headxLen));
   1524 
   1525                 /* check for termination condition first */
   1526                 if (0 == sa->headxLen) {
   1527                     /* reset headx, cbuf2 */
   1528                     sa->headxBottom = 0;
   1529                     sa->cbuf2Len = 0;
   1530                     /* reset collect state support variables */
   1531                     sa->inspaceok = TRUE;
   1532                     sa->needsmoreitems = TRUE;
   1533 
   1534                     sa->procState = SA_STEPSTATE_COLLECT;
   1535                     return PICODATA_PU_BUSY;
   1536                 }
   1537 
   1538                 sa->procState = SA_STEPSTATE_FEED;
   1539                 /* copy item unmodified */
   1540                 rv = picodata_put_itemparts(
   1541                         &(sa->headx[sa->headxBottom].head),
   1542                         &(sa->cbuf2[sa->headx[sa->headxBottom].cind]),
   1543                         sa->headx[sa->headxBottom].head.len, sa->tmpbuf,
   1544                         PICOSA_MAXITEMSIZE, &blen);
   1545 
   1546                 if (PICODATA_ITEM_WORDPHON == sa->headx[sa->headxBottom].head.type) {
   1547                    PICODBG_DEBUG(("PARSE found WORDPHON"));
   1548                    rv = saExtractPhonemes(this, sa, 0, &(sa->headx[sa->headxBottom].head),
   1549                            &(sa->cbuf2[sa->headx[sa->headxBottom].cind]));
   1550                    if (PICO_OK == rv) {
   1551                        PICODBG_DEBUG(("PARSE successfully returned from phoneme extraction"));
   1552                        sa->procState = SA_STEPSTATE_PROCESS_TRNS_FST;
   1553                    } else {
   1554                        PICODBG_WARN(("PARSE phone extraction returned exception %i, output WORDPHON untransduced",rv));
   1555                    }
   1556                } else {
   1557                    PICODBG_DEBUG(("PARSE found other item, just copying"));
   1558                }
   1559                if (SA_STEPSTATE_FEED == sa->procState) {
   1560                     PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
   1561                             (picoos_uint8 *)"sa-p: ",
   1562                             sa->tmpbuf, PICOSA_MAXITEMSIZE);
   1563 
   1564                 }
   1565 
   1566                 /* consume item */
   1567                 sa->headxBottom++;
   1568                 sa->headxLen--;
   1569 
   1570                 break;
   1571 
   1572                 /* *********************************************************/
   1573                 /* transduce state: copy item in internal outBuf to tmpBuf and transduce */
   1574            case SA_STEPSTATE_PROCESS_TRNS_FST:
   1575 
   1576 
   1577 
   1578 
   1579 
   1580                /* if no word-level FSTs: doing trivial syllabification instead */
   1581                if (0 == sa->numFsts) {
   1582                    PICODBG_DEBUG(("doing trivial sylabification with %i phones", sa->phonWritePos));
   1583 #if defined(PICO_DEBUG)
   1584                    {
   1585                        PICODBG_INFO_CTX();
   1586                        PICODBG_INFO_MSG(("sa trying to trivially syllabify: "));
   1587                        PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
   1588                        PICODBG_INFO_MSG(("\n"));
   1589                    }
   1590 #endif
   1591 
   1592                    picotrns_trivial_syllabify(sa->tabphones, sa->phonBuf,
   1593                            sa->phonWritePos, sa->phonBufOut,
   1594                            &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
   1595                    PICODBG_DEBUG(("returned from trivial sylabification with %i phones", sa->phonWritePos));
   1596 #if defined(PICO_DEBUG)
   1597                    {
   1598                        PICODBG_INFO_CTX();
   1599                        PICODBG_INFO_MSG(("sa returned from syllabification: "));
   1600                        PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
   1601                        PICODBG_INFO_MSG(("\n"));
   1602                    }
   1603 #endif
   1604 
   1605                    /* eliminate deep epsilons */
   1606                    PICODBG_DEBUG(("doing epsilon elimination with %i phones", sa->phonWritePos));
   1607                    picotrns_eliminate_epsilons(sa->phonBufOut,
   1608                            sa->phonWritePos, sa->phonBuf,
   1609                            &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
   1610                    PICODBG_DEBUG(("returning from epsilon elimination with %i phones", sa->phonWritePos));
   1611                    sa->phonReadPos = 0;
   1612                    sa->phonesTransduced = 1;
   1613                    sa->procState = SA_STEPSTATE_FEED;
   1614                    break;
   1615                }
   1616 
   1617                /* there are word-level FSTs */
   1618                /* termination condition first */
   1619                if (sa->curFst >= sa->numFsts) {
   1620                    /* reset for next transduction */
   1621                    sa->curFst = 0;
   1622                    sa->phonReadPos = 0;
   1623                    sa->phonesTransduced = 1;
   1624                    sa->procState = SA_STEPSTATE_FEED;
   1625                    break;
   1626                }
   1627 
   1628                /* transduce from phonBufIn to PhonBufOut */
   1629                {
   1630 
   1631                    picoos_uint32 nrSteps;
   1632 #if defined(PICO_DEBUG)
   1633                    {
   1634                        PICODBG_INFO_CTX();
   1635                        PICODBG_INFO_MSG(("sa trying to transduce: "));
   1636                        PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos);
   1637                        PICODBG_INFO_MSG(("\n"));
   1638                    }
   1639 #endif
   1640                    picotrns_transduce(sa->fst[sa->curFst], FALSE,
   1641                            picotrns_printSolution, sa->phonBuf, sa->phonWritePos, sa->phonBufOut,
   1642                            &sa->phonWritePos,
   1643                            PICOTRNS_MAX_NUM_POSSYM, sa->altDescBuf,
   1644                            sa->maxAltDescLen, &nrSteps);
   1645 #if defined(PICO_DEBUG)
   1646                    {
   1647                        PICODBG_INFO_CTX();
   1648                        PICODBG_INFO_MSG(("sa returned from transduction: "));
   1649                        PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos);
   1650                        PICODBG_INFO_MSG(("\n"));
   1651                    }
   1652 #endif
   1653                }
   1654 
   1655 
   1656 
   1657                /*
   1658                 The transduction output will contain equivalent items i.e. (x,y')  for each (x,y) plus inserted deep symbols (-1,d).
   1659                 In case of deletions, (x,0) might also be omitted...
   1660                 */
   1661                /* eliminate deep epsilons */
   1662                picotrns_eliminate_epsilons(sa->phonBufOut,
   1663                        sa->phonWritePos, sa->phonBuf, &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM);
   1664                sa->phonesTransduced = 1;
   1665 
   1666                sa->curFst++;
   1667 
   1668                return PICODATA_PU_ATOMIC;
   1669                /* break; */
   1670 
   1671                 /* *********************************************************/
   1672                 /* feed state: copy item in internal outBuf to output charBuf */
   1673 
   1674            case SA_STEPSTATE_FEED:
   1675 
   1676                PICODBG_DEBUG(("FEED"));
   1677 
   1678                if (sa->phonesTransduced) {
   1679                    /* replace original phones by transduced */
   1680                    picoos_uint16 phonWritePos = PICODATA_ITEM_HEADSIZE;
   1681                    picoos_uint8 plane;
   1682                    picoos_int16 sym, pos;
   1683                    while (SA_POSSYM_OK == (rv = getNextPosSym(sa,&pos,&sym,sa->nextReadPos))) {
   1684                        PICODBG_TRACE(("FEED inserting phoneme %c into inBuf[%i]",sym,phonWritePos));
   1685                        sym = picotrns_unplane(sym, &plane);
   1686                        PICODBG_ASSERT((PICOKFST_PLANE_PHONEMES == plane));
   1687                        sa->tmpbuf[phonWritePos++] = (picoos_uint8) sym;
   1688                    }
   1689                    PICODBG_DEBUG(("FEED setting item length to %i",phonWritePos - PICODATA_ITEM_HEADSIZE));
   1690                    picodata_set_itemlen(sa->tmpbuf,PICODATA_ITEM_HEADSIZE,phonWritePos - PICODATA_ITEM_HEADSIZE);
   1691                    if (SA_POSSYM_INVALID == rv) {
   1692                        PICODBG_ERROR(("FEED unexpected symbol or unexpected end of phoneme list"));
   1693                        return (picodata_step_result_t)picoos_emRaiseException(this->common->em, PICO_WARN_INCOMPLETE, NULL, NULL);
   1694                    }
   1695                    sa->phonesTransduced = 0;
   1696 
   1697                } /* if (sa->phonesTransduced) */
   1698 
   1699 
   1700                 rvP = picodata_cbPutItem(this->cbOut, sa->tmpbuf,
   1701                 PICOSA_MAXITEMSIZE, &clen);
   1702 
   1703                 *numBytesOutput += clen;
   1704 
   1705                 PICODBG_DEBUG(("put item, status: %d", rvP));
   1706 
   1707                 if (rvP == PICO_OK) {
   1708                 } else if (rvP == PICO_EXC_BUF_OVERFLOW) {
   1709                     /* try again next time */
   1710                     PICODBG_DEBUG(("feeding overflow"));
   1711                     return PICODATA_PU_OUT_FULL;
   1712                 } else {
   1713                     /* error, should never happen */
   1714                     PICODBG_ERROR(("untreated return value, rvP: %d", rvP));
   1715                     return PICODATA_PU_ERROR;
   1716                 }
   1717 
   1718                 PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
   1719                         (picoos_uint8 *)"sana: ",
   1720                         sa->tmpbuf, PICOSA_MAXITEMSIZE);
   1721 
   1722                 sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE;
   1723                 /* return PICODATA_PU_BUSY; */
   1724                 break;
   1725 
   1726             default:
   1727                 break;
   1728         } /* switch */
   1729 
   1730     } /* while */
   1731 
   1732     /* should be never reached */
   1733     PICODBG_ERROR(("reached end of function"));
   1734     picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL);
   1735     return PICODATA_PU_ERROR;
   1736 }
   1737 
   1738 #ifdef __cplusplus
   1739 }
   1740 #endif
   1741 
   1742 
   1743 /* end */
   1744