1 /* 2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 /** 17 * @file picosa.c 18 * 19 * sentence analysis - POS disambiguation 20 * 21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 22 * All rights reserved. 23 * 24 * History: 25 * - 2009-04-20 -- initial version 26 * 27 */ 28 29 #include "picoos.h" 30 #include "picodbg.h" 31 #include "picobase.h" 32 #include "picokdt.h" 33 #include "picoklex.h" 34 #include "picoktab.h" 35 #include "picokfst.h" 36 #include "picotrns.h" 37 #include "picodata.h" 38 #include "picosa.h" 39 40 #ifdef __cplusplus 41 extern "C" { 42 #endif 43 #if 0 44 } 45 #endif 46 47 48 /* PU saStep states */ 49 #define SA_STEPSTATE_COLLECT 0 50 #define SA_STEPSTATE_PROCESS_POSD 10 51 #define SA_STEPSTATE_PROCESS_WPHO 11 52 #define SA_STEPSTATE_PROCESS_TRNS_PARSE 12 53 #define SA_STEPSTATE_PROCESS_TRNS_FST 13 54 #define SA_STEPSTATE_FEED 2 55 56 #define SA_MAX_ALTDESC_SIZE (30*(PICOTRNS_MAX_NUM_POSSYM + 2)) 57 58 #define SA_MSGSTR_SIZE 32 59 60 /* subobject : SentAnaUnit 61 * shortcut : sa 62 * context size : one phrase, max. 
30 non-PUNC items, for non-processed items 63 * one item if internal input empty 64 */ 65 66 /** @addtogroup picosa 67 68 internal buffers: 69 70 - headx: array for extended item heads of fixed size (head plus 71 index for content, plus two fields for boundary strength/type) 72 73 - cbuf1, cbuf2: buffers for item contents (referenced by index in 74 headx). Future: replace these two buffers by a single double-sided 75 buffer (double shrink-grow type) 76 77 0. bottom up filling of items in headx and cbuf1 78 79 1. POS disambiguation (right-to-left, top-to-bottom): 80 - number and sequence of items unchanged 81 - item content can only get smaller (reducing nr of results in WORDINDEX) 82 -> info stays in "headx, cbuf1" and changed in place \n 83 WORDGRAPH(POSes,NA)graph -> WORDGRAPH(POS,NA)graph \n 84 WORDINDEX(POSes,NA)POS1ind1...POSNindN -> WORDINDEX(POS,NA)POS|ind \n 85 86 2. lex-index lookup and G2P (both directions possible, left-to-right done): 87 - number and sequence of items unchanged, item head info and content 88 changes 89 -> headx changed in place; cbuf1 to cbuf2 \n 90 WORDGRAPH(POS,NA)graph -> WORDPHON(POS,NA)phon \n 91 WORDINDEX(POS,NA)POS|ind -> WORDPHON(POS,NA)phon \n 92 93 3. phrasing (right-to-left): 94 95 Previous (before introducing SBEG)\n 96 ---------------------------------- 97 1| 2| 3| 4| \n 98 e.g. from WP WP WP WP WP PUNC WP WP PUNC WP WP WP PUNC FLUSH \n 99 e.g. to BINIT WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND WP WP WP BSEND BTERM \n 100 |1 |2 |3 |4 \n 101 102 3-level bound state: to keep track of bound strength from end of 103 previous punc-phrase, then BOUND item output as first item 104 (strength from prev punc-phrase and type from current 105 punc-phrase). 
trailing PUNC item      bound states
                        INIT         SEND         PHR1
PUNC(SENTEND, T)        B(I,T)>SEND  B(S,T)>SEND  B(P1,T)>SEND
PUNC(SENTEND, Q)        B(I,Q)>SEND  B(S,Q)>SEND  B(P1,Q)>SEND
PUNC(SENTEND, E)        B(I,E)>SEND  B(S,E)>SEND  B(P1,E)>SEND
PUNC(PHRASEEND, P)      B(I,P)>PHR1  B(S,P)>PHR1  B(P1,P)>PHR1
PUNC(PHRASEEND, FORC)   B(I,P)>PHR1  B(S,P)>PHR1  B(P1,P)>PHR1
PUNC(FLUSH, T)          B(I,T)..     B(S,T)..     B(P1,T)..
                        B(T,NA)      B(T,NA)      B(T,NA)
                        >INIT        >INIT        >INIT

PHR2/3 case:
trailing PUNC item      bound states
                        INIT               SEND               PHR1
PUNC(SENTEND, T)        B(I,P)B(P,T)>SEND  B(S,P)B(P,T)>SEND  B(P1,P)B(P,T)>SEND
PUNC(SENTEND, Q)        B(I,P)B(P,Q)>SEND  B(S,P)B(P,Q)>SEND  B(P1,P)B(P,Q)>SEND
PUNC(SENTEND, E)        B(I,P)B(P,E)>SEND  B(S,P)B(P,E)>SEND  B(P1,P)B(P,E)>SEND
PUNC(PHRASEEND, P)      B(I,P)B(P,P)>PHR1  B(S,P)B(P,P)>PHR1  B(P1,P)B(P,P)>PHR1
PUNC(PHREND, FORC)      B(I,P)B(P,P)>PHR1  B(S,P)B(P,P)>PHR1  B(P1,P)B(P,P)>PHR1
PUNC(FLUSH, T)          B(I,P)B(P,T)..     B(S,T)B(P,T)..     B(P1,T)B(P,T)..
                        B(T,NA)            B(T,NA)            B(T,NA)
                        >INIT              >INIT              >INIT

Current
--------
e.g. from WP WP WP WP WP PUNC WP WP PUNC WP WP WP PUNC FLUSH
e.g. to BSBEG WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND BSBEG WP WP WP BSEND BTERM
|1 |2 |3 |4

2-level bound state: The internal buffer contains one primary phrase (sometimes forced, if buffer
almost full), with the trailing PUNC item included (last item).
If the trailing PUNC is a primary phrase separator, the
item is not output, but instead, the bound state is set to PPHR, so that the correct BOUND can
be output at the start of the next primary phrase.
Otherwise,
the item is converted to the corresponding BOUND and output. The bound state is set to SSEP,
so that a BOUND of type SBEG is output at the start of the next primary phrase.
144 145 trailing PUNC item bound states 146 SSEP PPHR 147 PUNC(SENTEND, X) B(B,X)>SSEP B(P1,X)>SSEP (X = T | Q | E) 148 PUNC(FLUSH, T) B(B,T)>SSEP* B(P1,T)>SSEP 149 PUNC(PHRASEEND, P) B(B,P)>PPHR B(P1,P)>PPHR 150 PUNC(PHRASEEND, FORC) B(B,P)>PPHR B(P1,P)>PPHR 151 152 * If more than one sentence separators follow each other (e.g. SEND-FLUSH, SEND-SEND) then 153 all but the first will be treated as an (empty) phrase containing just this item. 154 If this (single) item is a flush, creation of SBEG is suppressed. 155 156 157 - dtphr phrasing tree (rather subphrasing tree it should be called) 158 determines 159 BOUND_PHR2 160 BOUND_PHR3 161 - boundary strenghts are determined for every word (except the 162 first one) from right-to-left. The boundary types mark the phrase 163 type of the phrase following the boundary. 164 - number of items actually changed (new BOUND items added): because 165 of fixed size without content, two fields are contained in headx 166 to indicate if a BOUND needs to be added to the LEFT of the item. 167 -> headx further extended with boundary strength and type info to 168 indicate that to the left of the headx ele a BOUND needs to be 169 inserted when outputting. 170 171 4. accentuation: 172 - number of items unchanged, content unchanged, only head info changes 173 -> changed in place in headx 174 */ 175 176 177 typedef struct { 178 picodata_itemhead_t head; 179 picoos_uint16 cind; 180 } picosa_headx_t; 181 182 183 typedef struct sa_subobj { 184 picoos_uint8 procState; /* for next processing step decision */ 185 186 picoos_uint8 inspaceok; /* flag: headx/cbuf1 has space for an item */ 187 picoos_uint8 needsmoreitems; /* flag: need more items */ 188 picoos_uint8 phonesTransduced; /* flag: */ 189 190 picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE]; /* tmp. 
location for an item */ 191 192 picosa_headx_t headx[PICOSA_MAXNR_HEADX]; 193 picoos_uint16 headxBottom; /* bottom */ 194 picoos_uint16 headxLen; /* length, 0 if empty */ 195 196 picoos_uint8 cbuf1[PICOSA_MAXSIZE_CBUF]; 197 picoos_uint16 cbuf1BufSize; /* actually allocated size */ 198 picoos_uint16 cbuf1Len; /* length, 0 if empty */ 199 200 picoos_uint8 cbuf2[PICOSA_MAXSIZE_CBUF]; 201 picoos_uint16 cbuf2BufSize; /* actually allocated size */ 202 picoos_uint16 cbuf2Len; /* length, 0 if empty */ 203 204 picotrns_possym_t phonBufA[PICOTRNS_MAX_NUM_POSSYM+1]; 205 picotrns_possym_t phonBufB[PICOTRNS_MAX_NUM_POSSYM+1]; 206 picotrns_possym_t * phonBuf; 207 picotrns_possym_t * phonBufOut; 208 picoos_uint16 phonReadPos, phonWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */ 209 picoos_uint16 nextReadPos; /* position of (potential) next item to read from */ 210 211 212 /* buffer for internal calculation of transducer */ 213 picotrns_AltDesc altDescBuf; 214 /* the number of AltDesc in the buffer */ 215 picoos_uint16 maxAltDescLen; 216 217 /* tab knowledge base */ 218 picoktab_Graphs tabgraphs; 219 picoktab_Phones tabphones; 220 picoktab_Pos tabpos; 221 picoktab_FixedIds fixedIds; 222 223 /* dtposd knowledge base */ 224 picokdt_DtPosD dtposd; 225 226 /* dtg2p knowledge base */ 227 picokdt_DtG2P dtg2p; 228 229 /* lex knowledge base */ 230 picoklex_Lex lex; 231 232 /* ulex knowledge bases */ 233 picoos_uint8 numUlex; 234 picoklex_Lex ulex[PICOKNOW_MAX_NUM_ULEX]; 235 236 /* fst knowledge bases */ 237 picoos_uint8 numFsts; 238 picokfst_FST fst[PICOKNOW_MAX_NUM_WPHO_FSTS]; 239 picoos_uint8 curFst; /* the fst to be applied next */ 240 241 242 } sa_subobj_t; 243 244 245 static pico_status_t saInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode) { 246 sa_subobj_t * sa; 247 picoos_uint16 i; 248 picokfst_FST fst; 249 picoknow_kb_id_t fstKbIds[PICOKNOW_MAX_NUM_WPHO_FSTS] = PICOKNOW_KBID_WPHO_ARRAY; 250 picoklex_Lex ulex; 251 
picoknow_kb_id_t ulexKbIds[PICOKNOW_MAX_NUM_ULEX] = PICOKNOW_KBID_ULEX_ARRAY; 252 253 PICODBG_DEBUG(("calling")); 254 255 if (NULL == this || NULL == this->subObj) { 256 return picoos_emRaiseException(this->common->em, 257 PICO_ERR_NULLPTR_ACCESS, NULL, NULL); 258 } 259 sa = (sa_subobj_t *) this->subObj; 260 261 /* sa->common = this->common; */ 262 263 sa->procState = SA_STEPSTATE_COLLECT; 264 265 sa->inspaceok = TRUE; 266 sa->needsmoreitems = TRUE; 267 268 sa->headxBottom = 0; 269 sa->headxLen = 0; 270 sa->cbuf1BufSize = PICOSA_MAXSIZE_CBUF; 271 sa->cbuf2BufSize = PICOSA_MAXSIZE_CBUF; 272 sa->cbuf1Len = 0; 273 sa->cbuf2Len = 0; 274 275 /* init headx, cbuf1, cbuf2 */ 276 for (i = 0; i < PICOSA_MAXNR_HEADX; i++){ 277 sa->headx[i].head.type = 0; 278 sa->headx[i].head.info1 = PICODATA_ITEMINFO1_NA; 279 sa->headx[i].head.info2 = PICODATA_ITEMINFO2_NA; 280 sa->headx[i].head.len = 0; 281 sa->headx[i].cind = 0; 282 } 283 for (i = 0; i < PICOSA_MAXSIZE_CBUF; i++) { 284 sa->cbuf1[i] = 0; 285 sa->cbuf2[i] = 0; 286 } 287 288 289 /* possym buffer */ 290 sa->phonesTransduced = FALSE; 291 sa->phonBuf = sa->phonBufA; 292 sa->phonBufOut = sa->phonBufB; 293 sa->phonReadPos = 0; 294 sa->phonWritePos = 0; 295 sa->nextReadPos = 0; 296 297 if (resetMode == PICO_RESET_SOFT) { 298 /*following initializations needed only at startup or after a full reset*/ 299 return PICO_OK; 300 } 301 302 /* kb fst[] */ 303 sa->numFsts = 0; 304 for (i = 0; i<PICOKNOW_MAX_NUM_WPHO_FSTS; i++) { 305 fst = picokfst_getFST(this->voice->kbArray[fstKbIds[i]]); 306 if (NULL != fst) { 307 sa->fst[sa->numFsts++] = fst; 308 } 309 } 310 sa->curFst = 0; 311 PICODBG_DEBUG(("got %i fsts", sa->numFsts)); 312 /* kb fixedIds */ 313 sa->fixedIds = picoktab_getFixedIds(this->voice->kbArray[PICOKNOW_KBID_FIXED_IDS]); 314 315 /* kb tabgraphs */ 316 sa->tabgraphs = 317 picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]); 318 if (sa->tabgraphs == NULL) { 319 return picoos_emRaiseException(this->common->em, 
PICO_EXC_KB_MISSING, 320 NULL, NULL); 321 } 322 PICODBG_DEBUG(("got tabgraphs")); 323 324 /* kb tabphones */ 325 sa->tabphones = 326 picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]); 327 if (sa->tabphones == NULL) { 328 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING, 329 NULL, NULL); 330 } 331 PICODBG_DEBUG(("got tabphones")); 332 333 #ifdef PICO_DEBU 334 { 335 picoos_uint16 itmp; 336 for (itmp = 0; itmp < 256; itmp++) { 337 if (picoktab_hasVowelProp(sa->tabphones, itmp)) { 338 PICODBG_DEBUG(("tabphones hasVowel: %d", itmp)); 339 } 340 if (picoktab_hasDiphthProp(sa->tabphones, itmp)) { 341 PICODBG_DEBUG(("tabphones hasDiphth: %d", itmp)); 342 } 343 if (picoktab_hasGlottProp(sa->tabphones, itmp)) { 344 PICODBG_DEBUG(("tabphones hasGlott: %d", itmp)); 345 } 346 if (picoktab_hasNonsyllvowelProp(sa->tabphones, itmp)) { 347 PICODBG_DEBUG(("tabphones hasNonsyllvowel: %d", itmp)); 348 } 349 if (picoktab_hasSyllconsProp(sa->tabphones, itmp)) { 350 PICODBG_DEBUG(("tabphones hasSyllcons: %d", itmp)); 351 } 352 if (picoktab_isPrimstress(sa->tabphones, itmp)) { 353 PICODBG_DEBUG(("tabphones isPrimstress: %d", itmp)); 354 } 355 if (picoktab_isSecstress(sa->tabphones, itmp)) { 356 PICODBG_DEBUG(("tabphones isSecstress: %d", itmp)); 357 } 358 if (picoktab_isSyllbound(sa->tabphones, itmp)) { 359 PICODBG_DEBUG(("tabphones isSyllbound: %d", itmp)); 360 } 361 if (picoktab_isPause(sa->tabphones, itmp)) { 362 PICODBG_DEBUG(("tabphones isPause: %d", itmp)); 363 } 364 } 365 366 PICODBG_DEBUG(("tabphones primstressID: %d", 367 picoktab_getPrimstressID(sa->tabphones))); 368 PICODBG_DEBUG(("tabphones secstressID: %d", 369 picoktab_getSecstressID(sa->tabphones))); 370 PICODBG_DEBUG(("tabphones syllboundID: %d", 371 picoktab_getSyllboundID(sa->tabphones))); 372 PICODBG_DEBUG(("tabphones pauseID: %d", 373 picoktab_getPauseID(sa->tabphones))); 374 } 375 #endif 376 377 /* kb tabpos */ 378 sa->tabpos = 379 
picoktab_getPos(this->voice->kbArray[PICOKNOW_KBID_TAB_POS]); 380 if (sa->tabpos == NULL) { 381 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING, 382 NULL, NULL); 383 } 384 PICODBG_DEBUG(("got tabpos")); 385 386 /* kb dtposd */ 387 sa->dtposd = picokdt_getDtPosD(this->voice->kbArray[PICOKNOW_KBID_DT_POSD]); 388 if (sa->dtposd == NULL) { 389 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING, 390 NULL, NULL); 391 } 392 PICODBG_DEBUG(("got dtposd")); 393 394 /* kb dtg2p */ 395 sa->dtg2p = picokdt_getDtG2P(this->voice->kbArray[PICOKNOW_KBID_DT_G2P]); 396 if (sa->dtg2p == NULL) { 397 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING, 398 NULL, NULL); 399 } 400 PICODBG_DEBUG(("got dtg2p")); 401 402 /* kb lex */ 403 sa->lex = picoklex_getLex(this->voice->kbArray[PICOKNOW_KBID_LEX_MAIN]); 404 if (sa->lex == NULL) { 405 return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING, 406 NULL, NULL); 407 } 408 PICODBG_DEBUG(("got lex")); 409 410 /* kb ulex[] */ 411 sa->numUlex = 0; 412 for (i = 0; i<PICOKNOW_MAX_NUM_ULEX; i++) { 413 ulex = picoklex_getLex(this->voice->kbArray[ulexKbIds[i]]); 414 if (NULL != ulex) { 415 sa->ulex[sa->numUlex++] = ulex; 416 } 417 } 418 PICODBG_DEBUG(("got %i user lexica", sa->numUlex)); 419 420 return PICO_OK; 421 } 422 423 static picodata_step_result_t saStep(register picodata_ProcessingUnit this, 424 picoos_int16 mode, 425 picoos_uint16 *numBytesOutput); 426 427 static pico_status_t saTerminate(register picodata_ProcessingUnit this) { 428 return PICO_OK; 429 } 430 431 static pico_status_t saSubObjDeallocate(register picodata_ProcessingUnit this, 432 picoos_MemoryManager mm) { 433 sa_subobj_t * sa; 434 if (NULL != this) { 435 sa = (sa_subobj_t *) this->subObj; 436 picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf); 437 picoos_deallocate(mm, (void *) &this->subObj); 438 } 439 return PICO_OK; 440 } 441 442 443 picodata_ProcessingUnit picosa_newSentAnaUnit(picoos_MemoryManager mm, 
444 picoos_Common common, 445 picodata_CharBuffer cbIn, 446 picodata_CharBuffer cbOut, 447 picorsrc_Voice voice) { 448 picodata_ProcessingUnit this; 449 sa_subobj_t * sa; 450 this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice); 451 if (this == NULL) { 452 return NULL; 453 } 454 455 this->initialize = saInitialize; 456 PICODBG_DEBUG(("set this->step to saStep")); 457 this->step = saStep; 458 this->terminate = saTerminate; 459 this->subDeallocate = saSubObjDeallocate; 460 461 this->subObj = picoos_allocate(mm, sizeof(sa_subobj_t)); 462 if (this->subObj == NULL) { 463 picoos_deallocate(mm, (void *)&this); 464 picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL); 465 return NULL; 466 } 467 468 sa = (sa_subobj_t *) this->subObj; 469 470 sa->altDescBuf = picotrns_allocate_alt_desc_buf(mm, SA_MAX_ALTDESC_SIZE, &sa->maxAltDescLen); 471 if (NULL == sa->altDescBuf) { 472 picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf); 473 picoos_deallocate(mm, (void *)&sa); 474 picoos_deallocate(mm, (void *)&this); 475 picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM, NULL, NULL); 476 } 477 478 479 saInitialize(this, PICO_RESET_FULL); 480 return this; 481 } 482 483 484 /* ***********************************************************************/ 485 /* PROCESS_POSD disambiguation functions */ 486 /* ***********************************************************************/ 487 488 /* find next POS to the right of 'ind' and return its POS and index */ 489 static picoos_uint8 saPosDItemSeqGetPosRight(register picodata_ProcessingUnit this, 490 register sa_subobj_t *sa, 491 const picoos_uint16 ind, 492 const picoos_uint16 top, 493 picoos_uint16 *rightind) { 494 picoos_uint8 val; 495 picoos_int32 i; 496 497 val = PICOKDT_EPSILON; 498 for (i = ind + 1; ((val == PICOKDT_EPSILON) && (i < top)); i++) { 499 if ((sa->headx[i].head.type == PICODATA_ITEM_WORDGRAPH) || 500 (sa->headx[i].head.type == PICODATA_ITEM_WORDINDEX) || 501 (sa->headx[i].head.type == 
PICODATA_ITEM_WORDPHON) ) { 502 val = sa->headx[i].head.info1; 503 } 504 } 505 *rightind = i - 1; 506 return val; 507 } 508 509 510 /* left-to-right, for each WORDGRAPH/WORDINDEX/WORDPHON do posd */ 511 static pico_status_t saDisambPos(register picodata_ProcessingUnit this, 512 register sa_subobj_t *sa) { 513 picokdt_classify_result_t dtres; 514 picoos_uint8 half_nratt_posd = PICOKDT_NRATT_POSD >> 1; 515 picoos_uint16 valbuf[PICOKDT_NRATT_POSD]; /* only [0..half_nratt_posd] can be >2^8 */ 516 picoos_uint16 prevout; /* direct dt output (hist.) or POS of prev word */ 517 picoos_uint16 lastprev3; /* last index of POS(es) found to the left */ 518 picoos_uint16 curPOS; /* POS(es) of current word */ 519 picoos_int32 first; /* index of first item with POS(es) */ 520 picoos_int32 ci; 521 picoos_uint8 okay; /* two uses: processing okay and lexind resovled */ 522 picoos_uint8 i; 523 picoos_uint16 inval; 524 picoos_uint16 fallback; 525 526 /* set initial values */ 527 okay = TRUE; 528 prevout = PICOKDT_HISTORY_ZERO; 529 curPOS = PICODATA_ITEMINFO1_ERR; 530 first = 0; 531 532 while ((first < sa->headxLen) && 533 (sa->headx[first].head.type != PICODATA_ITEM_WORDGRAPH) && 534 (sa->headx[first].head.type != PICODATA_ITEM_WORDINDEX) && 535 (sa->headx[first].head.type != PICODATA_ITEM_WORDPHON)) { 536 first++; 537 } 538 if (first >= sa->headxLen) { 539 /* phrase not containing an item with POSes info, e.g. 
single flush */ 540 PICODBG_DEBUG(("no item with POSes found")); 541 return PICO_OK; 542 } 543 544 lastprev3 = first; 545 546 for (i = 0; i <= half_nratt_posd; i++) { 547 valbuf[i] = PICOKDT_HISTORY_ZERO; 548 } 549 /* set POS(es) of current word, will be shifted afterwards */ 550 valbuf[half_nratt_posd+1] = sa->headx[first].head.info1; 551 for (i = half_nratt_posd+2; i < PICOKDT_NRATT_POSD; i++) { 552 /* find next POS to the right and set valbuf[i] */ 553 valbuf[i] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3); 554 } 555 556 PICODBG_TRACE(("headxLen: %d", sa->headxLen)); 557 558 /* process from left to right all items in headx */ 559 for (ci = first; ci < sa->headxLen; ci++) { 560 okay = TRUE; 561 562 PICODBG_TRACE(("iter: %d, type: %c", ci, sa->headx[ci].head.type)); 563 564 /* if not (WORDGRAPH or WORDINDEX) */ 565 if ((sa->headx[ci].head.type != PICODATA_ITEM_WORDGRAPH) && 566 (sa->headx[ci].head.type != PICODATA_ITEM_WORDINDEX) && 567 (sa->headx[ci].head.type != PICODATA_ITEM_WORDPHON)) { 568 continue; 569 } 570 571 PICODBG_TRACE(("iter: %d, curPOS: %d", ci, sa->headx[ci].head.info1)); 572 573 /* no continue so far => at [ci] we have a WORDGRAPH / WORDINDEX item */ 574 /* shift all elements one position to the left */ 575 /* shift predicted values (history) */ 576 for (i=1; i<half_nratt_posd; i++) { 577 valbuf[i-1] = valbuf[i]; 578 } 579 /* insert previously predicted value (now history) */ 580 valbuf[half_nratt_posd-1] = prevout; 581 /* shift not yet predicted values */ 582 for (i=half_nratt_posd+1; i<PICOKDT_NRATT_POSD; i++) { 583 valbuf[i-1] = valbuf[i]; 584 } 585 /* find next POS to the right and set valbuf[PICOKDT_NRATT_POSD-1] */ 586 valbuf[PICOKDT_NRATT_POSD-1] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3); 587 588 /* just to be on the safe side; the following should never happen */ 589 if (sa->headx[ci].head.info1 != valbuf[half_nratt_posd]) { 590 PICODBG_WARN(("syncing POS")); 591 
picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR, 592 NULL, NULL); 593 valbuf[half_nratt_posd] = sa->headx[ci].head.info1; 594 } 595 596 curPOS = valbuf[half_nratt_posd]; 597 598 /* Check if POS disambiguation not needed */ 599 if (picoktab_isUniquePos(sa->tabpos, (picoos_uint8) curPOS)) { 600 /* not needed */ 601 inval = 0; 602 fallback = 0; 603 if (!picokdt_dtPosDreverseMapOutFixed(sa->dtposd, curPOS, 604 &prevout, &fallback)) { 605 if (fallback) { 606 prevout = fallback; 607 608 } else { 609 PICODBG_ERROR(("problem doing reverse output mapping")); 610 prevout = curPOS; 611 } 612 } 613 PICODBG_DEBUG(("keeping: %d", sa->headx[ci].head.info1)); 614 continue; 615 } 616 617 /* assuming PICOKDT_NRATT_POSD == 7 */ 618 PICODBG_DEBUG(("%d: [%d %d %d %d %d %d %d]", 619 ci, valbuf[0], valbuf[1], valbuf[2], 620 valbuf[3], valbuf[4], valbuf[5], valbuf[6])); 621 622 /* no continue so far => POS disambiguation needed */ 623 /* construct input vector, which is set in dtposd */ 624 if (!picokdt_dtPosDconstructInVec(sa->dtposd, valbuf)) { 625 /* error constructing invec */ 626 PICODBG_WARN(("problem with invec")); 627 picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR, 628 NULL, NULL); 629 okay = FALSE; 630 } 631 /* classify */ 632 if (okay && (!picokdt_dtPosDclassify(sa->dtposd, &prevout))) { 633 /* error doing classification */ 634 PICODBG_WARN(("problem classifying")); 635 picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION, 636 NULL, NULL); 637 okay = FALSE; 638 } 639 /* decompose */ 640 if (okay && (!picokdt_dtPosDdecomposeOutClass(sa->dtposd, &dtres))) { 641 /* error decomposing */ 642 PICODBG_WARN(("problem decomposing")); 643 picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR, 644 NULL, NULL); 645 okay = FALSE; 646 } 647 if (okay && dtres.set) { 648 PICODBG_DEBUG(("in: %d, out: %d", valbuf[3], dtres.class)); 649 } else { 650 PICODBG_WARN(("problem disambiguating POS")); 651 dtres.class = PICODATA_ITEMINFO1_ERR; 652 } 653 654 if 
(dtres.class > 255) { 655 PICODBG_WARN(("dt result outside valid range, setting pos to ERR")); 656 dtres.class = PICODATA_ITEMINFO1_ERR; 657 } 658 659 sa->headx[ci].head.info1 = (picoos_uint8)dtres.class; 660 if (sa->headx[ci].head.type == PICODATA_ITEM_WORDINDEX) { 661 /* find pos/ind entry in cbuf matching unique, 662 disambiguated POS, adapt current headx cind/len 663 accordingly */ 664 PICODBG_DEBUG(("select phon based on POS disambiguation")); 665 okay = FALSE; 666 for (i = 0; i < sa->headx[ci].head.len; i += PICOKLEX_POSIND_SIZE) { 667 PICODBG_DEBUG(("comparing POS at cind + %d", i)); 668 if (picoktab_isPartOfPosGroup(sa->tabpos, 669 (picoos_uint8)dtres.class, 670 sa->cbuf1[sa->headx[ci].cind + i])) { 671 PICODBG_DEBUG(("found match for entry %d", 672 i/PICOKLEX_POSIND_SIZE + 1)); 673 sa->headx[ci].cind += i; 674 okay = TRUE; 675 break; 676 } 677 } 678 /* not finding a match is possible if posd predicts a POS that 679 is not part of any of the input POSes -> no warning */ 680 #if defined(PICO_DEBUG) 681 if (!okay) { 682 PICODBG_DEBUG(("no match found, selecting 1st entry")); 683 } 684 #endif 685 sa->headx[ci].head.len = PICOKLEX_POSIND_SIZE; 686 } 687 } 688 return PICO_OK; 689 } 690 691 692 /* ***********************************************************************/ 693 /* PROCESS_WPHO functions, copy, lexindex, and g2p */ 694 /* ***********************************************************************/ 695 696 /* ************** copy ***************/ 697 698 static pico_status_t saCopyItemContent1to2(register picodata_ProcessingUnit this, 699 register sa_subobj_t *sa, 700 picoos_uint16 ind) { 701 picoos_uint16 i; 702 picoos_uint16 cind1; 703 704 /* set headx.cind, and copy content, head unchanged */ 705 cind1 = sa->headx[ind].cind; 706 sa->headx[ind].cind = sa->cbuf2Len; 707 708 /* check cbufLen */ 709 if (sa->headx[ind].head.len > (sa->cbuf2BufSize - sa->cbuf2Len)) { 710 sa->headx[ind].head.len = sa->cbuf2BufSize - sa->cbuf2Len; 711 PICODBG_WARN(("phones 
skipped")); 712 picoos_emRaiseWarning(this->common->em, 713 PICO_WARN_INCOMPLETE, NULL, NULL); 714 if (sa->headx[ind].head.len == 0) { 715 sa->headx[ind].cind = 0; 716 } 717 } 718 719 for (i = 0; i < sa->headx[ind].head.len; i++) { 720 sa->cbuf2[sa->cbuf2Len] = sa->cbuf1[cind1 + i]; 721 sa->cbuf2Len++; 722 } 723 724 PICODBG_DEBUG(("%c item, len: %d", 725 sa->headx[ind].head.type, sa->headx[ind].head.len)); 726 727 return PICO_OK; 728 } 729 730 731 /* ************** lexindex ***************/ 732 733 static pico_status_t saLexIndLookup(register picodata_ProcessingUnit this, 734 register sa_subobj_t *sa, 735 picoklex_Lex lex, 736 picoos_uint16 ind) { 737 picoos_uint8 pos; 738 picoos_uint8 *phones; 739 picoos_uint8 plen; 740 picoos_uint16 i; 741 742 if (picoklex_lexIndLookup(lex, &(sa->cbuf1[sa->headx[ind].cind + 1]), 743 PICOKLEX_IND_SIZE, &pos, &phones, &plen)) { 744 sa->headx[ind].cind = sa->cbuf2Len; 745 746 /* check cbufLen */ 747 if (plen > (sa->cbuf2BufSize - sa->cbuf2Len)) { 748 plen = sa->cbuf2BufSize - sa->cbuf2Len; 749 PICODBG_WARN(("phones skipped")); 750 picoos_emRaiseWarning(this->common->em, 751 PICO_WARN_INCOMPLETE, NULL, NULL); 752 if (plen == 0) { 753 sa->headx[ind].cind = 0; 754 } 755 } 756 757 /* set item head, info1, info2 unchanged */ 758 sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON; 759 sa->headx[ind].head.len = plen; 760 761 for (i = 0; i < plen; i++) { 762 sa->cbuf2[sa->cbuf2Len] = phones[i]; 763 sa->cbuf2Len++; 764 } 765 766 PICODBG_DEBUG(("%c item, pos: %d, plen: %d", 767 PICODATA_ITEM_WORDPHON, pos, plen)); 768 769 } else { 770 PICODBG_WARN(("lexIndLookup problem")); 771 picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM, 772 NULL, NULL); 773 } 774 return PICO_OK; 775 } 776 777 778 779 /* ************** g2p ***************/ 780 781 782 /* Name : saGetNvowel 783 Function: returns vowel info in a word or word seq 784 Input : sInChar the grapheme string to be converted in phoneme 785 inLen number of bytes in grapheme buffer 
786 inPos start position of current grapheme (0..inLen-1) 787 Output : nVow number of vowels in the word 788 nVord vowel order in the word 789 Returns : TRUE: processing successful; FALSE: errors 790 */ 791 static picoos_uint8 saGetNrVowel(register picodata_ProcessingUnit this, 792 register sa_subobj_t *sa, 793 const picoos_uint8 *sInChar, 794 const picoos_uint16 inLen, 795 const picoos_uint8 inPos, 796 picoos_uint8 *nVow, 797 picoos_uint8 *nVord) { 798 picoos_uint32 nCount; 799 picoos_uint32 pos; 800 picoos_uint8 cstr[PICOBASE_UTF8_MAXLEN + 1]; 801 802 /*defaults*/ 803 *nVow = 0; 804 *nVord = 0; 805 /*1:check wether the current char is a vowel*/ 806 pos = inPos; 807 if (!picobase_get_next_utf8char(sInChar, inLen, &pos, cstr) || 808 !picoktab_hasVowellikeProp(sa->tabgraphs, cstr, PICOBASE_UTF8_MAXLEN)) { 809 return FALSE; 810 } 811 /*2:count number of vowels in current word and find vowel order*/ 812 for (nCount = 0; nCount < inLen; ) { 813 if (!picobase_get_next_utf8char(sInChar, inLen, &nCount, cstr)) { 814 return FALSE; 815 } 816 if (picoktab_hasVowellikeProp(sa->tabgraphs, cstr, 817 PICOBASE_UTF8_MAXLEN)) { 818 (*nVow)++; 819 if (nCount == pos) { 820 (*nVord) = (*nVow); 821 } 822 } 823 } 824 return TRUE; 825 } 826 827 828 /* do g2p for a full word, right-to-left */ 829 static picoos_uint8 saDoG2P(register picodata_ProcessingUnit this, 830 register sa_subobj_t *sa, 831 const picoos_uint8 *graph, 832 const picoos_uint8 graphlen, 833 const picoos_uint8 pos, 834 picoos_uint8 *phones, 835 const picoos_uint16 phonesmaxlen, 836 picoos_uint16 *plen) { 837 picoos_uint16 outNp1Ch; /*last 3 outputs produced*/ 838 picoos_uint16 outNp2Ch; 839 picoos_uint16 outNp3Ch; 840 picoos_uint8 nPrimary; 841 picoos_uint8 nCount; 842 picoos_uint32 utfpos; 843 picoos_uint16 nOutVal; 844 picoos_uint8 okay; 845 picoos_uint16 phonesind; 846 picoos_uint8 nrvow; 847 picoos_uint8 ordvow; 848 picokdt_classify_vecresult_t dtresv; 849 picoos_uint16 i; 850 851 *plen = 0; 852 okay = TRUE; 853 854 
/* use sa->tmpbuf[PICOSA_MAXITEMSIZE] to temporarly store the 855 phones which are predicted in reverse order. Once all are 856 available put them in phones in usuable order. phonesind is 857 used to fille item in reverse order starting at the end of 858 tmpbuf. */ 859 phonesind = PICOSA_MAXITEMSIZE - 1; 860 861 /* prepare the data for loop operations */ 862 outNp1Ch = PICOKDT_HISTORY_ZERO; 863 outNp2Ch = PICOKDT_HISTORY_ZERO; 864 outNp3Ch = PICOKDT_HISTORY_ZERO; 865 866 /* inner loop */ 867 nPrimary = 0; 868 869 /* ************************************************/ 870 /* go backward grapheme by grapheme, it's utf8... */ 871 /* ************************************************/ 872 873 /* set start nCount to position of start of last utfchar */ 874 /* ! watch out! somethimes starting at 1, sometimes at 0, 875 ! sometimes counting per byte, sometimes per UTF8 char */ 876 /* nCount is (start position + 1) of utf8 char */ 877 utfpos = graphlen; 878 if (picobase_get_prev_utf8charpos(graph, 0, &utfpos)) { 879 nCount = utfpos + 1; 880 } else { 881 /* should not occurr */ 882 PICODBG_ERROR(("invalid utf8 string, graphlen: %d", graphlen)); 883 return FALSE; 884 } 885 886 while (nCount > 0) { 887 PICODBG_TRACE(("right-to-left g2p, count: %d", nCount)); 888 okay = TRUE; 889 890 if (!saGetNrVowel(this, sa, graph, graphlen, nCount-1, &nrvow, 891 &ordvow)) { 892 nrvow = 0; 893 ordvow = 0; 894 } 895 896 /* prepare input vector, set inside tree object invec, 897 * g2pBuildVector will call the constructInVec tree method */ 898 if (!picokdt_dtG2PconstructInVec(sa->dtg2p, 899 graph, /*grapheme start*/ 900 graphlen, /*grapheme length*/ 901 nCount-1, /*grapheme current position*/ 902 pos, /*Word POS*/ 903 nrvow, /*nr vowels if vowel, 0 else */ 904 ordvow, /*ord of vowel if vowel, 0 el*/ 905 &nPrimary, /*primary stress flag*/ 906 outNp1Ch, /*Right phoneme context +1*/ 907 outNp2Ch, /*Right phoneme context +2*/ 908 outNp3Ch)) { /*Right phon context +3*/ 909 /*Errors in preparing the 
input vector : skip processing*/ 910 PICODBG_WARN(("problem with invec")); 911 picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR, 912 NULL, NULL); 913 okay = FALSE; 914 } 915 916 /* classify using the invec in the tree object and save the direct 917 tree output also in the tree object */ 918 if (okay && (!picokdt_dtG2Pclassify(sa->dtg2p, &nOutVal))) { 919 /* error doing classification */ 920 PICODBG_WARN(("problem classifying")); 921 picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION, 922 NULL, NULL); 923 okay = FALSE; 924 } 925 926 /* decompose the invec in the tree object and return result in dtresv */ 927 if (okay && (!picokdt_dtG2PdecomposeOutClass(sa->dtg2p, &dtresv))) { 928 /* error decomposing */ 929 PICODBG_WARN(("problem decomposing")); 930 picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR, 931 NULL, NULL); 932 okay = FALSE; 933 } 934 935 if (okay) { 936 if ((dtresv.nr == 0) || (dtresv.classvec[0] == PICOKDT_EPSILON)) { 937 /* no phones to be added */ 938 PICODBG_TRACE(("epsilon, no phone added %c", graph[nCount-1])); 939 ; 940 } else { 941 /* add decomposed output to tmpbuf, reverse order */ 942 for (i = dtresv.nr; ((((PICOSA_MAXITEMSIZE - 1) - 943 phonesind)<phonesmaxlen) && 944 (i > 0)); ) { 945 i--; 946 PICODBG_TRACE(("%c %d",graph[nCount-1],dtresv.classvec[i])); 947 if (dtresv.classvec[i] > 255) { 948 PICODBG_WARN(("dt result outside valid range, " 949 "skipping phone")); 950 continue; 951 } 952 sa->tmpbuf[phonesind--] = (picoos_uint8)dtresv.classvec[i]; 953 if (!nPrimary) { 954 if (picoktab_isPrimstress(sa->tabphones, 955 (picoos_uint8)dtresv.classvec[i])) { 956 nPrimary = 1; 957 } 958 } 959 (*plen)++; 960 } 961 if (i > 0) { 962 PICODBG_WARN(("phones skipped")); 963 picoos_emRaiseWarning(this->common->em, 964 PICO_WARN_INCOMPLETE, NULL, NULL); 965 } 966 } 967 } 968 969 /*shift tree output history and update*/ 970 outNp3Ch = outNp2Ch; 971 outNp2Ch = outNp1Ch; 972 outNp1Ch = nOutVal; 973 974 /* go backward one utf8 char 
*/ 975 /* nCount is in +1 domain */ 976 if (nCount <= 1) { 977 /* end of str */ 978 nCount = 0; 979 } else { 980 utfpos = nCount - 1; 981 if (!picobase_get_prev_utf8charpos(graph, 0, &utfpos)) { 982 /* should not occur */ 983 PICODBG_ERROR(("invalid utf8 string, utfpos: %d", utfpos)); 984 return FALSE; 985 } else { 986 nCount = utfpos + 1; 987 } 988 } 989 } 990 991 /* a must be: (PICOSA_MAXITEMSIZE-1) - phonesind == *plen */ 992 /* now that we have all phone IDs, copy in correct order to phones */ 993 /* phonesind point to next free slot in the reverse domainn, 994 ie. inc first */ 995 phonesind++; 996 for (i = 0; i < *plen; i++, phonesind++) { 997 phones[i] = sa->tmpbuf[phonesind]; 998 } 999 return TRUE; 1000 } 1001 1002 1003 /* item in headx[ind]/cbuf1, out: modified headx and cbuf2 */ 1004 1005 static pico_status_t saGraphemeToPhoneme(register picodata_ProcessingUnit this, 1006 register sa_subobj_t *sa, 1007 picoos_uint16 ind) { 1008 picoos_uint16 plen; 1009 1010 PICODBG_TRACE(("starting g2p")); 1011 1012 if (saDoG2P(this, sa, &(sa->cbuf1[sa->headx[ind].cind]), 1013 sa->headx[ind].head.len, sa->headx[ind].head.info1, 1014 &(sa->cbuf2[sa->cbuf2Len]), (sa->cbuf2BufSize - sa->cbuf2Len), 1015 &plen)) { 1016 1017 /* check of cbuf2Len done in saDoG2P, phones skipped if needed */ 1018 if (plen > 255) { 1019 PICODBG_WARN(("maximum number of phones exceeded (%d), skipping", 1020 plen)); 1021 plen = 255; 1022 } 1023 1024 /* set item head, info1, info2 unchanged */ 1025 sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON; 1026 sa->headx[ind].head.len = (picoos_uint8)plen; 1027 sa->headx[ind].cind = sa->cbuf2Len; 1028 sa->cbuf2Len += plen; 1029 PICODBG_DEBUG(("%c item, plen: %d", 1030 PICODATA_ITEM_WORDPHON, plen)); 1031 } else { 1032 PICODBG_WARN(("problem doing g2p")); 1033 picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM, 1034 NULL, NULL); 1035 } 1036 return PICO_OK; 1037 } 1038 1039 1040 /* 
***********************************************************************/ 1041 /* extract phonemes of an item into a phonBuf */ 1042 /* ***********************************************************************/ 1043 1044 static pico_status_t saAddPhoneme(register sa_subobj_t *sa, picoos_uint16 pos, picoos_uint16 sym) { 1045 /* picoos_uint8 plane, unshifted; */ 1046 1047 /* just for debuging */ 1048 /* 1049 unshifted = picotrns_unplane(sym,&plane); 1050 PICODBG_DEBUG(("adding %i/%i (%c on plane %i) at phonBuf[%i]",pos,sym,unshifted,plane,sa->phonWritePos)); 1051 */ 1052 if (PICOTRNS_MAX_NUM_POSSYM <= sa->phonWritePos) { 1053 /* not an error! */ 1054 PICODBG_DEBUG(("couldn't add because phon buffer full")); 1055 return PICO_EXC_BUF_OVERFLOW; 1056 } else { 1057 sa->phonBuf[sa->phonWritePos].pos = pos; 1058 sa->phonBuf[sa->phonWritePos].sym = sym; 1059 sa->phonWritePos++; 1060 return PICO_OK; 1061 } 1062 } 1063 1064 /* 1065 static pico_status_t saAddStartPhoneme(register sa_subobj_t *sa) { 1066 return saAddPhoneme(sa, PICOTRNS_POS_IGNORE, 1067 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId); 1068 } 1069 1070 1071 static pico_status_t saAddTermPhoneme(register sa_subobj_t *sa) { 1072 return saAddPhoneme(sa, PICOTRNS_POS_IGNORE, 1073 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId); 1074 } 1075 1076 */ 1077 1078 static pico_status_t saExtractPhonemes(register picodata_ProcessingUnit this, 1079 register sa_subobj_t *sa, picoos_uint16 pos, 1080 picodata_itemhead_t* head, const picoos_uint8* content) 1081 { 1082 pico_status_t rv= PICO_OK; 1083 picoos_uint8 i; 1084 picoos_int16 fstSymbol; 1085 #if defined(PICO_DEBUG) 1086 picoos_char msgstr[SA_MSGSTR_SIZE]; 1087 #endif 1088 1089 PICODBG_TRACE(("doing item %s", 1090 picodata_head_to_string(head,msgstr,SA_MSGSTR_SIZE))); 1091 /* 1092 Items considered in a transduction are WORDPHON item. its starting offset within the inBuf is given as 1093 'pos'. 
1094 Elements that go into the transduction receive "their" position in the buffer. 1095 */ 1096 sa->phonWritePos = 0; 1097 /* WORDPHON(POS,WACC)phon */ 1098 rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE, 1099 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId); 1100 for (i = 0; i < head->len; i++) { 1101 fstSymbol = /* (PICOKFST_PLANE_PHONEMES << 8) + */content[i]; 1102 /* */ 1103 PICODBG_TRACE(("adding phoneme %c",fstSymbol)); 1104 rv = saAddPhoneme(sa, pos+PICODATA_ITEM_HEADSIZE+i, fstSymbol); 1105 } 1106 rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE, 1107 (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId); 1108 sa->nextReadPos = pos + PICODATA_ITEM_HEADSIZE + head->len; 1109 return rv; 1110 } 1111 1112 1113 #define SA_POSSYM_OK 0 1114 #define SA_POSSYM_OUT_OF_RANGE 1 1115 #define SA_POSSYM_END 2 1116 #define SA_POSSYM_INVALID -3 1117 /* *readPos is the next position in phonBuf to be read, and *writePos is the first position not to be read (may be outside 1118 * buf). 1119 * 'rangeEnd' is the first possym position outside the desired range. 1120 * Possible return values: 1121 * SA_POSSYM_OK : 'pos' and 'sym' are set to the read possym, *readPos is advanced 1122 * SA_POSSYM_OUT_OF_RANGE : pos is out of range. 'pos' is set to that of the read possym, 'sym' is undefined 1123 * SA_POSSYM_UNDERFLOW : no more data in buf. 'pos' is set to PICOTRNS_POS_INVALID, 'sym' is undefined 1124 * SA_POSSYM_INVALID : "strange" pos. 
'pos' is set to PICOTRNS_POS_INVALID, 'sym' is undefined 1125 */ 1126 static pico_status_t getNextPosSym(sa_subobj_t * sa, picoos_int16 * pos, picoos_int16 * sym, 1127 picoos_int16 rangeEnd) { 1128 /* skip POS_IGNORE */ 1129 while ((sa->phonReadPos < sa->phonWritePos) && (PICOTRNS_POS_IGNORE == sa->phonBuf[sa->phonReadPos].pos)) { 1130 PICODBG_DEBUG(("ignoring phone at sa->phonBuf[%i] because it has pos==IGNORE",sa->phonReadPos)); 1131 sa->phonReadPos++; 1132 } 1133 if ((sa->phonReadPos < sa->phonWritePos)) { 1134 *pos = sa->phonBuf[sa->phonReadPos].pos; 1135 if ((PICOTRNS_POS_INSERT == *pos) || ((0 <= *pos) && (*pos < rangeEnd))) { 1136 *sym = sa->phonBuf[sa->phonReadPos++].sym; 1137 return SA_POSSYM_OK; 1138 } else if (*pos < 0){ /* *pos is "strange" (e.g. POS_INVALID) */ 1139 return SA_POSSYM_INVALID; 1140 } else { 1141 return SA_POSSYM_OUT_OF_RANGE; 1142 } 1143 } else { 1144 /* no more possyms to read */ 1145 *pos = PICOTRNS_POS_INVALID; 1146 return SA_POSSYM_END; 1147 } 1148 } 1149 1150 1151 1152 1153 /* ***********************************************************************/ 1154 /* saStep function */ 1155 /* ***********************************************************************/ 1156 1157 /* 1158 complete phrase processed in one step, if not fast enough -> rework 1159 1160 init, collect into internal buffer, process, and then feed to 1161 output buffer 1162 1163 init state: INIT ext ext 1164 state trans: in hc1 hc2 out 1165 1166 INIT | putItem = 0 0 +1 | BUSY -> COLL (put B-SBEG item, 1167 set do-init to false) 1168 1169 inspace-ok-hc1 1170 needs-more-items-(phrase-or-flush) 1171 COLL1 |getItems -n +n 0 1 | ATOMIC -> PPOSD (got items, 1172 if flush set do-init) 1173 COLL2 |getItems -n +n 1 0 | ATOMIC -> PPOSD (got items, forced) 1174 COLL3 |getItems -n +n 1 1 | IDLE (got items, need more) 1175 COLL4 |getItems = = 1 1 | IDLE (got no items) 1176 1177 PPOSD | posd = ~n~n | BUSY -> PWP (posd done) 1178 PWP | lex/g2p = ~n-n 0+n | BUSY -> PPHR (lex/g2p done) 1179 
PPHR | phr = -n 0 +m=n | BUSY -> PACC (phr done, m>=n) 1180 PACC | acc = 0 0 ~m=n | BUSY -> FEED (acc done) 1181 1182 doinit-flag 1183 FEED | putItems 0 0 0 -m-n +m 0 | BUSY -> COLL (put items) 1184 FEED | putItems 0 0 0 -m-n +m 1 | BUSY -> INIT (put items) 1185 FEED | putItems 0 0 0 -d-d +d | OUT_FULL (put some items) 1186 */ 1187 1188 static picodata_step_result_t saStep(register picodata_ProcessingUnit this, 1189 picoos_int16 mode, 1190 picoos_uint16 *numBytesOutput) { 1191 register sa_subobj_t *sa; 1192 pico_status_t rv = PICO_OK; 1193 pico_status_t rvP = PICO_OK; 1194 picoos_uint16 blen = 0; 1195 picoos_uint16 clen = 0; 1196 picoos_uint16 i; 1197 picoklex_Lex lex; 1198 1199 1200 if (NULL == this || NULL == this->subObj) { 1201 return PICODATA_PU_ERROR; 1202 } 1203 sa = (sa_subobj_t *) this->subObj; 1204 mode = mode; /* avoid warning "var not used in this function"*/ 1205 *numBytesOutput = 0; 1206 while (1) { /* exit via return */ 1207 PICODBG_DEBUG(("doing state %i, hLen|c1Len|c2Len: %d|%d|%d", 1208 sa->procState, sa->headxLen, sa->cbuf1Len, 1209 sa->cbuf2Len)); 1210 1211 switch (sa->procState) { 1212 1213 /* *********************************************************/ 1214 /* collect state: get item(s) from charBuf and store in 1215 * internal buffers, need a complete punctuation-phrase 1216 */ 1217 case SA_STEPSTATE_COLLECT: 1218 1219 while (sa->inspaceok && sa->needsmoreitems 1220 && (PICO_OK == 1221 (rv = picodata_cbGetItem(this->cbIn, sa->tmpbuf, 1222 PICOSA_MAXITEMSIZE, &blen)))) { 1223 rvP = picodata_get_itemparts(sa->tmpbuf, 1224 PICOSA_MAXITEMSIZE, 1225 &(sa->headx[sa->headxLen].head), 1226 &(sa->cbuf1[sa->cbuf1Len]), 1227 sa->cbuf1BufSize-sa->cbuf1Len, 1228 &clen); 1229 if (rvP != PICO_OK) { 1230 PICODBG_ERROR(("problem getting item parts")); 1231 picoos_emRaiseException(this->common->em, rvP, 1232 NULL, NULL); 1233 return PICODATA_PU_ERROR; 1234 } 1235 1236 /* if CMD(...FLUSH...) 
-> PUNC(...FLUSH...), 1237 construct PUNC-FLUSH item in headx */ 1238 if ((sa->headx[sa->headxLen].head.type == 1239 PICODATA_ITEM_CMD) && 1240 (sa->headx[sa->headxLen].head.info1 == 1241 PICODATA_ITEMINFO1_CMD_FLUSH)) { 1242 sa->headx[sa->headxLen].head.type = 1243 PICODATA_ITEM_PUNC; 1244 sa->headx[sa->headxLen].head.info1 = 1245 PICODATA_ITEMINFO1_PUNC_FLUSH; 1246 sa->headx[sa->headxLen].head.info2 = 1247 PICODATA_ITEMINFO2_PUNC_SENT_T; 1248 sa->headx[sa->headxLen].head.len = 0; 1249 } 1250 1251 /* convert opening phoneme command to WORDPHON 1252 * and assign user-POS XX to it (Bug 432) */ 1253 sa->headx[sa->headxLen].cind = sa->cbuf1Len; 1254 /* maybe overwritten later */ 1255 if ((sa->headx[sa->headxLen].head.type == 1256 PICODATA_ITEM_CMD) && 1257 (sa->headx[sa->headxLen].head.info1 == 1258 PICODATA_ITEMINFO1_CMD_PHONEME)&& 1259 (sa->headx[sa->headxLen].head.info2 == 1260 PICODATA_ITEMINFO2_CMD_START)) { 1261 picoos_uint8 i; 1262 picoos_uint8 wordsep = picoktab_getWordboundID(sa->tabphones); 1263 PICODBG_INFO(("wordsep id is %i",wordsep)); 1264 sa->headx[sa->headxLen].head.type = PICODATA_ITEM_WORDPHON; 1265 sa->headx[sa->headxLen].head.info1 = PICODATA_POS_XX; 1266 sa->headx[sa->headxLen].head.info2 = PICODATA_ITEMINFO2_NA; 1267 /* cut off additional words */ 1268 i = 0; 1269 while ((i < sa->headx[sa->headxLen].head.len) && (wordsep != sa->cbuf1[sa->headx[sa->headxLen].cind+i])) { 1270 PICODBG_INFO(("accepting phoneme %i",sa->cbuf1[sa->headx[sa->headxLen].cind+i])); 1271 1272 i++; 1273 } 1274 if (i < sa->headx[sa->headxLen].head.len) { 1275 PICODBG_INFO(("cutting off superfluous phonetic words at %i",i)); 1276 sa->headx[sa->headxLen].head.len = i; 1277 } 1278 } 1279 1280 /* check/set needsmoreitems */ 1281 if (sa->headx[sa->headxLen].head.type == 1282 PICODATA_ITEM_PUNC) { 1283 sa->needsmoreitems = FALSE; 1284 } 1285 1286 /* check/set inspaceok, keep spare slot for forcing */ 1287 if ((sa->headxLen >= (PICOSA_MAXNR_HEADX - 2)) || 1288 ((sa->cbuf1BufSize - 
sa->cbuf1Len) < 1289 PICOSA_MAXITEMSIZE)) { 1290 sa->inspaceok = FALSE; 1291 } 1292 1293 if (clen > 0) { 1294 sa->headx[sa->headxLen].cind = sa->cbuf1Len; 1295 sa->cbuf1Len += clen; 1296 } else { 1297 sa->headx[sa->headxLen].cind = 0; 1298 } 1299 sa->headxLen++; 1300 } 1301 1302 if (!sa->needsmoreitems) { 1303 /* 1, phrase buffered */ 1304 sa->procState = SA_STEPSTATE_PROCESS_POSD; 1305 return PICODATA_PU_ATOMIC; 1306 } else if (!sa->inspaceok) { 1307 /* 2, forced phrase end */ 1308 /* at least one slot is still free, use it to 1309 force a trailing PUNC item */ 1310 sa->headx[sa->headxLen].head.type = PICODATA_ITEM_PUNC; 1311 sa->headx[sa->headxLen].head.info1 = 1312 PICODATA_ITEMINFO1_PUNC_PHRASEEND; 1313 sa->headx[sa->headxLen].head.info2 = 1314 PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED; 1315 sa->headx[sa->headxLen].head.len = 0; 1316 sa->needsmoreitems = FALSE; /* not really needed for now */ 1317 sa->headxLen++; 1318 PICODBG_WARN(("forcing phrase end, added PUNC_PHRASEEND")); 1319 picoos_emRaiseWarning(this->common->em, 1320 PICO_WARN_FALLBACK, NULL, 1321 (picoos_char *)"forced phrase end"); 1322 sa->procState = SA_STEPSTATE_PROCESS_POSD; 1323 return PICODATA_PU_ATOMIC; 1324 } else if (rv == PICO_EOF) { 1325 /* 3, 4 */ 1326 return PICODATA_PU_IDLE; 1327 } else if ((rv == PICO_EXC_BUF_UNDERFLOW) || 1328 (rv == PICO_EXC_BUF_OVERFLOW)) { 1329 /* error, no valid item in cb (UNDER) */ 1330 /* or tmpbuf not large enough, not possible (OVER) */ 1331 /* no exception raised, left for ctrl to handle */ 1332 PICODBG_ERROR(("buffer under/overflow, rv: %d", rv)); 1333 return PICODATA_PU_ERROR; 1334 } else { 1335 /* error, only possible if cbGetItem implementation 1336 changes without this function being adapted*/ 1337 PICODBG_ERROR(("untreated return value, rv: %d", rv)); 1338 return PICODATA_PU_ERROR; 1339 } 1340 break; 1341 1342 1343 /* *********************************************************/ 1344 /* process posd state: process items in headx/cbuf1 1345 * and change in 
place 1346 */ 1347 case SA_STEPSTATE_PROCESS_POSD: 1348 /* ensure there is an item in inBuf */ 1349 if (sa->headxLen > 0) { 1350 /* we have a phrase in headx, cbuf1 (can be 1351 single PUNC item without POS), do pos disamb */ 1352 if (PICO_OK != saDisambPos(this, sa)) { 1353 picoos_emRaiseException(this->common->em, 1354 PICO_ERR_OTHER, NULL, NULL); 1355 return PICODATA_PU_ERROR; 1356 } 1357 sa->procState = SA_STEPSTATE_PROCESS_WPHO; 1358 1359 } else if (sa->headxLen == 0) { /* no items in inBuf */ 1360 PICODBG_WARN(("no items in inBuf")); 1361 sa->procState = SA_STEPSTATE_COLLECT; 1362 return PICODATA_PU_BUSY; 1363 } 1364 1365 #if defined (PICO_DEBUG) 1366 if (1) { 1367 picoos_uint8 i, j, ittype; 1368 for (i = 0; i < sa->headxLen; i++) { 1369 ittype = sa->headx[i].head.type; 1370 PICODBG_INFO_CTX(); 1371 PICODBG_INFO_MSG(("sa-d: (")); 1372 PICODBG_INFO_MSG(("'%c',", ittype)); 1373 if ((32 <= sa->headx[i].head.info1) && 1374 (sa->headx[i].head.info1 < 127) && 1375 (ittype != PICODATA_ITEM_WORDGRAPH) && 1376 (ittype != PICODATA_ITEM_WORDINDEX)) { 1377 PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1)); 1378 } else { 1379 PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1)); 1380 } 1381 if ((32 <= sa->headx[i].head.info2) && 1382 (sa->headx[i].head.info2 < 127)) { 1383 PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2)); 1384 } else { 1385 PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2)); 1386 } 1387 PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len)); 1388 1389 for (j = 0; j < sa->headx[i].head.len; j++) { 1390 if ((ittype == PICODATA_ITEM_WORDGRAPH) || 1391 (ittype == PICODATA_ITEM_CMD)) { 1392 PICODBG_INFO_MSG(("%c", 1393 sa->cbuf1[sa->headx[i].cind+j])); 1394 } else { 1395 PICODBG_INFO_MSG(("%4d", 1396 sa->cbuf1[sa->headx[i].cind+j])); 1397 } 1398 } 1399 PICODBG_INFO_MSG(("\n")); 1400 } 1401 } 1402 #endif 1403 1404 break; 1405 1406 1407 /* *********************************************************/ 1408 /* process wpho state: process items in headx/cbuf1 and 
modify 1409 * headx in place and fill cbuf2 1410 */ 1411 case SA_STEPSTATE_PROCESS_WPHO: 1412 /* ensure there is an item in inBuf */ 1413 if (sa->headxLen > 0) { 1414 /* we have a phrase in headx, cbuf1 (can be single 1415 PUNC item), do lex lookup, g2p, or copy */ 1416 1417 /* check if cbuf2 is empty as it should be */ 1418 if (sa->cbuf2Len > 0) { 1419 /* enforce emptyness */ 1420 PICODBG_WARN(("forcing empty cbuf2, discarding buf")); 1421 picoos_emRaiseWarning(this->common->em, 1422 PICO_WARN_PU_DISCARD_BUF, 1423 NULL, NULL); 1424 } 1425 1426 /* cbuf2 overflow avoided in saGrapheme*, saLexInd*, 1427 saCopyItem*, phones skipped if needed */ 1428 for (i = 0; i < sa->headxLen; i++) { 1429 switch (sa->headx[i].head.type) { 1430 case PICODATA_ITEM_WORDGRAPH: 1431 if (PICO_OK != saGraphemeToPhoneme(this, sa, 1432 i)) { 1433 /* not possible, phones skipped if needed */ 1434 picoos_emRaiseException(this->common->em, 1435 PICO_ERR_OTHER, 1436 NULL, NULL); 1437 return PICODATA_PU_ERROR; 1438 } 1439 break; 1440 case PICODATA_ITEM_WORDINDEX: 1441 if (0 == sa->headx[i].head.info2) { 1442 lex = sa->lex; 1443 } else { 1444 lex = sa->ulex[sa->headx[i].head.info2-1]; 1445 } 1446 if (PICO_OK != saLexIndLookup(this, sa, lex, i)) { 1447 /* not possible, phones skipped if needed */ 1448 picoos_emRaiseException(this->common->em, 1449 PICO_ERR_OTHER, 1450 NULL, NULL); 1451 return PICODATA_PU_ERROR; 1452 } 1453 break; 1454 default: 1455 /* copy item unmodified, ie. 
headx untouched, 1456 content from cbuf1 to cbuf2 */ 1457 if (PICO_OK != saCopyItemContent1to2(this, sa, 1458 i)) { 1459 /* not possible, phones skipped if needed */ 1460 picoos_emRaiseException(this->common->em, 1461 PICO_ERR_OTHER, 1462 NULL, NULL); 1463 return PICODATA_PU_ERROR; 1464 } 1465 break; 1466 } 1467 } 1468 /* set cbuf1 to empty */ 1469 sa->cbuf1Len = 0; 1470 sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE; 1471 1472 } else if (sa->headxLen == 0) { /* no items in inBuf */ 1473 PICODBG_WARN(("no items in inBuf")); 1474 sa->procState = SA_STEPSTATE_COLLECT; 1475 return PICODATA_PU_BUSY; 1476 } 1477 1478 #if defined (PICO_DEBUG) 1479 if (1) { 1480 picoos_uint8 i, j, ittype; 1481 for (i = 0; i < sa->headxLen; i++) { 1482 ittype = sa->headx[i].head.type; 1483 PICODBG_INFO_CTX(); 1484 PICODBG_INFO_MSG(("sa-g: (")); 1485 PICODBG_INFO_MSG(("'%c',", ittype)); 1486 if ((32 <= sa->headx[i].head.info1) && 1487 (sa->headx[i].head.info1 < 127) && 1488 (ittype != PICODATA_ITEM_WORDPHON)) { 1489 PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1)); 1490 } else { 1491 PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1)); 1492 } 1493 if ((32 <= sa->headx[i].head.info2) && 1494 (sa->headx[i].head.info2 < 127)) { 1495 PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2)); 1496 } else { 1497 PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2)); 1498 } 1499 PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len)); 1500 1501 for (j = 0; j < sa->headx[i].head.len; j++) { 1502 if ((ittype == PICODATA_ITEM_CMD)) { 1503 PICODBG_INFO_MSG(("%c", 1504 sa->cbuf2[sa->headx[i].cind+j])); 1505 } else { 1506 PICODBG_INFO_MSG(("%4d", 1507 sa->cbuf2[sa->headx[i].cind+j])); 1508 } 1509 } 1510 PICODBG_INFO_MSG(("\n")); 1511 } 1512 } 1513 #endif 1514 1515 break; 1516 1517 1518 /* *********************************************************/ 1519 /* transduction parse state: extract phonemes of item in internal outBuf */ 1520 case SA_STEPSTATE_PROCESS_TRNS_PARSE: 1521 1522 PICODBG_DEBUG(("transduce item 
(bot, remain): (%d, %d)", 1523 sa->headxBottom, sa->headxLen)); 1524 1525 /* check for termination condition first */ 1526 if (0 == sa->headxLen) { 1527 /* reset headx, cbuf2 */ 1528 sa->headxBottom = 0; 1529 sa->cbuf2Len = 0; 1530 /* reset collect state support variables */ 1531 sa->inspaceok = TRUE; 1532 sa->needsmoreitems = TRUE; 1533 1534 sa->procState = SA_STEPSTATE_COLLECT; 1535 return PICODATA_PU_BUSY; 1536 } 1537 1538 sa->procState = SA_STEPSTATE_FEED; 1539 /* copy item unmodified */ 1540 rv = picodata_put_itemparts( 1541 &(sa->headx[sa->headxBottom].head), 1542 &(sa->cbuf2[sa->headx[sa->headxBottom].cind]), 1543 sa->headx[sa->headxBottom].head.len, sa->tmpbuf, 1544 PICOSA_MAXITEMSIZE, &blen); 1545 1546 if (PICODATA_ITEM_WORDPHON == sa->headx[sa->headxBottom].head.type) { 1547 PICODBG_DEBUG(("PARSE found WORDPHON")); 1548 rv = saExtractPhonemes(this, sa, 0, &(sa->headx[sa->headxBottom].head), 1549 &(sa->cbuf2[sa->headx[sa->headxBottom].cind])); 1550 if (PICO_OK == rv) { 1551 PICODBG_DEBUG(("PARSE successfully returned from phoneme extraction")); 1552 sa->procState = SA_STEPSTATE_PROCESS_TRNS_FST; 1553 } else { 1554 PICODBG_WARN(("PARSE phone extraction returned exception %i, output WORDPHON untransduced",rv)); 1555 } 1556 } else { 1557 PICODBG_DEBUG(("PARSE found other item, just copying")); 1558 } 1559 if (SA_STEPSTATE_FEED == sa->procState) { 1560 PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG], 1561 (picoos_uint8 *)"sa-p: ", 1562 sa->tmpbuf, PICOSA_MAXITEMSIZE); 1563 1564 } 1565 1566 /* consume item */ 1567 sa->headxBottom++; 1568 sa->headxLen--; 1569 1570 break; 1571 1572 /* *********************************************************/ 1573 /* transduce state: copy item in internal outBuf to tmpBuf and transduce */ 1574 case SA_STEPSTATE_PROCESS_TRNS_FST: 1575 1576 1577 1578 1579 1580 /* if no word-level FSTs: doing trivial syllabification instead */ 1581 if (0 == sa->numFsts) { 1582 PICODBG_DEBUG(("doing trivial sylabification with %i phones", 
sa->phonWritePos)); 1583 #if defined(PICO_DEBUG) 1584 { 1585 PICODBG_INFO_CTX(); 1586 PICODBG_INFO_MSG(("sa trying to trivially syllabify: ")); 1587 PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos); 1588 PICODBG_INFO_MSG(("\n")); 1589 } 1590 #endif 1591 1592 picotrns_trivial_syllabify(sa->tabphones, sa->phonBuf, 1593 sa->phonWritePos, sa->phonBufOut, 1594 &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM); 1595 PICODBG_DEBUG(("returned from trivial sylabification with %i phones", sa->phonWritePos)); 1596 #if defined(PICO_DEBUG) 1597 { 1598 PICODBG_INFO_CTX(); 1599 PICODBG_INFO_MSG(("sa returned from syllabification: ")); 1600 PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos); 1601 PICODBG_INFO_MSG(("\n")); 1602 } 1603 #endif 1604 1605 /* eliminate deep epsilons */ 1606 PICODBG_DEBUG(("doing epsilon elimination with %i phones", sa->phonWritePos)); 1607 picotrns_eliminate_epsilons(sa->phonBufOut, 1608 sa->phonWritePos, sa->phonBuf, 1609 &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM); 1610 PICODBG_DEBUG(("returning from epsilon elimination with %i phones", sa->phonWritePos)); 1611 sa->phonReadPos = 0; 1612 sa->phonesTransduced = 1; 1613 sa->procState = SA_STEPSTATE_FEED; 1614 break; 1615 } 1616 1617 /* there are word-level FSTs */ 1618 /* termination condition first */ 1619 if (sa->curFst >= sa->numFsts) { 1620 /* reset for next transduction */ 1621 sa->curFst = 0; 1622 sa->phonReadPos = 0; 1623 sa->phonesTransduced = 1; 1624 sa->procState = SA_STEPSTATE_FEED; 1625 break; 1626 } 1627 1628 /* transduce from phonBufIn to PhonBufOut */ 1629 { 1630 1631 picoos_uint32 nrSteps; 1632 #if defined(PICO_DEBUG) 1633 { 1634 PICODBG_INFO_CTX(); 1635 PICODBG_INFO_MSG(("sa trying to transduce: ")); 1636 PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos); 1637 PICODBG_INFO_MSG(("\n")); 1638 } 1639 #endif 1640 picotrns_transduce(sa->fst[sa->curFst], FALSE, 1641 
picotrns_printSolution, sa->phonBuf, sa->phonWritePos, sa->phonBufOut, 1642 &sa->phonWritePos, 1643 PICOTRNS_MAX_NUM_POSSYM, sa->altDescBuf, 1644 sa->maxAltDescLen, &nrSteps); 1645 #if defined(PICO_DEBUG) 1646 { 1647 PICODBG_INFO_CTX(); 1648 PICODBG_INFO_MSG(("sa returned from transduction: ")); 1649 PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos); 1650 PICODBG_INFO_MSG(("\n")); 1651 } 1652 #endif 1653 } 1654 1655 1656 1657 /* 1658 The trasduction output will contain equivalent items i.e. (x,y') for each (x,y) plus inserted deep symbols (-1,d). 1659 In case of deletions, (x,0) might also be omitted... 1660 */ 1661 /* eliminate deep epsilons */ 1662 picotrns_eliminate_epsilons(sa->phonBufOut, 1663 sa->phonWritePos, sa->phonBuf, &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM); 1664 sa->phonesTransduced = 1; 1665 1666 sa->curFst++; 1667 1668 return PICODATA_PU_ATOMIC; 1669 /* break; */ 1670 1671 /* *********************************************************/ 1672 /* feed state: copy item in internal outBuf to output charBuf */ 1673 1674 case SA_STEPSTATE_FEED: 1675 1676 PICODBG_DEBUG(("FEED")); 1677 1678 if (sa->phonesTransduced) { 1679 /* replace original phones by transduced */ 1680 picoos_uint16 phonWritePos = PICODATA_ITEM_HEADSIZE; 1681 picoos_uint8 plane; 1682 picoos_int16 sym, pos; 1683 while (SA_POSSYM_OK == (rv = getNextPosSym(sa,&pos,&sym,sa->nextReadPos))) { 1684 PICODBG_TRACE(("FEED inserting phoneme %c into inBuf[%i]",sym,phonWritePos)); 1685 sym = picotrns_unplane(sym, &plane); 1686 PICODBG_ASSERT((PICOKFST_PLANE_PHONEMES == plane)); 1687 sa->tmpbuf[phonWritePos++] = (picoos_uint8) sym; 1688 } 1689 PICODBG_DEBUG(("FEED setting item length to %i",phonWritePos - PICODATA_ITEM_HEADSIZE)); 1690 picodata_set_itemlen(sa->tmpbuf,PICODATA_ITEM_HEADSIZE,phonWritePos - PICODATA_ITEM_HEADSIZE); 1691 if (SA_POSSYM_INVALID == rv) { 1692 PICODBG_ERROR(("FEED unexpected symbol or unexpected end of phoneme list")); 1693 return 
(picodata_step_result_t)picoos_emRaiseException(this->common->em, PICO_WARN_INCOMPLETE, NULL, NULL); 1694 } 1695 sa->phonesTransduced = 0; 1696 1697 } /* if (sa->phonesTransduced) */ 1698 1699 1700 rvP = picodata_cbPutItem(this->cbOut, sa->tmpbuf, 1701 PICOSA_MAXITEMSIZE, &clen); 1702 1703 *numBytesOutput += clen; 1704 1705 PICODBG_DEBUG(("put item, status: %d", rvP)); 1706 1707 if (rvP == PICO_OK) { 1708 } else if (rvP == PICO_EXC_BUF_OVERFLOW) { 1709 /* try again next time */ 1710 PICODBG_DEBUG(("feeding overflow")); 1711 return PICODATA_PU_OUT_FULL; 1712 } else { 1713 /* error, should never happen */ 1714 PICODBG_ERROR(("untreated return value, rvP: %d", rvP)); 1715 return PICODATA_PU_ERROR; 1716 } 1717 1718 PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG], 1719 (picoos_uint8 *)"sana: ", 1720 sa->tmpbuf, PICOSA_MAXITEMSIZE); 1721 1722 sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE; 1723 /* return PICODATA_PU_BUSY; */ 1724 break; 1725 1726 default: 1727 break; 1728 } /* switch */ 1729 1730 } /* while */ 1731 1732 /* should be never reached */ 1733 PICODBG_ERROR(("reached end of function")); 1734 picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL); 1735 return PICODATA_PU_ERROR; 1736 } 1737 1738 #ifdef __cplusplus 1739 } 1740 #endif 1741 1742 1743 /* end */ 1744