1 /* 2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 /** 17 * @file picotrns.c 18 * 19 * fst processing 20 * 21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 22 * All rights reserved. 23 * 24 * History: 25 * - 2009-04-20 -- initial version 26 * 27 */ 28 29 #include "picoos.h" 30 #include "picodbg.h" 31 /* #include "picodata.h" */ 32 /* #include "picoknow.h" */ 33 #include "picoktab.h" 34 #include "picokfst.h" 35 #include "picotrns.h" 36 37 #ifdef __cplusplus 38 extern "C" { 39 #endif 40 #if 0 41 } 42 #endif 43 44 45 46 picoos_uint8 picotrns_unplane(picoos_int16 symIn, picoos_uint8 * plane) { 47 if (symIn < 0) { 48 (*plane) = 0; 49 return (picoos_uint8) symIn; 50 } else { 51 (*plane) = symIn >> 8; 52 return (picoos_uint8) (symIn & 0xFF); 53 } 54 } 55 56 #if defined(PICO_DEBUG) 57 58 void PICOTRNS_PRINTSYM1(picoknow_KnowledgeBase kbdbg, picoos_int16 insym, picoos_uint8 phonemic) 59 { 60 #include "picokdbg.h" 61 picoos_int16 sym; 62 picoos_uint8 plane; 63 picokdbg_Dbg dbg = (NULL == kbdbg) ? NULL : picokdbg_getDbg(kbdbg); 64 sym = picotrns_unplane(insym, &plane); 65 switch (plane) { 66 case PICOKFST_PLANE_PHONEMES: /* phones */ 67 if ((NULL == dbg) || !phonemic) { 68 PICODBG_INFO_MSG((" %c", sym)); 69 } else { 70 PICODBG_INFO_MSG((" %s", picokdbg_getPhoneSym(dbg, (picoos_uint8) sym))); 71 } 72 break; 73 case PICOKFST_PLANE_ACCENTS: /* accents */ 74 PICODBG_INFO_MSG((" {A%c}", sym)); 75 break; 76 case PICOKFST_PLANE_XSAMPA: /* xsampa symbols */ 77 PICODBG_INFO_MSG((" {XS:(%i)}", sym)); 78 break; 79 case PICOKFST_PLANE_POS: /* part of speech */ 80 PICODBG_INFO_MSG((" {P:%d}", sym)); 81 break; 82 case PICOKFST_PLANE_PB_STRENGTHS: /* phrases */ 83 if (sym == 48) { 84 PICODBG_INFO_MSG((" {WB}", sym)); 85 } else if (sym == 115) { 86 PICODBG_INFO_MSG((" {P0}", sym)); 87 } else { 88 PICODBG_INFO_MSG((" {P%c}", sym)); 89 } 90 break; 91 case PICOKFST_PLANE_INTERN: /* intern */ 92 PICODBG_INFO_MSG((" [%c]", sym)); 93 break; 94 } 95 } 96 97 void PICOTRNS_PRINTSYM(picoknow_KnowledgeBase kbdbg, picoos_int16 insym) 98 { 99 PICOTRNS_PRINTSYM1(kbdbg,insym,1); 100 } 101 102 void PICOTRNS_PRINTSYMSEQ1(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen, 103 picoos_uint8 phonemic) { 104 picoos_uint16 i; 105 for (i=0; i<seqLen; i++) { 106 PICOTRNS_PRINTSYM1(kbdbg, seq[i].sym, phonemic); 107 } 108 } 109 110 void PICOTRNS_PRINTSYMSEQ(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen) { 111 PICOTRNS_PRINTSYMSEQ1(kbdbg,seq, seqLen, 1); 112 } 113 114 void picotrns_printSolution(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen) 115 { 116 PICODBG_INFO_CTX(); 117 PICODBG_INFO_MSG(("solution: ")); 118 PICOTRNS_PRINTSYMSEQ(NULL, outSeq, outSeqLen); 119 PICODBG_INFO_MSG(("\n")); 120 } 121 122 void picotrns_printSolutionAscii(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen) 123 { 124 PICODBG_INFO_CTX(); 125 PICODBG_INFO_MSG(("solution: ")); 126 PICOTRNS_PRINTSYMSEQ1(NULL, outSeq, outSeqLen,0); 127 PICODBG_INFO_MSG(("\n")); 128 } 129 130 #endif 131 132 133 134 135 /* * +CT+ ***/ 136 struct picotrns_transductionState { 137 picoos_uint16 phase; /* transduction phase: 138 0 = before start 139 1 = before regular recursion step 140 2 = before finish 141 3 = after finish */ 142 picoos_uint32 nrSol; /* nr of solutions so far */ 143 picoos_int16 recPos; /* recursion position; must be signed! */ 144 }; 145 146 typedef struct picotrns_altDesc { 147 picokfst_state_t startFSTState; /**< starting FST state in current recursion position */ 148 picoos_int32 inPos; /**< corresponding position in input string */ 149 picokfst_state_t altState; /**< state of alternatives search; 150 - 0 = before pair search 151 - 1 = search state is a valid pair search state 152 - 2 = before inEps search 153 - 3 = search state is a valid inEps trans search state 154 - 4 = no more alternatives */ 155 picoos_int32 searchState; /**< pair search state or inEps trans search state */ 156 picokfst_symid_t altOutSym; /**< current output symbol at this recursion position */ 157 picoos_int32 altOutRefPos; /**< output reference position at this recursion position */ 158 } picotrns_altDesc_t; 159 160 161 picotrns_AltDesc picotrns_allocate_alt_desc_buf(picoos_MemoryManager mm, picoos_uint32 maxByteSize, picoos_uint16 * numAltDescs) 162 { 163 picotrns_AltDesc buf; 164 (*numAltDescs) = (picoos_uint32) (maxByteSize / sizeof(picotrns_altDesc_t)); 165 buf = (picotrns_AltDesc) picoos_allocate(mm, (*numAltDescs) * sizeof(picotrns_altDesc_t)); 166 if (NULL == buf) { 167 (*numAltDescs) = 0; 168 return NULL; 169 } else { 170 return buf; 171 } 172 } 173 174 void picotrns_deallocate_alt_desc_buf(picoos_MemoryManager mm, picotrns_AltDesc * altDescBuf) 175 { 176 picoos_deallocate(mm, (void *) altDescBuf); 177 } 178 179 /* copy elements from inSeq to outSeq, ignoring elements with epsilon symbol */ 180 pico_status_t picotrns_eliminate_epsilons(const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen, 181 picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen) 182 { 183 picoos_uint16 i, j = 0; 184 185 for (i=0; i < inSeqLen; i++) { 186 /* it is assumed that PICOKFST_SYMID_EPS is a hardwired value and not shifted */ 187 if (PICOKFST_SYMID_EPS != inSeq[i].sym) { 188 if (j < maxOutSeqLen) { 189 outSeq[j].pos = inSeq[i].pos; 190 outSeq[j].sym = inSeq[i].sym; 191 j++; 192 } 193 } 194 *outSeqLen = j; 195 } 196 return PICO_OK; 197 } 198 199 200 static void insertSym(picotrns_possym_t inSeq[], picoos_uint16 pos, picoos_int16 sym) { 201 inSeq[pos].sym = sym; 202 inSeq[pos].pos = PICOTRNS_POS_INSERT; 203 } 204 205 /* copy elements from inSeq to outSeq, inserting syllable separators in some trivial way. 206 * inSeq is assumed to be at most PICOTRNS_MAX_NUM_POSSYM, outSeq at least of size PICOTRNS_MAX_NUM_POSSYM */ 207 pico_status_t picotrns_trivial_syllabify(picoktab_Phones phones, 208 const picotrns_possym_t inSeq[], const picoos_uint16 inSeqLen, 209 picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen) 210 { 211 picoos_uint16 i = 0, j = 0, out = 0, numInserted = 0; 212 picoos_uint8 vowelFound = FALSE; 213 picoos_uint16 accentpos = 0; 214 picoos_int16 accent = 0; 215 216 PICODBG_TRACE(("start")); 217 218 219 while (i < inSeqLen) { 220 /* make sure that at least one more sylSep can be inserted */ 221 if (inSeqLen+numInserted+1 >= maxOutSeqLen) { 222 return PICO_EXC_BUF_OVERFLOW; 223 } 224 /* let j skip consonant cluster */ 225 accent = 0; 226 accentpos = 0; 227 while ((j < inSeqLen) && !picoktab_isSyllCarrier(phones,(picoos_uint8)inSeq[j].sym)) { 228 if ((inSeq[j].sym == picoktab_getPrimstressID(phones)) 229 || (inSeq[j].sym == picoktab_getPrimstressID(phones))) { 230 PICODBG_TRACE(("j skipping stress symbol inSeq[%i].sym = %c", j, inSeq[j].sym)); 231 accent = inSeq[j].sym; 232 accentpos = j; 233 } else { 234 PICODBG_TRACE(("j skipping consonant inSeq[%i].sym = %c", j, inSeq[j].sym)); 235 } 236 j++; 237 } 238 if (j < inSeqLen) { /* j is at the start of a new vowel */ 239 /* copy consonant cluster (moving i) to output, insert syll separator if between vowels */ 240 while (i < j-1) { 241 if ((accent > 0) && (i == accentpos)) { 242 PICODBG_TRACE(("skipping inSeq[%i].sym = %c (stress)", i, inSeq[i].sym)); 243 i++; 244 } else { 245 PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant) into output buffer", i, inSeq[i].sym)); 246 outSeq[out++] = inSeq[i++]; 247 } 248 } 249 if (vowelFound) { /* we're between vowels */ 250 PICODBG_TRACE(("inserting syllable separator into output buffer")); 251 insertSym(outSeq,out++,picoktab_getSyllboundID(phones)); 252 if (accent > 0) { 253 insertSym(outSeq,out++,accent); 254 } 255 numInserted++; 256 } 257 if ((accent > 0) && (i == accentpos)) { 258 PICODBG_TRACE(("skipping inSeq[%i].sym = %c (stress)", i, inSeq[i].sym)); 259 i++; 260 } else { 261 PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant) into output buffer", i, inSeq[i].sym)); 262 outSeq[out++] = inSeq[i++]; 263 } 264 vowelFound = TRUE; 265 /* now copy vowel cluster */ 266 while ((i < inSeqLen) && picoktab_isSyllCarrier(phones,(picoos_uint8)inSeq[i].sym)) { 267 PICODBG_TRACE(("copying inSeq[%i].sym = %c (vowel) into output buffer", i, inSeq[i].sym)); 268 outSeq[out++] = inSeq[i++]; 269 } 270 j = i; 271 } else { /* j is at end of word or end of input */ 272 while (i < j) { 273 PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant or stress) into output buffer", i, inSeq[i].sym)); 274 outSeq[out++] = inSeq[i++]; 275 } 276 } 277 *outSeqLen = out; 278 } 279 PICODBG_ASSERT((out == inSeqLen + numInserted)); 280 281 return PICO_OK; 282 } 283 284 285 /* ******** +CT+: full transduction procedure **********/ 286 287 288 /* Gets next acceptable alternative for output symbol '*outSym' at current recursion position 289 starting from previous alternative in 'altDesc'; possibly uses input symbol 290 given by 'inSeq'/'inSeq'; returns whether alterative was found in '*found'; 291 if '*found', the other output values ('*outRefPos', '*endFSTstate', '*nextInPos'*) 292 return the characteristics for next recursion step; 293 if '*found' is false, the output values are undefined. */ 294 295 static void GetNextAlternative (picokfst_FST fst, picotrns_AltDesc altDesc, 296 const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen, 297 picokfst_symid_t * outSym, picoos_int32 * outRefPos, 298 picokfst_state_t * endFSTState, picoos_int32 * nextInPos, picoos_bool * found) 299 { 300 301 picoos_bool inSymFound; 302 picoos_bool pairFound; 303 picokfst_class_t pairClass; 304 picoos_bool inEpsTransFound; 305 picokfst_symid_t inSym; 306 307 (*found) = 0; 308 do { 309 switch (altDesc->altState) { 310 case 0: /* before pair search */ 311 if (altDesc->inPos < inSeqLen) { 312 inSym = inSeq[altDesc->inPos].sym; 313 if (inSym == PICOKFST_SYMID_EPS) { 314 /* very special case: input epsilon simply produces eps in output 315 without fst state change */ 316 (*found) = 1; 317 (*outSym) = PICOKFST_SYMID_EPS; 318 (*outRefPos) = inSeq[altDesc->inPos].pos; 319 (*endFSTState) = altDesc->startFSTState; 320 (*nextInPos) = altDesc->inPos + 1; 321 altDesc->altState = 2; 322 } else { 323 /* start search for alternatives using input symbol */ 324 picokfst_kfstStartPairSearch(fst,inSeq[altDesc->inPos].sym,& inSymFound,& altDesc->searchState); 325 if (!inSymFound) { 326 altDesc->altState = 2; 327 PICODBG_INFO_CTX(); 328 PICODBG_INFO_MSG((" didnt find symbol ")); 329 PICOTRNS_PRINTSYM(NULL, inSeq[altDesc->inPos].sym); 330 PICODBG_INFO_MSG(("\n")); 331 332 } else { 333 altDesc->altState = 1; 334 } 335 } 336 } else { 337 altDesc->altState = 2; 338 } 339 break; 340 case 1: /* within pair search */ 341 picokfst_kfstGetNextPair(fst,& altDesc->searchState,& pairFound,& (*outSym),& pairClass); 342 if (pairFound) { 343 picokfst_kfstGetTrans(fst,altDesc->startFSTState,pairClass,& (*endFSTState)); 344 if ((*endFSTState) > 0) { 345 (*found) = 1; 346 (*outRefPos) = inSeq[altDesc->inPos].pos; 347 (*nextInPos) = altDesc->inPos + 1; 348 } 349 } else { 350 /* no more pair found */ 351 altDesc->altState = 2; 352 } 353 break; 354 case 2: /* before inEps trans search */ 355 picokfst_kfstStartInEpsTransSearch(fst,altDesc->startFSTState,& inEpsTransFound,& altDesc->searchState); 356 if (inEpsTransFound) { 357 altDesc->altState = 3; 358 } else { 359 altDesc->altState = 4; 360 } 361 break; 362 case 3: /* within inEps trans search */ 363 picokfst_kfstGetNextInEpsTrans(fst,& altDesc->searchState,& inEpsTransFound,& (*outSym),& (*endFSTState)); 364 if (inEpsTransFound) { 365 (*found) = 1; 366 (*outRefPos) = PICOTRNS_POS_INSERT; 367 (*nextInPos) = altDesc->inPos; 368 } else { 369 altDesc->altState = 4; 370 } 371 break; 372 case 4: /* no more alternatives */ 373 break; 374 } 375 } while (! ((*found) || (altDesc->altState == 4)) ); /* i.e., until (*found) || (altState == 4) */ 376 } 377 378 379 380 /* Transfers current alternatives path stored in 'altDesc' with current path length 'pathLen' 381 into 'outSeq'/'outSeqLen'. The number of solutions is incremented. */ 382 383 static void NoteSolution (picoos_uint32 * nrSol, picotrns_printSolutionFct printSolution, 384 picotrns_altDesc_t altDesc[], picoos_uint16 pathLen, 385 picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen) 386 { 387 register picotrns_AltDesc ap; 388 picoos_uint32 i; 389 390 (*nrSol)++; 391 (*outSeqLen) = 0; 392 for (i = 0; i < pathLen; i++) { 393 if (i < maxOutSeqLen) { 394 ap = &altDesc[i]; 395 outSeq[i].sym = ap->altOutSym; 396 outSeq[i].pos = ap->altOutRefPos; 397 (*outSeqLen)++; 398 } 399 } 400 if (pathLen > maxOutSeqLen) { 401 PICODBG_WARN(("**** output symbol array too small to hold full solution\n")); 402 } 403 if (printSolution != NULL) { 404 printSolution(outSeq,(*outSeqLen)); 405 } 406 } 407 408 409 410 /* * 411 general scheme to get all solutions ("position" refers to abstract backtracking recursion depth, 412 which in the current solution is equal to the output symbol position): 413 414 "set position to first position"; 415 "initialize alternatives in first position"; 416 REPEAT 417 IF "current state in current position is a solution" THEN 418 "note solution"; 419 END; 420 "get first or next acceptable alternative in current position"; 421 IF "acceptable alternative found" THEN 422 "note alternative"; 423 "go to next position"; 424 "initialize alternatives in that position"; 425 ELSE 426 "step back to previous position"; 427 END; 428 UNTIL "current position is before first position" 429 ***/ 430 431 432 /* Initializes transduction state for further use in repeated application 433 of 'TransductionStep'. */ 434 435 static void StartTransduction (struct picotrns_transductionState * transductionState) 436 { 437 (*transductionState).phase = 0; 438 } 439 440 441 442 /* Performs one step in the transduction of 'inSeqLen' input symbols with corresponding 443 reference positions in 'inSeq'. '*transductionState' must have been 444 initialized by 'StartTransduction'. Repeat calls to this procedure until '*finished' returns true. 445 The output is returned in 'outSeqLen' symbols and reference positions in 'outSeq'. 446 The output reference positions refer to the corresponding input reference positions. 447 Inserted output symbols receive the reference position -1. If several solutions are possible, 448 only the last found solution is returned. 449 'altDesc' is a temporary workspace which should be at least one cell longer than 'outSeq'. 450 'firstSolOnly' determines whether only the first solution should be found or if 451 the search should go on to find all solutions (mainly for testing purposes). 452 453 NOTE: current version written for use in single repetitive steps; 454 could be simplified if full transduction can be done as an atomic operation */ 455 456 static void TransductionStep (picokfst_FST fst, struct picotrns_transductionState * transductionState, 457 picotrns_altDesc_t altDesc[], picoos_uint16 maxAltDescLen, 458 picoos_bool firstSolOnly, picotrns_printSolutionFct printSolution, 459 const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen, 460 picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen, 461 picoos_bool * finished) 462 { 463 register picotrns_AltDesc ap; 464 picoos_int32 i; 465 picokfst_state_t endFSTState; 466 picoos_int32 nextInPos; 467 picoos_bool found; 468 picokfst_symid_t outSym; 469 picoos_int32 outRefPos; 470 picoos_int32 tmpRecPos; 471 472 (*finished) = 0; 473 tmpRecPos = (*transductionState).recPos; 474 switch ((*transductionState).phase) { 475 case 0: /* before initialization */ 476 (*transductionState).nrSol = 0; 477 478 /* check for initial solution (empty strings are always accepted) */ 479 if (inSeqLen == 0) { 480 NoteSolution(& (*transductionState).nrSol,printSolution,altDesc,0,outSeq,outSeqLen,maxOutSeqLen); 481 } 482 483 /* initialize first recursion position */ 484 tmpRecPos = 0; 485 ap = & altDesc[0]; 486 ap->startFSTState = 1; 487 ap->inPos = 0; 488 ap->altState = 0; 489 (*transductionState).phase = 1; 490 break; 491 492 case 1: /* before regular recursion step */ 493 if ((tmpRecPos < 0) || (firstSolOnly && ((*transductionState).nrSol > 0))) { 494 /* end reached */ 495 (*transductionState).phase = 2; 496 } else { 497 /* not finished; do regular step */ 498 499 /* get first or next acceptable alternative in current position */ 500 GetNextAlternative(fst,& altDesc[tmpRecPos],inSeq,inSeqLen,& outSym,& outRefPos,& endFSTState,& nextInPos,& found); 501 if (found) { 502 /* note alternative in current position */ 503 ap = & altDesc[tmpRecPos]; 504 ap->altOutSym = outSym; 505 ap->altOutRefPos = outRefPos; 506 507 /* check for solution after found alternative */ 508 if ((nextInPos == inSeqLen) && picokfst_kfstIsAcceptingState(fst,endFSTState)) { 509 NoteSolution(& (*transductionState).nrSol,printSolution,altDesc,tmpRecPos+1, 510 outSeq,outSeqLen,maxOutSeqLen); 511 } 512 513 /* go to next position if possible, start search for follower alternative symbols */ 514 if (tmpRecPos < maxAltDescLen-1) { 515 /* got to next position */ 516 tmpRecPos = tmpRecPos + 1; 517 518 /* initialize alternatives in new position */ 519 ap = & altDesc[tmpRecPos]; 520 ap->startFSTState = endFSTState; 521 ap->inPos = nextInPos; 522 ap->altState = 0; 523 524 } else { 525 /* do not go on due to limited path but still treat alternatives in current position */ 526 PICODBG_WARN(("--- transduction path too long; may fail to find solution\n")); 527 } 528 } else { /* no more acceptable alternative found in current position */ 529 /* backtrack to previous recursion */ 530 tmpRecPos = tmpRecPos - 1; 531 } 532 } 533 break; 534 535 case 2: /* before finish */ 536 if ((*transductionState).nrSol == 0) { 537 PICODBG_WARN(("--- no transduction solution found, using input as output\n")); 538 i = 0; 539 while ((i < inSeqLen) && (i < maxOutSeqLen)) { 540 outSeq[i].sym = inSeq[i].sym; 541 outSeq[i].pos = inSeq[i].pos; 542 i++; 543 } 544 (*outSeqLen) = i; 545 } else if ((*transductionState).nrSol > 1) { 546 PICODBG_WARN(("--- more than one transducer solutions found\n")); 547 } 548 (*transductionState).phase = 3; 549 break; 550 551 case 3: /* after finish */ 552 (*finished) = 1; 553 break; 554 } 555 (*transductionState).recPos = tmpRecPos; 556 } 557 558 559 560 /* see description in header */ 561 pico_status_t picotrns_transduce (picokfst_FST fst, picoos_bool firstSolOnly, 562 picotrns_printSolutionFct printSolution, 563 const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen, 564 picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen, 565 picotrns_AltDesc altDescBuf, picoos_uint16 maxAltDescLen, 566 picoos_uint32 *nrSteps) 567 { 568 struct picotrns_transductionState transductionState; 569 picoos_bool finished; 570 571 #if defined(PICO_DEBUG) 572 { 573 picoos_uint16 i; 574 575 PICODBG_INFO_CTX(); 576 PICODBG_INFO_MSG(("got input: ")); 577 for (i=0; i<inSeqLen; i++) { 578 PICODBG_INFO_MSG((" %d", inSeq[i].sym)); 579 } 580 PICODBG_INFO_MSG((" (")); 581 PICOTRNS_PRINTSYMSEQ(NULL,inSeq,inSeqLen); 582 PICODBG_INFO_MSG((")\n")); 583 } 584 #endif 585 StartTransduction(&transductionState); 586 finished = 0; 587 *nrSteps = 0; 588 while (!finished) { 589 TransductionStep(fst,&transductionState,altDescBuf,maxAltDescLen,firstSolOnly,printSolution, 590 inSeq,inSeqLen,outSeq,outSeqLen,maxOutSeqLen,&finished); 591 (*nrSteps)++; 592 } 593 594 return PICO_OK; 595 } 596 597 598 /** 599 * Data structure for picotrns_SimpleTransducer object. 600 */ 601 typedef struct picotrns_simple_transducer { 602 picoos_Common common; 603 picotrns_possym_t possymBufA[PICOTRNS_MAX_NUM_POSSYM+1]; 604 picotrns_possym_t possymBufB[PICOTRNS_MAX_NUM_POSSYM+1]; 605 picotrns_possym_t * possymBuf; /**< the buffer of the pos/sym pairs */ 606 picotrns_possym_t * possymBufTmp; 607 picoos_uint16 possymReadPos, possymWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */ 608 609 /* buffer for internal calculation of transducer */ 610 picotrns_AltDesc altDescBuf; 611 /* the number of AltDesc in the buffer */ 612 picoos_uint16 maxAltDescLen; 613 } picotrns_simple_transducer_t; 614 615 616 pico_status_t picotrns_stInitialize(picotrns_SimpleTransducer transducer) 617 { 618 transducer->possymBuf = transducer->possymBufA; 619 transducer->possymBufTmp = transducer->possymBufB; 620 transducer->possymReadPos = 0; 621 transducer->possymWritePos = 0; 622 return PICO_OK; 623 } 624 /** creates a SimpleTranducer with a working buffer of given size 625 * 626 * @param mm MemoryManager handle 627 * @param common Common handle 628 * @param maxAltDescLen maximal size for working buffer (in bytes) 629 * @return handle to new SimpleTransducer or NULL if error 630 */ 631 picotrns_SimpleTransducer picotrns_newSimpleTransducer(picoos_MemoryManager mm, 632 picoos_Common common, 633 picoos_uint16 maxAltDescLen) 634 { 635 picotrns_SimpleTransducer this; 636 this = picoos_allocate(mm, sizeof(picotrns_simple_transducer_t)); 637 if (this == NULL) { 638 picoos_deallocate(mm, (void *)&this); 639 picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL); 640 return NULL; 641 } 642 643 /* allocate working buffer */ 644 this->altDescBuf = picotrns_allocate_alt_desc_buf(mm, maxAltDescLen, &this->maxAltDescLen); 645 if (this->altDescBuf == NULL) { 646 picoos_deallocate(mm, (void *)&this); 647 picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL); 648 return NULL; 649 } 650 this->common = common; 651 picotrns_stInitialize(this); 652 return this; 653 } 654 /** disposes a SimpleTransducer 655 * 656 * @param this 657 * @param mm 658 * @return PICO_OK 659 */ 660 pico_status_t picotrns_disposeSimpleTransducer(picotrns_SimpleTransducer * this, 661 picoos_MemoryManager mm) 662 { 663 if (NULL != (*this)) { 664 picotrns_deallocate_alt_desc_buf(mm,&(*this)->altDescBuf); 665 picoos_deallocate(mm, (void *) this); 666 (*this) = NULL; 667 } 668 return PICO_OK; 669 } 670 671 /** transduces the contents previously inserted via @ref picotrns_newSimpleTransducer and @ref 672 * picotrns_disposeSimpleTransducer. 673 * 674 * @param this 675 * @param fst 676 * @return 677 */ 678 pico_status_t picotrns_stTransduce(picotrns_SimpleTransducer this, picokfst_FST fst) 679 { 680 picoos_uint16 outSeqLen; 681 picoos_uint32 nrSteps; 682 pico_status_t status; 683 684 status = picotrns_transduce(fst,TRUE,NULL, 685 this->possymBuf, this->possymWritePos, 686 this->possymBufTmp,&outSeqLen, PICOTRNS_MAX_NUM_POSSYM, 687 this->altDescBuf,this->maxAltDescLen,&nrSteps); 688 if (PICO_OK != status) { 689 return status; 690 } 691 return picotrns_eliminate_epsilons(this->possymBufTmp,outSeqLen,this->possymBuf,&this->possymWritePos,PICOTRNS_MAX_NUM_POSSYM); 692 } 693 694 /** 695 * Add chars from NULLC-terminated string \c inStr, shifted to plane \c plane, to internal input buffer of 696 * \c transducer. 697 * 698 * @param this is an initialized picotrns_SimpleTransducer 699 * @param inStr NULLC-terminated byte sequence 700 * @param plane 701 * @return PICO_OK, if all bytes fit into buffer, or PICO_EXC_BUF_OVERFLOW otherwise 702 */ 703 pico_status_t picotrns_stAddWithPlane(picotrns_SimpleTransducer this, picoos_char * inStr, picoos_uint8 plane) 704 { 705 while ((*inStr) && (this->possymWritePos < PICOTRNS_MAX_NUM_POSSYM)) { 706 this->possymBuf[this->possymWritePos].pos = PICOTRNS_POS_INSERT; 707 this->possymBuf[this->possymWritePos].sym = (plane << 8) + (*inStr); 708 PICODBG_DEBUG(("inserting pos/sym = %i/'%c' at pos %i", 709 this->possymBuf[this->possymWritePos].pos, 710 this->possymBuf[this->possymWritePos].sym, 711 this->possymWritePos)); 712 this->possymWritePos++; 713 inStr++; 714 } 715 if (!(*inStr)) { 716 return PICO_OK; 717 } else { 718 return PICO_EXC_BUF_OVERFLOW; 719 } 720 } 721 722 pico_status_t picotrns_stGetSymSequence( 723 picotrns_SimpleTransducer this, 724 picoos_uint8 * outputSymIds, 725 picoos_uint32 maxOutputSymIds) 726 { 727 picoos_uint8 plane; 728 picoos_uint32 outputCount = 0; 729 while ((this->possymReadPos < this->possymWritePos) && (outputCount < maxOutputSymIds)) { 730 *outputSymIds++ = picotrns_unplane(this->possymBuf[this->possymReadPos++].sym, &plane); 731 outputCount++; 732 } 733 *outputSymIds = NULLC; 734 if (outputCount <= maxOutputSymIds) { 735 return PICO_OK; 736 } else { 737 return PICO_EXC_BUF_OVERFLOW; 738 } 739 } 740 741 #ifdef __cplusplus 742 } 743 #endif 744 745 /* end picotrns.c */ 746