1 /* 2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 /** 17 * @file picotok.c 18 * 19 * tokenizer 20 * 21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 22 * All rights reserved. 23 * 24 * History: 25 * - 2009-04-20 -- initial version 26 * 27 */ 28 29 30 /* ************************************************************/ 31 /* tokenisation and markup handling */ 32 /* ************************************************************/ 33 34 /** @addtogroup picotok 35 @b tokenisation_overview 36 37 markup handling overview: 38 39 The following markups are recognized 40 - ignore 41 - speed 42 - pitch 43 - volume 44 - voice 45 - preproccontext 46 - mark 47 - play 48 - usesig 49 - genfile 50 - sentence 51 - s 52 - paragraph 53 - p 54 - break 55 - spell (pauses between letter) 56 - phoneme 57 58 All markups which are recognized but are not yet implemented in pico 59 system have the mark. 60 */ 61 62 63 #include "picodefs.h" 64 #include "picoos.h" 65 #include "picobase.h" 66 #include "picodbg.h" 67 #include "picodata.h" 68 #include "picotok.h" 69 #include "picoktab.h" 70 71 #ifdef __cplusplus 72 extern "C" { 73 #endif 74 #if 0 75 } 76 #endif 77 78 /* *****************************************************************************/ 79 80 #define IN_BUF_SIZE 255 81 #define OUT_BUF_SIZE IN_BUF_SIZE + 3 * PICODATA_ITEM_HEADSIZE + 3 82 83 #define MARKUP_STRING_BUF_SIZE (IN_BUF_SIZE*5) 84 #define MAX_NR_MARKUP_PARAMS 6 85 #define MARKUP_HANDLING_DISABLED 0 86 #define MARKUP_HANDLING_ENABLED 1 87 #define EOL '\n' 88 89 90 typedef picoos_int8 pico_tokenSubType; 91 typedef picoos_uint8 pico_tokenType; 92 93 /** @todo : consider adding these specialized exception codes: */ 94 95 #define PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE PICO_ERR_OTHER 96 #define PICO_ERR_INVALID_MARKUP_TAG PICO_ERR_OTHER 97 #define PICO_ERR_INTERNAL_LIMIT PICO_ERR_OTHER 98 99 typedef enum {MIDummyStart, MIIgnore, 100 MIPitch, MISpeed, MIVolume, 101 MIVoice, MIPreprocContext, MIMarker, 102 MIPlay, MIUseSig, MIGenFile, MIParagraph, 103 MISentence, MIBreak, MISpell, MIPhoneme, MIItem, MISpeaker, MIDummyEnd 104 } MarkupId; 105 typedef enum {MSNotInMarkup, MSGotStart, MSExpectingmarkupTagName, MSInmarkupTagName, 106 MSGotmarkupTagName, MSInAttrName, MSGotAttrName, MSGotEqual, MSInAttrValue, 107 MSInAttrValueEscaped, MSGotAttrValue, MSGotEndSlash, MSGotEnd, 108 MSError, MSErrorTooLong, MSErrorSyntax 109 } MarkupState; 110 typedef enum {MENone, MEMissingStart, MEUnknownTag, MEIdent, MEMissingEqual, 111 MEMissingQuote, MEMissingEnd, MEUnexpectedChar, MEInterprete 112 } MarkupParseError; 113 114 typedef enum {MTNone, MTStart, MTEnd, MTEmpty} MarkupTagType; 115 116 #define UTF_CHAR_COMPLETE 2 117 #define UTF_CHAR_INCOMPLETE 1 118 #define UTF_CHAR_MALFORMED 0 119 120 #define TOK_MARKUP_KW_IGNORE (picoos_uchar*)"ignore" 121 #define TOK_MARKUP_KW_SPEED (picoos_uchar*)"speed" 122 #define TOK_MARKUP_KW_PITCH (picoos_uchar*)"pitch" 123 #define TOK_MARKUP_KW_VOLUME (picoos_uchar*)"volume" 124 #define TOK_MARKUP_KW_VOICE (picoos_uchar*)"voice" 125 #define TOK_MARKUP_KW_CONTEXT (picoos_uchar*)"preproccontext" 126 #define TOK_MARKUP_KW_MARK (picoos_uchar*)"mark" 127 #define TOK_MARKUP_KW_PLAY (picoos_uchar*)"play" 128 #define TOK_MARKUP_KW_USESIG (picoos_uchar*)"usesig" 129 #define TOK_MARKUP_KW_GENFILE (picoos_uchar*)"genfile" 130 #define TOK_MARKUP_KW_SENTENCE (picoos_uchar*)"sentence" 131 #define TOK_MARKUP_KW_S (picoos_uchar*)"s" 132 #define TOK_MARKUP_KW_PARAGRAPH (picoos_uchar*)"paragraph" 133 #define TOK_MARKUP_KW_P (picoos_uchar*)"p" 134 #define TOK_MARKUP_KW_BREAK (picoos_uchar*)"break" 135 #define TOK_MARKUP_KW_SPELL (picoos_uchar*)"spell" 136 #define TOK_MARKUP_KW_PHONEME (picoos_uchar*)"phoneme" 137 #define TOK_MARKUP_KW_ITEM (picoos_uchar*)"item" 138 #define TOK_MARKUP_KW_SPEAKER (picoos_uchar*)"speaker" 139 140 #define KWLevel (picoos_uchar *)"level" 141 #define KWName (picoos_uchar *)"name" 142 #define KWProsDomain (picoos_uchar *)"prosodydomain" 143 #define KWTime (picoos_uchar *)"time" 144 #define KWMode (picoos_uchar *)"mode" 145 #define KWSB (picoos_uchar *)"sb" 146 #define KWPB (picoos_uchar *)"pb" 147 #define KWFile (picoos_uchar *)"file" 148 #define KWType (picoos_uchar *)"type" 149 #define KWF0Beg (picoos_uchar *)"f0beg" 150 #define KWF0End (picoos_uchar *)"f0end" 151 #define KWXFadeBeg (picoos_uchar *)"xfadebeg" 152 #define KWXFadeEnd (picoos_uchar *)"xfadeend" 153 #define KWAlphabet (picoos_uchar *)"alphabet" 154 #define KWPH (picoos_uchar *)"ph" 155 #define KWOrthMode (picoos_uchar *)"orthmode" 156 #define KWIgnorePunct (picoos_uchar *)"ignorepunct" 157 #define KWInfo1 (picoos_uchar *)"info1" 158 #define KWInfo2 (picoos_uchar *)"info2" 159 #define KWDATA (picoos_uchar *)"data" 160 161 #define PICO_SPEED_MIN 20 162 #define PICO_SPEED_MAX 500 163 #define PICO_SPEED_DEFAULT 100 164 #define PICO_SPEED_FACTOR_MIN 500 165 #define PICO_SPEED_FACTOR_MAX 2000 166 167 #define PICO_PITCH_MIN 50 168 #define PICO_PITCH_MAX 200 169 #define PICO_PITCH_DEFAULT 100 170 #define PICO_PITCH_FACTOR_MIN 500 171 #define PICO_PITCH_FACTOR_MAX 2000 172 #define PICO_PITCH_ADD_MIN -100 173 #define PICO_PITCH_ADD_MAX 100 174 #define PICO_PITCH_ADD_DEFAULT 0 175 176 #define PICO_VOLUME_MIN 0 177 #define PICO_VOLUME_MAX 500 178 #define PICO_VOLUME_DEFAULT 100 179 #define PICO_VOLUME_FACTOR_MIN 500 180 #define PICO_VOLUME_FACTOR_MAX 2000 181 182 #define PICO_SPEAKER_MIN 20 183 #define PICO_SPEAKER_MAX 180 184 #define PICO_SPEAKER_DEFAULT 100 185 #define PICO_SPEAKER_FACTOR_MIN 500 186 #define PICO_SPEAKER_FACTOR_MAX 2000 187 188 #define PICO_CONTEXT_DEFAULT (picoos_uchar*)"DEFAULT" 189 190 #define PARAGRAPH_PAUSE_DUR 500 191 #define SPELL_WITH_PHRASE_BREAK 1 192 #define SPELL_WITH_SENTENCE_BREAK 2 193 194 /* *****************************************************************************/ 195 196 #define TOK_PUNC_FLUSH (picoos_char) '\0' 197 198 typedef picoos_uchar Word[MARKUP_STRING_BUF_SIZE]; 199 200 201 struct MarkupParam { 202 Word paramId; 203 Word paramVal; 204 }; 205 206 typedef struct MarkupParam MarkupParams[MAX_NR_MARKUP_PARAMS]; 207 208 typedef picoos_uchar utf8char0c[5]; /* one more than needed so it is ended always with 0c*/ 209 210 /** subobject : TokenizeUnit 211 * shortcut : tok 212 */ 213 typedef struct tok_subobj 214 { 215 picoos_int32 ignLevel; 216 217 utf8char0c utf; 218 picoos_int32 utfpos; 219 picoos_int32 utflen; 220 221 MarkupParams markupParams; 222 picoos_int32 nrMarkupParams; 223 MarkupState markupState; 224 picoos_uchar markupStr[MARKUP_STRING_BUF_SIZE]; 225 picoos_int32 markupPos; 226 picoos_int32 markupLevel[MIDummyEnd+1]; 227 picoos_uchar markupTagName[IN_BUF_SIZE]; 228 MarkupTagType markupTagType; 229 MarkupParseError markupTagErr; 230 231 picoos_int32 strPos; 232 picoos_uchar strDelim; 233 picoos_bool isFileAttr; 234 235 pico_tokenType tokenType; 236 pico_tokenSubType tokenSubType; 237 238 picoos_int32 tokenPos; 239 picoos_uchar tokenStr[IN_BUF_SIZE]; 240 241 picoos_int32 nrEOL; 242 243 picoos_bool markupHandlingMode; /* to be moved ??? */ 244 picoos_bool aborted; /* to be moved ??? */ 245 246 picoos_bool start; 247 248 picoos_uint8 outBuf[OUT_BUF_SIZE]; /* internal output buffer */ 249 picoos_uint16 outReadPos; /* next pos to read from outBuf */ 250 picoos_uint16 outWritePos; /* next pos to write to outBuf */ 251 252 picoos_uchar saveFile[IN_BUF_SIZE]; 253 Word phonemes; 254 255 picotrns_SimpleTransducer transducer; 256 257 /* kbs */ 258 259 picoktab_Graphs graphTab; 260 picokfst_FST xsampa_parser; 261 picokfst_FST svoxpa_parser; 262 picokfst_FST xsampa2svoxpa_mapper; 263 264 265 266 } tok_subobj_t; 267 268 /* *****************************************************************************/ 269 270 static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok); 271 static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling); 272 static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok); 273 static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[]); 274 static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok); 275 static MarkupId tok_markupTagId (picoos_uchar tagId[]); 276 277 /* *****************************************************************************/ 278 279 static picoos_bool tok_strEqual(picoos_uchar * str1, picoos_uchar * str2) 280 { 281 return (picoos_strcmp((picoos_char*)str1, (picoos_char*)str2) == 0); 282 } 283 284 static void tok_reduceBlanks(picoos_uchar * str) 285 /* Remove leading and trailing blanks of 'str' and reduce 286 groups of blanks within string to exactly one blank. */ 287 288 { 289 int i = 0; 290 int j = 0; 291 292 while (str[j] != 0) { 293 if (str[j] == (picoos_uchar)' ') { 294 /* note one blank except at the beginning of string */ 295 if (i > 0) { 296 str[i] = (picoos_uchar)' '; 297 i++; 298 } 299 j++; 300 while (str[j] == (picoos_uchar)' ') { 301 j++; 302 } 303 } else { 304 str[i] = str[j]; 305 j++; 306 i++; 307 } 308 } 309 310 /* remove blanks at end of string */ 311 if ((i > 0) && (str[i - 1] == ' ')) { 312 i--; 313 } 314 str[i] = 0; 315 } 316 317 318 static void tok_startIgnore (tok_subobj_t * tok) 319 { 320 tok->ignLevel++; 321 } 322 323 324 static void tok_endIgnore (tok_subobj_t * tok) 325 { 326 if (tok->ignLevel > 0) { 327 tok->ignLevel--; 328 } 329 } 330 331 332 static void tok_getParamIntVal (MarkupParams params, picoos_uchar paramId[], picoos_int32 * paramVal, picoos_bool * paramFound) 333 { 334 int i=0; 335 336 while ((i < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId,params[i].paramId)) { 337 i++; 338 } 339 if ((i < MAX_NR_MARKUP_PARAMS)) { 340 (*paramVal) = picoos_atoi((picoos_char*)params[i].paramVal); 341 (*paramFound) = TRUE; 342 } else { 343 (*paramVal) = -1; 344 (*paramFound) = FALSE; 345 } 346 } 347 348 349 350 static void tok_getParamStrVal (MarkupParams params, picoos_uchar paramId[], picoos_uchar paramStrVal[], picoos_bool * paramFound) 351 { 352 int i=0; 353 354 while ((i < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId,params[i].paramId)) { 355 i++; 356 } 357 if (i < MAX_NR_MARKUP_PARAMS) { 358 picoos_strcpy((picoos_char*)paramStrVal, (picoos_char*)params[i].paramVal); 359 (*paramFound) = TRUE; 360 } else { 361 paramStrVal[0] = 0; 362 (*paramFound) = FALSE; 363 } 364 } 365 366 367 static void tok_getParamPhonesStr (MarkupParams params, picoos_uchar paramId[], picoos_uchar alphabet[], picoos_uchar phones[], picoos_int32 phoneslen, picoos_bool * paramFound) 368 { 369 370 int i; 371 picoos_bool done; 372 373 i = 0; 374 while ((i < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId, params[i].paramId)) { 375 i++; 376 } 377 if (i < MAX_NR_MARKUP_PARAMS) { 378 if (tok_strEqual(alphabet, PICODATA_XSAMPA) || tok_strEqual(alphabet, (picoos_uchar*)"")) { 379 picoos_strlcpy((picoos_char*)phones, (picoos_char*)params[i].paramVal, phoneslen); 380 done = TRUE; 381 } else { 382 done = FALSE; 383 } 384 (*paramFound) = TRUE; 385 } else { 386 done = FALSE; 387 (*paramFound) = FALSE; 388 } 389 if (!done) { 390 phones[0] = 0; 391 } 392 } 393 394 395 static void tok_clearMarkupParams (MarkupParams params) 396 { 397 int i; 398 399 for (i = 0; i<MAX_NR_MARKUP_PARAMS; i++) { 400 params[i].paramId[0] = 0; 401 params[i].paramVal[0] = 0; 402 } 403 } 404 405 406 static void tok_getDur (picoos_uchar durStr[], picoos_uint32 * dur, picoos_bool * done) 407 { 408 409 int num=0; 410 int i=0; 411 picoos_uchar tmpWord[IN_BUF_SIZE]; 412 413 picoos_strlcpy((picoos_char*)tmpWord, (picoos_char*)durStr, sizeof(tmpWord)); 414 tok_reduceBlanks(tmpWord); 415 while ((durStr[i] >= '0') && (durStr[i] <= '9')) { 416 num = 10 * num + (int)durStr[i] - (int)'0'; 417 tmpWord[i] = ' '; 418 i++; 419 } 420 tok_reduceBlanks(tmpWord); 421 if (tok_strEqual(tmpWord, (picoos_uchar*)"s")) { 422 (*dur) = (1000 * num); 423 (*done) = TRUE; 424 } else if (tok_strEqual(tmpWord,(picoos_uchar*)"ms")) { 425 (*dur) = num; 426 (*done) = TRUE; 427 } else { 428 (*dur) = 0; 429 (*done) = FALSE; 430 } 431 } 432 433 434 static picoos_int32 tok_putToUtf (tok_subobj_t * tok, picoos_uchar ch) 435 { 436 if (tok->utfpos < PICOBASE_UTF8_MAXLEN) { 437 tok->utf[tok->utfpos] = ch; 438 if (tok->utfpos == 0) { 439 tok->utflen = picobase_det_utf8_length(ch); 440 } else if (((ch < (picoos_uchar)'\200') || (ch >= (picoos_uchar)'\300'))) { 441 tok->utflen = 0; 442 } 443 (tok->utfpos)++; 444 if ((tok->utfpos == tok->utflen)) { 445 if ((tok->utfpos < PICOBASE_UTF8_MAXLEN)) { 446 tok->utf[tok->utfpos] = 0; 447 } 448 return UTF_CHAR_COMPLETE; 449 } else if (tok->utfpos < tok->utflen) { 450 return UTF_CHAR_INCOMPLETE; 451 } else { 452 return UTF_CHAR_MALFORMED; 453 } 454 } else { 455 return UTF_CHAR_MALFORMED; 456 } 457 } 458 459 460 static picoos_bool tok_isRelative (picoos_uchar strval[], picoos_uint32 * val) 461 { 462 picoos_int32 len; 463 picoos_bool rel; 464 465 rel = FALSE; 466 len = picoos_strlen((picoos_char*)strval); 467 if (len > 0) { 468 if (strval[len - 1] == '%') { 469 strval[len - 1] = 0; 470 if ((strval[0] == '+') || (strval[0] == '-')) { 471 (*val) = 1000 + (picoos_atoi((picoos_char*)strval) * 10); 472 } else { 473 (*val) = picoos_atoi((picoos_char*)strval) * 10; 474 } 475 rel = TRUE; 476 } 477 } 478 return rel; 479 } 480 481 482 static void tok_putItem (picodata_ProcessingUnit this, tok_subobj_t * tok, 483 picoos_uint8 itemType, picoos_uint8 info1, picoos_uint8 info2, 484 picoos_uint16 val, 485 picoos_uchar str[]) 486 { 487 picoos_int32 len, i; 488 489 if ((itemType == PICODATA_ITEM_CMD) && (info1 == PICODATA_ITEMINFO1_CMD_FLUSH)) { 490 tok->outBuf[tok->outWritePos++] = itemType; 491 tok->outBuf[tok->outWritePos++] = info1; 492 tok->outBuf[tok->outWritePos++] = info2; 493 tok->outBuf[tok->outWritePos++] = 0; 494 } 495 else if (tok->ignLevel <= 0) { 496 switch (itemType) { 497 case PICODATA_ITEM_CMD: 498 switch (info1) { 499 case PICODATA_ITEMINFO1_CMD_CONTEXT: 500 case PICODATA_ITEMINFO1_CMD_VOICE: 501 case PICODATA_ITEMINFO1_CMD_MARKER: 502 case PICODATA_ITEMINFO1_CMD_PLAY: 503 case PICODATA_ITEMINFO1_CMD_SAVE: 504 case PICODATA_ITEMINFO1_CMD_UNSAVE: 505 case PICODATA_ITEMINFO1_CMD_PROSDOMAIN: 506 case PICODATA_ITEMINFO1_CMD_PHONEME: 507 len = picoos_strlen((picoos_char*)str); 508 if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) { 509 tok->outBuf[tok->outWritePos++] = itemType; 510 tok->outBuf[tok->outWritePos++] = info1; 511 tok->outBuf[tok->outWritePos++] = info2; 512 tok->outBuf[tok->outWritePos++] = len; 513 for (i=0; i<len; i++) { 514 tok->outBuf[tok->outWritePos++] = str[i]; 515 } 516 } 517 else { 518 PICODBG_WARN(("tok_putItem: output buffer too small")); 519 } 520 break; 521 case PICODATA_ITEMINFO1_CMD_IGNSIG: 522 case PICODATA_ITEMINFO1_CMD_IGNORE: 523 if (tok->outWritePos + 4 < OUT_BUF_SIZE) { 524 tok->outBuf[tok->outWritePos++] = itemType; 525 tok->outBuf[tok->outWritePos++] = info1; 526 tok->outBuf[tok->outWritePos++] = info2; 527 tok->outBuf[tok->outWritePos++] = 0; 528 } 529 else { 530 PICODBG_WARN(("tok_putItem: output buffer too small")); 531 } 532 break; 533 case PICODATA_ITEMINFO1_CMD_SPEED: 534 case PICODATA_ITEMINFO1_CMD_PITCH: 535 case PICODATA_ITEMINFO1_CMD_VOLUME: 536 case PICODATA_ITEMINFO1_CMD_SPELL: 537 case PICODATA_ITEMINFO1_CMD_SIL: 538 case PICODATA_ITEMINFO1_CMD_SPEAKER: 539 if (tok->outWritePos + 4 + 2 < OUT_BUF_SIZE) { 540 tok->outBuf[tok->outWritePos++] = itemType; 541 tok->outBuf[tok->outWritePos++] = info1; 542 tok->outBuf[tok->outWritePos++] = info2; 543 tok->outBuf[tok->outWritePos++] = 2; 544 tok->outBuf[tok->outWritePos++] = val % 256; 545 tok->outBuf[tok->outWritePos++] = val / 256; 546 } 547 else { 548 PICODBG_WARN(("tok_putItem: output buffer too small")); 549 } 550 break; 551 default: 552 PICODBG_WARN(("tok_putItem: unknown command type")); 553 } 554 break; 555 case PICODATA_ITEM_TOKEN: 556 len = picoos_strlen((picoos_char*)str); 557 if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) { 558 tok->outBuf[tok->outWritePos++] = itemType; 559 tok->outBuf[tok->outWritePos++] = info1; 560 tok->outBuf[tok->outWritePos++] = info2; 561 tok->outBuf[tok->outWritePos++] = len; 562 for (i=0; i<len; i++) { 563 tok->outBuf[tok->outWritePos++] = str[i]; 564 } 565 } 566 else { 567 PICODBG_WARN(("tok_putItem: output buffer too small")); 568 } 569 break; 570 default: 571 PICODBG_WARN(("tok_putItem: unknown item type")); 572 } 573 } 574 } 575 576 577 static void tok_putItem2 (picodata_ProcessingUnit this, tok_subobj_t * tok, 578 picoos_uint8 type, 579 picoos_uint8 info1, picoos_uint8 info2, 580 picoos_uint8 len, 581 picoos_uint8 data[]) 582 { 583 picoos_int32 i; 584 585 if (is_valid_itemtype(type)) { 586 tok->outBuf[tok->outWritePos++] = type; 587 tok->outBuf[tok->outWritePos++] = info1; 588 tok->outBuf[tok->outWritePos++] = info2; 589 tok->outBuf[tok->outWritePos++] = len; 590 for (i=0; i<len; i++) { 591 tok->outBuf[tok->outWritePos++] = data[i]; 592 } 593 } 594 } 595 596 597 static MarkupId tok_markupTagId (picoos_uchar tagId[]) 598 { 599 if (picoos_strstr(tagId,(picoos_char *)"svox:") == (picoos_char *)tagId) { 600 tagId+=5; 601 } 602 if (tok_strEqual(tagId, TOK_MARKUP_KW_IGNORE)) { 603 return MIIgnore; 604 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEED)) { 605 return MISpeed; 606 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PITCH)) { 607 return MIPitch; 608 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOLUME)) { 609 return MIVolume; 610 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEAKER)) { 611 return MISpeaker; 612 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOICE)) { 613 return MIVoice; 614 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_CONTEXT)) { 615 return MIPreprocContext; 616 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_MARK)) { 617 return MIMarker; 618 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PLAY)) { 619 return MIPlay; 620 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_USESIG)) { 621 return MIUseSig; 622 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_GENFILE)) { 623 return MIGenFile; 624 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SENTENCE) || tok_strEqual(tagId, TOK_MARKUP_KW_S)) { 625 return MISentence; 626 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PARAGRAPH) || tok_strEqual(tagId, TOK_MARKUP_KW_P)) { 627 return MIParagraph; 628 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_BREAK)) { 629 return MIBreak; 630 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPELL)) { 631 return MISpell; 632 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PHONEME)) { 633 return MIPhoneme; 634 } else if (tok_strEqual(tagId, TOK_MARKUP_KW_ITEM)) { 635 return MIItem; 636 } else { 637 return MIDummyEnd; 638 } 639 } 640 641 642 static void tok_checkLimits (picodata_ProcessingUnit this, picoos_uint32 * value, picoos_uint32 min, picoos_uint32 max, picoos_uchar valueType[]) 643 { 644 if ((((*value) < min) || ((*value) > max))) { 645 picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %i for %s", *value, valueType); 646 if (((*value) < min)) { 647 (*value) = min; 648 } else if (((*value) > max)) { 649 (*value) = max; 650 } 651 } 652 } 653 654 655 656 /* 657 658 static void tok_checkRealLimits (picodata_ProcessingUnit this, picoos_single * value, picoos_single min, picoos_single max, picoos_uchar valueType[]) 659 { 660 if ((((*value) < min) || ((*value) > max))) { 661 picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %f for %s", *value, valueType); 662 if (((*value) < min)) { 663 (*value) = min; 664 } else if (((*value) > max)) { 665 (*value) = max; 666 } 667 } 668 } 669 */ 670 671 #define VAL_STR_LEN 21 672 673 static void tok_interpretMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_bool isStartTag, MarkupId mId) 674 { 675 picoos_bool done; 676 picoos_int32 ival; 677 picoos_uint32 uval; 678 picoos_int32 ival2; 679 picoos_uchar valStr[VAL_STR_LEN]; 680 picoos_uchar valStr2[VAL_STR_LEN]; 681 picoos_uchar valStr3[VAL_STR_LEN]; 682 picoos_int32 i2; 683 picoos_uint32 dur; 684 picoos_bool done1; 685 picoos_bool paramFound; 686 picoos_uint8 type, info1, info2; 687 picoos_uint8 data[256]; 688 picoos_int32 pos, n, len; 689 picoos_uchar part[10]; 690 691 done = FALSE; 692 switch (mId) { 693 case MIIgnore: 694 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) { 695 tok_startIgnore(tok); 696 done = TRUE; 697 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 698 tok_endIgnore(tok); 699 done = TRUE; 700 } 701 break; 702 case MISpeed: 703 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) { 704 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) { 705 tok_checkLimits(this, & uval, PICO_SPEED_FACTOR_MIN, PICO_SPEED_FACTOR_MAX,(picoos_uchar*)"relative speed factor"); 706 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)""); 707 } else { 708 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal); 709 tok_checkLimits(this, & uval, PICO_SPEED_MIN, PICO_SPEED_MAX,(picoos_uchar*)"speed"); 710 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)""); 711 } 712 done = TRUE; 713 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 714 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEED_DEFAULT, (picoos_uchar*)""); 715 done = TRUE; 716 } 717 break; 718 case MIPitch: 719 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) { 720 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) { 721 tok_checkLimits(this, & uval,PICO_PITCH_FACTOR_MIN,PICO_PITCH_FACTOR_MAX, (picoos_uchar*)"relative pitch factor"); 722 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)""); 723 } else { 724 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal); 725 tok_checkLimits(this, & uval,PICO_PITCH_MIN,PICO_PITCH_MAX, (picoos_uchar*)"pitch"); 726 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)""); 727 } 728 done = TRUE; 729 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 730 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_PITCH_DEFAULT, (picoos_uchar*)""); 731 done = TRUE; 732 } 733 break; 734 case MIVolume: 735 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) { 736 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) { 737 tok_checkLimits(this, & uval, PICO_VOLUME_FACTOR_MIN, PICO_VOLUME_FACTOR_MAX, (picoos_uchar*)"relative volume factor"); 738 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)""); 739 } else { 740 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal); 741 tok_checkLimits(this, & uval, PICO_VOLUME_MIN, PICO_VOLUME_MAX, (picoos_uchar*)"volume"); 742 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)""); 743 } 744 done = TRUE; 745 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 746 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_VOLUME_DEFAULT, (picoos_uchar*)""); 747 done = TRUE; 748 } 749 break; 750 case MISpeaker: 751 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) { 752 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) { 753 tok_checkLimits(this, & uval, PICO_SPEAKER_FACTOR_MIN, PICO_SPEAKER_FACTOR_MAX, (picoos_uchar*)"relative speaker factor"); 754 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)""); 755 } else { 756 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal); 757 tok_checkLimits(this, & uval, PICO_SPEAKER_MIN, PICO_SPEAKER_MAX, (picoos_uchar*)"volume"); 758 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)""); 759 } 760 done = TRUE; 761 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 762 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEAKER_DEFAULT, (picoos_uchar*)""); 763 done = TRUE; 764 } 765 break; 766 767 case MIVoice: 768 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) { 769 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal); 770 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); 771 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)""); 772 done = TRUE; 773 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) { 774 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); 775 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); 776 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)""); 777 done = TRUE; 778 } 779 break; 780 case MIPreprocContext: 781 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) { 782 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal); 783 done = TRUE; 784 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) { 785 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, PICO_CONTEXT_DEFAULT); 786 done = TRUE; 787 } 788 break; 789 case MIMarker: 790 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) { 791 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_MARKER, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal); 792 done = TRUE; 793 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) { 794 done = TRUE; 795 } 796 break; 797 case MISentence: 798 if (isStartTag) { 799 tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound); 800 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); 801 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, valStr); 802 done = TRUE; 803 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 804 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); 805 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, (picoos_uchar*)""); 806 done = TRUE; 807 } 808 break; 809 case MIParagraph: 810 if (isStartTag) { 811 tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound); 812 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); 813 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, valStr); 814 done = TRUE; 815 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 816 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); 817 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, PARAGRAPH_PAUSE_DUR, (picoos_uchar*)""); 818 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, (picoos_uchar*)""); 819 done = TRUE; 820 } 821 break; 822 case MIBreak: 823 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWTime)) { 824 tok_getDur(tok->markupParams[0].paramVal, & dur, & done1); 825 tok_checkLimits (this, &dur, 0, 65535, (picoos_uchar*)"time"); 826 if (done1) { 827 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, dur, (picoos_uchar*)""); 828 done = TRUE; 829 } 830 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 831 done = TRUE; 832 } 833 break; 834 case MISpell: 835 if (isStartTag) { 836 if (tok_strEqual(tok->markupParams[0].paramId, KWMode)) { 837 if (tok_strEqual(tok->markupParams[0].paramVal, KWPB)) { 838 uval = SPELL_WITH_PHRASE_BREAK; 839 } else if (tok_strEqual(tok->markupParams[0].paramVal, KWSB)) { 840 uval = SPELL_WITH_SENTENCE_BREAK; 841 } else { 842 tok_getDur(tok->markupParams[0].paramVal, & uval, & done1); 843 tok_checkLimits (this, & uval, 0, 65535, (picoos_uchar*)"time"); 844 if (done1) { 845 done = TRUE; 846 } 847 } 848 } else { 849 uval = SPELL_WITH_PHRASE_BREAK; 850 } 851 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_START, uval, (picoos_uchar*)""); 852 done = TRUE; 853 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 854 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)""); 855 done = TRUE; 856 } 857 break; 858 case MIGenFile: 859 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) { 860 if (tok->saveFile[0] != 0) { 861 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE, 862 picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, tok->saveFile); 863 tok->saveFile[0] = 0; 864 } 865 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SAVE, 866 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/FALSE), 0, tok->markupParams[0].paramVal); 867 picoos_strcpy((picoos_char*)tok->saveFile, (picoos_char*)tok->markupParams[0].paramVal); 868 done = TRUE; 869 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 870 if (tok->saveFile[0] != 0) { 871 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE, 872 picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, (picoos_uchar*)""); 873 tok->saveFile[0] = 0; 874 } 875 done = TRUE; 876 } 877 break; 878 case MIPlay: 879 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) { 880 if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) { 881 tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound); 882 tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound); 883 tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3,& paramFound); 884 tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound); 885 tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound); 886 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY, 887 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal); 888 tok_startIgnore(tok); 889 } else { 890 if (tok->ignLevel > 0) { 891 tok_startIgnore(tok); 892 } else { 893 picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead\n", tok->markupParams[0].paramVal); 894 } 895 } 896 done = TRUE; 897 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 898 tok_endIgnore(tok); 899 done = TRUE; 900 } 901 break; 902 case MIUseSig: 903 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) { 904 if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) { 905 tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound); 906 tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound); 907 tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3, & paramFound); 908 tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound); 909 tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound); 910 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY, 911 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal); 912 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_START, 0, (picoos_uchar*)""); 913 } else { 914 if (tok->ignLevel <= 0) { 915 picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead", tok->markupParams[0].paramVal); 916 } 917 } 918 done = TRUE; 919 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 920 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)""); 921 done = TRUE; 922 } 923 break; 924 case MIPhoneme: 925 i2 = 0; 926 if (isStartTag) { 927 if (tok_strEqual(tok->markupParams[0].paramId, KWAlphabet) && tok_strEqual(tok->markupParams[1].paramId, KWPH)) { 928 if (tok_strEqual(tok->markupParams[2].paramId, KWOrthMode) 929 && tok_strEqual(tok->markupParams[2].paramVal, KWIgnorePunct)) { 930 i2 = 1; 931 } 932 if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[1].paramVal, tok->markupParams[0].paramVal, tok->phonemes, sizeof(tok->phonemes)-1) == PICO_OK) { 933 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME, 934 PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes); 935 done = TRUE; 936 } else { 937 PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal)); 938 picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal); 939 done = TRUE; 940 } 941 } else if (tok_strEqual(tok->markupParams[0].paramId, KWPH)) { 942 if (tok_strEqual(tok->markupParams[1].paramId, KWOrthMode) 943 && tok_strEqual(tok->markupParams[1].paramVal, KWIgnorePunct)) { 944 i2 = 1; 945 } 946 if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[0].paramVal, PICODATA_XSAMPA, tok->phonemes, sizeof(tok->phonemes)) == PICO_OK) { 947 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME, 948 PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes); 949 done = TRUE; 950 } 951 else { 952 PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal)); 953 picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizing text instead", tok->markupParams[0].paramVal); 954 done = TRUE; 955 } 956 } 957 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { 958 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME, 959 PICODATA_ITEMINFO2_CMD_END, i2, (picoos_uchar*)""); 960 done = TRUE; 961 } 962 break; 963 case MIItem: 964 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWType) && 965 tok_strEqual(tok->markupParams[1].paramId, KWInfo1)&& 966 tok_strEqual(tok->markupParams[2].paramId, KWInfo2)&& 967 tok_strEqual(tok->markupParams[3].paramId, KWDATA)) { 968 picoos_int32 len2, n2; 969 type = picoos_atoi(tok->markupParams[0].paramVal); 970 info1 = picoos_atoi(tok->markupParams[1].paramVal); 971 info2 = picoos_atoi(tok->markupParams[2].paramVal); 972 n = 0; n2 = 0; 973 len2 = (picoos_int32)picoos_strlen(tok->markupParams[3].paramVal); 974 while (n<len2) { 975 while ((tok->markupParams[3].paramVal[n] != 0) && (tok->markupParams[3].paramVal[n] <= 32)) { 976 n++; 977 } 978 tok->markupParams[3].paramVal[n2] = tok->markupParams[3].paramVal[n]; 979 n++; 980 n2++; 981 } 982 if (is_valid_itemtype(type)) { 983 done = TRUE; 984 len = 0; 985 pos = 0; 986 picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal), 987 &pos, ',', part, 10, &done1); 988 while (done && done1) { 989 n = picoos_atoi(part); 990 if ((n>=0) && (n<256) && (len<256)) { 991 data[len++] = n; 992 } 993 else { 994 done = FALSE; 995 } 996 picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal), 997 &pos, ',', part, 10, &done1); 998 } 999 if (done) { 1000 tok_putItem2(this, tok, type, info1, info2, len, data); 1001 } 1002 } 1003 else { 1004 done = FALSE; 1005 } 1006 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) { 1007 done = TRUE; 1008 } 1009 break; 1010 default: 1011 break; 1012 } 1013 if (!done) { 1014 tok->markupTagErr = MEInterprete; 1015 } 1016 if (isStartTag) { 1017 tok->markupLevel[mId]++; 1018 } else if ((tok->markupLevel[mId] > 0)) { 1019 tok->markupLevel[mId]--; 1020 } 1021 } 1022 1023 1024 static picoos_bool tok_attrChar (picoos_uchar ch, picoos_bool first) 1025 { 1026 return ((((ch >= (picoos_uchar)'A') && (ch <= (picoos_uchar)'Z')) || 1027 ((ch >= (picoos_uchar)'a') && (ch <= (picoos_uchar)'z'))) || 1028 ( !(first) && ((ch >= (picoos_uchar)'0') && (ch <= (picoos_uchar)'9')))); 1029 } 1030 1031 1032 1033 static picoos_bool tok_idChar (picoos_uchar ch, picoos_bool first) 1034 { 1035 return tok_attrChar(ch, first) || ( !(first) && (ch == (picoos_uchar)':')); 1036 } 1037 1038 1039 static void tok_setIsFileAttr (picoos_uchar name[], picoos_bool * isFile) 1040 { 1041 (*isFile) = tok_strEqual(name, KWFile); 1042 } 1043 1044 /* *****************************************************************************/ 1045 1046 static void tok_putToSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[], pico_tokenType type, pico_tokenSubType subtype) 1047 { 1048 int i, len; 1049 1050 if (str[0] != 0) { 1051 len = picoos_strlen((picoos_char*)str); 1052 for (i = 0; i < len; i++) { 1053 if (tok->tokenPos >= IN_BUF_SIZE) { 1054 picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT, (picoos_char*)"", (picoos_char*)"simple token too long; forced treatment"); 1055 tok_treatSimpleToken(this, tok); 1056 } 1057 tok->tokenStr[tok->tokenPos] = str[i]; 1058 tok->tokenPos++; 1059 } 1060 } 1061 tok->tokenType = type; 1062 tok->tokenSubType = subtype; 1063 } 1064 1065 1066 static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[]) 1067 { 1068 picoos_int32 i, len; 1069 picoos_uint8 ok; 1070 1071 tok->markupTagErr = MENone; 1072 len = picoos_strlen((picoos_char*)str); 1073 for (i = 0; i< len; i++) { 1074 if (tok->markupPos >= (MARKUP_STRING_BUF_SIZE - 1)) { 1075 if ((tok->markupPos == (MARKUP_STRING_BUF_SIZE - 1)) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) { 1076 picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"markup tag too long"); 1077 } 1078 tok->markupState = MSErrorTooLong; 1079 } else if ((str[i] == (picoos_uchar)' ') && ((tok->markupState == MSExpectingmarkupTagName) || (tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSGotAttrName) || (tok->markupState == MSGotEqual) || (tok->markupState == MSGotAttrValue))) { 1080 } else if ((str[i] == (picoos_uchar)'>') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) { 1081 tok->markupState = MSGotEnd; 1082 } else if ((str[i] == (picoos_uchar)'/') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) { 1083 if (tok->markupTagType == MTEnd) { 1084 tok->markupTagErr = MEUnexpectedChar; 1085 tok->markupState = MSError; 1086 } else { 1087 tok->markupTagType = MTEmpty; 1088 tok->markupState = MSGotEndSlash; 1089 } 1090 } else { 1091 switch (tok->markupState) { 1092 case MSNotInMarkup: 1093 if (str[i] == (picoos_uchar)'<') { 1094 tok_clearMarkupParams(tok->markupParams); 1095 tok->nrMarkupParams = 0; 1096 tok->strPos = 0; 1097 tok->markupTagType = MTStart; 1098 tok->markupState = MSGotStart; 1099 } else { 1100 tok->markupTagErr = MEMissingStart; 1101 tok->markupState = MSError; 1102 } 1103 break; 1104 case MSGotStart: 1105 if (str[i] == (picoos_uchar)'/') { 1106 tok->markupTagType = MTEnd; 1107 tok->markupState = MSExpectingmarkupTagName; 1108 } else if (str[i] == (picoos_uchar)' ') { 1109 tok->markupState = MSExpectingmarkupTagName; 1110 } else if (tok_idChar(str[i],TRUE)) { 1111 tok->markupTagType = MTStart; 1112 tok->markupTagName[tok->strPos] = str[i]; 1113 tok->strPos++; 1114 tok->markupTagName[tok->strPos] = 0; 1115 tok->markupState = MSInmarkupTagName; 1116 } else { 1117 tok->markupTagErr = MEUnexpectedChar; 1118 tok->markupState = MSError; 1119 } 1120 break; 1121 case MSInmarkupTagName: case MSExpectingmarkupTagName: 1122 if (tok_idChar(str[i],tok->markupState == MSExpectingmarkupTagName)) { 1123 tok->markupTagName[tok->strPos] = str[i]; 1124 tok->strPos++; 1125 tok->markupTagName[(tok->strPos)] = 0; 1126 tok->markupState = MSInmarkupTagName; 1127 } else if ((tok->markupState == MSInmarkupTagName) && (str[i] == (picoos_uchar)' ')) { 1128 tok->markupState = MSGotmarkupTagName; 1129 picobase_lowercase_utf8_str(tok->markupTagName, (picoos_char*)tok->markupTagName, IN_BUF_SIZE, &ok); 1130 tok->strPos = 0; 1131 } else { 1132 tok->markupTagErr = MEIdent; 1133 tok->markupState = MSError; 1134 } 1135 break; 1136 case MSGotmarkupTagName: case MSGotAttrValue: 1137 if (tok_attrChar(str[i], TRUE)) { 1138 if (tok->markupTagType == MTEnd) { 1139 tok->markupTagErr = MEUnexpectedChar; 1140 tok->markupState = MSError; 1141 } else { 1142 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) { 1143 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i]; 1144 tok->strPos++; 1145 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0; 1146 } else { 1147 picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"too many attributes in markup; ignoring"); 1148 } 1149 tok->markupState = MSInAttrName; 1150 } 1151 } else { 1152 tok->markupTagErr = MEUnexpectedChar; 1153 tok->markupState = MSError; 1154 } 1155 break; 1156 case MSInAttrName: 1157 if (tok_attrChar(str[i], FALSE)) { 1158 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) { 1159 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i]; 1160 tok->strPos++; 1161 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0; 1162 } 1163 tok->markupState = MSInAttrName; 1164 } else if (str[i] == (picoos_uchar)' ') { 1165 picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok); 1166 tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr); 1167 tok->markupState = MSGotAttrName; 1168 } else if (str[i] == (picoos_uchar)'=') { 1169 picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok); 1170 tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr); 1171 tok->markupState = MSGotEqual; 1172 } else { 1173 tok->markupTagErr = MEMissingEqual; 1174 tok->markupState = MSError; 1175 } 1176 break; 1177 case MSGotAttrName: 1178 if (str[i] == (picoos_uchar)'=') { 1179 tok->markupState = MSGotEqual; 1180 } else { 1181 tok->markupTagErr = MEMissingEqual; 1182 tok->markupState = MSError; 1183 } 1184 break; 1185 case MSGotEqual: 1186 if ((str[i] == (picoos_uchar)'"') || (str[i] == (picoos_uchar)'\'')) { 1187 tok->strDelim = str[i]; 1188 tok->strPos = 0; 1189 tok->markupState = MSInAttrValue; 1190 } else { 1191 tok->markupTagErr = MEMissingQuote; 1192 tok->markupState = MSError; 1193 } 1194 break; 1195 case MSInAttrValue: 1196 if (!(tok->isFileAttr) && (str[i] == (picoos_uchar)'\\')) { 1197 tok->markupState = MSInAttrValueEscaped; 1198 } else if (str[i] == tok->strDelim) { 1199 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) { 1200 tok->nrMarkupParams++; 1201 } 1202 tok->strPos = 0; 1203 tok->markupState = MSGotAttrValue; 1204 } else { 1205 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) { 1206 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i]; 1207 tok->strPos++; 1208 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0; 1209 } 1210 tok->markupState = MSInAttrValue; 1211 } 1212 break; 1213 case MSInAttrValueEscaped: 1214 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) { 1215 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i]; 1216 tok->strPos++; 1217 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0; 1218 } 1219 tok->markupState = MSInAttrValue; 1220 break; 1221 case MSGotEndSlash: 1222 if (str[i] == (picoos_uchar)'>') { 1223 tok->markupState = MSGotEnd; 1224 } else { 1225 tok->markupTagErr = MEUnexpectedChar; 1226 tok->markupState = MSError; 1227 } 1228 break; 1229 default: 1230 tok->markupTagErr = MEUnexpectedChar; 1231 tok->markupState = MSError; 1232 break; 1233 } 1234 } 1235 if (tok->markupTagErr == MENone) { 1236 tok->markupStr[tok->markupPos] = str[i]; 1237 tok->markupPos++; 1238 } /* else restart parsing at current char */ 1239 tok->markupStr[tok->markupPos] = 0; 1240 } 1241 /* 1242 PICODBG_DEBUG(("putToMarkup %s", tok->markupStr)); 1243 */ 1244 } 1245 1246 /* *****************************************************************************/ 1247 1248 static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok) 1249 { 1250 picoos_int32 i; 1251 1252 tok->utfpos = 0; 1253 tok->utflen = 0; 1254 tok->markupState = MSNotInMarkup; 1255 for (i = 0; i < tok->markupPos; i++) { 1256 tok_treatChar(this, tok, tok->markupStr[i], FALSE); 1257 } 1258 tok->markupPos = 0; 1259 tok->strPos = 0; 1260 } 1261 1262 1263 static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok) 1264 { 1265 MarkupId mId; 1266 1267 if (tok_markupTagId(tok->markupTagName) != MIDummyEnd) { 1268 if (tok->markupTagErr == MENone) { 1269 tok->markupState = MSNotInMarkup; 1270 if ((tok->tokenType != PICODATA_ITEMINFO1_TOKTYPE_SPACE) && (tok->tokenType != PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED)) { 1271 tok_treatSimpleToken(this, tok); 1272 } 1273 tok_putToSimpleToken(this, tok, (picoos_uchar*)" ", PICODATA_ITEMINFO1_TOKTYPE_SPACE, -1); 1274 mId = tok_markupTagId(tok->markupTagName); 1275 if ((tok->markupTagType == MTStart) || (tok->markupTagType == MTEmpty)) { 1276 tok_interpretMarkup(this, tok, TRUE, mId); 1277 } 1278 if (((tok->markupTagType == MTEnd) || (tok->markupTagType == MTEmpty))) { 1279 tok_clearMarkupParams(tok->markupParams); 1280 tok->nrMarkupParams = 0; 1281 tok_interpretMarkup(this, tok, FALSE,mId); 1282 } 1283 } 1284 if (tok->markupTagErr != MENone) { 1285 if (!tok->aborted) { 1286 picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"syntax error in markup token '%s'",tok->markupStr); 1287 } 1288 tok_treatMarkupAsSimpleToken(this, tok); 1289 } 1290 } else { 1291 tok_treatMarkupAsSimpleToken(this, tok); 1292 } 1293 tok->markupState = MSNotInMarkup; 1294 tok->markupPos = 0; 1295 tok->strPos = 0; 1296 } 1297 1298 1299 1300 static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling) 1301 { 1302 picoos_int32 i, id; 1303 picoos_uint8 uval8; 1304 pico_tokenType type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED; 1305 pico_tokenSubType subtype = -1; 1306 picoos_bool dummy; 1307 utf8char0c utf2; 1308 picoos_int32 utf2pos; 1309 1310 if (ch == NULLC) { 1311 tok_treatSimpleToken(this, tok); 1312 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); 1313 } 1314 else { 1315 switch (tok_putToUtf(tok, ch)) { 1316 case UTF_CHAR_MALFORMED: 1317 tok->utfpos = 0; 1318 tok->utflen = 0; 1319 break; 1320 case UTF_CHAR_INCOMPLETE: 1321 break; 1322 case UTF_CHAR_COMPLETE: 1323 markupHandling = (markupHandling && (tok->markupHandlingMode == MARKUP_HANDLING_ENABLED)); 1324 id = picoktab_graphOffset(tok->graphTab, tok->utf); 1325 if (id > 0) { 1326 if (picoktab_getIntPropTokenType(tok->graphTab, id, &uval8)) { 1327 type = (pico_tokenType)uval8; 1328 if (type == PICODATA_ITEMINFO1_TOKTYPE_LETTERV) { 1329 type = PICODATA_ITEMINFO1_TOKTYPE_LETTER; 1330 } 1331 } 1332 dummy = picoktab_getIntPropTokenSubType(tok->graphTab, id, &subtype); 1333 } else if (tok->utf[tok->utfpos-1] <= (picoos_uchar)' ') { 1334 type = PICODATA_ITEMINFO1_TOKTYPE_SPACE; 1335 subtype = -1; 1336 } else { 1337 type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED; 1338 subtype = -1; 1339 } 1340 if ((tok->utf[tok->utfpos-1] > (picoos_uchar)' ')) { 1341 tok->nrEOL = 0; 1342 } else if ((tok->utf[tok->utfpos-1] == EOL)) { 1343 tok->nrEOL++; 1344 } 1345 if (markupHandling && (tok->markupState != MSNotInMarkup)) { 1346 tok_putToMarkup(this, tok, tok->utf); 1347 if (tok->markupState >= MSError) { 1348 picoos_strlcpy(utf2, tok->utf, 5); 1349 utf2pos = tok->utfpos; 1350 /* treat string up to (but not including) current char as simple 1351 token and restart markup tag parsing with current char */ 1352 tok_treatMarkupAsSimpleToken(this, tok); 1353 for (i = 0; i < utf2pos; i++) { 1354 tok_treatChar(this, tok, utf2[i], markupHandling); 1355 } 1356 } else if (tok->markupState == MSGotEnd) { 1357 tok_treatMarkup(this, tok); 1358 } 1359 } else if ((markupHandling && (tok->utf[tok->utfpos-1] == (picoos_uchar)'<'))) { 1360 tok_putToMarkup(this, tok, tok->utf); 1361 } else if (type != PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED) { 1362 if ((type != tok->tokenType) || (type == PICODATA_ITEMINFO1_TOKTYPE_CHAR) || (subtype != tok->tokenSubType)) { 1363 tok_treatSimpleToken(this, tok); 1364 } else if ((tok->utf[tok->utfpos-1] == EOL) && (tok->nrEOL == 2)) { 1365 tok_treatSimpleToken(this, tok); 1366 tok_putToSimpleToken(this, tok, (picoos_uchar*)".", PICODATA_ITEMINFO1_TOKTYPE_CHAR, -1); 1367 tok_treatSimpleToken(this, tok); 1368 } 1369 tok_putToSimpleToken(this, tok, tok->utf, type, subtype); 1370 } else { 1371 tok_treatSimpleToken(this, tok); 1372 } 1373 tok->utfpos = 0; 1374 tok->utflen = 0; 1375 break; 1376 } 1377 } 1378 } 1379 1380 1381 static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok) 1382 { 1383 if (tok->tokenPos < IN_BUF_SIZE) { 1384 tok->tokenStr[tok->tokenPos] = 0; 1385 } 1386 if (tok->markupState != MSNotInMarkup) { 1387 if (!(tok->aborted) && (tok->markupState >= MSGotmarkupTagName) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) { 1388 picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"unfinished markup tag '%s'",tok->markupStr); 1389 } 1390 tok_treatMarkupAsSimpleToken(this, tok); 1391 tok_treatSimpleToken(this, tok); 1392 } else if ((tok->tokenPos > 0) && ((tok->ignLevel <= 0) || (tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_SPACE))) { 1393 tok_putItem(this, tok, PICODATA_ITEM_TOKEN, tok->tokenType, (picoos_uint8)tok->tokenSubType, 0, tok->tokenStr); 1394 } 1395 tok->tokenPos = 0; 1396 tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED; 1397 tok->tokenSubType = -1; 1398 } 1399 1400 /* *****************************************************************************/ 1401 1402 static pico_status_t tokReset(register picodata_ProcessingUnit this, picoos_int32 resetMode) 1403 { 1404 tok_subobj_t * tok; 1405 MarkupId mId; 1406 1407 if (NULL == this || NULL == this->subObj) { 1408 return PICO_ERR_OTHER; 1409 } 1410 tok = (tok_subobj_t *) this->subObj; 1411 1412 tok->ignLevel = 0; 1413 1414 tok->utfpos = 0; 1415 tok->utflen = 0; 1416 1417 tok_clearMarkupParams(tok->markupParams); 1418 tok->nrMarkupParams = 0; 1419 tok->markupState = MSNotInMarkup; 1420 tok->markupPos = 0; 1421 for (mId = MIDummyStart; mId <= MIDummyEnd; mId++) { 1422 tok->markupLevel[mId] = 0; 1423 } 1424 tok->markupTagName[0] = 0; 1425 tok->markupTagType = MTNone; 1426 tok->markupTagErr = MENone; 1427 1428 tok->strPos = 0; 1429 tok->strDelim = 0; 1430 tok->isFileAttr = FALSE; 1431 1432 tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED; 1433 tok->tokenSubType = -1; 1434 tok->tokenPos = 0; 1435 1436 tok->nrEOL = 0; 1437 1438 1439 tok->markupHandlingMode = TRUE; 1440 tok->aborted = FALSE; 1441 1442 tok->start = TRUE; 1443 1444 tok->outReadPos = 0; 1445 tok->outWritePos = 0; 1446 1447 tok->saveFile[0] = 0; 1448 1449 1450 tok->graphTab = picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]); 1451 1452 tok->xsampa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA_PARSE]); 1453 PICODBG_TRACE(("got xsampa_parser @ %i",tok->xsampa_parser)); 1454 1455 tok->svoxpa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_SVOXPA_PARSE]); 1456 PICODBG_TRACE(("got svoxpa_parser @ %i",tok->svoxpa_parser)); 1457 1458 tok->xsampa2svoxpa_mapper = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA2SVOXPA]); 1459 PICODBG_TRACE(("got xsampa2svoxpa_mapper @ %i",tok->xsampa2svoxpa_mapper)); 1460 1461 1462 1463 return PICO_OK; 1464 } 1465 1466 static pico_status_t tokInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode) 1467 { 1468 /* 1469 1470 tok_subobj_t * tok; 1471 1472 if (NULL == this || NULL == this->subObj) { 1473 return PICO_ERR_OTHER; 1474 } 1475 tok = (tok_subobj_t *) this->subObj; 1476 */ 1477 return tokReset(this, resetMode); 1478 } 1479 1480 1481 static pico_status_t tokTerminate(register picodata_ProcessingUnit this) 1482 { 1483 return PICO_OK; 1484 } 1485 1486 static picodata_step_result_t tokStep(register picodata_ProcessingUnit this, picoos_int16 mode, picoos_uint16 * numBytesOutput); 1487 1488 static pico_status_t tokSubObjDeallocate(register picodata_ProcessingUnit this, 1489 picoos_MemoryManager mm) 1490 { 1491 1492 if (NULL != this) { 1493 picoos_deallocate(this->common->mm, (void *) &this->subObj); 1494 } 1495 mm = mm; /* avoid warning "var not used in this function"*/ 1496 return PICO_OK; 1497 } 1498 1499 picodata_ProcessingUnit picotok_newTokenizeUnit(picoos_MemoryManager mm, picoos_Common common, 1500 picodata_CharBuffer cbIn, picodata_CharBuffer cbOut, 1501 picorsrc_Voice voice) 1502 { 1503 tok_subobj_t * tok; 1504 picodata_ProcessingUnit this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice); 1505 if (this == NULL) { 1506 return NULL; 1507 } 1508 this->initialize = tokInitialize; 1509 PICODBG_DEBUG(("set this->step to tokStep")); 1510 this->step = tokStep; 1511 this->terminate = tokTerminate; 1512 this->subDeallocate = tokSubObjDeallocate; 1513 this->subObj = picoos_allocate(mm, sizeof(tok_subobj_t)); 1514 if (this->subObj == NULL) { 1515 picoos_deallocate(mm, (void *)&this); 1516 return NULL; 1517 } 1518 tok = (tok_subobj_t *) this->subObj; 1519 tok->transducer = picotrns_newSimpleTransducer(mm, common, 10*(PICOTRNS_MAX_NUM_POSSYM+2)); 1520 if (NULL == tok->transducer) { 1521 tokSubObjDeallocate(this,mm); 1522 picoos_deallocate(mm, (void *)&this); 1523 return NULL; 1524 } 1525 tokInitialize(this, PICO_RESET_FULL); 1526 return this; 1527 } 1528 1529 /** 1530 * fill up internal buffer, try to locate token, write token to output 1531 */ 1532 picodata_step_result_t tokStep(register picodata_ProcessingUnit this, 1533 picoos_int16 mode, picoos_uint16 * numBytesOutput) 1534 { 1535 register tok_subobj_t * tok; 1536 1537 if (NULL == this || NULL == this->subObj) { 1538 return PICODATA_PU_ERROR; 1539 } 1540 tok = (tok_subobj_t *) this->subObj; 1541 1542 mode = mode; /* avoid warning "var not used in this function"*/ 1543 1544 *numBytesOutput = 0; 1545 while (1) { /* exit via return */ 1546 picoos_int16 ch; 1547 1548 if ((tok->outWritePos - tok->outReadPos) > 0) { 1549 if (picodata_cbPutItem(this->cbOut, &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos, numBytesOutput) == PICO_OK) { 1550 PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG], 1551 (picoos_uint8 *)"tok:", &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos); 1552 tok->outReadPos += *numBytesOutput; 1553 if (tok->outWritePos == tok->outReadPos) { 1554 tok->outWritePos = 0; 1555 tok->outReadPos = 0; 1556 } 1557 } 1558 else { 1559 return PICODATA_PU_OUT_FULL; 1560 } 1561 1562 } 1563 else if (PICO_EOF != (ch = picodata_cbGetCh(this->cbIn))) { 1564 PICODBG_DEBUG(("read in %c", (picoos_char) ch)); 1565 tok_treatChar(this, tok, (picoos_uchar) ch, /*markupHandling*/TRUE); 1566 } 1567 else { 1568 return PICODATA_PU_IDLE; 1569 } 1570 } 1571 } 1572 1573 #ifdef __cplusplus 1574 } 1575 #endif 1576 1577 /* end */ 1578