Home | History | Annotate | Download | only in lib
      1 /*
      2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 /**
     17  * @file picotok.c
     18  *
     19  * tokenizer
     20  *
     21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
     22  * All rights reserved.
     23  *
     24  * History:
     25  * - 2009-04-20 -- initial version
     26  *
     27  */
     28 
     29 
     30 /* ************************************************************/
     31 /* tokenisation and markup handling */
     32 /* ************************************************************/
     33 
     34 /** @addtogroup picotok
     35   @b tokenisation_overview
     36 
     37   markup handling overview:
     38 
     39   The following markups are recognized
     40      - ignore
     41      - speed
     42      - pitch
     43      - volume
     44      - voice
     45      - preproccontext
     46      - mark
     47      - play
     48      - usesig
     49      - genfile
     50      - sentence
     51      - s
     52      - paragraph
     53      - p
     54      - break
     - spell            (pauses between letters)
     56      - phoneme
     57 
     58   All markups which are recognized but are not yet implemented in pico
     59   system have the mark.
     60 */
     61 
     62 
     63 #include "picodefs.h"
     64 #include "picoos.h"
     65 #include "picobase.h"
     66 #include "picodbg.h"
     67 #include "picodata.h"
     68 #include "picotok.h"
     69 #include "picoktab.h"
     70 
     71 #ifdef __cplusplus
     72 extern "C" {
     73 #endif
     74 #if 0
     75 }
     76 #endif
     77 
     78 /* *****************************************************************************/
     79 
     80 #define IN_BUF_SIZE   255
     81 #define OUT_BUF_SIZE  IN_BUF_SIZE + 3 * PICODATA_ITEM_HEADSIZE + 3
     82 
     83 #define MARKUP_STRING_BUF_SIZE (IN_BUF_SIZE*5)
     84 #define MAX_NR_MARKUP_PARAMS 6
     85 #define MARKUP_HANDLING_DISABLED  0
     86 #define MARKUP_HANDLING_ENABLED 1
     87 #define EOL '\n'
     88 
     89 
/* Integer types of the tokenType / tokenSubType fields of tok_subobj_t. */
typedef picoos_int8 pico_tokenSubType;
typedef picoos_uint8 pico_tokenType;

/** @todo : consider adding these specialized exception codes: */

/* Until specialized codes exist, all three names are aliases of PICO_ERR_OTHER. */
#define PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE PICO_ERR_OTHER
#define PICO_ERR_INVALID_MARKUP_TAG        PICO_ERR_OTHER
#define PICO_ERR_INTERNAL_LIMIT            PICO_ERR_OTHER
     98 
     99 typedef enum {MIDummyStart, MIIgnore,
    100               MIPitch, MISpeed, MIVolume,
    101               MIVoice, MIPreprocContext, MIMarker,
    102               MIPlay, MIUseSig, MIGenFile, MIParagraph,
    103               MISentence, MIBreak, MISpell, MIPhoneme, MIItem, MISpeaker, MIDummyEnd
    104              }  MarkupId;
    105 typedef enum {MSNotInMarkup, MSGotStart, MSExpectingmarkupTagName, MSInmarkupTagName,
    106               MSGotmarkupTagName, MSInAttrName, MSGotAttrName, MSGotEqual, MSInAttrValue,
    107               MSInAttrValueEscaped, MSGotAttrValue, MSGotEndSlash, MSGotEnd,
    108               MSError, MSErrorTooLong, MSErrorSyntax
    109              }  MarkupState;
    110 typedef enum {MENone, MEMissingStart, MEUnknownTag, MEIdent, MEMissingEqual,
    111               MEMissingQuote, MEMissingEnd, MEUnexpectedChar, MEInterprete
    112              }  MarkupParseError;
    113 
    114 typedef enum {MTNone, MTStart, MTEnd, MTEmpty} MarkupTagType;
    115 
/* Result codes of tok_putToUtf(): state of the UTF-8 character assembled so far. */
#define UTF_CHAR_COMPLETE   2
#define UTF_CHAR_INCOMPLETE 1
#define UTF_CHAR_MALFORMED  0
    119 
/* Tag names compared by tok_markupTagId(); a leading "svox:" in the tag is
   skipped there before the comparison. */
#define TOK_MARKUP_KW_IGNORE     (picoos_uchar*)"ignore"
#define TOK_MARKUP_KW_SPEED      (picoos_uchar*)"speed"
#define TOK_MARKUP_KW_PITCH      (picoos_uchar*)"pitch"
#define TOK_MARKUP_KW_VOLUME     (picoos_uchar*)"volume"
#define TOK_MARKUP_KW_VOICE      (picoos_uchar*)"voice"
#define TOK_MARKUP_KW_CONTEXT    (picoos_uchar*)"preproccontext"
#define TOK_MARKUP_KW_MARK       (picoos_uchar*)"mark"
#define TOK_MARKUP_KW_PLAY       (picoos_uchar*)"play"
#define TOK_MARKUP_KW_USESIG     (picoos_uchar*)"usesig"
#define TOK_MARKUP_KW_GENFILE    (picoos_uchar*)"genfile"
/* "sentence" and "s" both map to MISentence, "paragraph" and "p" to MIParagraph. */
#define TOK_MARKUP_KW_SENTENCE   (picoos_uchar*)"sentence"
#define TOK_MARKUP_KW_S          (picoos_uchar*)"s"
#define TOK_MARKUP_KW_PARAGRAPH  (picoos_uchar*)"paragraph"
#define TOK_MARKUP_KW_P          (picoos_uchar*)"p"
#define TOK_MARKUP_KW_BREAK      (picoos_uchar*)"break"
#define TOK_MARKUP_KW_SPELL      (picoos_uchar*)"spell"
#define TOK_MARKUP_KW_PHONEME    (picoos_uchar*)"phoneme"
#define TOK_MARKUP_KW_ITEM       (picoos_uchar*)"item"
#define TOK_MARKUP_KW_SPEAKER    (picoos_uchar*)"speaker"
    139 
/* Markup attribute names; tok_interpretMarkup() compares them against
   MarkupParam.paramId (e.g. KWLevel for speed/pitch/volume/speaker,
   KWName for voice). */
#define KWLevel (picoos_uchar *)"level"
#define KWName (picoos_uchar *)"name"
#define KWProsDomain (picoos_uchar *)"prosodydomain"
#define KWTime (picoos_uchar *)"time"
#define KWMode (picoos_uchar *)"mode"
#define KWSB (picoos_uchar *)"sb"
#define KWPB (picoos_uchar *)"pb"
#define KWFile (picoos_uchar *)"file"
#define KWType (picoos_uchar *)"type"
#define KWF0Beg (picoos_uchar *)"f0beg"
#define KWF0End (picoos_uchar *)"f0end"
#define KWXFadeBeg (picoos_uchar *)"xfadebeg"
#define KWXFadeEnd (picoos_uchar *)"xfadeend"
#define KWAlphabet (picoos_uchar *)"alphabet"
#define KWPH (picoos_uchar *)"ph"
#define KWOrthMode (picoos_uchar *)"orthmode"
#define KWIgnorePunct (picoos_uchar *)"ignorepunct"
#define KWInfo1 (picoos_uchar *)"info1"
#define KWInfo2 (picoos_uchar *)"info2"
#define KWDATA (picoos_uchar *)"data"
    160 
/* Limits and defaults for the "level" attribute of the speed, pitch, volume
   and speaker markup.  In tok_interpretMarkup() the plain MIN/MAX pairs are
   passed to tok_checkLimits() for absolute values and the FACTOR_MIN/MAX
   pairs for relative ("...%") values; tok_isRelative() scales a percentage
   by 10, so a factor of 1000 corresponds to 100%.  The DEFAULT values are
   emitted when the corresponding end tag is seen. */
#define PICO_SPEED_MIN           20
#define PICO_SPEED_MAX          500
#define PICO_SPEED_DEFAULT      100
#define PICO_SPEED_FACTOR_MIN   500
#define PICO_SPEED_FACTOR_MAX  2000

#define PICO_PITCH_MIN           50
#define PICO_PITCH_MAX          200
#define PICO_PITCH_DEFAULT      100
#define PICO_PITCH_FACTOR_MIN   500
#define PICO_PITCH_FACTOR_MAX  2000
/* NOTE(review): where the PICO_PITCH_ADD_* values are used is not confirmed. */
#define PICO_PITCH_ADD_MIN     -100
#define PICO_PITCH_ADD_MAX      100
#define PICO_PITCH_ADD_DEFAULT    0

#define PICO_VOLUME_MIN           0
#define PICO_VOLUME_MAX         500
#define PICO_VOLUME_DEFAULT     100
#define PICO_VOLUME_FACTOR_MIN  500
#define PICO_VOLUME_FACTOR_MAX 2000

#define PICO_SPEAKER_MIN          20
#define PICO_SPEAKER_MAX         180
#define PICO_SPEAKER_DEFAULT     100
#define PICO_SPEAKER_FACTOR_MIN  500
#define PICO_SPEAKER_FACTOR_MAX 2000
    187 
/* Name of the default preprocessing context. */
#define PICO_CONTEXT_DEFAULT   (picoos_uchar*)"DEFAULT"

/* NOTE(review): presumably a pause duration in milliseconds - confirm at the use site. */
#define PARAGRAPH_PAUSE_DUR 500
/* Spell mode variants. */
#define SPELL_WITH_PHRASE_BREAK  1
#define SPELL_WITH_SENTENCE_BREAK  2

/* *****************************************************************************/

/* The NUL character, typed as picoos_char. */
#define TOK_PUNC_FLUSH  (picoos_char) '\0'
    197 
/* String buffer of MARKUP_STRING_BUF_SIZE bytes. */
typedef picoos_uchar Word[MARKUP_STRING_BUF_SIZE];


/* One markup parameter: identifier and value, each stored as a string. */
struct MarkupParam {
    Word paramId;
    Word paramVal;
};

/* Fixed-size list of markup parameters; tok_clearMarkupParams() resets every
   entry to an empty id and an empty value. */
typedef struct MarkupParam MarkupParams[MAX_NR_MARKUP_PARAMS];

typedef picoos_uchar utf8char0c[5]; /* one more than needed so it is ended always with 0c*/
    209 
/** subobject : TokenizeUnit
 *  shortcut  : tok
 */
typedef struct tok_subobj
{
    /* nesting depth of "ignore": raised by tok_startIgnore(), lowered (never
       below 0) by tok_endIgnore(); while > 0, tok_putItem() drops every item
       except flush commands */
    picoos_int32 ignLevel;

    /* UTF-8 character being assembled by tok_putToUtf() */
    utf8char0c   utf;
    picoos_int32 utfpos;   /* number of bytes stored in utf so far */
    picoos_int32 utflen;   /* expected byte count; set to 0 when a bad continuation byte is seen */

    /* state of the markup currently being parsed; tok_interpretMarkup() reads
       the parameter ids/values from markupParams */
    MarkupParams markupParams;
    picoos_int32 nrMarkupParams;
    MarkupState markupState;
    picoos_uchar markupStr[MARKUP_STRING_BUF_SIZE];
    picoos_int32 markupPos;
    picoos_int32 markupLevel[MIDummyEnd+1];
    picoos_uchar markupTagName[IN_BUF_SIZE];
    MarkupTagType markupTagType;
    MarkupParseError markupTagErr;

    picoos_int32 strPos;
    picoos_uchar strDelim;
    picoos_bool isFileAttr;

    pico_tokenType tokenType;
    pico_tokenSubType tokenSubType;

    picoos_int32 tokenPos;
    picoos_uchar tokenStr[IN_BUF_SIZE];

    picoos_int32 nrEOL;

    picoos_bool markupHandlingMode;       /* to be moved ??? */
    picoos_bool aborted;                  /* to be moved ??? */

    picoos_bool start;

    /* items are appended here by tok_putItem() / tok_putItem2() */
    picoos_uint8 outBuf[OUT_BUF_SIZE]; /* internal output buffer */
    picoos_uint16 outReadPos; /* next pos to read from outBuf */
    picoos_uint16 outWritePos; /* next pos to write to outBuf */

    picoos_uchar saveFile[IN_BUF_SIZE];
    Word phonemes;

    picotrns_SimpleTransducer transducer;

    /* kbs */

    picoktab_Graphs graphTab;
    picokfst_FST xsampa_parser;
    picokfst_FST svoxpa_parser;
    picokfst_FST xsampa2svoxpa_mapper;



} tok_subobj_t;
    267 
    268 /* *****************************************************************************/
    269 
/* Forward declarations of file-local functions, so that they can be called
   before their definitions. */
static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling);
static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok);
static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[]);
static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
static MarkupId tok_markupTagId (picoos_uchar tagId[]);
    276 
    277 /* *****************************************************************************/
    278 
    279 static picoos_bool tok_strEqual(picoos_uchar * str1, picoos_uchar * str2)
    280 {
    281    return (picoos_strcmp((picoos_char*)str1, (picoos_char*)str2) == 0);
    282 }
    283 
    284 static void tok_reduceBlanks(picoos_uchar * str)
    285             /* Remove leading and trailing blanks of 'str' and reduce
    286                groups of blanks within string to exactly one blank. */
    287 
    288 {
    289     int i = 0;
    290     int j = 0;
    291 
    292      while (str[j] != 0) {
    293         if (str[j] == (picoos_uchar)' ') {
    294             /* note one blank except at the beginning of string */
    295             if (i > 0) {
    296                 str[i] = (picoos_uchar)' ';
    297                 i++;
    298             }
    299             j++;
    300             while (str[j] == (picoos_uchar)' ') {
    301                 j++;
    302             }
    303         } else {
    304             str[i] = str[j];
    305             j++;
    306             i++;
    307         }
    308     }
    309 
    310     /* remove blanks at end of string */
    311     if ((i > 0) && (str[i - 1] == ' ')) {
    312         i--;
    313     }
    314     str[i] = 0;
    315 }
    316 
    317 
    318 static void tok_startIgnore (tok_subobj_t * tok)
    319 {
    320     tok->ignLevel++;
    321 }
    322 
    323 
    324 static void tok_endIgnore (tok_subobj_t * tok)
    325 {
    326     if (tok->ignLevel > 0) {
    327         tok->ignLevel--;
    328     }
    329 }
    330 
    331 
    332 static void tok_getParamIntVal (MarkupParams params, picoos_uchar paramId[], picoos_int32 * paramVal, picoos_bool * paramFound)
    333 {
    334     int i=0;
    335 
    336     while ((i < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId,params[i].paramId)) {
    337         i++;
    338     }
    339     if ((i < MAX_NR_MARKUP_PARAMS)) {
    340         (*paramVal) = picoos_atoi((picoos_char*)params[i].paramVal);
    341         (*paramFound) = TRUE;
    342     } else {
    343         (*paramVal) =  -1;
    344         (*paramFound) = FALSE;
    345     }
    346 }
    347 
    348 
    349 
    350 static void tok_getParamStrVal (MarkupParams params, picoos_uchar paramId[], picoos_uchar paramStrVal[], picoos_bool * paramFound)
    351 {
    352     int i=0;
    353 
    354     while ((i < MAX_NR_MARKUP_PARAMS) &&  !tok_strEqual(paramId,params[i].paramId)) {
    355         i++;
    356     }
    357     if (i < MAX_NR_MARKUP_PARAMS) {
    358         picoos_strcpy((picoos_char*)paramStrVal, (picoos_char*)params[i].paramVal);
    359         (*paramFound) = TRUE;
    360     } else {
    361         paramStrVal[0] = 0;
    362         (*paramFound) = FALSE;
    363     }
    364 }
    365 
    366 
    367 static void tok_getParamPhonesStr (MarkupParams params, picoos_uchar paramId[], picoos_uchar alphabet[], picoos_uchar phones[], picoos_int32 phoneslen, picoos_bool * paramFound)
    368 {
    369 
    370     int i;
    371     picoos_bool done;
    372 
    373     i = 0;
    374     while ((i < MAX_NR_MARKUP_PARAMS) &&  !tok_strEqual(paramId, params[i].paramId)) {
    375         i++;
    376     }
    377     if (i < MAX_NR_MARKUP_PARAMS) {
    378         if (tok_strEqual(alphabet, PICODATA_XSAMPA) || tok_strEqual(alphabet, (picoos_uchar*)"")) {
    379             picoos_strlcpy((picoos_char*)phones, (picoos_char*)params[i].paramVal, phoneslen);
    380             done = TRUE;
    381         } else {
    382             done = FALSE;
    383         }
    384         (*paramFound) = TRUE;
    385     } else {
    386         done = FALSE;
    387         (*paramFound) = FALSE;
    388     }
    389     if (!done) {
    390         phones[0] = 0;
    391     }
    392 }
    393 
    394 
    395 static void tok_clearMarkupParams (MarkupParams params)
    396 {
    397     int i;
    398 
    399     for (i = 0; i<MAX_NR_MARKUP_PARAMS; i++) {
    400         params[i].paramId[0] = 0;
    401         params[i].paramVal[0] = 0;
    402     }
    403 }
    404 
    405 
    406 static void tok_getDur (picoos_uchar durStr[], picoos_uint32 * dur, picoos_bool * done)
    407 {
    408 
    409     int num=0;
    410     int i=0;
    411     picoos_uchar tmpWord[IN_BUF_SIZE];
    412 
    413     picoos_strlcpy((picoos_char*)tmpWord, (picoos_char*)durStr, sizeof(tmpWord));
    414     tok_reduceBlanks(tmpWord);
    415     while ((durStr[i] >= '0') && (durStr[i] <= '9')) {
    416         num = 10 * num + (int)durStr[i] - (int)'0';
    417         tmpWord[i] = ' ';
    418         i++;
    419     }
    420     tok_reduceBlanks(tmpWord);
    421     if (tok_strEqual(tmpWord, (picoos_uchar*)"s")) {
    422         (*dur) = (1000 * num);
    423         (*done) = TRUE;
    424     } else if (tok_strEqual(tmpWord,(picoos_uchar*)"ms")) {
    425         (*dur) = num;
    426         (*done) = TRUE;
    427     } else {
    428         (*dur) = 0;
    429         (*done) = FALSE;
    430     }
    431 }
    432 
    433 
    434 static picoos_int32 tok_putToUtf (tok_subobj_t * tok, picoos_uchar ch)
    435 {
    436     if (tok->utfpos < PICOBASE_UTF8_MAXLEN) {
    437         tok->utf[tok->utfpos] = ch;
    438         if (tok->utfpos == 0) {
    439             tok->utflen = picobase_det_utf8_length(ch);
    440         } else if (((ch < (picoos_uchar)'\200') || (ch >= (picoos_uchar)'\300'))) {
    441             tok->utflen = 0;
    442         }
    443         (tok->utfpos)++;
    444         if ((tok->utfpos == tok->utflen)) {
    445             if ((tok->utfpos < PICOBASE_UTF8_MAXLEN)) {
    446                 tok->utf[tok->utfpos] = 0;
    447             }
    448             return UTF_CHAR_COMPLETE;
    449         } else if (tok->utfpos < tok->utflen) {
    450             return UTF_CHAR_INCOMPLETE;
    451         } else {
    452             return UTF_CHAR_MALFORMED;
    453         }
    454     } else {
    455         return UTF_CHAR_MALFORMED;
    456     }
    457 }
    458 
    459 
    460 static picoos_bool tok_isRelative (picoos_uchar strval[], picoos_uint32 * val)
    461 {
    462     picoos_int32 len;
    463     picoos_bool rel;
    464 
    465     rel = FALSE;
    466     len = picoos_strlen((picoos_char*)strval);
    467     if (len > 0) {
    468         if (strval[len - 1] == '%') {
    469             strval[len - 1] = 0;
    470             if ((strval[0] == '+') || (strval[0] == '-')) {
    471                 (*val) = 1000 + (picoos_atoi((picoos_char*)strval) * 10);
    472             } else {
    473                 (*val) = picoos_atoi((picoos_char*)strval) * 10;
    474             }
    475             rel = TRUE;
    476         }
    477     }
    478     return rel;
    479 }
    480 
    481 
    482 static void tok_putItem (picodata_ProcessingUnit this,  tok_subobj_t * tok,
    483                          picoos_uint8 itemType, picoos_uint8 info1, picoos_uint8 info2,
    484                          picoos_uint16 val,
    485                          picoos_uchar str[])
    486 {
    487     picoos_int32 len, i;
    488 
    489     if ((itemType == PICODATA_ITEM_CMD) && (info1 == PICODATA_ITEMINFO1_CMD_FLUSH)) {
    490         tok->outBuf[tok->outWritePos++] = itemType;
    491         tok->outBuf[tok->outWritePos++] = info1;
    492         tok->outBuf[tok->outWritePos++] = info2;
    493         tok->outBuf[tok->outWritePos++] = 0;
    494     }
    495     else if (tok->ignLevel <= 0) {
    496         switch (itemType) {
    497         case PICODATA_ITEM_CMD:
    498             switch (info1) {
    499             case PICODATA_ITEMINFO1_CMD_CONTEXT:
    500             case PICODATA_ITEMINFO1_CMD_VOICE:
    501             case PICODATA_ITEMINFO1_CMD_MARKER:
    502             case PICODATA_ITEMINFO1_CMD_PLAY:
    503             case PICODATA_ITEMINFO1_CMD_SAVE:
    504             case PICODATA_ITEMINFO1_CMD_UNSAVE:
    505             case PICODATA_ITEMINFO1_CMD_PROSDOMAIN:
    506             case PICODATA_ITEMINFO1_CMD_PHONEME:
    507                 len = picoos_strlen((picoos_char*)str);
    508                 if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
    509                     tok->outBuf[tok->outWritePos++] = itemType;
    510                     tok->outBuf[tok->outWritePos++] = info1;
    511                     tok->outBuf[tok->outWritePos++] = info2;
    512                     tok->outBuf[tok->outWritePos++] = len;
    513                     for (i=0; i<len; i++) {
    514                         tok->outBuf[tok->outWritePos++] = str[i];
    515                     }
    516                 }
    517                 else {
    518                     PICODBG_WARN(("tok_putItem: output buffer too small"));
    519                 }
    520                 break;
    521             case PICODATA_ITEMINFO1_CMD_IGNSIG:
    522             case PICODATA_ITEMINFO1_CMD_IGNORE:
    523                 if (tok->outWritePos + 4 < OUT_BUF_SIZE) {
    524                     tok->outBuf[tok->outWritePos++] = itemType;
    525                     tok->outBuf[tok->outWritePos++] = info1;
    526                     tok->outBuf[tok->outWritePos++] = info2;
    527                     tok->outBuf[tok->outWritePos++] = 0;
    528                 }
    529                 else {
    530                     PICODBG_WARN(("tok_putItem: output buffer too small"));
    531                 }
    532                 break;
    533             case PICODATA_ITEMINFO1_CMD_SPEED:
    534             case PICODATA_ITEMINFO1_CMD_PITCH:
    535             case PICODATA_ITEMINFO1_CMD_VOLUME:
    536             case PICODATA_ITEMINFO1_CMD_SPELL:
    537             case PICODATA_ITEMINFO1_CMD_SIL:
    538             case PICODATA_ITEMINFO1_CMD_SPEAKER:
    539                 if (tok->outWritePos + 4 + 2 < OUT_BUF_SIZE) {
    540                     tok->outBuf[tok->outWritePos++] = itemType;
    541                     tok->outBuf[tok->outWritePos++] = info1;
    542                     tok->outBuf[tok->outWritePos++] = info2;
    543                     tok->outBuf[tok->outWritePos++] = 2;
    544                     tok->outBuf[tok->outWritePos++] = val % 256;
    545                     tok->outBuf[tok->outWritePos++] = val / 256;
    546                 }
    547                 else {
    548                     PICODBG_WARN(("tok_putItem: output buffer too small"));
    549                 }
    550                 break;
    551             default:
    552                 PICODBG_WARN(("tok_putItem: unknown command type"));
    553             }
    554             break;
    555         case PICODATA_ITEM_TOKEN:
    556             len = picoos_strlen((picoos_char*)str);
    557             if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
    558                 tok->outBuf[tok->outWritePos++] = itemType;
    559                 tok->outBuf[tok->outWritePos++] = info1;
    560                 tok->outBuf[tok->outWritePos++] = info2;
    561                 tok->outBuf[tok->outWritePos++] = len;
    562                 for (i=0; i<len; i++) {
    563                     tok->outBuf[tok->outWritePos++] = str[i];
    564                 }
    565             }
    566             else {
    567                 PICODBG_WARN(("tok_putItem: output buffer too small"));
    568             }
    569             break;
    570         default:
    571             PICODBG_WARN(("tok_putItem: unknown item type"));
    572         }
    573     }
    574 }
    575 
    576 
    577 static void tok_putItem2 (picodata_ProcessingUnit this,  tok_subobj_t * tok,
    578                           picoos_uint8 type,
    579                           picoos_uint8 info1, picoos_uint8 info2,
    580                           picoos_uint8 len,
    581                           picoos_uint8 data[])
    582 {
    583     picoos_int32 i;
    584 
    585     if (is_valid_itemtype(type)) {
    586         tok->outBuf[tok->outWritePos++] = type;
    587         tok->outBuf[tok->outWritePos++] = info1;
    588         tok->outBuf[tok->outWritePos++] = info2;
    589         tok->outBuf[tok->outWritePos++] = len;
    590         for (i=0; i<len; i++) {
    591             tok->outBuf[tok->outWritePos++] = data[i];
    592         }
    593     }
    594 }
    595 
    596 
    597 static MarkupId tok_markupTagId (picoos_uchar tagId[])
    598 {
    599     if (picoos_strstr(tagId,(picoos_char *)"svox:") == (picoos_char *)tagId) {
    600         tagId+=5;
    601     }
    602     if (tok_strEqual(tagId, TOK_MARKUP_KW_IGNORE)) {
    603         return MIIgnore;
    604     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEED)) {
    605         return MISpeed;
    606     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PITCH)) {
    607         return MIPitch;
    608     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOLUME)) {
    609         return MIVolume;
    610     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEAKER)) {
    611         return MISpeaker;
    612     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOICE)) {
    613         return MIVoice;
    614     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_CONTEXT)) {
    615         return MIPreprocContext;
    616     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_MARK)) {
    617         return MIMarker;
    618     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PLAY)) {
    619         return MIPlay;
    620     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_USESIG)) {
    621         return MIUseSig;
    622     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_GENFILE)) {
    623         return MIGenFile;
    624     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SENTENCE) || tok_strEqual(tagId, TOK_MARKUP_KW_S)) {
    625         return MISentence;
    626     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PARAGRAPH) || tok_strEqual(tagId, TOK_MARKUP_KW_P)) {
    627         return MIParagraph;
    628     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_BREAK)) {
    629         return MIBreak;
    630     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPELL)) {
    631         return MISpell;
    632     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PHONEME)) {
    633         return MIPhoneme;
    634     } else if (tok_strEqual(tagId, TOK_MARKUP_KW_ITEM)) {
    635         return MIItem;
    636     } else {
    637         return MIDummyEnd;
    638     }
    639 }
    640 
    641 
    642 static void tok_checkLimits (picodata_ProcessingUnit this, picoos_uint32 * value, picoos_uint32 min, picoos_uint32 max, picoos_uchar valueType[])
    643 {
    644     if ((((*value) < min) || ((*value) > max))) {
    645         picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %i for %s", *value, valueType);
    646         if (((*value) < min)) {
    647             (*value) = min;
    648         } else if (((*value) > max)) {
    649             (*value) = max;
    650         }
    651     }
    652 }
    653 
    654 
    655 
    656 /*
    657 
    658 static void tok_checkRealLimits (picodata_ProcessingUnit this, picoos_single * value, picoos_single min, picoos_single max, picoos_uchar valueType[])
    659 {
    660     if ((((*value) < min) || ((*value) > max))) {
    661           picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %f for %s", *value, valueType);
    662         if (((*value) < min)) {
    663             (*value) = min;
    664         } else if (((*value) > max)) {
    665             (*value) = max;
    666         }
    667     }
    668 }
    669 */
    670 
/* Size of the local value-string buffers (valStr, valStr2, valStr3) in
   tok_interpretMarkup(). */
#define VAL_STR_LEN 21
    672 
    673 static void tok_interpretMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_bool isStartTag, MarkupId mId)
    674 {
    675     picoos_bool done;
    676     picoos_int32 ival;
    677     picoos_uint32 uval;
    678     picoos_int32 ival2;
    679     picoos_uchar valStr[VAL_STR_LEN];
    680     picoos_uchar valStr2[VAL_STR_LEN];
    681     picoos_uchar valStr3[VAL_STR_LEN];
    682     picoos_int32 i2;
    683     picoos_uint32 dur;
    684     picoos_bool done1;
    685     picoos_bool paramFound;
    686     picoos_uint8 type, info1, info2;
    687     picoos_uint8 data[256];
    688     picoos_int32 pos, n, len;
    689     picoos_uchar part[10];
    690 
    691     done = FALSE;
    692     switch (mId) {
    693         case MIIgnore:
    694             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
    695                 tok_startIgnore(tok);
    696                 done = TRUE;
    697             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    698                 tok_endIgnore(tok);
    699                 done = TRUE;
    700             }
    701             break;
    702         case MISpeed:
    703             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
    704                 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
    705                     tok_checkLimits(this, & uval, PICO_SPEED_FACTOR_MIN, PICO_SPEED_FACTOR_MAX,(picoos_uchar*)"relative speed factor");
    706                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
    707                 } else {
    708                     uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
    709                     tok_checkLimits(this, & uval, PICO_SPEED_MIN, PICO_SPEED_MAX,(picoos_uchar*)"speed");
    710                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
    711                 }
    712                 done = TRUE;
    713             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    714                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEED_DEFAULT, (picoos_uchar*)"");
    715                 done = TRUE;
    716             }
    717             break;
    718         case MIPitch:
    719             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
    720                 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
    721                     tok_checkLimits(this, & uval,PICO_PITCH_FACTOR_MIN,PICO_PITCH_FACTOR_MAX, (picoos_uchar*)"relative pitch factor");
    722                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
    723                 } else {
    724                     uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
    725                     tok_checkLimits(this, & uval,PICO_PITCH_MIN,PICO_PITCH_MAX, (picoos_uchar*)"pitch");
    726                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
    727                 }
    728                 done = TRUE;
    729             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    730                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_PITCH_DEFAULT, (picoos_uchar*)"");
    731                 done = TRUE;
    732             }
    733             break;
    734         case MIVolume:
    735             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
    736                 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
    737                     tok_checkLimits(this, & uval, PICO_VOLUME_FACTOR_MIN, PICO_VOLUME_FACTOR_MAX, (picoos_uchar*)"relative volume factor");
    738                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
    739                 } else {
    740                     uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
    741                     tok_checkLimits(this, & uval, PICO_VOLUME_MIN, PICO_VOLUME_MAX, (picoos_uchar*)"volume");
    742                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
    743                 }
    744                 done = TRUE;
    745             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    746                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_VOLUME_DEFAULT, (picoos_uchar*)"");
    747                 done = TRUE;
    748             }
    749             break;
    750         case MISpeaker:
    751             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
    752                 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
    753                     tok_checkLimits(this, & uval, PICO_SPEAKER_FACTOR_MIN, PICO_SPEAKER_FACTOR_MAX, (picoos_uchar*)"relative speaker factor");
    754                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
    755                 } else {
    756                     uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
    757                     tok_checkLimits(this, & uval, PICO_SPEAKER_MIN, PICO_SPEAKER_MAX, (picoos_uchar*)"volume");
    758                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
    759                 }
    760                 done = TRUE;
    761             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    762                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEAKER_DEFAULT, (picoos_uchar*)"");
    763                 done = TRUE;
    764             }
    765             break;
    766 
    767         case MIVoice:
    768             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
    769                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
    770                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
    771                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
    772                 done = TRUE;
    773             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
    774                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
    775                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
    776                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
    777                 done = TRUE;
    778             }
    779             break;
    780         case MIPreprocContext:
    781             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
    782                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
    783                 done = TRUE;
    784             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
    785                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, PICO_CONTEXT_DEFAULT);
    786                 done = TRUE;
    787             }
    788             break;
    789         case MIMarker:
    790             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
    791                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_MARKER, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
    792                 done = TRUE;
    793             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
    794                 done = TRUE;
    795             }
    796             break;
    797         case MISentence:
    798             if (isStartTag) {
    799                 tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
    800                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
    801                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, valStr);
    802                 done = TRUE;
    803             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    804                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
    805                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, (picoos_uchar*)"");
    806                 done = TRUE;
    807             }
    808             break;
    809         case MIParagraph:
    810             if (isStartTag) {
    811                 tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
    812                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
    813                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, valStr);
    814                 done = TRUE;
    815             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    816                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
    817                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, PARAGRAPH_PAUSE_DUR, (picoos_uchar*)"");
    818                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, (picoos_uchar*)"");
    819                 done = TRUE;
    820             }
    821             break;
    822         case MIBreak:
    823             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWTime)) {
    824                 tok_getDur(tok->markupParams[0].paramVal, & dur, & done1);
    825                 tok_checkLimits (this, &dur, 0, 65535, (picoos_uchar*)"time");
    826                 if (done1) {
    827                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, dur, (picoos_uchar*)"");
    828                     done = TRUE;
    829                 }
    830             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    831                 done = TRUE;
    832             }
    833             break;
    834         case MISpell:
    835             if (isStartTag) {
    836                 if (tok_strEqual(tok->markupParams[0].paramId, KWMode)) {
    837                     if (tok_strEqual(tok->markupParams[0].paramVal, KWPB)) {
    838                         uval = SPELL_WITH_PHRASE_BREAK;
    839                     } else if (tok_strEqual(tok->markupParams[0].paramVal, KWSB)) {
    840                         uval = SPELL_WITH_SENTENCE_BREAK;
    841                     } else {
    842                         tok_getDur(tok->markupParams[0].paramVal, & uval, & done1);
    843                         tok_checkLimits (this, & uval, 0, 65535, (picoos_uchar*)"time");
    844                         if (done1) {
    845                             done = TRUE;
    846                         }
    847                     }
    848                 } else {
    849                     uval = SPELL_WITH_PHRASE_BREAK;
    850                 }
    851                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_START, uval, (picoos_uchar*)"");
    852                 done = TRUE;
    853             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    854                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
    855                 done = TRUE;
    856             }
    857             break;
    858         case MIGenFile:
    859             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
    860                 if (tok->saveFile[0] != 0) {
    861                    tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
    862                                picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, tok->saveFile);
    863                    tok->saveFile[0] = 0;
    864                 }
    865                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SAVE,
    866                             picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal,  /*input*/FALSE), 0, tok->markupParams[0].paramVal);
    867                 picoos_strcpy((picoos_char*)tok->saveFile, (picoos_char*)tok->markupParams[0].paramVal);
    868                 done = TRUE;
    869             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    870                 if (tok->saveFile[0] != 0) {
    871                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
    872                                 picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, (picoos_uchar*)"");
    873                     tok->saveFile[0] = 0;
    874                 }
    875                 done = TRUE;
    876             }
    877             break;
    878         case MIPlay:
    879             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
    880                 if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
    881                     tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
    882                     tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
    883                     tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3,& paramFound);
    884                     tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
    885                     tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
    886                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
    887                                 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
    888                     tok_startIgnore(tok);
    889                 } else {
    890                     if (tok->ignLevel > 0) {
    891                         tok_startIgnore(tok);
    892                     } else {
    893                        picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead\n", tok->markupParams[0].paramVal);
    894                     }
    895                 }
    896                 done = TRUE;
    897             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    898                 tok_endIgnore(tok);
    899                 done = TRUE;
    900             }
    901             break;
    902         case MIUseSig:
    903             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
    904                 if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
    905                     tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
    906                     tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
    907                     tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3, & paramFound);
    908                     tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
    909                     tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
    910                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
    911                                 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
    912                     tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_START, 0, (picoos_uchar*)"");
    913                 } else {
    914                     if (tok->ignLevel <= 0) {
    915                         picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead", tok->markupParams[0].paramVal);
    916                     }
    917                 }
    918                 done = TRUE;
    919             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    920                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
    921                 done = TRUE;
    922             }
    923             break;
    924         case MIPhoneme:
    925             i2 = 0;
    926             if (isStartTag) {
    927                 if (tok_strEqual(tok->markupParams[0].paramId, KWAlphabet) && tok_strEqual(tok->markupParams[1].paramId, KWPH)) {
    928                     if (tok_strEqual(tok->markupParams[2].paramId, KWOrthMode)
    929                         && tok_strEqual(tok->markupParams[2].paramVal, KWIgnorePunct)) {
    930                         i2 = 1;
    931                     }
    932                     if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[1].paramVal, tok->markupParams[0].paramVal, tok->phonemes, sizeof(tok->phonemes)-1) == PICO_OK) {
    933                         tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
    934                             PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
    935                         done = TRUE;
    936                     } else {
    937                         PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
    938                         picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal);
    939                         done = TRUE;
    940                     }
    941                 } else if (tok_strEqual(tok->markupParams[0].paramId, KWPH)) {
    942                     if (tok_strEqual(tok->markupParams[1].paramId, KWOrthMode)
    943                         && tok_strEqual(tok->markupParams[1].paramVal, KWIgnorePunct)) {
    944                         i2 = 1;
    945                     }
    946                     if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[0].paramVal, PICODATA_XSAMPA, tok->phonemes, sizeof(tok->phonemes)) == PICO_OK) {
    947                         tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
    948                             PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
    949                         done = TRUE;
    950                     }
    951                     else {
    952                         PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
    953                         picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizing text instead", tok->markupParams[0].paramVal);
    954                         done = TRUE;
    955                     }
    956                 }
    957             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
    958                 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
    959                     PICODATA_ITEMINFO2_CMD_END, i2, (picoos_uchar*)"");
    960                 done = TRUE;
    961             }
    962             break;
    963         case MIItem:
    964             if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWType) &&
    965                               tok_strEqual(tok->markupParams[1].paramId, KWInfo1)&&
    966                               tok_strEqual(tok->markupParams[2].paramId, KWInfo2)&&
    967                               tok_strEqual(tok->markupParams[3].paramId, KWDATA)) {
    968                   picoos_int32 len2, n2;
    969                   type = picoos_atoi(tok->markupParams[0].paramVal);
    970                   info1 = picoos_atoi(tok->markupParams[1].paramVal);
    971                   info2 = picoos_atoi(tok->markupParams[2].paramVal);
    972                   n = 0; n2 = 0;
    973                   len2 = (picoos_int32)picoos_strlen(tok->markupParams[3].paramVal);
    974                   while (n<len2) {
    975                       while ((tok->markupParams[3].paramVal[n] != 0) && (tok->markupParams[3].paramVal[n] <= 32)) {
    976                           n++;
    977                       }
    978                       tok->markupParams[3].paramVal[n2] = tok->markupParams[3].paramVal[n];
    979                       n++;
    980                       n2++;
    981                   }
    982                   if (is_valid_itemtype(type)) {
    983                       done = TRUE;
    984                       len = 0;
    985                       pos = 0;
    986                       picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
    987                                           &pos, ',', part, 10, &done1);
    988                       while (done && done1) {
    989                           n = picoos_atoi(part);
    990                           if ((n>=0) && (n<256) && (len<256)) {
    991                               data[len++] = n;
    992                           }
    993                           else {
    994                               done = FALSE;
    995                           }
    996                           picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
    997                                           &pos, ',', part, 10, &done1);
    998                       }
    999                       if (done) {
   1000                           tok_putItem2(this, tok, type, info1, info2, len, data);
   1001                       }
   1002                   }
   1003                   else {
   1004                       done = FALSE;
   1005                   }
   1006             } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
   1007                 done = TRUE;
   1008             }
   1009             break;
   1010     default:
   1011         break;
   1012     }
   1013     if (!done) {
   1014         tok->markupTagErr = MEInterprete;
   1015     }
   1016     if (isStartTag) {
   1017         tok->markupLevel[mId]++;
   1018     } else if ((tok->markupLevel[mId] > 0)) {
   1019         tok->markupLevel[mId]--;
   1020     }
   1021 }
   1022 
   1023 
   1024 static picoos_bool tok_attrChar (picoos_uchar ch, picoos_bool first)
   1025 {
   1026     return ((((ch >= (picoos_uchar)'A') && (ch <= (picoos_uchar)'Z')) ||
   1027              ((ch >= (picoos_uchar)'a') && (ch <= (picoos_uchar)'z'))) ||
   1028              ( !(first) && ((ch >= (picoos_uchar)'0') && (ch <= (picoos_uchar)'9'))));
   1029 }
   1030 
   1031 
   1032 
   1033 static picoos_bool tok_idChar (picoos_uchar ch, picoos_bool first)
   1034 {
   1035     return tok_attrChar(ch, first) || ( !(first) && (ch == (picoos_uchar)':'));
   1036 }
   1037 
   1038 
   1039 static void tok_setIsFileAttr (picoos_uchar name[], picoos_bool * isFile)
   1040 {
   1041     (*isFile) = tok_strEqual(name, KWFile);
   1042 }
   1043 
   1044 /* *****************************************************************************/
   1045 
   1046 static void tok_putToSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[], pico_tokenType type, pico_tokenSubType subtype)
   1047 {
   1048     int i, len;
   1049 
   1050     if (str[0] != 0) {
   1051         len = picoos_strlen((picoos_char*)str);
   1052         for (i = 0; i < len; i++) {
   1053             if (tok->tokenPos >= IN_BUF_SIZE) {
   1054                 picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT, (picoos_char*)"", (picoos_char*)"simple token too long; forced treatment");
   1055                 tok_treatSimpleToken(this, tok);
   1056             }
   1057             tok->tokenStr[tok->tokenPos] = str[i];
   1058             tok->tokenPos++;
   1059         }
   1060     }
   1061     tok->tokenType = type;
   1062     tok->tokenSubType = subtype;
   1063 }
   1064 
   1065 
   1066 static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[])
   1067 {
   1068     picoos_int32 i, len;
   1069     picoos_uint8 ok;
   1070 
   1071     tok->markupTagErr = MENone;
   1072     len = picoos_strlen((picoos_char*)str);
   1073     for (i = 0; i< len; i++) {
   1074         if (tok->markupPos >= (MARKUP_STRING_BUF_SIZE - 1)) {
   1075             if ((tok->markupPos == (MARKUP_STRING_BUF_SIZE - 1)) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) {
   1076                 picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"markup tag too long");
   1077             }
   1078             tok->markupState = MSErrorTooLong;
   1079         } else if ((str[i] == (picoos_uchar)' ') && ((tok->markupState == MSExpectingmarkupTagName) || (tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSGotAttrName) || (tok->markupState == MSGotEqual) || (tok->markupState == MSGotAttrValue))) {
   1080         } else if ((str[i] == (picoos_uchar)'>') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
   1081             tok->markupState = MSGotEnd;
   1082         } else if ((str[i] == (picoos_uchar)'/') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
   1083             if (tok->markupTagType == MTEnd) {
   1084                 tok->markupTagErr = MEUnexpectedChar;
   1085                 tok->markupState = MSError;
   1086             } else {
   1087                 tok->markupTagType = MTEmpty;
   1088                 tok->markupState = MSGotEndSlash;
   1089             }
   1090         } else {
   1091             switch (tok->markupState) {
   1092                 case MSNotInMarkup:
   1093                     if (str[i] == (picoos_uchar)'<') {
   1094                         tok_clearMarkupParams(tok->markupParams);
   1095                         tok->nrMarkupParams = 0;
   1096                         tok->strPos = 0;
   1097                         tok->markupTagType = MTStart;
   1098                         tok->markupState = MSGotStart;
   1099                     } else {
   1100                         tok->markupTagErr = MEMissingStart;
   1101                         tok->markupState = MSError;
   1102                     }
   1103                     break;
   1104                 case MSGotStart:
   1105                     if (str[i] == (picoos_uchar)'/') {
   1106                         tok->markupTagType = MTEnd;
   1107                         tok->markupState = MSExpectingmarkupTagName;
   1108                     } else if (str[i] == (picoos_uchar)' ') {
   1109                         tok->markupState = MSExpectingmarkupTagName;
   1110                     } else if (tok_idChar(str[i],TRUE)) {
   1111                         tok->markupTagType = MTStart;
   1112                         tok->markupTagName[tok->strPos] = str[i];
   1113                         tok->strPos++;
   1114                         tok->markupTagName[tok->strPos] = 0;
   1115                         tok->markupState = MSInmarkupTagName;
   1116                     } else {
   1117                         tok->markupTagErr = MEUnexpectedChar;
   1118                         tok->markupState = MSError;
   1119                     }
   1120                     break;
   1121                 case MSInmarkupTagName:   case MSExpectingmarkupTagName:
   1122                     if (tok_idChar(str[i],tok->markupState == MSExpectingmarkupTagName)) {
   1123                         tok->markupTagName[tok->strPos] = str[i];
   1124                         tok->strPos++;
   1125                         tok->markupTagName[(tok->strPos)] = 0;
   1126                         tok->markupState = MSInmarkupTagName;
   1127                     } else if ((tok->markupState == MSInmarkupTagName) && (str[i] == (picoos_uchar)' ')) {
   1128                         tok->markupState = MSGotmarkupTagName;
   1129                         picobase_lowercase_utf8_str(tok->markupTagName, (picoos_char*)tok->markupTagName, IN_BUF_SIZE, &ok);
   1130                         tok->strPos = 0;
   1131                     } else {
   1132                         tok->markupTagErr = MEIdent;
   1133                         tok->markupState = MSError;
   1134                     }
   1135                     break;
   1136                 case MSGotmarkupTagName:   case MSGotAttrValue:
   1137                     if (tok_attrChar(str[i], TRUE)) {
   1138                         if (tok->markupTagType == MTEnd) {
   1139                             tok->markupTagErr = MEUnexpectedChar;
   1140                             tok->markupState = MSError;
   1141                         } else {
   1142                             if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
   1143                                 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
   1144                                 tok->strPos++;
   1145                                 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
   1146                             } else {
   1147                                 picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"too many attributes in markup; ignoring");
   1148                             }
   1149                             tok->markupState = MSInAttrName;
   1150                         }
   1151                     } else {
   1152                         tok->markupTagErr = MEUnexpectedChar;
   1153                         tok->markupState = MSError;
   1154                     }
   1155                     break;
   1156                 case MSInAttrName:
   1157                     if (tok_attrChar(str[i], FALSE)) {
   1158                         if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
   1159                             tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
   1160                             tok->strPos++;
   1161                             tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
   1162                         }
   1163                         tok->markupState = MSInAttrName;
   1164                     } else if (str[i] == (picoos_uchar)' ') {
   1165                         picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok);
   1166                         tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr);
   1167                         tok->markupState = MSGotAttrName;
   1168                     } else if (str[i] == (picoos_uchar)'=') {
   1169                         picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok);
   1170                         tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr);
   1171                         tok->markupState = MSGotEqual;
   1172                     } else {
   1173                         tok->markupTagErr = MEMissingEqual;
   1174                         tok->markupState = MSError;
   1175                     }
   1176                     break;
   1177                 case MSGotAttrName:
   1178                     if (str[i] == (picoos_uchar)'=') {
   1179                         tok->markupState = MSGotEqual;
   1180                     } else {
   1181                         tok->markupTagErr = MEMissingEqual;
   1182                         tok->markupState = MSError;
   1183                     }
   1184                     break;
   1185                 case MSGotEqual:
   1186                     if ((str[i] == (picoos_uchar)'"') || (str[i] == (picoos_uchar)'\'')) {
   1187                         tok->strDelim = str[i];
   1188                         tok->strPos = 0;
   1189                         tok->markupState = MSInAttrValue;
   1190                     } else {
   1191                         tok->markupTagErr = MEMissingQuote;
   1192                         tok->markupState = MSError;
   1193                     }
   1194                     break;
   1195                 case MSInAttrValue:
   1196                     if (!(tok->isFileAttr) && (str[i] == (picoos_uchar)'\\')) {
   1197                         tok->markupState = MSInAttrValueEscaped;
   1198                     } else if (str[i] == tok->strDelim) {
   1199                         if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
   1200                             tok->nrMarkupParams++;
   1201                         }
   1202                         tok->strPos = 0;
   1203                         tok->markupState = MSGotAttrValue;
   1204                     } else {
   1205                         if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
   1206                             tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
   1207                             tok->strPos++;
   1208                             tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
   1209                         }
   1210                         tok->markupState = MSInAttrValue;
   1211                     }
   1212                     break;
   1213                 case MSInAttrValueEscaped:
   1214                     if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
   1215                         tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
   1216                         tok->strPos++;
   1217                         tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
   1218                     }
   1219                     tok->markupState = MSInAttrValue;
   1220                     break;
   1221                 case MSGotEndSlash:
   1222                     if (str[i] == (picoos_uchar)'>') {
   1223                         tok->markupState = MSGotEnd;
   1224                     } else {
   1225                         tok->markupTagErr = MEUnexpectedChar;
   1226                         tok->markupState = MSError;
   1227                     }
   1228                     break;
   1229             default:
   1230                 tok->markupTagErr = MEUnexpectedChar;
   1231                 tok->markupState = MSError;
   1232                 break;
   1233             }
   1234         }
   1235         if (tok->markupTagErr == MENone) {
   1236             tok->markupStr[tok->markupPos] = str[i];
   1237             tok->markupPos++;
   1238         } /* else restart parsing at current char */
   1239         tok->markupStr[tok->markupPos] = 0;
   1240     }
   1241     /*
   1242     PICODBG_DEBUG(("putToMarkup %s", tok->markupStr));
   1243     */
   1244 }
   1245 
   1246 /* *****************************************************************************/
   1247 
   1248 static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
   1249 {
   1250     picoos_int32 i;
   1251 
   1252     tok->utfpos = 0;
   1253     tok->utflen = 0;
   1254     tok->markupState = MSNotInMarkup;
   1255     for (i = 0; i < tok->markupPos; i++) {
   1256         tok_treatChar(this, tok, tok->markupStr[i], FALSE);
   1257     }
   1258     tok->markupPos = 0;
   1259     tok->strPos = 0;
   1260 }
   1261 
   1262 
   1263 static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok)
   1264 {
   1265     MarkupId mId;
   1266 
   1267     if (tok_markupTagId(tok->markupTagName) != MIDummyEnd) {
   1268         if (tok->markupTagErr == MENone) {
   1269             tok->markupState = MSNotInMarkup;
   1270             if ((tok->tokenType != PICODATA_ITEMINFO1_TOKTYPE_SPACE) && (tok->tokenType != PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED)) {
   1271                 tok_treatSimpleToken(this, tok);
   1272             }
   1273             tok_putToSimpleToken(this, tok, (picoos_uchar*)" ", PICODATA_ITEMINFO1_TOKTYPE_SPACE, -1);
   1274             mId = tok_markupTagId(tok->markupTagName);
   1275             if ((tok->markupTagType == MTStart) || (tok->markupTagType == MTEmpty)) {
   1276                 tok_interpretMarkup(this, tok, TRUE, mId);
   1277             }
   1278             if (((tok->markupTagType == MTEnd) || (tok->markupTagType == MTEmpty))) {
   1279                 tok_clearMarkupParams(tok->markupParams);
   1280                 tok->nrMarkupParams = 0;
   1281                 tok_interpretMarkup(this, tok, FALSE,mId);
   1282             }
   1283         }
   1284         if (tok->markupTagErr != MENone) {
   1285             if (!tok->aborted) {
   1286               picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"syntax error in markup token '%s'",tok->markupStr);
   1287             }
   1288             tok_treatMarkupAsSimpleToken(this, tok);
   1289         }
   1290     } else {
   1291         tok_treatMarkupAsSimpleToken(this, tok);
   1292     }
   1293     tok->markupState = MSNotInMarkup;
   1294     tok->markupPos = 0;
   1295     tok->strPos = 0;
   1296 }
   1297 
   1298 
   1299 
   1300 static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling)
   1301 {
   1302     picoos_int32 i, id;
   1303     picoos_uint8 uval8;
   1304     pico_tokenType type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
   1305     pico_tokenSubType subtype = -1;
   1306     picoos_bool dummy;
   1307     utf8char0c utf2;
   1308     picoos_int32 utf2pos;
   1309 
   1310     if (ch == NULLC) {
   1311       tok_treatSimpleToken(this, tok);
   1312       tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
   1313     }
   1314     else {
   1315       switch (tok_putToUtf(tok, ch)) {
   1316         case UTF_CHAR_MALFORMED:
   1317             tok->utfpos = 0;
   1318             tok->utflen = 0;
   1319             break;
   1320         case UTF_CHAR_INCOMPLETE:
   1321             break;
   1322         case UTF_CHAR_COMPLETE:
   1323             markupHandling = (markupHandling && (tok->markupHandlingMode == MARKUP_HANDLING_ENABLED));
   1324             id = picoktab_graphOffset(tok->graphTab, tok->utf);
   1325             if (id > 0) {
   1326                 if (picoktab_getIntPropTokenType(tok->graphTab, id, &uval8)) {
   1327                     type = (pico_tokenType)uval8;
   1328                     if (type == PICODATA_ITEMINFO1_TOKTYPE_LETTERV) {
   1329                         type = PICODATA_ITEMINFO1_TOKTYPE_LETTER;
   1330                     }
   1331                 }
   1332                 dummy = picoktab_getIntPropTokenSubType(tok->graphTab, id, &subtype);
   1333             } else if (tok->utf[tok->utfpos-1] <= (picoos_uchar)' ') {
   1334                 type = PICODATA_ITEMINFO1_TOKTYPE_SPACE;
   1335                 subtype =  -1;
   1336             } else {
   1337                 type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
   1338                 subtype =  -1;
   1339             }
   1340             if ((tok->utf[tok->utfpos-1] > (picoos_uchar)' ')) {
   1341                 tok->nrEOL = 0;
   1342             } else if ((tok->utf[tok->utfpos-1] == EOL)) {
   1343                 tok->nrEOL++;
   1344             }
   1345             if (markupHandling && (tok->markupState != MSNotInMarkup)) {
   1346                 tok_putToMarkup(this, tok, tok->utf);
   1347                 if (tok->markupState >= MSError) {
   1348                     picoos_strlcpy(utf2, tok->utf, 5);
   1349                     utf2pos = tok->utfpos;
   1350                     /* treat string up to (but not including) current char as simple
   1351                        token and restart markup tag parsing with current char */
   1352                     tok_treatMarkupAsSimpleToken(this, tok);
   1353                     for (i = 0; i < utf2pos; i++) {
   1354                         tok_treatChar(this, tok, utf2[i], markupHandling);
   1355                     }
   1356                 } else if (tok->markupState == MSGotEnd) {
   1357                     tok_treatMarkup(this, tok);
   1358                 }
   1359             } else if ((markupHandling && (tok->utf[tok->utfpos-1] == (picoos_uchar)'<'))) {
   1360                 tok_putToMarkup(this, tok, tok->utf);
   1361             } else if (type != PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED) {
   1362                 if ((type != tok->tokenType) || (type == PICODATA_ITEMINFO1_TOKTYPE_CHAR) || (subtype != tok->tokenSubType)) {
   1363                     tok_treatSimpleToken(this, tok);
   1364                 } else if ((tok->utf[tok->utfpos-1] == EOL) && (tok->nrEOL == 2)) {
   1365                     tok_treatSimpleToken(this, tok);
   1366                     tok_putToSimpleToken(this, tok, (picoos_uchar*)".", PICODATA_ITEMINFO1_TOKTYPE_CHAR, -1);
   1367                     tok_treatSimpleToken(this, tok);
   1368                 }
   1369                 tok_putToSimpleToken(this, tok, tok->utf, type, subtype);
   1370             } else {
   1371                 tok_treatSimpleToken(this, tok);
   1372             }
   1373             tok->utfpos = 0;
   1374             tok->utflen = 0;
   1375             break;
   1376       }
   1377     }
   1378 }
   1379 
   1380 
   1381 static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
   1382 {
   1383     if (tok->tokenPos < IN_BUF_SIZE) {
   1384         tok->tokenStr[tok->tokenPos] = 0;
   1385     }
   1386     if (tok->markupState != MSNotInMarkup) {
   1387         if (!(tok->aborted) && (tok->markupState >= MSGotmarkupTagName) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) {
   1388             picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"unfinished markup tag '%s'",tok->markupStr);
   1389         }
   1390         tok_treatMarkupAsSimpleToken(this, tok);
   1391         tok_treatSimpleToken(this, tok);
   1392     } else if ((tok->tokenPos > 0) && ((tok->ignLevel <= 0) || (tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_SPACE))) {
   1393         tok_putItem(this, tok, PICODATA_ITEM_TOKEN, tok->tokenType, (picoos_uint8)tok->tokenSubType, 0, tok->tokenStr);
   1394     }
   1395     tok->tokenPos = 0;
   1396     tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
   1397     tok->tokenSubType =  -1;
   1398 }
   1399 
   1400 /* *****************************************************************************/
   1401 
   1402 static pico_status_t tokReset(register picodata_ProcessingUnit this, picoos_int32 resetMode)
   1403 {
   1404     tok_subobj_t * tok;
   1405     MarkupId mId;
   1406 
   1407     if (NULL == this || NULL == this->subObj) {
   1408         return PICO_ERR_OTHER;
   1409     }
   1410     tok = (tok_subobj_t *) this->subObj;
   1411 
   1412     tok->ignLevel = 0;
   1413 
   1414     tok->utfpos = 0;
   1415     tok->utflen = 0;
   1416 
   1417     tok_clearMarkupParams(tok->markupParams);
   1418     tok->nrMarkupParams = 0;
   1419     tok->markupState = MSNotInMarkup;
   1420     tok->markupPos = 0;
   1421     for (mId = MIDummyStart; mId <= MIDummyEnd; mId++) {
   1422         tok->markupLevel[mId] = 0;
   1423     }
   1424     tok->markupTagName[0] = 0;
   1425     tok->markupTagType = MTNone;
   1426     tok->markupTagErr = MENone;
   1427 
   1428     tok->strPos = 0;
   1429     tok->strDelim = 0;
   1430     tok->isFileAttr = FALSE;
   1431 
   1432     tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
   1433     tok->tokenSubType =  -1;
   1434     tok->tokenPos = 0;
   1435 
   1436     tok->nrEOL = 0;
   1437 
   1438 
   1439     tok->markupHandlingMode = TRUE;
   1440     tok->aborted = FALSE;
   1441 
   1442     tok->start = TRUE;
   1443 
   1444     tok->outReadPos = 0;
   1445     tok->outWritePos = 0;
   1446 
   1447     tok->saveFile[0] = 0;
   1448 
   1449 
   1450     tok->graphTab = picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);
   1451 
   1452     tok->xsampa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA_PARSE]);
   1453     PICODBG_TRACE(("got xsampa_parser @ %i",tok->xsampa_parser));
   1454 
   1455     tok->svoxpa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_SVOXPA_PARSE]);
   1456     PICODBG_TRACE(("got svoxpa_parser @ %i",tok->svoxpa_parser));
   1457 
   1458     tok->xsampa2svoxpa_mapper = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA2SVOXPA]);
   1459     PICODBG_TRACE(("got xsampa2svoxpa_mapper @ %i",tok->xsampa2svoxpa_mapper));
   1460 
   1461 
   1462 
   1463     return PICO_OK;
   1464 }
   1465 
   1466 static pico_status_t tokInitialize(register picodata_ProcessingUnit this, picoos_int32 resetMode)
   1467 {
   1468 /*
   1469 
   1470     tok_subobj_t * tok;
   1471 
   1472     if (NULL == this || NULL == this->subObj) {
   1473         return PICO_ERR_OTHER;
   1474     }
   1475     tok = (tok_subobj_t *) this->subObj;
   1476 */
   1477     return tokReset(this, resetMode);
   1478 }
   1479 
   1480 
   1481 static pico_status_t tokTerminate(register picodata_ProcessingUnit this)
   1482 {
   1483     return PICO_OK;
   1484 }
   1485 
   1486 static picodata_step_result_t tokStep(register picodata_ProcessingUnit this, picoos_int16 mode, picoos_uint16 * numBytesOutput);
   1487 
   1488 static pico_status_t tokSubObjDeallocate(register picodata_ProcessingUnit this,
   1489         picoos_MemoryManager mm)
   1490 {
   1491 
   1492     if (NULL != this) {
   1493         picoos_deallocate(this->common->mm, (void *) &this->subObj);
   1494     }
   1495     mm = mm;        /* avoid warning "var not used in this function"*/
   1496     return PICO_OK;
   1497 }
   1498 
   1499 picodata_ProcessingUnit picotok_newTokenizeUnit(picoos_MemoryManager mm, picoos_Common common,
   1500         picodata_CharBuffer cbIn, picodata_CharBuffer cbOut,
   1501         picorsrc_Voice voice)
   1502 {
   1503     tok_subobj_t * tok;
   1504     picodata_ProcessingUnit this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice);
   1505     if (this == NULL) {
   1506         return NULL;
   1507     }
   1508     this->initialize = tokInitialize;
   1509     PICODBG_DEBUG(("set this->step to tokStep"));
   1510     this->step = tokStep;
   1511     this->terminate = tokTerminate;
   1512     this->subDeallocate = tokSubObjDeallocate;
   1513     this->subObj = picoos_allocate(mm, sizeof(tok_subobj_t));
   1514     if (this->subObj == NULL) {
   1515         picoos_deallocate(mm, (void *)&this);
   1516         return NULL;
   1517     }
   1518     tok = (tok_subobj_t *) this->subObj;
   1519     tok->transducer = picotrns_newSimpleTransducer(mm, common, 10*(PICOTRNS_MAX_NUM_POSSYM+2));
   1520     if (NULL == tok->transducer) {
   1521         tokSubObjDeallocate(this,mm);
   1522         picoos_deallocate(mm, (void *)&this);
   1523         return NULL;
   1524     }
   1525     tokInitialize(this, PICO_RESET_FULL);
   1526     return this;
   1527 }
   1528 
   1529 /**
   1530  * fill up internal buffer, try to locate token, write token to output
   1531  */
   1532 picodata_step_result_t tokStep(register picodata_ProcessingUnit this,
   1533         picoos_int16 mode, picoos_uint16 * numBytesOutput)
   1534 {
   1535     register tok_subobj_t * tok;
   1536 
   1537     if (NULL == this || NULL == this->subObj) {
   1538         return PICODATA_PU_ERROR;
   1539     }
   1540     tok = (tok_subobj_t *) this->subObj;
   1541 
   1542     mode = mode;        /* avoid warning "var not used in this function"*/
   1543 
   1544     *numBytesOutput = 0;
   1545     while (1) { /* exit via return */
   1546         picoos_int16 ch;
   1547 
   1548         if ((tok->outWritePos - tok->outReadPos) > 0) {
   1549             if (picodata_cbPutItem(this->cbOut, &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos, numBytesOutput) == PICO_OK) {
   1550                 PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG],
   1551                     (picoos_uint8 *)"tok:", &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos);
   1552                 tok->outReadPos += *numBytesOutput;
   1553                 if (tok->outWritePos == tok->outReadPos) {
   1554                     tok->outWritePos = 0;
   1555                     tok->outReadPos = 0;
   1556                 }
   1557             }
   1558             else {
   1559                 return PICODATA_PU_OUT_FULL;
   1560             }
   1561 
   1562         }
   1563         else if (PICO_EOF != (ch = picodata_cbGetCh(this->cbIn))) {
   1564             PICODBG_DEBUG(("read in %c", (picoos_char) ch));
   1565             tok_treatChar(this, tok, (picoos_uchar) ch, /*markupHandling*/TRUE);
   1566         }
   1567         else {
   1568             return PICODATA_PU_IDLE;
   1569         }
   1570     }
   1571 }
   1572 
   1573 #ifdef __cplusplus
   1574 }
   1575 #endif
   1576 
   1577 /* end */
   1578