Home | History | Annotate | Download | only in lib

Lines Matching refs:tok

211  *  shortcut  : tok
270 static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
271 static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling);
272 static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok);
273 static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[]);
274 static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok);
318 static void tok_startIgnore (tok_subobj_t * tok)
320 tok->ignLevel++;
324 static void tok_endIgnore (tok_subobj_t * tok)
326 if (tok->ignLevel > 0) {
327 tok->ignLevel--;
434 static picoos_int32 tok_putToUtf (tok_subobj_t * tok, picoos_uchar ch)
436 if (tok->utfpos < PICOBASE_UTF8_MAXLEN) {
437 tok->utf[tok->utfpos] = ch;
438 if (tok->utfpos == 0) {
439 tok->utflen = picobase_det_utf8_length(ch);
441 tok->utflen = 0;
443 (tok->utfpos)++;
444 if ((tok->utfpos == tok->utflen)) {
445 if ((tok->utfpos < PICOBASE_UTF8_MAXLEN)) {
446 tok->utf[tok->utfpos] = 0;
449 } else if (tok->utfpos < tok->utflen) {
482 static void tok_putItem (picodata_ProcessingUnit this, tok_subobj_t * tok,
490 tok->outBuf[tok->outWritePos++] = itemType;
491 tok->outBuf[tok->outWritePos++] = info1;
492 tok->outBuf[tok->outWritePos++] = info2;
493 tok->outBuf[tok->outWritePos++] = 0;
495 else if (tok->ignLevel <= 0) {
508 if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
509 tok->outBuf[tok->outWritePos++] = itemType;
510 tok->outBuf[tok->outWritePos++] = info1;
511 tok->outBuf[tok->outWritePos++] = info2;
512 tok->outBuf[tok->outWritePos++] = len;
514 tok->outBuf[tok->outWritePos++] = str[i];
523 if (tok->outWritePos + 4 < OUT_BUF_SIZE) {
524 tok->outBuf[tok->outWritePos++] = itemType;
525 tok->outBuf[tok->outWritePos++] = info1;
526 tok->outBuf[tok->outWritePos++] = info2;
527 tok->outBuf[tok->outWritePos++] = 0;
539 if (tok->outWritePos + 4 + 2 < OUT_BUF_SIZE) {
540 tok->outBuf[tok->outWritePos++] = itemType;
541 tok->outBuf[tok->outWritePos++] = info1;
542 tok->outBuf[tok->outWritePos++] = info2;
543 tok->outBuf[tok->outWritePos++] = 2;
544 tok->outBuf[tok->outWritePos++] = val % 256;
545 tok->outBuf[tok->outWritePos++] = val / 256;
557 if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) {
558 tok->outBuf[tok->outWritePos++] = itemType;
559 tok->outBuf[tok->outWritePos++] = info1;
560 tok->outBuf[tok->outWritePos++] = info2;
561 tok->outBuf[tok->outWritePos++] = len;
563 tok->outBuf[tok->outWritePos++] = str[i];
577 static void tok_putItem2 (picodata_ProcessingUnit this, tok_subobj_t * tok,
586 tok->outBuf[tok->outWritePos++] = type;
587 tok->outBuf[tok->outWritePos++] = info1;
588 tok->outBuf[tok->outWritePos++] = info2;
589 tok->outBuf[tok->outWritePos++] = len;
591 tok->outBuf[tok->outWritePos++] = data[i];
673 static void tok_interpretMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_bool isStartTag, MarkupId mId)
694 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
695 tok_startIgnore(tok);
697 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
698 tok_endIgnore(tok);
703 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
704 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
706 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
708 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
710 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
713 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
714 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEED_DEFAULT, (picoos_uchar*)"");
719 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
720 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
722 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
724 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
726 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
729 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
730 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_PITCH_DEFAULT, (picoos_uchar*)"");
735 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
736 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
738 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
740 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
742 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
745 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
746 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_VOLUME_DEFAULT, (picoos_uchar*)"");
751 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) {
752 if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) {
754 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)"");
756 uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal);
758 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)"");
761 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
762 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEAKER_DEFAULT, (picoos_uchar*)"");
768 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
769 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
770 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
771 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
773 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
774 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
775 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
776 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)"");
781 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
782 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
784 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
785 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, PICO_CONTEXT_DEFAULT);
790 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) {
791 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_MARKER, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal);
793 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
799 tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
800 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
801 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, valStr);
803 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
804 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
805 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, (picoos_uchar*)"");
811 tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound);
812 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
813 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, valStr);
815 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
816 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
817 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, PARAGRAPH_PAUSE_DUR, (picoos_uchar*)"");
818 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, (picoos_uchar*)"");
823 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWTime)) {
824 tok_getDur(tok->markupParams[0].paramVal, & dur, & done1);
827 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, dur, (picoos_uchar*)"");
830 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
836 if (tok_strEqual(tok->markupParams[0].paramId, KWMode)) {
837 if (tok_strEqual(tok->markupParams[0].paramVal, KWPB)) {
839 } else if (tok_strEqual(tok->markupParams[0].paramVal, KWSB)) {
842 tok_getDur(tok->markupParams[0].paramVal, & uval, & done1);
851 tok_putItem(this, tok
853 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
854 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
859 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
860 if (tok->saveFile[0] != 0) {
861 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
862 picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, tok->saveFile);
863 tok->saveFile[0] = 0;
865 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SAVE,
866 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/FALSE), 0, tok->markupParams[0].paramVal);
867 picoos_strcpy((picoos_char*)tok->saveFile, (picoos_char*)tok->markupParams[0].paramVal);
869 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
870 if (tok->saveFile[0] != 0) {
871 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE,
872 picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, (picoos_uchar*)"");
873 tok->saveFile[0] = 0;
879 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
880 if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
881 tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
882 tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
883 tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3,& paramFound);
884 tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
885 tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
886 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
887 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
888 tok_startIgnore(tok);
890 if (tok->ignLevel > 0) {
891 tok_startIgnore(tok);
893 picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead\n", tok->markupParams[0].paramVal);
897 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
898 tok_endIgnore(tok);
903 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) {
904 if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) {
905 tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound);
906 tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound);
907 tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3, & paramFound);
908 tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound);
909 tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound);
910 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY,
911 picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal);
912 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_START, 0, (picoos_uchar*)"");
914 if (tok->ignLevel <= 0) {
915 picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead", tok->markupParams[0].paramVal);
919 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
920 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)"");
927 if (tok_strEqual(tok->markupParams[0].paramId, KWAlphabet) && tok_strEqual(tok->markupParams[1].paramId, KWPH)) {
928 if (tok_strEqual(tok->markupParams[2].paramId, KWOrthMode)
929 && tok_strEqual(tok->markupParams[2].paramVal, KWIgnorePunct)) {
932 if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[1].paramVal, tok->markupParams[0].paramVal, tok->phonemes, sizeof(tok->phonemes)-1) == PICO_OK) {
933 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
934 PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
937 PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
938 picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal);
941 } else if (tok_strEqual(tok->markupParams[0].paramId, KWPH)) {
942 if (tok_strEqual(tok->markupParams[1].paramId, KWOrthMode)
943 && tok_strEqual(tok->markupParams[1].paramVal, KWIgnorePunct)) {
946 if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[0].paramVal, PICODATA_XSAMPA, tok->phonemes, sizeof(tok->phonemes)) == PICO_OK) {
947 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
948 PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes);
952 PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal));
953 picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizing text instead", tok->markupParams[0].paramVal);
957 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) {
958 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME,
964 if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWType) &&
965 tok_strEqual(tok->markupParams[1].paramId, KWInfo1)&&
966 tok_strEqual(tok->markupParams[2].paramId, KWInfo2)&&
967 tok_strEqual(tok->markupParams[3].paramId, KWDATA)) {
969 type = picoos_atoi(tok->markupParams[0].paramVal);
970 info1 = picoos_atoi(tok->markupParams[1].paramVal);
971 info2 = picoos_atoi(tok->markupParams[2].paramVal);
973 len2 = (picoos_int32)picoos_strlen(tok->markupParams[3].paramVal);
975 while ((tok->markupParams[3].paramVal[n] != 0) && (tok->markupParams[3].paramVal[n] <= 32)) {
978 tok->markupParams[3].paramVal[n2] = tok->markupParams[3].paramVal[n];
986 picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
996 picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal),
1000 tok_putItem2(this, tok, type, info1, info2, len, data);
1006 } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) {
1014 tok->markupTagErr = MEInterprete;
1017 tok->markupLevel[mId]++;
1018 } else if ((tok->markupLevel[mId] > 0)) {
1019 tok->markupLevel[mId]--;
1046 static void tok_putToSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[], pico_tokenType type, pico_tokenSubType subtype)
1053 if (tok->tokenPos >= IN_BUF_SIZE) {
1055 tok_treatSimpleToken(this, tok);
1057 tok->tokenStr[tok->tokenPos] = str[i];
1058 tok->tokenPos++;
1061 tok->tokenType = type;
1062 tok->tokenSubType = subtype;
1066 static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[])
1071 tok->markupTagErr = MENone;
1074 if (tok->markupPos >= (MARKUP_STRING_BUF_SIZE - 1)) {
1075 if ((tok->markupPos == (MARKUP_STRING_BUF_SIZE - 1)) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) {
1078 tok->markupState = MSErrorTooLong;
1079 } else if ((str[i] == (picoos_uchar)' ') && ((tok->markupState == MSExpectingmarkupTagName) || (tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSGotAttrName) || (tok->markupState == MSGotEqual) || (tok->markupState == MSGotAttrValue))) {
1080 } else if ((str[i] == (picoos_uchar)'>') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
1081 tok->markupState = MSGotEnd;
1082 } else if ((str[i] == (picoos_uchar)'/') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) {
1083 if (tok->markupTagType == MTEnd) {
1084 tok->markupTagErr = MEUnexpectedChar;
1085 tok->markupState = MSError;
1087 tok->markupTagType = MTEmpty;
1088 tok->markupState = MSGotEndSlash;
1091 switch (tok->markupState) {
1094 tok_clearMarkupParams(tok->markupParams);
1095 tok->nrMarkupParams = 0;
1096 tok->strPos = 0;
1097 tok->markupTagType = MTStart;
1098 tok->markupState = MSGotStart;
1100 tok->markupTagErr = MEMissingStart;
1101 tok->markupState = MSError;
1106 tok->markupTagType = MTEnd;
1107 tok->markupState = MSExpectingmarkupTagName;
1109 tok->markupState = MSExpectingmarkupTagName;
1111 tok->markupTagType = MTStart;
1112 tok->markupTagName[tok->strPos] = str[i];
1113 tok->strPos++;
1114 tok->markupTagName[tok->strPos] = 0;
1115 tok->markupState = MSInmarkupTagName;
1117 tok->markupTagErr = MEUnexpectedChar;
1118 tok->markupState = MSError;
1122 if (tok_idChar(str[i],tok->markupState == MSExpectingmarkupTagName)) {
1123 tok->markupTagName[tok->strPos] = str[i];
1124 tok->strPos++;
1125 tok->markupTagName[(tok->strPos)] = 0;
1126 tok->markupState = MSInmarkupTagName;
1127 } else if ((tok->markupState == MSInmarkupTagName) && (str[i] == (picoos_uchar)' ')) {
1128 tok->markupState = MSGotmarkupTagName;
1129 picobase_lowercase_utf8_str(tok->markupTagName, (picoos_char*)tok->markupTagName, IN_BUF_SIZE, &ok);
1130 tok->strPos = 0;
1132 tok->markupTagErr = MEIdent;
1133 tok->markupState = MSError;
1138 if (tok->markupTagType == MTEnd) {
1139 tok->markupTagErr = MEUnexpectedChar;
1140 tok->markupState = MSError;
1142 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1143 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
1144 tok->strPos++;
1145 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
1149 tok->markupState = MSInAttrName;
1152 tok->markupTagErr = MEUnexpectedChar;
1153 tok->markupState = MSError;
1158 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1159 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i];
1160 tok->strPos++;
1161 tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0;
1163 tok->markupState = MSInAttrName;
1165 picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok);
1166 tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr);
1167 tok->markupState = MSGotAttrName;
1169 picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok);
1170 tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr);
1171 tok->markupState = MSGotEqual;
1173 tok->markupTagErr = MEMissingEqual;
1174 tok->markupState = MSError;
1179 tok->markupState = MSGotEqual;
1181 tok->markupTagErr = MEMissingEqual;
1182 tok->markupState = MSError;
1187 tok->strDelim = str[i];
1188 tok->strPos = 0;
1189 tok->markupState = MSInAttrValue;
1191 tok->markupTagErr = MEMissingQuote;
1192 tok->markupState = MSError;
1196 if (!(tok->isFileAttr) && (str[i] == (picoos_uchar)'\\')) {
1197 tok->markupState = MSInAttrValueEscaped;
1198 } else if (str[i] == tok->strDelim) {
1199 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1200 tok->nrMarkupParams++;
1202 tok->strPos = 0;
1203 tok->markupState = MSGotAttrValue;
1205 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1206 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
1207 tok->strPos++;
1208 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
1210 tok->markupState = MSInAttrValue;
1214 if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) {
1215 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i];
1216 tok->strPos++;
1217 tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0;
1219 tok->markupState = MSInAttrValue;
1223 tok->markupState = MSGotEnd;
1225 tok->markupTagErr = MEUnexpectedChar;
1226 tok->markupState = MSError;
1230 tok->markupTagErr = MEUnexpectedChar;
1231 tok->markupState = MSError;
1235 if (tok->markupTagErr == MENone) {
1236 tok->markupStr[tok->markupPos] = str[i];
1237 tok->markupPos++;
1239 tok->markupStr[tok->markupPos] = 0;
1242 PICODBG_DEBUG(("putToMarkup %s", tok->markupStr));
1248 static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
1252 tok->utfpos = 0;
1253 tok->utflen = 0;
1254 tok->markupState = MSNotInMarkup;
1255 for (i = 0; i < tok->markupPos; i++) {
1256 tok_treatChar(this, tok, tok->markupStr[i], FALSE);
1258 tok->markupPos = 0;
1259 tok->strPos = 0;
1263 static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok)
1267 if (tok_markupTagId(tok->markupTagName) != MIDummyEnd) {
1268 if (tok->markupTagErr == MENone) {
1269 tok->markupState = MSNotInMarkup;
1270 if ((tok->tokenType != PICODATA_ITEMINFO1_TOKTYPE_SPACE) && (tok->tokenType != PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED)) {
1271 tok_treatSimpleToken(this, tok);
1273 tok_putToSimpleToken(this, tok, (picoos_uchar*)" ", PICODATA_ITEMINFO1_TOKTYPE_SPACE, -1);
1274 mId = tok_markupTagId(tok->markupTagName);
1275 if ((tok->markupTagType == MTStart) || (tok->markupTagType == MTEmpty)) {
1276 tok_interpretMarkup(this, tok, TRUE, mId);
1278 if (((tok->markupTagType == MTEnd) || (tok->markupTagType == MTEmpty))) {
1279 tok_clearMarkupParams(tok->markupParams);
1280 tok->nrMarkupParams = 0;
1281 tok_interpretMarkup(this, tok, FALSE,mId);
1284 if (tok->markupTagErr != MENone) {
1285 if (!tok->aborted) {
1286 picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"syntax error in markup token '%s'",tok->markupStr);
1288 tok_treatMarkupAsSimpleToken(this, tok);
1291 tok_treatMarkupAsSimpleToken(this, tok);
1293 tok->markupState = MSNotInMarkup;
1294 tok->markupPos = 0;
1295 tok->strPos = 0;
1300 static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling)
1311 tok_treatSimpleToken(this, tok);
1312 tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)"");
1315 switch (tok_putToUtf(tok, ch)) {
1317 tok->utfpos = 0;
1318 tok->utflen = 0;
1323 markupHandling = (markupHandling && (tok->markupHandlingMode == MARKUP_HANDLING_ENABLED));
1324 id = picoktab_graphOffset(tok->graphTab, tok->utf);
1326 if (picoktab_getIntPropTokenType(tok->graphTab, id, &uval8)) {
1332 dummy = picoktab_getIntPropTokenSubType(tok->graphTab, id, &subtype);
1333 } else if (tok->utf[tok->utfpos-1] <= (picoos_uchar)' ') {
1340 if ((tok->utf[tok->utfpos-1] > (picoos_uchar)' ')) {
1341 tok->nrEOL = 0;
1342 } else if ((tok->utf[tok->utfpos-1] == EOL)) {
1343 tok->nrEOL++;
1345 if (markupHandling && (tok->markupState != MSNotInMarkup)) {
1346 tok_putToMarkup(this, tok, tok->utf);
1347 if (tok->markupState >= MSError) {
1348 picoos_strlcpy(utf2, tok->utf, 5);
1349 utf2pos = tok->utfpos;
1352 tok_treatMarkupAsSimpleToken(this, tok);
1354 tok_treatChar(this, tok, utf2[i], markupHandling);
1356 } else if (tok->markupState == MSGotEnd) {
1357 tok_treatMarkup(this, tok);
1359 } else if ((markupHandling && (tok->utf[tok->utfpos-1] == (picoos_uchar)'<'))) {
1360 tok_putToMarkup(this, tok, tok->utf);
1362 if ((type != tok->tokenType) || (type == PICODATA_ITEMINFO1_TOKTYPE_CHAR) || (subtype != tok->tokenSubType)) {
1363 tok_treatSimpleToken(this, tok);
1364 } else if ((tok->utf[tok->utfpos-1] == EOL) && (tok->nrEOL == 2)) {
1365 tok_treatSimpleToken(this, tok);
1366 tok_putToSimpleToken(this, tok, (picoos_uchar*)".", PICODATA_ITEMINFO1_TOKTYPE_CHAR, -1);
1367 tok_treatSimpleToken(this, tok);
1369 tok_putToSimpleToken(this, tok, tok->utf, type, subtype);
1371 tok_treatSimpleToken(this, tok);
1373 tok->utfpos = 0;
1374 tok->utflen = 0;
1381 static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok)
1383 if (tok->tokenPos < IN_BUF_SIZE) {
1384 tok->tokenStr[tok->tokenPos] = 0;
1386 if (tok->markupState != MSNotInMarkup) {
1387 if (!(tok->aborted) && (tok->markupState >= MSGotmarkupTagName) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) {
1388 picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"unfinished markup tag '%s'",tok->markupStr);
1390 tok_treatMarkupAsSimpleToken(this, tok);
1391 tok_treatSimpleToken(this, tok);
1392 } else if ((tok->tokenPos > 0) && ((tok->ignLevel <= 0) || (tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_SPACE))) {
1393 tok_putItem(this, tok, PICODATA_ITEM_TOKEN, tok->tokenType, (picoos_uint8)tok->tokenSubType, 0, tok->tokenStr);
1395 tok->tokenPos = 0;
1396 tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
1397 tok->tokenSubType = -1;
1404 tok_subobj_t * tok;
1410 tok = (tok_subobj_t *) this->subObj;
1412 tok->ignLevel = 0;
1414 tok->utfpos = 0;
1415 tok->utflen = 0;
1417 tok_clearMarkupParams(tok->markupParams);
1418 tok->nrMarkupParams = 0;
1419 tok->markupState = MSNotInMarkup;
1420 tok->markupPos = 0;
1422 tok->markupLevel[mId] = 0;
1424 tok->markupTagName[0] = 0;
1425 tok->markupTagType = MTNone;
1426 tok->markupTagErr = MENone;
1428 tok->strPos = 0;
1429 tok->strDelim = 0;
1430 tok->isFileAttr = FALSE;
1432 tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED;
1433 tok->tokenSubType = -1;
1434 tok->tokenPos = 0;
1436 tok->nrEOL = 0;
1439 tok->markupHandlingMode = TRUE;
1440 tok->aborted = FALSE;
1442 tok->start = TRUE;
1444 tok->outReadPos = 0;
1445 tok->outWritePos = 0;
1447 tok->saveFile[0] = 0;
1450 tok->graphTab = picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]);
1452 tok->xsampa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA_PARSE]);
1453 PICODBG_TRACE(("got xsampa_parser @ %i",tok->xsampa_parser));
1455 tok->svoxpa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_SVOXPA_PARSE]);
1456 PICODBG_TRACE(("got svoxpa_parser @ %i",tok->svoxpa_parser));
1458 tok->xsampa2svoxpa_mapper = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA2SVOXPA]);
1459 PICODBG_TRACE(("got xsampa2svoxpa_mapper @ %i",tok->xsampa2svoxpa_mapper));
1470 tok_subobj_t * tok;
1475 tok = (tok_subobj_t *) this->subObj;
1503 tok_subobj_t * tok;
1518 tok = (tok_subobj_t *) this->subObj;
1519 tok->transducer = picotrns_newSimpleTransducer(mm, common, 10*(PICOTRNS_MAX_NUM_POSSYM+2));
1520 if (NULL == tok->transducer) {
1535 register tok_subobj_t * tok;
1540 tok = (tok_subobj_t *) this->subObj;
1548 if ((tok->outWritePos - tok->outReadPos) > 0) {
1549 if (picodata_cbPutItem(this->cbOut, &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos, numBytesOutput) == PICO_OK) {
1551 (picoos_uint8 *)"tok:", &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos);
1552 tok->outReadPos += *numBytesOutput;
1553 if (tok->outWritePos == tok->outReadPos) {
1554 tok->outWritePos = 0;
1555 tok->outReadPos = 0;
1565 tok_treatChar(this, tok, (picoos_uchar) ch, /*markupHandling*/TRUE);