Home | History | Annotate | Download | only in lib
      1 /*
      2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 /**
     17  * @file picospho.h
     18  *
     19  * sentence phonemic/phonetic FSTs PU
     20  *
     21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
     22  * All rights reserved.
     23  *
     24  * History:
     25  * - 2009-04-20 -- initial version
     26  *
     27  */
     28 
     29 /** @addtogroup picospho
     30 itemtype, iteminfo1, iteminfo2, content -> TYPE(INFO1,INFO2)content
     31 in the following
     32 
     33 items input
     34 ===========
     35 
     36 processed:
     37 
     38 - WORDPHON(POS,WACC)phon
     39 
     40 - BOUND(BOUNDstrength,BOUNDtype)
     41 
     42 
     43 unprocessed:
     44 - all other item types are forwarded through the PU without modification
     45 
     46 
     47 
     48 - POS
     49   the single, unambiguous POS
     50 
     51 cf. picodata.h for
     52 - WACC    (sentence-level accent (aka prominence))
     53   PICODATA_ACC0
     54   PICODATA_ACC1
     55   PICODATA_ACC2  (<- maybe mapped to ACC1, ie. no ACC2 in output)
     56   PICODATA_ACC3
     57 
     58 
     59 - BOUNDstrength
     60   PICODATA_ITEMINFO1_BOUND_SBEG (sentence start)
     61   PICODATA_ITEMINFO1_BOUND_SEND (at sentence end)
     62   PICODATA_ITEMINFO1_BOUND_TERM (replaces a flush)
     63   PICODATA_ITEMINFO1_BOUND_PHR0 (no break)
     64   PICODATA_ITEMINFO1_BOUND_PHR1 (primary boundary)
     65   PICODATA_ITEMINFO1_BOUND_PHR2 (short break)
     66   PICODATA_ITEMINFO1_BOUND_PHR3 (secondary phrase boundary, no break)
     67 
     68 - BOUNDtype    (actually phrase type of the following phrase)
     69   PICODATA_ITEMINFO2_BOUNDTYPE_P (non-terminal phrase)
     70   PICODATA_ITEMINFO2_BOUNDTYPE_T (terminal phrase)
     71   PICODATA_ITEMINFO2_BOUNDTYPE_Q (question terminal phrase)
     72   PICODATA_ITEMINFO2_BOUNDTYPE_E (exclamation terminal phrase)
     73 
     74 
     75 output sequence (without CMDs):
     76 
     77 <output> = { BOUND(BOUND_SBEG,PHRASEtype) <sentence> BOUND(BOUND_SEND,..)} BOUND(BOUND_TERM,..)
     78 
     79 <sentence> =   <phrase> { BOUND(BOUND_PHR1|2|3,PHRASEtype) <phrase> }
     80 
     81 <phrase> = WORDPHON(POS,ACC)phon { WORDPHON(POS,ACC)phon }
     82 
     83 
     84 
     85 mapping ACC & word-level stress to syllable accent value
     86 
     87   ACC0 prim -> 0
     88   ACC1 prim -> 1
     89   ACC2 prim -> 2
     90   ACC3 prim -> 3
     91 
     92   ACC0 sec  -> 0
     93   ACC1 sec  -> 4
     94   ACC2 sec  -> 4
     95   ACC3 sec  -> 4
     96 
     97 Mapping of values to FST symbol id (has to be identical to the symbol table used when compiling the FST)
     98 
     99 Value                   FST symbol id
    100 phoneme_id      ->      phoneme_id     +  256 *  PICOKFST_PLANE_PHONEMES
    101 POS_id          ->      POS_id         +  256 *  PICOKFST_PLANE_POS
    102 phrasetype_id   ->      phrasetype_id  +  256 *  PICOKFST_PLANE_PHRASETYPES
    103 accentlevel_id  ->      accentlevel_id +  256 *  PICOKFST_PLANE_ACCENTS
    104 
    105 
    106 
    107 
    108 
    109 
    110 
    111 minimal input size (before processing starts)
    112 ==================
    113 
    114 processing (ie. sequentially applying spho transducers to phoneme sequence composed of
    115             - phonemes inside WORDPHON items and
    116             - pseudo-phonemes derived from boundaries and POS) is possible with
    117 
    118 - one phrase, consisting of a sequence of at most 30 non-PUNC items
    119   terminated by a PUNC item. A PUNC is artificially enforced if
    120   needed to start processing.
    121 
    122 - as long as the internal buffer is empty, non-processed item types
    123   can be processed immediately
    124 
    125 
    126 
    127 items output
    128 ============
    129 - BOUND(BOUNDstrength,BOUNDtype)
    130 
    131   bound strength may be changed by the fsts
    132 
    133   in addition, BOUNDs of BOUNDstrength = PHR0 are inserted to mark word boundaries
    134 
    135 - SYLLPHON(POS,ACC)phon
    136   where POS is only set for the first syllable of a word, otherwise NA
    137 
    138 
    139 
    140 
    141 
    142 
    143 other limitations
    144 =================
    145 
    146 
    147  */
    148 #ifndef PICOSPHO_H_
    149 #define PICOSPHO_H_
    150 
    151 #include "picoos.h"
    152 #include "picodata.h"
    153 #include "picorsrc.h"
    154 
    155 #ifdef __cplusplus
    156 extern "C" {
    157 #endif
    158 #if 0
    159 }
    160 #endif
    161 
    162 
    163 picodata_ProcessingUnit picospho_newSentPhoUnit(  /* prototype only; returns a picodata_ProcessingUnit -- presumably the newly created spho PU (TODO confirm against the implementation, incl. the failure value) */
    164         picoos_MemoryManager mm,      /* memory manager handle; NOTE(review): presumably used to allocate the PU -- confirm */
    165         picoos_Common common,         /* shared picoos_Common handle; its exact use is not visible in this header */
    166         picodata_CharBuffer cbIn,     /* NOTE(review): presumably the buffer the PU reads its input items from -- confirm */
    167         picodata_CharBuffer cbOut,    /* NOTE(review): presumably the buffer the PU writes its output items to -- confirm */
    168         picorsrc_Voice voice);        /* voice handle; NOTE(review): presumably the source of the FST resources -- confirm */
    169 
    170 #ifdef __cplusplus
    171 }
    172 #endif
    173 
    174 
    175 #endif /*PICOSPHO_H_*/
    176