Home | History | Annotate | Download | only in lib
      1 /*
      2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 /**
     17  * @file picokfst.h
     18  *
     19  * FST knowledge loading and access
     20  *
     21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
     22  * All rights reserved.
     23  *
     24  * History:
     25  * - 2009-04-20 -- initial version
     26  *
     27  */
     28 #ifndef PICOKFST_H_
     29 #define PICOKFST_H_
     30 
     31 #include "picodefs.h"
     32 #include "picodbg.h"
     33 #include "picoos.h"
     34 #include "picoknow.h"
     35 
     36 #ifdef __cplusplus
     37 extern "C" {
     38 #endif
     39 #if 0
     40 }
     41 #endif
     42 
      43 typedef picoos_int16 picokfst_symid_t; /* type of symbol identifiers (see PICOKFST_SYMID_EPS, PICOKFST_SYMID_ILLEG) */
      44 typedef picoos_int16 picokfst_class_t; /* type of symbol pair classes; legal classes are 1..nrClasses (see picokfst_kfstGetFSTSizes) */
      45 typedef picoos_int16 picokfst_state_t; /* type of FST states; legal states are 1..nrStates (see picokfst_kfstGetFSTSizes) */
     46 
     47 #define PICOKFST_SYMID_EPS    (picokfst_symid_t)   0   /* epsilon symbol id */
     48 #define PICOKFST_SYMID_ILLEG  (picokfst_symid_t)  -1   /* illegal symbol id */
     49 
      50 /**
      51  * @addtogroup picokfst
      52  *
      53  * Mapping of values to FST symbol ids (relevant for compiling the FST): \n
      54  * Value                   FST symbol id                    \n
      55  * --------------------------------------                    \n
      56  * phoneme_id      ->      phoneme_id     +  256 *  PICOKFST_PLANE_PHONEMES    \n
      57  * accentlevel_id  ->      accentlevel_id +  256 *  PICOKFST_PLANE_ACCENTS    \n
      58  * POS_id          ->      POS_id         +  256 *  PICOKFST_PLANE_POS        \n
      59  * pb_strength_id  ->      pb_strength_id +  256 *  PICOKFST_PLANE_PB_STRENGTHS    \n
      60  * phon_term_id    ->      phon_term_id   +  256 *  PICOKFST_PLANE_INTERN    \n
      61 */
      62 enum picokfst_symbol_plane {
      63     PICOKFST_PLANE_PHONEMES = 0,       /* phoneme plane */
      64     PICOKFST_PLANE_ASCII = 1,          /* "ascii" plane (values > 127 may be used internally); no row in the mapping table above */
      65     PICOKFST_PLANE_XSAMPA = 2,         /* x-sampa primitives plane (pico-specific table); no row in the mapping table above */
      66     PICOKFST_PLANE_ACCENTS = 4,        /* accent plane (plane number 3 is not assigned in this enum) */
      67     PICOKFST_PLANE_POS = 5,            /* part of speech plane */
      68     PICOKFST_PLANE_PB_STRENGTHS = 6,   /* phrase boundary strength plane */
      69     PICOKFST_PLANE_INTERN = 7          /* internal plane, e.g. phonStartId, phonTermId */
      70 };
     71 
      72 /* transduction mode flags; the enumerators are distinct bits, to be used as a bit set, e.g.
      73  * picoos_uint8 transductionMode = PICOKFST_TRANSMODE_NEWSYMS | PICOKFST_TRANSMODE_POSUSED;
      74  * NOTE(review): the enum tag is spelled 'picofst_', not 'picokfst_' like the rest of this header. */
      75 enum picofst_transduction_mode {
      76     PICOKFST_TRANSMODE_NEWSYMS = 1, /* e.g. {#WB},{#PB-S},{#PB-W},{#ACC0},{#ACC1},{#ACC2},{#ACC3} (presumably "new symbols" - TODO confirm) */
      77     PICOKFST_TRANSMODE_POSUSED = 2 /* FST contains Part Of Speech symbols */
      78 
      79 };
     80 
     81 
     82 /* ************************************************************/
     83 /* function to create specialized kb, */
     84 /* to be used by knowledge layer (picorsrc) only */
     85 /* ************************************************************/
     86 
      87 /* precomputes a small amount of data (e.g. addresses) from kb 'this' for fast access.
      88  * This data is encapsulated in a picokfst_FST that can later be retrieved
      89  * with picokfst_getFST. Returns a pico_status_t status code. */
      90 pico_status_t picokfst_specializeFSTKnowledgeBase(picoknow_KnowledgeBase this,
      91                                                   picoos_Common common);
     92 
     93 
     94 /* ************************************************************/
     95 /* FST type and getFST function */
     96 /* ************************************************************/
     97 
      98 /* FST type: handle to a struct picokfst_fst, which is only forward-declared in this header */
      99 typedef struct picokfst_fst * picokfst_FST;
    100 
     101 /* returns the FST of kb 'this' for usage in a PU (the picokfst_FST set up by picokfst_specializeFSTKnowledgeBase) */
     102 picokfst_FST picokfst_getFST(picoknow_KnowledgeBase this);
    103 
    104 
    105 /* ************************************************************/
    106 /* FST access methods */
    107 /* ************************************************************/
    108 
     109 /* returns the transduction mode specified with the rule sources;
     110    the result is to be interpreted as a bit set of enum picofst_transduction_mode values */
     111 picoos_uint8 picokfst_kfstGetTransductionMode(picokfst_FST this);
    112 
     113 /* returns the number of states in '*nrStates' and the number of pair classes in '*nrClasses';
     114    legal states are 1..nrStates, legal classes are 1..nrClasses */
     115 void picokfst_kfstGetFSTSizes (picokfst_FST this, picoos_int32 *nrStates, picoos_int32 *nrClasses);
    116 
     117 /* starts a search for all pairs with input symbol 'inSym'; '*inSymFound' returns whether
     118    such pairs exist at all; '*searchState' returns a search state to be passed to
     119    subsequent calls to function 'picokfst_kfstGetNextPair', which must be used
     120    to get the symbol pairs */
     121 void picokfst_kfstStartPairSearch (picokfst_FST this, picokfst_symid_t inSym,
     122                                           picoos_bool * inSymFound, picoos_int32 * searchState);
    123 
     124 /* gets the next pair for the input symbol specified with the preceding call to 'picokfst_kfstStartPairSearch';
     125    '*searchState' maintains the search state, '*pairFound' returns whether another pair was found,
     126    '*outSym' returns the output symbol of the found pair, and '*pairClass' returns the
     127    transition class of the found symbol pair */
     128 void picokfst_kfstGetNextPair (picokfst_FST this, picoos_int32 * searchState,
     129                                       picoos_bool * pairFound,
     130                                       picokfst_symid_t * outSym, picokfst_class_t * pairClass);
    131 
     132 /* attempts to do an FST transition from state 'startState' with pair class 'transClass';
     133    if such a transition exists, '*endState' returns the end state of the transition (> 0),
     134    otherwise '*endState' returns a value <= 0 */
     135 void picokfst_kfstGetTrans (picokfst_FST this, picokfst_state_t startState, picokfst_class_t transClass,
     136                                    picokfst_state_t * endState);
    137 
     138 /* starts a search for all pairs with input epsilon symbol and all corresponding
     139    FST transitions starting in state 'startState'; to be used for fast
     140    computation of epsilon closures;
     141    '*inEpsTransFound' returns whether any such transition was found at all;
     142    if so, '*searchState' returns a search state to be used in subsequent calls
     143    to 'picokfst_kfstGetNextInEpsTrans' */
     144 void picokfst_kfstStartInEpsTransSearch (picokfst_FST this, picokfst_state_t startState,
     145                                                 picoos_bool * inEpsTransFound, picoos_int32 * searchState);
    146 
     147 /* gets the next FST transition with a pair with empty input symbol starting from the state
     148    previously specified in 'picokfst_kfstStartInEpsTransSearch';
     149    '*searchState' maintains the search state, '*inEpsTransFound' returns
     150    whether a new transition with input epsilon was found, '*outSym' returns
     151    the output symbol of the found pair, and '*endState' returns the end state
     152    of the found transition with that pair */
     153 void picokfst_kfstGetNextInEpsTrans (picokfst_FST this, picoos_int32 * searchState,
     154                                             picoos_bool * inEpsTransFound,
     155                                             picokfst_symid_t * outSym, picokfst_state_t * endState);
    156 
     157 /* returns whether 'state' is an accepting state of the FST; originally, only
     158    state 1 was an accepting state; however, in order to remove the need to
     159    always do a last transition with a termination symbol pair, this function
     160    defines a state as an accepting state if there is a transition to state 1
     161    with the terminator symbol pair */
     162 picoos_bool picokfst_kfstIsAcceptingState (picokfst_FST this, picokfst_state_t state);
    163 
    164 #ifdef __cplusplus
    165 }
    166 #endif
    167 
    168 
    169 #endif /*PICOKFST_H_*/
    170