Home | History | Annotate | Download | only in lib
      1 /*
      2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 /**
     17  * @file picowa.h
     18  *
     19  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
     20  * All rights reserved.
     21  *
     22  * History:
     23  * - 2009-04-20 -- initial version
     24  *
     25  */
     26 
     27 
     28 /**
     29  * @addtogroup picowa
     30  * ---------------------------------------------------\n
     31  * <b> Pico Word Analysis </b>\n
     32  * ---------------------------------------------------\n
     33 itemtype, iteminfo1, iteminfo2, content -> TYPE(INFO1,INFO2)content
     34 in the following
     35 
     36 items input\n
     37 ===========
     38 
     39 processed by wa:
     40 - WORDGRAPH(NA,NA)graph
     41 - OTHER(NA,NA)string
     42 
     43 unprocessed:
     44 - all other item types are forwarded through the PU without modification:
     45   - PUNC
     46   - CMD
     47 
     48 
     49 minimal input size (before processing starts)\n
     50 ==================
     51 
     52 processing (i.e. lex lookup and POS prediction) is possible with
     53 - one item
     54 
     55 
     56 items processed and output\n
     57 ==========================
     58 
     59 processing an input WORDGRAPH results in one of the following items:
     60 - WORDGRAPH(POSes,NA)graph
     61    - graph not in lex, POSes determined with dtree, or
     62    - graph in lex - single entry without phone (:G2P), POSes from lex
     63 - WORDINDEX(POSes,NA)pos1|ind1...posN|indN
     64    - graph in lex - {1,4} entries with phone, pos1...posN from lex,
     65      {1,4} lexentries indices in content, POSes combined with map table
     66      in klex
     67 
     68 processing an input OTHER results in the item being skipped (in the
     69 future this can be extended to e.g. spelling)
     70 
     71 see picotok.h for PUNC and CMD
     72 
     73 - POSes %d
     74   - is the superset of all single POS and POS combinations defined
     75   in the lingware as unique symbol
     76 - graph, len>0, utf8 graphemes, %s
     77 - pos1|ind1, pos2|ind2, ..., posN|indN
     78   - pos? are the single, unambiguous POS only, one byte %d
     79   - ind? are the lexentry indices, three bytes %d %d %d
     80 
     81 
     82 lexicon (system lexicon, but must also be ensured for user lexica)\n
     83 =======
     84 
     85 - POS GRAPH PHON, all mandatory, but
     86   - * PHON can be an empty string -> no pronunciation in the resulting TTS output
     87   - * PHON can be :G2P -> use G2P later to add pronunciation
     88 - (POS,GRAPH) is a unique key (only one entry allowed)
     89 - (GRAPH) is almost a unique key (2-4 entries with the same GRAPH, and
     90   differing POS and differing PHON possible)
     91   - for one graph we can have 2-4 solutions from the lex which all
     92      need to be passed on to the next PU
     93   - in this case GRAPH, POS, and PHON all must be available in lex
     94   - in this case for each entry only a non-ambiguous, unique POS ID
     95      is possible
     96 
     97 other limitations\n
     98 =================
     99 
    100 - item size: header plus len=256 (valid for Pico in general)
    101 - wa uses one item context only -> internal buffer set to 256+4
    102  */
    103 
    104 
    105 #ifndef PICOWA_H_
    106 #define PICOWA_H_
    107 
    108 #include "picoos.h"
    109 #include "picodata.h"
    110 #include "picorsrc.h"
    111 
    112 #ifdef __cplusplus
    113 extern "C" {
    114 #endif
    115 #if 0
    116 }
    117 #endif
    118 
    119 
    120 /* maximum length of an item incl. head for input and output buffers;
         * this is the "256+4" internal buffer size listed under "other
         * limitations" in the file comment: len=256 of item content plus,
         * presumably, a 4-element item head -- NOTE(review): confirm the head
         * size against the item definition in picodata.h */
    121 #define PICOWA_MAXITEMSIZE 260
    122 
    123 
    124 picodata_ProcessingUnit picowa_newWordAnaUnit(  /* prototype only: returns a processing unit (PU) for word analysis as described in the file comment; body and failure behaviour are not visible in this header */
    125         picoos_MemoryManager mm,                /* memory manager -- presumably used to allocate the PU; TODO confirm in the implementation */
    126     picoos_Common common,                       /* picoos common object -- NOTE(review): role not visible here, confirm */
    127         picodata_CharBuffer cbIn,               /* char buffer, by its name the PU's input (WORDGRAPH/OTHER/PUNC/CMD items) -- verify */
    128         picodata_CharBuffer cbOut,              /* char buffer, by its name the PU's output (WORDGRAPH/WORDINDEX and forwarded items) -- verify */
    129         picorsrc_Voice voice);                  /* voice resource -- presumably supplies the lex and dtree data used for POS lookup; confirm */
    130 
    131 #ifdef __cplusplus
    132 }
    133 #endif
    134 
    135 #endif /*PICOWA_H_*/
    136