Home | History | Annotate | Download | only in include
      1 /*---------------------------------------------------------------------------*
      2  *  utteranc.h  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 
     21 
     22 #ifndef _h_utteranc_
     23 #define _h_utteranc_
     24 
     25 #ifdef SET_RCSID /* the revision string below is only compiled in when SET_RCSID is defined */
     26 static const char utteranc_h[] = "$Id: utteranc.h,v 1.3.6.7 2007/08/31 17:44:53 dahan Exp $"; /* file-local (static), so each including translation unit gets its own copy */
     27 #endif /* SET_RCSID */
     28 
     29 
     30 
     31 #include "all_defs.h"
     32 #include "hmm_type.h"
     33 #include "fpi_tgt.h"
     34 #include "voicing.h"
     35 #include "specnorm.h"
     36 #include "channorm.h"
     37 #include "swicms.h"
     38 #ifndef _RTT
     39 #include "duk_io.h"
     40 #endif
     41 
     42 #define DEFAULT_BUFFER_SIZE 100 /* in frames; presumably the default for init_utterance's buffer_size argument - confirm */
     43 #define KEEP_FRAMES   40 /* in frames, past frames kept; presumably the default for init_utterance's keep_frames argument - confirm */
     44 
     45 /*  Functions supported are
     46 **  new, delete (by source)
     47 **  open file/device, close file/device
     48 **  attach and detach sink
     49 **  read/store samples - including the header
     50 */
     51 
     52 /**
     53  * Label (annotation) record: a text label with a begin/end range. In this header it is the element type of utt_file_info.utb_table and of the tag arrays taken by read_tcp, read_lst, read_utb_table and save_tcp.
     54  */
     55 typedef struct
     56 {                /* label structure */
     57   char *label;          /* label text; who owns/frees it is not visible in this header */
     58   long begin;           /* start of the labelled range; units not stated here (presumably frames - confirm) */
     59   long end;             /* end of the labelled range; presumably in the same units as begin - confirm */
     60   char *extra;          /* additional text attached to the label; meaning not visible in this header */
     61   unsigned char flag;   /* flag byte; meaning not visible in this header */
     62 }
     63 annotate;
     64 
     65 
     66 /**
     67  * Generic view of an utterance (the gen_utt member of utterance_info). The same members, in the same order, form the start of file_utterance_info and the whole of live_utterance_info, so keep the three definitions in step when editing any of them.
     68  */
     69 typedef struct
     70 {
     71   int   utt_type;       /* live or from file (as noted on utterance_info.utt_type) */
     72   int   dim;            /* dimension; presumably the number of parameters per frame - confirm */
     73   fepFramePkt  *frame;  /* pointer to a fepFramePkt (type declared in an included header) */
     74   int   num_chan;       /* same name as the num_chan argument of init_utterance */
     75   int   do_channorm;    /* presumably nonzero when channel normalization is applied - confirm */
     76   spect_dist_info **spchchan; /*  Mirrored from the Wave object */
     77   norm_info   *channorm; /*  Mirrored from the Wave object */
     78   swicms_norm_info     *swicms;    /* copy of wave obj pointer */
     79   spect_dist_info *backchan[MAX_CHAN_DIM]; /* array of MAX_CHAN_DIM pointers; purpose not visible in this header */
     80   featdata  *last_push; /* presumably the most recently pushed feature data - confirm */
     81   int   voice_duration;   /* same name as the set_voicing_durations argument */
     82   int   quiet_duration;   /* same name as the set_voicing_durations argument */
     83   int   unsure_duration;  /* same name as the set_voicing_durations argument */
     84   int   start_windback;   /* same name as the set_voicing_durations argument */
     85 }
     86 utt_generic_info;
     87 
     88 #ifndef _RTT
     89 /**
     90  * File details of a file-based utterance: data type code, byte order, frame skipping, length, the open file and its name, and the utb table. Embedded as the file member of file_utterance_info; only compiled when _RTT is not defined.
     91  */
     92 typedef struct
     93 {
     94   char  typ;  /* data type code: s (16 bit), c (8 bit), u (newton .utb) */
     95   int   endian;  /* byte order: 0 is little 1 is big */
     96   int   do_skip; /* skip every other frame */
     97   unsigned long len;  /* length of file/utterance; units not stated here */
     98   PFile* file;  /* pointer to file */
     99   char  name[MAX_LABEL]; /* file name, held in a buffer of MAX_LABEL chars */
    100   /*    int   op;  read or write */
    101   int   num_utts; /* no. of utterances in utb file */
    102   annotate  *utb_table; /* utb file header information, as an array of annotate records */
    103 }
    104 utt_file_info;
    105 
    106 /**
    107  * File-based view of an utterance (the file_utt member of utterance_info): the members of utt_generic_info, in the same order, followed by the file details. Only compiled when _RTT is not defined.
    108  */
    109 typedef struct
    110 {
    111   int   utt_type;       /* live or from file (as noted on utterance_info.utt_type) */
    112   int   dim;            /* dimension; presumably the number of parameters per frame - confirm */
    113   fepFramePkt  *frame;  /* pointer to a fepFramePkt (type declared in an included header) */
    114   int   num_chan;       /* same name as the num_chan argument of init_utterance */
    115   int   do_channorm;    /* presumably nonzero when channel normalization is applied - confirm */
    116   spect_dist_info **spchchan; /*  Mirrored from the Wave object */
    117   norm_info   *channorm; /*  Mirrored from the Wave object */
    118   swicms_norm_info    *swicms;          /* copy of wave obj pointer */
    119   spect_dist_info *backchan[MAX_CHAN_DIM]; /* array of MAX_CHAN_DIM pointers; purpose not visible in this header */
    120   featdata  *last_push; /* presumably the most recently pushed feature data - confirm */
    121   int   voice_duration;   /* same name as the set_voicing_durations argument */
    122   int   quiet_duration;   /* same name as the set_voicing_durations argument */
    123   int   unsure_duration;  /* same name as the set_voicing_durations argument */
    124   int   start_windback;   /* same name as the set_voicing_durations argument */
    125   /*    voicing_info voice; */
    126   utt_file_info file;     /* file-specific details; the only member not present in utt_generic_info */
    127 }
    128 file_utterance_info;
    129 #endif
    130 
    131 /**
    132  * Live view of an utterance (the live_utt member of utterance_info). As written it has exactly the members of utt_generic_info, in the same order, and is declared whether or not _RTT is defined.
    133  */
    134 typedef struct
    135 {
    136   int   utt_type;       /* live or from file (as noted on utterance_info.utt_type) */
    137   int   dim;            /* dimension; presumably the number of parameters per frame - confirm */
    138   fepFramePkt  *frame;  /* pointer to a fepFramePkt (type declared in an included header) */
    139   int   num_chan;       /* same name as the num_chan argument of init_utterance */
    140   int   do_channorm;    /* presumably nonzero when channel normalization is applied - confirm */
    141   spect_dist_info **spchchan; /*  Mirrored from the Wave object */
    142   norm_info   *channorm; /*  Mirrored from the Wave object */
    143   swicms_norm_info    *swicms;        /* copy of wave obj pointer */
    144   spect_dist_info *backchan[MAX_CHAN_DIM]; /* array of MAX_CHAN_DIM pointers; purpose not visible in this header */
    145   featdata  *last_push; /* presumably the most recently pushed feature data - confirm */
    146   int   voice_duration;   /* same name as the set_voicing_durations argument */
    147   int   quiet_duration;   /* same name as the set_voicing_durations argument */
    148   int   unsure_duration;  /* same name as the set_voicing_durations argument */
    149   int   start_windback;   /* same name as the set_voicing_durations argument */
    150 }
    151 live_utterance_info;
    152 
    153 /**
    154  * An utterance, seen through one of several overlapping views. Every struct member of this union starts with an int utt_type, so the plain utt_type member here overlays that first field and can be used to tell which view applies. This is the type taken by init_utterance, free_utterance and the other utterance_* functions declared in this header.
    155  */
    156 typedef union
    157 {
    158   int   utt_type; /* live or from file */
    159   utt_generic_info    gen_utt; /* generic one */
    160 #ifndef _RTT
    161   file_utterance_info file_utt; /* file-based view; absent when _RTT is defined */
    162 #endif
    163   live_utterance_info live_utt; /* live view */
    164 } utterance_info;
    165 
    166 
    167 /*
    168 **  Size of the utb file headers and details
    169 */
    170 
    171 #ifndef _RTT
    172 #define UTT_VERSION 2             /* version constant; NOTE(review): other comments in this header mention "version 5" - confirm how the two relate */
    173 #define UTT_HEADER_SIZE 16        /*Size on disk; presumably in bytes - confirm*/
    174 #define UTB_HEADER_SIZE 32        /*Size on disk; presumably in bytes - confirm*/
    175 #define UTB_HEADER_USED 16        /*Size on disk; presumably in bytes - confirm*/   /* SAL */
    176 
    177 /**
    178  * UTB file header, as held in memory. NOTE(review): the unsigned long members are 4 or 8 bytes depending on the platform, so sizeof(UttHeader) need not match the on-disk sizes given by the *_HEADER_SIZE macros; presumably read_utt_head/write_utt_head transfer the header field by field - confirm.
    179  */
    180 typedef struct _UttHeader
    181 {
    182 	/**
    183 	 * The size of the header in bytes.
    184 	 */
    185   unsigned short headerSize;
    186 	/**
    187 	 * The version of the file format.
    188 	 */
    189   unsigned short version;
    190 	/**
    191 	 * The size of the payload in bytes.
    192 	 */
    193   unsigned long  nBytes;
    194 	/**
    195 	 * The number of parameters per frame.
    196 	 */
    197   unsigned short nParametersPerFrame;
    198 	/**
    199 	 * Channel normalization: 0=unknown, 1=none, 2=amp-based, 3=harmonicity-based, 4=mrec style
    200 	 */
    201   unsigned short channelNormalization;
    202   /**
    203 	 * Speaker normalization: 0=unknown, 1=no, 2=yes
    204 	 */
    205   unsigned short speakerNormalization;
    206   /**
    207 	 * Imeldaization: 0=unknown, 1=no, 2=yes
    208 	 */
    209   unsigned short imeldaization;
    210 	/**
    211 	 * The number of parameters before imelda truncation.
    212 	 */
    213   unsigned short nOriginalParameters;
    214 	/**
    215 	 * The number of samples per frame.
    216 	 */
    217   unsigned short samplesPerFrame;
    218 	/**
    219 	 * The audio sample rate.
    220 	 */
    221   unsigned long  sampleRate;
    222 	/**
    223 	 * Checksum; not used in version 5. NOTE(review): UTT_VERSION in this header is 2 - confirm which version numbering this refers to.
    224 	 */
    225   unsigned long  checksum;
    226 }
    227 UttHeader;
    228 
    229 int    update_utb_header(file_utterance_info *utt, int frames, int samplerate,
    230                          int framerate); /* NOTE(review): only prototypes are visible for this group (compiled when _RTT is not defined); remarks below are read off names and signatures - confirm against the implementation */
    231 void    init_utt_v5_header(UttHeader *uhead, int dim, int samplerate, int framerate); /* takes the header by non-const pointer; presumably fills *uhead - confirm */
    232 int init_data_file(char *filename, file_utterance_info *utt, int dimen,
    233                    char typ, int endian, int do_skip); /* typ, endian and do_skip share names with utt_file_info members */
    234 int new_data_file(char *filename, file_utterance_info *utt, int dimen,
    235                   char typ, int endian); /* same parameter list as init_data_file, without do_skip */
    236 int set_data_frame(file_utterance_info *utt, long begin); /* begin: presumably a frame position - confirm */
    237 int buffer_data_frames(file_utterance_info *utt, long f_begin, long f_end); /* f_begin, f_end: presumably a frame range - confirm */
    238 void more_data_frames(file_utterance_info *utt); /* returns nothing, so no status reaches the caller */
    239 int save_data_frames(file_utterance_info *utt);
    240 void close_data_stream(file_utterance_info *utt); /* returns nothing, so no status reaches the caller */
    241 int init_utb_file(file_utterance_info *utt, annotate **table); /* table is annotate**; presumably receives the utb table - confirm */
    242 int position_utb_file(file_utterance_info *utt, long position, annotate *table);
    243 int load_utb_data(file_utterance_info *utt, int num_frames, int do_skip); /* do_skip shares its name with utt_file_info.do_skip (skip every other frame) */
    244 int load_short_data(file_utterance_info *utt, int num_frames, int do_skip); /* same parameter list as load_utb_data */
    245 int save_utb_data(file_utterance_info *utt, int num_frames);
    246 int save_short_data(file_utterance_info *utt, int num_frames); /* same parameter list as save_utb_data */
    247 int read_utt_head(UttHeader *head, PFile* datafile); /* header I/O on an already-open PFile; direction taken from the name */
    248 int write_utt_head(UttHeader *head, PFile* datafile); /* header I/O on an already-open PFile; direction taken from the name */
    249 int check_for_utb(char* filename); /* meaning of the int result is not visible in this header */
    250 
    251 /*  TCP reading routines: these work on annotate tag arrays and take file names. NOTE(review): "TCP" here presumably names a label-file kind rather than the network protocol, and the meaning of the int results is not visible in this header - confirm against the implementation.
    252 */
    253 int     read_tcp(char *filename, annotate **tag_base); /* tag_base is annotate**; presumably receives the tag array - confirm */
    254 int     read_lst(char *filename, annotate *tag_base, int ntags); /* takes an existing tag array and its count ntags */
    255 int     read_utb_table(char *filename, annotate **tag_base); /* same parameter list as read_tcp */
    256 void    save_tcp(char *tcpnam, annotate *tag, int ntags); /* returns nothing, so no status reaches the caller */
    257 void compose_tcp_name_of_utt(char* uttname , char* tcpname); /* no buffer size is passed; presumably writes tcpname from uttname, so the caller must size tcpname - confirm */
    258 
    259 #endif
    260 
    261 void init_utterance(utterance_info *utt, int utt_type, int dimen,
    262                     int buffer_size, int keep_frames, int num_chan, int do_voicing); /* declared whether or not _RTT is defined; buffer_size and keep_frames are presumably in frames (cf. DEFAULT_BUFFER_SIZE, KEEP_FRAMES) - confirm */
    263 void set_voicing_durations(utterance_info *utt, int voice_duration,
    264                            int quiet_duration, int unsure_duration,
    265                            int start_windback); /* arguments share names with the voice_duration, quiet_duration, unsure_duration and start_windback members of the utterance structs */
    266 void free_utterance(utterance_info *utt); /* NOTE(review): whether this also frees *utt itself is not visible in this header - confirm */
    267 int utterance_started(utterance_info *utt); /* meaning of the int result is not visible here; presumably a boolean - confirm */
    268 int utterance_ended(utterance_info *utt); /* meaning of the int result is not visible here; presumably a boolean - confirm */
    269 int load_utterance_frame(utterance_info *utt, unsigned char* pUttFrame, int voicing); /* pUttFrame: raw bytes of one frame, presumably - confirm; no length is passed */
    270 int copy_utterance_frame(utterance_info *oututt, utterance_info *inutt); /* destination first, source second, going by the parameter names */
    271 
    272 #endif /* _h_utteranc_ */
    273