Home | History | Annotate | Download | only in lib
      1 /*
      2  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 /**
     17  * @file picokpdf.c
     18  *
     19  *  knowledge handling for pdf
     20  *
     21  * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
     22  * All rights reserved.
     23  *
     24  * History:
     25  * - 2009-04-20 -- initial version
     26  *
     27  */
     28 
     29 #include "picoos.h"
     30 #include "picodbg.h"
     31 #include "picoknow.h"
     32 #include "picokpdf.h"
     33 
     34 #ifdef __cplusplus
     35 extern "C" {
     36 #endif
     37 #if 0
     38 }
     39 #endif
     40 
     41 
     42 /* ************************************************************/
     43 /* pdf */
     44 /* ************************************************************/
     45 
     46 /*
     47  * @addtogroup picokpdf
     48  *
     49   overview: format of knowledge base pdf file
     50 
     51   This is the format for the dur pdf file:
     52     - Numframes:     1             uint16
     53     - Vecsize:       1             uint8
     54     - sampperframe:  1             uint8
     55     - Phonquantlen:  1             uint8
     56     - Phonquant:     Phonquantlen  uint8
     57     - Statequantlen: 1             uint8
      58     - Statequant:    Statequantlen uint8
     59     - And then numframes x vecsize uint8
     60 
     61   This is the format for mul (mgc and lfz) pdf files:
     62     - numframes:         1         uint16
     63     - vecsize:           1         uint8
     64     - numstates:         1         uint8
     65     - numframesperstate: numstates uint16
     66     - ceporder:          1         uint8
     67     - numvuv             1         uint8
     68     - numdeltas:         1         uint8
     69     - scmeanpow:         1         uint8
     70     - maxbigpow:         1         uint8
     71     - scmeanpowum  KPDF_NUMSTREAMS * ceporder uint8
     72     - scivarpow    KPDF_NUMSTREAMS * ceporder uint8
     73 
     74     And then numframes x vecsize uint8
     75 
     76 */
     77 
     78 
     79 /* ************************************************************/
     80 /* pdf data defines */
     81 /* may not be changed with current implementation */
     82 /* ************************************************************/
     83 
     84 
     85 #define KPDF_NUMSTREAMS  3 /* coeff, delta, deltadelta */
     86 
     87 
     88 /* ************************************************************/
     89 /* pdf loading */
     90 /* ************************************************************/
     91 
     92 static pico_status_t kpdfDURInitialize(register picoknow_KnowledgeBase this,
     93                                        picoos_Common common) {
     94     picokpdf_pdfdur_t *pdfdur;
     95     picoos_uint16 pos;
     96 
     97     if (NULL == this || NULL == this->subObj) {
     98         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
     99                                        NULL, NULL);
    100     }
    101     pdfdur = (picokpdf_pdfdur_t *)this->subObj;
    102 
    103     pos = 0;
    104 
    105     pdfdur->numframes = ((picoos_uint16)(this->base[pos+1])) << 8 |
    106         this->base[pos];
    107     pos += 2;
    108     pdfdur->vecsize = this->base[pos++];
    109     pdfdur->sampperframe = this->base[pos++];
    110     pdfdur->phonquantlen = this->base[pos++];
    111     pdfdur->phonquant = &(this->base[pos]);
    112     pos += pdfdur->phonquantlen;
    113     pdfdur->statequantlen = this->base[pos++];
    114     pdfdur->statequant = &(this->base[pos]);
    115     pos += pdfdur->statequantlen;
    116     pdfdur->content = &(this->base[pos]);
    117     PICODBG_DEBUG(("numframes %d, vecsize %d, phonquantlen %d, "
    118                    "statequantlen %d", pdfdur->numframes, pdfdur->vecsize,
    119                    pdfdur->phonquantlen, pdfdur->statequantlen));
    120     if ((picoos_uint32)(pos + (pdfdur->numframes * pdfdur->vecsize)) != this->size) {
    121         PICODBG_DEBUG(("header-spec size %d, kb-size %d",
    122                        pos + (pdfdur->numframes * pdfdur->vecsize),
    123                        this->size));
    124         return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
    125                                        NULL, NULL);
    126     }
    127     PICODBG_DEBUG(("dur pdf initialized"));
    128     return PICO_OK;
    129 }
    130 
    131 static picoos_uint8 convScaleFactorToBig(picoos_uint8 pow, picoos_uint8 bigpow)
    132 {
    133     if (pow > 0x0F) {
    134         pow = bigpow + (0xFF - pow + 1);  /* take 2's complement of negative pow */
    135     } else if (bigpow >= pow) {
    136         pow = bigpow - pow;
    137     } else {
    138         /* error: bigpow is smaller than input pow */
    139         return 0;
    140     }
    141     return pow;
    142 }
    143 
    144 static pico_status_t kpdfMULInitialize(register picoknow_KnowledgeBase this,
    145                                        picoos_Common common) {
    146     picokpdf_pdfmul_t *pdfmul;
    147     picoos_uint16 pos;
    148     picoos_uint8 scmeanpow, maxbigpow, nummean;
    149     picoos_uint8 i;
    150 
    151     if (NULL == this || NULL == this->subObj) {
    152         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
    153                                        NULL, NULL);
    154     }
    155     pdfmul = (picokpdf_pdfmul_t *)this->subObj;
    156 
    157     pos = 0;
    158 
    159     pdfmul->numframes = ((picoos_uint16)(this->base[pos+1])) << 8 |
    160         this->base[pos];
    161     pos += 2;
    162     pdfmul->vecsize = this->base[pos++];
    163     pdfmul->numstates = this->base[pos++];
    164     {
    165         pdfmul->stateoffset[0] = (picoos_uint16) 0;
    166         for (i=1; i<pdfmul->numstates; i++) {
    167             pdfmul->stateoffset[i] = pdfmul->stateoffset[i-1] + (this->base[pos] | ((picoos_uint16) this->base[pos+1] << 8));
    168             pos += 2;
    169         }
    170         pos += 2; /* we don't need the last number if we only need the offset (i.e. how to get to the vector start) */
    171     }
    172 
    173     pdfmul->ceporder = this->base[pos++];
    174     pdfmul->numvuv = this->base[pos++];
    175     pdfmul->numdeltas = this->base[pos++];
    176     scmeanpow = this->base[pos++];
    177     maxbigpow = this->base[pos++];
    178     if (maxbigpow < PICOKPDF_BIG_POW) {
    179         PICODBG_ERROR(("bigpow %i is larger than maxbigpow %i defined in pdf lingware", PICOKPDF_BIG_POW, maxbigpow));
    180         return picoos_emRaiseException(common->em, PICO_EXC_MAX_NUM_EXCEED,NULL,NULL);
    181     }
    182     pdfmul->bigpow = PICOKPDF_BIG_POW; /* what we have to use is the smaller number! */
    183 
    184     pdfmul->amplif = this->base[pos++];
    185 
    186     /* bigpow corrected by scmeanpow, multiply means by 2^meanpow to obtain fixed point representation */
    187     pdfmul->meanpow = convScaleFactorToBig(scmeanpow, pdfmul->bigpow);
    188     if (0 == pdfmul->meanpow) {
    189         PICODBG_ERROR(("error in convScaleFactorToBig"));
    190         return picoos_emRaiseException(common->em, PICO_EXC_MAX_NUM_EXCEED,NULL,NULL);
    191     }
    192     nummean = 3*pdfmul->ceporder;
    193 
    194     pdfmul->meanpowUm = picoos_allocate(common->mm,nummean*sizeof(picoos_uint8));
    195     pdfmul->ivarpow = picoos_allocate(common->mm,nummean*sizeof(picoos_uint8));
    196     if ((NULL == pdfmul->meanpowUm) || (NULL == pdfmul->ivarpow)) {
    197         picoos_deallocate(common->mm,(void *) &(pdfmul->meanpowUm));
    198         picoos_deallocate(common->mm,(void *) &(pdfmul->ivarpow));
    199         return picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM,NULL,NULL);
    200     }
    201 
    202     /*     read meanpowUm and convert on the fly */
    203     /*     meaning of meanpowUm becomes: multiply means from pdf stream by 2^meanpowUm
    204      * to achieve fixed point scaling by big
    205      */
    206     for (i=0; i<nummean; i++) {
    207         pdfmul->meanpowUm[i] = convScaleFactorToBig(this->base[pos++], pdfmul->bigpow);
    208     }
    209 
    210    /*read ivarpow  and convert on the fly */
    211     for (i=0; i<nummean; i++) {
    212         pdfmul->ivarpow[i] = convScaleFactorToBig(this->base[pos++], pdfmul->bigpow);
    213     }
    214 
    215     /* check numdeltas */
    216     if ((pdfmul->numdeltas == 0xFF) && (pdfmul->vecsize != (pdfmul->numvuv + pdfmul->ceporder * 3 * (2+1)))) {
    217         PICODBG_ERROR(("header has inconsistent values for vecsize, ceporder, numvuv, and numdeltas"));
    218         return picoos_emRaiseException(common->em,PICO_EXC_FILE_CORRUPT,NULL,NULL);
    219      }
    220 
    221 /*     vecsize: 1 uint8 for vuv
    222          + ceporder short for static means
    223          + numdeltas uint8 and short for sparse delta means
    224          + ceporder*3 uint8 for static and delta inverse variances
    225 */
    226     if ((pdfmul->numdeltas != 0xFF) && (pdfmul->vecsize != pdfmul->numvuv+pdfmul->ceporder*2+pdfmul->numdeltas*3+pdfmul->ceporder*3)) {
    227         PICODBG_ERROR(("header has inconsistent values for vecsize, ceporder, numvuv, and numdeltas\n"
    228                 "vecsize = %i while numvuv+ceporder*2 + numdeltas*3 + ceporder*3 = %i",
    229                 pdfmul->vecsize, pdfmul->numvuv + pdfmul->ceporder*2 + pdfmul->numdeltas * 3 + pdfmul->ceporder * 3));
    230         return picoos_emRaiseException(common->em,PICO_EXC_FILE_CORRUPT,NULL,NULL);
    231     }
    232     pdfmul->content = &(this->base[pos]);
    233     PICODBG_DEBUG(("numframes %d, vecsize %d, numstates %d, ceporder %d, "
    234                    "numvuv %d, numdeltas %d, meanpow %d, bigpow %d",
    235                    pdfmul->numframes, pdfmul->vecsize, pdfmul->numstates,
    236                    pdfmul->ceporder, pdfmul->numvuv, pdfmul->numdeltas,
    237                    pdfmul->meanpow, pdfmul->bigpow));
    238     if ((picoos_uint32)(pos + (pdfmul->numframes * pdfmul->vecsize)) != this->size) {
    239         PICODBG_DEBUG(("header-spec size %d, kb-size %d",
    240                        pos + (pdfmul->numframes * pdfmul->vecsize),
    241                        this->size));
    242         return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
    243                                        NULL, NULL);
    244     }
    245     PICODBG_DEBUG(("mul pdf initialized"));
    246     return PICO_OK;
    247 }
    248 
    249 static pico_status_t kpdfPHSInitialize(register picoknow_KnowledgeBase this,
    250                                        picoos_Common common) {
    251     picokpdf_pdfphs_t *pdfphs;
    252     picoos_uint16 pos;
    253 
    254     if (NULL == this || NULL == this->subObj) {
    255         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
    256                                        NULL, NULL);
    257     }
    258     pdfphs = (picokpdf_pdfphs_t *)this->subObj;
    259 
    260     pos = 0;
    261 
    262     pdfphs->numvectors = ((picoos_uint16)(this->base[pos+1])) << 8 |
    263         this->base[pos];
    264     pos += 2;
    265     pdfphs->indexBase = &(this->base[pos]);
    266     pdfphs->contentBase = pdfphs->indexBase + pdfphs->numvectors * sizeof(picoos_uint32);
    267     PICODBG_DEBUG(("phs pdf initialized"));
    268     return PICO_OK;
    269 }
    270 
    271 
    272 
    273 static pico_status_t kpdfMULSubObjDeallocate(register picoknow_KnowledgeBase this,
    274                                           picoos_MemoryManager mm) {
    275 
    276 
    277     picokpdf_pdfmul_t *pdfmul;
    278 
    279     if ((NULL != this) && (NULL != this->subObj)) {
    280         pdfmul = (picokpdf_pdfmul_t *)this->subObj;
    281         picoos_deallocate(mm,(void *) &(pdfmul->meanpowUm));
    282         picoos_deallocate(mm,(void *) &(pdfmul->ivarpow));
    283         picoos_deallocate(mm, (void *) &(this->subObj));
    284     }
    285     return PICO_OK;
    286 }
    287 
    288 static pico_status_t kpdfDURSubObjDeallocate(register picoknow_KnowledgeBase this,
    289                                           picoos_MemoryManager mm) {
    290     if (NULL != this) {
    291         picoos_deallocate(mm, (void *) &this->subObj);
    292     }
    293     return PICO_OK;
    294 }
    295 
    296 static pico_status_t kpdfPHSSubObjDeallocate(register picoknow_KnowledgeBase this,
    297                                           picoos_MemoryManager mm) {
    298     if (NULL != this) {
    299         picoos_deallocate(mm, (void *) &this->subObj);
    300     }
    301     return PICO_OK;
    302 }
    303 
    304 /* we don't offer a specialized constructor for a *KnowledgeBase but
     305  * instead a "specializer" of an already existing generic
    306  * picoknow_KnowledgeBase */
    307 
    308 pico_status_t picokpdf_specializePdfKnowledgeBase(picoknow_KnowledgeBase this,
    309                                           picoos_Common common,
    310                                           const picokpdf_kpdftype_t kpdftype) {
    311     pico_status_t status;
    312 
    313     if (NULL == this) {
    314         return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
    315                                        NULL, NULL);
    316     }
    317     switch (kpdftype) {
    318         case PICOKPDF_KPDFTYPE_DUR:
    319             this->subDeallocate = kpdfDURSubObjDeallocate;
    320             this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfdur_t));
    321             if (NULL == this->subObj) {
    322                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
    323                                                NULL, NULL);
    324             }
    325             status = kpdfDURInitialize(this, common);
    326             break;
    327         case PICOKPDF_KPDFTYPE_MUL:
    328             this->subDeallocate = kpdfMULSubObjDeallocate;
    329             this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfmul_t));
    330             if (NULL == this->subObj) {
    331                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
    332                                                NULL, NULL);
    333             }
    334             status = kpdfMULInitialize(this, common);
    335             break;
    336         case PICOKPDF_KPDFTYPE_PHS:
    337             this->subDeallocate = kpdfPHSSubObjDeallocate;
    338             this->subObj = picoos_allocate(common->mm,sizeof(picokpdf_pdfphs_t));
    339             if (NULL == this->subObj) {
    340                 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
    341                                                NULL, NULL);
    342             }
    343             status = kpdfPHSInitialize(this, common);
    344             break;
    345 
    346         default:
    347             return picoos_emRaiseException(common->em, PICO_ERR_OTHER,
    348                                            NULL, NULL);
    349     }
    350 
    351     if (status != PICO_OK) {
    352         picoos_deallocate(common->mm, (void *) &this->subObj);
    353         return picoos_emRaiseException(common->em, status, NULL, NULL);
    354     }
    355     return PICO_OK;
    356 }
    357 
    358 
    359 /* ************************************************************/
    360 /* pdf getPdf* */
    361 /* ************************************************************/
    362 
    363 picokpdf_PdfDUR picokpdf_getPdfDUR(picoknow_KnowledgeBase this) {
    364     return ((NULL == this) ? NULL : ((picokpdf_PdfDUR) this->subObj));
    365 }
    366 
    367 picokpdf_PdfMUL picokpdf_getPdfMUL(picoknow_KnowledgeBase this) {
    368     return ((NULL == this) ? NULL : ((picokpdf_PdfMUL) this->subObj));
    369 }
    370 
    371 picokpdf_PdfPHS picokpdf_getPdfPHS(picoknow_KnowledgeBase this) {
    372     return ((NULL == this) ? NULL : ((picokpdf_PdfPHS) this->subObj));
    373 }
    374 
    375 
    376 #ifdef __cplusplus
    377 }
    378 #endif
    379 
    380 
    381 /* end */
    382