1 /* 2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 /** 17 * @file picokfst.c 18 * 19 * FST knowledge loading and access 20 * 21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 22 * All rights reserved. 23 * 24 * History: 25 * - 2009-04-20 -- initial version 26 * 27 */ 28 #include "picoos.h" 29 #include "picodbg.h" 30 #include "picoknow.h" 31 #include "picokfst.h" 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 #if 0 37 } 38 #endif 39 40 41 #define FileHdrSize 4 /* size of FST file header */ 42 43 44 45 /* ************************************************************/ 46 /* function to create specialized kb, */ 47 /* to be used by picorsrc only */ 48 /* ************************************************************/ 49 50 /** object : FSTKnowledgeBase 51 * shortcut : kfst 52 * derived from : picoknow_KnowledgeBase 53 */ 54 55 typedef struct kfst_subobj * kfst_SubObj; 56 57 typedef struct kfst_subobj{ 58 picoos_uint8 * fstStream; /* the byte stream base address */ 59 picoos_int32 hdrLen; /* length of file header */ 60 picoos_int32 transductionMode; /* transduction mode to be used for FST */ 61 picoos_int32 nrClasses; /* nr of pair/transition classes in FST; class is in [1..nrClasses] */ 62 picoos_int32 nrStates; /* nr of states in FST; state is in [1..nrState] */ 63 picoos_int32 termClass; /* pair class of terminator symbol pair; probably obsolete */ 64 picoos_int32 alphaHashTabSize; /* size of pair alphabet hash table */ 65 picoos_int32 alphaHashTabPos; /* absolute address of the start of the pair alphabet */ 66 picoos_int32 transTabEntrySize; /* size in bytes of each transition table entry */ 67 picoos_int32 transTabPos; /* absolute address of the start of the transition table */ 68 picoos_int32 inEpsStateTabPos; /* absolute address of the start of the input epsilon transition table */ 69 picoos_int32 accStateTabPos; /* absolute address of the table of accepting states */ 70 } kfst_subobj_t; 71 72 73 74 /* ************************************************************/ 75 /* primitives for reading from byte stream */ 76 /* ************************************************************/ 77 78 /* Converts 'nrBytes' bytes starting at position '*pos' in byte stream 'stream' into unsigned number 'num'. 79 '*pos' is modified to the position right after the number */ 80 static void FixedBytesToUnsignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_uint32 * num) 81 { 82 picoos_int32 i; 83 84 (*num) = 0; 85 for (i = 0; i < nrBytes; i++) { 86 (*num) = ((*num) << 8) + (picoos_uint32)stream[*pos]; 87 (*pos)++; 88 } 89 } 90 91 92 /* Converts 'nrBytes' bytes starting at position '*pos' in byte stream 'stream' into signed number 'num'. 93 '*pos' is modified to the position right after the number */ 94 static void FixedBytesToSignedNum (picoos_uint8 * stream, picoos_uint8 nrBytes, picoos_uint32 * pos, picoos_int32 * num) 95 { 96 picoos_int32 i; 97 picoos_uint32 val; 98 99 val = 0; 100 for (i = 0; i < nrBytes; i++) { 101 val = (val << 8) + (picoos_uint32)stream[*pos]; 102 (*pos)++; 103 } 104 if (val % 2 == 1) { 105 /* negative number */ 106 (*num) = -((picoos_int32)((val - 1) / 2)) - 1; 107 } else { 108 /* positive number */ 109 (*num) = val / 2; 110 } 111 } 112 113 114 /* Converts varying-sized sequence of bytes starting at position '*pos' in byte stream 'stream' 115 into (signed) number 'num'. '*pos' is modified to the position right after the number. */ 116 static void BytesToNum (picoos_uint8 * stream, picoos_uint32 * pos, picoos_int32 * num) 117 { 118 picoos_uint32 val; 119 picoos_uint32 b; 120 121 val = 0; 122 b = (picoos_uint32)stream[*pos]; 123 (*pos)++; 124 while (b < 128) { 125 val = (val << 7) + b; 126 b = (picoos_uint32)stream[*pos]; 127 (*pos)++; 128 } 129 val = (val << 7) + (b - 128); 130 if (val % 2 == 1) { 131 /* negative number */ 132 (*num) = -((picoos_int32)((val - 1) / 2)) - 1; 133 } else { 134 /* positive number */ 135 (*num) = val / 2; 136 } 137 } 138 139 140 /* ************************************************************/ 141 /* setting up FST from byte stream */ 142 /* ************************************************************/ 143 144 static pico_status_t kfstInitialize(register picoknow_KnowledgeBase this, 145 picoos_Common common) 146 { 147 picoos_uint32 curpos; 148 picoos_int32 offs; 149 kfst_subobj_t * kfst; 150 151 PICODBG_DEBUG(("kfstInitialize -- start\n")); 152 153 if (NULL == this || NULL == this->subObj) { 154 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL, 155 NULL); 156 } 157 kfst = (kfst_subobj_t *) this->subObj; 158 159 /* +CT+ */ 160 kfst->fstStream = this->base; 161 PICODBG_TRACE(("base: %d\n",this->base)); 162 kfst->hdrLen = FileHdrSize; 163 curpos = kfst->hdrLen; 164 BytesToNum(kfst->fstStream,& curpos,& kfst->transductionMode); 165 BytesToNum(kfst->fstStream,& curpos,& kfst->nrClasses); 166 BytesToNum(kfst->fstStream,& curpos,& kfst->nrStates); 167 BytesToNum(kfst->fstStream,& curpos,& kfst->termClass); 168 BytesToNum(kfst->fstStream,& curpos,& kfst->alphaHashTabSize); 169 BytesToNum(kfst->fstStream,& curpos,& offs); 170 kfst->alphaHashTabPos = kfst->hdrLen + offs; 171 BytesToNum(kfst->fstStream,& curpos,& kfst->transTabEntrySize); 172 BytesToNum(kfst->fstStream,& curpos,& offs); 173 kfst->transTabPos = kfst->hdrLen + offs; 174 BytesToNum(kfst->fstStream,& curpos,& offs); 175 kfst->inEpsStateTabPos = kfst->hdrLen + offs; 176 BytesToNum(kfst->fstStream,& curpos,& offs); 177 kfst->accStateTabPos = kfst->hdrLen + offs; 178 /* -CT- */ 179 180 return PICO_OK; 181 } 182 183 184 static pico_status_t kfstSubObjDeallocate(register picoknow_KnowledgeBase this, 185 picoos_MemoryManager mm) 186 { 187 if (NULL != this) { 188 picoos_deallocate(mm, (void *) &this->subObj); 189 } 190 return PICO_OK; 191 } 192 193 194 /* calculates a small number of data (e.g. addresses) from kb for fast access. 195 * This data is encapsulated in a picokfst_FST that can later be retrieved 196 * with picokfst_getFST. */ 197 pico_status_t picokfst_specializeFSTKnowledgeBase(picoknow_KnowledgeBase this, 198 picoos_Common common) 199 { 200 pico_status_t status; 201 202 if (NULL == this) { 203 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, NULL, NULL); 204 } 205 if (0 < this->size) { 206 /* not a dummy kb */ 207 this->subDeallocate = kfstSubObjDeallocate; 208 209 this->subObj = picoos_allocate(common->mm, sizeof(kfst_subobj_t)); 210 211 if (NULL == this->subObj) { 212 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL); 213 } 214 status = kfstInitialize(this, common); 215 if (PICO_OK != status) { 216 picoos_deallocate(common->mm,(void **)&this->subObj); 217 } 218 } 219 return PICO_OK; 220 } 221 222 223 /* ************************************************************/ 224 /* FST type and getFST function */ 225 /* ************************************************************/ 226 227 228 229 /* return kb FST for usage in PU */ 230 picokfst_FST picokfst_getFST(picoknow_KnowledgeBase this) 231 { 232 if (NULL == this) { 233 return NULL; 234 } else { 235 return (picokfst_FST) this->subObj; 236 } 237 } 238 239 240 241 /* ************************************************************/ 242 /* FST access methods */ 243 /* ************************************************************/ 244 245 246 /* see description in header file */ 247 extern picoos_uint8 picokfst_kfstGetTransductionMode(picokfst_FST this) 248 { 249 kfst_SubObj fst = (kfst_SubObj) this; 250 if (fst != NULL) { 251 return fst->transductionMode; 252 } else { 253 return 0; 254 } 255 } 256 257 258 /* see description in header file */ 259 extern void picokfst_kfstGetFSTSizes (picokfst_FST this, picoos_int32 *nrStates, picoos_int32 *nrClasses) 260 { 261 kfst_SubObj fst = (kfst_SubObj) this; 262 if (fst != NULL) { 263 *nrStates = fst->nrStates; 264 *nrClasses = fst->nrClasses; 265 } else { 266 *nrStates = 0; 267 *nrClasses = 0; 268 } 269 } 270 271 /* see description in header file */ 272 extern void picokfst_kfstStartPairSearch (picokfst_FST this, picokfst_symid_t inSym, 273 picoos_bool * inSymFound, picoos_int32 * searchState) 274 { 275 picoos_uint32 pos; 276 picoos_int32 offs; 277 picoos_int32 h; 278 picoos_int32 inSymCellPos; 279 picoos_int32 inSymX; 280 picoos_int32 nextSameHashInSymOffs; 281 282 kfst_SubObj fst = (kfst_SubObj) this; 283 (*searchState) = -1; 284 (*inSymFound) = 0; 285 h = inSym % fst->alphaHashTabSize; 286 pos = fst->alphaHashTabPos + (h * 4); 287 FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs); 288 if (offs > 0) { 289 inSymCellPos = fst->alphaHashTabPos + offs; 290 pos = inSymCellPos; 291 BytesToNum(fst->fstStream,& pos,& inSymX); 292 BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs); 293 while ((inSymX != inSym) && (nextSameHashInSymOffs > 0)) { 294 inSymCellPos = inSymCellPos + nextSameHashInSymOffs; 295 pos = inSymCellPos; 296 BytesToNum(fst->fstStream,& pos,& inSymX); 297 BytesToNum(fst->fstStream,& pos,& nextSameHashInSymOffs); 298 } 299 if (inSymX == inSym) { 300 /* input symbol found; state is set to position after symbol cell */ 301 (*searchState) = pos; 302 (*inSymFound) = 1; 303 } 304 } 305 } 306 307 308 /* see description in header file */ 309 extern void picokfst_kfstGetNextPair (picokfst_FST this, picoos_int32 * searchState, 310 picoos_bool * pairFound, 311 picokfst_symid_t * outSym, picokfst_class_t * pairClass) 312 { 313 picoos_uint32 pos; 314 picoos_int32 val; 315 316 kfst_SubObj fst = (kfst_SubObj) this; 317 if ((*searchState) < 0) { 318 (*pairFound) = 0; 319 (*outSym) = PICOKFST_SYMID_ILLEG; 320 (*pairClass) = -1; 321 } else { 322 pos = (*searchState); 323 BytesToNum(fst->fstStream,& pos,& val); 324 *outSym = (picokfst_symid_t)val; 325 if ((*outSym) != PICOKFST_SYMID_ILLEG) { 326 BytesToNum(fst->fstStream,& pos,& val); 327 *pairClass = (picokfst_class_t)val; 328 (*pairFound) = 1; 329 (*searchState) = pos; 330 } else { 331 (*pairFound) = 0; 332 (*outSym) = PICOKFST_SYMID_ILLEG; 333 (*pairClass) = -1; 334 (*searchState) = -1; 335 } 336 } 337 } 338 339 340 341 /* see description in header file */ 342 extern void picokfst_kfstGetTrans (picokfst_FST this, picokfst_state_t startState, picokfst_class_t transClass, 343 picokfst_state_t * endState) 344 { 345 346 picoos_uint32 pos; 347 picoos_int32 index; 348 picoos_uint32 endStateX; 349 350 kfst_SubObj fst = (kfst_SubObj) this; 351 if ((startState < 1) || (startState > fst->nrStates) || (transClass < 1) || (transClass > fst->nrClasses)) { 352 (*endState) = 0; 353 } else { 354 index = (startState - 1) * fst->nrClasses + transClass - 1; 355 pos = fst->transTabPos + (index * fst->transTabEntrySize); 356 FixedBytesToUnsignedNum(fst->fstStream,fst->transTabEntrySize,& pos,& endStateX); 357 (*endState) = endStateX; 358 } 359 } 360 361 362 /* see description in header file */ 363 extern void picokfst_kfstStartInEpsTransSearch (picokfst_FST this, picokfst_state_t startState, 364 picoos_bool * inEpsTransFound, picoos_int32 * searchState) 365 { 366 367 picoos_int32 offs; 368 picoos_uint32 pos; 369 370 kfst_SubObj fst = (kfst_SubObj) this; 371 (*searchState) = -1; 372 (*inEpsTransFound) = 0; 373 if ((startState > 0) && (startState <= fst->nrStates)) { 374 pos = fst->inEpsStateTabPos + (startState - 1) * 4; 375 FixedBytesToSignedNum(fst->fstStream,4,& pos,& offs); 376 if (offs > 0) { 377 (*searchState) = fst->inEpsStateTabPos + offs; 378 (*inEpsTransFound) = 1; 379 } 380 } 381 } 382 383 384 385 /* see description in header file */ 386 extern void picokfst_kfstGetNextInEpsTrans (picokfst_FST this, picoos_int32 * searchState, 387 picoos_bool * inEpsTransFound, 388 picokfst_symid_t * outSym, picokfst_state_t * endState) 389 { 390 picoos_uint32 pos; 391 picoos_int32 val; 392 393 kfst_SubObj fst = (kfst_SubObj) this; 394 if ((*searchState) < 0) { 395 (*inEpsTransFound) = 0; 396 (*outSym) = PICOKFST_SYMID_ILLEG; 397 (*endState) = 0; 398 } else { 399 pos = (*searchState); 400 BytesToNum(fst->fstStream,& pos,& val); 401 *outSym = (picokfst_symid_t)val; 402 if ((*outSym) != PICOKFST_SYMID_ILLEG) { 403 BytesToNum(fst->fstStream,& pos,& val); 404 *endState = (picokfst_state_t)val; 405 (*inEpsTransFound) = 1; 406 (*searchState) = pos; 407 } else { 408 (*inEpsTransFound) = 0; 409 (*outSym) = PICOKFST_SYMID_ILLEG; 410 (*endState) = 0; 411 (*searchState) = -1; 412 } 413 } 414 } 415 416 417 /* see description in header file */ 418 extern picoos_bool picokfst_kfstIsAcceptingState (picokfst_FST this, picokfst_state_t state) 419 { 420 421 picoos_uint32 pos; 422 picoos_uint32 val; 423 424 kfst_SubObj fst = (kfst_SubObj) this; 425 if ((state > 0) && (state <= fst->nrStates)) { 426 pos = fst->accStateTabPos + (state - 1); 427 FixedBytesToUnsignedNum(fst->fstStream,1,& pos,& val); 428 return (val == 1); 429 } else { 430 return 0; 431 } 432 } 433 434 #ifdef __cplusplus 435 } 436 #endif 437 438 /* End picofst.c */ 439