1 /* 2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 /** 17 * @file picokdt.c 18 * 19 * knowledge handling for decision trees 20 * 21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland 22 * All rights reserved. 23 * 24 * History: 25 * - 2009-04-20 -- initial version 26 * 27 */ 28 29 #include "picoos.h" 30 #include "picodbg.h" 31 #include "picobase.h" 32 #include "picoknow.h" 33 #include "picodata.h" 34 #include "picokdt.h" 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 #if 0 40 } 41 #endif 42 43 44 /* ************************************************************/ 45 /* decision tree */ 46 /* ************************************************************/ 47 48 /** 49 * @addtogroup picokdt 50 * ---------------------------------------------------\n 51 * <b> Pico KDT support </b>\n 52 * ---------------------------------------------------\n 53 overview extended binary tree file: 54 - dt consists of optional attribute mapping tables and a non-empty 55 tree part 56 - using the attribute mapping tables an attribute value as used 57 throughout the TTS can be mapped to its smaller representation 58 used in the tree 59 - multi-byte values always little endian 60 61 ------------------------------------------------------------------- 62 - bin-file, decision tree knowledge base in binary form 63 64 - dt-kb = header inputmaptables outputmaptables tree 
65 66 67 - header = INPMAPTABLEPOS2 OUTMAPTABLEPOS2 TREEPOS2 68 69 - INPMAPTABLEPOS2: two bytes, equals offest in number of bytes from 70 the start of kb to the start of input map tables, 71 may not be 0 72 - OUTMAPTABLEPOS2: two bytes, equals offest in number of bytes from 73 the start of kb to the start of outtables, 74 may not be 0 75 - TREEPOS2: two bytes, equals offest in number of bytes from the 76 start of kb to the start of the tree 77 78 79 - inputmaptables = maptables 80 - outputmaptables = maptables 81 - maptables = NRMAPTABLES1 {maptable}=NRMAPTABLES1 82 - maptable = LENTABLE2 TABLETYPE1 ( bytemaptable 83 | wordmaptable 84 | graphinmaptable 85 | bytetovarmaptable ) 86 - bytemaptable (in or out, usage varies) = NRBYTES2 {BYTE1}=NRBYTES2 87 - wordmaptable (in or out, usage varies) = NRWORDS2 {WORD2}=NRWORDS2 88 - graphinmaptable (in only) = NRGRAPHS2 {GRAPH1:4}=NRGRAPHS2 89 - bytetovarmaptable (out only) = NRINBYTES2 outvarsearchind 90 outvaroutputs 91 - outvarsearchind = {OUTVAROFFSET2}=NRINBYTES2 92 - outvaroutputs = {VARVALID1:}=NRINBYTES2 93 94 - bytemaptable: fixed size, *Map*Fixed \n 95 - wordmaptable: fixed size, *Map*Fixed \n 96 - graphinmaptable: search value is variable size (UTF8 grapheme), \n 97 value to be mapped to is fixed size, one byte \n 98 - bytetovarmaptable: search value is fixed size, one byte, values \n 99 to be mapped to are of variable size (e.g. several \n 100 phones) \n 101 102 - NRMAPTABLES1: one byte representing the number of map tables 103 - LENTABLE2: two bytes, equals offset to the next table (or next 104 part of kb, e.g. 
tree), 105 if LENTABLE2 = 3, and 106 TABLETYPE1 = EMPTY -> empty table, no mapping to be done 107 - TABLETYPE1: one byte, type of map table (byte, word, or graph=utf8) 108 - NRBYTES2: two bytes, number of bytes following in the table (one 109 would be okay, to simplify some implementation also set 110 to 2) 111 - BYTE1: one btye, the sequence is used to determine the values 112 being mapped to, starting with 0 113 - NRWORDS2: two bytes, number of words (two btyes) following in the table 114 - WORD2: two bytes, the sequence is used to determine the values 115 being mapped to, starting with 0 116 - NRGRAPHS2: two bytes, number of graphemes encoded in UTF8 following 117 in table 118 - GRAPH1:4: one to four bytes, UTF8 representation of a grapheme, the 119 sequence of graphemes is used to determine the value being 120 mapped to, starting with 0, the length information is 121 encoded in UTF8, no need for extra length info 122 - NRINBYTES2: two bytes, number of single byte IDs the tree can produce 123 - OUTVAROFFSET2: two bytes, offset from the start of the 124 outvaroutputs to the start of the following output 125 phone ID group, ie. the first outvaroffset is the 126 offset to the start of the second PHONEID 127 group. Using the previous outvaroffset (or the start 128 of the outvaroutputs) the start and lenth of the 129 PHONEID group can be determined and we can get the 130 sequence of output values we map the chunk value to 131 - VARVALID1:: one to several bytes, one byte each for an output phone ID 132 133 - tree = treenodeinfos TREEBODYSIZE4 treebody 134 - treenodeinfos = NRVFIELDS1 vfields NRATTRIBUTES1 NRQFIELDS1 qfields 135 - vfields = {VFIELD1}=NRVFIELDS1 136 - qfields = {QFIELD1}=NRATTRIBUTES1xNRQFIELDS1 137 - treebody = "cf. 
code" 138 139 - TREEBODYSIZE4: four bytes, size of treebody in number of bytes 140 - NRVFIELDS1: one byte, number of node properties in the following 141 vector (predefined and fixed sequence of properties) 142 - VFIELD1: number of bits used to represent a node property 143 - NRATTRIBUTES1: one byte, number of attributes (rows) in the 144 following matrix 145 - NRQFIELDS1: one byte, number (columns) of question-dependent node 146 properties per attribute in the following matrix 147 (predefined and fixed sequence of properties) 148 - QFIELD1: number of bits used to represent a question-dependent 149 property in the matrix 150 151 152 - Currently, 153 - NRVFIELDS1 is fixed at 2 for all trees, ie. 154 - vfields = 2 aVFIELD1 bVFIELD1 155 - aVFIELD1: nr of bits for questions 156 - bVFIELD1: nr of bits for decisions 157 158 - NRQFIELDS1 is fixed at 5 for all trees, ie. \n 159 - qfields = NRATTRIBUTES1 5 aQFIELD1 bQFIELD1 cQFIELD1 dQFIELD1 eQFIELD1 \n 160 - aQFIELD1: nr of bits for fork count \n 161 - bQFIELD1: nr of bits for start position for subsets \n 162 - cQFIELD1: nr of bits for group size \n 163 - dQFIELD1: nr of bits for offset to reach output \n 164 - eQFIELD1: nr of bits for threshold (if continuous node) \n 165 */ 166 167 168 /* ************************************************************/ 169 /* decision tree data defines */ 170 /* may not be changed with current implementation */ 171 /* ************************************************************/ 172 173 /* maptables fields */ 174 #define PICOKDT_MTSPOS_NRMAPTABLES 0 175 176 /* position of first byte of first maptable (for omt the only table */ 177 #define PICOKDT_MTPOS_START 1 178 179 /* maptable fields */ 180 #define PICOKDT_MTPOS_LENTABLE 0 181 #define PICOKDT_MTPOS_TABLETYPE 2 182 #define PICOKDT_MTPOS_NUMBER 3 183 #define PICOKDT_MTPOS_MAPSTART 5 184 185 /* treenodeinfos fields */ 186 #define PICOKDT_NIPOS_NRVFIELDS 0 187 #define PICOKDT_NIPOS_NRATTS 3 188 #define PICOKDT_NIPOS_NRQFIELDS 4 189 190 /* 
fixed treenodeinfos number of fields */ 191 #define PICOKDT_NODEINFO_NRVFIELDS 2 192 #define PICOKDT_NODEINFO_NRQFIELDS 5 193 194 /* fixed number of bits used */ 195 #define PICOKDT_NODETYPE_NRBITS 2 196 #define PICOKDT_SUBSETTYPE_NRBITS 2 197 #define PICOKDT_ISDECIDE_NRBITS 1 198 199 /* number of inpmaptables for each tree. Since we have a possibly 200 empty input map table for each att, currently these values must be 201 equal to PICOKDT_NRATT* */ 202 typedef enum { 203 PICOKDT_NRINPMT_POSP = 12, 204 PICOKDT_NRINPMT_POSD = 7, 205 PICOKDT_NRINPMT_G2P = 16, 206 PICOKDT_NRINPMT_PHR = 8, 207 PICOKDT_NRINPMT_ACC = 13, 208 PICOKDT_NRINPMT_PAM = 60 209 } kdt_nrinpmaptables_t; 210 211 /* number of outmaptables for each tree, at least one, possibly empty, 212 output map table for each tree */ 213 typedef enum { 214 PICOKDT_NROUTMT_POSP = 1, 215 PICOKDT_NROUTMT_POSD = 1, 216 PICOKDT_NROUTMT_G2P = 1, 217 PICOKDT_NROUTMT_PHR = 1, 218 PICOKDT_NROUTMT_ACC = 1, 219 PICOKDT_NROUTMT_PAM = 1 220 } kdt_nroutmaptables_t; 221 222 /* maptable types */ 223 typedef enum { 224 PICOKDT_MTTYPE_EMPTY = 0, 225 PICOKDT_MTTYPE_BYTE = 1, 226 PICOKDT_MTTYPE_WORD = 2, 227 PICOKDT_MTTYPE_GRAPH = 3, 228 PICOKDT_MTTYPE_BYTETOVAR = 4 229 } kdt_mttype_t; 230 231 232 /* ************************************************************/ 233 /* decision tree types and loading */ 234 /* ************************************************************/ 235 /* object : Dt*KnowledgeBase 236 * shortcut : kdt* 237 * derived from : picoknow_KnowledgeBase 238 */ 239 240 /* subobj shared by all decision trees */ 241 typedef struct { 242 picokdt_kdttype_t type; 243 picoos_uint8 *inpmaptable; 244 picoos_uint8 *outmaptable; 245 picoos_uint8 *tree; 246 picoos_uint32 beg_offset[128]; /* for efficiency */ 247 248 /* tree-internal details for faster processing */ 249 picoos_uint8 *vfields; 250 picoos_uint8 *qfields; 251 picoos_uint8 nrattributes; 252 picoos_uint8 *treebody; 253 /*picoos_uint8 nrvfields;*/ /* fix 
PICOKDT_NODEINFO_NRVFIELDS */ 254 /*picoos_uint8 nrqfields;*/ /* fix PICOKDT_NODEINFO_NRQFIELDS */ 255 256 /* direct output vector (no output mapping) */ 257 picoos_uint8 dset; /* TRUE if class set, FALSE otherwise */ 258 picoos_uint16 dclass; 259 } kdt_subobj_t; 260 261 /* subobj specific for each decision tree type */ 262 typedef struct { 263 kdt_subobj_t dt; 264 picoos_uint16 invec[PICOKDT_NRATT_POSP]; /* input vector */ 265 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ 266 } kdtposp_subobj_t; 267 268 typedef struct { 269 kdt_subobj_t dt; 270 picoos_uint16 invec[PICOKDT_NRATT_POSD]; /* input vector */ 271 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ 272 } kdtposd_subobj_t; 273 274 typedef struct { 275 kdt_subobj_t dt; 276 picoos_uint16 invec[PICOKDT_NRATT_G2P]; /* input vector */ 277 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ 278 } kdtg2p_subobj_t; 279 280 typedef struct { 281 kdt_subobj_t dt; 282 picoos_uint16 invec[PICOKDT_NRATT_PHR]; /* input vector */ 283 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ 284 } kdtphr_subobj_t; 285 286 typedef struct { 287 kdt_subobj_t dt; 288 picoos_uint16 invec[PICOKDT_NRATT_ACC]; /* input vector */ 289 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ 290 } kdtacc_subobj_t; 291 292 typedef struct { 293 kdt_subobj_t dt; 294 picoos_uint16 invec[PICOKDT_NRATT_PAM]; /* input vector */ 295 picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ 296 } kdtpam_subobj_t; 297 298 299 static pico_status_t kdtDtInitialize(register picoknow_KnowledgeBase this, 300 picoos_Common common, 301 kdt_subobj_t *dtp) { 302 picoos_uint16 inppos; 303 picoos_uint16 outpos; 304 picoos_uint16 treepos; 305 picoos_uint32 curpos = 0, pos; 306 picoos_uint16 lentable; 307 picoos_uint16 i; 308 picoos_uint8 imtnr; 309 310 PICODBG_DEBUG(("start")); 311 312 /* get inmap, outmap, tree offsets */ 313 if 
((PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &inppos)) 314 && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &outpos)) 315 && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, 316 &treepos))) { 317 318 /* all pos are mandatory, verify */ 319 if (inppos && outpos && treepos) { 320 dtp->inpmaptable = this->base + inppos; 321 dtp->outmaptable = this->base + outpos; 322 dtp->tree = this->base + treepos; 323 /* precalc beg offset table */ 324 imtnr=dtp->inpmaptable[0]; 325 pos=1; 326 dtp->beg_offset[0] = 1; 327 for (i = 0; i < imtnr; i++) { 328 lentable = ((picoos_uint16)(dtp->inpmaptable[pos+1])) << 8 | 329 dtp->inpmaptable[pos]; 330 pos += lentable; 331 dtp->beg_offset[i+1] = pos; 332 } 333 } else { 334 dtp->inpmaptable = NULL; 335 dtp->outmaptable = NULL; 336 dtp->tree = NULL; 337 PICODBG_ERROR(("invalid kb position info")); 338 return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, 339 NULL, NULL); 340 } 341 342 /* nr of outmaptables is equal 1 for all trees, verify */ 343 if (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != 1) { 344 PICODBG_ERROR(("wrong number of outmaptables")); 345 return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, 346 NULL, NULL); 347 } 348 349 /* check if this is an empty table, ie. len == 3 */ 350 if ((dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE] 351 == 3) 352 && (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE 353 + 1] == 0)) { 354 /* verify that this is supposed to be an empty table and 355 set outmaptable to NULL if so */ 356 if (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE] 357 == PICOKDT_MTTYPE_EMPTY) { 358 dtp->outmaptable = NULL; 359 } else { 360 PICODBG_ERROR(("table length vs. 
type problem")); 361 return picoos_emRaiseException(common->em, 362 PICO_EXC_FILE_CORRUPT, 363 NULL, NULL); 364 } 365 } 366 367 dtp->vfields = dtp->tree + 1; 368 dtp->qfields = dtp->tree + PICOKDT_NODEINFO_NRVFIELDS + 3; 369 dtp->nrattributes = dtp->tree[PICOKDT_NIPOS_NRATTS]; 370 dtp->treebody = dtp->qfields + 4 + 371 (dtp->nrattributes * PICOKDT_NODEINFO_NRQFIELDS); /* TREEBODYSIZE4*/ 372 373 /*dtp->nrvfields = dtp->tree[PICOKDT_NIPOS_NRVFIELDS]; <- is fix */ 374 /*dtp->nrqfields = dtp->tree[PICOKDT_NIPOS_NRQFIELDS]; <- is fix */ 375 /* verify that nrvfields ad nrqfields are correct */ 376 if ((PICOKDT_NODEINFO_NRVFIELDS != dtp->tree[PICOKDT_NIPOS_NRVFIELDS]) || 377 (PICOKDT_NODEINFO_NRQFIELDS != dtp->tree[PICOKDT_NIPOS_NRQFIELDS])) { 378 PICODBG_ERROR(("problem with nr of vfields (%d) or qfields (%d)", 379 dtp->tree[PICOKDT_NIPOS_NRVFIELDS], 380 dtp->tree[PICOKDT_NIPOS_NRQFIELDS])); 381 return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, 382 NULL, NULL); 383 } 384 dtp->dset = 0; 385 dtp->dclass = 0; 386 PICODBG_DEBUG(("tree init: nratt: %d, posomt: %d, postree: %d", 387 dtp->nrattributes, (dtp->outmaptable - dtp->inpmaptable), 388 (dtp->tree - dtp->inpmaptable))); 389 return PICO_OK; 390 } else { 391 PICODBG_ERROR(("problem reading kb in memory")); 392 return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, 393 NULL, NULL); 394 } 395 } 396 397 398 static pico_status_t kdtDtCheck(register picoknow_KnowledgeBase this, 399 picoos_Common common, 400 kdt_subobj_t *dtp, 401 kdt_nratt_t nratt, 402 kdt_nrinpmaptables_t nrinpmt, 403 kdt_nroutmaptables_t nroutmt, 404 kdt_mttype_t mttype) { 405 /* check nr attributes */ 406 /* check nr inpmaptables */ 407 /* check nr outmaptables */ 408 /* check outmaptable is word type */ 409 if ((nratt != dtp->nrattributes) 410 || (dtp->inpmaptable == NULL) 411 || (dtp->outmaptable == NULL) 412 || (dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nrinpmt) 413 || (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != 
nroutmt) 414 || (dtp->outmaptable[PICOKDT_MTPOS_START+PICOKDT_MTPOS_TABLETYPE] 415 != mttype)) { 416 PICODBG_ERROR(("check failed, nratt %d, nrimt %d, nromt %d, omttype %d", 417 dtp->nrattributes, 418 dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES], 419 dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES], 420 dtp->outmaptable[PICOKDT_MTPOS_START + 421 PICOKDT_MTPOS_TABLETYPE])); 422 return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, 423 NULL, NULL); 424 } 425 return PICO_OK; 426 } 427 428 429 430 static pico_status_t kdtPosPInitialize(register picoknow_KnowledgeBase this, 431 picoos_Common common) { 432 pico_status_t status; 433 kdtposp_subobj_t *dtposp; 434 kdt_subobj_t *dt; 435 picoos_uint8 i; 436 437 if (NULL == this || NULL == this->subObj) { 438 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, 439 NULL, NULL); 440 } 441 dtposp = (kdtposp_subobj_t *)this->subObj; 442 dt = &(dtposp->dt); 443 dt->type = PICOKDT_KDTTYPE_POSP; 444 if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { 445 return status; 446 } 447 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSP, 448 PICOKDT_NRINPMT_POSP, PICOKDT_NROUTMT_POSP, 449 PICOKDT_MTTYPE_WORD)) != PICO_OK) { 450 return status; 451 } 452 453 /* init specialized subobj part */ 454 for (i = 0; i < PICOKDT_NRATT_POSP; i++) { 455 dtposp->invec[i] = 0; 456 } 457 dtposp->inveclen = 0; 458 PICODBG_DEBUG(("posp tree initialized")); 459 return PICO_OK; 460 } 461 462 463 static pico_status_t kdtPosDInitialize(register picoknow_KnowledgeBase this, 464 picoos_Common common) { 465 pico_status_t status; 466 kdtposd_subobj_t *dtposd; 467 kdt_subobj_t *dt; 468 picoos_uint8 i; 469 470 if (NULL == this || NULL == this->subObj) { 471 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, 472 NULL, NULL); 473 } 474 dtposd = (kdtposd_subobj_t *)this->subObj; 475 dt = &(dtposd->dt); 476 dt->type = PICOKDT_KDTTYPE_POSD; 477 if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { 478 return 
status; 479 } 480 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSD, 481 PICOKDT_NRINPMT_POSD, PICOKDT_NROUTMT_POSD, 482 PICOKDT_MTTYPE_WORD)) != PICO_OK) { 483 return status; 484 } 485 486 /* init spezialized subobj part */ 487 for (i = 0; i < PICOKDT_NRATT_POSD; i++) { 488 dtposd->invec[i] = 0; 489 } 490 dtposd->inveclen = 0; 491 PICODBG_DEBUG(("posd tree initialized")); 492 return PICO_OK; 493 } 494 495 496 static pico_status_t kdtG2PInitialize(register picoknow_KnowledgeBase this, 497 picoos_Common common) { 498 pico_status_t status; 499 kdtg2p_subobj_t *dtg2p; 500 kdt_subobj_t *dt; 501 picoos_uint8 i; 502 503 if (NULL == this || NULL == this->subObj) { 504 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, 505 NULL, NULL); 506 } 507 dtg2p = (kdtg2p_subobj_t *)this->subObj; 508 dt = &(dtg2p->dt); 509 dt->type = PICOKDT_KDTTYPE_G2P; 510 if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { 511 return status; 512 } 513 514 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_G2P, 515 PICOKDT_NRINPMT_G2P, PICOKDT_NROUTMT_G2P, 516 PICOKDT_MTTYPE_BYTETOVAR)) != PICO_OK) { 517 return status; 518 } 519 520 /* init spezialized subobj part */ 521 for (i = 0; i < PICOKDT_NRATT_G2P; i++) { 522 dtg2p->invec[i] = 0; 523 } 524 dtg2p->inveclen = 0; 525 PICODBG_DEBUG(("g2p tree initialized")); 526 return PICO_OK; 527 } 528 529 530 static pico_status_t kdtPhrInitialize(register picoknow_KnowledgeBase this, 531 picoos_Common common) { 532 pico_status_t status; 533 kdtphr_subobj_t *dtphr; 534 kdt_subobj_t *dt; 535 picoos_uint8 i; 536 537 if (NULL == this || NULL == this->subObj) { 538 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, 539 NULL, NULL); 540 } 541 dtphr = (kdtphr_subobj_t *)this->subObj; 542 dt = &(dtphr->dt); 543 dt->type = PICOKDT_KDTTYPE_PHR; 544 if ((status = kdtDtInitialize(this, common,dt)) != PICO_OK) { 545 return status; 546 } 547 548 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PHR, 549 
PICOKDT_NRINPMT_PHR, PICOKDT_NROUTMT_PHR, 550 PICOKDT_MTTYPE_WORD)) != PICO_OK) { 551 return status; 552 } 553 554 /* init spezialized subobj part */ 555 for (i = 0; i < PICOKDT_NRATT_PHR; i++) { 556 dtphr->invec[i] = 0; 557 } 558 dtphr->inveclen = 0; 559 PICODBG_DEBUG(("phr tree initialized")); 560 return PICO_OK; 561 } 562 563 564 static pico_status_t kdtAccInitialize(register picoknow_KnowledgeBase this, 565 picoos_Common common) { 566 pico_status_t status; 567 kdtacc_subobj_t *dtacc; 568 kdt_subobj_t *dt; 569 picoos_uint8 i; 570 571 if (NULL == this || NULL == this->subObj) { 572 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, 573 NULL, NULL); 574 } 575 dtacc = (kdtacc_subobj_t *)this->subObj; 576 dt = &(dtacc->dt); 577 dt->type = PICOKDT_KDTTYPE_ACC; 578 if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { 579 return status; 580 } 581 582 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_ACC, 583 PICOKDT_NRINPMT_ACC, PICOKDT_NROUTMT_ACC, 584 PICOKDT_MTTYPE_WORD)) != PICO_OK) { 585 return status; 586 } 587 588 /* init spezialized subobj part */ 589 for (i = 0; i < PICOKDT_NRATT_ACC; i++) { 590 dtacc->invec[i] = 0; 591 } 592 dtacc->inveclen = 0; 593 PICODBG_DEBUG(("acc tree initialized")); 594 return PICO_OK; 595 } 596 597 598 static pico_status_t kdtPamInitialize(register picoknow_KnowledgeBase this, 599 picoos_Common common) { 600 pico_status_t status; 601 kdtpam_subobj_t *dtpam; 602 kdt_subobj_t *dt; 603 picoos_uint8 i; 604 605 if (NULL == this || NULL == this->subObj) { 606 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, 607 NULL, NULL); 608 } 609 dtpam = (kdtpam_subobj_t *)this->subObj; 610 dt = &(dtpam->dt); 611 dt->type = PICOKDT_KDTTYPE_PAM; 612 if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { 613 return status; 614 } 615 616 if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PAM, 617 PICOKDT_NRINPMT_PAM, PICOKDT_NROUTMT_PAM, 618 PICOKDT_MTTYPE_WORD)) != PICO_OK) { 619 return status; 
620 } 621 622 /* init spezialized subobj part */ 623 for (i = 0; i < PICOKDT_NRATT_PAM; i++) { 624 dtpam->invec[i] = 0; 625 } 626 dtpam->inveclen = 0; 627 PICODBG_DEBUG(("pam tree initialized")); 628 return PICO_OK; 629 } 630 631 632 static pico_status_t kdtSubObjDeallocate(register picoknow_KnowledgeBase this, 633 picoos_MemoryManager mm) { 634 if (NULL != this) { 635 picoos_deallocate(mm, (void *) &this->subObj); 636 } 637 return PICO_OK; 638 } 639 640 641 /* we don't offer a specialized constructor for a *KnowledgeBase but 642 * instead a "specializer" of an allready existing generic 643 * picoknow_KnowledgeBase */ 644 645 pico_status_t picokdt_specializeDtKnowledgeBase(picoknow_KnowledgeBase this, 646 picoos_Common common, 647 const picokdt_kdttype_t kdttype) { 648 pico_status_t status; 649 650 if (NULL == this) { 651 return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, 652 NULL, NULL); 653 } 654 this->subDeallocate = kdtSubObjDeallocate; 655 switch (kdttype) { 656 case PICOKDT_KDTTYPE_POSP: 657 this->subObj = picoos_allocate(common->mm,sizeof(kdtposp_subobj_t)); 658 if (NULL == this->subObj) { 659 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, 660 NULL, NULL); 661 } 662 status = kdtPosPInitialize(this, common); 663 break; 664 case PICOKDT_KDTTYPE_POSD: 665 this->subObj = picoos_allocate(common->mm,sizeof(kdtposd_subobj_t)); 666 if (NULL == this->subObj) { 667 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, 668 NULL, NULL); 669 } 670 status = kdtPosDInitialize(this, common); 671 break; 672 case PICOKDT_KDTTYPE_G2P: 673 this->subObj = picoos_allocate(common->mm,sizeof(kdtg2p_subobj_t)); 674 if (NULL == this->subObj) { 675 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, 676 NULL, NULL); 677 } 678 status = kdtG2PInitialize(this, common); 679 break; 680 case PICOKDT_KDTTYPE_PHR: 681 this->subObj = picoos_allocate(common->mm,sizeof(kdtphr_subobj_t)); 682 if (NULL == this->subObj) { 683 return 
picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, 684 NULL, NULL); 685 } 686 status = kdtPhrInitialize(this, common); 687 break; 688 case PICOKDT_KDTTYPE_ACC: 689 this->subObj = picoos_allocate(common->mm,sizeof(kdtacc_subobj_t)); 690 if (NULL == this->subObj) { 691 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, 692 NULL, NULL); 693 } 694 status = kdtAccInitialize(this, common); 695 break; 696 case PICOKDT_KDTTYPE_PAM: 697 this->subObj = picoos_allocate(common->mm,sizeof(kdtpam_subobj_t)); 698 if (NULL == this->subObj) { 699 return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, 700 NULL, NULL); 701 } 702 status = kdtPamInitialize(this, common); 703 break; 704 default: 705 return picoos_emRaiseException(common->em, PICO_ERR_OTHER, 706 NULL, NULL); 707 } 708 709 if (status != PICO_OK) { 710 picoos_deallocate(common->mm, (void *) &this->subObj); 711 return picoos_emRaiseException(common->em, status, NULL, NULL); 712 } 713 return PICO_OK; 714 } 715 716 717 /* ************************************************************/ 718 /* decision tree getDt* */ 719 /* ************************************************************/ 720 721 picokdt_DtPosP picokdt_getDtPosP(picoknow_KnowledgeBase this) { 722 return ((NULL == this) ? NULL : ((picokdt_DtPosP) this->subObj)); 723 } 724 725 picokdt_DtPosD picokdt_getDtPosD(picoknow_KnowledgeBase this) { 726 return ((NULL == this) ? NULL : ((picokdt_DtPosD) this->subObj)); 727 } 728 729 picokdt_DtG2P picokdt_getDtG2P (picoknow_KnowledgeBase this) { 730 return ((NULL == this) ? NULL : ((picokdt_DtG2P) this->subObj)); 731 } 732 733 picokdt_DtPHR picokdt_getDtPHR (picoknow_KnowledgeBase this) { 734 return ((NULL == this) ? NULL : ((picokdt_DtPHR) this->subObj)); 735 } 736 737 picokdt_DtACC picokdt_getDtACC (picoknow_KnowledgeBase this) { 738 return ((NULL == this) ? NULL : ((picokdt_DtACC) this->subObj)); 739 } 740 741 picokdt_DtPAM picokdt_getDtPAM (picoknow_KnowledgeBase this) { 742 return ((NULL == this) ? 
NULL : ((picokdt_DtPAM) this->subObj)); 743 } 744 745 746 747 /* ************************************************************/ 748 /* decision tree support functions, tree */ 749 /* ************************************************************/ 750 751 752 typedef enum { 753 eQuestion = 0, /* index to #bits to identify question */ 754 eDecide = 1 /* index to #bits to identify decision */ 755 } kdt_vfields_ind_t; 756 757 typedef enum { 758 eForkCount = 0, /* index to #bits for number of forks */ 759 eBitNo = 1, /* index to #bits for index of 1st element */ 760 eBitCount = 2, /* index to #bits for size of the group */ 761 eJump = 3, /* index to #bits for offset to reach output node */ 762 eCut = 4 /* for contin. node: #bits for threshold checked */ 763 } kdt_qfields_ind_t; 764 765 typedef enum { 766 eNTerminal = 0, 767 eNBinary = 1, 768 eNContinuous = 2, 769 eNDiscrete = 3 770 } kdt_nodetypes_t; 771 772 typedef enum { 773 eOneValue = 0, 774 eTwoValues = 1, 775 eWithoutBitMask = 2, 776 eBitMask = 3 777 } kdt_subsettypes_t; 778 779 780 /* Name : kdt_jump 781 Function: maps the iJump offset to byte + bit coordinates 782 Input : iJump absolute bit offset (0..(nr-bytes-treebody)*8) 783 Output : iByteNo the first byte containing the bits to extract 784 (0..(nr-bytes-treebody)) 785 iBitNo the first bit to be extracted (0..7) 786 Returns : void 787 Notes : updates the iByteNo + iBitNo fields 788 */ 789 static void kdt_jump(const picoos_uint32 iJump, 790 picoos_uint32 *iByteNo, 791 picoos_int8 *iBitNo) { 792 picoos_uint32 iByteSize; 793 794 iByteSize = (iJump / 8 ); 795 *iBitNo = (iJump - (iByteSize * 8)) + (7 - *iBitNo); 796 *iByteNo += iByteSize; 797 if (*iBitNo >= 8) { 798 (*iByteNo)++; 799 *iBitNo = 15 - *iBitNo; 800 } else { 801 *iBitNo = 7 - *iBitNo; 802 } 803 } 804 805 806 /* replaced inline for speedup */ 807 /* Name : kdtIsVal 808 Function: Returns the binary value of the bit pointed to by iByteNo, iBitNo 809 Input : iByteNo ofsset to the byte containing the bits to 
extract 810 (0..sizeof(treebody)) 811 iBitNo ofsset to the first bit to be extracted (0..7) 812 Returns : 0/1 depending on the bit pointed to 813 */ 814 /* 815 static picoos_uint8 kdtIsVal(register kdt_subobj_t *this, 816 picoos_uint32 iByteNo, 817 picoos_int8 iBitNo) { 818 return ((this->treebody[iByteNo] & ((1)<<iBitNo)) > 0); 819 } 820 */ 821 822 823 /* @todo : consider replacing inline for speedup */ 824 825 /* Name : kdtGetQFieldsVal (was: m_QuestDependentFields) 826 Function: gets a byte from qfields 827 Input : this handle to a dt subobj 828 attind index of the attribute 829 qind index of the byte to be read 830 Returns : the requested byte 831 Notes : check that attind < this->nrattributes needed before calling 832 this function! 833 */ 834 static picoos_uint8 kdtGetQFieldsVal(register kdt_subobj_t *this, 835 const picoos_uint8 attind, 836 const kdt_qfields_ind_t qind) { 837 /* check of qind done in initialize and (for some compilers) with typing */ 838 /* check of attind needed before calling this function */ 839 return this->qfields[(attind * PICOKDT_NODEINFO_NRQFIELDS) + qind]; 840 } 841 842 843 /* Name : kdtGetShiftVal (was: get_shift_value) 844 Function: returns the (treebody) value pointed to by iByteNo, iBitNo, 845 and with size iSize 846 Input : this reference to the processing unit struct 847 iSize number of bits to be extracted (0..N) 848 iByteNo ofsset to the byte containing the bits to extract 849 (0..sizeof(treebody)) 850 iBitNo ofsset to the first bit to be extracted (0..7) 851 Returns : the value requested (if size==0 --> 0 is returned) 852 */ 853 /* 854 static picoos_uint32 orig_kdtGetShiftVal(register kdt_subobj_t *this, 855 const picoos_int16 iSize, 856 picoos_uint32 *iByteNo, 857 picoos_int8 *iBitNo) { 858 picoos_uint32 iVal; 859 picoos_int16 i; 860 861 iVal = 0; 862 for (i = iSize-1; i >= 0; i--) { 863 if ( (this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) { 864 iVal |= ( (1) << i ); 865 } 866 (*iBitNo)--; 867 if (*iBitNo < 0) { 868 
*iBitNo = 7; 869 (*iByteNo)++; 870 } 871 } 872 return iVal; 873 } 874 */ 875 /* refactor */ 876 static picoos_uint32 kdtGetShiftVal(register kdt_subobj_t *this, 877 const picoos_int16 iSize, picoos_uint32 *iByteNo, picoos_int8 *iBitNo) 878 { 879 picoos_uint32 v, b, iVal; 880 picoos_int16 i, j, len; 881 picoos_uint8 val; 882 883 if (iSize < 4) { 884 iVal = 0; 885 for (i = iSize - 1; i >= 0; i--) { 886 /* no check that *iByteNo is within valid treebody range */ 887 if ((this->treebody[*iByteNo] & ((1) << (*iBitNo))) > 0) { 888 iVal |= ((1) << i); 889 } 890 (*iBitNo)--; 891 if (*iBitNo < 0) { 892 *iBitNo = 7; 893 (*iByteNo)++; 894 } 895 } 896 return iVal; 897 } 898 899 b = *iByteNo; 900 j = *iBitNo; 901 len = iSize; 902 *iBitNo = j - iSize; 903 v = 0; 904 while (*iBitNo < 0) { 905 *iBitNo += 8; 906 (*iByteNo)++; 907 } 908 909 val = this->treebody[b++]; 910 if (j < 7) { 911 switch (j) { 912 case 0: 913 val &= 0x01; 914 break; 915 case 1: 916 val &= 0x03; 917 break; 918 case 2: 919 val &= 0x07; 920 break; 921 case 3: 922 val &= 0x0f; 923 break; 924 case 4: 925 val &= 0x1f; 926 break; 927 case 5: 928 val &= 0x3f; 929 break; 930 case 6: 931 val &= 0x7f; 932 break; 933 } 934 } 935 len -= j + 1; 936 if (len < 0) { 937 val >>= -len; 938 } 939 v = val; 940 while (len > 0) { 941 if (len >= 8) { 942 j = 8; 943 } else { 944 j = len; 945 } 946 v <<= j; 947 val = this->treebody[b++]; 948 if (j < 8) { 949 switch (j) { 950 case 1: 951 val &= 0x80; 952 val >>= 7; 953 break; 954 case 2: 955 val &= 0xc0; 956 val >>= 6; 957 break; 958 case 3: 959 val &= 0xe0; 960 val >>= 5; 961 break; 962 case 4: 963 val &= 0xf0; 964 val >>= 4; 965 break; 966 case 5: 967 val &= 0xf8; 968 val >>= 3; 969 break; 970 case 6: 971 val &= 0xfc; 972 val >>= 2; 973 break; 974 case 7: 975 val &= 0xfe; 976 val >>= 1; 977 break; 978 } 979 } 980 v |= val; 981 len -= j; 982 } 983 return v; 984 } 985 986 987 /* Name : kdtAskTree 988 Function: Tree Traversal routine 989 Input : iByteNo ofsset to the first byte 
containing the bits 990 to extract (0..sizeof(treebody)) 991 iBitNo ofsset to the first bit to be extracted (0..7) 992 Returns : >0 continue, no solution yet found 993 =0 solution found 994 <0 error, no solution found 995 Notes : 996 */ 997 static picoos_int8 kdtAskTree(register kdt_subobj_t *this, 998 picoos_uint16 *invec, 999 const kdt_nratt_t invecmax, 1000 picoos_uint32 *iByteNo, 1001 picoos_int8 *iBitNo) { 1002 picoos_uint32 iNodeType; 1003 picoos_uint8 iQuestion; 1004 picoos_int32 iVal; 1005 picoos_int32 iForks; 1006 picoos_int32 iID; 1007 1008 picoos_int32 iCut, iSubsetType, iBitPos, iBitCount, iPos, iJump, iDecision; 1009 picoos_int32 i; 1010 picoos_char iIsDecide; 1011 1012 PICODBG_TRACE(("start")); 1013 1014 /* get node type, value should be in kdt_nodetype_t range */ 1015 iNodeType = kdtGetShiftVal(this, PICOKDT_NODETYPE_NRBITS, iByteNo, iBitNo); 1016 PICODBG_TRACE(("iNodeType: %d", iNodeType)); 1017 1018 /* get attribute to be used in question, check if in range, and get val */ 1019 /* check of vfields argument done in initialize */ 1020 iQuestion = kdtGetShiftVal(this, this->vfields[eQuestion], iByteNo, iBitNo); 1021 if ((iQuestion < this->nrattributes) && (iQuestion < invecmax)) { 1022 iVal = invec[iQuestion]; 1023 } else { 1024 this->dset = FALSE; 1025 PICODBG_TRACE(("invalid question")); 1026 return -1; /* iQuestion invalid */ 1027 } 1028 iForks = 0; 1029 iID = -1; 1030 PICODBG_TRACE(("iQuestion: %d", iQuestion)); 1031 1032 switch (iNodeType) { 1033 case eNBinary: { 1034 iForks = 2; 1035 iID = iVal; 1036 break; 1037 } 1038 case eNContinuous: { 1039 iForks = 2; 1040 iID = 1; 1041 iCut = kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eCut), 1042 iByteNo, iBitNo); /*read the threshold*/ 1043 if (iVal <= iCut) { 1044 iID = 0; 1045 } 1046 break; 1047 } 1048 case eNDiscrete: { 1049 iForks = 1050 kdtGetShiftVal(this, 1051 kdtGetQFieldsVal(this, iQuestion, eForkCount), 1052 iByteNo, iBitNo); 1053 1054 for (i = 0; i < iForks-1; i++) { 1055 
iSubsetType = 1056 kdtGetShiftVal(this, PICOKDT_SUBSETTYPE_NRBITS, 1057 iByteNo, iBitNo); 1058 1059 switch (iSubsetType) { 1060 case eOneValue: { 1061 if (iID > -1) { 1062 kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo), 1063 iByteNo, iBitNo); 1064 break; 1065 } 1066 iBitPos = 1067 kdtGetShiftVal(this, 1068 kdtGetQFieldsVal(this, iQuestion, 1069 eBitNo), 1070 iByteNo, iBitNo); 1071 if (iVal == iBitPos) { 1072 iID = i; 1073 } 1074 break; 1075 } 1076 case eTwoValues: { 1077 if (iID > -1) { 1078 kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) + 1079 kdtGetQFieldsVal(this, iQuestion, eBitCount)), 1080 iByteNo, iBitNo); 1081 break; 1082 } 1083 1084 iBitPos = 1085 kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, 1086 eBitNo), 1087 iByteNo, iBitNo); 1088 iBitCount = 1089 kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, 1090 eBitCount), 1091 iByteNo, iBitNo); 1092 if ((iVal == iBitPos) || (iVal == iBitCount)) { 1093 iID = i; 1094 } 1095 break; 1096 } 1097 case eWithoutBitMask: { 1098 if (iID > -1) { 1099 kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) + 1100 kdtGetQFieldsVal(this, iQuestion, eBitCount)), 1101 iByteNo, iBitNo); 1102 break; 1103 } 1104 1105 iBitPos = 1106 kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, 1107 eBitNo), 1108 iByteNo, iBitNo); 1109 iBitCount = 1110 kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, 1111 eBitCount), 1112 iByteNo, iBitNo); 1113 if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) { 1114 iID = i; 1115 } 1116 break; 1117 } 1118 case eBitMask: { 1119 iBitPos = 0; 1120 if (iID > -1) { 1121 kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo), 1122 iByteNo, iBitNo); 1123 } else { 1124 iBitPos = 1125 kdtGetShiftVal(this, 1126 kdtGetQFieldsVal(this, iQuestion, 1127 eBitNo), 1128 iByteNo, iBitNo); 1129 } 1130 1131 iBitCount = 1132 kdtGetShiftVal(this, 1133 kdtGetQFieldsVal(this, iQuestion, 1134 eBitCount), 1135 iByteNo, iBitNo); 1136 if (iID > -1) { 1137 kdt_jump(iBitCount, iByteNo, iBitNo); 1138 break; 
1139 } 1140 1141 if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) { 1142 iPos = iVal - iBitPos; 1143 kdt_jump((iVal - iBitPos), iByteNo, iBitNo); 1144 /* if (kdtIsVal(this, *iByteNo, *iBitNo))*/ 1145 if ((this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) { 1146 iID = i; 1147 } 1148 kdt_jump((iBitCount - (iVal-iBitPos)), iByteNo, iBitNo); 1149 } else { 1150 kdt_jump(iBitCount, iByteNo, iBitNo); 1151 } 1152 break; 1153 }/*end case eBitMask*/ 1154 }/*end switch (iSubsetType)*/ 1155 }/*end for ( i = 0; i < iForks-1; i++ ) */ 1156 1157 /*default tree branch*/ 1158 if (-1 == iID) { 1159 iID = iForks-1; 1160 } 1161 break; 1162 }/*end case eNDiscrete*/ 1163 }/*end switch (iNodeType)*/ 1164 1165 for (i = 0; i < iForks; i++) { 1166 iIsDecide = kdtGetShiftVal(this, PICOKDT_ISDECIDE_NRBITS, iByteNo, iBitNo); 1167 1168 PICODBG_TRACE(("doing forks: %d", i)); 1169 1170 if (!iIsDecide) { 1171 if (iID == i) { 1172 iJump = 1173 kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eJump), 1174 iByteNo, iBitNo); 1175 kdt_jump(iJump, iByteNo, iBitNo); 1176 this->dset = FALSE; 1177 return 1; /* to be continued, no solution yet found */ 1178 } else { 1179 kdt_jump(kdtGetQFieldsVal(this, iQuestion, eJump), 1180 iByteNo, iBitNo); 1181 } 1182 } else { 1183 if (iID == i) { 1184 /* check of vfields argument done in initialize */ 1185 iDecision = kdtGetShiftVal(this, this->vfields[eDecide], 1186 iByteNo, iBitNo); 1187 this->dclass = iDecision; 1188 this->dset = TRUE; 1189 return 0; /* solution found */ 1190 } else { 1191 /* check of vfields argument done in initialize */ 1192 kdt_jump(this->vfields[eDecide], iByteNo, iBitNo); 1193 } 1194 }/*end if (!iIsDecide)*/ 1195 }/*end for (i = 0; i < iForks; i++ )*/ 1196 1197 this->dset = FALSE; 1198 PICODBG_TRACE(("problem determining class")); 1199 return -1; /* solution not found, problem determining a class */ 1200 } 1201 1202 1203 1204 /* ************************************************************/ 1205 /* decision tree support functions, 
mappings */ 1206 /* ************************************************************/ 1207 1208 1209 /* size==1 -> MapInByte, size==2 -> MapInWord, 1210 size determined from table type contained in kb. 1211 if the inmaptable is empty, outval = inval */ 1212 1213 static picoos_uint8 kdtMapInFixed(const kdt_subobj_t *dt, 1214 const picoos_uint8 imtnr, 1215 const picoos_uint16 inval, 1216 picoos_uint16 *outval, 1217 picoos_uint16 *outfallbackval) { 1218 picoos_uint8 size; 1219 picoos_uint32 pos; 1220 picoos_uint16 lentable; 1221 picoos_uint16 posbound; 1222 picoos_uint16 i; 1223 1224 *outval = 0; 1225 *outfallbackval = 0; 1226 1227 size = 0; 1228 pos = 0; 1229 1230 /* check what can be checked */ 1231 if (imtnr >= dt->inpmaptable[pos++]) { /* outside tablenr range? */ 1232 PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d", 1233 dt->inpmaptable[pos-1], imtnr)); 1234 return FALSE; 1235 } 1236 1237 /* go forward to the needed tablenr */ 1238 if (imtnr > 0) { 1239 pos = dt->beg_offset[imtnr]; 1240 } 1241 1242 /* get length */ 1243 lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | 1244 dt->inpmaptable[pos]; 1245 posbound = pos + lentable; 1246 pos += 2; 1247 1248 /* check type of table and set size */ 1249 if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_EMPTY) { 1250 /* empty table no mapping needed */ 1251 PICODBG_TRACE(("empty table: %d", imtnr)); 1252 *outval = inval; 1253 return TRUE; 1254 } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_BYTE) { 1255 size = 1; 1256 } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_WORD) { 1257 size = 2; 1258 } else { 1259 /* wrong table type */ 1260 PICODBG_ERROR(("wrong table type %d", dt->inpmaptable[pos])); 1261 return FALSE; 1262 } 1263 pos++; 1264 1265 /* set fallback value in case of failed mapping, and set upper bound pos */ 1266 *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | 1267 dt->inpmaptable[pos]; 1268 pos += 2; 1269 1270 /* size must be 1 or 2 here, keep 'redundant' so save time */ 1271 if (size == 
1) { 1272 for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { 1273 if (inval == dt->inpmaptable[pos]) { 1274 *outval = i; 1275 PICODBG_TRACE(("s1 %d in %d -> out %d", imtnr, inval, *outval)); 1276 return TRUE; 1277 } 1278 pos++; 1279 } 1280 } else if (size == 2) { 1281 posbound--; 1282 for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { 1283 if (inval == (((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | 1284 dt->inpmaptable[pos])) { 1285 *outval = i; 1286 PICODBG_TRACE(("s2 %d in %d -> out %d", imtnr, inval, *outval)); 1287 return TRUE; 1288 } 1289 pos += 2; 1290 } 1291 } else { 1292 /* impossible size */ 1293 PICODBG_ERROR(("wrong size %d", size)); 1294 return FALSE; 1295 } 1296 1297 PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval)); 1298 return FALSE; 1299 } 1300 1301 1302 static picoos_uint8 kdtMapInGraph(const kdt_subobj_t *dt, 1303 const picoos_uint8 imtnr, 1304 const picoos_uint8 *inval, 1305 const picoos_uint8 invalmaxlen, 1306 picoos_uint16 *outval, 1307 picoos_uint16 *outfallbackval) { 1308 picoos_uint8 ilen; 1309 picoos_uint8 tlen; 1310 picoos_uint8 cont; 1311 picoos_uint32 pos; 1312 picoos_uint16 lentable; 1313 picoos_uint16 posbound; 1314 picoos_uint16 i; 1315 picoos_uint8 j; 1316 1317 *outfallbackval = 0; 1318 1319 pos = 0; 1320 /* check what can be checked */ 1321 if ((imtnr >= dt->inpmaptable[pos++]) || /* outside tablenr range? */ 1322 (invalmaxlen == 0) || /* too short? */ 1323 ((ilen = picobase_det_utf8_length(inval[0])) == 0) || /* invalid? */ 1324 (ilen > invalmaxlen)) { /* not accessible? 
*/ 1325 PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d, invalmaxlen: %d, " 1326 "ilen: %d", 1327 dt->inpmaptable[pos-1], imtnr, invalmaxlen, ilen)); 1328 return FALSE; 1329 } 1330 1331 /* go forward to the needed tablenr */ 1332 for (i = 0; i < imtnr; i++) { 1333 lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | 1334 dt->inpmaptable[pos]; 1335 pos += lentable; 1336 } 1337 1338 /* get length and check type of inpmaptable */ 1339 lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | 1340 dt->inpmaptable[pos]; 1341 posbound = pos + lentable; 1342 pos += 2; 1343 1344 #if defined(PICO_DEBUG) 1345 if (1) { 1346 int id; 1347 PICODBG_TRACE(("imtnr %d", imtnr)); 1348 for (id = pos-2; id < posbound; id++) { 1349 PICODBG_TRACE(("imtbyte pos %d, %c %d", id - (pos-2), 1350 dt->inpmaptable[id], dt->inpmaptable[id])); 1351 } 1352 } 1353 #endif 1354 1355 /* check type of table */ 1356 if (dt->inpmaptable[pos] != PICOKDT_MTTYPE_GRAPH) { 1357 /* empty table does not make sense for graph */ 1358 /* wrong table type */ 1359 PICODBG_ERROR(("wrong table type")); 1360 return FALSE; 1361 } 1362 pos++; 1363 1364 /* set fallback value in case of failed mapping, and set upper bound pos */ 1365 *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | 1366 dt->inpmaptable[pos]; 1367 pos += 2; 1368 1369 /* sequential search */ 1370 for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { 1371 tlen = picobase_det_utf8_length(dt->inpmaptable[pos]); 1372 if ((pos + tlen) > posbound) { 1373 PICODBG_ERROR(("trying outside imt, posb: %d, pos: %d, tlen: %d", 1374 posbound, pos, tlen)); 1375 return FALSE; 1376 } 1377 if (ilen == tlen) { 1378 cont = TRUE; 1379 for (j = 0; cont && (j < ilen); j++) { 1380 if (dt->inpmaptable[pos + j] != inval[j]) { 1381 cont = FALSE; 1382 } 1383 } 1384 if (cont && (j == ilen)) { /* match found */ 1385 *outval = i; 1386 PICODBG_TRACE(("found mapval, posb %d, pos %d, i %d, tlen %d", 1387 posbound, pos, i, tlen)); 1388 return TRUE; 1389 } 
1390 } 1391 pos += tlen; 1392 } 1393 PICODBG_DEBUG(("outside imt %d, posb/pos/i: %d/%d/%d, fallback: %d", 1394 imtnr, posbound, pos, i, *outfallbackval)); 1395 return FALSE; 1396 } 1397 1398 1399 /* size==1 -> MapOutByte, size==2 -> MapOutWord */ 1400 static picoos_uint8 kdtMapOutFixed(const kdt_subobj_t *dt, 1401 const picoos_uint16 inval, 1402 picoos_uint16 *outval) { 1403 picoos_uint8 size; 1404 picoos_uint16 nr; 1405 1406 /* no check of lentable vs. nr in initialize done */ 1407 1408 size = 0; 1409 1410 /* type */ 1411 nr = dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE]; 1412 1413 /* check type of table and set size */ 1414 if (nr == PICOKDT_MTTYPE_EMPTY) { 1415 /* empty table no mapping needed */ 1416 PICODBG_TRACE(("empty table")); 1417 *outval = inval; 1418 return TRUE; 1419 } else if (nr == PICOKDT_MTTYPE_BYTE) { 1420 size = 1; 1421 } else if (nr == PICOKDT_MTTYPE_WORD) { 1422 size = 2; 1423 } else { 1424 /* wrong table type */ 1425 PICODBG_ERROR(("wrong table type %d", nr)); 1426 return FALSE; 1427 } 1428 1429 /* number of mapvalues */ 1430 nr = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START + 1431 PICOKDT_MTPOS_NUMBER + 1])) << 8 1432 | dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_NUMBER]; 1433 1434 if (inval < nr) { 1435 if (size == 1) { 1436 *outval = dt->outmaptable[PICOKDT_MTPOS_START + 1437 PICOKDT_MTPOS_MAPSTART + (size * inval)]; 1438 } else { 1439 *outval = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START + 1440 PICOKDT_MTPOS_MAPSTART + (size * inval) + 1])) << 8 1441 | dt->outmaptable[PICOKDT_MTPOS_START + 1442 PICOKDT_MTPOS_MAPSTART + (size * inval)]; 1443 } 1444 return TRUE; 1445 } else { 1446 *outval = 0; 1447 return FALSE; 1448 } 1449 } 1450 1451 1452 /* size==1 -> ReverseMapOutByte, size==2 -> ReverseMapOutWord */ 1453 /* outmaptable also used to map from decoded tree output domain to 1454 direct tree output domain */ 1455 static picoos_uint8 kdtReverseMapOutFixed(const kdt_subobj_t *dt, 1456 const 
picoos_uint16 inval, 1457 picoos_uint16 *outval, 1458 picoos_uint16 *outfallbackval) { 1459 picoos_uint8 size; 1460 picoos_uint32 pos; 1461 picoos_uint16 lentable; 1462 picoos_uint16 posbound; 1463 picoos_uint16 i; 1464 1465 /* no check of lentable vs. nr in initialize done */ 1466 1467 size = 0; 1468 pos = 0; 1469 *outval = 0; 1470 *outfallbackval = 0; 1471 1472 if (dt->outmaptable == NULL) { 1473 /* empty table no mapping needed */ 1474 PICODBG_TRACE(("empty table")); 1475 *outval = inval; 1476 return TRUE; 1477 } 1478 1479 /* check what can be checked */ 1480 if (dt->outmaptable[pos++] != 1) { /* only one omt possible */ 1481 PICODBG_ERROR(("check failed: nrtab: %d", dt->outmaptable[pos-1])); 1482 return FALSE; 1483 } 1484 1485 /* get length */ 1486 lentable = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | 1487 dt->outmaptable[pos]; 1488 posbound = pos + lentable; 1489 pos += 2; 1490 1491 /* check type of table and set size */ 1492 /* if (dt->outmaptable[pos] == PICOKDT_MTTYPE_EMPTY), in 1493 ...Initialize the omt is set to NULL if not existing, checked 1494 above */ 1495 1496 if (dt->outmaptable[pos] == PICOKDT_MTTYPE_BYTE) { 1497 size = 1; 1498 } else if (dt->outmaptable[pos] == PICOKDT_MTTYPE_WORD) { 1499 size = 2; 1500 } else { 1501 /* wrong table type */ 1502 PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos])); 1503 return FALSE; 1504 } 1505 pos++; 1506 1507 /* set fallback value in case of failed mapping, and set upper bound pos */ 1508 *outfallbackval = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | 1509 dt->outmaptable[pos]; 1510 pos += 2; 1511 1512 /* size must be 1 or 2 here, keep 'redundant' so save time */ 1513 if (size == 1) { 1514 for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { 1515 if (inval == dt->outmaptable[pos]) { 1516 *outval = i; 1517 PICODBG_TRACE(("s1 inval %d -> outval %d", inval, *outval)); 1518 return TRUE; 1519 } 1520 pos++; 1521 } 1522 } else if (size == 2) { 1523 posbound--; 1524 for (i = 0; (i < 
*outfallbackval) && (pos < posbound); i++) { 1525 if (inval == (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | 1526 dt->outmaptable[pos])) { 1527 *outval = i; 1528 PICODBG_TRACE(("s2 inval %d -> outval %d", inval, *outval)); 1529 return TRUE; 1530 } 1531 pos += 2; 1532 } 1533 } else { 1534 /* impossible size */ 1535 PICODBG_ERROR(("wrong size %d", size)); 1536 return FALSE; 1537 } 1538 1539 PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval)); 1540 return FALSE; 1541 } 1542 1543 1544 picoos_uint8 picokdt_dtPosDreverseMapOutFixed(const picokdt_DtPosD this, 1545 const picoos_uint16 inval, 1546 picoos_uint16 *outval, 1547 picoos_uint16 *outfallbackval) { 1548 1549 kdtposd_subobj_t * dtposd = (kdtposd_subobj_t *)this; 1550 kdt_subobj_t * dt = &(dtposd->dt); 1551 return kdtReverseMapOutFixed(dt,inval, outval, outfallbackval); 1552 } 1553 1554 /* not yet impl. size==1 -> MapOutByteToVar, 1555 fix: size==2 -> MapOutWordToVar */ 1556 static picoos_uint8 kdtMapOutVar(const kdt_subobj_t *dt, 1557 const picoos_uint16 inval, 1558 picoos_uint8 *nr, 1559 picoos_uint16 *outval, 1560 const picoos_uint16 outvalmaxlen) { 1561 picoos_uint16 pos; 1562 picoos_uint16 off2ind; 1563 picoos_uint16 lentable; 1564 picoos_uint16 nrinbytes; 1565 picoos_uint8 size; 1566 picoos_uint16 offset1; 1567 picoos_uint16 i; 1568 1569 if (dt->outmaptable == NULL) { 1570 /* empty table not possible */ 1571 PICODBG_ERROR(("no table found")); 1572 return FALSE; 1573 } 1574 1575 /* nr of tables == 1 already checked in *Initialize, no need here, go 1576 directly to position 1 */ 1577 pos = 1; 1578 1579 /* get length of table */ 1580 lentable = (((picoos_uint16)(dt->outmaptable[pos + 1])) << 8 | 1581 dt->outmaptable[pos]); 1582 pos += 2; 1583 1584 /* check table type */ 1585 if (dt->outmaptable[pos] != PICOKDT_MTTYPE_BYTETOVAR) { 1586 /* wrong table type */ 1587 PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos])); 1588 return FALSE; 1589 } 1590 size = 2; 1591 pos++; 1592 1593 /* get nr 
of ele in maptable (= nr of possible invals) */ 1594 nrinbytes = (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | 1595 dt->outmaptable[pos]); 1596 pos += 2; 1597 1598 /* check what's checkable */ 1599 if (nrinbytes == 0) { 1600 PICODBG_ERROR(("table with length zero")); 1601 return FALSE; 1602 } else if (inval >= nrinbytes) { 1603 PICODBG_ERROR(("inval %d outside valid range %d", inval, nrinbytes)); 1604 return FALSE; 1605 } 1606 1607 PICODBG_TRACE(("inval %d, lentable %d, nrinbytes %d, pos %d", inval, 1608 lentable, nrinbytes, pos)); 1609 1610 /* set off2ind to the position of the start of offset2-val */ 1611 /* offset2 points to start of next ele */ 1612 off2ind = pos + (size*inval); 1613 1614 /* get number of output values, offset2 - offset1 */ 1615 if (inval == 0) { 1616 offset1 = 0; 1617 } else { 1618 offset1 = (((picoos_uint16)(dt->outmaptable[off2ind - 1])) << 8 | 1619 dt->outmaptable[off2ind - 2]); 1620 } 1621 *nr = (((picoos_uint16)(dt->outmaptable[off2ind + 1])) << 8 | 1622 dt->outmaptable[off2ind]) - offset1; 1623 1624 PICODBG_TRACE(("offset1 %d, nr %d, pos %d", offset1, *nr, pos)); 1625 1626 /* set pos to position of 1st value being mapped to */ 1627 pos += (size * nrinbytes) + offset1; 1628 1629 if ((pos + *nr - 1) > lentable) { 1630 /* outside table, should not happen */ 1631 PICODBG_ERROR(("problem with table index, pos %d, nr %d, len %d", 1632 pos, *nr, lentable)); 1633 return FALSE; 1634 } 1635 if (*nr > outvalmaxlen) { 1636 /* not enough space in outval */ 1637 PICODBG_ERROR(("overflow in outval, %d > %d", *nr, outvalmaxlen)); 1638 return FALSE; 1639 } 1640 1641 /* finally, copy outmap result to outval */ 1642 for (i = 0; i < *nr; i++) { 1643 outval[i] = dt->outmaptable[pos++]; 1644 } 1645 return TRUE; 1646 } 1647 1648 1649 1650 /* ************************************************************/ 1651 /* decision tree POS prediction (PosP) functions */ 1652 /* ************************************************************/ 1653 1654 /* number of prefix 
and suffix graphemes used to construct the input vector */
/* graph attributes taken from the start of the word (invec 0-3) */
#define KDT_POSP_NRGRAPHPREFATT   4
/* graph attributes taken from the end of the word (invec 4-9) */
#define KDT_POSP_NRGRAPHSUFFATT   6
/* total number of graph attributes, prefix (4) plus suffix (6) */
#define KDT_POSP_NRGRAPHATT      10

/* positions of specgraph and nrgraphs attributes */
#define KDT_POSP_SPECGRAPHATTPOS 10
#define KDT_POSP_NRGRAPHSATTPOS  11


/* construct PosP input vector

   PosP invec: 12 elements

   prefix        0-3  prefix graphemes (encoded using tree inpmaptable 0-3)
   suffix        4-9  suffix graphemes (encoded using tree inpmaptable 4-9)
   isspecchar    10   is a special grapheme (e.g. hyphen) inside the word (0/1)?
   nr-utf-graphs 11   number of graphemes (i.e. UTF8 chars)

   if there are fewer than 10 graphemes, each grapheme is used only
   once, with the suffix having higher priority, i.e. elements 0-9 are
   filled as follows (0 = no grapheme, n = n-th grapheme of the word):

    #graph
    1        0  0  0  0  0  0  0  0  0  1
    2        0  0  0  0  0  0  0  0  1  2
    3        0  0  0  0  0  0  0  1  2  3
    4        0  0  0  0  0  0  1  2  3  4
    5        0  0  0  0  0  1  2  3  4  5
    6        0  0  0  0  1  2  3  4  5  6
    7        1  0  0  0  2  3  4  5  6  7
    8        1  2  0  0  3  4  5  6  7  8
    9        1  2  3  0  4  5  6  7  8  9
    10       1  2  3  4  5  6  7  8  9 10
    11       1  2  3  4  6  7  8  9 10 11
    ...
1690 1691 1-6: Fill chbuf 1692 7-10: front to invec 1st part, remove front, add rear 1693 >10: remove front, add rear 1694 no more graph -> 1695 while chbuflen>0: 1696 add rear to the last empty slot in 2nd part of invec, remove rear 1697 */ 1698 1699 1700 picoos_uint8 picokdt_dtPosPconstructInVec(const picokdt_DtPosP this, 1701 const picoos_uint8 *graph, 1702 const picoos_uint16 graphlen, 1703 const picoos_uint8 specgraphflag) { 1704 kdtposp_subobj_t *dtposp; 1705 1706 /* utf8 circular char buffer, used as restricted input deque */ 1707 /* 2nd part of graph invec has KDT_POSP_NRGRAPHSUFFATT elements, */ 1708 /* max of UTF8_MAXLEN bytes per utf8 char */ 1709 picoos_uint8 chbuf[KDT_POSP_NRGRAPHSUFFATT][PICOBASE_UTF8_MAXLEN]; 1710 picoos_uint8 chbrear; /* next free pos */ 1711 picoos_uint8 chbfront; /* next read pos */ 1712 picoos_uint8 chblen; /* empty=0; full=KDT_POSP_NRGRAPHSUFFATT */ 1713 1714 picoos_uint16 poscg; /* position of current graph (= utf8 char) */ 1715 picoos_uint16 lencg = 0; /* length of current grapheme */ 1716 picoos_uint16 nrutfg; /* number of utf graphemes */ 1717 picoos_uint8 invecpos; /* next element to add in invec */ 1718 picoos_uint16 fallback; /* fallback value for failed graph encodings */ 1719 picoos_uint8 i; 1720 1721 dtposp = (kdtposp_subobj_t *)this; 1722 chbrear = 0; 1723 chbfront = 0; 1724 chblen = 0; 1725 poscg = 0; 1726 nrutfg = 0; 1727 invecpos = 0; 1728 1729 PICODBG_DEBUG(("graphlen %d", graphlen)); 1730 1731 /* not needed, since all elements are set 1732 for (i = 0; i < PICOKDT_NRATT_POSP; i++) { 1733 dtposp->invec[i] = '\x63'; 1734 } 1735 */ 1736 1737 dtposp->inveclen = 0; 1738 1739 while ((poscg < graphlen) && 1740 ((lencg = picobase_det_utf8_length(graph[poscg])) > 0)) { 1741 if (chblen >= KDT_POSP_NRGRAPHSUFFATT) { /* chbuf full */ 1742 if (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* prefix not full */ 1743 /* att-encode front utf graph and add in invec */ 1744 if (!kdtMapInGraph(&(dtposp->dt), invecpos, 1745 chbuf[chbfront], 
PICOBASE_UTF8_MAXLEN, 1746 &(dtposp->invec[invecpos]), 1747 &fallback)) { 1748 if (fallback) { 1749 dtposp->invec[invecpos] = fallback; 1750 } else { 1751 return FALSE; 1752 } 1753 } 1754 invecpos++; 1755 } 1756 /* remove front utf graph */ 1757 chbfront++; 1758 chbfront %= KDT_POSP_NRGRAPHSUFFATT; 1759 chblen--; 1760 } 1761 /* add current utf graph to chbuf */ 1762 for (i=0; i<lencg; i++) { 1763 chbuf[chbrear][i] = graph[poscg++]; 1764 } 1765 if (i < PICOBASE_UTF8_MAXLEN) { 1766 chbuf[chbrear][i] = '\0'; 1767 } 1768 chbrear++; 1769 chbrear %= KDT_POSP_NRGRAPHSUFFATT; 1770 chblen++; 1771 /* increase utf graph count */ 1772 nrutfg++; 1773 } 1774 1775 if ((lencg == 0) || (chblen == 0)) { 1776 return FALSE; 1777 } else if (chblen > 0) { 1778 1779 while (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* fill up prefix */ 1780 if (!kdtMapInGraph(&(dtposp->dt), invecpos, 1781 PICOKDT_OUTSIDEGRAPH_DEFSTR, 1782 PICOKDT_OUTSIDEGRAPH_DEFLEN, 1783 &(dtposp->invec[invecpos]), &fallback)) { 1784 if (fallback) { 1785 dtposp->invec[invecpos] = fallback; 1786 } else { 1787 return FALSE; 1788 } 1789 } 1790 invecpos++; 1791 } 1792 1793 for (i = (KDT_POSP_NRGRAPHATT - 1); 1794 i >= KDT_POSP_NRGRAPHPREFATT; i--) { 1795 if (chblen > 0) { 1796 if (chbrear == 0) { 1797 chbrear = KDT_POSP_NRGRAPHSUFFATT - 1; 1798 } else { 1799 chbrear--; 1800 } 1801 if (!kdtMapInGraph(&(dtposp->dt), i, chbuf[chbrear], 1802 PICOBASE_UTF8_MAXLEN, 1803 &(dtposp->invec[i]), &fallback)) { 1804 if (fallback) { 1805 dtposp->invec[i] = fallback; 1806 } else { 1807 return FALSE; 1808 } 1809 } 1810 chblen--; 1811 } else { 1812 if (!kdtMapInGraph(&(dtposp->dt), i, 1813 PICOKDT_OUTSIDEGRAPH_DEFSTR, 1814 PICOKDT_OUTSIDEGRAPH_DEFLEN, 1815 &(dtposp->invec[i]), &fallback)) { 1816 if (fallback) { 1817 dtposp->invec[i] = fallback; 1818 } else { 1819 return FALSE; 1820 } 1821 } 1822 } 1823 } 1824 1825 /* set isSpecChar attribute, reuse var i */ 1826 i = (specgraphflag ? 
1 : 0); 1827 if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_SPECGRAPHATTPOS, i, 1828 &(dtposp->invec[KDT_POSP_SPECGRAPHATTPOS]), 1829 &fallback)) { 1830 if (fallback) { 1831 dtposp->invec[KDT_POSP_SPECGRAPHATTPOS] = fallback; 1832 } else { 1833 return FALSE; 1834 } 1835 } 1836 1837 /* set nrGraphs attribute */ 1838 if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_NRGRAPHSATTPOS, nrutfg, 1839 &(dtposp->invec[KDT_POSP_NRGRAPHSATTPOS]), 1840 &fallback)) { 1841 if (fallback) { 1842 dtposp->invec[KDT_POSP_NRGRAPHSATTPOS] = fallback; 1843 } else { 1844 return FALSE; 1845 } 1846 } 1847 PICODBG_DEBUG(("posp-invec: [%d,%d,%d,%d|%d,%d,%d,%d,%d,%d|%d|%d]", 1848 dtposp->invec[0], dtposp->invec[1], dtposp->invec[2], 1849 dtposp->invec[3], dtposp->invec[4], dtposp->invec[5], 1850 dtposp->invec[6], dtposp->invec[7], dtposp->invec[8], 1851 dtposp->invec[9], dtposp->invec[10], 1852 dtposp->invec[11], dtposp->invec[12])); 1853 dtposp->inveclen = PICOKDT_NRINPMT_POSP; 1854 return TRUE; 1855 } 1856 1857 return FALSE; 1858 } 1859 1860 1861 picoos_uint8 picokdt_dtPosPclassify(const picokdt_DtPosP this) { 1862 picoos_uint32 iByteNo; 1863 picoos_int8 iBitNo; 1864 picoos_int8 rv; 1865 kdtposp_subobj_t *dtposp; 1866 kdt_subobj_t *dt; 1867 1868 dtposp = (kdtposp_subobj_t *)this; 1869 dt = &(dtposp->dt); 1870 iByteNo = 0; 1871 iBitNo = 7; 1872 while ((rv = kdtAskTree(dt, dtposp->invec, PICOKDT_NRATT_POSP, 1873 &iByteNo, &iBitNo)) > 0) { 1874 PICODBG_TRACE(("asking tree")); 1875 } 1876 PICODBG_DEBUG(("done: %d", dt->dclass)); 1877 return ((rv == 0) && dt->dset); 1878 } 1879 1880 1881 picoos_uint8 picokdt_dtPosPdecomposeOutClass(const picokdt_DtPosP this, 1882 picokdt_classify_result_t *dtres) { 1883 kdtposp_subobj_t *dtposp; 1884 picoos_uint16 val; 1885 1886 dtposp = (kdtposp_subobj_t *)this; 1887 1888 if (dtposp->dt.dset && 1889 kdtMapOutFixed(&(dtposp->dt), dtposp->dt.dclass, &val)) { 1890 dtres->set = TRUE; 1891 dtres->class = val; 1892 return TRUE; 1893 } else { 1894 dtres->set = FALSE; 1895 return 
FALSE; 1896 } 1897 } 1898 1899 1900 1901 /* ************************************************************/ 1902 /* decision tree POS disambiguation (PosD) functions */ 1903 /* ************************************************************/ 1904 1905 1906 picoos_uint8 picokdt_dtPosDconstructInVec(const picokdt_DtPosD this, 1907 const picoos_uint16 * input) { 1908 kdtposd_subobj_t *dtposd; 1909 picoos_uint8 i; 1910 picoos_uint16 fallback = 0; 1911 1912 dtposd = (kdtposd_subobj_t *)this; 1913 dtposd->inveclen = 0; 1914 1915 PICODBG_DEBUG(("in: [%d,%d,%d|%d|%d,%d,%d]", 1916 input[0], input[1], input[2], 1917 input[3], input[4], input[5], 1918 input[6])); 1919 for (i = 0; i < PICOKDT_NRATT_POSD; i++) { 1920 1921 /* do the imt mapping for all inval */ 1922 if (!kdtMapInFixed(&(dtposd->dt), i, input[i], 1923 &(dtposd->invec[i]), &fallback)) { 1924 if (fallback) { 1925 PICODBG_DEBUG(("*** using fallback for input mapping: %i -> %i", input[i], fallback)); 1926 dtposd->invec[i] = fallback; 1927 } else { 1928 PICODBG_ERROR(("problem doing input mapping")); 1929 return FALSE; 1930 } 1931 } 1932 } 1933 1934 PICODBG_DEBUG(("out: [%d,%d,%d|%d|%d,%d,%d]", 1935 dtposd->invec[0], dtposd->invec[1], dtposd->invec[2], 1936 dtposd->invec[3], dtposd->invec[4], dtposd->invec[5], 1937 dtposd->invec[6])); 1938 dtposd->inveclen = PICOKDT_NRINPMT_POSD; 1939 return TRUE; 1940 } 1941 1942 1943 picoos_uint8 picokdt_dtPosDclassify(const picokdt_DtPosD this, 1944 picoos_uint16 *treeout) { 1945 picoos_uint32 iByteNo; 1946 picoos_int8 iBitNo; 1947 picoos_int8 rv; 1948 kdtposd_subobj_t *dtposd; 1949 kdt_subobj_t *dt; 1950 1951 dtposd = (kdtposd_subobj_t *)this; 1952 dt = &(dtposd->dt); 1953 iByteNo = 0; 1954 iBitNo = 7; 1955 while ((rv = kdtAskTree(dt, dtposd->invec, PICOKDT_NRATT_POSD, 1956 &iByteNo, &iBitNo)) > 0) { 1957 PICODBG_TRACE(("asking tree")); 1958 } 1959 PICODBG_DEBUG(("done: %d", dt->dclass)); 1960 if ((rv == 0) && dt->dset) { 1961 *treeout = dt->dclass; 1962 return TRUE; 1963 } else { 1964 
return FALSE; 1965 } 1966 } 1967 1968 1969 /* decompose the tree output and return the class in dtres 1970 dtres: POS classification result 1971 returns: TRUE if okay, FALSE otherwise 1972 */ 1973 picoos_uint8 picokdt_dtPosDdecomposeOutClass(const picokdt_DtPosD this, 1974 picokdt_classify_result_t *dtres) { 1975 kdtposd_subobj_t *dtposd; 1976 picoos_uint16 val; 1977 1978 dtposd = (kdtposd_subobj_t *)this; 1979 1980 if (dtposd->dt.dset && 1981 kdtMapOutFixed(&(dtposd->dt), dtposd->dt.dclass, &val)) { 1982 dtres->set = TRUE; 1983 dtres->class = val; 1984 return TRUE; 1985 } else { 1986 dtres->set = FALSE; 1987 return FALSE; 1988 } 1989 } 1990 1991 1992 1993 /* ************************************************************/ 1994 /* decision tree grapheme-to-phoneme (G2P) functions */ 1995 /* ************************************************************/ 1996 1997 1998 /* get the nr'th (starting at 0) utf char in utfgraph */ 1999 static picoos_uint8 kdtGetUTF8char(const picoos_uint8 *utfgraph, 2000 const picoos_uint16 graphlen, 2001 const picoos_uint16 nr, 2002 picoos_uint8 *utf8char) { 2003 picoos_uint16 i; 2004 picoos_uint32 pos; 2005 2006 pos = 0; 2007 for (i = 0; i < nr; i++) { 2008 if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &pos)) { 2009 return FALSE; 2010 } 2011 } 2012 return picobase_get_next_utf8char(utfgraph, graphlen, &pos, utf8char); 2013 } 2014 2015 /* determine the utfchar count (starting at 1) of the utfchar starting at pos */ 2016 static picoos_uint16 kdtGetUTF8Nr(const picoos_uint8 *utfgraph, 2017 const picoos_uint16 graphlen, 2018 const picoos_uint16 pos) { 2019 picoos_uint32 postmp; 2020 picoos_uint16 count; 2021 2022 count = 0; 2023 postmp = 0; 2024 while ((postmp <= pos) && (count < graphlen)) { 2025 if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &postmp)) { 2026 PICODBG_ERROR(("invalid utf8 string, count: %d, pos: %d, post: %d", 2027 count, pos, postmp)); 2028 return count + 1; 2029 } 2030 count++; 2031 } 2032 return count; 2033 } 
2034 2035 2036 picoos_uint8 picokdt_dtG2PconstructInVec(const picokdt_DtG2P this, 2037 const picoos_uint8 *graph, 2038 const picoos_uint16 graphlen, 2039 const picoos_uint8 count, 2040 const picoos_uint8 pos, 2041 const picoos_uint8 nrvow, 2042 const picoos_uint8 ordvow, 2043 picoos_uint8 *primstressflag, 2044 const picoos_uint16 phonech1, 2045 const picoos_uint16 phonech2, 2046 const picoos_uint16 phonech3) { 2047 kdtg2p_subobj_t *dtg2p; 2048 picoos_uint16 fallback = 0; 2049 picoos_uint8 iAttr; 2050 picoos_uint8 utf8char[PICOBASE_UTF8_MAXLEN + 1]; 2051 picoos_uint16 inval; 2052 picoos_int16 cinv; 2053 picoos_uint8 retval; 2054 picoos_int32 utfgraphlen; 2055 picoos_uint16 utfcount; 2056 2057 dtg2p = (kdtg2p_subobj_t *)this; 2058 retval = TRUE; 2059 inval = 0; 2060 2061 PICODBG_TRACE(("in: [%d,%d,%d|%d,%d|%d|%d,%d,%d]", graphlen, count, pos, 2062 nrvow, ordvow, *primstressflag, phonech1, phonech2, 2063 phonech3)); 2064 2065 dtg2p->inveclen = 0; 2066 2067 /* many speed-ups possible */ 2068 2069 /* graph attributes */ 2070 /* count > = <= count 2071 iAttr lowbound eow upbound delta 2072 0 4 4 graphlen 5 2073 1 3 3 graphlen 4 2074 2 2 2 graphlen 3 2075 3 1 1 graphlen 2 2076 4 0 - graphlen 1 2077 2078 5 0 graphlen graphlen-1 0 2079 6 0 graphlen-1 graphlen-2 -1 2080 7 0 graphlen-2 graphlen-3 -2 2081 8 0 graphlen-3 graphlen-4 -3 2082 */ 2083 2084 /* graph attributes left (context -4/-3/-2/-1) and current, MapInGraph */ 2085 2086 utfgraphlen = picobase_utf8_length(graph, graphlen); 2087 if (utfgraphlen <= 0) { 2088 utfgraphlen = 0; 2089 } 2090 utfcount = kdtGetUTF8Nr(graph, graphlen, count); 2091 2092 cinv = 4; 2093 for (iAttr = 0; iAttr < 5; iAttr++) { 2094 if ((utfcount > cinv) && (utfcount <= utfgraphlen)) { 2095 2096 /* utf8char[0] = graph[count - cinv - 1];*/ 2097 if (!kdtGetUTF8char(graph, graphlen, utfcount-cinv-1, 2098 utf8char)) { 2099 PICODBG_WARN(("problem getting UTF char %d", utfcount-cinv-1)); 2100 utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; 2101 utf8char[1] = 
'\0'; 2102 } 2103 } else { 2104 if ((utfcount == cinv) && (iAttr != 4)) { 2105 utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH; 2106 } else { 2107 utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; 2108 } 2109 utf8char[1] = '\0'; 2110 } 2111 2112 if (!kdtMapInGraph(&(dtg2p->dt), iAttr, 2113 utf8char, PICOBASE_UTF8_MAXLEN, 2114 &(dtg2p->invec[iAttr]), 2115 &fallback)) { 2116 if (fallback) { 2117 dtg2p->invec[iAttr] = fallback; 2118 } else { 2119 PICODBG_WARN(("setting attribute %d to zero", iAttr)); 2120 dtg2p->invec[iAttr] = 0; 2121 retval = FALSE; 2122 } 2123 } 2124 PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0])); 2125 cinv--; 2126 } 2127 2128 /* graph attributes right (context 1/2/3/4), MapInGraph */ 2129 cinv = utfgraphlen; 2130 for (iAttr = 5; iAttr < 9; iAttr++) { 2131 if ((utfcount > 0) && (utfcount <= (cinv - 1))) { 2132 /* utf8char[0] = graph[count + graphlen - cinv];*/ 2133 if (!kdtGetUTF8char(graph, graphlen, utfcount+utfgraphlen-cinv, 2134 utf8char)) { 2135 PICODBG_WARN(("problem getting UTF char %d", 2136 utfcount+utfgraphlen-cinv-1)); 2137 utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; 2138 utf8char[1] = '\0'; 2139 } 2140 } else { 2141 if (utfcount == cinv) { 2142 utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH; 2143 utf8char[1] = '\0'; 2144 } else { 2145 utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; 2146 utf8char[1] = '\0'; 2147 } 2148 } 2149 if (!kdtMapInGraph(&(dtg2p->dt), iAttr, 2150 utf8char, PICOBASE_UTF8_MAXLEN, 2151 &(dtg2p->invec[iAttr]), 2152 &fallback)) { 2153 if (fallback) { 2154 dtg2p->invec[iAttr] = fallback; 2155 } else { 2156 PICODBG_WARN(("setting attribute %d to zero", iAttr)); 2157 dtg2p->invec[iAttr] = 0; 2158 retval = FALSE; 2159 } 2160 } 2161 PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0])); 2162 cinv--; 2163 } 2164 2165 /* other attributes, MapInFixed */ 2166 for (iAttr = 9; iAttr < PICOKDT_NRATT_G2P; iAttr++) { 2167 switch (iAttr) { 2168 case 9: /* word POS, Fix1 */ 2169 inval = pos; 2170 break; 2171 case 10: /* nr of vowel-like graphs in word, if 
vowel, Fix2 */ 2172 inval = nrvow; 2173 break; 2174 case 11: /* order of current vowel-like graph in word, Fix2 */ 2175 inval = ordvow; 2176 break; 2177 case 12: /* primary stress mark, Fix2 */ 2178 if (*primstressflag == 1) { 2179 /*already set previously*/ 2180 inval = 1; 2181 } else { 2182 inval = 0; 2183 } 2184 break; 2185 case 13: /* phone chunk right context +1, Hist */ 2186 inval = phonech1; 2187 break; 2188 case 14: /* phone chunk right context +2, Hist */ 2189 inval = phonech2; 2190 break; 2191 case 15: /* phone chunk right context +3, Hist */ 2192 inval = phonech3; 2193 break; 2194 } 2195 2196 PICODBG_TRACE(("invec %d %d", iAttr, inval)); 2197 2198 if (!kdtMapInFixed(&(dtg2p->dt), iAttr, inval, 2199 &(dtg2p->invec[iAttr]), &fallback)) { 2200 if (fallback) { 2201 dtg2p->invec[iAttr] = fallback; 2202 } else { 2203 PICODBG_WARN(("setting attribute %d to zero", iAttr)); 2204 dtg2p->invec[iAttr] = 0; 2205 retval = FALSE; 2206 } 2207 } 2208 } 2209 2210 PICODBG_TRACE(("out: [%d,%d%,%d,%d|%d|%d,%d,%d,%d|%d,%d,%d,%d|" 2211 "%d,%d,%d]", dtg2p->invec[0], dtg2p->invec[1], 2212 dtg2p->invec[2], dtg2p->invec[3], dtg2p->invec[4], 2213 dtg2p->invec[5], dtg2p->invec[6], dtg2p->invec[7], 2214 dtg2p->invec[8], dtg2p->invec[9], dtg2p->invec[10], 2215 dtg2p->invec[11], dtg2p->invec[12], dtg2p->invec[13], 2216 dtg2p->invec[14], dtg2p->invec[15])); 2217 2218 dtg2p->inveclen = PICOKDT_NRINPMT_G2P; 2219 return retval; 2220 } 2221 2222 2223 2224 2225 picoos_uint8 picokdt_dtG2Pclassify(const picokdt_DtG2P this, 2226 picoos_uint16 *treeout) { 2227 picoos_uint32 iByteNo; 2228 picoos_int8 iBitNo; 2229 picoos_int8 rv; 2230 kdtg2p_subobj_t *dtg2p; 2231 kdt_subobj_t *dt; 2232 2233 dtg2p = (kdtg2p_subobj_t *)this; 2234 dt = &(dtg2p->dt); 2235 iByteNo = 0; 2236 iBitNo = 7; 2237 while ((rv = kdtAskTree(dt, dtg2p->invec, PICOKDT_NRATT_G2P, 2238 &iByteNo, &iBitNo)) > 0) { 2239 PICODBG_TRACE(("asking tree")); 2240 } 2241 PICODBG_TRACE(("done: %d", dt->dclass)); 2242 if ((rv == 0) && dt->dset) 
{ 2243 *treeout = dt->dclass; 2244 return TRUE; 2245 } else { 2246 return FALSE; 2247 } 2248 } 2249 2250 2251 2252 picoos_uint8 picokdt_dtG2PdecomposeOutClass(const picokdt_DtG2P this, 2253 picokdt_classify_vecresult_t *dtvres) { 2254 kdtg2p_subobj_t *dtg2p; 2255 2256 dtg2p = (kdtg2p_subobj_t *)this; 2257 2258 if (dtg2p->dt.dset && 2259 kdtMapOutVar(&(dtg2p->dt), dtg2p->dt.dclass, &(dtvres->nr), 2260 dtvres->classvec, PICOKDT_MAXSIZE_OUTVEC)) { 2261 return TRUE; 2262 } else { 2263 dtvres->nr = 0; 2264 return FALSE; 2265 } 2266 return TRUE; 2267 } 2268 2269 2270 2271 /* ************************************************************/ 2272 /* decision tree phrasing (PHR) functions */ 2273 /* ************************************************************/ 2274 2275 picoos_uint8 picokdt_dtPHRconstructInVec(const picokdt_DtPHR this, 2276 const picoos_uint8 pre2, 2277 const picoos_uint8 pre1, 2278 const picoos_uint8 src, 2279 const picoos_uint8 fol1, 2280 const picoos_uint8 fol2, 2281 const picoos_uint16 nrwordspre, 2282 const picoos_uint16 nrwordsfol, 2283 const picoos_uint16 nrsyllsfol) { 2284 kdtphr_subobj_t *dtphr; 2285 picoos_uint8 i; 2286 picoos_uint16 inval = 0; 2287 picoos_uint16 fallback = 0; 2288 2289 dtphr = (kdtphr_subobj_t *)this; 2290 PICODBG_DEBUG(("in: [%d,%d|%d|%d,%d|%d,%d,%d]", 2291 pre2, pre1, src, fol1, fol2, 2292 nrwordspre, nrwordsfol, nrsyllsfol)); 2293 dtphr->inveclen = 0; 2294 2295 for (i = 0; i < PICOKDT_NRATT_PHR; i++) { 2296 switch (i) { 2297 case 0: inval = pre2; break; 2298 case 1: inval = pre1; break; 2299 case 2: inval = src; break; 2300 case 3: inval = fol1; break; 2301 case 4: inval = fol2; break; 2302 case 5: inval = nrwordspre; break; 2303 case 6: inval = nrwordsfol; break; 2304 case 7: inval = nrsyllsfol; break; 2305 default: 2306 PICODBG_ERROR(("size mismatch")); 2307 return FALSE; 2308 break; 2309 } 2310 2311 /* do the imt mapping for all inval */ 2312 if (!kdtMapInFixed(&(dtphr->dt), i, inval, 2313 &(dtphr->invec[i]), &fallback)) { 2314 
if (fallback) { 2315 dtphr->invec[i] = fallback; 2316 } else { 2317 PICODBG_ERROR(("problem doing input mapping")); 2318 return FALSE; 2319 } 2320 } 2321 } 2322 2323 PICODBG_DEBUG(("out: [%d,%d|%d|%d,%d|%d,%d,%d]", 2324 dtphr->invec[0], dtphr->invec[1], dtphr->invec[2], 2325 dtphr->invec[3], dtphr->invec[4], dtphr->invec[5], 2326 dtphr->invec[6], dtphr->invec[7])); 2327 dtphr->inveclen = PICOKDT_NRINPMT_PHR; 2328 return TRUE; 2329 } 2330 2331 2332 picoos_uint8 picokdt_dtPHRclassify(const picokdt_DtPHR this) { 2333 picoos_uint32 iByteNo; 2334 picoos_int8 iBitNo; 2335 picoos_int8 rv; 2336 kdtphr_subobj_t *dtphr; 2337 kdt_subobj_t *dt; 2338 2339 dtphr = (kdtphr_subobj_t *)this; 2340 dt = &(dtphr->dt); 2341 iByteNo = 0; 2342 iBitNo = 7; 2343 while ((rv = kdtAskTree(dt, dtphr->invec, PICOKDT_NRATT_PHR, 2344 &iByteNo, &iBitNo)) > 0) { 2345 PICODBG_TRACE(("asking tree")); 2346 } 2347 PICODBG_DEBUG(("done: %d", dt->dclass)); 2348 return ((rv == 0) && dt->dset); 2349 } 2350 2351 2352 picoos_uint8 picokdt_dtPHRdecomposeOutClass(const picokdt_DtPHR this, 2353 picokdt_classify_result_t *dtres) { 2354 kdtphr_subobj_t *dtphr; 2355 picoos_uint16 val; 2356 2357 dtphr = (kdtphr_subobj_t *)this; 2358 2359 if (dtphr->dt.dset && 2360 kdtMapOutFixed(&(dtphr->dt), dtphr->dt.dclass, &val)) { 2361 dtres->set = TRUE; 2362 dtres->class = val; 2363 return TRUE; 2364 } else { 2365 dtres->set = FALSE; 2366 return FALSE; 2367 } 2368 } 2369 2370 2371 2372 /* ************************************************************/ 2373 /* decision tree phono-acoustical model (PAM) functions */ 2374 /* ************************************************************/ 2375 2376 picoos_uint8 picokdt_dtPAMconstructInVec(const picokdt_DtPAM this, 2377 const picoos_uint8 *vec, 2378 const picoos_uint8 veclen) { 2379 kdtpam_subobj_t *dtpam; 2380 picoos_uint8 i; 2381 picoos_uint16 fallback = 0; 2382 2383 dtpam = (kdtpam_subobj_t *)this; 2384 2385 PICODBG_TRACE(("in0: %d %d %d %d %d %d %d %d %d %d", 2386 vec[0], vec[1], 
vec[2], vec[3], vec[4], 2387 vec[5], vec[6], vec[7], vec[8], vec[9])); 2388 PICODBG_TRACE(("in1: %d %d %d %d %d %d %d %d %d %d", 2389 vec[10], vec[11], vec[12], vec[13], vec[14], 2390 vec[15], vec[16], vec[17], vec[18], vec[19])); 2391 PICODBG_TRACE(("in2: %d %d %d %d %d %d %d %d %d %d", 2392 vec[20], vec[21], vec[22], vec[23], vec[24], 2393 vec[25], vec[26], vec[27], vec[28], vec[29])); 2394 PICODBG_TRACE(("in3: %d %d %d %d %d %d %d %d %d %d", 2395 vec[30], vec[31], vec[32], vec[33], vec[34], 2396 vec[35], vec[36], vec[37], vec[38], vec[39])); 2397 PICODBG_TRACE(("in4: %d %d %d %d %d %d %d %d %d %d", 2398 vec[40], vec[41], vec[42], vec[43], vec[44], 2399 vec[45], vec[46], vec[47], vec[48], vec[49])); 2400 PICODBG_TRACE(("in5: %d %d %d %d %d %d %d %d %d %d", 2401 vec[50], vec[51], vec[52], vec[53], vec[54], 2402 vec[55], vec[56], vec[57], vec[58], vec[59])); 2403 2404 dtpam->inveclen = 0; 2405 2406 /* check veclen */ 2407 if (veclen != PICOKDT_NRINPMT_PAM) { 2408 PICODBG_ERROR(("wrong number of input vector elements")); 2409 return FALSE; 2410 } 2411 2412 for (i = 0; i < PICOKDT_NRATT_PAM; i++) { 2413 2414 /* do the imt mapping for all vec eles */ 2415 if (!kdtMapInFixed(&(dtpam->dt), i, vec[i], 2416 &(dtpam->invec[i]), &fallback)) { 2417 if (fallback) { 2418 dtpam->invec[i] = fallback; 2419 } else { 2420 PICODBG_ERROR(("problem doing input mapping, %d %d", i,vec[i])); 2421 return FALSE; 2422 } 2423 } 2424 } 2425 2426 PICODBG_TRACE(("in0: %d %d %d %d %d %d %d %d %d %d", 2427 dtpam->invec[0], dtpam->invec[1], dtpam->invec[2], 2428 dtpam->invec[3], dtpam->invec[4], dtpam->invec[5], 2429 dtpam->invec[6], dtpam->invec[7], dtpam->invec[8], 2430 dtpam->invec[9])); 2431 PICODBG_TRACE(("in1: %d %d %d %d %d %d %d %d %d %d", 2432 dtpam->invec[10], dtpam->invec[11], dtpam->invec[12], 2433 dtpam->invec[13], dtpam->invec[14], dtpam->invec[15], 2434 dtpam->invec[16], dtpam->invec[17], dtpam->invec[18], 2435 dtpam->invec[19])); 2436 PICODBG_TRACE(("in2: %d %d %d %d %d %d %d %d %d 
%d", 2437 dtpam->invec[20], dtpam->invec[21], dtpam->invec[22], 2438 dtpam->invec[23], dtpam->invec[24], dtpam->invec[25], 2439 dtpam->invec[26], dtpam->invec[27], dtpam->invec[28], 2440 dtpam->invec[29])); 2441 PICODBG_TRACE(("in3: %d %d %d %d %d %d %d %d %d %d", 2442 dtpam->invec[30], dtpam->invec[31], dtpam->invec[32], 2443 dtpam->invec[33], dtpam->invec[34], dtpam->invec[35], 2444 dtpam->invec[36], dtpam->invec[37], dtpam->invec[38], 2445 dtpam->invec[39])); 2446 PICODBG_TRACE(("in4: %d %d %d %d %d %d %d %d %d %d", 2447 dtpam->invec[40], dtpam->invec[41], dtpam->invec[42], 2448 dtpam->invec[43], dtpam->invec[44], dtpam->invec[45], 2449 dtpam->invec[46], dtpam->invec[47], dtpam->invec[48], 2450 dtpam->invec[49])); 2451 PICODBG_TRACE(("in5: %d %d %d %d %d %d %d %d %d %d", 2452 dtpam->invec[50], dtpam->invec[51], dtpam->invec[52], 2453 dtpam->invec[53], dtpam->invec[54], dtpam->invec[55], 2454 dtpam->invec[56], dtpam->invec[57], dtpam->invec[58], 2455 dtpam->invec[59])); 2456 2457 dtpam->inveclen = PICOKDT_NRINPMT_PAM; 2458 return TRUE; 2459 } 2460 2461 2462 picoos_uint8 picokdt_dtPAMclassify(const picokdt_DtPAM this) { 2463 picoos_uint32 iByteNo; 2464 picoos_int8 iBitNo; 2465 picoos_int8 rv; 2466 kdtpam_subobj_t *dtpam; 2467 kdt_subobj_t *dt; 2468 2469 dtpam = (kdtpam_subobj_t *)this; 2470 dt = &(dtpam->dt); 2471 iByteNo = 0; 2472 iBitNo = 7; 2473 while ((rv = kdtAskTree(dt, dtpam->invec, PICOKDT_NRATT_PAM, 2474 &iByteNo, &iBitNo)) > 0) { 2475 PICODBG_TRACE(("asking tree")); 2476 } 2477 PICODBG_DEBUG(("done: %d", dt->dclass)); 2478 return ((rv == 0) && dt->dset); 2479 } 2480 2481 2482 picoos_uint8 picokdt_dtPAMdecomposeOutClass(const picokdt_DtPAM this, 2483 picokdt_classify_result_t *dtres) { 2484 kdtpam_subobj_t *dtpam; 2485 picoos_uint16 val; 2486 2487 dtpam = (kdtpam_subobj_t *)this; 2488 2489 if (dtpam->dt.dset && 2490 kdtMapOutFixed(&(dtpam->dt), dtpam->dt.dclass, &val)) { 2491 dtres->set = TRUE; 2492 dtres->class = val; 2493 return TRUE; 2494 } else { 2495 
dtres->set = FALSE; 2496 return FALSE; 2497 } 2498 } 2499 2500 2501 2502 /* ************************************************************/ 2503 /* decision tree accentuation (ACC) functions */ 2504 /* ************************************************************/ 2505 2506 picoos_uint8 picokdt_dtACCconstructInVec(const picokdt_DtACC this, 2507 const picoos_uint8 pre2, 2508 const picoos_uint8 pre1, 2509 const picoos_uint8 src, 2510 const picoos_uint8 fol1, 2511 const picoos_uint8 fol2, 2512 const picoos_uint16 hist1, 2513 const picoos_uint16 hist2, 2514 const picoos_uint16 nrwordspre, 2515 const picoos_uint16 nrsyllspre, 2516 const picoos_uint16 nrwordsfol, 2517 const picoos_uint16 nrsyllsfol, 2518 const picoos_uint16 footwordsfol, 2519 const picoos_uint16 footsyllsfol) { 2520 kdtacc_subobj_t *dtacc; 2521 picoos_uint8 i; 2522 picoos_uint16 inval = 0; 2523 picoos_uint16 fallback = 0; 2524 2525 dtacc = (kdtacc_subobj_t *)this; 2526 PICODBG_DEBUG(("in: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]", 2527 pre2, pre1, src, fol1, fol2, hist1, hist2, 2528 nrwordspre, nrsyllspre, nrwordsfol, nrsyllsfol, 2529 footwordsfol, footsyllsfol)); 2530 dtacc->inveclen = 0; 2531 2532 for (i = 0; i < PICOKDT_NRATT_ACC; i++) { 2533 switch (i) { 2534 case 0: inval = pre2; break; 2535 case 1: inval = pre1; break; 2536 case 2: inval = src; break; 2537 case 3: inval = fol1; break; 2538 case 4: inval = fol2; break; 2539 case 5: inval = hist1; break; 2540 case 6: inval = hist2; break; 2541 case 7: inval = nrwordspre; break; 2542 case 8: inval = nrsyllspre; break; 2543 case 9: inval = nrwordsfol; break; 2544 case 10: inval = nrsyllsfol; break; 2545 case 11: inval = footwordsfol; break; 2546 case 12: inval = footsyllsfol; break; 2547 default: 2548 PICODBG_ERROR(("size mismatch")); 2549 return FALSE; 2550 break; 2551 } 2552 2553 if (((i == 5) || (i == 6)) && (inval == PICOKDT_HISTORY_ZERO)) { 2554 /* in input to this function the HISTORY_ZERO is used to 2555 mark the no-value-available case. 
For sparsity reasons 2556 this was not used in the training. For 2557 no-value-available cases, instead, do reverse out 2558 mapping of ACC0 to get tree domain for ACC0 */ 2559 if (!kdtReverseMapOutFixed(&(dtacc->dt), PICODATA_ACC0, 2560 &inval, &fallback)) { 2561 if (fallback) { 2562 inval = fallback; 2563 } else { 2564 PICODBG_ERROR(("problem doing reverse output mapping")); 2565 return FALSE; 2566 } 2567 } 2568 } 2569 2570 /* do the imt mapping for all inval */ 2571 if (!kdtMapInFixed(&(dtacc->dt), i, inval, 2572 &(dtacc->invec[i]), &fallback)) { 2573 if (fallback) { 2574 dtacc->invec[i] = fallback; 2575 } else { 2576 PICODBG_ERROR(("problem doing input mapping")); 2577 return FALSE; 2578 } 2579 } 2580 } 2581 2582 PICODBG_DEBUG(("out: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]", 2583 dtacc->invec[0], dtacc->invec[1], dtacc->invec[2], 2584 dtacc->invec[3], dtacc->invec[4], dtacc->invec[5], 2585 dtacc->invec[6], dtacc->invec[7], dtacc->invec[8], 2586 dtacc->invec[9], dtacc->invec[10], dtacc->invec[11], 2587 dtacc->invec[12])); 2588 dtacc->inveclen = PICOKDT_NRINPMT_ACC; 2589 return TRUE; 2590 } 2591 2592 2593 picoos_uint8 picokdt_dtACCclassify(const picokdt_DtACC this, 2594 picoos_uint16 *treeout) { 2595 picoos_uint32 iByteNo; 2596 picoos_int8 iBitNo; 2597 picoos_int8 rv; 2598 kdtacc_subobj_t *dtacc; 2599 kdt_subobj_t *dt; 2600 2601 dtacc = (kdtacc_subobj_t *)this; 2602 dt = &(dtacc->dt); 2603 iByteNo = 0; 2604 iBitNo = 7; 2605 while ((rv = kdtAskTree(dt, dtacc->invec, PICOKDT_NRATT_ACC, 2606 &iByteNo, &iBitNo)) > 0) { 2607 PICODBG_TRACE(("asking tree")); 2608 } 2609 PICODBG_TRACE(("done: %d", dt->dclass)); 2610 if ((rv == 0) && dt->dset) { 2611 *treeout = dt->dclass; 2612 return TRUE; 2613 } else { 2614 return FALSE; 2615 } 2616 } 2617 2618 2619 picoos_uint8 picokdt_dtACCdecomposeOutClass(const picokdt_DtACC this, 2620 picokdt_classify_result_t *dtres) { 2621 kdtacc_subobj_t *dtacc; 2622 picoos_uint16 val; 2623 2624 dtacc = (kdtacc_subobj_t *)this; 2625 2626 if 
(dtacc->dt.dset && 2627 kdtMapOutFixed(&(dtacc->dt), dtacc->dt.dclass, &val)) { 2628 dtres->set = TRUE; 2629 dtres->class = val; 2630 return TRUE; 2631 } else { 2632 dtres->set = FALSE; 2633 return FALSE; 2634 } 2635 } 2636 2637 #ifdef __cplusplus 2638 } 2639 #endif 2640 2641 2642 /* end */ 2643