Lines Matching full:state
16 * This file handles ICU .ucm file state information as part of the ucm module.
32 /* MBCS state handling ------------------------------------------------------ */
35 * state table row grammar (ebnf-style):
40 * (initial state (default for state 0), output is all surrogate pairs)
46 * (unassigned, state change only, surrogate pair, illegal)
50 parseState(const char *s, int32_t state[256], uint32_t *pFlags) {
55 /* initialize the state: all illegal with U+ffff */
57 state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0xffff);
77 /* empty state row: all-illegal */
102 /* determine the state entries for this range */
104 /* the default is: final state with valid entries */
109 /* get the next state, default to 0 */
121 /* get the state action, default to valid */
123 /* this is a final state */
150 /* this is an intermediate state, nothing to do */
154 /* adjust "final valid" states according to the state flags */
174 state[i]=entry;
197 fprintf(stderr, "ucm error: parse error in state definition at '%s'\n", error);
296 } else if(uprv_strcmp(*pKey, "icu:state")==0) {
297 /* if an SBCS/DBCS/EBCDIC_STATEFUL converter has icu:state, then turn it into MBCS */
307 fprintf(stderr, "ucm error: <icu:state> entry for non-MBCS table or before the <uconv_class> line\n");
312 fprintf(stderr, "ucm error: <icu:state> before the <mb_cur_max> line\n");
333 int32_t entry, sum, state, cell, count;
338 * In each final state (where there are only final entries),
340 * In all other state table rows, for each transition entry to another state,
341 * the offsets sum of that state needs to be added.
347 for(state=states->countStates-1; state>=0; --state) {
348 if(!(states->stateFlags[state]&MBCS_STATE_FLAG_READY)) {
354 entry=states->stateTable[state][cell];
358 states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_VALUE(entry, sum);
362 states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_VALUE(entry, sum);
374 entry=states->stateTable[state][cell];
377 states->stateTable[state][cell]=MBCS_ENTRY_TRANSITION_SET_OFFSET(entry, sum);
380 /* that next state does not have a sum yet, we cannot finish the one for this state */
388 states->stateOffsetSum[state]=sum;
389 states->stateFlags[state]|=MBCS_STATE_FLAG_READY;
396 fprintf(stderr, "ucm error: the state table contains loops\n");
406 for(state=1; state<states->countStates; ++state) {
407 if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) {
409 sum+=states->stateOffsetSum[state];
411 entry=states->stateTable[state][cell];
413 states->stateTable[state][cell]=MBCS_ENTRY_TRANSITION_ADD_OFFSET(entry, sum2);
425 int32_t entry, state, cell, count;
435 /* SBCS: use MBCS data structure with a default state table */
444 fprintf(stderr, "ucm error: missing state table information (<icu:state>) for MBCS\n");
448 /* EBCDIC_STATEFUL: use MBCS data structure with a default state table */
461 /* DBCS: use MBCS data structure with a default state table */
481 * to do this right, all paths through the state table would have to be
483 * but these simple checks cover most state tables in practice
492 for(state=0; state<states->countStates; ++state) {
493 if((states->stateFlags[state]&0xf)!=MBCS_STATE_FLAG_DIRECT) {
507 * then the initial state must have direct result states
525 * make sure that all "next state" values are within limits
529 for(state=states->countStates-1; state>=0; --state) {
531 entry=states->stateTable[state][cell];
533 fprintf(stderr, "ucm error: state table entry [%x][%x] has a next state of %x that is too high\n",
534 (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry));
538 fprintf(stderr, "ucm error: state table entry [%x][%x] is final but has a non-initial next state of %x\n",
539 (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry));
542 fprintf(stderr, "ucm error: state table entry [%x][%x] is not final but has an initial next state of %x\n",
543 (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry));
549 /* is this an SI/SO (like EBCDIC-stateful) state table? */
571 state=2;
573 state=1;
576 /* check that no unexpected state is a "direct" one */
577 while(state<states->countStates) {
578 if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) {
579 fprintf(stderr, "ucm error: state %d is 'initial' - not supported except for SI/SO codepages\n", (int)state);
582 ++state;
610 * by finding lead bytes with all-unassigned trail bytes and adding another state
625 /* find the lead state */
627 /* use the DBCS lead state for SI/SO codepages */
633 /* find the main trail state: the most used target state */
689 /* subtract from the possible savings the cost of an additional state */
702 /* make a copy of the state table */
710 /* add the new state */
713 * have all-unassigned trail bytes and the lead state could be removed
717 /* copy the old trail state, turning all assigned states into unassigned ones */
731 /* in the lead state, redirect all lead bytes with all-unassigned trail bytes to the new state */
738 /* sum up the new state table */
758 /* revert to the old state table */
771 * The old state table has the same lead _and_ trail states for assigned characters!
773 * For each character with an assigned state in the new table, it was assigned in the old one.
778 /* for each initial state */
786 /* the new state does not have assigned states */
797 /* find the old offset according to the old state table */
806 /* find the old offset according to the old state table */
835 * if all sequences from this state are unassigned, returns the
843 int32_t state, int32_t offset, uint32_t b) {
850 entry=states->stateTable[state][i];
861 printf(" all-unassigned sequences from prefix 0x%02lx state %ld use %ld bytes\n",
862 state, (long)savings);
900 int32_t state, savings;
902 /* for each initial state */
903 for(state=0; state<states->countStates; ++state) {
904 if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) {
908 state, 0, 0);
910 printf(" all-unassigned sequences from initial state %ld use %ld bytes\n",
911 (long)state, (long)savings);
928 int32_t state, cell, entry;
930 /* test each state table entry */
931 for(state=0; state<states->countStates; ++state) {
933 entry=states->stateTable[state][cell];
940 states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_UNASSIGNED);
969 /* use a complete state table ----------------------------------------------- */
976 uint8_t state;
980 state=0;
983 fprintf(stderr, "ucm error: there is no state information!\n");
987 /* for SI/SO (like EBCDIC-stateful), double-byte sequences start in state 1 */
989 state=1;
993 * Walk down the state table like in conversion,
998 entry=states->stateTable[state][bytes[i]];
1000 state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
1005 fprintf(stderr, "ucm error: byte sequence ends in illegal state\n");
1008 fprintf(stderr, "ucm error: byte sequence ends in state-change-only\n");
1019 state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry);
1031 fprintf(stderr, "ucm error: byte sequence too short, ends in non-final state %hu\n", state);