Home | History | Annotate | Download | only in makeconv

Lines Matching defs:mbcsData

33 struct MBCSData {
69 MBCSStartMappings(MBCSData *mbcsData);
72 MBCSAddToUnicode(MBCSData *mbcsData,
82 MBCSSingleAddFromUnicode(MBCSData *mbcsData,
88 MBCSAddFromUnicode(MBCSData *mbcsData,
94 MBCSPostprocess(MBCSData *mbcsData, const UConverterStaticData *staticData);
126 static MBCSData gDummy;
128 U_CFUNC const MBCSData *
130 uprv_memset(&gDummy, 0, sizeof(MBCSData));
150 MBCSInit(MBCSData *mbcsData, UCMFile *ucm) {
151 uprv_memset(mbcsData, 0, sizeof(MBCSData));
153 mbcsData->ucm=ucm; /* aliased, not owned */
155 mbcsData->newConverter.close=MBCSClose;
156 mbcsData->newConverter.isValid=MBCSIsValid;
157 mbcsData->newConverter.addTable=MBCSAddTable;
158 mbcsData->newConverter.write=MBCSWrite;
163 MBCSData *mbcsData=(MBCSData *)uprv_malloc(sizeof(MBCSData));
164 if(mbcsData==NULL) {
169 MBCSInit(mbcsData, ucm);
170 return &mbcsData->newConverter;
174 MBCSDestruct(MBCSData *mbcsData) {
175 uprv_free(mbcsData->unicodeCodeUnits);
176 uprv_free(mbcsData->fromUBytes);
181 MBCSData *mbcsData=(MBCSData *)cnvData;
182 if(mbcsData!=NULL) {
183 MBCSDestruct(mbcsData);
184 uprv_free(mbcsData);
189 MBCSStartMappings(MBCSData *mbcsData) {
197 sum=mbcsData->ucm->states.countToUCodeUnits;
203 mbcsData->unicodeCodeUnits=(uint16_t *)uprv_malloc(sum*sizeof(uint16_t));
204 if(mbcsData->unicodeCodeUnits==NULL) {
210 mbcsData->unicodeCodeUnits[i]=0xfffe;
215 maxCharLength=mbcsData->ucm->states.maxCharLength;
225 mbcsData->fromUBytes=(uint8_t *)uprv_malloc(sum);
226 if(mbcsData->fromUBytes==NULL) {
230 uprv_memset(mbcsData->fromUBytes, 0, sum);
273 mbcsData->stage1[i]=sum;
276 mbcsData->stage2Top=stage2NullLength+stage2AllocLength; /* ==sum */
287 mbcsData->stage2Single[mbcsData->stage1[0]+i]=sum;
293 mbcsData->stage2[mbcsData->stage1[0]+i]=sum;
300 mbcsData->stageUTF8[i]=sum;
309 mbcsData->stage3Top=(stage3NullLength+stage3AllocLength)*maxCharLength; /* ==sum*maxCharLength */
316 setFallback(MBCSData *mbcsData, uint32_t offset, UChar32 c) {
317 int32_t i=ucm_findFallback(mbcsData->toUFallbacks, mbcsData->countToUFallbacks, offset);
320 mbcsData->toUFallbacks[i].codePoint=c;
324 i=mbcsData->countToUFallbacks;
329 mbcsData->toUFallbacks[i].offset=offset;
330 mbcsData->toUFallbacks[i].codePoint=c;
331 mbcsData->countToUFallbacks=i+1;
339 removeFallback(MBCSData *mbcsData, uint32_t offset) {
340 int32_t i=ucm_findFallback(mbcsData->toUFallbacks, mbcsData->countToUFallbacks, offset);
345 toUFallbacks=mbcsData->toUFallbacks;
346 limit=mbcsData->countToUFallbacks;
352 mbcsData->countToUFallbacks=limit-1;
366 MBCSAddToUnicode(MBCSData *mbcsData,
375 if(mbcsData->ucm->states.countStates==0) {
381 if(length==2 && mbcsData->ucm->states.outputType==MBCS_OUTPUT_2_SISO) {
391 entry=mbcsData->ucm->states.stateTable[state][bytes[i++]];
452 mbcsData->ucm->states.stateTable[state][bytes[i-1]]=entry;
459 if((old=mbcsData->unicodeCodeUnits[offset])!=0xfffe || (old=removeFallback(mbcsData, offset))!=-1) {
476 if(mbcsData->unicodeCodeUnits[offset]==0xfffe) {
477 return setFallback(mbcsData, offset, c);
480 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
488 old=mbcsData->unicodeCodeUnits[offset];
494 real=0x10000+((old&0x3ff)<<10)+((mbcsData->unicodeCodeUnits[offset+1])&0x3ff);
496 real=mbcsData->unicodeCodeUnits[offset+1];
513 mbcsData->unicodeCodeUnits[offset++]=0xe001;
514 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
517 mbcsData->unicodeCodeUnits[offset++]=(uint16_t)(0xdbc0+(c>>10));
518 mbcsData->unicodeCodeUnits[offset]=(uint16_t)(0xdc00+(c&0x3ff));
523 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
526 mbcsData->unicodeCodeUnits[offset++]=0xe000;
527 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
530 mbcsData->unicodeCodeUnits[offset++]=(uint16_t)(0xd7c0+(c>>10));
531 mbcsData->unicodeCodeUnits[offset]=(uint16_t)(0xdc00+(c&0x3ff));
551 MBCSData *mbcsData=(MBCSData *)cnvData;
553 return (UBool)(1==ucm_countChars(&mbcsData->ucm->states, bytes, length));
557 MBCSSingleAddFromUnicode(MBCSData *mbcsData,
579 stage3=(uint16_t *)mbcsData->fromUBytes;
584 if(mbcsData->utf8Friendly && c<=SBCS_UTF8_MAX) {
589 if(mbcsData->stage1[idx]==MBCS_STAGE_2_ALL_UNASSIGNED_INDEX) {
591 newBlock=mbcsData->stage2Top;
592 if(mbcsData->utf8Friendly) {
594 while(min<newBlock && mbcsData->stage2Single[newBlock-1]==0) {
609 mbcsData->stage1[idx]=(uint16_t)newBlock;
610 mbcsData->stage2Top=newTop;
614 idx=mbcsData->stage1[idx]+nextOffset;
615 if(mbcsData->utf8Friendly && c<=SBCS_UTF8_MAX) {
623 if(mbcsData->stage2Single[idx]==0) {
625 newBlock=mbcsData->stage3Top;
626 if(mbcsData->utf8Friendly) {
641 mbcsData->stage2Single[i++]=(uint16_t)newBlock;
644 mbcsData->stage3Top=newTop; /* ==newBlock */
648 p=stage3+mbcsData->stage2Single[idx]+nextOffset;
675 MBCSAddFromUnicode(MBCSData *mbcsData,
687 maxCharLength=mbcsData->ucm->states.maxCharLength;
689 if( mbcsData->ucm->states.outputType==MBCS_OUTPUT_2_SISO &&
710 stage3=mbcsData->fromUBytes;
714 if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
719 if(mbcsData->stage1[idx]==MBCS_STAGE_2_ALL_UNASSIGNED_INDEX) {
721 newBlock=mbcsData->stage2Top;
722 if(mbcsData->utf8Friendly) {
724 while(min<newBlock && mbcsData->stage2[newBlock-1]==0) {
742 mbcsData->stage1[i++]=(uint16_t)newBlock;
745 mbcsData->stage2Top=newTop; /* ==newBlock */
749 idx=mbcsData->stage1[idx]+nextOffset;
750 if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
758 if(mbcsData->stage2[idx]==0) {
760 newBlock=mbcsData->stage3Top;
761 if(mbcsData->utf8Friendly && nextOffset>=MBCS_STAGE_3_GRANULARITY) {
786 mbcsData->stage2[i++]=(newBlock/MBCS_STAGE_3_GRANULARITY)/maxCharLength;
789 mbcsData->stage3Top=newTop; /* ==newBlock */
792 stage3Index=MBCS_STAGE_3_GRANULARITY*(uint32_t)(uint16_t)mbcsData->stage2[idx];
795 if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
816 mbcsData->utf8Max=0xfeff;
822 mbcsData->stageUTF8[c>>MBCS_UTF8_STAGE_SHIFT]=(uint16_t)stage3Index;
869 if((mbcsData->stage2[idx+(nextOffset>>MBCS_STAGE_2_SHIFT)]&(1UL<<(16+(c&0xf))))!=0 || old!=0) {
883 mbcsData->stage2[idx+(nextOffset>>4)]|=(1UL<<(16+(c&0xf)));
890 MBCSOkForBaseFromUnicode(const MBCSData *mbcsData,
917 if(mbcsData->utf8Friendly && flag<=1 && c<=mbcsData->utf8Max && (bytes[0]==0 || flag==1)) {
926 if(mbcsData->omitFromU && flag!=0) {
937 MBCSData *mbcsData;
952 mbcsData=(MBCSData *)cnvData;
953 maxCharLength=mbcsData->ucm->states.maxCharLength;
960 mbcsData->utf8Friendly=utf8Friendly=(UBool)((table->flagsType&UCM_FLAGS_EXPLICIT)!=0);
962 mbcsData->utf8Max=MBCS_UTF8_MAX;
964 mbcsData->omitFromU=TRUE;
967 mbcsData->utf8Max=0;
974 if(!MBCSStartMappings(mbcsData)) {
1003 if( mbcsData->omitFromU && f<=1 &&
1004 mbcsData->utf8Max<c && c<=0xffff &&
1005 mbcsData->utf8Max<0xfeff
1007 mbcsData->utf8Max=0xffff;
1016 isOK&=MBCSAddToUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1019 isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1020 } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) {
1021 isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1031 isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1032 } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) {
1034 isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1050 isOK&=MBCSAddToUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1064 MBCSPostprocess(mbcsData, staticData);
1070 transformEUC(MBCSData *mbcsData) {
1075 oldLength=mbcsData->ucm->states.maxCharLength;
1080 old3Top=mbcsData->stage3Top;
1085 p8=mbcsData->fromUBytes;
1101 p8=mbcsData->fromUBytes;
1104 mbcsData->ucm->states.outputType=(int8_t)(MBCS_OUTPUT_3_EUC+oldLength-3);
1105 mbcsData->stage3Top=(old3Top*(oldLength-1))/oldLength;
1167 singleCompactStage2(MBCSData *mbcsData) {
1177 while(start<mbcsData->stage2Top) {
1181 for(i=0; i<MBCS_STAGE_2_BLOCK_SIZE && mbcsData->stage2Single[start+i]==0 && mbcsData->stage2Single[prevEnd-i]==0; ++i) {}
1189 mbcsData->stage2Single[newStart++]=mbcsData->stage2Single[start++];
1195 mbcsData->stage2Single[newStart++]=mbcsData->stage2Single[start++];
1204 if(VERBOSE && newStart<mbcsData->stage2Top) {
1206 (unsigned long)mbcsData->stage2Top, (unsigned long)newStart,
1207 (long)(mbcsData->stage2Top-newStart)*2);
1209 mbcsData->stage2Top=newStart;
1213 mbcsData->stage1[i]=map[mbcsData->stage1[i]>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT];
1219 singleCompactStage3(MBCSData *mbcsData) {
1220 uint16_t *stage3=(uint16_t *)mbcsData->fromUBytes;
1231 while(start<mbcsData->stage3Top) {
1258 if(VERBOSE && newStart<mbcsData->stage3Top) {
1260 (unsigned long)mbcsData->stage3Top, (unsigned long)newStart,
1261 (long)(mbcsData->stage3Top-newStart)*2);
1263 mbcsData->stage3Top=newStart;
1266 for(i=0; i<mbcsData->stage2Top; ++i) {
1267 mbcsData->stage2Single[i]=map[mbcsData->stage2Single[i]>>4];
1279 compactStage2(MBCSData *mbcsData) {
1289 while(start<mbcsData->stage2Top) {
1293 for(i=0; i<MBCS_STAGE_2_BLOCK_SIZE && mbcsData->stage2[start+i]==0 && mbcsData->stage2[prevEnd-i]==0; ++i) {}
1301 mbcsData->stage2[newStart++]=mbcsData->stage2[start++];
1307 mbcsData->stage2[newStart++]=mbcsData->stage2[start++];
1316 if(VERBOSE && newStart<mbcsData->stage2Top) {
1318 (unsigned long)mbcsData->stage2Top, (unsigned long)newStart,
1319 (long)(mbcsData->stage2Top-newStart)*4);
1321 mbcsData->stage2Top=newStart;
1325 mbcsData->stage1[i]=map[mbcsData->stage1[i]>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT];
1330 MBCSPostprocess(MBCSData *mbcsData, const UConverterStaticData * /*staticData*/) {
1334 states=&mbcsData->ucm->states;
1338 &mbcsData->unicodeCodeUnits,
1339 mbcsData->toUFallbacks, mbcsData->countToUFallbacks,
1343 if(transformEUC(mbcsData)) {
1357 if(!mbcsData->utf8Friendly) {
1359 singleCompactStage3(mbcsData);
1360 singleCompactStage2(mbcsData);
1362 compactStage2(mbcsData);
1371 (unsigned long)mbcsData->stage2Top,
1372 (unsigned long)mbcsData->stage2Top);
1375 (unsigned long)mbcsData->stage3Top/stage3Width,
1376 (unsigned long)mbcsData->stage3Top/stage3Width);
1380 i2=mbcsData->stage1[i1];
1387 i3=mbcsData->stage2Single[i2];
1389 i3=(uint16_t)mbcsData->stage2[i2];
1410 MBCSData *mbcsData=(MBCSData *)cnvData;
1418 stage2Length=mbcsData->stage2Top;
1419 if(mbcsData->omitFromU) {
1421 int32_t utf8Limit=(int32_t)mbcsData->utf8Max+1;
1425 if((utf8Limit&((1<<MBCS_STAGE_1_SHIFT)-1))!=0 && (st2=mbcsData->stage1[i])!=0) {
1430 while(i>0 && (st2=mbcsData->stage1[--i])==0) {}
1440 (unsigned long)mbcsData->stage2Top,
1441 (unsigned long)mbcsData->stage3Top);
1442 printf("+ total size savings: %lu bytes\n", (unsigned long)stage2Start*4+mbcsData->stage3Top);
1455 if(mbcsData->ucm->states.maxCharLength==1) {
1457 mbcsData->stage1[i]+=(uint16_t)stage1Top;
1465 mbcsData->stage3Top*=2;
1467 if(mbcsData->utf8Friendly) {
1472 mbcsData->stage1[i]+=(uint16_t)stage1Top/2; /* stage 2 contains 32-bit entries, stage 1 16-bit entries */
1479 if(mbcsData->utf8Friendly) {
1480 stageUTF8Length=(mbcsData->utf8Max+1)>>MBCS_UTF8_STAGE_SHIFT;
1481 header.version[2]=(uint8_t)(mbcsData->utf8Max>>8); /* store 0xd7 for max==0xd7ff */
1488 mbcsData->stage3Top=(mbcsData->stage3Top+3)&~3;
1507 header.countStates=mbcsData->ucm->states.countStates;
1508 header.countToUFallbacks=mbcsData->countToUFallbacks;
1512 mbcsData->ucm->states.countStates*1024+
1513 mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback);
1516 mbcsData->ucm->states.countToUCodeUnits*2;
1521 header.fromUBytesLength=mbcsData->stage3Top;
1528 header.flags=(uint8_t)(mbcsData->ucm->states.outputType);
1541 udata_writeBlock(pData, mbcsData->ucm->states.stateTable, header.countStates*1024);
1542 udata_writeBlock(pData, mbcsData->toUFallbacks, mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback));
1543 udata_writeBlock(pData, mbcsData->unicodeCodeUnits, mbcsData->ucm->states.countToUCodeUnits*2);
1544 udata_writeBlock(pData, mbcsData->stage1, stage1Top*2);
1545 if(mbcsData->ucm->states.maxCharLength==1) {
1546 udata_writeBlock(pData, mbcsData->stage2Single+stage2Start, stage2Length);
1548 udata_writeBlock(pData, mbcsData->stage2+stage2Start, stage2Length);
1551 udata_writeBlock(pData, mbcsData->fromUBytes, mbcsData->stage3Top);
1555 udata_writeBlock(pData, mbcsData->stageUTF8, stageUTF8Length*2);