Home | History | Annotate | Download | only in makeconv

Lines Matching refs:mbcsData

33 struct MBCSData {
69 MBCSStartMappings(MBCSData *mbcsData);
72 MBCSAddToUnicode(MBCSData *mbcsData,
82 MBCSSingleAddFromUnicode(MBCSData *mbcsData,
88 MBCSAddFromUnicode(MBCSData *mbcsData,
94 MBCSPostprocess(MBCSData *mbcsData, const UConverterStaticData *staticData);
126 static MBCSData gDummy;
128 U_CFUNC const MBCSData *
130 uprv_memset(&gDummy, 0, sizeof(MBCSData));
150 MBCSInit(MBCSData *mbcsData, UCMFile *ucm) {
151 uprv_memset(mbcsData, 0, sizeof(MBCSData));
153 mbcsData->ucm=ucm; /* aliased, not owned */
155 mbcsData->newConverter.close=MBCSClose;
156 mbcsData->newConverter.isValid=MBCSIsValid;
157 mbcsData->newConverter.addTable=MBCSAddTable;
158 mbcsData->newConverter.write=MBCSWrite;
163 MBCSData *mbcsData=(MBCSData *)uprv_malloc(sizeof(MBCSData));
164 if(mbcsData==NULL) {
169 MBCSInit(mbcsData, ucm);
170 return &mbcsData->newConverter;
174 MBCSDestruct(MBCSData *mbcsData) {
175 uprv_free(mbcsData->unicodeCodeUnits);
176 uprv_free(mbcsData->fromUBytes);
181 MBCSData *mbcsData=(MBCSData *)cnvData;
182 if(mbcsData!=NULL) {
183 MBCSDestruct(mbcsData);
184 uprv_free(mbcsData);
189 MBCSStartMappings(MBCSData *mbcsData) {
197 sum=mbcsData->ucm->states.countToUCodeUnits;
203 mbcsData->unicodeCodeUnits=(uint16_t *)uprv_malloc(sum*sizeof(uint16_t));
204 if(mbcsData->unicodeCodeUnits==NULL) {
210 mbcsData->unicodeCodeUnits[i]=0xfffe;
215 maxCharLength=mbcsData->ucm->states.maxCharLength;
225 mbcsData->fromUBytes=(uint8_t *)uprv_malloc(sum);
226 if(mbcsData->fromUBytes==NULL) {
230 uprv_memset(mbcsData->fromUBytes, 0, sum);
273 mbcsData->stage1[i]=sum;
276 mbcsData->stage2Top=stage2NullLength+stage2AllocLength; /* ==sum */
287 mbcsData->stage2Single[mbcsData->stage1[0]+i]=sum;
293 mbcsData->stage2[mbcsData->stage1[0]+i]=sum;
300 mbcsData->stageUTF8[i]=sum;
309 mbcsData->stage3Top=(stage3NullLength+stage3AllocLength)*maxCharLength; /* ==sum*maxCharLength */
316 setFallback(MBCSData *mbcsData, uint32_t offset, UChar32 c) {
317 int32_t i=ucm_findFallback(mbcsData->toUFallbacks, mbcsData->countToUFallbacks, offset);
320 mbcsData->toUFallbacks[i].codePoint=c;
324 i=mbcsData->countToUFallbacks;
329 mbcsData->toUFallbacks[i].offset=offset;
330 mbcsData->toUFallbacks[i].codePoint=c;
331 mbcsData->countToUFallbacks=i+1;
339 removeFallback(MBCSData *mbcsData, uint32_t offset) {
340 int32_t i=ucm_findFallback(mbcsData->toUFallbacks, mbcsData->countToUFallbacks, offset);
345 toUFallbacks=mbcsData->toUFallbacks;
346 limit=mbcsData->countToUFallbacks;
352 mbcsData->countToUFallbacks=limit-1;
366 MBCSAddToUnicode(MBCSData *mbcsData,
375 if(mbcsData->ucm->states.countStates==0) {
381 if(length==2 && mbcsData->ucm->states.outputType==MBCS_OUTPUT_2_SISO) {
391 entry=mbcsData->ucm->states.stateTable[state][bytes[i++]];
452 mbcsData->ucm->states.stateTable[state][bytes[i-1]]=entry;
459 if((old=mbcsData->unicodeCodeUnits[offset])!=0xfffe || (old=removeFallback(mbcsData, offset))!=-1) {
476 if(mbcsData->unicodeCodeUnits[offset]==0xfffe) {
477 return setFallback(mbcsData, offset, c);
480 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
488 old=mbcsData->unicodeCodeUnits[offset];
494 real=0x10000+((old&0x3ff)<<10)+((mbcsData->unicodeCodeUnits[offset+1])&0x3ff);
496 real=mbcsData->unicodeCodeUnits[offset+1];
513 mbcsData->unicodeCodeUnits[offset++]=0xe001;
514 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
517 mbcsData->unicodeCodeUnits[offset++]=(uint16_t)(0xdbc0+(c>>10));
518 mbcsData->unicodeCodeUnits[offset]=(uint16_t)(0xdc00+(c&0x3ff));
523 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
526 mbcsData->unicodeCodeUnits[offset++]=0xe000;
527 mbcsData->unicodeCodeUnits[offset]=(uint16_t)c;
530 mbcsData->unicodeCodeUnits[offset++]=(uint16_t)(0xd7c0+(c>>10));
531 mbcsData->unicodeCodeUnits[offset]=(uint16_t)(0xdc00+(c&0x3ff));
551 MBCSData *mbcsData=(MBCSData *)cnvData;
553 return (UBool)(1==ucm_countChars(&mbcsData->ucm->states, bytes, length));
557 MBCSSingleAddFromUnicode(MBCSData *mbcsData,
579 stage3=(uint16_t *)mbcsData->fromUBytes;
584 if(mbcsData->utf8Friendly && c<=SBCS_UTF8_MAX) {
589 if(mbcsData->stage1[idx]==MBCS_STAGE_2_ALL_UNASSIGNED_INDEX) {
591 newBlock=mbcsData->stage2Top;
592 if(mbcsData->utf8Friendly) {
594 while(min<newBlock && mbcsData->stage2Single[newBlock-1]==0) {
609 mbcsData->stage1[idx]=(uint16_t)newBlock;
610 mbcsData->stage2Top=newTop;
614 idx=mbcsData->stage1[idx]+nextOffset;
615 if(mbcsData->utf8Friendly && c<=SBCS_UTF8_MAX) {
623 if(mbcsData->stage2Single[idx]==0) {
625 newBlock=mbcsData->stage3Top;
626 if(mbcsData->utf8Friendly) {
641 mbcsData->stage2Single[i++]=(uint16_t)newBlock;
644 mbcsData->stage3Top=newTop; /* ==newBlock */
648 p=stage3+mbcsData->stage2Single[idx]+nextOffset;
675 MBCSAddFromUnicode(MBCSData *mbcsData,
687 maxCharLength=mbcsData->ucm->states.maxCharLength;
689 if( mbcsData->ucm->states.outputType==MBCS_OUTPUT_2_SISO &&
710 stage3=mbcsData->fromUBytes;
714 if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
719 if(mbcsData->stage1[idx]==MBCS_STAGE_2_ALL_UNASSIGNED_INDEX) {
721 newBlock=mbcsData->stage2Top;
722 if(mbcsData->utf8Friendly) {
724 while(min<newBlock && mbcsData->stage2[newBlock-1]==0) {
742 mbcsData->stage1[i++]=(uint16_t)newBlock;
745 mbcsData->stage2Top=newTop; /* ==newBlock */
749 idx=mbcsData->stage1[idx]+nextOffset;
750 if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
758 if(mbcsData->stage2[idx]==0) {
760 newBlock=mbcsData->stage3Top;
761 if(mbcsData->utf8Friendly && nextOffset>=MBCS_STAGE_3_GRANULARITY) {
786 mbcsData->stage2[i++]=(newBlock/MBCS_STAGE_3_GRANULARITY)/maxCharLength;
789 mbcsData->stage3Top=newTop; /* ==newBlock */
792 stage3Index=MBCS_STAGE_3_GRANULARITY*(uint32_t)(uint16_t)mbcsData->stage2[idx];
795 if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) {
816 mbcsData->utf8Max=0xfeff;
822 mbcsData->stageUTF8[c>>MBCS_UTF8_STAGE_SHIFT]=(uint16_t)stage3Index;
869 if((mbcsData->stage2[idx+(nextOffset>>MBCS_STAGE_2_SHIFT)]&(1UL<<(16+(c&0xf))))!=0 || old!=0) {
883 mbcsData->stage2[idx+(nextOffset>>4)]|=(1UL<<(16+(c&0xf)));
890 MBCSOkForBaseFromUnicode(const MBCSData *mbcsData,
917 if(mbcsData->utf8Friendly && flag<=1 && c<=mbcsData->utf8Max && (bytes[0]==0 || flag==1)) {
926 if(mbcsData->omitFromU && flag!=0) {
937 MBCSData *mbcsData;
952 mbcsData=(MBCSData *)cnvData;
953 maxCharLength=mbcsData->ucm->states.maxCharLength;
960 mbcsData->utf8Friendly=utf8Friendly=(UBool)((table->flagsType&UCM_FLAGS_EXPLICIT)!=0);
962 mbcsData->utf8Max=MBCS_UTF8_MAX;
964 mbcsData->omitFromU=TRUE;
967 mbcsData->utf8Max=0;
974 if(!MBCSStartMappings(mbcsData)) {
1003 if( mbcsData->omitFromU && f<=1 &&
1004 mbcsData->utf8Max<c && c<=0xffff &&
1005 mbcsData->utf8Max<0xfeff
1007 mbcsData->utf8Max=0xffff;
1016 isOK&=MBCSAddToUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1019 isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1020 } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) {
1021 isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1031 isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1032 } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) {
1034 isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1050 isOK&=MBCSAddToUnicode(mbcsData, m->b.bytes, m->bLen, c, f);
1059 MBCSPostprocess(mbcsData, staticData);
1065 transformEUC(MBCSData *mbcsData) {
1070 oldLength=mbcsData->ucm->states.maxCharLength;
1075 old3Top=mbcsData->stage3Top;
1080 p8=mbcsData->fromUBytes;
1096 p8=mbcsData->fromUBytes;
1099 mbcsData->ucm->states.outputType=(int8_t)(MBCS_OUTPUT_3_EUC+oldLength-3);
1100 mbcsData->stage3Top=new3Top=(old3Top*(oldLength-1))/oldLength;
1162 singleCompactStage2(MBCSData *mbcsData) {
1172 while(start<mbcsData->stage2Top) {
1176 for(i=0; i<MBCS_STAGE_2_BLOCK_SIZE && mbcsData->stage2Single[start+i]==0 && mbcsData->stage2Single[prevEnd-i]==0; ++i) {}
1184 mbcsData->stage2Single[newStart++]=mbcsData->stage2Single[start++];
1190 mbcsData->stage2Single[newStart++]=mbcsData->stage2Single[start++];
1199 if(VERBOSE && newStart<mbcsData->stage2Top) {
1201 (unsigned long)mbcsData->stage2Top, (unsigned long)newStart,
1202 (long)(mbcsData->stage2Top-newStart)*2);
1204 mbcsData->stage2Top=newStart;
1208 mbcsData->stage1[i]=map[mbcsData->stage1[i]>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT];
1214 singleCompactStage3(MBCSData *mbcsData) {
1215 uint16_t *stage3=(uint16_t *)mbcsData->fromUBytes;
1226 while(start<mbcsData->stage3Top) {
1253 if(VERBOSE && newStart<mbcsData->stage3Top) {
1255 (unsigned long)mbcsData->stage3Top, (unsigned long)newStart,
1256 (long)(mbcsData->stage3Top-newStart)*2);
1258 mbcsData->stage3Top=newStart;
1261 for(i=0; i<mbcsData->stage2Top; ++i) {
1262 mbcsData->stage2Single[i]=map[mbcsData->stage2Single[i]>>4];
1274 compactStage2(MBCSData *mbcsData) {
1284 while(start<mbcsData->stage2Top) {
1288 for(i=0; i<MBCS_STAGE_2_BLOCK_SIZE && mbcsData->stage2[start+i]==0 && mbcsData->stage2[prevEnd-i]==0; ++i) {}
1296 mbcsData->stage2[newStart++]=mbcsData->stage2[start++];
1302 mbcsData->stage2[newStart++]=mbcsData->stage2[start++];
1311 if(VERBOSE && newStart<mbcsData->stage2Top) {
1313 (unsigned long)mbcsData->stage2Top, (unsigned long)newStart,
1314 (long)(mbcsData->stage2Top-newStart)*4);
1316 mbcsData->stage2Top=newStart;
1320 mbcsData->stage1[i]=map[mbcsData->stage1[i]>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT];
1325 MBCSPostprocess(MBCSData *mbcsData, const UConverterStaticData *staticData) {
1329 states=&mbcsData->ucm->states;
1333 &mbcsData->unicodeCodeUnits,
1334 mbcsData->toUFallbacks, mbcsData->countToUFallbacks,
1338 if(transformEUC(mbcsData)) {
1352 if(!mbcsData->utf8Friendly) {
1354 singleCompactStage3(mbcsData);
1355 singleCompactStage2(mbcsData);
1357 compactStage2(mbcsData);
1366 (unsigned long)mbcsData->stage2Top,
1367 (unsigned long)mbcsData->stage2Top);
1370 (unsigned long)mbcsData->stage3Top/stage3Width,
1371 (unsigned long)mbcsData->stage3Top/stage3Width);
1375 i2=mbcsData->stage1[i1];
1382 i3=mbcsData->stage2Single[i2];
1384 i3=(uint16_t)mbcsData->stage2[i2];
1405 MBCSData *mbcsData=(MBCSData *)cnvData;
1413 stage2Length=mbcsData->stage2Top;
1414 if(mbcsData->omitFromU) {
1416 int32_t utf8Limit=(int32_t)mbcsData->utf8Max+1;
1420 if((utf8Limit&((1<<MBCS_STAGE_1_SHIFT)-1))!=0 && (st2=mbcsData->stage1[i])!=0) {
1425 while(i>0 && (st2=mbcsData->stage1[--i])==0) {}
1435 (unsigned long)mbcsData->stage2Top,
1436 (unsigned long)mbcsData->stage3Top);
1437 printf("+ total size savings: %lu bytes\n", (unsigned long)stage2Start*4+mbcsData->stage3Top);
1450 if(mbcsData->ucm->states.maxCharLength==1) {
1452 mbcsData->stage1[i]+=(uint16_t)stage1Top;
1460 mbcsData->stage3Top*=2;
1462 if(mbcsData->utf8Friendly) {
1467 mbcsData->stage1[i]+=(uint16_t)stage1Top/2; /* stage 2 contains 32-bit entries, stage 1 16-bit entries */
1474 if(mbcsData->utf8Friendly) {
1475 stageUTF8Length=(mbcsData->utf8Max+1)>>MBCS_UTF8_STAGE_SHIFT;
1476 header.version[2]=(uint8_t)(mbcsData->utf8Max>>8); /* store 0xd7 for max==0xd7ff */
1483 mbcsData->stage3Top=(mbcsData->stage3Top+3)&~3;
1502 header.countStates=mbcsData->ucm->states.countStates;
1503 header.countToUFallbacks=mbcsData->countToUFallbacks;
1507 mbcsData->ucm->states.countStates*1024+
1508 mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback);
1511 mbcsData->ucm->states.countToUCodeUnits*2;
1516 header.fromUBytesLength=mbcsData->stage3Top;
1523 header.flags=(uint8_t)(mbcsData->ucm->states.outputType);
1536 udata_writeBlock(pData, mbcsData->ucm->states.stateTable, header.countStates*1024);
1537 udata_writeBlock(pData, mbcsData->toUFallbacks, mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback));
1538 udata_writeBlock(pData, mbcsData->unicodeCodeUnits, mbcsData->ucm->states.countToUCodeUnits*2);
1539 udata_writeBlock(pData, mbcsData->stage1, stage1Top*2);
1540 if(mbcsData->ucm->states.maxCharLength==1) {
1541 udata_writeBlock(pData, mbcsData->stage2Single+stage2Start, stage2Length);
1543 udata_writeBlock(pData, mbcsData->stage2+stage2Start, stage2Length);
1546 udata_writeBlock(pData, mbcsData->fromUBytes, mbcsData->stage3Top);
1550 udata_writeBlock(pData, mbcsData->stageUTF8, stageUTF8Length*2);