Home | History | Annotate | Download | only in gennorm

Lines Matching full:norm

78     { 0x4e, 0x6f, 0x72, 0x6d },   /* dataFormat="Norm" */
97 typedef void EnumTrieFn(void *context, uint32_t code, Norm *norm);
107 static Norm *norms;
156 /* allocate and initialize a Norm unit */
157 static Norm *
159 /* allocate Norm */
160 Norm *p=(Norm *)utm_alloc(normMem);
188 /* allocate Norm structures and reset the first one */
189 normMem=utm_open("gennorm normalization structs", 20000, 20000, sizeof(Norm));
221 * get or create a Norm unit;
224 static Norm *
226 Norm *p;
233 /* allocate Norm */
243 /* get an existing Norm unit */
244 static Norm *
258 Norm *norm=getNorm(code);
259 if(norm==NULL) {
262 return norm->udataCC;
267 * enumerate all code points with their Norm structs and call a function for each
470 /* calculate final combining indexes and store them in the Norm entries */
525 Norm *normPtr;
564 * Decompose Hangul syllables algorithmically and fill a pseudo-Norm struct.
568 getHangulDecomposition(uint32_t c, Norm *pHangulNorm, uint32_t hangulBuffer[3]) {
573 uprv_memset(pHangulNorm, 0, sizeof(Norm));
603 decompStoreNewNF(uint32_t code, Norm *norm) {
605 Norm hangulNorm;
608 Norm *p;
614 if((length=norm->lenNFD)!=0) {
617 s32=norm->nfd;
618 } else if((length=norm->lenNFKD)!=0) {
621 s32=norm->nfkd;
671 /* assume that norm->lenNFD==1 or ==2 */
672 if(norm->lenNFD==2 && !(norm->combiningFlags&0x80)) {
683 norm->lenNFD=lenNFD;
684 norm->nfd=s32;
694 norm->lenNFKD=lenNFKD;
695 norm->nfkd=s32;
702 Norm *norm;
710 decompWithSingleFn(void *context, uint32_t code, Norm *norm) {
721 myLenNFD=me->norm->lenNFD;
722 myLenNFKD=me->norm->lenNFKD;
725 if((length=norm->lenNFD)!=0 && myLenNFD!=0) {
726 /* apply NFD(myC) to norm->nfd */
727 s32=norm->nfd;
731 uprv_memcpy(nfd+lenNFD, me->norm->nfd, myLenNFD*4);
740 if((length=norm->lenNFKD)!=0) {
741 /* apply NFD(myC) and NFKD(myC) to norm->nfkd */
742 s32=norm->nfkd;
747 uprv_memcpy(nfkd+lenNFKD, me->norm->nfkd, myLenNFKD*4);
750 uprv_memcpy(nfkd+lenNFKD, me->norm->nfd, myLenNFD*4);
758 } else if((length=norm->lenNFD)!=0 && myLenNFKD!=0) {
759 /* apply NFKD(myC) to norm->nfd, forming a new norm->nfkd */
760 s32=norm->nfd;
764 uprv_memcpy(nfkd+lenNFKD, me->norm->nfkd, myLenNFKD*4);
776 if(lenNFD>norm->lenNFD) {
779 s32=norm->nfd;
785 norm->lenNFD=lenNFD;
786 norm->nfd=s32;
790 if(lenNFKD>norm->lenNFKD) {
793 s32=norm->nfkd;
799 norm->lenNFKD=lenNFKD;
800 norm->nfkd=s32;
809 storeNorm(uint32_t code, Norm *norm) {
811 Norm *p;
815 norm->lenNFKD=0;
820 norm->qcFlags=p->qcFlags;
821 norm->combiningFlags=p->combiningFlags;
822 norm->fncIndex=p->fncIndex;
825 if((norm->lenNFD|norm->lenNFKD)!=0) {
827 decompStoreNewNF(code, norm);
833 decompSingle.norm=norm;
839 uprv_memcpy(p, norm, sizeof(Norm));
891 Norm *norm;
903 norm=createNorm(c);
904 norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_JAMO_L;
906 norm->combiningFlags=1;
910 norm->canonStart=uset_open(hangul, hangul+21*28-1);
916 norm=createNorm(c);
917 norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_JAMO_V;
919 norm->combiningFlags=2;
921 norm->unsafeStart=TRUE;
926 norm=createNorm(c);
927 norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_JAMO_T;
929 norm->combiningFlags=2;
931 norm->unsafeStart=TRUE;
935 norm=allocNorm();
936 norm->specialTag=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_HANGUL;
938 norm->qcFlags=_NORM_QC_NFD|_NORM_QC_NFKD;
940 norm->qcFlags=_NORM_QC_NFD;
943 if(!utrie_setRange32(normTrie, 0xac00, 0xd7a4, (uint32_t)(norm-norms), TRUE)) {
1057 postParseFn(void *context, uint32_t code, Norm *norm) {
1061 length=norm->lenNFD;
1063 norm->canonBothCCs=reorderString(norm->nfd, length);
1067 length=norm->lenNFKD;
1069 norm->compatBothCCs=reorderString(norm->nfkd, length);
1073 if((norm->lenNFD!=0) != ((norm->qcFlags&_NORM_QC_NFD)!=0)) {
1074 fprintf(stderr, "gennorm warning: U+%04lx has NFD[%d] but quick check 0x%02x\n", (long)code, norm->lenNFD, norm->qcFlags);
1076 if(((norm->lenNFD|norm->lenNFKD)!=0) != ((norm->qcFlags&(_NORM_QC_NFD|_NORM_QC_NFKD))!=0)) {
1077 fprintf(stderr, "gennorm warning: U+%04lx has NFD[%d] NFKD[%d] but quick check 0x%02x\n", (long)code, norm->lenNFD, norm->lenNFKD, norm->qcFlags);
1082 combineAndQC[(norm->qcFlags&0x33)|((norm->combiningFlags&3)<<2)]=1;
1085 if(norm->combiningFlags&1) {
1086 if(norm->udataCC!=0) {
1088 fprintf(stderr, "gennorm warning: U+%04lx combines forward but udataCC==%u\n", (long)code, norm->udataCC);
1091 if(norm->combiningFlags&2) {
1092 if((norm->qcFlags&0x11)==0) {
1097 if(norm->udataCC==0) {
1102 if((norm->combiningFlags&3)==3 && beVerbose) {
1110 * add the current character (code) to the set of canonical starters of its norm->nfd[0]
1111 * set the "unsafe starter" flag for each norm->nfd[1..]
1113 length=norm->lenNFD;
1115 Norm *otherNorm;
1120 c=norm->nfd[0];
1138 createNorm(norm->nfd[i])->unsafeStart=TRUE;
1144 make32BitNorm(Norm *norm) {
1146 const Norm *other;
1156 if(norm->udataCC==0) {
1158 if((norm->qcFlags&_NORM_QC_NFC)==0 && norm->lenNFD>0) {
1160 if( norm->canonBothCCs>=0x100 || /* lead cc!=0 or */
1161 ((other=getNorm(norm->nfd[0]))!=NULL && (other->qcFlags&_NORM_QC_NFC)!=0) /* nfd[0] not NFC_YES */
1166 norm->lenNFD, (long)norm->nfd[0], (long)norm->nfd[1],
1167 norm->lenNFD<=2 ? "" : " ...");
1172 if((norm->qcFlags&_NORM_QC_NFKC)==0) {
1173 if(norm->lenNFKD>0) {
1175 if( norm->compatBothCCs>=0x100 || /* lead cc!=0 or */
1176 ((other=getNorm(norm->nfkd[0]))!=NULL && (other->qcFlags&_NORM_QC_NFKC)!=0) /* nfkd[0] not NFKC_YES */
1181 norm->lenNFKD, (long)norm->nfkd[0], (long)norm->nfkd[1],
1182 norm->lenNFKD<=2 ? "" : " ...");
1185 } else if(norm->lenNFD>0) {
1187 if( norm->canonBothCCs>=0x100 || /* lead cc!=0 or */
1188 ((other=getNorm(norm->nfd[0]))!=NULL && (other->qcFlags&_NORM_QC_NFKC)!=0) /* nfd[0] not NFKC_YES */
1193 norm->lenNFD, (long)norm->nfd[0], (long)norm->nfd[1],
1194 norm->lenNFD<=2 ? "" : " ...");
1202 word=norm->qcFlags;
1205 word|=(uint32_t)norm->udataCC<<_NORM_CC_SHIFT;
1208 if(norm->combiningFlags&3) {
1209 word|=(uint32_t)(norm->combiningFlags&3)<<6;
1214 if(norm->combiningIndex!=0xffff) {
1215 extra[0]=norm->combiningIndex;
1222 if((norm->lenNFD|norm->lenNFKD)!=0) {
1225 length=norm->lenNFD;
1227 if(norm->canonBothCCs!=0) {
1229 extra[count++]=norm->canonBothCCs;
1233 UTF_APPEND_CHAR_UNSAFE(extra, count, norm->nfd[i]);
1238 length=norm->lenNFKD;
1240 if(norm->compatBothCCs!=0) {
1242 extra[count++]=norm->compatBothCCs;
1246 UTF_APPEND_CHAR_UNSAFE(extra, count, norm->nfkd[i]);
1256 if(norm->specialTag!=0) {
1257 fprintf(stderr, "error: gennorm - illegal to have both extra data and a special tag (0x%x)\n", norm->specialTag);
1266 } else if(norm->specialTag!=0) {
1268 word|=(uint32_t)norm->specialTag<<_NORM_EXTRA_SHIFT;
1274 /* turn all Norm structs into corresponding 32-bit norm values */
1298 * extract all Norm.canonBothCCs into the FCD table
1346 makeCanonSetFn(void *context, uint32_t code, Norm *norm) {
1347 if(norm->canonStart!=NULL && !uset_isEmpty(norm->canonStart)) {
1353 c=usetContainsOne(norm->canonStart);
1391 uset_serialize(norm->canonStart,
1567 getSkippableFlags(const Norm *norm) {
1571 if(norm->specialTag==_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_HANGUL) {
1586 * don't have a Norm struct so they won't get here
1590 if(norm->udataCC!=0) {
1611 if( (norm->qcFlags&(_NORM_QC_NFC&_NORM_QC_ANY_NO))!=0 ||
1612 (norm->combiningFlags&3)!=0) {
1615 if(norm->lenNFD!=0 && canChangeWithFollowing(norm->nfd, norm->lenNFD, (uint8_t)norm->canonBothCCs)) {
1624 Norm *norm;
1631 norm=norms+pData[i];
1637 ((uint32_t)(norm->combiningFlags&0x80)<<(_NORM_AUX_COMP_EX_SHIFT-7))|
1638 (uint32_t)norm->fncIndex;
1640 if(norm->unsafeStart || norm->udataCC!=0) {
1644 pData[i]|=getSkippableFlags(norm);
1770 /* turn the Norm structs (stage2, norms) into 32-bit data words */
1775 /* FCD data: take Norm.canonBothCCs and store them in the FCD table */