1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2005, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 #include "unicode/utypes.h" 8 #include "unicode/uchar.h" 9 #include "unicode/normlzr.h" 10 #include "unicode/uniset.h" 11 #include "unicode/usetiter.h" 12 #include "unicode/schriter.h" 13 #include "unormimp.h" 14 #include "tstnorm.h" 15 16 #if !UCONFIG_NO_NORMALIZATION 17 18 static UErrorCode status = U_ZERO_ERROR; 19 20 // test APIs that are not otherwise used - improve test coverage 21 void 22 BasicNormalizerTest::TestNormalizerAPI() { 23 // instantiate a Normalizer from a CharacterIterator 24 UnicodeString s=UnicodeString("a\\u0308\\uac00\\U0002f800", "").unescape(); 25 s.append(s); // make s a bit longer and more interesting 26 StringCharacterIterator iter(s); 27 Normalizer norm(iter, UNORM_NFC); 28 if(norm.next()!=0xe4) { 29 errln("error in Normalizer(CharacterIterator).next()"); 30 } 31 32 // test copy constructor 33 Normalizer copy(norm); 34 if(copy.next()!=0xac00) { 35 errln("error in Normalizer(Normalizer(CharacterIterator)).next()"); 36 } 37 38 // test clone(), ==, and hashCode() 39 Normalizer *clone=copy.clone(); 40 if(*clone!=copy) { 41 errln("error in Normalizer(Normalizer(CharacterIterator)).clone()!=copy"); 42 } 43 // clone must have the same hashCode() 44 if(clone->hashCode()!=copy.hashCode()) { 45 errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->hashCode()!=copy.hashCode()"); 46 } 47 if(clone->next()!=0x4e3d) { 48 errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next()"); 49 } 50 // position changed, must change hashCode() 51 if(clone->hashCode()==copy.hashCode()) { 52 errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next().hashCode()==copy.hashCode()"); 53 } 54 delete clone; 55 clone=0; 56 57 // test compose() and decompose() 58 UnicodeString tel, nfkc, nfkd; 59 tel=UnicodeString(1, (UChar32)0x2121, 10); 60 tel.insert(1, (UChar)0x301); 61 62 UErrorCode errorCode=U_ZERO_ERROR; 63 Normalizer::compose(tel, TRUE, 0, nfkc, errorCode); 64 Normalizer::decompose(tel, TRUE, 0, nfkd, errorCode); 65 if(U_FAILURE(errorCode)) { 66 errln("error in Normalizer::(de)compose(): %s", u_errorName(errorCode)); 67 } else if( 68 nfkc!=UnicodeString("TE\\u0139TELTELTELTELTELTELTELTELTEL", "").unescape() || 69 nfkd!=UnicodeString("TEL\\u0301TELTELTELTELTELTELTELTELTEL", "").unescape() 70 ) { 71 errln("error in Normalizer::(de)compose(): wrong result(s)"); 72 } 73 74 // test setIndex() 75 norm.setIndexOnly(3); 76 if(norm.current()!=0x4e3d) { 77 errln("error in Normalizer(CharacterIterator).setIndex(3)"); 78 } 79 80 // test setText(CharacterIterator) and getText() 81 UnicodeString out, out2; 82 errorCode=U_ZERO_ERROR; 83 copy.setText(iter, errorCode); 84 if(U_FAILURE(errorCode)) { 85 errln("error Normalizer::setText() failed: %s", u_errorName(errorCode)); 86 } else { 87 copy.getText(out); 88 iter.getText(out2); 89 if( out!=out2 || 90 copy.startIndex()!=iter.startIndex() || 91 copy.endIndex()!=iter.endIndex() 92 ) { 93 errln("error in Normalizer::setText() or Normalizer::getText()"); 94 } 95 } 96 97 // test setText(UChar *), getUMode() and setMode() 98 errorCode=U_ZERO_ERROR; 99 copy.setText(s.getBuffer()+1, s.length()-1, errorCode); 100 copy.setMode(UNORM_NFD); 101 if(copy.getUMode()!=UNORM_NFD) { 102 errln("error in Normalizer::setMode() or Normalizer::getUMode()"); 103 } 104 if(copy.next()!=0x308 || copy.next()!=0x1100) { 105 errln("error in Normalizer::setText(UChar *) or Normalizer::setMode()"); 106 } 107 108 // test setText(UChar *, length=-1) 109 errorCode=U_ZERO_ERROR; 110 111 // NUL-terminate s 112 s.append((UChar)0); // append NUL 113 s.truncate(s.length()-1); // undo length change 114 115 copy.setText(s.getBuffer()+1, -1, errorCode); 116 if(copy.endIndex()!=s.length()-1) { 117 errln("error in Normalizer::setText(UChar *, -1)"); 118 } 119 120 // test setOption() and getOption() 121 copy.setOption(0xaa0000, TRUE); 122 copy.setOption(0x20000, FALSE); 123 if(!copy.getOption(0x880000) || copy.getOption(0x20000)) { 124 errln("error in Normalizer::setOption() or Normalizer::getOption()"); 125 } 126 127 // test last()/previous() with an internal buffer overflow 128 errorCode=U_ZERO_ERROR; 129 copy.setText(UnicodeString(1000, (UChar32)0x308, 1000), errorCode); 130 if(copy.last()!=0x308) { 131 errln("error in Normalizer(1000*U+0308).last()"); 132 } 133 134 // test UNORM_NONE 135 norm.setMode(UNORM_NONE); 136 if(norm.first()!=0x61 || norm.next()!=0x308 || norm.last()!=0x2f800) { 137 errln("error in Normalizer(UNORM_NONE).first()/next()/last()"); 138 } 139 Normalizer::normalize(s, UNORM_NONE, 0, out, status); 140 if(out!=s) { 141 errln("error in Normalizer::normalize(UNORM_NONE)"); 142 } 143 144 // test that the same string can be used as source and destination 145 s.setTo((UChar)0xe4); 146 Normalizer::normalize(s, UNORM_NFD, 0, s, status); 147 if(s.charAt(1)!=0x308) { 148 errln("error in Normalizer::normalize(UNORM_NFD, self)"); 149 } 150 Normalizer::normalize(s, UNORM_NFC, 0, s, status); 151 if(s.charAt(0)!=0xe4) { 152 errln("error in Normalizer::normalize(UNORM_NFC, self)"); 153 } 154 Normalizer::decompose(s, FALSE, 0, s, status); 155 if(s.charAt(1)!=0x308) { 156 errln("error in Normalizer::decompose(self)"); 157 } 158 Normalizer::compose(s, FALSE, 0, s, status); 159 if(s.charAt(0)!=0xe4) { 160 errln("error in Normalizer::compose(self)"); 161 } 162 Normalizer::concatenate(s, s, s, UNORM_NFC, 0, status); 163 if(s.charAt(1)!=0xe4) { 164 errln("error in Normalizer::decompose(self)"); 165 } 166 167 // test internal normalization exclusion options 168 // s contains a compatibility CJK character and a Hangul syllable 169 s=UnicodeString("a\\uFACE\\uD7A3b", -1, US_INV).unescape(); 170 status=U_ZERO_ERROR; 171 Normalizer::decompose(s, FALSE, UNORM_NX_HANGUL, out, status); 172 if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\u9F9C\\uD7A3b").unescape()) { 173 errln("Normalizer::decompose(UNORM_NX_HANGUL) failed - %s", u_errorName(status)); 174 } 175 status=U_ZERO_ERROR; 176 Normalizer::decompose(s, FALSE, UNORM_NX_CJK_COMPAT, out, status); 177 if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\uFACE\\u1112\\u1175\\u11c2b").unescape()) { 178 errln("Normalizer::decompose(UNORM_NX_CJK_COMPAT) failed - %s", u_errorName(status)); 179 } 180 status=U_ZERO_ERROR; 181 Normalizer::decompose(s, FALSE, UNORM_NX_CJK_COMPAT|UNORM_NX_HANGUL, out, status); 182 if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\uFACE\\uD7A3b").unescape()) { 183 errln("Normalizer::decompose(UNORM_NX_CJK_COMPAT|UNORM_NX_HANGUL) failed - %s", u_errorName(status)); 184 } 185 } 186 187 #endif 188