Home | History | Annotate | Download | only in intltest
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2005, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ********************************************************************/
      6 
      7 #include "unicode/utypes.h"
      8 #include "unicode/uchar.h"
      9 #include "unicode/normlzr.h"
     10 #include "unicode/uniset.h"
     11 #include "unicode/usetiter.h"
     12 #include "unicode/schriter.h"
     13 #include "unormimp.h"
     14 #include "tstnorm.h"
     15 
     16 #if !UCONFIG_NO_NORMALIZATION
     17 
     // File-scope error code shared by the Normalizer calls in this file that are
     // passed `status`. Nothing resets it implicitly: a value stored by one call
     // stays in place until code assigns U_ZERO_ERROR again.
     18 static UErrorCode status = U_ZERO_ERROR;
     19 
     20 // test APIs that are not otherwise used - improve test coverage
     21 void
     22 BasicNormalizerTest::TestNormalizerAPI() {
     23     // instantiate a Normalizer from a CharacterIterator
     24     UnicodeString s=UnicodeString("a\\u0308\\uac00\\U0002f800", "").unescape();
     25     s.append(s); // make s a bit longer and more interesting
     26     StringCharacterIterator iter(s);
     27     Normalizer norm(iter, UNORM_NFC);
     28     if(norm.next()!=0xe4) {
     29         errln("error in Normalizer(CharacterIterator).next()");
     30     }
     31 
     32     // test copy constructor
     33     Normalizer copy(norm);
     34     if(copy.next()!=0xac00) {
     35         errln("error in Normalizer(Normalizer(CharacterIterator)).next()");
     36     }
     37 
     38     // test clone(), ==, and hashCode()
     39     Normalizer *clone=copy.clone();
     40     if(*clone!=copy) {
     41         errln("error in Normalizer(Normalizer(CharacterIterator)).clone()!=copy");
     42     }
     43     // clone must have the same hashCode()
     44     if(clone->hashCode()!=copy.hashCode()) {
     45         errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->hashCode()!=copy.hashCode()");
     46     }
     47     if(clone->next()!=0x4e3d) {
     48         errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next()");
     49     }
     50     // position changed, must change hashCode()
     51     if(clone->hashCode()==copy.hashCode()) {
     52         errln("error in Normalizer(Normalizer(CharacterIterator)).clone()->next().hashCode()==copy.hashCode()");
     53     }
     54     delete clone;
     55     clone=0;
     56 
     57     // test compose() and decompose()
     58     UnicodeString tel, nfkc, nfkd;
     59     tel=UnicodeString(1, (UChar32)0x2121, 10);
     60     tel.insert(1, (UChar)0x301);
     61 
     62     UErrorCode errorCode=U_ZERO_ERROR;
     63     Normalizer::compose(tel, TRUE, 0, nfkc, errorCode);
     64     Normalizer::decompose(tel, TRUE, 0, nfkd, errorCode);
     65     if(U_FAILURE(errorCode)) {
     66         errln("error in Normalizer::(de)compose(): %s", u_errorName(errorCode));
     67     } else if(
     68         nfkc!=UnicodeString("TE\\u0139TELTELTELTELTELTELTELTELTEL", "").unescape() ||
     69         nfkd!=UnicodeString("TEL\\u0301TELTELTELTELTELTELTELTELTEL", "").unescape()
     70     ) {
     71         errln("error in Normalizer::(de)compose(): wrong result(s)");
     72     }
     73 
     74     // test setIndex()
     75     norm.setIndexOnly(3);
     76     if(norm.current()!=0x4e3d) {
     77         errln("error in Normalizer(CharacterIterator).setIndex(3)");
     78     }
     79 
     80     // test setText(CharacterIterator) and getText()
     81     UnicodeString out, out2;
     82     errorCode=U_ZERO_ERROR;
     83     copy.setText(iter, errorCode);
     84     if(U_FAILURE(errorCode)) {
     85         errln("error Normalizer::setText() failed: %s", u_errorName(errorCode));
     86     } else {
     87         copy.getText(out);
     88         iter.getText(out2);
     89         if( out!=out2 ||
     90             copy.startIndex()!=iter.startIndex() ||
     91             copy.endIndex()!=iter.endIndex()
     92         ) {
     93             errln("error in Normalizer::setText() or Normalizer::getText()");
     94         }
     95     }
     96 
     97     // test setText(UChar *), getUMode() and setMode()
     98     errorCode=U_ZERO_ERROR;
     99     copy.setText(s.getBuffer()+1, s.length()-1, errorCode);
    100     copy.setMode(UNORM_NFD);
    101     if(copy.getUMode()!=UNORM_NFD) {
    102         errln("error in Normalizer::setMode() or Normalizer::getUMode()");
    103     }
    104     if(copy.next()!=0x308 || copy.next()!=0x1100) {
    105         errln("error in Normalizer::setText(UChar *) or Normalizer::setMode()");
    106     }
    107 
    108     // test setText(UChar *, length=-1)
    109     errorCode=U_ZERO_ERROR;
    110 
    111     // NUL-terminate s
    112     s.append((UChar)0);         // append NUL
    113     s.truncate(s.length()-1);   // undo length change
    114 
    115     copy.setText(s.getBuffer()+1, -1, errorCode);
    116     if(copy.endIndex()!=s.length()-1) {
    117         errln("error in Normalizer::setText(UChar *, -1)");
    118     }
    119 
    120     // test setOption() and getOption()
    121     copy.setOption(0xaa0000, TRUE);
    122     copy.setOption(0x20000, FALSE);
    123     if(!copy.getOption(0x880000) || copy.getOption(0x20000)) {
    124         errln("error in Normalizer::setOption() or Normalizer::getOption()");
    125     }
    126 
    127     // test last()/previous() with an internal buffer overflow
    128     errorCode=U_ZERO_ERROR;
    129     copy.setText(UnicodeString(1000, (UChar32)0x308, 1000), errorCode);
    130     if(copy.last()!=0x308) {
    131         errln("error in Normalizer(1000*U+0308).last()");
    132     }
    133 
    134     // test UNORM_NONE
    135     norm.setMode(UNORM_NONE);
    136     if(norm.first()!=0x61 || norm.next()!=0x308 || norm.last()!=0x2f800) {
    137         errln("error in Normalizer(UNORM_NONE).first()/next()/last()");
    138     }
    139     Normalizer::normalize(s, UNORM_NONE, 0, out, status);
    140     if(out!=s) {
    141         errln("error in Normalizer::normalize(UNORM_NONE)");
    142     }
    143 
    144     // test that the same string can be used as source and destination
    145     s.setTo((UChar)0xe4);
    146     Normalizer::normalize(s, UNORM_NFD, 0, s, status);
    147     if(s.charAt(1)!=0x308) {
    148         errln("error in Normalizer::normalize(UNORM_NFD, self)");
    149     }
    150     Normalizer::normalize(s, UNORM_NFC, 0, s, status);
    151     if(s.charAt(0)!=0xe4) {
    152         errln("error in Normalizer::normalize(UNORM_NFC, self)");
    153     }
    154     Normalizer::decompose(s, FALSE, 0, s, status);
    155     if(s.charAt(1)!=0x308) {
    156         errln("error in Normalizer::decompose(self)");
    157     }
    158     Normalizer::compose(s, FALSE, 0, s, status);
    159     if(s.charAt(0)!=0xe4) {
    160         errln("error in Normalizer::compose(self)");
    161     }
    162     Normalizer::concatenate(s, s, s, UNORM_NFC, 0, status);
    163     if(s.charAt(1)!=0xe4) {
    164         errln("error in Normalizer::decompose(self)");
    165     }
    166 
    167     // test internal normalization exclusion options
    168     // s contains a compatibility CJK character and a Hangul syllable
    169     s=UnicodeString("a\\uFACE\\uD7A3b", -1, US_INV).unescape();
    170     status=U_ZERO_ERROR;
    171     Normalizer::decompose(s, FALSE, UNORM_NX_HANGUL, out, status);
    172     if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\u9F9C\\uD7A3b").unescape()) {
    173         errln("Normalizer::decompose(UNORM_NX_HANGUL) failed - %s", u_errorName(status));
    174     }
    175     status=U_ZERO_ERROR;
    176     Normalizer::decompose(s, FALSE, UNORM_NX_CJK_COMPAT, out, status);
    177     if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\uFACE\\u1112\\u1175\\u11c2b").unescape()) {
    178         errln("Normalizer::decompose(UNORM_NX_CJK_COMPAT) failed - %s", u_errorName(status));
    179     }
    180     status=U_ZERO_ERROR;
    181     Normalizer::decompose(s, FALSE, UNORM_NX_CJK_COMPAT|UNORM_NX_HANGUL, out, status);
    182     if(U_FAILURE(status) || out!=UNICODE_STRING_SIMPLE("a\\uFACE\\uD7A3b").unescape()) {
    183         errln("Normalizer::decompose(UNORM_NX_CJK_COMPAT|UNORM_NX_HANGUL) failed - %s", u_errorName(status));
    184     }
    185 }
    186 
    187 #endif
    188