Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2001-2008, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  casetrn.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2004sep03
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Implementation class for lower-/upper-/title-casing transliterators.
     17 */
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_TRANSLITERATION
     22 
     23 #include "unicode/uchar.h"
     24 #include "unicode/ustring.h"
     25 #include "tolowtrn.h"
     26 #include "ucase.h"
     27 #include "cpputils.h"
     28 
     29 /* case context iterator using a Replaceable */
     30 U_CFUNC UChar32 U_CALLCONV
     31 utrans_rep_caseContextIterator(void *context, int8_t dir)
     32 {
     33     U_NAMESPACE_USE
     34 
     35     UCaseContext *csc=(UCaseContext *)context;
     36     Replaceable *rep=(Replaceable *)csc->p;
     37     UChar32 c;
     38 
     39     if(dir<0) {
     40         /* reset for backward iteration */
     41         csc->index=csc->cpStart;
     42         csc->dir=dir;
     43     } else if(dir>0) {
     44         /* reset for forward iteration */
     45         csc->index=csc->cpLimit;
     46         csc->dir=dir;
     47     } else {
     48         /* continue current iteration direction */
     49         dir=csc->dir;
     50     }
     51 
     52     // automatically adjust start and limit if the Replaceable disagrees
     53     // with the original values
     54     if(dir<0) {
     55         if(csc->start<csc->index) {
     56             c=rep->char32At(csc->index-1);
     57             if(c<0) {
     58                 csc->start=csc->index;
     59             } else {
     60                 csc->index-=U16_LENGTH(c);
     61                 return c;
     62             }
     63         }
     64     } else {
     65         // detect, and store in csc->b1, if we hit the limit
     66         if(csc->index<csc->limit) {
     67             c=rep->char32At(csc->index);
     68             if(c<0) {
     69                 csc->limit=csc->index;
     70                 csc->b1=TRUE;
     71             } else {
     72                 csc->index+=U16_LENGTH(c);
     73                 return c;
     74             }
     75         } else {
     76             csc->b1=TRUE;
     77         }
     78     }
     79     return U_SENTINEL;
     80 }
     81 
     82 U_NAMESPACE_BEGIN
     83 
     84 UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator)
     85 
     86 /**
     87  * Constructs a transliterator.
     88  */
     89 CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) :
     90     Transliterator(id, 0),
     91     fCsp(NULL),
     92     fMap(map)
     93 {
     94     UErrorCode errorCode = U_ZERO_ERROR;
     95     fCsp = ucase_getSingleton(&errorCode); // expect to get NULL if failure
     96 
     97     // TODO test incremental mode with context-sensitive text (e.g. greek sigma)
     98     // TODO need to call setMaximumContextLength()?!
     99 }
    100 
    101 /**
    102  * Destructor.
    103  */
    104 CaseMapTransliterator::~CaseMapTransliterator() {
    105 }
    106 
    107 /**
    108  * Copy constructor.
    109  */
    110 CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
    111     Transliterator(o),
    112     fCsp(o.fCsp), fMap(o.fMap)
    113 {
    114 }
    115 
    116 /**
    117  * Assignment operator.
    118  */
    119 /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
    120     Transliterator::operator=(o);
    121     fCsp = o.fCsp;
    122     fMap = o.fMap;
    123     return *this;
    124 }*/
    125 
    126 /**
    127  * Transliterator API.
    128  */
    129 /*Transliterator* CaseMapTransliterator::clone(void) const {
    130     return new CaseMapTransliterator(*this);
    131 }*/
    132 
    133 /**
    134  * Implements {@link Transliterator#handleTransliterate}.
    135  */
    136 void CaseMapTransliterator::handleTransliterate(Replaceable& text,
    137                                  UTransPosition& offsets,
    138                                  UBool isIncremental) const
    139 {
    140     if (offsets.start >= offsets.limit) {
    141         return;
    142     }
    143 
    144     UCaseContext csc;
    145     uprv_memset(&csc, 0, sizeof(csc));
    146     csc.p = &text;
    147     csc.start = offsets.contextStart;
    148     csc.limit = offsets.contextLimit;
    149 
    150     UnicodeString tmp;
    151     const UChar *s;
    152     UChar32 c;
    153     int32_t textPos, delta, result, locCache=0;
    154 
    155     for(textPos=offsets.start; textPos<offsets.limit;) {
    156         csc.cpStart=textPos;
    157         c=text.char32At(textPos);
    158         csc.cpLimit=textPos+=U16_LENGTH(c);
    159 
    160         result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
    161 
    162         if(csc.b1 && isIncremental) {
    163             // fMap() tried to look beyond the context limit
    164             // wait for more input
    165             offsets.start=csc.cpStart;
    166             return;
    167         }
    168 
    169         if(result>=0) {
    170             // replace the current code point with its full case mapping result
    171             // see UCASE_MAX_STRING_LENGTH
    172             if(result<=UCASE_MAX_STRING_LENGTH) {
    173                 // string s[result]
    174                 tmp.setTo(FALSE, s, result);
    175                 delta=result-U16_LENGTH(c);
    176             } else {
    177                 // single code point
    178                 tmp.setTo(result);
    179                 delta=tmp.length()-U16_LENGTH(c);
    180             }
    181             text.handleReplaceBetween(csc.cpStart, textPos, tmp);
    182             if(delta!=0) {
    183                 textPos+=delta;
    184                 csc.limit=offsets.contextLimit+=delta;
    185                 offsets.limit+=delta;
    186             }
    187         }
    188     }
    189     offsets.start=textPos;
    190 }
    191 
    192 U_NAMESPACE_END
    193 
    194 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    195