Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2001-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  casetrn.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2004sep03
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Implementation class for lower-/upper-/title-casing transliterators.
     17 */
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_TRANSLITERATION
     22 
     23 #include "unicode/uchar.h"
     24 #include "unicode/ustring.h"
     25 #include "unicode/utf.h"
     26 #include "unicode/utf16.h"
     27 #include "tolowtrn.h"
     28 #include "ucase.h"
     29 #include "cpputils.h"
     30 
     31 /* case context iterator using a Replaceable */
     32 U_CFUNC UChar32 U_CALLCONV
     33 utrans_rep_caseContextIterator(void *context, int8_t dir)
     34 {
     35     U_NAMESPACE_USE
     36 
     37     UCaseContext *csc=(UCaseContext *)context;
     38     Replaceable *rep=(Replaceable *)csc->p;
     39     UChar32 c;
     40 
     41     if(dir<0) {
     42         /* reset for backward iteration */
     43         csc->index=csc->cpStart;
     44         csc->dir=dir;
     45     } else if(dir>0) {
     46         /* reset for forward iteration */
     47         csc->index=csc->cpLimit;
     48         csc->dir=dir;
     49     } else {
     50         /* continue current iteration direction */
     51         dir=csc->dir;
     52     }
     53 
     54     // automatically adjust start and limit if the Replaceable disagrees
     55     // with the original values
     56     if(dir<0) {
     57         if(csc->start<csc->index) {
     58             c=rep->char32At(csc->index-1);
     59             if(c<0) {
     60                 csc->start=csc->index;
     61             } else {
     62                 csc->index-=U16_LENGTH(c);
     63                 return c;
     64             }
     65         }
     66     } else {
     67         // detect, and store in csc->b1, if we hit the limit
     68         if(csc->index<csc->limit) {
     69             c=rep->char32At(csc->index);
     70             if(c<0) {
     71                 csc->limit=csc->index;
     72                 csc->b1=TRUE;
     73             } else {
     74                 csc->index+=U16_LENGTH(c);
     75                 return c;
     76             }
     77         } else {
     78             csc->b1=TRUE;
     79         }
     80     }
     81     return U_SENTINEL;
     82 }
     83 
     84 U_NAMESPACE_BEGIN
     85 
// RTTI boilerplate for CaseMapTransliterator; the macro itself is defined outside this file.
// NOTE(review): presumably the "ABSTRACT" variant is used because this class is only
// a base for the concrete casing transliterators -- confirm against the macro definition.
UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator)
     87 
     88 /**
     89  * Constructs a transliterator.
     90  */
     91 CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) :
     92     Transliterator(id, 0),
     93     fCsp(ucase_getSingleton()),
     94     fMap(map)
     95 {
     96     // TODO test incremental mode with context-sensitive text (e.g. greek sigma)
     97     // TODO need to call setMaximumContextLength()?!
     98 }
     99 
    100 /**
    101  * Destructor.
    102  */
    103 CaseMapTransliterator::~CaseMapTransliterator() {
    104 }
    105 
    106 /**
    107  * Copy constructor.
    108  */
    109 CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
    110     Transliterator(o),
    111     fCsp(o.fCsp), fMap(o.fMap)
    112 {
    113 }
    114 
    115 /**
    116  * Assignment operator.
    117  */
    118 /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
    119     Transliterator::operator=(o);
    120     fCsp = o.fCsp;
    121     fMap = o.fMap;
    122     return *this;
    123 }*/
    124 
    125 /**
    126  * Transliterator API.
    127  */
    128 /*Transliterator* CaseMapTransliterator::clone(void) const {
    129     return new CaseMapTransliterator(*this);
    130 }*/
    131 
    132 /**
    133  * Implements {@link Transliterator#handleTransliterate}.
    134  */
    135 void CaseMapTransliterator::handleTransliterate(Replaceable& text,
    136                                  UTransPosition& offsets,
    137                                  UBool isIncremental) const
    138 {
    139     if (offsets.start >= offsets.limit) {
    140         return;
    141     }
    142 
    143     UCaseContext csc;
    144     uprv_memset(&csc, 0, sizeof(csc));
    145     csc.p = &text;
    146     csc.start = offsets.contextStart;
    147     csc.limit = offsets.contextLimit;
    148 
    149     UnicodeString tmp;
    150     const UChar *s;
    151     UChar32 c;
    152     int32_t textPos, delta, result, locCache=0;
    153 
    154     for(textPos=offsets.start; textPos<offsets.limit;) {
    155         csc.cpStart=textPos;
    156         c=text.char32At(textPos);
    157         csc.cpLimit=textPos+=U16_LENGTH(c);
    158 
    159         result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
    160 
    161         if(csc.b1 && isIncremental) {
    162             // fMap() tried to look beyond the context limit
    163             // wait for more input
    164             offsets.start=csc.cpStart;
    165             return;
    166         }
    167 
    168         if(result>=0) {
    169             // replace the current code point with its full case mapping result
    170             // see UCASE_MAX_STRING_LENGTH
    171             if(result<=UCASE_MAX_STRING_LENGTH) {
    172                 // string s[result]
    173                 tmp.setTo(FALSE, s, result);
    174                 delta=result-U16_LENGTH(c);
    175             } else {
    176                 // single code point
    177                 tmp.setTo(result);
    178                 delta=tmp.length()-U16_LENGTH(c);
    179             }
    180             text.handleReplaceBetween(csc.cpStart, textPos, tmp);
    181             if(delta!=0) {
    182                 textPos+=delta;
    183                 csc.limit=offsets.contextLimit+=delta;
    184                 offsets.limit+=delta;
    185             }
    186         }
    187     }
    188     offsets.start=textPos;
    189 }
    190 
    191 U_NAMESPACE_END
    192 
    193 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    194