Home | History | Annotate | Download | only in i18n
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2001-2007, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *   Date        Name        Description
      7 *   05/24/01    aliu        Creation.
      8 **********************************************************************
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_TRANSLITERATION
     14 
     15 #include "unicode/uchar.h"
     16 #include "unicode/uniset.h"
     17 #include "unicode/ustring.h"
     18 #include "titletrn.h"
     19 #include "umutex.h"
     20 #include "ucase.h"
     21 #include "cpputils.h"
     22 
     23 U_NAMESPACE_BEGIN
     24 
     25 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TitlecaseTransliterator)
     26 
     27 TitlecaseTransliterator::TitlecaseTransliterator() :
     28     CaseMapTransliterator(UNICODE_STRING("Any-Title", 9), NULL)
     29 {
     30     // Need to look back 2 characters in the case of "can't"
     31     setMaximumContextLength(2);
     32 }
     33 
     34 /**
     35  * Destructor.
     36  */
     37 TitlecaseTransliterator::~TitlecaseTransliterator() {
     38 }
     39 
     40 /**
     41  * Copy constructor.
     42  */
     43 TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
     44     CaseMapTransliterator(o)
     45 {
     46 }
     47 
     48 /**
     49  * Assignment operator.
     50  */
     51 /*TitlecaseTransliterator& TitlecaseTransliterator::operator=(
     52                              const TitlecaseTransliterator& o) {
     53     CaseMapTransliterator::operator=(o);
     54     return *this;
     55 }*/
     56 
     57 /**
     58  * Transliterator API.
     59  */
     60 Transliterator* TitlecaseTransliterator::clone(void) const {
     61     return new TitlecaseTransliterator(*this);
     62 }
     63 
     64 /**
     65  * Implements {@link Transliterator#handleTransliterate}.
     66  */
     67 void TitlecaseTransliterator::handleTransliterate(
     68                                   Replaceable& text, UTransPosition& offsets,
     69                                   UBool isIncremental) const
     70 {
     71     // TODO reimplement, see ustrcase.c
     72     // using a real word break iterator
     73     //   instead of just looking for a transition between cased and uncased characters
     74     // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
     75     // needs to take isIncremental into account because case mappings are context-sensitive
     76     //   also detect when lowercasing function did not finish because of context
     77 
     78     if (offsets.start >= offsets.limit) {
     79         return;
     80     }
     81 
     82     // case type: >0 cased (UCASE_LOWER etc.)  ==0 uncased  <0 case-ignorable
     83     int32_t type;
     84 
     85     // Our mode; we are either converting letter toTitle or
     86     // toLower.
     87     UBool doTitle = TRUE;
     88 
     89     // Determine if there is a preceding context of cased case-ignorable*,
     90     // in which case we want to start in toLower mode.  If the
     91     // prior context is anything else (including empty) then start
     92     // in toTitle mode.
     93     UChar32 c;
     94     int32_t start;
     95     for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
     96         c = text.char32At(start);
     97         type=ucase_getTypeOrIgnorable(fCsp, c);
     98         if(type>0) { // cased
     99             doTitle=FALSE;
    100             break;
    101         } else if(type==0) { // uncased but not ignorable
    102             break;
    103         }
    104         // else (type<0) case-ignorable: continue
    105     }
    106 
    107     // Convert things after a cased character toLower; things
    108     // after an uncased, non-case-ignorable character toTitle.  Case-ignorable
    109     // characters are copied directly and do not change the mode.
    110     UCaseContext csc;
    111     uprv_memset(&csc, 0, sizeof(csc));
    112     csc.p = &text;
    113     csc.start = offsets.contextStart;
    114     csc.limit = offsets.contextLimit;
    115 
    116     UnicodeString tmp;
    117     const UChar *s;
    118     int32_t textPos, delta, result, locCache=0;
    119 
    120     for(textPos=offsets.start; textPos<offsets.limit;) {
    121         csc.cpStart=textPos;
    122         c=text.char32At(textPos);
    123         csc.cpLimit=textPos+=U16_LENGTH(c);
    124 
    125         type=ucase_getTypeOrIgnorable(fCsp, c);
    126         if(type>=0) { // not case-ignorable
    127             if(doTitle) {
    128                 result=ucase_toFullTitle(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
    129             } else {
    130                 result=ucase_toFullLower(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
    131             }
    132             doTitle = (UBool)(type==0); // doTitle=isUncased
    133 
    134             if(csc.b1 && isIncremental) {
    135                 // fMap() tried to look beyond the context limit
    136                 // wait for more input
    137                 offsets.start=csc.cpStart;
    138                 return;
    139             }
    140 
    141             if(result>=0) {
    142                 // replace the current code point with its full case mapping result
    143                 // see UCASE_MAX_STRING_LENGTH
    144                 if(result<=UCASE_MAX_STRING_LENGTH) {
    145                     // string s[result]
    146                     tmp.setTo(FALSE, s, result);
    147                     delta=result-U16_LENGTH(c);
    148                 } else {
    149                     // single code point
    150                     tmp.setTo(result);
    151                     delta=tmp.length()-U16_LENGTH(c);
    152                 }
    153                 text.handleReplaceBetween(csc.cpStart, textPos, tmp);
    154                 if(delta!=0) {
    155                     textPos+=delta;
    156                     csc.limit=offsets.contextLimit+=delta;
    157                     offsets.limit+=delta;
    158                 }
    159             }
    160         }
    161     }
    162     offsets.start=textPos;
    163 }
    164 
    165 U_NAMESPACE_END
    166 
    167 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    168