Home | History | Annotate | Download | only in impl
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  *******************************************************************************
      5  * Copyright (C) 2013, International Business Machines Corporation and         *
      6  * others. All Rights Reserved.                                                *
      7  *******************************************************************************
      8  */
      9 package com.ibm.icu.impl;
     10 
     11 import java.text.CharacterIterator;
     12 
     13 import com.ibm.icu.text.UTF16;
     14 
     15 public final class CharacterIteration {
     16     // disallow instantiation
     17     private CharacterIteration() { }
     18 
     19     // 32 bit Char value returned from when an iterator has run out of range.
     20     //     Positive value so fast case (not end, not surrogate) can be checked
     21     //     with a single test.
     22     public static final int DONE32 = 0x7fffffff;
     23 
     24     /**
     25      * Move the iterator forward to the next code point, and return that code point,
     26      *   leaving the iterator positioned at char returned.
     27      *   For Supplementary chars, the iterator is left positioned at the lead surrogate.
     28      * @param ci  The character iterator
     29      * @return    The next code point.
     30      */
     31     public static int next32(CharacterIterator ci) {
     32         // If the current position is at a surrogate pair, move to the trail surrogate
     33         //   which leaves it in position for underlying iterator's next() to work.
     34         int c = ci.current();
     35         if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE && c<=UTF16.LEAD_SURROGATE_MAX_VALUE) {
     36             c = ci.next();
     37             if (c<UTF16.TRAIL_SURROGATE_MIN_VALUE || c>UTF16.TRAIL_SURROGATE_MAX_VALUE) {
     38                 ci.previous();
     39             }
     40         }
     41 
     42         // For BMP chars, this next() is the real deal.
     43         c = ci.next();
     44 
     45         // If we might have a lead surrogate, we need to peak ahead to get the trail
     46         //  even though we don't want to really be positioned there.
     47         if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
     48             c = nextTrail32(ci, c);
     49         }
     50 
     51         if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c != DONE32) {
     52             // We got a supplementary char.  Back the iterator up to the postion
     53             // of the lead surrogate.
     54             ci.previous();
     55         }
     56         return c;
     57    }
     58 
     59 
     60     // Out-of-line portion of the in-line Next32 code.
     61     // The call site does an initial ci.next() and calls this function
     62     //    if the 16 bit value it gets is >= LEAD_SURROGATE_MIN_VALUE.
     63     // NOTE:  we leave the underlying char iterator positioned in the
     64     //        middle of a surrogate pair.  ci.next() will work correctly
     65     //        from there, but the ci.getIndex() will be wrong, and needs
     66     //        adjustment.
     67     public static int nextTrail32(CharacterIterator ci, int lead) {
     68         if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
     69             return DONE32;
     70         }
     71         int retVal = lead;
     72         if (lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
     73             char  cTrail = ci.next();
     74             if (UTF16.isTrailSurrogate(cTrail)) {
     75                 retVal = ((lead  - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
     76                             (cTrail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
     77                             UTF16.SUPPLEMENTARY_MIN_VALUE;
     78             } else {
     79                 ci.previous();
     80             }
     81         }
     82         return retVal;
     83     }
     84 
     85     public static int previous32(CharacterIterator ci) {
     86         if (ci.getIndex() <= ci.getBeginIndex()) {
     87             return DONE32;
     88         }
     89         char trail = ci.previous();
     90         int retVal = trail;
     91         if (UTF16.isTrailSurrogate(trail) && ci.getIndex()>ci.getBeginIndex()) {
     92             char lead = ci.previous();
     93             if (UTF16.isLeadSurrogate(lead)) {
     94                 retVal = (((int)lead  - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
     95                           ((int)trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
     96                           UTF16.SUPPLEMENTARY_MIN_VALUE;
     97             } else {
     98                 ci.next();
     99             }
    100         }
    101         return retVal;
    102     }
    103 
    104     public static int current32(CharacterIterator ci) {
    105         char  lead   = ci.current();
    106         int   retVal = lead;
    107         if (retVal < UTF16.LEAD_SURROGATE_MIN_VALUE) {
    108             return retVal;
    109         }
    110         if (UTF16.isLeadSurrogate(lead)) {
    111             int  trail = (int)ci.next();
    112             ci.previous();
    113             if (UTF16.isTrailSurrogate((char)trail)) {
    114                 retVal = ((lead  - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
    115                          (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
    116                          UTF16.SUPPLEMENTARY_MIN_VALUE;
    117             }
    118          } else {
    119             if (lead == CharacterIterator.DONE) {
    120                 if (ci.getIndex() >= ci.getEndIndex())   {
    121                     retVal = DONE32;
    122                 }
    123             }
    124          }
    125         return retVal;
    126     }
    127 }
    128