Home | History | Annotate | Download | only in coll
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 /*
      3  *******************************************************************************
      4  * Copyright (C) 1996-2015, International Business Machines
      5  * Corporation and others.  All Rights Reserved.
      6  *******************************************************************************
      7  * CollationCompare.java, ported from collationcompare.h/.cpp
      8  *
      9  * C++ version created on: 2012feb14 with new and old collation code
     10  * created by: Markus W. Scherer
     11  */
     12 
     13 package android.icu.impl.coll;
     14 
     15 import android.icu.text.Collator;
     16 
     17 /**
     18  * @hide Only a subset of ICU is exposed in Android
     19  */
     20 public final class CollationCompare /* all static */ {
            /**
             * Compares the collation elements (CEs) delivered by two iterators, one level at a time.
             *
             * <p>The primary pass pulls CEs with {@code nextCE()} until both sides yield
             * {@code Collation.NO_CE_PRIMARY}; the later passes re-read the CEs by index with
             * {@code getCE(int)}. Which later passes run depends on the strength and on the flag
             * bits in {@code settings.options}: secondary (forwards, or backwards within
             * merge-separated segments when BACKWARD_SECONDARY is set), case level (CASE_LEVEL),
             * tertiary, and quaternary. The first difference found decides the result.
             *
             * <p>When any ALTERNATE_MASK bit is set, CEs whose primary lies above
             * {@code Collation.MERGE_SEPARATOR_PRIMARY} and at or below {@code settings.variableTop}
             * are rewritten in the iterator (via {@code setCurrentCE}) to keep only their primary,
             * and the primary-ignorable CEs that directly follow them are zeroed, so that they are
             * skipped by the secondary and tertiary passes and only contribute at the quaternary pass.
             *
             * <p>NOTE(review): the index-based passes rely on the iterators retaining every CE
             * returned by {@code nextCE()}, ending with a NO_CE, so that {@code getCE(i)} is
             * valid -- confirm against CollationIterator.
             *
             * @param left iterator supplying the CEs of the left operand; it is advanced and its
             *        current CE may be overwritten
             * @param right iterator supplying the CEs of the right operand; treated like {@code left}
             * @param settings supplies {@code options}, {@code variableTop} and primary reordering
             * @return {@code Collation.LESS}, {@code Collation.GREATER} or {@code Collation.EQUAL}
             */
     21     public static int compareUpToQuaternary(CollationIterator left, CollationIterator right,
     22             CollationSettings settings) {
     23         int options = settings.options;
     24         long variableTop;
     25         if ((options & CollationSettings.ALTERNATE_MASK) == 0) {
                    // With 0 no primary can satisfy "primary < variableTop", so nothing is shifted.
     26             variableTop = 0;
     27         } else {
     28             // +1 so that we can use "<" and primary ignorables test out early.
     29             variableTop = settings.variableTop + 1;
     30         }
                // Set as soon as either side produced a variable CE; consulted before the quaternary pass.
     31         boolean anyVariable = false;
     32 
     33         // Fetch CEs, compare primaries, store secondary & tertiary weights.
     34         for (;;) {
     35             // We fetch CEs until we get a non-ignorable primary or reach the end.
     36             long leftPrimary;
     37             do {
     38                 long ce = left.nextCE();
                        // The primary weight is the upper 32 bits of the 64-bit CE.
     39                 leftPrimary = ce >>> 32;
     40                 if (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY) {
     41                     // Variable CE, shift it to quaternary level.
     42                     // Ignore all following primary ignorables, and shift further variable CEs.
     43                     anyVariable = true;
     44                     do {
     45                         // Store only the primary of the variable CE.
     46                         left.setCurrentCE(ce & 0xffffffff00000000L);
     47                         for (;;) {
     48                             ce = left.nextCE();
     49                             leftPrimary = ce >>> 32;
     50                             if (leftPrimary == 0) {
                                        // Primary ignorable right after a variable CE: zero the whole CE.
     51                                 left.setCurrentCE(0);
     52                             } else {
     53                                 break;
     54                             }
     55                         }
     56                     } while (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY);
     57                 }
     58             } while (leftPrimary == 0);
     59 
                    // Same procedure for the right-hand iterator.
     60             long rightPrimary;
     61             do {
     62                 long ce = right.nextCE();
     63                 rightPrimary = ce >>> 32;
     64                 if (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY) {
     65                     // Variable CE, shift it to quaternary level.
     66                     // Ignore all following primary ignorables, and shift further variable CEs.
     67                     anyVariable = true;
     68                     do {
     69                         // Store only the primary of the variable CE.
     70                         right.setCurrentCE(ce & 0xffffffff00000000L);
     71                         for (;;) {
     72                             ce = right.nextCE();
     73                             rightPrimary = ce >>> 32;
     74                             if (rightPrimary == 0) {
     75                                 right.setCurrentCE(0);
     76                             } else {
     77                                 break;
     78                             }
     79                         }
     80                     } while (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY);
     81                 }
     82             } while (rightPrimary == 0);
     83 
     84             if (leftPrimary != rightPrimary) {
     85                 // Return the primary difference, with script reordering.
     86                 if (settings.hasReordering()) {
     87                     leftPrimary = settings.reorder(leftPrimary);
     88                     rightPrimary = settings.reorder(rightPrimary);
     89                 }
     90                 return (leftPrimary < rightPrimary) ? Collation.LESS : Collation.GREATER;
     91             }
                    // Primaries are equal here; if they are the NO_CE terminator, both sides are exhausted.
     92             if (leftPrimary == Collation.NO_CE_PRIMARY) {
     93                 break;
     94             }
     95         }
     96 
     97         // Compare the buffered secondary & tertiary weights.
     98         // We might skip the secondary level but continue with the case level
     99         // which is turned on separately.
    100         if (CollationSettings.getStrength(options) >= Collator.SECONDARY) {
    101             if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) {
    102                 int leftIndex = 0;
    103                 int rightIndex = 0;
    104                 for (;;) {
                            // The secondary weight is the upper 16 bits of the CE's lower 32 bits;
                            // CEs whose secondary is 0 are skipped.
    105                     int leftSecondary;
    106                     do {
    107                         leftSecondary = ((int) left.getCE(leftIndex++)) >>> 16;
    108                     } while (leftSecondary == 0);
    109 
    110                     int rightSecondary;
    111                     do {
    112                         rightSecondary = ((int) right.getCE(rightIndex++)) >>> 16;
    113                     } while (rightSecondary == 0);
    114 
    115                     if (leftSecondary != rightSecondary) {
    116                         return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER;
    117                     }
                            // Equal secondaries that are both the NO_CE weight end this level.
    118                     if (leftSecondary == Collation.NO_CE_WEIGHT16) {
    119                         break;
    120                     }
    121                 }
    122             } else {
    123                 // The backwards secondary level compares secondary weights backwards
    124                 // within segments separated by the merge separator (U+FFFE, weight 02).
    125                 int leftStart = 0;
    126                 int rightStart = 0;
    127                 for (;;) {
    128                     // Find the merge separator or the NO_CE terminator.
    129                     long p;
    130                     int leftLimit = leftStart;
    131                     while ((p = left.getCE(leftLimit) >>> 32) > Collation.MERGE_SEPARATOR_PRIMARY
    132                             || p == 0) {
    133                         ++leftLimit;
    134                     }
    135                     int rightLimit = rightStart;
    136                     while ((p = right.getCE(rightLimit) >>> 32) > Collation.MERGE_SEPARATOR_PRIMARY
    137                             || p == 0) {
    138                         ++rightLimit;
    139                     }
    140 
    141                     // Compare the segments.
                            // Both indexes start at the segment limit and walk back towards the segment start.
    142                     int leftIndex = leftLimit;
    143                     int rightIndex = rightLimit;
    144                     for (;;) {
                                // A value that is still 0 after the loop means the segment start was reached.
    145                         int leftSecondary = 0;
    146                         while (leftSecondary == 0 && leftIndex > leftStart) {
    147                             leftSecondary = ((int) left.getCE(--leftIndex)) >>> 16;
    148                         }
    149 
    150                         int rightSecondary = 0;
    151                         while (rightSecondary == 0 && rightIndex > rightStart) {
    152                             rightSecondary = ((int) right.getCE(--rightIndex)) >>> 16;
    153                         }
    154 
    155                         if (leftSecondary != rightSecondary) {
    156                             return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER;
    157                         }
                                // Both segments ran out together without a difference.
    158                         if (leftSecondary == 0) {
    159                             break;
    160                         }
    161                     }
    162 
    163                     // Did we reach the end of either string?
    164                     // Both strings have the same number of merge separators,
    165                     // or else there would have been a primary-level difference.
    166                     assert (left.getCE(leftLimit) == right.getCE(rightLimit));
                            // p still holds the primary found at rightLimit (last assignment in the scan above).
    167                     if (p == Collation.NO_CE_PRIMARY) {
    168                         break;
    169                     }
    170                     // Skip both merge separators and continue.
    171                     leftStart = leftLimit + 1;
    172                     rightStart = rightLimit + 1;
    173                 }
    174             }
    175         }
    176 
    177         if ((options & CollationSettings.CASE_LEVEL) != 0) {
    178             int strength = CollationSettings.getStrength(options);
    179             int leftIndex = 0;
    180             int rightIndex = 0;
    181             for (;;) {
    182                 int leftCase, leftLower32, rightCase;
    183                 if (strength == Collator.PRIMARY) {
    184                     // Primary+caseLevel: Ignore case level weights of primary ignorables.
    185                     // Otherwise we would get a-umlaut > a
    186                     // which is not desirable for accent-insensitive sorting.
    187                     // Check for (lower 32 bits) == 0 as well because variable CEs are stored
    188                     // with only primary weights.
    189                     long ce;
    190                     do {
    191                         ce = left.getCE(leftIndex++);
    192                         leftCase = (int) ce;
    193                     } while ((ce >>> 32) == 0 || leftCase == 0);
                            // Keep the full lower 32 bits for the terminator test at the end of the loop,
                            // then isolate the case bits (mask 0xc000).
    194                     leftLower32 = leftCase;
    195                     leftCase &= 0xc000;
    196 
    197                     do {
    198                         ce = right.getCE(rightIndex++);
    199                         rightCase = (int) ce;
    200                     } while ((ce >>> 32) == 0 || rightCase == 0);
    201                     rightCase &= 0xc000;
    202                 } else {
    203                     // Secondary+caseLevel: By analogy with the above,
    204                     // ignore case level weights of secondary ignorables.
    205                     //
    206                     // Note: A tertiary CE has uppercase case bits (0.0.ut)
    207                     // to keep tertiary+caseFirst well-formed.
    208                     //
    209                     // Tertiary+caseLevel: Also ignore case level weights of secondary ignorables.
    210                     // Otherwise a tertiary CE's uppercase would be no greater than
    211                     // a primary/secondary CE's uppercase.
    212                     // (See UCA well-formedness condition 2.)
    213                     // We could construct a special case weight higher than uppercase,
    214                     // but it's simpler to always ignore case weights of secondary ignorables,
    215                     // turning 0.0.ut into 0.0.0.t.
    216                     // (See LDML Collation, Case Parameters.)
    217                     do {
    218                         leftCase = (int) left.getCE(leftIndex++);
    219                     } while ((leftCase & 0xffff0000) == 0);
    220                     leftLower32 = leftCase;
    221                     leftCase &= 0xc000;
    222 
    223                     do {
    224                         rightCase = (int) right.getCE(rightIndex++);
    225                     } while ((rightCase & 0xffff0000) == 0);
    226                     rightCase &= 0xc000;
    227                 }
    228 
    229                 // No need to handle NO_CE and MERGE_SEPARATOR specially:
    230                 // There is one case weight for each previous-level weight,
    231                 // so level length differences were handled there.
    232                 if (leftCase != rightCase) {
                            // With UPPER_FIRST set, the outcome of the case comparison is inverted.
    233                     if ((options & CollationSettings.UPPER_FIRST) == 0) {
    234                         return (leftCase < rightCase) ? Collation.LESS : Collation.GREATER;
    235                     } else {
    236                         return (leftCase < rightCase) ? Collation.GREATER : Collation.LESS;
    237                     }
    238                 }
                        // Stop once the left CE's secondary field carries the NO_CE weight.
    239                 if ((leftLower32 >>> 16) == Collation.NO_CE_WEIGHT16) {
    240                     break;
    241                 }
    242             }
    243         }
                // Strength at or below SECONDARY: no further levels are compared.
    244         if (CollationSettings.getStrength(options) <= Collator.SECONDARY) {
    245             return Collation.EQUAL;
    246         }
    247 
    248         int tertiaryMask = CollationSettings.getTertiaryMask(options);
    249 
    250         int leftIndex = 0;
    251         int rightIndex = 0;
                // Accumulates (bitwise OR) the lower 32 bits of every CE visited in the tertiary pass;
                // its bits 7..6 (0xc0) are tested before the quaternary pass.
    252         int anyQuaternaries = 0;
    253         for (;;) {
    254             int leftLower32, leftTertiary;
    255             do {
    256                 leftLower32 = (int) left.getCE(leftIndex++);
    257                 anyQuaternaries |= leftLower32;
    258                 assert ((leftLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (leftLower32 & 0xc0c0) == 0);
    259                 leftTertiary = leftLower32 & tertiaryMask;
    260             } while (leftTertiary == 0);
    261 
    262             int rightLower32, rightTertiary;
    263             do {
    264                 rightLower32 = (int) right.getCE(rightIndex++);
    265                 anyQuaternaries |= rightLower32;
    266                 assert ((rightLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (rightLower32 & 0xc0c0) == 0);
    267                 rightTertiary = rightLower32 & tertiaryMask;
    268             } while (rightTertiary == 0);
    269 
    270             if (leftTertiary != rightTertiary) {
    271                 if (CollationSettings.sortsTertiaryUpperCaseFirst(options)) {
    272                     // Pass through NO_CE and keep real tertiary weights larger than that.
    273                     // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
    274                     // to keep tertiary CEs well-formed.
    275                     // Their case+tertiary weights must be greater than those of
    276                     // primary and secondary CEs.
    277                     if (leftTertiary > Collation.NO_CE_WEIGHT16) {
                                // Non-zero secondary field: flip both case bits; otherwise add 0x4000.
    278                         if ((leftLower32 & 0xffff0000) != 0) {
    279                             leftTertiary ^= 0xc000;
    280                         } else {
    281                             leftTertiary += 0x4000;
    282                         }
    283                     }
    284                     if (rightTertiary > Collation.NO_CE_WEIGHT16) {
    285                         if ((rightLower32 & 0xffff0000) != 0) {
    286                             rightTertiary ^= 0xc000;
    287                         } else {
    288                             rightTertiary += 0x4000;
    289                         }
    290                     }
    291                 }
    292                 return (leftTertiary < rightTertiary) ? Collation.LESS : Collation.GREATER;
    293             }
    294             if (leftTertiary == Collation.NO_CE_WEIGHT16) {
    295                 break;
    296             }
    297         }
                // Strength at or below TERTIARY: the quaternary pass is not run.
    298         if (CollationSettings.getStrength(options) <= Collator.TERTIARY) {
    299             return Collation.EQUAL;
    300         }
    301 
    302         if (!anyVariable && (anyQuaternaries & 0xc0) == 0) {
    303             // If there are no "variable" CEs and no non-zero quaternary weights,
    304             // then there are no quaternary differences.
    305             return Collation.EQUAL;
    306         }
    307 
    308         leftIndex = 0;
    309         rightIndex = 0;
    310         for (;;) {
    311             long leftQuaternary;
    312             do {
    313                 long ce = left.getCE(leftIndex++);
                        // Start from the low 16 bits of the CE.
    314                 leftQuaternary = ce & 0xffff;
    315                 if (leftQuaternary <= Collation.NO_CE_WEIGHT16) {
    316                     // Variable primary or completely ignorable or NO_CE.
                            // Use the CE's primary (upper 32 bits) as its quaternary weight.
    317                     leftQuaternary = ce >>> 32;
    318                 } else {
    319                     // Regular CE, not tertiary ignorable.
    320                     // Preserve the quaternary weight in bits 7..6.
                            // OR-ing with 0xffffff3f sets every other bit of the low 32 bits,
                            // so such CEs can differ only in bits 7..6.
    321                     leftQuaternary |= 0xffffff3fL;
    322                 }
    323             } while (leftQuaternary == 0);
    324 
    325             long rightQuaternary;
    326             do {
    327                 long ce = right.getCE(rightIndex++);
    328                 rightQuaternary = ce & 0xffff;
    329                 if (rightQuaternary <= Collation.NO_CE_WEIGHT16) {
    330                     // Variable primary or completely ignorable or NO_CE.
    331                     rightQuaternary = ce >>> 32;
    332                 } else {
    333                     // Regular CE, not tertiary ignorable.
    334                     // Preserve the quaternary weight in bits 7..6.
    335                     rightQuaternary |= 0xffffff3fL;
    336                 }
    337             } while (rightQuaternary == 0);
    338 
    339             if (leftQuaternary != rightQuaternary) {
    340                 // Return the difference, with script reordering.
    341                 if (settings.hasReordering()) {
    342                     leftQuaternary = settings.reorder(leftQuaternary);
    343                     rightQuaternary = settings.reorder(rightQuaternary);
    344                 }
    345                 return (leftQuaternary < rightQuaternary) ? Collation.LESS : Collation.GREATER;
    346             }
                    // Equal values that are both the NO_CE primary: both CE lists are exhausted.
    347             if (leftQuaternary == Collation.NO_CE_PRIMARY) {
    348                 break;
    349             }
    350         }
    351         return Collation.EQUAL;
    352     }
    353 }
    354