Home | History | Annotate | Download | only in coll
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 1996-2015, International Business Machines
      7  * Corporation and others.  All Rights Reserved.
      8  *******************************************************************************
      9  * CollationCompare.java, ported from collationcompare.h/.cpp
     10  *
     11  * C++ version created on: 2012feb14 with new and old collation code
     12  * created by: Markus W. Scherer
     13  */
     14 
     15 package android.icu.impl.coll;
     16 
     17 import android.icu.text.Collator;
     18 
     19 /**
         * Static helper that compares two sequences of collation elements (CEs),
         * as produced by two {@code CollationIterator}s, level by level.
         *
     20  * @hide Only a subset of ICU is exposed in Android
     21  */
     22 public final class CollationCompare /* all static */ {
            /**
             * Compares the collation elements (CEs) of two iterators level by level:
             * primary first, then (depending on {@code settings.options}) secondary, case,
             * tertiary and quaternary. The first level that shows a difference decides.
             *
             * <p>The primary pass pulls CEs with {@code nextCE()} until both sides yield
             * {@code Collation.NO_CE_PRIMARY}; every later pass re-reads the same CEs by
             * index with {@code getCE(i)}. When the ALTERNATE_MASK option bits are set,
             * the primary pass also overwrites some CEs through {@code setCurrentCE()},
             * so both iterators are consumed and modified by this call.
             *
             * <p>CE bit layout as used by this method: primary = bits 63..32,
             * secondary = bits 31..16, case = bits 15..14 ({@code 0xc000}),
             * quaternary = bits 7..6 ({@code 0xc0}); the tertiary weight is selected from
             * the lower 32 bits with {@code CollationSettings.getTertiaryMask(options)}.
             *
             * @param left iterator over the CEs of the left string
             * @param right iterator over the CEs of the right string
             * @param settings supplies the option bits, the variableTop primary and the
             *        script reordering
             * @return {@code Collation.LESS}, {@code Collation.GREATER}, or
             *         {@code Collation.EQUAL} if no difference is found on any level that
             *         the options enable
             */
     23     public static int compareUpToQuaternary(CollationIterator left, CollationIterator right,
     24             CollationSettings settings) {
     25         int options = settings.options;
                // Exclusive upper bound for "variable" primaries.
                // With 0, the "< variableTop" tests below can never succeed because
                // ce >>> 32 is never negative, which disables the shifting of variable CEs.
     26         long variableTop;
     27         if ((options & CollationSettings.ALTERNATE_MASK) == 0) {
     28             variableTop = 0;
     29         } else {
     30             // +1 so that we can use "<" and primary ignorables test out early.
     31             variableTop = settings.variableTop + 1;
     32         }
                // Becomes true as soon as either side yields a variable CE;
                // consulted before the quaternary pass to decide whether it can be skipped.
     33         boolean anyVariable = false;
     34 
     35         // Fetch CEs, compare primaries, store secondary & tertiary weights.
     36         for (;;) {
     37             // We fetch CEs until we get a non-ignorable primary or reach the end.
     38             long leftPrimary;
     39             do {
     40                 long ce = left.nextCE();
                        // The primary weight is the upper 32 bits of the CE.
     41                 leftPrimary = ce >>> 32;
                        // A primary strictly between MERGE_SEPARATOR_PRIMARY and variableTop
                        // is treated as variable.
     42                 if (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY) {
     43                     // Variable CE, shift it to quaternary level.
     44                     // Ignore all following primary ignorables, and shift further variable CEs.
     45                     anyVariable = true;
     46                     do {
     47                         // Store only the primary of the variable CE.
                                // (The mask clears the lower 32 bits of the CE just read.)
     48                         left.setCurrentCE(ce & 0xffffffff00000000L);
     49                         for (;;) {
     50                             ce = left.nextCE();
     51                             leftPrimary = ce >>> 32;
     52                             if (leftPrimary == 0) {
                                        // Primary ignorable after a variable CE:
                                        // replace it with an all-zero CE so that the later passes skip it.
     53                                 left.setCurrentCE(0);
     54                             } else {
     55                                 break;
     56                             }
     57                         }
                            // Repeat while the next non-ignorable CE is itself variable.
     58                     } while (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY);
     59                 }
                    // A zero primary here is a primary ignorable (or a shifted variable); keep fetching.
     60             } while (leftPrimary == 0);
     61 
                    // Same procedure for the right side.
     62             long rightPrimary;
     63             do {
     64                 long ce = right.nextCE();
     65                 rightPrimary = ce >>> 32;
     66                 if (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY) {
     67                     // Variable CE, shift it to quaternary level.
     68                     // Ignore all following primary ignorables, and shift further variable CEs.
     69                     anyVariable = true;
     70                     do {
     71                         // Store only the primary of the variable CE.
     72                         right.setCurrentCE(ce & 0xffffffff00000000L);
     73                         for (;;) {
     74                             ce = right.nextCE();
     75                             rightPrimary = ce >>> 32;
     76                             if (rightPrimary == 0) {
     77                                 right.setCurrentCE(0);
     78                             } else {
     79                                 break;
     80                             }
     81                         }
     82                     } while (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY);
     83                 }
     84             } while (rightPrimary == 0);
     85 
     86             if (leftPrimary != rightPrimary) {
     87                 // Return the primary difference, with script reordering.
     88                 if (settings.hasReordering()) {
     89                     leftPrimary = settings.reorder(leftPrimary);
     90                     rightPrimary = settings.reorder(rightPrimary);
     91                 }
     92                 return (leftPrimary < rightPrimary) ? Collation.LESS : Collation.GREATER;
     93             }
                    // The primaries are equal; NO_CE_PRIMARY on both sides ends the primary pass.
     94             if (leftPrimary == Collation.NO_CE_PRIMARY) {
     95                 break;
     96             }
     97         }
     98 
     99         // Compare the buffered secondary & tertiary weights.
    100         // We might skip the secondary level but continue with the case level
    101         // which is turned on separately.
    102         if (CollationSettings.getStrength(options) >= Collator.SECONDARY) {
    103             if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) {
                        // Forward secondary comparison over the buffered CEs, starting at index 0.
    104                 int leftIndex = 0;
    105                 int rightIndex = 0;
    106                 for (;;) {
    107                     int leftSecondary;
    108                     do {
                                // The (int) cast keeps the lower 32 bits of the CE;
                                // >>> 16 then yields bits 31..16, the secondary weight.
    109                         leftSecondary = ((int) left.getCE(leftIndex++)) >>> 16;
    110                     } while (leftSecondary == 0);
    111 
    112                     int rightSecondary;
    113                     do {
    114                         rightSecondary = ((int) right.getCE(rightIndex++)) >>> 16;
    115                     } while (rightSecondary == 0);
    116 
    117                     if (leftSecondary != rightSecondary) {
    118                         return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER;
    119                     }
                            // Equal secondaries; NO_CE_WEIGHT16 on both sides ends this pass.
    120                     if (leftSecondary == Collation.NO_CE_WEIGHT16) {
    121                         break;
    122                     }
    123                 }
    124             } else {
    125                 // The backwards secondary level compares secondary weights backwards
    126                 // within segments separated by the merge separator (U+FFFE, weight 02).
    127                 int leftStart = 0;
    128                 int rightStart = 0;
    129                 for (;;) {
    130                     // Find the merge separator or the NO_CE terminator.
                            // Each scan stops at the first CE whose primary is non-zero and
                            // not greater than MERGE_SEPARATOR_PRIMARY.
                            // p is shared by both scans; after the second one it holds the
                            // right-side primary found at rightLimit.
    131                     long p;
    132                     int leftLimit = leftStart;
    133                     while ((p = left.getCE(leftLimit) >>> 32) > Collation.MERGE_SEPARATOR_PRIMARY
    134                             || p == 0) {
    135                         ++leftLimit;
    136                     }
    137                     int rightLimit = rightStart;
    138                     while ((p = right.getCE(rightLimit) >>> 32) > Collation.MERGE_SEPARATOR_PRIMARY
    139                             || p == 0) {
    140                         ++rightLimit;
    141                     }
    142 
    143                     // Compare the segments.
                            // Both indexes walk backwards from the segment limit (exclusive)
                            // down to the segment start (inclusive).
    144                     int leftIndex = leftLimit;
    145                     int rightIndex = rightLimit;
    146                     for (;;) {
                                // Fetch the previous non-zero secondary of the segment;
                                // the value stays 0 when the segment has no more of them.
    147                         int leftSecondary = 0;
    148                         while (leftSecondary == 0 && leftIndex > leftStart) {
    149                             leftSecondary = ((int) left.getCE(--leftIndex)) >>> 16;
    150                         }
    151 
    152                         int rightSecondary = 0;
    153                         while (rightSecondary == 0 && rightIndex > rightStart) {
    154                             rightSecondary = ((int) right.getCE(--rightIndex)) >>> 16;
    155                         }
    156 
    157                         if (leftSecondary != rightSecondary) {
    158                             return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER;
    159                         }
                                // Both are 0: both segments are exhausted without a difference.
    160                         if (leftSecondary == 0) {
    161                             break;
    162                         }
    163                     }
    164 
    165                     // Did we reach the end of either string?
    166                     // Both strings have the same number of merge separators,
    167                     // or else there would have been a primary-level difference.
    168                     assert (left.getCE(leftLimit) == right.getCE(rightLimit));
                            // p is the right-side primary at rightLimit; per the assertion above
                            // the left CE at leftLimit is the same, so one test covers both sides.
    169                     if (p == Collation.NO_CE_PRIMARY) {
    170                         break;
    171                     }
    172                     // Skip both merge separators and continue.
    173                     leftStart = leftLimit + 1;
    174                     rightStart = rightLimit + 1;
    175                 }
    176             }
    177         }
    178 
                // Optional case level: compares only the case bits (0xc000) of the buffered CEs.
    179         if ((options & CollationSettings.CASE_LEVEL) != 0) {
    180             int strength = CollationSettings.getStrength(options);
    181             int leftIndex = 0;
    182             int rightIndex = 0;
    183             for (;;) {
                        // leftLower32 keeps the unmasked lower 32 bits of the left CE so that the
                        // end of the CE sequence can still be detected after leftCase has been
                        // reduced to the case bits.
    184                 int leftCase, leftLower32, rightCase;
    185                 if (strength == Collator.PRIMARY) {
    186                     // Primary+caseLevel: Ignore case level weights of primary ignorables.
    187                     // Otherwise we would get a-umlaut > a
    188                     // which is not desirable for accent-insensitive sorting.
    189                     // Check for (lower 32 bits) == 0 as well because variable CEs are stored
    190                     // with only primary weights.
    191                     long ce;
    192                     do {
    193                         ce = left.getCE(leftIndex++);
    194                         leftCase = (int) ce;
    195                     } while ((ce >>> 32) == 0 || leftCase == 0);
    196                     leftLower32 = leftCase;
    197                     leftCase &= 0xc000;
    198 
    199                     do {
    200                         ce = right.getCE(rightIndex++);
    201                         rightCase = (int) ce;
    202                     } while ((ce >>> 32) == 0 || rightCase == 0);
    203                     rightCase &= 0xc000;
    204                 } else {
    205                     // Secondary+caseLevel: By analogy with the above,
    206                     // ignore case level weights of secondary ignorables.
    207                     //
    208                     // Note: A tertiary CE has uppercase case bits (0.0.ut)
    209                     // to keep tertiary+caseFirst well-formed.
    210                     //
    211                     // Tertiary+caseLevel: Also ignore case level weights of secondary ignorables.
    212                     // Otherwise a tertiary CE's uppercase would be no greater than
    213                     // a primary/secondary CE's uppercase.
    214                     // (See UCA well-formedness condition 2.)
    215                     // We could construct a special case weight higher than uppercase,
    216                     // but it's simpler to always ignore case weights of secondary ignorables,
    217                     // turning 0.0.ut into 0.0.0.t.
    218                     // (See LDML Collation, Case Parameters.)
                            // The loops skip CEs whose secondary field (bits 31..16) is zero.
    219                     do {
    220                         leftCase = (int) left.getCE(leftIndex++);
    221                     } while ((leftCase & 0xffff0000) == 0);
    222                     leftLower32 = leftCase;
    223                     leftCase &= 0xc000;
    224 
    225                     do {
    226                         rightCase = (int) right.getCE(rightIndex++);
    227                     } while ((rightCase & 0xffff0000) == 0);
    228                     rightCase &= 0xc000;
    229                 }
    230 
    231                 // No need to handle NO_CE and MERGE_SEPARATOR specially:
    232                 // There is one case weight for each previous-level weight,
    233                 // so level length differences were handled there.
    234                 if (leftCase != rightCase) {
                            // With UPPER_FIRST set, the outcome of the case comparison is inverted.
    235                     if ((options & CollationSettings.UPPER_FIRST) == 0) {
    236                         return (leftCase < rightCase) ? Collation.LESS : Collation.GREATER;
    237                     } else {
    238                         return (leftCase < rightCase) ? Collation.GREATER : Collation.LESS;
    239                     }
    240                 }
                        // The secondary field of the left CE equals NO_CE_WEIGHT16: end of this pass.
    241                 if ((leftLower32 >>> 16) == Collation.NO_CE_WEIGHT16) {
    242                     break;
    243                 }
    244             }
    245         }
                // Strength PRIMARY or SECONDARY: no further levels are compared.
    246         if (CollationSettings.getStrength(options) <= Collator.SECONDARY) {
    247             return Collation.EQUAL;
    248         }
    249 
                // Tertiary level: compare (lower 32 bits & tertiaryMask) of the buffered CEs.
    250         int tertiaryMask = CollationSettings.getTertiaryMask(options);
    251 
    252         int leftIndex = 0;
    253         int rightIndex = 0;
                // Bitwise OR of the lower 32 bits of every CE visited in this pass;
                // only bits 7..6 (0xc0) are inspected afterwards.
    254         int anyQuaternaries = 0;
    255         for (;;) {
    256             int leftLower32, leftTertiary;
    257             do {
    258                 leftLower32 = (int) left.getCE(leftIndex++);
    259                 anyQuaternaries |= leftLower32;
                        // Sanity check: the 0xc0c0 bits may only be set
                        // when some ONLY_TERTIARY_MASK bit is set as well.
    260                 assert ((leftLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (leftLower32 & 0xc0c0) == 0);
    261                 leftTertiary = leftLower32 & tertiaryMask;
    262             } while (leftTertiary == 0);
    263 
    264             int rightLower32, rightTertiary;
    265             do {
    266                 rightLower32 = (int) right.getCE(rightIndex++);
    267                 anyQuaternaries |= rightLower32;
    268                 assert ((rightLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (rightLower32 & 0xc0c0) == 0);
    269                 rightTertiary = rightLower32 & tertiaryMask;
    270             } while (rightTertiary == 0);
    271 
    272             if (leftTertiary != rightTertiary) {
    273                 if (CollationSettings.sortsTertiaryUpperCaseFirst(options)) {
    274                     // Pass through NO_CE and keep real tertiary weights larger than that.
    275                     // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
    276                     // to keep tertiary CEs well-formed.
    277                     // Their case+tertiary weights must be greater than those of
    278                     // primary and secondary CEs.
                            // CEs with a non-zero secondary field get both case bits flipped;
                            // CEs without one get 0x4000 added instead.
    279                     if (leftTertiary > Collation.NO_CE_WEIGHT16) {
    280                         if ((leftLower32 & 0xffff0000) != 0) {
    281                             leftTertiary ^= 0xc000;
    282                         } else {
    283                             leftTertiary += 0x4000;
    284                         }
    285                     }
    286                     if (rightTertiary > Collation.NO_CE_WEIGHT16) {
    287                         if ((rightLower32 & 0xffff0000) != 0) {
    288                             rightTertiary ^= 0xc000;
    289                         } else {
    290                             rightTertiary += 0x4000;
    291                         }
    292                     }
    293                 }
    294                 return (leftTertiary < rightTertiary) ? Collation.LESS : Collation.GREATER;
    295             }
                    // Equal tertiaries; NO_CE_WEIGHT16 on both sides ends the tertiary pass.
    296             if (leftTertiary == Collation.NO_CE_WEIGHT16) {
    297                 break;
    298             }
    299         }
                // Strength TERTIARY or lower: the quaternary level is not compared.
    300         if (CollationSettings.getStrength(options) <= Collator.TERTIARY) {
    301             return Collation.EQUAL;
    302         }
    303 
    304         if (!anyVariable && (anyQuaternaries & 0xc0) == 0) {
    305             // If there are no "variable" CEs and no non-zero quaternary weights,
    306             // then there are no quaternary differences.
    307             return Collation.EQUAL;
    308         }
    309 
                // Quaternary level: re-read the buffered CEs from the start.
    310         leftIndex = 0;
    311         rightIndex = 0;
    312         for (;;) {
    313             long leftQuaternary;
    314             do {
    315                 long ce = left.getCE(leftIndex++);
                        // Start from the low 16 bits of the CE.
    316                 leftQuaternary = ce & 0xffff;
    317                 if (leftQuaternary <= Collation.NO_CE_WEIGHT16) {
    318                     // Variable primary or completely ignorable or NO_CE.
                            // Use the primary weight itself as the quaternary weight.
    319                     leftQuaternary = ce >>> 32;
    320                 } else {
    321                     // Regular CE, not tertiary ignorable.
    322                     // Preserve the quaternary weight in bits 7..6.
                            // The mask sets every other bit of the low 32 bits, so two regular
                            // CEs can differ here only in bits 7..6.
    323                     leftQuaternary |= 0xffffff3fL;
    324                 }
                    // Zero means completely ignorable on this level; fetch the next CE.
    325             } while (leftQuaternary == 0);
    326 
    327             long rightQuaternary;
    328             do {
    329                 long ce = right.getCE(rightIndex++);
    330                 rightQuaternary = ce & 0xffff;
    331                 if (rightQuaternary <= Collation.NO_CE_WEIGHT16) {
    332                     // Variable primary or completely ignorable or NO_CE.
    333                     rightQuaternary = ce >>> 32;
    334                 } else {
    335                     // Regular CE, not tertiary ignorable.
    336                     // Preserve the quaternary weight in bits 7..6.
    337                     rightQuaternary |= 0xffffff3fL;
    338                 }
    339             } while (rightQuaternary == 0);
    340 
    341             if (leftQuaternary != rightQuaternary) {
    342                 // Return the difference, with script reordering.
    343                 if (settings.hasReordering()) {
    344                     leftQuaternary = settings.reorder(leftQuaternary);
    345                     rightQuaternary = settings.reorder(rightQuaternary);
    346                 }
    347                 return (leftQuaternary < rightQuaternary) ? Collation.LESS : Collation.GREATER;
    348             }
                    // Equal values; NO_CE_PRIMARY on both sides ends the quaternary pass.
    349             if (leftQuaternary == Collation.NO_CE_PRIMARY) {
    350                 break;
    351             }
    352         }
    353         return Collation.EQUAL;
    354     }
    355 }
    356