Home | History | Annotate | Download | only in locale
      1 /*
      2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
      3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      4  *
      5  * This code is free software; you can redistribute it and/or modify it
      6  * under the terms of the GNU General Public License version 2 only, as
      7  * published by the Free Software Foundation.  Oracle designates this
      8  * particular file as subject to the "Classpath" exception as provided
      9  * by Oracle in the LICENSE file that accompanied this code.
     10  *
     11  * This code is distributed in the hope that it will be useful, but WITHOUT
     12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     14  * version 2 for more details (a copy is included in the LICENSE file that
     15  * accompanied this code).
     16  *
     17  * You should have received a copy of the GNU General Public License version
     18  * 2 along with this work; if not, write to the Free Software Foundation,
     19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     20  *
     21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     22  * or visit www.oracle.com if you need additional information or have any
     23  * questions.
     24  */
     25 
     26 package sun.util.locale;
     27 
     28 import java.util.ArrayList;
     29 import java.util.Collection;
     30 import java.util.HashMap;
     31 import java.util.Iterator;
     32 import java.util.LinkedHashMap;
     33 import java.util.LinkedList;
     34 import java.util.List;
     35 import java.util.Locale;
     36 import java.util.Locale.*;
     37 import static java.util.Locale.FilteringMode.*;
     38 import static java.util.Locale.LanguageRange.*;
     39 import java.util.Map;
     40 import java.util.Set;
     41 
     42 /**
     43  * Implementation for BCP47 Locale matching
     44  *
     45  */
     46 public final class LocaleMatcher {
     47 
     48     public static List<Locale> filter(List<LanguageRange> priorityList,
     49                                       Collection<Locale> locales,
     50                                       FilteringMode mode) {
     51         if (priorityList.isEmpty() || locales.isEmpty()) {
     52             return new ArrayList<>(); // need to return a empty mutable List
     53         }
     54 
     55         // Create a list of language tags to be matched.
     56         List<String> tags = new ArrayList<>();
     57         for (Locale locale : locales) {
     58             tags.add(locale.toLanguageTag());
     59         }
     60 
     61         // Filter language tags.
     62         List<String> filteredTags = filterTags(priorityList, tags, mode);
     63 
     64         // Create a list of matching locales.
     65         List<Locale> filteredLocales = new ArrayList<>(filteredTags.size());
     66         for (String tag : filteredTags) {
     67               filteredLocales.add(Locale.forLanguageTag(tag));
     68         }
     69 
     70         return filteredLocales;
     71     }
     72 
     73     public static List<String> filterTags(List<LanguageRange> priorityList,
     74                                           Collection<String> tags,
     75                                           FilteringMode mode) {
     76         if (priorityList.isEmpty() || tags.isEmpty()) {
     77             return new ArrayList<>(); // need to return a empty mutable List
     78         }
     79 
     80         ArrayList<LanguageRange> list;
     81         if (mode == EXTENDED_FILTERING) {
     82             return filterExtended(priorityList, tags);
     83         } else {
     84             list = new ArrayList<>();
     85             for (LanguageRange lr : priorityList) {
     86                 String range = lr.getRange();
     87                 if (range.startsWith("*-")
     88                     || range.indexOf("-*") != -1) { // Extended range
     89                     if (mode == AUTOSELECT_FILTERING) {
     90                         return filterExtended(priorityList, tags);
     91                     } else if (mode == MAP_EXTENDED_RANGES) {
     92                         if (range.charAt(0) == '*') {
     93                             range = "*";
     94                         } else {
     95                             range = range.replaceAll("-[*]", "");
     96                         }
     97                         list.add(new LanguageRange(range, lr.getWeight()));
     98                     } else if (mode == REJECT_EXTENDED_RANGES) {
     99                         throw new IllegalArgumentException("An extended range \""
    100                                       + range
    101                                       + "\" found in REJECT_EXTENDED_RANGES mode.");
    102                     }
    103                 } else { // Basic range
    104                     list.add(lr);
    105                 }
    106             }
    107 
    108             return filterBasic(list, tags);
    109         }
    110     }
    111 
    112     private static List<String> filterBasic(List<LanguageRange> priorityList,
    113                                             Collection<String> tags) {
    114         List<String> list = new ArrayList<>();
    115         for (LanguageRange lr : priorityList) {
    116             String range = lr.getRange();
    117             if (range.equals("*")) {
    118                 return new ArrayList<String>(tags);
    119             } else {
    120                 for (String tag : tags) {
    121                     tag = tag.toLowerCase();
    122                     if (tag.startsWith(range)) {
    123                         int len = range.length();
    124                         if ((tag.length() == len || tag.charAt(len) == '-')
    125                             && !list.contains(tag)) {
    126                             list.add(tag);
    127                         }
    128                     }
    129                 }
    130             }
    131         }
    132 
    133         return list;
    134     }
    135 
    136     private static List<String> filterExtended(List<LanguageRange> priorityList,
    137                                                Collection<String> tags) {
    138         List<String> list = new ArrayList<>();
    139         for (LanguageRange lr : priorityList) {
    140             String range = lr.getRange();
    141             if (range.equals("*")) {
    142                 return new ArrayList<String>(tags);
    143             }
    144             String[] rangeSubtags = range.split("-");
    145             for (String tag : tags) {
    146                 tag = tag.toLowerCase();
    147                 String[] tagSubtags = tag.split("-");
    148                 if (!rangeSubtags[0].equals(tagSubtags[0])
    149                     && !rangeSubtags[0].equals("*")) {
    150                     continue;
    151                 }
    152 
    153                 int rangeIndex = 1;
    154                 int tagIndex = 1;
    155 
    156                 while (rangeIndex < rangeSubtags.length
    157                        && tagIndex < tagSubtags.length) {
    158                    if (rangeSubtags[rangeIndex].equals("*")) {
    159                        rangeIndex++;
    160                    } else if (rangeSubtags[rangeIndex].equals(tagSubtags[tagIndex])) {
    161                        rangeIndex++;
    162                        tagIndex++;
    163                    } else if (tagSubtags[tagIndex].length() == 1
    164                               && !tagSubtags[tagIndex].equals("*")) {
    165                        break;
    166                    } else {
    167                        tagIndex++;
    168                    }
    169                }
    170 
    171                if (rangeSubtags.length == rangeIndex && !list.contains(tag)) {
    172                    list.add(tag);
    173                }
    174             }
    175         }
    176 
    177         return list;
    178     }
    179 
    180     public static Locale lookup(List<LanguageRange> priorityList,
    181                                 Collection<Locale> locales) {
    182         if (priorityList.isEmpty() || locales.isEmpty()) {
    183             return null;
    184         }
    185 
    186         // Create a list of language tags to be matched.
    187         List<String> tags = new ArrayList<>();
    188         for (Locale locale : locales) {
    189             tags.add(locale.toLanguageTag());
    190         }
    191 
    192         // Look up a language tags.
    193         String lookedUpTag = lookupTag(priorityList, tags);
    194 
    195         if (lookedUpTag == null) {
    196             return null;
    197         } else {
    198             return Locale.forLanguageTag(lookedUpTag);
    199         }
    200     }
    201 
    202     public static String lookupTag(List<LanguageRange> priorityList,
    203                                    Collection<String> tags) {
    204         if (priorityList.isEmpty() || tags.isEmpty()) {
    205             return null;
    206         }
    207 
    208         for (LanguageRange lr : priorityList) {
    209             String range = lr.getRange();
    210 
    211             // Special language range ("*") is ignored in lookup.
    212             if (range.equals("*")) {
    213                 continue;
    214             }
    215             // Android-changed: backport OpenJDK 9 fix for JDK-8166994
    216             String rangeForRegex = range.replace("*", "\\p{Alnum}*");
    217             while (rangeForRegex.length() > 0) {
    218                 for (String tag : tags) {
    219                     tag = tag.toLowerCase();
    220                     if (tag.matches(rangeForRegex)) {
    221                         return tag;
    222                     }
    223                 }
    224 
    225                 // Truncate from the end....
    226                 int index = rangeForRegex.lastIndexOf('-');
    227                 if (index >= 0) {
    228                     rangeForRegex = rangeForRegex.substring(0, index);
    229 
    230                     // if range ends with an extension key, truncate it.
    231                     if (rangeForRegex.lastIndexOf('-') == rangeForRegex.length()-2) {
    232                         rangeForRegex =
    233                             rangeForRegex.substring(0, rangeForRegex.length()-2);
    234                     }
    235                 } else {
    236                     rangeForRegex = "";
    237                 }
    238             }
    239         }
    240 
    241         return null;
    242     }
    243 
    244     public static List<LanguageRange> parse(String ranges) {
    245         // Android-changed: backport OpenJDK 9 fix for JDK-8166994
    246         ranges = ranges.replace(" ", "").toLowerCase();
    247         if (ranges.startsWith("accept-language:")) {
    248             ranges = ranges.substring(16); // delete unnecessary prefix
    249         }
    250 
    251         String[] langRanges = ranges.split(",");
    252         List<LanguageRange> list = new ArrayList<>(langRanges.length);
    253         List<String> tempList = new ArrayList<>();
    254         int numOfRanges = 0;
    255 
    256         for (String range : langRanges) {
    257             int index;
    258             String r;
    259             double w;
    260 
    261             if ((index = range.indexOf(";q=")) == -1) {
    262                 r = range;
    263                 w = MAX_WEIGHT;
    264             } else {
    265                 r = range.substring(0, index);
    266                 index += 3;
    267                 try {
    268                     w = Double.parseDouble(range.substring(index));
    269                 }
    270                 catch (Exception e) {
    271                     throw new IllegalArgumentException("weight=\""
    272                                   + range.substring(index)
    273                                   + "\" for language range \"" + r + "\"");
    274                 }
    275 
    276                 if (w < MIN_WEIGHT || w > MAX_WEIGHT) {
    277                     throw new IllegalArgumentException("weight=" + w
    278                                   + " for language range \"" + r
    279                                   + "\". It must be between " + MIN_WEIGHT
    280                                   + " and " + MAX_WEIGHT + ".");
    281                 }
    282             }
    283 
    284             if (!tempList.contains(r)) {
    285                 LanguageRange lr = new LanguageRange(r, w);
    286                 index = numOfRanges;
    287                 for (int j = 0; j < numOfRanges; j++) {
    288                     if (list.get(j).getWeight() < w) {
    289                         index = j;
    290                         break;
    291                     }
    292                 }
    293                 list.add(index, lr);
    294                 numOfRanges++;
    295                 tempList.add(r);
    296 
    297                 // Check if the range has an equivalent using IANA LSR data.
    298                 // If yes, add it to the User's Language Priority List as well.
    299 
    300                 // aa-XX -> aa-YY
    301                 String equivalent;
    302                 if ((equivalent = getEquivalentForRegionAndVariant(r)) != null
    303                     && !tempList.contains(equivalent)) {
    304                     list.add(index+1, new LanguageRange(equivalent, w));
    305                     numOfRanges++;
    306                     tempList.add(equivalent);
    307                 }
    308 
    309                 String[] equivalents;
    310                 if ((equivalents = getEquivalentsForLanguage(r)) != null) {
    311                     for (String equiv: equivalents) {
    312                         // aa-XX -> bb-XX(, cc-XX)
    313                         if (!tempList.contains(equiv)) {
    314                             list.add(index+1, new LanguageRange(equiv, w));
    315                             numOfRanges++;
    316                             tempList.add(equiv);
    317                         }
    318 
    319                         // bb-XX -> bb-YY(, cc-YY)
    320                         equivalent = getEquivalentForRegionAndVariant(equiv);
    321                         if (equivalent != null
    322                             && !tempList.contains(equivalent)) {
    323                             list.add(index+1, new LanguageRange(equivalent, w));
    324                             numOfRanges++;
    325                             tempList.add(equivalent);
    326                         }
    327                     }
    328                 }
    329             }
    330         }
    331 
    332         return list;
    333     }
    334 
    335     // BEGIN Android-added: backport OpenJDK 9 fix for JDK-8166994
    336     /**
    337      * A faster alternative approach to String.replaceFirst(), if the given
    338      * string is a literal String, not a regex.
    339      */
    340     private static String replaceFirstSubStringMatch(String range,
    341             String substr, String replacement) {
    342         int pos = range.indexOf(substr);
    343         if (pos == -1) {
    344             return range;
    345         } else {
    346             return range.substring(0, pos) + replacement
    347                     + range.substring(pos + substr.length());
    348         }
    349     }
    350     // END Android-added: backport OpenJDK 9 fix for JDK-8166994
    351 
    352     private static String[] getEquivalentsForLanguage(String range) {
    353         String r = range;
    354 
    355         while (r.length() > 0) {
    356             if (LocaleEquivalentMaps.singleEquivMap.containsKey(r)) {
    357                 String equiv = LocaleEquivalentMaps.singleEquivMap.get(r);
    358                 // Return immediately for performance if the first matching
    359                 // subtag is found.
    360 // BEGIN Android-added: backport OpenJDK 9 fix for JDK-8166994
    361 // Upstream bug: https://bugs.openjdk.java.net/browse/JDK-8166994
    362 // Upstream fix: http://hg.openjdk.java.net/jdk9/dev/jdk/rev/60837db5d445
    363                 return new String[]{replaceFirstSubStringMatch(range,
    364                     r, equiv)};
    365             } else if (LocaleEquivalentMaps.multiEquivsMap.containsKey(r)) {
    366                 String[] equivs = LocaleEquivalentMaps.multiEquivsMap.get(r);
    367                 String[] result = new String[equivs.length];
    368                 for (int i = 0; i < equivs.length; i++) {
    369                     result[i] = replaceFirstSubStringMatch(range,
    370                             r, equivs[i]);
    371                 }
    372                 return result;
    373 // END Android-added: backport OpenJDK 9 fix for JDK-8166994
    374             }
    375 
    376             // Truncate the last subtag simply.
    377             int index = r.lastIndexOf('-');
    378             if (index == -1) {
    379                 break;
    380             }
    381             r = r.substring(0, index);
    382         }
    383 
    384         return null;
    385     }
    386 
    387     private static String getEquivalentForRegionAndVariant(String range) {
    388         int extensionKeyIndex = getExtentionKeyIndex(range);
    389 
    390         for (String subtag : LocaleEquivalentMaps.regionVariantEquivMap.keySet()) {
    391             int index;
    392             if ((index = range.indexOf(subtag)) != -1) {
    393                 // Check if the matching text is a valid region or variant.
    394                 if (extensionKeyIndex != Integer.MIN_VALUE
    395                     && index > extensionKeyIndex) {
    396                     continue;
    397                 }
    398 
    399                 int len = index + subtag.length();
    400                 if (range.length() == len || range.charAt(len) == '-') {
    401                     return replaceFirstSubStringMatch(range, subtag,
    402                             LocaleEquivalentMaps.regionVariantEquivMap
    403                                     .get(subtag));
    404                 }
    405             }
    406         }
    407 
    408         return null;
    409     }
    410 
    411     private static int getExtentionKeyIndex(String s) {
    412         char[] c = s.toCharArray();
    413         int index = Integer.MIN_VALUE;
    414         for (int i = 1; i < c.length; i++) {
    415             if (c[i] == '-') {
    416                 if (i - index == 2) {
    417                     return index;
    418                 } else {
    419                     index = i;
    420                 }
    421             }
    422         }
    423         return Integer.MIN_VALUE;
    424     }
    425 
    426     public static List<LanguageRange> mapEquivalents(
    427                                           List<LanguageRange>priorityList,
    428                                           Map<String, List<String>> map) {
    429         if (priorityList.isEmpty()) {
    430             return new ArrayList<>(); // need to return a empty mutable List
    431         }
    432         if (map == null || map.isEmpty()) {
    433             return new ArrayList<LanguageRange>(priorityList);
    434         }
    435 
    436         // Create a map, key=originalKey.toLowerCaes(), value=originalKey
    437         Map<String, String> keyMap = new HashMap<>();
    438         for (String key : map.keySet()) {
    439             keyMap.put(key.toLowerCase(), key);
    440         }
    441 
    442         List<LanguageRange> list = new ArrayList<>();
    443         for (LanguageRange lr : priorityList) {
    444             String range = lr.getRange();
    445             String r = range;
    446             boolean hasEquivalent = false;
    447 
    448             while (r.length() > 0) {
    449                 if (keyMap.containsKey(r)) {
    450                     hasEquivalent = true;
    451                     List<String> equivalents = map.get(keyMap.get(r));
    452                     if (equivalents != null) {
    453                         int len = r.length();
    454                         for (String equivalent : equivalents) {
    455                             list.add(new LanguageRange(equivalent.toLowerCase()
    456                                      + range.substring(len),
    457                                      lr.getWeight()));
    458                         }
    459                     }
    460                     // Return immediately if the first matching subtag is found.
    461                     break;
    462                 }
    463 
    464                 // Truncate the last subtag simply.
    465                 int index = r.lastIndexOf('-');
    466                 if (index == -1) {
    467                     break;
    468                 }
    469                 r = r.substring(0, index);
    470             }
    471 
    472             if (!hasEquivalent) {
    473                 list.add(lr);
    474             }
    475         }
    476 
    477         return list;
    478     }
    479 
    480     private LocaleMatcher() {}
    481 
    482 }
    483