Home | History | Annotate | Download | only in prefixmapper
      1 /*
      2  * Copyright (C) 2011 The Libphonenumber Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.i18n.phonenumbers.prefixmapper;
     18 
     19 import java.io.Externalizable;
     20 import java.io.IOException;
     21 import java.io.ObjectInput;
     22 import java.io.ObjectOutput;
     23 import java.util.ArrayList;
     24 import java.util.Arrays;
     25 import java.util.Collections;
     26 import java.util.HashMap;
     27 import java.util.HashSet;
     28 import java.util.List;
     29 import java.util.Map;
     30 import java.util.Set;
     31 import java.util.SortedMap;
     32 import java.util.SortedSet;
     33 import java.util.TreeSet;
     34 
     35 /**
     36  * A utility which knows the data files that are available for the phone prefix mappers to use.
     37  * The data files contain mappings from phone number prefixes to text descriptions, and are
     38  * organized by country calling code and language that the text descriptions are in.
     39  *
     40  * @author Shaopeng Jia
     41  */
     42 public class MappingFileProvider implements Externalizable {
     43   private int numOfEntries = 0;
     44   private int[] countryCallingCodes;
     45   private List<Set<String>> availableLanguages;
     46   private static final Map<String, String> LOCALE_NORMALIZATION_MAP;
     47 
     48   static {
     49     Map<String, String> normalizationMap = new HashMap<String, String>();
     50     normalizationMap.put("zh_TW", "zh_Hant");
     51     normalizationMap.put("zh_HK", "zh_Hant");
     52     normalizationMap.put("zh_MO", "zh_Hant");
     53 
     54     LOCALE_NORMALIZATION_MAP = Collections.unmodifiableMap(normalizationMap);
     55   }
     56 
     57   /**
     58    * Creates an empty {@link MappingFileProvider}. The default constructor is necessary for
     59    * implementing {@link Externalizable}. The empty provider could later be populated by
     60    * {@link #readFileConfigs(java.util.SortedMap)} or {@link #readExternal(java.io.ObjectInput)}.
     61    */
     62   public MappingFileProvider() {
     63   }
     64 
     65   /**
     66    * Initializes an {@link MappingFileProvider} with {@code availableDataFiles}.
     67    *
     68    * @param availableDataFiles  a map from country calling codes to sets of languages in which data
     69    *     files are available for the specific country calling code. The map is sorted in ascending
     70    *     order of the country calling codes as integers.
     71    */
     72   public void readFileConfigs(SortedMap<Integer, Set<String>> availableDataFiles) {
     73     numOfEntries = availableDataFiles.size();
     74     countryCallingCodes = new int[numOfEntries];
     75     availableLanguages = new ArrayList<Set<String>>(numOfEntries);
     76     int index = 0;
     77     for (int countryCallingCode : availableDataFiles.keySet()) {
     78       countryCallingCodes[index++] = countryCallingCode;
     79       availableLanguages.add(new HashSet<String>(availableDataFiles.get(countryCallingCode)));
     80     }
     81   }
     82 
     83   /**
     84    * Supports Java Serialization.
     85    */
     86   public void readExternal(ObjectInput objectInput) throws IOException {
     87     numOfEntries = objectInput.readInt();
     88     if (countryCallingCodes == null || countryCallingCodes.length < numOfEntries) {
     89       countryCallingCodes = new int[numOfEntries];
     90     }
     91     if (availableLanguages == null) {
     92       availableLanguages = new ArrayList<Set<String>>();
     93     }
     94     for (int i = 0; i < numOfEntries; i++) {
     95       countryCallingCodes[i] = objectInput.readInt();
     96       int numOfLangs = objectInput.readInt();
     97       Set<String> setOfLangs = new HashSet<String>();
     98       for (int j = 0; j < numOfLangs; j++) {
     99         setOfLangs.add(objectInput.readUTF());
    100       }
    101       availableLanguages.add(setOfLangs);
    102     }
    103   }
    104 
    105   /**
    106    * Supports Java Serialization.
    107    */
    108   public void writeExternal(ObjectOutput objectOutput) throws IOException {
    109     objectOutput.writeInt(numOfEntries);
    110     for (int i = 0; i < numOfEntries; i++) {
    111       objectOutput.writeInt(countryCallingCodes[i]);
    112       Set<String> setOfLangs = availableLanguages.get(i);
    113       int numOfLangs = setOfLangs.size();
    114       objectOutput.writeInt(numOfLangs);
    115       for (String lang : setOfLangs) {
    116         objectOutput.writeUTF(lang);
    117       }
    118     }
    119   }
    120 
    121   /**
    122    * Returns a string representing the data in this class. The string contains one line for each
    123    * country calling code. The country calling code is followed by a '|' and then a list of
    124    * comma-separated languages sorted in ascending order.
    125    */
    126   @Override
    127   public String toString() {
    128     StringBuilder output = new StringBuilder();
    129     for (int i = 0; i < numOfEntries; i++) {
    130       output.append(countryCallingCodes[i]);
    131       output.append('|');
    132       SortedSet<String> sortedSetOfLangs = new TreeSet<String>(availableLanguages.get(i));
    133       for (String lang : sortedSetOfLangs) {
    134         output.append(lang);
    135         output.append(',');
    136       }
    137       output.append('\n');
    138     }
    139     return output.toString();
    140   }
    141 
    142   /**
    143    * Gets the name of the file that contains the mapping data for the {@code countryCallingCode} in
    144    * the language specified.
    145    *
    146    * @param countryCallingCode  the country calling code of phone numbers which the data file
    147    *     contains
    148    * @param language  two or three-letter lowercase ISO language codes as defined by ISO 639. Note
    149    *     that where two different language codes exist (e.g. 'he' and 'iw' for Hebrew) we use the
    150    *     one that Java/Android canonicalized on ('iw' in this case).
    151    * @param script  four-letter titlecase (the first letter is uppercase and the rest of the letters
    152    *     are lowercase) ISO script codes as defined in ISO 15924
    153    * @param region  two-letter uppercase ISO country codes as defined by ISO 3166-1
    154    * @return  the name of the file, or empty string if no such file can be found
    155    */
    156   String getFileName(int countryCallingCode, String language, String script, String region) {
    157     if (language.length() == 0) {
    158       return "";
    159     }
    160     int index = Arrays.binarySearch(countryCallingCodes, countryCallingCode);
    161     if (index < 0) {
    162       return "";
    163     }
    164     Set<String> setOfLangs = availableLanguages.get(index);
    165     if (setOfLangs.size() > 0) {
    166       String languageCode = findBestMatchingLanguageCode(setOfLangs, language, script, region);
    167       if (languageCode.length() > 0) {
    168         StringBuilder fileName = new StringBuilder();
    169         fileName.append(countryCallingCode).append('_').append(languageCode);
    170         return fileName.toString();
    171       }
    172     }
    173     return "";
    174   }
    175 
    176   private String findBestMatchingLanguageCode(
    177       Set<String> setOfLangs, String language, String script, String region) {
    178     StringBuilder fullLocale = constructFullLocale(language, script, region);
    179     String fullLocaleStr = fullLocale.toString();
    180     String normalizedLocale = LOCALE_NORMALIZATION_MAP.get(fullLocaleStr);
    181     if (normalizedLocale != null) {
    182       if (setOfLangs.contains(normalizedLocale)) {
    183         return normalizedLocale;
    184       }
    185     }
    186     if (setOfLangs.contains(fullLocaleStr)) {
    187       return fullLocaleStr;
    188     }
    189 
    190     if (onlyOneOfScriptOrRegionIsEmpty(script, region)) {
    191       if (setOfLangs.contains(language)) {
    192         return language;
    193       }
    194     } else if (script.length() > 0 && region.length() > 0) {
    195       StringBuilder langWithScript = new StringBuilder(language).append('_').append(script);
    196       String langWithScriptStr = langWithScript.toString();
    197       if (setOfLangs.contains(langWithScriptStr)) {
    198         return langWithScriptStr;
    199       }
    200 
    201       StringBuilder langWithRegion = new StringBuilder(language).append('_').append(region);
    202       String langWithRegionStr = langWithRegion.toString();
    203       if (setOfLangs.contains(langWithRegionStr)) {
    204         return langWithRegionStr;
    205       }
    206 
    207       if (setOfLangs.contains(language)) {
    208         return language;
    209       }
    210     }
    211     return "";
    212   }
    213 
    214   private boolean onlyOneOfScriptOrRegionIsEmpty(String script, String region) {
    215     return (script.length() == 0 && region.length() > 0)
    216         || (region.length() == 0 && script.length() > 0);
    217   }
    218 
    219   private StringBuilder constructFullLocale(String language, String script, String region) {
    220     StringBuilder fullLocale = new StringBuilder(language);
    221     appendSubsequentLocalePart(script, fullLocale);
    222     appendSubsequentLocalePart(region, fullLocale);
    223     return fullLocale;
    224   }
    225 
    226   private void appendSubsequentLocalePart(String subsequentLocalePart, StringBuilder fullLocale) {
    227     if (subsequentLocalePart.length() > 0) {
    228       fullLocale.append('_').append(subsequentLocalePart);
    229     }
    230   }
    231 }
    232