Home | History | Annotate | Download | only in prefixmapper
      1 /*
      2  * Copyright (C) 2011 The Libphonenumber Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.i18n.phonenumbers.prefixmapper;
     18 
     19 import java.io.Externalizable;
     20 import java.io.IOException;
     21 import java.io.ObjectInput;
     22 import java.io.ObjectOutput;
     23 import java.util.ArrayList;
     24 import java.util.Arrays;
     25 import java.util.Collections;
     26 import java.util.HashMap;
     27 import java.util.HashSet;
     28 import java.util.List;
     29 import java.util.Map;
     30 import java.util.Set;
     31 import java.util.SortedMap;
     32 import java.util.SortedSet;
     33 import java.util.TreeSet;
     34 
     35 /**
     36  * A utility which knows the data files that are available for the phone prefix mappers to use.
     37  * The data files contain mappings from phone number prefixes to text descriptions, and are
     38  * organized by country calling code and language that the text descriptions are in.
     39  *
     40  * @author Shaopeng Jia
     41  */
     42 public class MappingFileProvider implements Externalizable {
     43   private int numOfEntries = 0;
     44   private int[] countryCallingCodes;
     45   private List<Set<String>> availableLanguages;
     46   private static final Map<String, String> LOCALE_NORMALIZATION_MAP;
     47 
     48   static {
     49     Map<String, String> normalizationMap = new HashMap<String, String>();
     50     normalizationMap.put("zh_TW", "zh_Hant");
     51     normalizationMap.put("zh_HK", "zh_Hant");
     52     normalizationMap.put("zh_MO", "zh_Hant");
     53 
     54     LOCALE_NORMALIZATION_MAP = Collections.unmodifiableMap(normalizationMap);
     55   }
     56 
     57   /**
     58    * Creates an empty {@link MappingFileProvider}. The default constructor is necessary for
     59    * implementing {@link Externalizable}. The empty provider could later be populated by
     60    * {@link #readFileConfigs(java.util.SortedMap)} or {@link #readExternal(java.io.ObjectInput)}.
     61    */
     62   public MappingFileProvider() {
     63   }
     64 
     65   /**
     66    * Initializes an {@link MappingFileProvider} with {@code availableDataFiles}.
     67    *
     68    * @param availableDataFiles  a map from country calling codes to sets of languages in which data
     69    *     files are available for the specific country calling code. The map is sorted in ascending
     70    *     order of the country calling codes as integers.
     71    */
     72   public void readFileConfigs(SortedMap<Integer, Set<String>> availableDataFiles) {
     73     numOfEntries = availableDataFiles.size();
     74     countryCallingCodes = new int[numOfEntries];
     75     availableLanguages = new ArrayList<Set<String>>(numOfEntries);
     76     int index = 0;
     77     for (int countryCallingCode : availableDataFiles.keySet()) {
     78       countryCallingCodes[index++] = countryCallingCode;
     79       availableLanguages.add(new HashSet<String>(availableDataFiles.get(countryCallingCode)));
     80     }
     81   }
     82 
     83   /**
     84    * Supports Java Serialization.
     85    */
     86   public void readExternal(ObjectInput objectInput) throws IOException {
     87     numOfEntries = objectInput.readInt();
     88     if (countryCallingCodes == null || countryCallingCodes.length < numOfEntries) {
     89       countryCallingCodes = new int[numOfEntries];
     90     }
     91     if (availableLanguages == null) {
     92       availableLanguages = new ArrayList<Set<String>>();
     93     }
     94     for (int i = 0; i < numOfEntries; i++) {
     95       countryCallingCodes[i] = objectInput.readInt();
     96       int numOfLangs = objectInput.readInt();
     97       Set<String> setOfLangs = new HashSet<String>();
     98       for (int j = 0; j < numOfLangs; j++) {
     99         setOfLangs.add(objectInput.readUTF());
    100       }
    101       availableLanguages.add(setOfLangs);
    102     }
    103   }
    104 
    105   /**
    106    * Supports Java Serialization.
    107    */
    108   public void writeExternal(ObjectOutput objectOutput) throws IOException {
    109     objectOutput.writeInt(numOfEntries);
    110     for (int i = 0; i < numOfEntries; i++) {
    111       objectOutput.writeInt(countryCallingCodes[i]);
    112       Set<String> setOfLangs = availableLanguages.get(i);
    113       int numOfLangs = setOfLangs.size();
    114       objectOutput.writeInt(numOfLangs);
    115       for (String lang : setOfLangs) {
    116         objectOutput.writeUTF(lang);
    117       }
    118     }
    119   }
    120 
    121   /**
    122    * Returns a string representing the data in this class. The string contains one line for each
    123    * country calling code. The country calling code is followed by a '|' and then a list of
    124    * comma-separated languages sorted in ascending order.
    125    */
    126   @Override
    127   public String toString() {
    128     StringBuilder output = new StringBuilder();
    129     for (int i = 0; i < numOfEntries; i++) {
    130       output.append(countryCallingCodes[i]);
    131       output.append('|');
    132       SortedSet<String> sortedSetOfLangs = new TreeSet<String>(availableLanguages.get(i));
    133       for (String lang : sortedSetOfLangs) {
    134         output.append(lang);
    135         output.append(',');
    136       }
    137       output.append('\n');
    138     }
    139     return output.toString();
    140   }
    141 
    142   /**
    143    * Gets the name of the file that contains the mapping data for the {@code countryCallingCode} in
    144    * the language specified.
    145    *
    146    * @param countryCallingCode  the country calling code of phone numbers which the data file
    147    *     contains
    148    * @param language  two-letter lowercase ISO language codes as defined by ISO 639-1
    149    * @param script  four-letter titlecase (the first letter is uppercase and the rest of the letters
    150    *     are lowercase) ISO script codes as defined in ISO 15924
    151    * @param region  two-letter uppercase ISO country codes as defined by ISO 3166-1
    152    * @return  the name of the file, or empty string if no such file can be found
    153    */
    154   String getFileName(int countryCallingCode, String language, String script, String region) {
    155     if (language.length() == 0) {
    156       return "";
    157     }
    158     int index = Arrays.binarySearch(countryCallingCodes, countryCallingCode);
    159     if (index < 0) {
    160       return "";
    161     }
    162     Set<String> setOfLangs = availableLanguages.get(index);
    163     if (setOfLangs.size() > 0) {
    164       String languageCode = findBestMatchingLanguageCode(setOfLangs, language, script, region);
    165       if (languageCode.length() > 0) {
    166         StringBuilder fileName = new StringBuilder();
    167         fileName.append(countryCallingCode).append('_').append(languageCode);
    168         return fileName.toString();
    169       }
    170     }
    171     return "";
    172   }
    173 
    174   private String findBestMatchingLanguageCode(
    175       Set<String> setOfLangs, String language, String script, String region) {
    176     StringBuilder fullLocale = constructFullLocale(language, script, region);
    177     String fullLocaleStr = fullLocale.toString();
    178     String normalizedLocale = LOCALE_NORMALIZATION_MAP.get(fullLocaleStr);
    179     if (normalizedLocale != null) {
    180       if (setOfLangs.contains(normalizedLocale)) {
    181         return normalizedLocale;
    182       }
    183     }
    184     if (setOfLangs.contains(fullLocaleStr)) {
    185       return fullLocaleStr;
    186     }
    187 
    188     if (onlyOneOfScriptOrRegionIsEmpty(script, region)) {
    189       if (setOfLangs.contains(language)) {
    190         return language;
    191       }
    192     } else if (script.length() > 0 && region.length() > 0) {
    193       StringBuilder langWithScript = new StringBuilder(language).append('_').append(script);
    194       String langWithScriptStr = langWithScript.toString();
    195       if (setOfLangs.contains(langWithScriptStr)) {
    196         return langWithScriptStr;
    197       }
    198 
    199       StringBuilder langWithRegion = new StringBuilder(language).append('_').append(region);
    200       String langWithRegionStr = langWithRegion.toString();
    201       if (setOfLangs.contains(langWithRegionStr)) {
    202         return langWithRegionStr;
    203       }
    204 
    205       if (setOfLangs.contains(language)) {
    206         return language;
    207       }
    208     }
    209     return "";
    210   }
    211 
    212   private boolean onlyOneOfScriptOrRegionIsEmpty(String script, String region) {
    213     return (script.length() == 0 && region.length() > 0) ||
    214             (region.length() == 0 && script.length() > 0);
    215   }
    216 
    217   private StringBuilder constructFullLocale(String language, String script, String region) {
    218     StringBuilder fullLocale = new StringBuilder(language);
    219     appendSubsequentLocalePart(script, fullLocale);
    220     appendSubsequentLocalePart(region, fullLocale);
    221     return fullLocale;
    222   }
    223 
    224   private void appendSubsequentLocalePart(String subsequentLocalePart, StringBuilder fullLocale) {
    225     if (subsequentLocalePart.length() > 0) {
    226       fullLocale.append('_').append(subsequentLocalePart);
    227     }
    228   }
    229 }
    230