Home | History | Annotate | Download | only in phonenumbers
      1 /*
      2  * Copyright (C) 2011 The Libphonenumber Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.i18n.phonenumbers;
     18 
     19 import com.google.i18n.phonenumbers.PhoneNumberUtil.Leniency;
     20 import com.google.i18n.phonenumbers.PhoneNumberUtil.MatchType;
     21 import com.google.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat;
     22 import com.google.i18n.phonenumbers.Phonemetadata.NumberFormat;
     23 import com.google.i18n.phonenumbers.Phonemetadata.PhoneMetadata;
     24 import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource;
     25 import com.google.i18n.phonenumbers.Phonenumber.PhoneNumber;
     26 
     27 import java.lang.Character.UnicodeBlock;
     28 import java.util.Iterator;
     29 import java.util.NoSuchElementException;
     30 import java.util.regex.Matcher;
     31 import java.util.regex.Pattern;
     32 
     33 /**
     34  * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}.
     35  * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in
     36  * {@link PhoneNumberUtil}.
     37  *
 * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt>) are
 * not found.
     40  *
     41  * <p>This class is not thread-safe.
     42  */
     43 final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
     44   /**
     45    * The phone number pattern used by {@link #find}, similar to
     46    * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences:
     47    * <ul>
     48    *   <li>All captures are limited in order to place an upper bound to the text matched by the
     49    *       pattern.
     50    * <ul>
     51    *   <li>Leading punctuation / plus signs are limited.
     52    *   <li>Consecutive occurrences of punctuation are limited.
     53    *   <li>Number of digits is limited.
     54    * </ul>
     55    *   <li>No whitespace is allowed at the start or end.
     56    *   <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported.
     57    * </ul>
     58    */
     59   private static final Pattern PATTERN;
     60   /**
     61    * Matches strings that look like publication pages. Example:
     62    * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
     63    * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
     64    *
     65    * The string "211-227 (2003)" is not a telephone number.
     66    */
     67   private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}");
     68 
     69   /**
     70    * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or
     71    * 08/31/95.
     72    */
     73   private static final Pattern SLASH_SEPARATED_DATES =
     74       Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}");
     75 
  /**
   * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the
   * trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX.
   */
  private static final Pattern TIME_STAMPS =
      Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d +[0-2]\\d$");
  /**
   * Matches the trailing ":\d\d" part of a time-stamp. It is applied to the text that immediately
   * follows a candidate which already matched {@link #TIME_STAMPS}.
   */
  private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d");
     83 
     84   /**
     85    * Pattern to check that brackets match. Opening brackets should be closed within a phone number.
     86    * This also checks that there is something inside the brackets. Having no brackets at all is also
     87    * fine.
     88    */
     89   private static final Pattern MATCHING_BRACKETS;
     90 
     91   /**
     92    * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are
     93    * ordered according to specificity. For example, white-space is last since that is frequently
     94    * used in numbers, not just to separate two numbers. We have separate patterns since we don't
     95    * want to break up the phone-number-like text on more than one different kind of symbol at one
     96    * time, although symbols of the same type (e.g. space) can be safely grouped together.
     97    *
     98    * Note that if there is a match, we will always check any text found up to the first match as
     99    * well.
    100    */
    101   private static final Pattern[] INNER_MATCHES = {
    102       // Breaks on the slash - e.g. "651-234-2345/332-445-1234"
    103       Pattern.compile("/+(.*)"),
    104       // Note that the bracket here is inside the capturing group, since we consider it part of the
    105       // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321".
    106       Pattern.compile("(\\([^(]*)"),
    107       // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number."
    108       // We require a space on either side of the hyphen for it to be considered a separator.
    109       Pattern.compile("(?:\\p{Z}-|-\\p{Z})\\p{Z}*(.+)"),
    110       // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's
    111       // possible that it's supposed to be used to break two numbers without spaces, and we haven't
    112       // seen many instances of it used within a number.
    113       Pattern.compile("[\u2012-\u2015\uFF0D]\\p{Z}*(.+)"),
    114       // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number."
    115       Pattern.compile("\\.+\\p{Z}*([^.]+)"),
    116       // Breaks on space - e.g. "3324451234 8002341234"
    117       Pattern.compile("\\p{Z}+(\\P{Z}+)")
    118   };
    119 
    120   /**
    121    * Punctuation that may be at the start of a phone number - brackets and plus signs.
    122    */
    123   private static final Pattern LEAD_CLASS;
    124 
    125   static {
    126     /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist
    127      * to make the pattern more easily understood. */
    128 
    129     String openingParens = "(\\[\uFF08\uFF3B";
    130     String closingParens = ")\\]\uFF09\uFF3D";
    131     String nonParens = "[^" + openingParens + closingParens + "]";
    132 
    133     /* Limit on the number of pairs of brackets in a phone number. */
    134     String bracketPairLimit = limit(0, 3);
    135     /*
    136      * An opening bracket at the beginning may not be closed, but subsequent ones should be.  It's
    137      * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a
    138      * closing bracket first. We limit the sets of brackets in a phone number to four.
    139      */
    140     MATCHING_BRACKETS = Pattern.compile(
    141         "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?"
    142         + nonParens + "+"
    143         + "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit
    144         + nonParens + "*");
    145 
    146     /* Limit on the number of leading (plus) characters. */
    147     String leadLimit = limit(0, 2);
    148     /* Limit on the number of consecutive punctuation characters. */
    149     String punctuationLimit = limit(0, 4);
    150     /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a
    151      * single block, set high enough to accommodate the entire national number and the international
    152      * country code. */
    153     int digitBlockLimit =
    154         PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE;
    155     /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some
    156      * formats use spaces to separate each digit. */
    157     String blockLimit = limit(0, digitBlockLimit);
    158 
    159     /* A punctuation sequence allowing white space. */
    160     String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit;
    161     /* A digits block without punctuation. */
    162     String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit);
    163 
    164     String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS;
    165     String leadClass = "[" + leadClassChars + "]";
    166     LEAD_CLASS = Pattern.compile(leadClass);
    167 
    168     /* Phone number pattern allowing optional punctuation. */
    169     PATTERN = Pattern.compile(
    170         "(?:" + leadClass + punctuation + ")" + leadLimit
    171         + digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit
    172         + "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?",
    173         PhoneNumberUtil.REGEX_FLAGS);
    174   }
    175 
    176   /** Returns a regular expression quantifier with an upper and lower limit. */
    177   private static String limit(int lower, int upper) {
    178     if ((lower < 0) || (upper <= 0) || (upper < lower)) {
    179       throw new IllegalArgumentException();
    180     }
    181     return "{" + lower + "," + upper + "}";
    182   }
    183 
    184   /** The potential states of a PhoneNumberMatcher. */
    185   private enum State {
    186     NOT_READY, READY, DONE
    187   }
    188 
    189   /** The phone number utility. */
    190   private final PhoneNumberUtil phoneUtil;
    191   /** The text searched for phone numbers. */
    192   private final CharSequence text;
    193   /**
    194    * The region (country) to assume for phone numbers without an international prefix, possibly
    195    * null.
    196    */
    197   private final String preferredRegion;
    198   /** The degree of validation requested. */
    199   private final Leniency leniency;
  /**
   * The number of retries still allowed after matching an invalid number. Starts at the value
   * given to the constructor and is decremented each time a candidate fails to produce a match.
   */
  private long maxTries;
    202 
    203   /** The iteration tristate. */
    204   private State state = State.NOT_READY;
    205   /** The last successful match, null unless in {@link State#READY}. */
    206   private PhoneNumberMatch lastMatch = null;
    207   /** The next index to start searching at. Undefined in {@link State#DONE}. */
    208   private int searchIndex = 0;
    209 
    210   /**
    211    * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a
    212    * new instance.
    213    *
    214    * @param util  the phone number util to use
    215    * @param text  the character sequence that we will search, null for no text
    216    * @param country  the country to assume for phone numbers not written in international format
    217    *     (with a leading plus, or with the international dialing prefix of the specified region).
    218    *     May be null or "ZZ" if only numbers with a leading plus should be
    219    *     considered.
    220    * @param leniency  the leniency to use when evaluating candidate phone numbers
    221    * @param maxTries  the maximum number of invalid numbers to try before giving up on the text.
    222    *     This is to cover degenerate cases where the text has a lot of false positives in it. Must
    223    *     be {@code >= 0}.
    224    */
    225   PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency,
    226       long maxTries) {
    227 
    228     if ((util == null) || (leniency == null)) {
    229       throw new NullPointerException();
    230     }
    231     if (maxTries < 0) {
    232       throw new IllegalArgumentException();
    233     }
    234     this.phoneUtil = util;
    235     this.text = (text != null) ? text : "";
    236     this.preferredRegion = country;
    237     this.leniency = leniency;
    238     this.maxTries = maxTries;
    239   }
    240 
    241   /**
    242    * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex}
    243    * that represents a phone number. Returns the next match, null if none was found.
    244    *
    245    * @param index  the search index to start searching at
    246    * @return  the phone number match found, null if none can be found
    247    */
    248   private PhoneNumberMatch find(int index) {
    249     Matcher matcher = PATTERN.matcher(text);
    250     while ((maxTries > 0) && matcher.find(index)) {
    251       int start = matcher.start();
    252       CharSequence candidate = text.subSequence(start, matcher.end());
    253 
    254       // Check for extra numbers at the end.
    255       // TODO: This is the place to start when trying to support extraction of multiple phone number
    256       // from split notations (+41 79 123 45 67 / 68).
    257       candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate);
    258 
    259       PhoneNumberMatch match = extractMatch(candidate, start);
    260       if (match != null) {
    261         return match;
    262       }
    263 
    264       index = start + candidate.length();
    265       maxTries--;
    266     }
    267 
    268     return null;
    269   }
    270 
    271   /**
    272    * Trims away any characters after the first match of {@code pattern} in {@code candidate},
    273    * returning the trimmed version.
    274    */
    275   private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) {
    276     Matcher trailingCharsMatcher = pattern.matcher(candidate);
    277     if (trailingCharsMatcher.find()) {
    278       candidate = candidate.subSequence(0, trailingCharsMatcher.start());
    279     }
    280     return candidate;
    281   }
    282 
    283   /**
    284    * Helper method to determine if a character is a Latin-script letter or not. For our purposes,
    285    * combining marks should also return true since we assume they have been added to a preceding
    286    * Latin character.
    287    */
    288   // @VisibleForTesting
    289   static boolean isLatinLetter(char letter) {
    290     // Combining marks are a subset of non-spacing-mark.
    291     if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) {
    292       return false;
    293     }
    294     UnicodeBlock block = UnicodeBlock.of(letter);
    295     return block.equals(UnicodeBlock.BASIC_LATIN)
    296         || block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT)
    297         || block.equals(UnicodeBlock.LATIN_EXTENDED_A)
    298         || block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL)
    299         || block.equals(UnicodeBlock.LATIN_EXTENDED_B)
    300         || block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
    301   }
    302 
    303   private static boolean isInvalidPunctuationSymbol(char character) {
    304     return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL;
    305   }
    306 
    307   /**
    308    * Attempts to extract a match from a {@code candidate} character sequence.
    309    *
    310    * @param candidate  the candidate text that might contain a phone number
    311    * @param offset  the offset of {@code candidate} within {@link #text}
    312    * @return  the match found, null if none can be found
    313    */
    314   private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) {
    315     // Skip a match that is more likely to be a date.
    316     if (SLASH_SEPARATED_DATES.matcher(candidate).find()) {
    317       return null;
    318     }
    319 
    320     // Skip potential time-stamps.
    321     if (TIME_STAMPS.matcher(candidate).find()) {
    322       String followingText = text.toString().substring(offset + candidate.length());
    323       if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) {
    324         return null;
    325       }
    326     }
    327 
    328     // Try to come up with a valid match given the entire candidate.
    329     String rawString = candidate.toString();
    330     PhoneNumberMatch match = parseAndVerify(rawString, offset);
    331     if (match != null) {
    332       return match;
    333     }
    334 
    335     // If that failed, try to find an "inner match" - there might be a phone number within this
    336     // candidate.
    337     return extractInnerMatch(rawString, offset);
    338   }
    339 
    340   /**
    341    * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a
    342    * match.
    343    *
    344    * @param candidate  the candidate text that might contain a phone number
    345    * @param offset  the current offset of {@code candidate} within {@link #text}
    346    * @return  the match found, null if none can be found
    347    */
    348   private PhoneNumberMatch extractInnerMatch(String candidate, int offset) {
    349     for (Pattern possibleInnerMatch : INNER_MATCHES) {
    350       Matcher groupMatcher = possibleInnerMatch.matcher(candidate);
    351       boolean isFirstMatch = true;
    352       while (groupMatcher.find() && maxTries > 0) {
    353         if (isFirstMatch) {
    354           // We should handle any group before this one too.
    355           CharSequence group = trimAfterFirstMatch(
    356               PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN,
    357               candidate.substring(0, groupMatcher.start()));
    358           PhoneNumberMatch match = parseAndVerify(group.toString(), offset);
    359           if (match != null) {
    360             return match;
    361           }
    362           maxTries--;
    363           isFirstMatch = false;
    364         }
    365         CharSequence group = trimAfterFirstMatch(
    366             PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, groupMatcher.group(1));
    367         PhoneNumberMatch match = parseAndVerify(group.toString(), offset + groupMatcher.start(1));
    368         if (match != null) {
    369           return match;
    370         }
    371         maxTries--;
    372       }
    373     }
    374     return null;
    375   }
    376 
    377   /**
    378    * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and
    379    * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a
    380    * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null.
    381    *
    382    * @param candidate  the candidate match
    383    * @param offset  the offset of {@code candidate} within {@link #text}
    384    * @return  the parsed and validated phone number match, or null
    385    */
    386   private PhoneNumberMatch parseAndVerify(String candidate, int offset) {
    387     try {
    388       // Check the candidate doesn't contain any formatting which would indicate that it really
    389       // isn't a phone number.
    390       if (!MATCHING_BRACKETS.matcher(candidate).matches() || PUB_PAGES.matcher(candidate).find()) {
    391         return null;
    392       }
    393 
    394       // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
    395       // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
    396       if (leniency.compareTo(Leniency.VALID) >= 0) {
    397         // If the candidate is not at the start of the text, and does not start with phone-number
    398         // punctuation, check the previous character.
    399         if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) {
    400           char previousChar = text.charAt(offset - 1);
    401           // We return null if it is a latin letter or an invalid punctuation symbol.
    402           if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) {
    403             return null;
    404           }
    405         }
    406         int lastCharIndex = offset + candidate.length();
    407         if (lastCharIndex < text.length()) {
    408           char nextChar = text.charAt(lastCharIndex);
    409           if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) {
    410             return null;
    411           }
    412         }
    413       }
    414 
    415       PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion);
    416 
    417       // Check Israel * numbers: these are a special case in that they are four-digit numbers that
    418       // our library supports, but they can only be dialled with a leading *. Since we don't
    419       // actually store or detect the * in our phone number library, this means in practice we
    420       // detect most four digit numbers as being valid for Israel. We are considering moving these
    421       // numbers to ShortNumberInfo instead, in which case this problem would go away, but in the
    422       // meantime we want to restrict the false matches so we only allow these numbers if they are
    423       // preceded by a star. We enforce this for all leniency levels even though these numbers are
    424       // technically accepted by isPossibleNumber and isValidNumber since we consider it to be a
    425       // deficiency in those methods that they accept these numbers without the *.
    426       // TODO: Remove this or make it significantly less hacky once we've decided how to
    427       // handle these short codes going forward in ShortNumberInfo. We could use the formatting
    428       // rules for instance, but that would be slower.
    429       if (phoneUtil.getRegionCodeForCountryCode(number.getCountryCode()).equals("IL")
    430           && phoneUtil.getNationalSignificantNumber(number).length() == 4
    431           && (offset == 0 || (offset > 0 && text.charAt(offset - 1) != '*'))) {
    432         // No match.
    433         return null;
    434       }
    435 
    436       if (leniency.verify(number, candidate, phoneUtil)) {
    437         // We used parseAndKeepRawInput to create this number, but for now we don't return the extra
    438         // values parsed. TODO: stop clearing all values here and switch all users over
    439         // to using rawInput() rather than the rawString() of PhoneNumberMatch.
    440         number.clearCountryCodeSource();
    441         number.clearRawInput();
    442         number.clearPreferredDomesticCarrierCode();
    443         return new PhoneNumberMatch(offset, candidate, number);
    444       }
    445     } catch (NumberParseException e) {
    446       // ignore and continue
    447     }
    448     return null;
    449   }
    450 
  /**
   * Small helper interface such that the number groups can be checked according to different
   * criteria, both for our default way of performing formatting and for any alternate formats we
   * may want to check.
   */
  interface NumberGroupingChecker {
    /**
     * Returns true if the groups of digits found in our candidate phone number match our
     * expectations.
     *
     * @param util  the phone number util available to the check
     * @param number  the original number we found when parsing
     * @param normalizedCandidate  the candidate number, normalized to only contain ASCII digits,
     *     but with non-digits (spaces etc) retained
     * @param expectedNumberGroups  the groups of digits that we would expect to see if we
     *     formatted this number
     * @return  true if the candidate's digit grouping is acceptable, false otherwise
     */
    boolean checkGroups(PhoneNumberUtil util, PhoneNumber number,
                        StringBuilder normalizedCandidate, String[] expectedNumberGroups);
  }
    470 
    471   static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util,
    472                                               PhoneNumber number,
    473                                               StringBuilder normalizedCandidate,
    474                                               String[] formattedNumberGroups) {
    475     int fromIndex = 0;
    476     if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
    477       // First skip the country code if the normalized candidate contained it.
    478       String countryCode = Integer.toString(number.getCountryCode());
    479       fromIndex = normalizedCandidate.indexOf(countryCode) + countryCode.length();
    480     }
    481     // Check each group of consecutive digits are not broken into separate groupings in the
    482     // {@code normalizedCandidate} string.
    483     for (int i = 0; i < formattedNumberGroups.length; i++) {
    484       // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex}
    485       // doesn't contain the consecutive digits in formattedNumberGroups[i].
    486       fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex);
    487       if (fromIndex < 0) {
    488         return false;
    489       }
    490       // Moves {@code fromIndex} forward.
    491       fromIndex += formattedNumberGroups[i].length();
    492       if (i == 0 && fromIndex < normalizedCandidate.length()) {
    493         // We are at the position right after the NDC. We get the region used for formatting
    494         // information based on the country code in the phone number, rather than the number itself,
    495         // as we do not need to distinguish between different countries with the same country
    496         // calling code and this is faster.
    497         String region = util.getRegionCodeForCountryCode(number.getCountryCode());
    498         if (util.getNddPrefixForRegion(region, true) != null
    499             && Character.isDigit(normalizedCandidate.charAt(fromIndex))) {
    500           // This means there is no formatting symbol after the NDC. In this case, we only
    501           // accept the number if there is no formatting symbol at all in the number, except
    502           // for extensions. This is only important for countries with national prefixes.
    503           String nationalSignificantNumber = util.getNationalSignificantNumber(number);
    504           return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length())
    505               .startsWith(nationalSignificantNumber);
    506         }
    507       }
    508     }
    509     // The check here makes sure that we haven't mistakenly already used the extension to
    510     // match the last group of the subscriber number. Note the extension cannot have
    511     // formatting in-between digits.
    512     return normalizedCandidate.substring(fromIndex).contains(number.getExtension());
    513   }
    514 
    515   static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util,
    516                                                   PhoneNumber number,
    517                                                   StringBuilder normalizedCandidate,
    518                                                   String[] formattedNumberGroups) {
    519     String[] candidateGroups =
    520         PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString());
    521     // Set this to the last group, skipping it if the number has an extension.
    522     int candidateNumberGroupIndex =
    523         number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1;
    524     // First we check if the national significant number is formatted as a block.
    525     // We use contains and not equals, since the national significant number may be present with
    526     // a prefix such as a national number prefix, or the country code itself.
    527     if (candidateGroups.length == 1
    528         || candidateGroups[candidateNumberGroupIndex].contains(
    529             util.getNationalSignificantNumber(number))) {
    530       return true;
    531     }
    532     // Starting from the end, go through in reverse, excluding the first group, and check the
    533     // candidate and number groups are the same.
    534     for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1);
    535          formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0;
    536          formattedNumberGroupIndex--, candidateNumberGroupIndex--) {
    537       if (!candidateGroups[candidateNumberGroupIndex].equals(
    538           formattedNumberGroups[formattedNumberGroupIndex])) {
    539         return false;
    540       }
    541     }
    542     // Now check the first group. There may be a national prefix at the start, so we only check
    543     // that the candidate group ends with the formatted number group.
    544     return (candidateNumberGroupIndex >= 0
    545         && candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0]));
    546   }
    547 
    548   /**
    549    * Helper method to get the national-number part of a number, formatted without any national
    550    * prefix, and return it as a set of digit blocks that would be formatted together.
    551    */
    552   private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number,
    553                                                   NumberFormat formattingPattern) {
    554     if (formattingPattern == null) {
    555       // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits.
    556       String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966);
    557       // We remove the extension part from the formatted string before splitting it into different
    558       // groups.
    559       int endIndex = rfc3966Format.indexOf(';');
    560       if (endIndex < 0) {
    561         endIndex = rfc3966Format.length();
    562       }
    563       // The country-code will have a '-' following it.
    564       int startIndex = rfc3966Format.indexOf('-') + 1;
    565       return rfc3966Format.substring(startIndex, endIndex).split("-");
    566     } else {
    567       // We format the NSN only, and split that according to the separator.
    568       String nationalSignificantNumber = util.getNationalSignificantNumber(number);
    569       return util.formatNsnUsingPattern(nationalSignificantNumber,
    570                                         formattingPattern, PhoneNumberFormat.RFC3966).split("-");
    571     }
    572   }
    573 
    574   static boolean checkNumberGroupingIsValid(
    575       PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker) {
    576     // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions)
    577     // and optimise if necessary.
    578     StringBuilder normalizedCandidate =
    579         PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */);
    580     String[] formattedNumberGroups = getNationalNumberGroups(util, number, null);
    581     if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
    582       return true;
    583     }
    584     // If this didn't pass, see if there are any alternate formats, and try them instead.
    585     PhoneMetadata alternateFormats =
    586         MetadataManager.getAlternateFormatsForCountry(number.getCountryCode());
    587     if (alternateFormats != null) {
    588       for (NumberFormat alternateFormat : alternateFormats.numberFormats()) {
    589         formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat);
    590         if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
    591           return true;
    592         }
    593       }
    594     }
    595     return false;
    596   }
    597 
    598   static boolean containsMoreThanOneSlashInNationalNumber(PhoneNumber number, String candidate) {
    599     int firstSlashInBodyIndex = candidate.indexOf('/');
    600     if (firstSlashInBodyIndex < 0) {
    601       // No slashes, this is okay.
    602       return false;
    603     }
    604     // Now look for a second one.
    605     int secondSlashInBodyIndex = candidate.indexOf('/', firstSlashInBodyIndex + 1);
    606     if (secondSlashInBodyIndex < 0) {
    607       // Only one slash, this is okay.
    608       return false;
    609     }
    610 
    611     // If the first slash is after the country calling code, this is permitted.
    612     boolean candidateHasCountryCode =
    613         (number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN
    614          || number.getCountryCodeSource() == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN);
    615     if (candidateHasCountryCode
    616         && PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(0, firstSlashInBodyIndex))
    617             .equals(Integer.toString(number.getCountryCode()))) {
    618       // Any more slashes and this is illegal.
    619       return candidate.substring(secondSlashInBodyIndex + 1).contains("/");
    620     }
    621     return true;
    622   }
    623 
    624   static boolean containsOnlyValidXChars(
    625       PhoneNumber number, String candidate, PhoneNumberUtil util) {
    626     // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the
    627     // national significant number or (2) an extension sign, in which case they always precede the
    628     // extension number. We assume a carrier code is more than 1 digit, so the first case has to
    629     // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x'
    630     // or 'X'. We ignore the character if it appears as the last character of the string.
    631     for (int index = 0; index < candidate.length() - 1; index++) {
    632       char charAtIndex = candidate.charAt(index);
    633       if (charAtIndex == 'x' || charAtIndex == 'X') {
    634         char charAtNextIndex = candidate.charAt(index + 1);
    635         if (charAtNextIndex == 'x' || charAtNextIndex == 'X') {
    636           // This is the carrier code case, in which the 'X's always precede the national
    637           // significant number.
    638           index++;
    639           if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) {
    640             return false;
    641           }
    642         // This is the extension sign case, in which the 'x' or 'X' should always precede the
    643         // extension number.
    644         } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals(
    645             number.getExtension())) {
    646           return false;
    647         }
    648       }
    649     }
    650     return true;
    651   }
    652 
    653   static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) {
    654     // First, check how we deduced the country code. If it was written in international format, then
    655     // the national prefix is not required.
    656     if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
    657       return true;
    658     }
    659     String phoneNumberRegion =
    660         util.getRegionCodeForCountryCode(number.getCountryCode());
    661     PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion);
    662     if (metadata == null) {
    663       return true;
    664     }
    665     // Check if a national prefix should be present when formatting this number.
    666     String nationalNumber = util.getNationalSignificantNumber(number);
    667     NumberFormat formatRule =
    668         util.chooseFormattingPatternForNumber(metadata.numberFormats(), nationalNumber);
    669     // To do this, we check that a national prefix formatting rule was present and that it wasn't
    670     // just the first-group symbol ($1) with punctuation.
    671     if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) {
    672       if (formatRule.isNationalPrefixOptionalWhenFormatting()) {
    673         // The national-prefix is optional in these cases, so we don't need to check if it was
    674         // present.
    675         return true;
    676       }
    677       if (PhoneNumberUtil.formattingRuleHasFirstGroupOnly(
    678           formatRule.getNationalPrefixFormattingRule())) {
    679         // National Prefix not needed for this number.
    680         return true;
    681       }
    682       // Normalize the remainder.
    683       String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput());
    684       StringBuilder rawInput = new StringBuilder(rawInputCopy);
    685       // Check if we found a national prefix and/or carrier code at the start of the raw input, and
    686       // return the result.
    687       return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null);
    688     }
    689     return true;
    690   }
    691 
    692   @Override
    693   public boolean hasNext() {
    694     if (state == State.NOT_READY) {
    695       lastMatch = find(searchIndex);
    696       if (lastMatch == null) {
    697         state = State.DONE;
    698       } else {
    699         searchIndex = lastMatch.end();
    700         state = State.READY;
    701       }
    702     }
    703     return state == State.READY;
    704   }
    705 
    706   @Override
    707   public PhoneNumberMatch next() {
    708     // Check the state and find the next match as a side-effect if necessary.
    709     if (!hasNext()) {
    710       throw new NoSuchElementException();
    711     }
    712 
    713     // Don't retain that memory any longer than necessary.
    714     PhoneNumberMatch result = lastMatch;
    715     lastMatch = null;
    716     state = State.NOT_READY;
    717     return result;
    718   }
    719 
    720   /**
    721    * Always throws {@link UnsupportedOperationException} as removal is not supported.
    722    */
    723   @Override
    724   public void remove() {
    725     throw new UnsupportedOperationException();
    726   }
    727 }
    728