Home | History | Annotate | Download | only in phonenumbers
      1 /*
      2  * Copyright (C) 2011 The Libphonenumber Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.i18n.phonenumbers;
     18 
     19 import com.android.i18n.phonenumbers.PhoneNumberUtil.Leniency;
     20 import com.android.i18n.phonenumbers.PhoneNumberUtil.MatchType;
     21 import com.android.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat;
     22 import com.android.i18n.phonenumbers.Phonemetadata.NumberFormat;
     23 import com.android.i18n.phonenumbers.Phonemetadata.PhoneMetadata;
     24 import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource;
     25 import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber;
     26 
     27 import java.lang.Character.UnicodeBlock;
     28 import java.util.Iterator;
     29 import java.util.NoSuchElementException;
     30 import java.util.regex.Matcher;
     31 import java.util.regex.Pattern;
     32 
     33 /**
     34  * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}.
     35  * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in
     36  * {@link PhoneNumberUtil}.
     37  *
 * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt>) are
 * not found.
     40  *
     41  * <p>This class is not thread-safe.
     42  *
     43  * @author Tom Hofmann
     44  */
     45 final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> {
  /**
   * The phone number pattern used by {@link #find}, similar to
   * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences:
   * <ul>
   *   <li>All captures are limited in order to place an upper bound to the text matched by the
   *       pattern.
   * <ul>
   *   <li>Leading punctuation / plus signs are limited.
   *   <li>Consecutive occurrences of punctuation are limited.
   *   <li>Number of digits is limited.
   * </ul>
   *   <li>No whitespace is allowed at the start or end.
   *   <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported.
   * </ul>
   *
   * <p>The pattern is assembled and assigned in the static initializer of this class.
   */
  private static final Pattern PATTERN;
     62   /**
     63    * Matches strings that look like publication pages. Example:
     64    * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
     65    * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre>
     66    *
     67    * The string "211-227 (2003)" is not a telephone number.
     68    */
     69   private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}");
     70 
     71   /**
     72    * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or
     73    * 08/31/95.
     74    */
     75   private static final Pattern SLASH_SEPARATED_DATES =
     76       Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}");
     77 
     78   /**
     79    * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the
     80    * trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX.
     81    */
     82   private static final Pattern TIME_STAMPS =
     83       Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d [0-2]\\d$");
     84   private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d");
     85 
  /**
   * Pattern to check that brackets match. Opening brackets should be closed within a phone number.
   * This also checks that there is something inside the brackets. Having no brackets at all is also
   * fine.
   *
   * <p>Assigned in the static initializer of this class.
   */
  private static final Pattern MATCHING_BRACKETS;

  /**
   * Matches white-space, which may indicate the end of a phone number and the start of something
   * else (such as a neighbouring zip-code). If white-space is found, continues to match all
   * characters that are not typically used to start a phone number.
   *
   * <p>Assigned in the static initializer of this class.
   */
  private static final Pattern GROUP_SEPARATOR;

  /**
   * Punctuation that may be at the start of a phone number - brackets and plus signs.
   *
   * <p>Assigned in the static initializer of this class.
   */
  private static final Pattern LEAD_CLASS;
    104 
    105   static {
    106     /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist
    107      * to make the pattern more easily understood. */
    108 
    109     String openingParens = "(\\[\uFF08\uFF3B";
    110     String closingParens = ")\\]\uFF09\uFF3D";
    111     String nonParens = "[^" + openingParens + closingParens + "]";
    112 
    113     /* Limit on the number of pairs of brackets in a phone number. */
    114     String bracketPairLimit = limit(0, 3);
    115     /*
    116      * An opening bracket at the beginning may not be closed, but subsequent ones should be.  It's
    117      * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a
    118      * closing bracket first. We limit the sets of brackets in a phone number to four.
    119      */
    120     MATCHING_BRACKETS = Pattern.compile(
    121         "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?" +
    122         nonParens + "+" +
    123         "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit +
    124         nonParens + "*");
    125 
    126     /* Limit on the number of leading (plus) characters. */
    127     String leadLimit = limit(0, 2);
    128     /* Limit on the number of consecutive punctuation characters. */
    129     String punctuationLimit = limit(0, 4);
    130     /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a
    131      * single block, set high enough to accommodate the entire national number and the international
    132      * country code. */
    133     int digitBlockLimit =
    134         PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE;
    135     /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some
    136      * formats use spaces to separate each digit. */
    137     String blockLimit = limit(0, digitBlockLimit);
    138 
    139     /* A punctuation sequence allowing white space. */
    140     String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit;
    141     /* A digits block without punctuation. */
    142     String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit);
    143 
    144     String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS;
    145     String leadClass = "[" + leadClassChars + "]";
    146     LEAD_CLASS = Pattern.compile(leadClass);
    147     GROUP_SEPARATOR = Pattern.compile("\\p{Z}" + "[^" + leadClassChars  + "\\p{Nd}]*");
    148 
    149     /* Phone number pattern allowing optional punctuation. */
    150     PATTERN = Pattern.compile(
    151         "(?:" + leadClass + punctuation + ")" + leadLimit +
    152         digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit +
    153         "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?",
    154         PhoneNumberUtil.REGEX_FLAGS);
    155   }
    156 
    157   /** Returns a regular expression quantifier with an upper and lower limit. */
    158   private static String limit(int lower, int upper) {
    159     if ((lower < 0) || (upper <= 0) || (upper < lower)) {
    160       throw new IllegalArgumentException();
    161     }
    162     return "{" + lower + "," + upper + "}";
    163   }
    164 
  /**
   * The potential states of a PhoneNumberMatcher. A new matcher starts out as {@code NOT_READY}.
   * The {@code lastMatch} field is only non-null in {@code READY}, and {@code searchIndex} is
   * undefined in {@code DONE}.
   */
  private enum State {
    NOT_READY, READY, DONE
  }
    169 
  /** The phone number utility. */
  private final PhoneNumberUtil phoneUtil;
  /** The text searched for phone numbers. Never null; the constructor substitutes "" for null. */
  private final CharSequence text;
  /**
   * The region (country) to assume for phone numbers without an international prefix, possibly
   * null.
   */
  private final String preferredRegion;
  /** The degree of validation requested. */
  private final Leniency leniency;
  /**
   * The maximum number of retries after matching an invalid number. Not final: it is decremented
   * each time a candidate fails, so it holds the number of tries still remaining.
   */
  private long maxTries;

  /** The iteration tristate. */
  private State state = State.NOT_READY;
  /** The last successful match, null unless in {@link State#READY}. */
  private PhoneNumberMatch lastMatch = null;
  /** The next index to start searching at. Undefined in {@link State#DONE}. */
  private int searchIndex = 0;
    190 
    191   /**
    192    * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a
    193    * new instance.
    194    *
    195    * @param util      the phone number util to use
    196    * @param text      the character sequence that we will search, null for no text
    197    * @param country   the country to assume for phone numbers not written in international format
    198    *                  (with a leading plus, or with the international dialing prefix of the
    199    *                  specified region). May be null or "ZZ" if only numbers with a
    200    *                  leading plus should be considered.
    201    * @param leniency  the leniency to use when evaluating candidate phone numbers
    202    * @param maxTries  the maximum number of invalid numbers to try before giving up on the text.
    203    *                  This is to cover degenerate cases where the text has a lot of false positives
    204    *                  in it. Must be {@code >= 0}.
    205    */
    206   PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency,
    207       long maxTries) {
    208 
    209     if ((util == null) || (leniency == null)) {
    210       throw new NullPointerException();
    211     }
    212     if (maxTries < 0) {
    213       throw new IllegalArgumentException();
    214     }
    215     this.phoneUtil = util;
    216     this.text = (text != null) ? text : "";
    217     this.preferredRegion = country;
    218     this.leniency = leniency;
    219     this.maxTries = maxTries;
    220   }
    221 
    222   /**
    223    * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex}
    224    * that represents a phone number. Returns the next match, null if none was found.
    225    *
    226    * @param index  the search index to start searching at
    227    * @return  the phone number match found, null if none can be found
    228    */
    229   private PhoneNumberMatch find(int index) {
    230     Matcher matcher = PATTERN.matcher(text);
    231     while ((maxTries > 0) && matcher.find(index)) {
    232       int start = matcher.start();
    233       CharSequence candidate = text.subSequence(start, matcher.end());
    234 
    235       // Check for extra numbers at the end.
    236       // TODO: This is the place to start when trying to support extraction of multiple phone number
    237       // from split notations (+41 79 123 45 67 / 68).
    238       candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate);
    239 
    240       PhoneNumberMatch match = extractMatch(candidate, start);
    241       if (match != null) {
    242         return match;
    243       }
    244 
    245       index = start + candidate.length();
    246       maxTries--;
    247     }
    248 
    249     return null;
    250   }
    251 
    252   /**
    253    * Trims away any characters after the first match of {@code pattern} in {@code candidate},
    254    * returning the trimmed version.
    255    */
    256   private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) {
    257     Matcher trailingCharsMatcher = pattern.matcher(candidate);
    258     if (trailingCharsMatcher.find()) {
    259       candidate = candidate.subSequence(0, trailingCharsMatcher.start());
    260     }
    261     return candidate;
    262   }
    263 
    264   /**
    265    * Helper method to determine if a character is a Latin-script letter or not. For our purposes,
    266    * combining marks should also return true since we assume they have been added to a preceding
    267    * Latin character.
    268    */
    269   // @VisibleForTesting
    270   static boolean isLatinLetter(char letter) {
    271     // Combining marks are a subset of non-spacing-mark.
    272     if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) {
    273       return false;
    274     }
    275     UnicodeBlock block = UnicodeBlock.of(letter);
    276     return block.equals(UnicodeBlock.BASIC_LATIN) ||
    277         block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) ||
    278         block.equals(UnicodeBlock.LATIN_EXTENDED_A) ||
    279         block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) ||
    280         block.equals(UnicodeBlock.LATIN_EXTENDED_B) ||
    281         block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
    282   }
    283 
    284   private static boolean isInvalidPunctuationSymbol(char character) {
    285     return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL;
    286   }
    287 
    288   /**
    289    * Attempts to extract a match from a {@code candidate} character sequence.
    290    *
    291    * @param candidate  the candidate text that might contain a phone number
    292    * @param offset  the offset of {@code candidate} within {@link #text}
    293    * @return  the match found, null if none can be found
    294    */
    295   private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) {
    296     // Skip a match that is more likely a publication page reference or a date.
    297     if (PUB_PAGES.matcher(candidate).find() || SLASH_SEPARATED_DATES.matcher(candidate).find()) {
    298       return null;
    299     }
    300     // Skip potential time-stamps.
    301     if (TIME_STAMPS.matcher(candidate).find()) {
    302       String followingText = text.toString().substring(offset + candidate.length());
    303       if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) {
    304         return null;
    305       }
    306     }
    307 
    308     // Try to come up with a valid match given the entire candidate.
    309     String rawString = candidate.toString();
    310     PhoneNumberMatch match = parseAndVerify(rawString, offset);
    311     if (match != null) {
    312       return match;
    313     }
    314 
    315     // If that failed, try to find an "inner match" - there might be a phone number within this
    316     // candidate.
    317     return extractInnerMatch(rawString, offset);
    318   }
    319 
    320   /**
    321    * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a
    322    * match.
    323    *
    324    * @param candidate  the candidate text that might contain a phone number
    325    * @param offset  the current offset of {@code candidate} within {@link #text}
    326    * @return  the match found, null if none can be found
    327    */
    328   private PhoneNumberMatch extractInnerMatch(String candidate, int offset) {
    329     // Try removing either the first or last "group" in the number and see if this gives a result.
    330     // We consider white space to be a possible indication of the start or end of the phone number.
    331     Matcher groupMatcher = GROUP_SEPARATOR.matcher(candidate);
    332 
    333     if (groupMatcher.find()) {
    334       // Try the first group by itself.
    335       CharSequence firstGroupOnly = candidate.substring(0, groupMatcher.start());
    336       firstGroupOnly = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN,
    337                                            firstGroupOnly);
    338       PhoneNumberMatch match = parseAndVerify(firstGroupOnly.toString(), offset);
    339       if (match != null) {
    340         return match;
    341       }
    342       maxTries--;
    343 
    344       int withoutFirstGroupStart = groupMatcher.end();
    345       // Try the rest of the candidate without the first group.
    346       CharSequence withoutFirstGroup = candidate.substring(withoutFirstGroupStart);
    347       withoutFirstGroup = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN,
    348                                               withoutFirstGroup);
    349       match = parseAndVerify(withoutFirstGroup.toString(), offset + withoutFirstGroupStart);
    350       if (match != null) {
    351         return match;
    352       }
    353       maxTries--;
    354 
    355       if (maxTries > 0) {
    356         int lastGroupStart = withoutFirstGroupStart;
    357         while (groupMatcher.find()) {
    358           // Find the last group.
    359           lastGroupStart = groupMatcher.start();
    360         }
    361         CharSequence withoutLastGroup = candidate.substring(0, lastGroupStart);
    362         withoutLastGroup = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN,
    363                                                withoutLastGroup);
    364         if (withoutLastGroup.equals(firstGroupOnly)) {
    365           // If there are only two groups, then the group "without the last group" is the same as
    366           // the first group. In these cases, we don't want to re-check the number group, so we exit
    367           // already.
    368           return null;
    369         }
    370         match = parseAndVerify(withoutLastGroup.toString(), offset);
    371         if (match != null) {
    372           return match;
    373         }
    374         maxTries--;
    375       }
    376     }
    377     return null;
    378   }
    379 
    380   /**
    381    * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and
    382    * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a
    383    * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null.
    384    *
    385    * @param candidate  the candidate match
    386    * @param offset  the offset of {@code candidate} within {@link #text}
    387    * @return  the parsed and validated phone number match, or null
    388    */
    389   private PhoneNumberMatch parseAndVerify(String candidate, int offset) {
    390     try {
    391       // Check the candidate doesn't contain any formatting which would indicate that it really
    392       // isn't a phone number.
    393       if (!MATCHING_BRACKETS.matcher(candidate).matches()) {
    394         return null;
    395       }
    396 
    397       // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
    398       // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
    399       if (leniency.compareTo(Leniency.VALID) >= 0) {
    400         // If the candidate is not at the start of the text, and does not start with phone-number
    401         // punctuation, check the previous character.
    402         if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) {
    403           char previousChar = text.charAt(offset - 1);
    404           // We return null if it is a latin letter or an invalid punctuation symbol.
    405           if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) {
    406             return null;
    407           }
    408         }
    409         int lastCharIndex = offset + candidate.length();
    410         if (lastCharIndex < text.length()) {
    411           char nextChar = text.charAt(lastCharIndex);
    412           if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) {
    413             return null;
    414           }
    415         }
    416       }
    417 
    418       PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion);
    419       if (leniency.verify(number, candidate, phoneUtil)) {
    420         // We used parseAndKeepRawInput to create this number, but for now we don't return the extra
    421         // values parsed. TODO: stop clearing all values here and switch all users over
    422         // to using rawInput() rather than the rawString() of PhoneNumberMatch.
    423         number.clearCountryCodeSource();
    424         number.clearRawInput();
    425         number.clearPreferredDomesticCarrierCode();
    426         return new PhoneNumberMatch(offset, candidate, number);
    427       }
    428     } catch (NumberParseException e) {
    429       // ignore and continue
    430     }
    431     return null;
    432   }
    433 
  /**
   * Small helper interface such that the number groups can be checked according to different
   * criteria, both for our default way of performing formatting and for any alternate formats we
   * may want to check.
   */
  interface NumberGroupingChecker {
    /**
     * Returns true if the groups of digits found in our candidate phone number match our
     * expectations.
     *
     * @param util  the phone number util made available to the check
     * @param number  the original number we found when parsing
     * @param normalizedCandidate  the candidate number, normalized to only contain ASCII digits,
     *     but with non-digits (spaces etc) retained
     * @param expectedNumberGroups  the groups of digits that we would expect to see if we
     *     formatted this number
     * @return  true if the digit groups of the candidate are acceptable, false otherwise
     */
    boolean checkGroups(PhoneNumberUtil util, PhoneNumber number,
                        StringBuilder normalizedCandidate, String[] expectedNumberGroups);
  }
    453 
    454   static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util,
    455                                               PhoneNumber number,
    456                                               StringBuilder normalizedCandidate,
    457                                               String[] formattedNumberGroups) {
    458     int fromIndex = 0;
    459     // Check each group of consecutive digits are not broken into separate groupings in the
    460     // {@code normalizedCandidate} string.
    461     for (int i = 0; i < formattedNumberGroups.length; i++) {
    462       // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex}
    463       // doesn't contain the consecutive digits in formattedNumberGroups[i].
    464       fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex);
    465       if (fromIndex < 0) {
    466         return false;
    467       }
    468       // Moves {@code fromIndex} forward.
    469       fromIndex += formattedNumberGroups[i].length();
    470       if (i == 0 && fromIndex < normalizedCandidate.length()) {
    471         // We are at the position right after the NDC.
    472         if (Character.isDigit(normalizedCandidate.charAt(fromIndex))) {
    473           // This means there is no formatting symbol after the NDC. In this case, we only
    474           // accept the number if there is no formatting symbol at all in the number, except
    475           // for extensions.
    476           String nationalSignificantNumber = util.getNationalSignificantNumber(number);
    477           return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length())
    478               .startsWith(nationalSignificantNumber);
    479         }
    480       }
    481     }
    482     // The check here makes sure that we haven't mistakenly already used the extension to
    483     // match the last group of the subscriber number. Note the extension cannot have
    484     // formatting in-between digits.
    485     return normalizedCandidate.substring(fromIndex).contains(number.getExtension());
    486   }
    487 
    488   static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util,
    489                                                   PhoneNumber number,
    490                                                   StringBuilder normalizedCandidate,
    491                                                   String[] formattedNumberGroups) {
    492     String[] candidateGroups =
    493         PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString());
    494     // Set this to the last group, skipping it if the number has an extension.
    495     int candidateNumberGroupIndex =
    496         number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1;
    497     // First we check if the national significant number is formatted as a block.
    498     // We use contains and not equals, since the national significant number may be present with
    499     // a prefix such as a national number prefix, or the country code itself.
    500     if (candidateGroups.length == 1 ||
    501         candidateGroups[candidateNumberGroupIndex].contains(
    502             util.getNationalSignificantNumber(number))) {
    503       return true;
    504     }
    505     // Starting from the end, go through in reverse, excluding the first group, and check the
    506     // candidate and number groups are the same.
    507     for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1);
    508          formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0;
    509          formattedNumberGroupIndex--, candidateNumberGroupIndex--) {
    510       if (!candidateGroups[candidateNumberGroupIndex].equals(
    511           formattedNumberGroups[formattedNumberGroupIndex])) {
    512         return false;
    513       }
    514     }
    515     // Now check the first group. There may be a national prefix at the start, so we only check
    516     // that the candidate group ends with the formatted number group.
    517     return (candidateNumberGroupIndex >= 0 &&
    518             candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0]));
    519   }
    520 
    521   /**
    522    * Helper method to get the national-number part of a number, formatted without any national
    523    * prefix, and return it as a set of digit blocks that would be formatted together.
    524    */
    525   private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number,
    526                                                   NumberFormat formattingPattern) {
    527     if (formattingPattern == null) {
    528       // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits.
    529       String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966);
    530       // We remove the extension part from the formatted string before splitting it into different
    531       // groups.
    532       int endIndex = rfc3966Format.indexOf(';');
    533       if (endIndex < 0) {
    534         endIndex = rfc3966Format.length();
    535       }
    536       // The country-code will have a '-' following it.
    537       int startIndex = rfc3966Format.indexOf('-') + 1;
    538       return rfc3966Format.substring(startIndex, endIndex).split("-");
    539     } else {
    540       // We format the NSN only, and split that according to the separator.
    541       String nationalSignificantNumber = util.getNationalSignificantNumber(number);
    542       return util.formatNsnUsingPattern(nationalSignificantNumber,
    543                                         formattingPattern, PhoneNumberFormat.RFC3966).split("-");
    544     }
    545   }
    546 
    547   static boolean checkNumberGroupingIsValid(
    548       PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker) {
    549     // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions)
    550     // and optimise if necessary.
    551     StringBuilder normalizedCandidate =
    552         PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */);
    553     String[] formattedNumberGroups = getNationalNumberGroups(util, number, null);
    554     if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
    555       return true;
    556     }
    557     // If this didn't pass, see if there are any alternate formats, and try them instead.
    558     PhoneMetadata alternateFormats =
    559         MetadataManager.getAlternateFormatsForCountry(number.getCountryCode());
    560     if (alternateFormats != null) {
    561       for (NumberFormat alternateFormat : alternateFormats.numberFormats()) {
    562         formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat);
    563         if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) {
    564           return true;
    565         }
    566       }
    567     }
    568     return false;
    569   }
    570 
    571   static boolean containsMoreThanOneSlash(String candidate) {
    572     int firstSlashIndex = candidate.indexOf('/');
    573     return (firstSlashIndex > 0 && candidate.substring(firstSlashIndex + 1).contains("/"));
    574   }
    575 
    576   static boolean containsOnlyValidXChars(
    577       PhoneNumber number, String candidate, PhoneNumberUtil util) {
    578     // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the
    579     // national significant number or (2) an extension sign, in which case they always precede the
    580     // extension number. We assume a carrier code is more than 1 digit, so the first case has to
    581     // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x'
    582     // or 'X'. We ignore the character if it appears as the last character of the string.
    583     for (int index = 0; index < candidate.length() - 1; index++) {
    584       char charAtIndex = candidate.charAt(index);
    585       if (charAtIndex == 'x' || charAtIndex == 'X') {
    586         char charAtNextIndex = candidate.charAt(index + 1);
    587         if (charAtNextIndex == 'x' || charAtNextIndex == 'X') {
    588           // This is the carrier code case, in which the 'X's always precede the national
    589           // significant number.
    590           index++;
    591           if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) {
    592             return false;
    593           }
    594         // This is the extension sign case, in which the 'x' or 'X' should always precede the
    595         // extension number.
    596         } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals(
    597             number.getExtension())) {
    598             return false;
    599         }
    600       }
    601     }
    602     return true;
    603   }
    604 
    605   static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) {
    606     // First, check how we deduced the country code. If it was written in international format, then
    607     // the national prefix is not required.
    608     if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) {
    609       return true;
    610     }
    611     String phoneNumberRegion =
    612         util.getRegionCodeForCountryCode(number.getCountryCode());
    613     PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion);
    614     if (metadata == null) {
    615       return true;
    616     }
    617     // Check if a national prefix should be present when formatting this number.
    618     String nationalNumber = util.getNationalSignificantNumber(number);
    619     NumberFormat formatRule =
    620         util.chooseFormattingPatternForNumber(metadata.numberFormats(), nationalNumber);
    621     // To do this, we check that a national prefix formatting rule was present and that it wasn't
    622     // just the first-group symbol ($1) with punctuation.
    623     if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) {
    624       if (formatRule.isNationalPrefixOptionalWhenFormatting()) {
    625         // The national-prefix is optional in these cases, so we don't need to check if it was
    626         // present.
    627         return true;
    628       }
    629       // Remove the first-group symbol.
    630       String candidateNationalPrefixRule = formatRule.getNationalPrefixFormattingRule();
    631       // We assume that the first-group symbol will never be _before_ the national prefix.
    632       candidateNationalPrefixRule =
    633           candidateNationalPrefixRule.substring(0, candidateNationalPrefixRule.indexOf("$1"));
    634       candidateNationalPrefixRule =
    635           PhoneNumberUtil.normalizeDigitsOnly(candidateNationalPrefixRule);
    636       if (candidateNationalPrefixRule.length() == 0) {
    637         // National Prefix not needed for this number.
    638         return true;
    639       }
    640       // Normalize the remainder.
    641       String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput());
    642       StringBuilder rawInput = new StringBuilder(rawInputCopy);
    643       // Check if we found a national prefix and/or carrier code at the start of the raw input, and
    644       // return the result.
    645       return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null);
    646     }
    647     return true;
    648   }
    649 
    650   public boolean hasNext() {
    651     if (state == State.NOT_READY) {
    652       lastMatch = find(searchIndex);
    653       if (lastMatch == null) {
    654         state = State.DONE;
    655       } else {
    656         searchIndex = lastMatch.end();
    657         state = State.READY;
    658       }
    659     }
    660     return state == State.READY;
    661   }
    662 
    663   public PhoneNumberMatch next() {
    664     // Check the state and find the next match as a side-effect if necessary.
    665     if (!hasNext()) {
    666       throw new NoSuchElementException();
    667     }
    668 
    669     // Don't retain that memory any longer than necessary.
    670     PhoneNumberMatch result = lastMatch;
    671     lastMatch = null;
    672     state = State.NOT_READY;
    673     return result;
    674   }
    675 
    676   /**
    677    * Always throws {@link UnsupportedOperationException} as removal is not supported.
    678    */
    679   public void remove() {
    680     throw new UnsupportedOperationException();
    681   }
    682 }
    683