1 /* 2 * Copyright (C) 2011 The Libphonenumber Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.i18n.phonenumbers; 18 19 import com.android.i18n.phonenumbers.PhoneNumberUtil.Leniency; 20 import com.android.i18n.phonenumbers.PhoneNumberUtil.MatchType; 21 import com.android.i18n.phonenumbers.PhoneNumberUtil.PhoneNumberFormat; 22 import com.android.i18n.phonenumbers.Phonemetadata.NumberFormat; 23 import com.android.i18n.phonenumbers.Phonemetadata.PhoneMetadata; 24 import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber.CountryCodeSource; 25 import com.android.i18n.phonenumbers.Phonenumber.PhoneNumber; 26 27 import java.lang.Character.UnicodeBlock; 28 import java.util.Iterator; 29 import java.util.NoSuchElementException; 30 import java.util.regex.Matcher; 31 import java.util.regex.Pattern; 32 33 /** 34 * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}. 35 * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in 36 * {@link PhoneNumberUtil}. 37 * 38 * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt> are 39 * not found. 40 * 41 * <p>This class is not thread-safe. 42 * 43 * @author Tom Hofmann 44 */ 45 final class PhoneNumberMatcher implements Iterator<PhoneNumberMatch> { 46 /** 47 * The phone number pattern used by {@link #find}, similar to 48 * {@code PhoneNumberUtil.VALID_PHONE_NUMBER}, but with the following differences: 49 * <ul> 50 * <li>All captures are limited in order to place an upper bound to the text matched by the 51 * pattern. 52 * <ul> 53 * <li>Leading punctuation / plus signs are limited. 54 * <li>Consecutive occurrences of punctuation are limited. 55 * <li>Number of digits is limited. 56 * </ul> 57 * <li>No whitespace is allowed at the start or end. 58 * <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported. 59 * </ul> 60 */ 61 private static final Pattern PATTERN; 62 /** 63 * Matches strings that look like publication pages. Example: 64 * <pre>Computing Complete Answers to Queries in the Presence of Limited Access Patterns. 65 * Chen Li. VLDB J. 12(3): 211-227 (2003).</pre> 66 * 67 * The string "211-227 (2003)" is not a telephone number. 68 */ 69 private static final Pattern PUB_PAGES = Pattern.compile("\\d{1,5}-+\\d{1,5}\\s{0,4}\\(\\d{1,4}"); 70 71 /** 72 * Matches strings that look like dates using "/" as a separator. Examples: 3/10/2011, 31/10/96 or 73 * 08/31/95. 74 */ 75 private static final Pattern SLASH_SEPARATED_DATES = 76 Pattern.compile("(?:(?:[0-3]?\\d/[01]?\\d)|(?:[01]?\\d/[0-3]?\\d))/(?:[12]\\d)?\\d{2}"); 77 78 /** 79 * Matches timestamps. Examples: "2012-01-02 08:00". Note that the reg-ex does not include the 80 * trailing ":\d\d" -- that is covered by TIME_STAMPS_SUFFIX. 81 */ 82 private static final Pattern TIME_STAMPS = 83 Pattern.compile("[12]\\d{3}[-/]?[01]\\d[-/]?[0-3]\\d [0-2]\\d$"); 84 private static final Pattern TIME_STAMPS_SUFFIX = Pattern.compile(":[0-5]\\d"); 85 86 /** 87 * Pattern to check that brackets match. Opening brackets should be closed within a phone number. 88 * This also checks that there is something inside the brackets. Having no brackets at all is also 89 * fine. 90 */ 91 private static final Pattern MATCHING_BRACKETS; 92 93 /** 94 * Matches white-space, which may indicate the end of a phone number and the start of something 95 * else (such as a neighbouring zip-code). If white-space is found, continues to match all 96 * characters that are not typically used to start a phone number. 97 */ 98 private static final Pattern GROUP_SEPARATOR; 99 100 /** 101 * Punctuation that may be at the start of a phone number - brackets and plus signs. 102 */ 103 private static final Pattern LEAD_CLASS; 104 105 static { 106 /* Builds the MATCHING_BRACKETS and PATTERN regular expressions. The building blocks below exist 107 * to make the pattern more easily understood. */ 108 109 String openingParens = "(\\[\uFF08\uFF3B"; 110 String closingParens = ")\\]\uFF09\uFF3D"; 111 String nonParens = "[^" + openingParens + closingParens + "]"; 112 113 /* Limit on the number of pairs of brackets in a phone number. */ 114 String bracketPairLimit = limit(0, 3); 115 /* 116 * An opening bracket at the beginning may not be closed, but subsequent ones should be. It's 117 * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a 118 * closing bracket first. We limit the sets of brackets in a phone number to four. 119 */ 120 MATCHING_BRACKETS = Pattern.compile( 121 "(?:[" + openingParens + "])?" + "(?:" + nonParens + "+" + "[" + closingParens + "])?" + 122 nonParens + "+" + 123 "(?:[" + openingParens + "]" + nonParens + "+[" + closingParens + "])" + bracketPairLimit + 124 nonParens + "*"); 125 126 /* Limit on the number of leading (plus) characters. */ 127 String leadLimit = limit(0, 2); 128 /* Limit on the number of consecutive punctuation characters. */ 129 String punctuationLimit = limit(0, 4); 130 /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a 131 * single block, set high enough to accommodate the entire national number and the international 132 * country code. */ 133 int digitBlockLimit = 134 PhoneNumberUtil.MAX_LENGTH_FOR_NSN + PhoneNumberUtil.MAX_LENGTH_COUNTRY_CODE; 135 /* Limit on the number of blocks separated by punctuation. Uses digitBlockLimit since some 136 * formats use spaces to separate each digit. */ 137 String blockLimit = limit(0, digitBlockLimit); 138 139 /* A punctuation sequence allowing white space. */ 140 String punctuation = "[" + PhoneNumberUtil.VALID_PUNCTUATION + "]" + punctuationLimit; 141 /* A digits block without punctuation. */ 142 String digitSequence = "\\p{Nd}" + limit(1, digitBlockLimit); 143 144 String leadClassChars = openingParens + PhoneNumberUtil.PLUS_CHARS; 145 String leadClass = "[" + leadClassChars + "]"; 146 LEAD_CLASS = Pattern.compile(leadClass); 147 GROUP_SEPARATOR = Pattern.compile("\\p{Z}" + "[^" + leadClassChars + "\\p{Nd}]*"); 148 149 /* Phone number pattern allowing optional punctuation. */ 150 PATTERN = Pattern.compile( 151 "(?:" + leadClass + punctuation + ")" + leadLimit + 152 digitSequence + "(?:" + punctuation + digitSequence + ")" + blockLimit + 153 "(?:" + PhoneNumberUtil.EXTN_PATTERNS_FOR_MATCHING + ")?", 154 PhoneNumberUtil.REGEX_FLAGS); 155 } 156 157 /** Returns a regular expression quantifier with an upper and lower limit. */ 158 private static String limit(int lower, int upper) { 159 if ((lower < 0) || (upper <= 0) || (upper < lower)) { 160 throw new IllegalArgumentException(); 161 } 162 return "{" + lower + "," + upper + "}"; 163 } 164 165 /** The potential states of a PhoneNumberMatcher. */ 166 private enum State { 167 NOT_READY, READY, DONE 168 } 169 170 /** The phone number utility. */ 171 private final PhoneNumberUtil phoneUtil; 172 /** The text searched for phone numbers. */ 173 private final CharSequence text; 174 /** 175 * The region (country) to assume for phone numbers without an international prefix, possibly 176 * null. 177 */ 178 private final String preferredRegion; 179 /** The degree of validation requested. */ 180 private final Leniency leniency; 181 /** The maximum number of retries after matching an invalid number. */ 182 private long maxTries; 183 184 /** The iteration tristate. */ 185 private State state = State.NOT_READY; 186 /** The last successful match, null unless in {@link State#READY}. */ 187 private PhoneNumberMatch lastMatch = null; 188 /** The next index to start searching at. Undefined in {@link State#DONE}. */ 189 private int searchIndex = 0; 190 191 /** 192 * Creates a new instance. See the factory methods in {@link PhoneNumberUtil} on how to obtain a 193 * new instance. 194 * 195 * @param util the phone number util to use 196 * @param text the character sequence that we will search, null for no text 197 * @param country the country to assume for phone numbers not written in international format 198 * (with a leading plus, or with the international dialing prefix of the 199 * specified region). May be null or "ZZ" if only numbers with a 200 * leading plus should be considered. 201 * @param leniency the leniency to use when evaluating candidate phone numbers 202 * @param maxTries the maximum number of invalid numbers to try before giving up on the text. 203 * This is to cover degenerate cases where the text has a lot of false positives 204 * in it. Must be {@code >= 0}. 205 */ 206 PhoneNumberMatcher(PhoneNumberUtil util, CharSequence text, String country, Leniency leniency, 207 long maxTries) { 208 209 if ((util == null) || (leniency == null)) { 210 throw new NullPointerException(); 211 } 212 if (maxTries < 0) { 213 throw new IllegalArgumentException(); 214 } 215 this.phoneUtil = util; 216 this.text = (text != null) ? text : ""; 217 this.preferredRegion = country; 218 this.leniency = leniency; 219 this.maxTries = maxTries; 220 } 221 222 /** 223 * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex} 224 * that represents a phone number. Returns the next match, null if none was found. 225 * 226 * @param index the search index to start searching at 227 * @return the phone number match found, null if none can be found 228 */ 229 private PhoneNumberMatch find(int index) { 230 Matcher matcher = PATTERN.matcher(text); 231 while ((maxTries > 0) && matcher.find(index)) { 232 int start = matcher.start(); 233 CharSequence candidate = text.subSequence(start, matcher.end()); 234 235 // Check for extra numbers at the end. 236 // TODO: This is the place to start when trying to support extraction of multiple phone number 237 // from split notations (+41 79 123 45 67 / 68). 238 candidate = trimAfterFirstMatch(PhoneNumberUtil.SECOND_NUMBER_START_PATTERN, candidate); 239 240 PhoneNumberMatch match = extractMatch(candidate, start); 241 if (match != null) { 242 return match; 243 } 244 245 index = start + candidate.length(); 246 maxTries--; 247 } 248 249 return null; 250 } 251 252 /** 253 * Trims away any characters after the first match of {@code pattern} in {@code candidate}, 254 * returning the trimmed version. 255 */ 256 private static CharSequence trimAfterFirstMatch(Pattern pattern, CharSequence candidate) { 257 Matcher trailingCharsMatcher = pattern.matcher(candidate); 258 if (trailingCharsMatcher.find()) { 259 candidate = candidate.subSequence(0, trailingCharsMatcher.start()); 260 } 261 return candidate; 262 } 263 264 /** 265 * Helper method to determine if a character is a Latin-script letter or not. For our purposes, 266 * combining marks should also return true since we assume they have been added to a preceding 267 * Latin character. 268 */ 269 // @VisibleForTesting 270 static boolean isLatinLetter(char letter) { 271 // Combining marks are a subset of non-spacing-mark. 272 if (!Character.isLetter(letter) && Character.getType(letter) != Character.NON_SPACING_MARK) { 273 return false; 274 } 275 UnicodeBlock block = UnicodeBlock.of(letter); 276 return block.equals(UnicodeBlock.BASIC_LATIN) || 277 block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT) || 278 block.equals(UnicodeBlock.LATIN_EXTENDED_A) || 279 block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) || 280 block.equals(UnicodeBlock.LATIN_EXTENDED_B) || 281 block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS); 282 } 283 284 private static boolean isInvalidPunctuationSymbol(char character) { 285 return character == '%' || Character.getType(character) == Character.CURRENCY_SYMBOL; 286 } 287 288 /** 289 * Attempts to extract a match from a {@code candidate} character sequence. 290 * 291 * @param candidate the candidate text that might contain a phone number 292 * @param offset the offset of {@code candidate} within {@link #text} 293 * @return the match found, null if none can be found 294 */ 295 private PhoneNumberMatch extractMatch(CharSequence candidate, int offset) { 296 // Skip a match that is more likely a publication page reference or a date. 297 if (PUB_PAGES.matcher(candidate).find() || SLASH_SEPARATED_DATES.matcher(candidate).find()) { 298 return null; 299 } 300 // Skip potential time-stamps. 301 if (TIME_STAMPS.matcher(candidate).find()) { 302 String followingText = text.toString().substring(offset + candidate.length()); 303 if (TIME_STAMPS_SUFFIX.matcher(followingText).lookingAt()) { 304 return null; 305 } 306 } 307 308 // Try to come up with a valid match given the entire candidate. 309 String rawString = candidate.toString(); 310 PhoneNumberMatch match = parseAndVerify(rawString, offset); 311 if (match != null) { 312 return match; 313 } 314 315 // If that failed, try to find an "inner match" - there might be a phone number within this 316 // candidate. 317 return extractInnerMatch(rawString, offset); 318 } 319 320 /** 321 * Attempts to extract a match from {@code candidate} if the whole candidate does not qualify as a 322 * match. 323 * 324 * @param candidate the candidate text that might contain a phone number 325 * @param offset the current offset of {@code candidate} within {@link #text} 326 * @return the match found, null if none can be found 327 */ 328 private PhoneNumberMatch extractInnerMatch(String candidate, int offset) { 329 // Try removing either the first or last "group" in the number and see if this gives a result. 330 // We consider white space to be a possible indication of the start or end of the phone number. 331 Matcher groupMatcher = GROUP_SEPARATOR.matcher(candidate); 332 333 if (groupMatcher.find()) { 334 // Try the first group by itself. 335 CharSequence firstGroupOnly = candidate.substring(0, groupMatcher.start()); 336 firstGroupOnly = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, 337 firstGroupOnly); 338 PhoneNumberMatch match = parseAndVerify(firstGroupOnly.toString(), offset); 339 if (match != null) { 340 return match; 341 } 342 maxTries--; 343 344 int withoutFirstGroupStart = groupMatcher.end(); 345 // Try the rest of the candidate without the first group. 346 CharSequence withoutFirstGroup = candidate.substring(withoutFirstGroupStart); 347 withoutFirstGroup = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, 348 withoutFirstGroup); 349 match = parseAndVerify(withoutFirstGroup.toString(), offset + withoutFirstGroupStart); 350 if (match != null) { 351 return match; 352 } 353 maxTries--; 354 355 if (maxTries > 0) { 356 int lastGroupStart = withoutFirstGroupStart; 357 while (groupMatcher.find()) { 358 // Find the last group. 359 lastGroupStart = groupMatcher.start(); 360 } 361 CharSequence withoutLastGroup = candidate.substring(0, lastGroupStart); 362 withoutLastGroup = trimAfterFirstMatch(PhoneNumberUtil.UNWANTED_END_CHAR_PATTERN, 363 withoutLastGroup); 364 if (withoutLastGroup.equals(firstGroupOnly)) { 365 // If there are only two groups, then the group "without the last group" is the same as 366 // the first group. In these cases, we don't want to re-check the number group, so we exit 367 // already. 368 return null; 369 } 370 match = parseAndVerify(withoutLastGroup.toString(), offset); 371 if (match != null) { 372 return match; 373 } 374 maxTries--; 375 } 376 } 377 return null; 378 } 379 380 /** 381 * Parses a phone number from the {@code candidate} using {@link PhoneNumberUtil#parse} and 382 * verifies it matches the requested {@link #leniency}. If parsing and verification succeed, a 383 * corresponding {@link PhoneNumberMatch} is returned, otherwise this method returns null. 384 * 385 * @param candidate the candidate match 386 * @param offset the offset of {@code candidate} within {@link #text} 387 * @return the parsed and validated phone number match, or null 388 */ 389 private PhoneNumberMatch parseAndVerify(String candidate, int offset) { 390 try { 391 // Check the candidate doesn't contain any formatting which would indicate that it really 392 // isn't a phone number. 393 if (!MATCHING_BRACKETS.matcher(candidate).matches()) { 394 return null; 395 } 396 397 // If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded 398 // by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def. 399 if (leniency.compareTo(Leniency.VALID) >= 0) { 400 // If the candidate is not at the start of the text, and does not start with phone-number 401 // punctuation, check the previous character. 402 if (offset > 0 && !LEAD_CLASS.matcher(candidate).lookingAt()) { 403 char previousChar = text.charAt(offset - 1); 404 // We return null if it is a latin letter or an invalid punctuation symbol. 405 if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) { 406 return null; 407 } 408 } 409 int lastCharIndex = offset + candidate.length(); 410 if (lastCharIndex < text.length()) { 411 char nextChar = text.charAt(lastCharIndex); 412 if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) { 413 return null; 414 } 415 } 416 } 417 418 PhoneNumber number = phoneUtil.parseAndKeepRawInput(candidate, preferredRegion); 419 if (leniency.verify(number, candidate, phoneUtil)) { 420 // We used parseAndKeepRawInput to create this number, but for now we don't return the extra 421 // values parsed. TODO: stop clearing all values here and switch all users over 422 // to using rawInput() rather than the rawString() of PhoneNumberMatch. 423 number.clearCountryCodeSource(); 424 number.clearRawInput(); 425 number.clearPreferredDomesticCarrierCode(); 426 return new PhoneNumberMatch(offset, candidate, number); 427 } 428 } catch (NumberParseException e) { 429 // ignore and continue 430 } 431 return null; 432 } 433 434 /** 435 * Small helper interface such that the number groups can be checked according to different 436 * criteria, both for our default way of performing formatting and for any alternate formats we 437 * may want to check. 438 */ 439 interface NumberGroupingChecker { 440 /** 441 * Returns true if the groups of digits found in our candidate phone number match our 442 * expectations. 443 * 444 * @param number the original number we found when parsing 445 * @param normalizedCandidate the candidate number, normalized to only contain ASCII digits, 446 * but with non-digits (spaces etc) retained 447 * @param expectedNumberGroups the groups of digits that we would expect to see if we 448 * formatted this number 449 */ 450 boolean checkGroups(PhoneNumberUtil util, PhoneNumber number, 451 StringBuilder normalizedCandidate, String[] expectedNumberGroups); 452 } 453 454 static boolean allNumberGroupsRemainGrouped(PhoneNumberUtil util, 455 PhoneNumber number, 456 StringBuilder normalizedCandidate, 457 String[] formattedNumberGroups) { 458 int fromIndex = 0; 459 // Check each group of consecutive digits are not broken into separate groupings in the 460 // {@code normalizedCandidate} string. 461 for (int i = 0; i < formattedNumberGroups.length; i++) { 462 // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex} 463 // doesn't contain the consecutive digits in formattedNumberGroups[i]. 464 fromIndex = normalizedCandidate.indexOf(formattedNumberGroups[i], fromIndex); 465 if (fromIndex < 0) { 466 return false; 467 } 468 // Moves {@code fromIndex} forward. 469 fromIndex += formattedNumberGroups[i].length(); 470 if (i == 0 && fromIndex < normalizedCandidate.length()) { 471 // We are at the position right after the NDC. 472 if (Character.isDigit(normalizedCandidate.charAt(fromIndex))) { 473 // This means there is no formatting symbol after the NDC. In this case, we only 474 // accept the number if there is no formatting symbol at all in the number, except 475 // for extensions. 476 String nationalSignificantNumber = util.getNationalSignificantNumber(number); 477 return normalizedCandidate.substring(fromIndex - formattedNumberGroups[i].length()) 478 .startsWith(nationalSignificantNumber); 479 } 480 } 481 } 482 // The check here makes sure that we haven't mistakenly already used the extension to 483 // match the last group of the subscriber number. Note the extension cannot have 484 // formatting in-between digits. 485 return normalizedCandidate.substring(fromIndex).contains(number.getExtension()); 486 } 487 488 static boolean allNumberGroupsAreExactlyPresent(PhoneNumberUtil util, 489 PhoneNumber number, 490 StringBuilder normalizedCandidate, 491 String[] formattedNumberGroups) { 492 String[] candidateGroups = 493 PhoneNumberUtil.NON_DIGITS_PATTERN.split(normalizedCandidate.toString()); 494 // Set this to the last group, skipping it if the number has an extension. 495 int candidateNumberGroupIndex = 496 number.hasExtension() ? candidateGroups.length - 2 : candidateGroups.length - 1; 497 // First we check if the national significant number is formatted as a block. 498 // We use contains and not equals, since the national significant number may be present with 499 // a prefix such as a national number prefix, or the country code itself. 500 if (candidateGroups.length == 1 || 501 candidateGroups[candidateNumberGroupIndex].contains( 502 util.getNationalSignificantNumber(number))) { 503 return true; 504 } 505 // Starting from the end, go through in reverse, excluding the first group, and check the 506 // candidate and number groups are the same. 507 for (int formattedNumberGroupIndex = (formattedNumberGroups.length - 1); 508 formattedNumberGroupIndex > 0 && candidateNumberGroupIndex >= 0; 509 formattedNumberGroupIndex--, candidateNumberGroupIndex--) { 510 if (!candidateGroups[candidateNumberGroupIndex].equals( 511 formattedNumberGroups[formattedNumberGroupIndex])) { 512 return false; 513 } 514 } 515 // Now check the first group. There may be a national prefix at the start, so we only check 516 // that the candidate group ends with the formatted number group. 517 return (candidateNumberGroupIndex >= 0 && 518 candidateGroups[candidateNumberGroupIndex].endsWith(formattedNumberGroups[0])); 519 } 520 521 /** 522 * Helper method to get the national-number part of a number, formatted without any national 523 * prefix, and return it as a set of digit blocks that would be formatted together. 524 */ 525 private static String[] getNationalNumberGroups(PhoneNumberUtil util, PhoneNumber number, 526 NumberFormat formattingPattern) { 527 if (formattingPattern == null) { 528 // This will be in the format +CC-DG;ext=EXT where DG represents groups of digits. 529 String rfc3966Format = util.format(number, PhoneNumberFormat.RFC3966); 530 // We remove the extension part from the formatted string before splitting it into different 531 // groups. 532 int endIndex = rfc3966Format.indexOf(';'); 533 if (endIndex < 0) { 534 endIndex = rfc3966Format.length(); 535 } 536 // The country-code will have a '-' following it. 537 int startIndex = rfc3966Format.indexOf('-') + 1; 538 return rfc3966Format.substring(startIndex, endIndex).split("-"); 539 } else { 540 // We format the NSN only, and split that according to the separator. 541 String nationalSignificantNumber = util.getNationalSignificantNumber(number); 542 return util.formatNsnUsingPattern(nationalSignificantNumber, 543 formattingPattern, PhoneNumberFormat.RFC3966).split("-"); 544 } 545 } 546 547 static boolean checkNumberGroupingIsValid( 548 PhoneNumber number, String candidate, PhoneNumberUtil util, NumberGroupingChecker checker) { 549 // TODO: Evaluate how this works for other locales (testing has been limited to NANPA regions) 550 // and optimise if necessary. 551 StringBuilder normalizedCandidate = 552 PhoneNumberUtil.normalizeDigits(candidate, true /* keep non-digits */); 553 String[] formattedNumberGroups = getNationalNumberGroups(util, number, null); 554 if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { 555 return true; 556 } 557 // If this didn't pass, see if there are any alternate formats, and try them instead. 558 PhoneMetadata alternateFormats = 559 MetadataManager.getAlternateFormatsForCountry(number.getCountryCode()); 560 if (alternateFormats != null) { 561 for (NumberFormat alternateFormat : alternateFormats.numberFormats()) { 562 formattedNumberGroups = getNationalNumberGroups(util, number, alternateFormat); 563 if (checker.checkGroups(util, number, normalizedCandidate, formattedNumberGroups)) { 564 return true; 565 } 566 } 567 } 568 return false; 569 } 570 571 static boolean containsMoreThanOneSlash(String candidate) { 572 int firstSlashIndex = candidate.indexOf('/'); 573 return (firstSlashIndex > 0 && candidate.substring(firstSlashIndex + 1).contains("/")); 574 } 575 576 static boolean containsOnlyValidXChars( 577 PhoneNumber number, String candidate, PhoneNumberUtil util) { 578 // The characters 'x' and 'X' can be (1) a carrier code, in which case they always precede the 579 // national significant number or (2) an extension sign, in which case they always precede the 580 // extension number. We assume a carrier code is more than 1 digit, so the first case has to 581 // have more than 1 consecutive 'x' or 'X', whereas the second case can only have exactly 1 'x' 582 // or 'X'. We ignore the character if it appears as the last character of the string. 583 for (int index = 0; index < candidate.length() - 1; index++) { 584 char charAtIndex = candidate.charAt(index); 585 if (charAtIndex == 'x' || charAtIndex == 'X') { 586 char charAtNextIndex = candidate.charAt(index + 1); 587 if (charAtNextIndex == 'x' || charAtNextIndex == 'X') { 588 // This is the carrier code case, in which the 'X's always precede the national 589 // significant number. 590 index++; 591 if (util.isNumberMatch(number, candidate.substring(index)) != MatchType.NSN_MATCH) { 592 return false; 593 } 594 // This is the extension sign case, in which the 'x' or 'X' should always precede the 595 // extension number. 596 } else if (!PhoneNumberUtil.normalizeDigitsOnly(candidate.substring(index)).equals( 597 number.getExtension())) { 598 return false; 599 } 600 } 601 } 602 return true; 603 } 604 605 static boolean isNationalPrefixPresentIfRequired(PhoneNumber number, PhoneNumberUtil util) { 606 // First, check how we deduced the country code. If it was written in international format, then 607 // the national prefix is not required. 608 if (number.getCountryCodeSource() != CountryCodeSource.FROM_DEFAULT_COUNTRY) { 609 return true; 610 } 611 String phoneNumberRegion = 612 util.getRegionCodeForCountryCode(number.getCountryCode()); 613 PhoneMetadata metadata = util.getMetadataForRegion(phoneNumberRegion); 614 if (metadata == null) { 615 return true; 616 } 617 // Check if a national prefix should be present when formatting this number. 618 String nationalNumber = util.getNationalSignificantNumber(number); 619 NumberFormat formatRule = 620 util.chooseFormattingPatternForNumber(metadata.numberFormats(), nationalNumber); 621 // To do this, we check that a national prefix formatting rule was present and that it wasn't 622 // just the first-group symbol ($1) with punctuation. 623 if ((formatRule != null) && formatRule.getNationalPrefixFormattingRule().length() > 0) { 624 if (formatRule.isNationalPrefixOptionalWhenFormatting()) { 625 // The national-prefix is optional in these cases, so we don't need to check if it was 626 // present. 627 return true; 628 } 629 // Remove the first-group symbol. 630 String candidateNationalPrefixRule = formatRule.getNationalPrefixFormattingRule(); 631 // We assume that the first-group symbol will never be _before_ the national prefix. 632 candidateNationalPrefixRule = 633 candidateNationalPrefixRule.substring(0, candidateNationalPrefixRule.indexOf("$1")); 634 candidateNationalPrefixRule = 635 PhoneNumberUtil.normalizeDigitsOnly(candidateNationalPrefixRule); 636 if (candidateNationalPrefixRule.length() == 0) { 637 // National Prefix not needed for this number. 638 return true; 639 } 640 // Normalize the remainder. 641 String rawInputCopy = PhoneNumberUtil.normalizeDigitsOnly(number.getRawInput()); 642 StringBuilder rawInput = new StringBuilder(rawInputCopy); 643 // Check if we found a national prefix and/or carrier code at the start of the raw input, and 644 // return the result. 645 return util.maybeStripNationalPrefixAndCarrierCode(rawInput, metadata, null); 646 } 647 return true; 648 } 649 650 public boolean hasNext() { 651 if (state == State.NOT_READY) { 652 lastMatch = find(searchIndex); 653 if (lastMatch == null) { 654 state = State.DONE; 655 } else { 656 searchIndex = lastMatch.end(); 657 state = State.READY; 658 } 659 } 660 return state == State.READY; 661 } 662 663 public PhoneNumberMatch next() { 664 // Check the state and find the next match as a side-effect if necessary. 665 if (!hasNext()) { 666 throw new NoSuchElementException(); 667 } 668 669 // Don't retain that memory any longer than necessary. 670 PhoneNumberMatch result = lastMatch; 671 lastMatch = null; 672 state = State.NOT_READY; 673 return result; 674 } 675 676 /** 677 * Always throws {@link UnsupportedOperationException} as removal is not supported. 678 */ 679 public void remove() { 680 throw new UnsupportedOperationException(); 681 } 682 } 683