Home | History | Annotate | Download | only in util
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.contacts.common.util;
     18 
     19 import android.support.annotation.VisibleForTesting;
     20 
     21 /** Methods related to search. */
     22 public class SearchUtil {
     23 
     24   /**
     25    * Given a string with lines delimited with '\n', finds the matching line to the given substring.
     26    *
     27    * @param contents The string to search.
     28    * @param substring The substring to search for.
     29    * @return A MatchedLine object containing the matching line and the startIndex of the substring
     30    *     match within that line.
     31    */
     32   public static MatchedLine findMatchingLine(String contents, String substring) {
     33     final MatchedLine matched = new MatchedLine();
     34 
     35     // Snippet may contain multiple lines separated by "\n".
     36     // Locate the lines of the content that contain the substring.
     37     final int index = SearchUtil.contains(contents, substring);
     38     if (index != -1) {
     39       // Match found.  Find the corresponding line.
     40       int start = index - 1;
     41       while (start > -1) {
     42         if (contents.charAt(start) == '\n') {
     43           break;
     44         }
     45         start--;
     46       }
     47       int end = index + 1;
     48       while (end < contents.length()) {
     49         if (contents.charAt(end) == '\n') {
     50           break;
     51         }
     52         end++;
     53       }
     54       matched.line = contents.substring(start + 1, end);
     55       matched.startIndex = index - (start + 1);
     56     }
     57     return matched;
     58   }
     59 
     60   /**
     61    * Similar to String.contains() with two main differences:
     62    *
     63    * <p>1) Only searches token prefixes. A token is defined as any combination of letters or
     64    * numbers.
     65    *
     66    * <p>2) Returns the starting index where the substring is found.
     67    *
     68    * @param value The string to search.
     69    * @param substring The substring to look for.
     70    * @return The starting index where the substring is found. {@literal -1} if substring is not
     71    *     found in value.
     72    */
     73   @VisibleForTesting
     74   static int contains(String value, String substring) {
     75     if (value.length() < substring.length()) {
     76       return -1;
     77     }
     78 
     79     // i18n support
     80     // Generate the code points for the substring once.
     81     // There will be a maximum of substring.length code points.  But may be fewer.
     82     // Since the array length is not an accurate size, we need to keep a separate variable.
     83     final int[] substringCodePoints = new int[substring.length()];
     84     int substringLength = 0; // may not equal substring.length()!!
     85     for (int i = 0; i < substring.length(); ) {
     86       final int codePoint = Character.codePointAt(substring, i);
     87       substringCodePoints[substringLength] = codePoint;
     88       substringLength++;
     89       i += Character.charCount(codePoint);
     90     }
     91 
     92     for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) {
     93       int numMatch = 0;
     94       for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) {
     95         int valueCp = Character.toLowerCase(value.codePointAt(j));
     96         int substringCp = substringCodePoints[numMatch];
     97         if (valueCp != substringCp) {
     98           break;
     99         }
    100         j += Character.charCount(valueCp);
    101       }
    102       if (numMatch == substringLength) {
    103         return i;
    104       }
    105     }
    106     return -1;
    107   }
    108 
    109   /**
    110    * Find the start of the next token. A token is composed of letters and numbers. Any other
    111    * character are considered delimiters.
    112    *
    113    * @param line The string to search for the next token.
    114    * @param startIndex The index to start searching. 0 based indexing.
    115    * @return The index for the start of the next token. line.length() if next token not found.
    116    */
    117   @VisibleForTesting
    118   static int findNextTokenStart(String line, int startIndex) {
    119     int index = startIndex;
    120 
    121     // If already in token, eat remainder of token.
    122     while (index <= line.length()) {
    123       if (index == line.length()) {
    124         // No more tokens.
    125         return index;
    126       }
    127       final int codePoint = line.codePointAt(index);
    128       if (!Character.isLetterOrDigit(codePoint)) {
    129         break;
    130       }
    131       index += Character.charCount(codePoint);
    132     }
    133 
    134     // Out of token, eat all consecutive delimiters.
    135     while (index <= line.length()) {
    136       if (index == line.length()) {
    137         return index;
    138       }
    139       final int codePoint = line.codePointAt(index);
    140       if (Character.isLetterOrDigit(codePoint)) {
    141         break;
    142       }
    143       index += Character.charCount(codePoint);
    144     }
    145 
    146     return index;
    147   }
    148 
    149   /**
    150    * Anything other than letter and numbers are considered delimiters. Remove start and end
    151    * delimiters since they are not relevant to search.
    152    *
    153    * @param query The query string to clean.
    154    * @return The cleaned query. Empty string if all characters are cleaned out.
    155    */
    156   public static String cleanStartAndEndOfSearchQuery(String query) {
    157     int start = 0;
    158     while (start < query.length()) {
    159       int codePoint = query.codePointAt(start);
    160       if (Character.isLetterOrDigit(codePoint)) {
    161         break;
    162       }
    163       start += Character.charCount(codePoint);
    164     }
    165 
    166     if (start == query.length()) {
    167       // All characters are delimiters.
    168       return "";
    169     }
    170 
    171     int end = query.length() - 1;
    172     while (end > -1) {
    173       if (Character.isLowSurrogate(query.charAt(end))) {
    174         // Assume valid i18n string.  There should be a matching high surrogate before it.
    175         end--;
    176       }
    177       int codePoint = query.codePointAt(end);
    178       if (Character.isLetterOrDigit(codePoint)) {
    179         break;
    180       }
    181       end--;
    182     }
    183 
    184     // end is a letter or digit.
    185     return query.substring(start, end + 1);
    186   }
    187 
    188   public static class MatchedLine {
    189 
    190     public int startIndex = -1;
    191     public String line;
    192 
    193     @Override
    194     public String toString() {
    195       return "MatchedLine{" + "line='" + line + '\'' + ", startIndex=" + startIndex + '}';
    196     }
    197   }
    198 }
    199