Home | History | Annotate | Download | only in util
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.contacts.common.util;
     18 
     19 import com.google.common.annotations.VisibleForTesting;
     20 
     21 /**
     22  * Methods related to search.
     23  */
     24 public class SearchUtil {
     25 
     26     public static class MatchedLine {
     27 
     28         public int startIndex = -1;
     29         public String line;
     30 
     31         @Override
     32         public String toString() {
     33             return "MatchedLine{" +
     34                     "line='" + line + '\'' +
     35                     ", startIndex=" + startIndex +
     36                     '}';
     37         }
     38     }
     39 
     40     /**
     41      * Given a string with lines delimited with '\n', finds the matching line to the given
     42      * substring.
     43      *
     44      * @param contents The string to search.
     45      * @param substring The substring to search for.
     46      * @return A MatchedLine object containing the matching line and the startIndex of the substring
     47      * match within that line.
     48      */
     49     public static MatchedLine findMatchingLine(String contents, String substring) {
     50         final MatchedLine matched = new MatchedLine();
     51 
     52         // Snippet may contain multiple lines separated by "\n".
     53         // Locate the lines of the content that contain the substring.
     54         final int index = SearchUtil.contains(contents, substring);
     55         if (index != -1) {
     56             // Match found.  Find the corresponding line.
     57             int start = index - 1;
     58             while (start > -1) {
     59                 if (contents.charAt(start) == '\n') {
     60                     break;
     61                 }
     62                 start--;
     63             }
     64             int end = index + 1;
     65             while (end < contents.length()) {
     66                 if (contents.charAt(end) == '\n') {
     67                     break;
     68                 }
     69                 end++;
     70             }
     71             matched.line = contents.substring(start + 1, end);
     72             matched.startIndex = index - (start + 1);
     73         }
     74         return matched;
     75     }
     76 
     77     /**
     78      * Similar to String.contains() with two main differences:
     79      * <p>
     80      * 1) Only searches token prefixes.  A token is defined as any combination of letters or
     81      * numbers.
     82      * <p>
     83      * 2) Returns the starting index where the substring is found.
     84      *
     85      * @param value The string to search.
     86      * @param substring The substring to look for.
     87      * @return The starting index where the substring is found. {@literal -1} if substring is not
     88      *         found in value.
     89      */
     90     @VisibleForTesting
     91     static int contains(String value, String substring) {
     92         if (value.length() < substring.length()) {
     93             return -1;
     94         }
     95 
     96         // i18n support
     97         // Generate the code points for the substring once.
     98         // There will be a maximum of substring.length code points.  But may be fewer.
     99         // Since the array length is not an accurate size, we need to keep a separate variable.
    100         final int[] substringCodePoints = new int[substring.length()];
    101         int substringLength = 0;  // may not equal substring.length()!!
    102         for (int i = 0; i < substring.length(); ) {
    103             final int codePoint = Character.codePointAt(substring, i);
    104             substringCodePoints[substringLength] = codePoint;
    105             substringLength++;
    106             i += Character.charCount(codePoint);
    107         }
    108 
    109         for (int i = 0; i < value.length(); i = findNextTokenStart(value, i)) {
    110             int numMatch = 0;
    111             for (int j = i; j < value.length() && numMatch < substringLength; ++numMatch) {
    112                 int valueCp = Character.toLowerCase(value.codePointAt(j));
    113                 int substringCp = substringCodePoints[numMatch];
    114                 if (valueCp != substringCp) {
    115                     break;
    116                 }
    117                 j += Character.charCount(valueCp);
    118             }
    119             if (numMatch == substringLength) {
    120                 return i;
    121             }
    122         }
    123         return -1;
    124     }
    125 
    126     /**
    127      * Find the start of the next token.  A token is composed of letters and numbers. Any other
    128      * character are considered delimiters.
    129      *
    130      * @param line The string to search for the next token.
    131      * @param startIndex The index to start searching.  0 based indexing.
    132      * @return The index for the start of the next token.  line.length() if next token not found.
    133      */
    134     @VisibleForTesting
    135     static int findNextTokenStart(String line, int startIndex) {
    136         int index = startIndex;
    137 
    138         // If already in token, eat remainder of token.
    139         while (index <= line.length()) {
    140             if (index == line.length()) {
    141                 // No more tokens.
    142                 return index;
    143             }
    144             final int codePoint = line.codePointAt(index);
    145             if (!Character.isLetterOrDigit(codePoint)) {
    146                 break;
    147             }
    148             index += Character.charCount(codePoint);
    149         }
    150 
    151         // Out of token, eat all consecutive delimiters.
    152         while (index <= line.length()) {
    153             if (index == line.length()) {
    154                 return index;
    155             }
    156             final int codePoint = line.codePointAt(index);
    157             if (Character.isLetterOrDigit(codePoint)) {
    158                 break;
    159             }
    160             index += Character.charCount(codePoint);
    161         }
    162 
    163         return index;
    164     }
    165 
    166     /**
    167      * Anything other than letter and numbers are considered delimiters.  Remove start and end
    168      * delimiters since they are not relevant to search.
    169      *
    170      * @param query The query string to clean.
    171      * @return The cleaned query. Empty string if all characters are cleaned out.
    172      */
    173     public static String cleanStartAndEndOfSearchQuery(String query) {
    174         int start = 0;
    175         while (start < query.length()) {
    176             int codePoint = query.codePointAt(start);
    177             if (Character.isLetterOrDigit(codePoint)) {
    178                 break;
    179             }
    180             start += Character.charCount(codePoint);
    181         }
    182 
    183         if (start == query.length()) {
    184             // All characters are delimiters.
    185             return "";
    186         }
    187 
    188         int end = query.length() - 1;
    189         while (end > -1) {
    190             if (Character.isLowSurrogate(query.charAt(end))) {
    191                 // Assume valid i18n string.  There should be a matching high surrogate before it.
    192                 end--;
    193             }
    194             int codePoint = query.codePointAt(end);
    195             if (Character.isLetterOrDigit(codePoint)) {
    196                 break;
    197             }
    198             end--;
    199         }
    200 
    201         // end is a letter or digit.
    202         return query.substring(start, end + 1);
    203     }
    204 }
    205