Home | History | Annotate | Download | only in regex
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package java.util.regex;
     18 
     19 import java.util.ArrayList;
     20 import java.util.List;
     21 import libcore.util.EmptyArray;
     22 
     23 /**
     24  * Used to make {@code String.split} fast (and to help {@code Pattern.split} too).
     25  * @hide
     26  */
     27 public class Splitter {
     28     // The RI allows regular expressions beginning with ] or }, but that's probably a bug.
     29     private static final String METACHARACTERS = "\\?*+[](){}^$.|";
     30 
     31     private Splitter() {
     32     }
     33 
     34     /**
     35      * Returns a result equivalent to {@code s.split(separator, limit)} if it's able
     36      * to compute it more cheaply than ICU, or null if the caller should fall back to
     37      * using ICU.
     38      */
     39     public static String[] fastSplit(String re, String input, int limit) {
     40         // Can we do it cheaply?
     41         int len = re.length();
     42         if (len == 0) {
     43             return null;
     44         }
     45         char ch = re.charAt(0);
     46         if (len == 1 && METACHARACTERS.indexOf(ch) == -1) {
     47             // We're looking for a single non-metacharacter. Easy.
     48         } else if (len == 2 && ch == '\\') {
     49             // We're looking for a quoted character.
     50             // Quoted metacharacters are effectively single non-metacharacters.
     51             ch = re.charAt(1);
     52             if (METACHARACTERS.indexOf(ch) == -1) {
     53                 return null;
     54             }
     55         } else {
     56             return null;
     57         }
     58 
     59         // We can do this cheaply...
     60 
     61         // Unlike Perl, which considers the result of splitting the empty string to be the empty
     62         // array, Java returns an array containing the empty string.
     63         if (input.isEmpty()) {
     64             return new String[] { "" };
     65         }
     66 
     67         // Count separators
     68         int separatorCount = 0;
     69         int begin = 0;
     70         int end;
     71         while (separatorCount + 1 != limit && (end = input.indexOf(ch, begin)) != -1) {
     72             ++separatorCount;
     73             begin = end + 1;
     74         }
     75         int lastPartEnd = input.length();
     76         if (limit == 0 && begin == lastPartEnd) {
     77             // Last part is empty for limit == 0, remove all trailing empty matches.
     78             if (separatorCount == lastPartEnd) {
     79                 // Input contains only separators.
     80                 return EmptyArray.STRING;
     81             }
     82             // Find the beginning of trailing separators.
     83             do {
     84                 --begin;
     85             } while (input.charAt(begin - 1) == ch);
     86             // Reduce separatorCount and fix lastPartEnd.
     87             separatorCount -= input.length() - begin;
     88             lastPartEnd = begin;
     89         }
     90 
     91         // Collect the result parts.
     92         String[] result = new String[separatorCount + 1];
     93         begin = 0;
     94         for (int i = 0; i != separatorCount; ++i) {
     95             end = input.indexOf(ch, begin);
     96             result[i] = input.substring(begin, end);
     97             begin = end + 1;
     98         }
     99         // Add last part.
    100         result[separatorCount] = input.substring(begin, lastPartEnd);
    101         return result;
    102     }
    103 
    104     public static String[] split(Pattern pattern, String re, String input, int limit) {
    105         String[] fastResult = fastSplit(re, input, limit);
    106         if (fastResult != null) {
    107             return fastResult;
    108         }
    109 
    110         // Unlike Perl, which considers the result of splitting the empty string to be the empty
    111         // array, Java returns an array containing the empty string.
    112         if (input.isEmpty()) {
    113             return new String[] { "" };
    114         }
    115 
    116         // Collect text preceding each occurrence of the separator, while there's enough space.
    117         ArrayList<String> list = new ArrayList<String>();
    118         Matcher matcher = new Matcher(pattern, input);
    119         int begin = 0;
    120         while (list.size() + 1 != limit && matcher.find()) {
    121             list.add(input.substring(begin, matcher.start()));
    122             begin = matcher.end();
    123         }
    124         return finishSplit(list, input, begin, limit);
    125     }
    126 
    127     private static String[] finishSplit(List<String> list, String input, int begin, int limit) {
    128         // Add trailing text.
    129         if (begin < input.length()) {
    130             list.add(input.substring(begin));
    131         } else if (limit != 0) {
    132             list.add("");
    133         } else {
    134             // Remove all trailing empty matches in the limit == 0 case.
    135             int i = list.size() - 1;
    136             while (i >= 0 && list.get(i).isEmpty()) {
    137                 list.remove(i);
    138                 i--;
    139             }
    140         }
    141         // Convert to an array.
    142         return list.toArray(new String[list.size()]);
    143     }
    144 }
    145