Home | History | Annotate | Download | only in util
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 package com.android.tradefed.util;
     17 
     18 import com.android.ddmlib.Log;
     19 
     20 import java.util.ArrayList;
     21 import java.util.regex.Matcher;
     22 import java.util.regex.Pattern;
     23 
     24 public class QuotationAwareTokenizer {
     25     private static final String LOG_TAG = "TOKEN";
     26 
     27     /**
     28      * Tokenizes the string, splitting on specified delimiter.  Does not split between consecutive,
     29      * unquoted double-quote marks.
     30      * <p/>
     31      * How the tokenizer works:
     32      * <ol>
     33      *     <li> Split the string into "characters" where each "character" is either an escaped
     34      *          character like \" (that is, "\\\"") or a single real character like f (just "f").
     35      *     <li> For each "character"
     36      *     <ol>
     37      *         <li> If it's a space, finish a token unless we're being quoted
     38      *         <li> If it's a quotation mark, flip the "we're being quoted" bit
     39      *         <li> Otherwise, add it to the token being built
     40      *     </ol>
     41      *     <li> At EOL, we typically haven't added the final token to the (tokens) {@link ArrayList}
     42      *     <ol>
     43      *         <li> If the last "character" is an escape character, throw an exception; that's not
     44      *              valid
     45      *         <li> If we're in the middle of a quotation, throw an exception; that's not valid
     46      *         <li> Otherwise, add the final token to (tokens)
     47      *     </ol>
     48      *     <li> Return a String[] version of (tokens)
     49      * </ol>
     50      *
     51      * @param line A {@link String} to be tokenized
     52      * @return A tokenized version of the string
     53      * @throws IllegalArgumentException if the line cannot be parsed
     54      */
     55     public static String[] tokenizeLine(String line, String delim) throws IllegalArgumentException {
     56         if (line == null) {
     57             throw new IllegalArgumentException("line is null");
     58         }
     59 
     60         ArrayList<String> tokens = new ArrayList<String>();
     61         StringBuilder token = new StringBuilder();
     62         // This pattern matches an escaped character or a character.  Escaped char takes precedence
     63         final Pattern charPattern = Pattern.compile("\\\\.|.");
     64         final Matcher charMatcher = charPattern.matcher(line);
     65         String aChar = "";
     66         boolean quotation = false;
     67 
     68         Log.d(LOG_TAG, String.format("Trying to tokenize the line '%s'", line));
     69         while (charMatcher.find()) {
     70             aChar = charMatcher.group();
     71 
     72             if (delim.equals(aChar)) {
     73                 if (quotation) {
     74                     // inside a quotation; treat spaces as part of the token
     75                     token.append(aChar);
     76                 } else {
     77                     if (token.length() > 0) {
     78                         // this is the end of a non-empty token; dump it in our list of tokens,
     79                         // clear our temp storage, and keep rolling
     80                         Log.v(LOG_TAG, String.format("Finished token '%s'", token.toString()));
     81                         tokens.add(token.toString());
     82                         token.delete(0, token.length());
     83                     }
     84                     // otherwise, this is the non-first in a sequence of spaces; ignore.
     85                 }
     86             } else if ("\"".equals(aChar)) {
     87                 // unescaped quotation mark; flip quotation state
     88                 Log.v(LOG_TAG, "Flipped quotation state");
     89                 quotation ^= true;
     90             } else {
     91                 // default case: add the character to the token being built
     92                 token.append(aChar);
     93             }
     94         }
     95 
     96         if (quotation || "\\".equals(aChar)) {
     97             // We ended in a quotation or with an escape character; this is not valid
     98             throw new IllegalArgumentException("Unexpected EOL in a quotation or after an escape " +
     99                     "character");
    100         }
    101 
    102         // Add the final token to the tokens array.
    103         if (token.length() > 0) {
    104             Log.v(LOG_TAG, String.format("Finished final token '%s'", token.toString()));
    105             tokens.add(token.toString());
    106             token.delete(0, token.length());
    107         }
    108 
    109         String[] tokensArray = new String[tokens.size()];
    110         return tokens.toArray(tokensArray);
    111     }
    112 
    113     /**
    114      * Tokenizes the string, splitting on spaces.  Does not split between consecutive,
    115      * unquoted double-quote marks.
    116      * <p>
    117      * See also {@link #tokenizeLine(String, String)}
    118      */
    119     public static String[] tokenizeLine(String line) throws IllegalArgumentException {
    120         return tokenizeLine(line, " ");
    121     }
    122 
    123     /**
    124      * Perform the reverse of {@link #tokenizeLine(String)}. <br/>
    125      * Given array of tokens, combine them into a single line.
    126      *
    127      * @param tokens
    128      * @return A {@link String} created from all the tokens.
    129      */
    130     public static String combineTokens(String... tokens) {
    131         final Pattern wsPattern = Pattern.compile("\\s");
    132         StringBuilder sb = new StringBuilder();
    133         for (int i=0; i < tokens.length; i++) {
    134             final String token = tokens[i];
    135             final Matcher wsMatcher = wsPattern.matcher(token);
    136             if (wsMatcher.find()) {
    137                 sb.append('"');
    138                 sb.append(token);
    139                 sb.append('"');
    140             } else {
    141                 sb.append(token);
    142             }
    143             if (i < (tokens.length - 1)) {
    144                 // don't output space after last token
    145                 sb.append(' ');
    146             }
    147         }
    148         return sb.toString();
    149     }
    150 }
    151