/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.json;

// Note: this class was written without inspecting the non-free org.json source code.

     21 /**
     22  * Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
     23  * encoded string into the corresponding object. Most clients of
     24  * this class will use only need the {@link #JSONTokener(String) constructor}
     25  * and {@link #nextValue} method. Example usage: <pre>
     26  * String json = "{"
     27  *         + "  \"query\": \"Pizza\", "
     28  *         + "  \"locations\": [ 94043, 90210 ] "
     29  *         + "}";
     30  *
     31  * JSONObject object = (JSONObject) new JSONTokener(json).nextValue();
     32  * String query = object.getString("query");
     33  * JSONArray locations = object.getJSONArray("locations");</pre>
     34  *
     35  * <p>For best interoperability and performance use JSON that complies with
     36  * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons
     37  * this parser is lenient, so a successful parse does not indicate that the
     38  * input string was valid JSON. All of the following syntax errors will be
     39  * ignored:
     40  * <ul>
     41  *   <li>End of line comments starting with {@code //} or {@code #} and ending
     42  *       with a newline character.
     43  *   <li>C-style comments starting with {@code /*} and ending with
     44  *       {@code *}{@code /}. Such comments may not be nested.
     45  *   <li>Strings that are unquoted or {@code 'single quoted'}.
     46  *   <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}.
     47  *   <li>Octal integers prefixed with {@code 0}.
     48  *   <li>Array elements separated by {@code ;}.
     49  *   <li>Unnecessary array separators. These are interpreted as if null was the
     50  *       omitted value.
     51  *   <li>Key-value pairs separated by {@code =} or {@code =>}.
     52  *   <li>Key-value pairs separated by {@code ;}.
     53  * </ul>
     54  *
     55  * <p>Each tokener may be used to parse a single JSON string. Instances of this
     56  * class are not thread safe. Although this class is nonfinal, it was not
     57  * designed for inheritance and should not be subclassed. In particular,
     58  * self-use by overrideable methods is not specified. See <i>Effective Java</i>
     59  * Item 17, "Design and Document or inheritance or else prohibit it" for further
     60  * information.
     61  */
     62 public class JSONTokener {
     63 
     64     /** The input JSON. */
     65     private final String in;
     66 
     67     /**
     68      * The index of the next character to be returned by {@link #next}. When
     69      * the input is exhausted, this equals the input's length.
     70      */
     71     private int pos;
     72 
     73     /**
     74      * @param in JSON encoded string. Null is not permitted and will yield a
     75      *     tokener that throws {@code NullPointerExceptions} when methods are
     76      *     called.
     77      */
     78     public JSONTokener(String in) {
     79         // consume an optional byte order mark (BOM) if it exists
     80         if (in != null && in.startsWith("\ufeff")) {
     81             in = in.substring(1);
     82         }
     83         this.in = in;
     84     }
     85 
     86     /**
     87      * Returns the next value from the input.
     88      *
     89      * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean,
     90      *     Integer, Long, Double or {@link JSONObject#NULL}.
     91      * @throws JSONException if the input is malformed.
     92      */
     93     public Object nextValue() throws JSONException {
     94         int c = nextCleanInternal();
     95         switch (c) {
     96             case -1:
     97                 throw syntaxError("End of input");
     98 
     99             case '{':
    100                 return readObject();
    101 
    102             case '[':
    103                 return readArray();
    104 
    105             case '\'':
    106             case '"':
    107                 return nextString((char) c);
    108 
    109             default:
    110                 pos--;
    111                 return readLiteral();
    112         }
    113     }
    114 
    115     private int nextCleanInternal() throws JSONException {
    116         while (pos < in.length()) {
    117             int c = in.charAt(pos++);
    118             switch (c) {
    119                 case '\t':
    120                 case ' ':
    121                 case '\n':
    122                 case '\r':
    123                     continue;
    124 
    125                 case '/':
    126                     if (pos == in.length()) {
    127                         return c;
    128                     }
    129 
    130                     char peek = in.charAt(pos);
    131                     switch (peek) {
    132                         case '*':
    133                             // skip a /* c-style comment */
    134                             pos++;
    135                             int commentEnd = in.indexOf("*/", pos);
    136                             if (commentEnd == -1) {
    137                                 throw syntaxError("Unterminated comment");
    138                             }
    139                             pos = commentEnd + 2;
    140                             continue;
    141 
    142                         case '/':
    143                             // skip a // end-of-line comment
    144                             pos++;
    145                             skipToEndOfLine();
    146                             continue;
    147 
    148                         default:
    149                             return c;
    150                     }
    151 
    152                 case '#':
    153                     /*
    154                      * Skip a # hash end-of-line comment. The JSON RFC doesn't
    155                      * specify this behavior, but it's required to parse
    156                      * existing documents. See http://b/2571423.
    157                      */
    158                     skipToEndOfLine();
    159                     continue;
    160 
    161                 default:
    162                     return c;
    163             }
    164         }
    165 
    166         return -1;
    167     }
    168 
    169     /**
    170      * Advances the position until after the next newline character. If the line
    171      * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
    172      * caller.
    173      */
    174     private void skipToEndOfLine() {
    175         for (; pos < in.length(); pos++) {
    176             char c = in.charAt(pos);
    177             if (c == '\r' || c == '\n') {
    178                 pos++;
    179                 break;
    180             }
    181         }
    182     }
    183 
    184     /**
    185      * Returns the string up to but not including {@code quote}, unescaping any
    186      * character escape sequences encountered along the way. The opening quote
    187      * should have already been read. This consumes the closing quote, but does
    188      * not include it in the returned string.
    189      *
    190      * @param quote either ' or ".
    191      * @throws NumberFormatException if any unicode escape sequences are
    192      *     malformed.
    193      */
    194     public String nextString(char quote) throws JSONException {
    195         /*
    196          * For strings that are free of escape sequences, we can just extract
    197          * the result as a substring of the input. But if we encounter an escape
    198          * sequence, we need to use a StringBuilder to compose the result.
    199          */
    200         StringBuilder builder = null;
    201 
    202         /* the index of the first character not yet appended to the builder. */
    203         int start = pos;
    204 
    205         while (pos < in.length()) {
    206             int c = in.charAt(pos++);
    207             if (c == quote) {
    208                 if (builder == null) {
    209                     // a new string avoids leaking memory
    210                     return new String(in.substring(start, pos - 1));
    211                 } else {
    212                     builder.append(in, start, pos - 1);
    213                     return builder.toString();
    214                 }
    215             }
    216 
    217             if (c == '\\') {
    218                 if (pos == in.length()) {
    219                     throw syntaxError("Unterminated escape sequence");
    220                 }
    221                 if (builder == null) {
    222                     builder = new StringBuilder();
    223                 }
    224                 builder.append(in, start, pos - 1);
    225                 builder.append(readEscapeCharacter());
    226                 start = pos;
    227             }
    228         }
    229 
    230         throw syntaxError("Unterminated string");
    231     }
    232 
    233     /**
    234      * Unescapes the character identified by the character or characters that
    235      * immediately follow a backslash. The backslash '\' should have already
    236      * been read. This supports both unicode escapes "u000A" and two-character
    237      * escapes "\n".
    238      *
    239      * @throws NumberFormatException if any unicode escape sequences are
    240      *     malformed.
    241      */
    242     private char readEscapeCharacter() throws JSONException {
    243         char escaped = in.charAt(pos++);
    244         switch (escaped) {
    245             case 'u':
    246                 if (pos + 4 > in.length()) {
    247                     throw syntaxError("Unterminated escape sequence");
    248                 }
    249                 String hex = in.substring(pos, pos + 4);
    250                 pos += 4;
    251                 return (char) Integer.parseInt(hex, 16);
    252 
    253             case 't':
    254                 return '\t';
    255 
    256             case 'b':
    257                 return '\b';
    258 
    259             case 'n':
    260                 return '\n';
    261 
    262             case 'r':
    263                 return '\r';
    264 
    265             case 'f':
    266                 return '\f';
    267 
    268             case '\'':
    269             case '"':
    270             case '\\':
    271             default:
    272                 return escaped;
    273         }
    274     }
    275 
    276     /**
    277      * Reads a null, boolean, numeric or unquoted string literal value. Numeric
    278      * values will be returned as an Integer, Long, or Double, in that order of
    279      * preference.
    280      */
    281     private Object readLiteral() throws JSONException {
    282         String literal = nextToInternal("{}[]/\\:,=;# \t\f");
    283 
    284         if (literal.length() == 0) {
    285             throw syntaxError("Expected literal value");
    286         } else if ("null".equalsIgnoreCase(literal)) {
    287             return JSONObject.NULL;
    288         } else if ("true".equalsIgnoreCase(literal)) {
    289             return Boolean.TRUE;
    290         } else if ("false".equalsIgnoreCase(literal)) {
    291             return Boolean.FALSE;
    292         }
    293 
    294         /* try to parse as an integral type... */
    295         if (literal.indexOf('.') == -1) {
    296             int base = 10;
    297             String number = literal;
    298             if (number.startsWith("0x") || number.startsWith("0X")) {
    299                 number = number.substring(2);
    300                 base = 16;
    301             } else if (number.startsWith("0") && number.length() > 1) {
    302                 number = number.substring(1);
    303                 base = 8;
    304             }
    305             try {
    306                 long longValue = Long.parseLong(number, base);
    307                 if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) {
    308                     return (int) longValue;
    309                 } else {
    310                     return longValue;
    311                 }
    312             } catch (NumberFormatException e) {
    313                 /*
    314                  * This only happens for integral numbers greater than
    315                  * Long.MAX_VALUE, numbers in exponential form (5e-10) and
    316                  * unquoted strings. Fall through to try floating point.
    317                  */
    318             }
    319         }
    320 
    321         /* ...next try to parse as a floating point... */
    322         try {
    323             return Double.valueOf(literal);
    324         } catch (NumberFormatException ignored) {
    325         }
    326 
    327         /* ... finally give up. We have an unquoted string */
    328         return new String(literal); // a new string avoids leaking memory
    329     }
    330 
    331     /**
    332      * Returns the string up to but not including any of the given characters or
    333      * a newline character. This does not consume the excluded character.
    334      */
    335     private String nextToInternal(String excluded) {
    336         int start = pos;
    337         for (; pos < in.length(); pos++) {
    338             char c = in.charAt(pos);
    339             if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) {
    340                 return in.substring(start, pos);
    341             }
    342         }
    343         return in.substring(start);
    344     }
    345 
    346     /**
    347      * Reads a sequence of key/value pairs and the trailing closing brace '}' of
    348      * an object. The opening brace '{' should have already been read.
    349      */
    350     private JSONObject readObject() throws JSONException {
    351         JSONObject result = new JSONObject();
    352 
    353         /* Peek to see if this is the empty object. */
    354         int first = nextCleanInternal();
    355         if (first == '}') {
    356             return result;
    357         } else if (first != -1) {
    358             pos--;
    359         }
    360 
    361         while (true) {
    362             Object name = nextValue();
    363             if (!(name instanceof String)) {
    364                 if (name == null) {
    365                     throw syntaxError("Names cannot be null");
    366                 } else {
    367                     throw syntaxError("Names must be strings, but " + name
    368                             + " is of type " + name.getClass().getName());
    369                 }
    370             }
    371 
    372             /*
    373              * Expect the name/value separator to be either a colon ':', an
    374              * equals sign '=', or an arrow "=>". The last two are bogus but we
    375              * include them because that's what the original implementation did.
    376              */
    377             int separator = nextCleanInternal();
    378             if (separator != ':' && separator != '=') {
    379                 throw syntaxError("Expected ':' after " + name);
    380             }
    381             if (pos < in.length() && in.charAt(pos) == '>') {
    382                 pos++;
    383             }
    384 
    385             result.put((String) name, nextValue());
    386 
    387             switch (nextCleanInternal()) {
    388                 case '}':
    389                     return result;
    390                 case ';':
    391                 case ',':
    392                     continue;
    393                 default:
    394                     throw syntaxError("Unterminated object");
    395             }
    396         }
    397     }
    398 
    399     /**
    400      * Reads a sequence of values and the trailing closing brace ']' of an
    401      * array. The opening brace '[' should have already been read. Note that
    402      * "[]" yields an empty array, but "[,]" returns a two-element array
    403      * equivalent to "[null,null]".
    404      */
    405     private JSONArray readArray() throws JSONException {
    406         JSONArray result = new JSONArray();
    407 
    408         /* to cover input that ends with ",]". */
    409         boolean hasTrailingSeparator = false;
    410 
    411         while (true) {
    412             switch (nextCleanInternal()) {
    413                 case -1:
    414                     throw syntaxError("Unterminated array");
    415                 case ']':
    416                     if (hasTrailingSeparator) {
    417                         result.put(null);
    418                     }
    419                     return result;
    420                 case ',':
    421                 case ';':
    422                     /* A separator without a value first means "null". */
    423                     result.put(null);
    424                     hasTrailingSeparator = true;
    425                     continue;
    426                 default:
    427                     pos--;
    428             }
    429 
    430             result.put(nextValue());
    431 
    432             switch (nextCleanInternal()) {
    433                 case ']':
    434                     return result;
    435                 case ',':
    436                 case ';':
    437                     hasTrailingSeparator = true;
    438                     continue;
    439                 default:
    440                     throw syntaxError("Unterminated array");
    441             }
    442         }
    443     }
    444 
    445     /**
    446      * Returns an exception containing the given message plus the current
    447      * position and the entire input string.
    448      */
    449     public JSONException syntaxError(String message) {
    450         return new JSONException(message + this);
    451     }
    452 
    453     /**
    454      * Returns the current position and the entire input string.
    455      */
    456     @Override public String toString() {
    457         // consistent with the original implementation
    458         return " at character " + pos + " of " + in;
    459     }
    460 
    461     /*
    462      * Legacy APIs.
    463      *
    464      * None of the methods below are on the critical path of parsing JSON
    465      * documents. They exist only because they were exposed by the original
    466      * implementation and may be used by some clients.
    467      */
    468 
    469     /**
    470      * Returns true until the input has been exhausted.
    471      */
    472     public boolean more() {
    473         return pos < in.length();
    474     }
    475 
    476     /**
    477      * Returns the next available character, or the null character '\0' if all
    478      * input has been exhausted. The return value of this method is ambiguous
    479      * for JSON strings that contain the character '\0'.
    480      */
    481     public char next() {
    482         return pos < in.length() ? in.charAt(pos++) : '\0';
    483     }
    484 
    485     /**
    486      * Returns the next available character if it equals {@code c}. Otherwise an
    487      * exception is thrown.
    488      */
    489     public char next(char c) throws JSONException {
    490         char result = next();
    491         if (result != c) {
    492             throw syntaxError("Expected " + c + " but was " + result);
    493         }
    494         return result;
    495     }
    496 
    497     /**
    498      * Returns the next character that is not whitespace and does not belong to
    499      * a comment. If the input is exhausted before such a character can be
    500      * found, the null character '\0' is returned. The return value of this
    501      * method is ambiguous for JSON strings that contain the character '\0'.
    502      */
    503     public char nextClean() throws JSONException {
    504         int nextCleanInt = nextCleanInternal();
    505         return nextCleanInt == -1 ? '\0' : (char) nextCleanInt;
    506     }
    507 
    508     /**
    509      * Returns the next {@code length} characters of the input.
    510      *
    511      * <p>The returned string shares its backing character array with this
    512      * tokener's input string. If a reference to the returned string may be held
    513      * indefinitely, you should use {@code new String(result)} to copy it first
    514      * to avoid memory leaks.
    515      *
    516      * @throws JSONException if the remaining input is not long enough to
    517      *     satisfy this request.
    518      */
    519     public String next(int length) throws JSONException {
    520         if (pos + length > in.length()) {
    521             throw syntaxError(length + " is out of bounds");
    522         }
    523         String result = in.substring(pos, pos + length);
    524         pos += length;
    525         return result;
    526     }
    527 
    528     /**
    529      * Returns the {@link String#trim trimmed} string holding the characters up
    530      * to but not including the first of:
    531      * <ul>
    532      *   <li>any character in {@code excluded}
    533      *   <li>a newline character '\n'
    534      *   <li>a carriage return '\r'
    535      * </ul>
    536      *
    537      * <p>The returned string shares its backing character array with this
    538      * tokener's input string. If a reference to the returned string may be held
    539      * indefinitely, you should use {@code new String(result)} to copy it first
    540      * to avoid memory leaks.
    541      *
    542      * @return a possibly-empty string
    543      */
    544     public String nextTo(String excluded) {
    545         if (excluded == null) {
    546             throw new NullPointerException();
    547         }
    548         return nextToInternal(excluded).trim();
    549     }
    550 
    551     /**
    552      * Equivalent to {@code nextTo(String.valueOf(excluded))}.
    553      */
    554     public String nextTo(char excluded) {
    555         return nextToInternal(String.valueOf(excluded)).trim();
    556     }
    557 
    558     /**
    559      * Advances past all input up to and including the next occurrence of
    560      * {@code thru}. If the remaining input doesn't contain {@code thru}, the
    561      * input is exhausted.
    562      */
    563     public void skipPast(String thru) {
    564         int thruStart = in.indexOf(thru, pos);
    565         pos = thruStart == -1 ? in.length() : (thruStart + thru.length());
    566     }
    567 
    568     /**
    569      * Advances past all input up to but not including the next occurrence of
    570      * {@code to}. If the remaining input doesn't contain {@code to}, the input
    571      * is unchanged.
    572      */
    573     public char skipTo(char to) {
    574         int index = in.indexOf(to, pos);
    575         if (index != -1) {
    576             pos = index;
    577             return to;
    578         } else {
    579             return '\0';
    580         }
    581     }
    582 
    583     /**
    584      * Unreads the most recent character of input. If no input characters have
    585      * been read, the input is unchanged.
    586      */
    587     public void back() {
    588         if (--pos == -1) {
    589             pos = 0;
    590         }
    591     }
    592 
    593     /**
    594      * Returns the integer [0..15] value for the given hex character, or -1
    595      * for non-hex input.
    596      *
    597      * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other
    598      *     character will yield a -1 result.
    599      */
    600     public static int dehexchar(char hex) {
    601         if (hex >= '0' && hex <= '9') {
    602             return hex - '0';
    603         } else if (hex >= 'A' && hex <= 'F') {
    604             return hex - 'A' + 10;
    605         } else if (hex >= 'a' && hex <= 'f') {
    606             return hex - 'a' + 10;
    607         } else {
    608             return -1;
    609         }
    610     }
    611 }
    612