1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.json; 18 19 // Note: this class was written without inspecting the non-free org.json sourcecode. 20 21 /** 22 * Parses a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>) 23 * encoded string into the corresponding object. Most clients of 24 * this class will use only need the {@link #JSONTokener(String) constructor} 25 * and {@link #nextValue} method. Example usage: <pre> 26 * String json = "{" 27 * + " \"query\": \"Pizza\", " 28 * + " \"locations\": [ 94043, 90210 ] " 29 * + "}"; 30 * 31 * JSONObject object = (JSONObject) new JSONTokener(json).nextValue(); 32 * String query = object.getString("query"); 33 * JSONArray locations = object.getJSONArray("locations");</pre> 34 * 35 * <p>For best interoperability and performance use JSON that complies with 36 * RFC 4627, such as that generated by {@link JSONStringer}. For legacy reasons 37 * this parser is lenient, so a successful parse does not indicate that the 38 * input string was valid JSON. All of the following syntax errors will be 39 * ignored: 40 * <ul> 41 * <li>End of line comments starting with {@code //} or {@code #} and ending 42 * with a newline character. 43 * <li>C-style comments starting with {@code /*} and ending with 44 * {@code *}{@code /}. Such comments may not be nested. 45 * <li>Strings that are unquoted or {@code 'single quoted'}. 46 * <li>Hexadecimal integers prefixed with {@code 0x} or {@code 0X}. 47 * <li>Octal integers prefixed with {@code 0}. 48 * <li>Array elements separated by {@code ;}. 49 * <li>Unnecessary array separators. These are interpreted as if null was the 50 * omitted value. 51 * <li>Key-value pairs separated by {@code =} or {@code =>}. 52 * <li>Key-value pairs separated by {@code ;}. 53 * </ul> 54 * 55 * <p>Each tokener may be used to parse a single JSON string. Instances of this 56 * class are not thread safe. Although this class is nonfinal, it was not 57 * designed for inheritance and should not be subclassed. In particular, 58 * self-use by overrideable methods is not specified. See <i>Effective Java</i> 59 * Item 17, "Design and Document or inheritance or else prohibit it" for further 60 * information. 61 */ 62 public class JSONTokener { 63 64 /** The input JSON. */ 65 private final String in; 66 67 /** 68 * The index of the next character to be returned by {@link #next}. When 69 * the input is exhausted, this equals the input's length. 70 */ 71 private int pos; 72 73 /** 74 * @param in JSON encoded string. Null is not permitted and will yield a 75 * tokener that throws {@code NullPointerExceptions} when methods are 76 * called. 77 */ 78 public JSONTokener(String in) { 79 // consume an optional byte order mark (BOM) if it exists 80 if (in != null && in.startsWith("\ufeff")) { 81 in = in.substring(1); 82 } 83 this.in = in; 84 } 85 86 /** 87 * Returns the next value from the input. 88 * 89 * @return a {@link JSONObject}, {@link JSONArray}, String, Boolean, 90 * Integer, Long, Double or {@link JSONObject#NULL}. 91 * @throws JSONException if the input is malformed. 92 */ 93 public Object nextValue() throws JSONException { 94 int c = nextCleanInternal(); 95 switch (c) { 96 case -1: 97 throw syntaxError("End of input"); 98 99 case '{': 100 return readObject(); 101 102 case '[': 103 return readArray(); 104 105 case '\'': 106 case '"': 107 return nextString((char) c); 108 109 default: 110 pos--; 111 return readLiteral(); 112 } 113 } 114 115 private int nextCleanInternal() throws JSONException { 116 while (pos < in.length()) { 117 int c = in.charAt(pos++); 118 switch (c) { 119 case '\t': 120 case ' ': 121 case '\n': 122 case '\r': 123 continue; 124 125 case '/': 126 if (pos == in.length()) { 127 return c; 128 } 129 130 char peek = in.charAt(pos); 131 switch (peek) { 132 case '*': 133 // skip a /* c-style comment */ 134 pos++; 135 int commentEnd = in.indexOf("*/", pos); 136 if (commentEnd == -1) { 137 throw syntaxError("Unterminated comment"); 138 } 139 pos = commentEnd + 2; 140 continue; 141 142 case '/': 143 // skip a // end-of-line comment 144 pos++; 145 skipToEndOfLine(); 146 continue; 147 148 default: 149 return c; 150 } 151 152 case '#': 153 /* 154 * Skip a # hash end-of-line comment. The JSON RFC doesn't 155 * specify this behavior, but it's required to parse 156 * existing documents. See http://b/2571423. 157 */ 158 skipToEndOfLine(); 159 continue; 160 161 default: 162 return c; 163 } 164 } 165 166 return -1; 167 } 168 169 /** 170 * Advances the position until after the next newline character. If the line 171 * is terminated by "\r\n", the '\n' must be consumed as whitespace by the 172 * caller. 173 */ 174 private void skipToEndOfLine() { 175 for (; pos < in.length(); pos++) { 176 char c = in.charAt(pos); 177 if (c == '\r' || c == '\n') { 178 pos++; 179 break; 180 } 181 } 182 } 183 184 /** 185 * Returns the string up to but not including {@code quote}, unescaping any 186 * character escape sequences encountered along the way. The opening quote 187 * should have already been read. This consumes the closing quote, but does 188 * not include it in the returned string. 189 * 190 * @param quote either ' or ". 191 * @throws NumberFormatException if any unicode escape sequences are 192 * malformed. 193 */ 194 public String nextString(char quote) throws JSONException { 195 /* 196 * For strings that are free of escape sequences, we can just extract 197 * the result as a substring of the input. But if we encounter an escape 198 * sequence, we need to use a StringBuilder to compose the result. 199 */ 200 StringBuilder builder = null; 201 202 /* the index of the first character not yet appended to the builder. */ 203 int start = pos; 204 205 while (pos < in.length()) { 206 int c = in.charAt(pos++); 207 if (c == quote) { 208 if (builder == null) { 209 // a new string avoids leaking memory 210 return new String(in.substring(start, pos - 1)); 211 } else { 212 builder.append(in, start, pos - 1); 213 return builder.toString(); 214 } 215 } 216 217 if (c == '\\') { 218 if (pos == in.length()) { 219 throw syntaxError("Unterminated escape sequence"); 220 } 221 if (builder == null) { 222 builder = new StringBuilder(); 223 } 224 builder.append(in, start, pos - 1); 225 builder.append(readEscapeCharacter()); 226 start = pos; 227 } 228 } 229 230 throw syntaxError("Unterminated string"); 231 } 232 233 /** 234 * Unescapes the character identified by the character or characters that 235 * immediately follow a backslash. The backslash '\' should have already 236 * been read. This supports both unicode escapes "u000A" and two-character 237 * escapes "\n". 238 * 239 * @throws NumberFormatException if any unicode escape sequences are 240 * malformed. 241 */ 242 private char readEscapeCharacter() throws JSONException { 243 char escaped = in.charAt(pos++); 244 switch (escaped) { 245 case 'u': 246 if (pos + 4 > in.length()) { 247 throw syntaxError("Unterminated escape sequence"); 248 } 249 String hex = in.substring(pos, pos + 4); 250 pos += 4; 251 return (char) Integer.parseInt(hex, 16); 252 253 case 't': 254 return '\t'; 255 256 case 'b': 257 return '\b'; 258 259 case 'n': 260 return '\n'; 261 262 case 'r': 263 return '\r'; 264 265 case 'f': 266 return '\f'; 267 268 case '\'': 269 case '"': 270 case '\\': 271 default: 272 return escaped; 273 } 274 } 275 276 /** 277 * Reads a null, boolean, numeric or unquoted string literal value. Numeric 278 * values will be returned as an Integer, Long, or Double, in that order of 279 * preference. 280 */ 281 private Object readLiteral() throws JSONException { 282 String literal = nextToInternal("{}[]/\\:,=;# \t\f"); 283 284 if (literal.length() == 0) { 285 throw syntaxError("Expected literal value"); 286 } else if ("null".equalsIgnoreCase(literal)) { 287 return JSONObject.NULL; 288 } else if ("true".equalsIgnoreCase(literal)) { 289 return Boolean.TRUE; 290 } else if ("false".equalsIgnoreCase(literal)) { 291 return Boolean.FALSE; 292 } 293 294 /* try to parse as an integral type... */ 295 if (literal.indexOf('.') == -1) { 296 int base = 10; 297 String number = literal; 298 if (number.startsWith("0x") || number.startsWith("0X")) { 299 number = number.substring(2); 300 base = 16; 301 } else if (number.startsWith("0") && number.length() > 1) { 302 number = number.substring(1); 303 base = 8; 304 } 305 try { 306 long longValue = Long.parseLong(number, base); 307 if (longValue <= Integer.MAX_VALUE && longValue >= Integer.MIN_VALUE) { 308 return (int) longValue; 309 } else { 310 return longValue; 311 } 312 } catch (NumberFormatException e) { 313 /* 314 * This only happens for integral numbers greater than 315 * Long.MAX_VALUE, numbers in exponential form (5e-10) and 316 * unquoted strings. Fall through to try floating point. 317 */ 318 } 319 } 320 321 /* ...next try to parse as a floating point... */ 322 try { 323 return Double.valueOf(literal); 324 } catch (NumberFormatException ignored) { 325 } 326 327 /* ... finally give up. We have an unquoted string */ 328 return new String(literal); // a new string avoids leaking memory 329 } 330 331 /** 332 * Returns the string up to but not including any of the given characters or 333 * a newline character. This does not consume the excluded character. 334 */ 335 private String nextToInternal(String excluded) { 336 int start = pos; 337 for (; pos < in.length(); pos++) { 338 char c = in.charAt(pos); 339 if (c == '\r' || c == '\n' || excluded.indexOf(c) != -1) { 340 return in.substring(start, pos); 341 } 342 } 343 return in.substring(start); 344 } 345 346 /** 347 * Reads a sequence of key/value pairs and the trailing closing brace '}' of 348 * an object. The opening brace '{' should have already been read. 349 */ 350 private JSONObject readObject() throws JSONException { 351 JSONObject result = new JSONObject(); 352 353 /* Peek to see if this is the empty object. */ 354 int first = nextCleanInternal(); 355 if (first == '}') { 356 return result; 357 } else if (first != -1) { 358 pos--; 359 } 360 361 while (true) { 362 Object name = nextValue(); 363 if (!(name instanceof String)) { 364 if (name == null) { 365 throw syntaxError("Names cannot be null"); 366 } else { 367 throw syntaxError("Names must be strings, but " + name 368 + " is of type " + name.getClass().getName()); 369 } 370 } 371 372 /* 373 * Expect the name/value separator to be either a colon ':', an 374 * equals sign '=', or an arrow "=>". The last two are bogus but we 375 * include them because that's what the original implementation did. 376 */ 377 int separator = nextCleanInternal(); 378 if (separator != ':' && separator != '=') { 379 throw syntaxError("Expected ':' after " + name); 380 } 381 if (pos < in.length() && in.charAt(pos) == '>') { 382 pos++; 383 } 384 385 result.put((String) name, nextValue()); 386 387 switch (nextCleanInternal()) { 388 case '}': 389 return result; 390 case ';': 391 case ',': 392 continue; 393 default: 394 throw syntaxError("Unterminated object"); 395 } 396 } 397 } 398 399 /** 400 * Reads a sequence of values and the trailing closing brace ']' of an 401 * array. The opening brace '[' should have already been read. Note that 402 * "[]" yields an empty array, but "[,]" returns a two-element array 403 * equivalent to "[null,null]". 404 */ 405 private JSONArray readArray() throws JSONException { 406 JSONArray result = new JSONArray(); 407 408 /* to cover input that ends with ",]". */ 409 boolean hasTrailingSeparator = false; 410 411 while (true) { 412 switch (nextCleanInternal()) { 413 case -1: 414 throw syntaxError("Unterminated array"); 415 case ']': 416 if (hasTrailingSeparator) { 417 result.put(null); 418 } 419 return result; 420 case ',': 421 case ';': 422 /* A separator without a value first means "null". */ 423 result.put(null); 424 hasTrailingSeparator = true; 425 continue; 426 default: 427 pos--; 428 } 429 430 result.put(nextValue()); 431 432 switch (nextCleanInternal()) { 433 case ']': 434 return result; 435 case ',': 436 case ';': 437 hasTrailingSeparator = true; 438 continue; 439 default: 440 throw syntaxError("Unterminated array"); 441 } 442 } 443 } 444 445 /** 446 * Returns an exception containing the given message plus the current 447 * position and the entire input string. 448 */ 449 public JSONException syntaxError(String message) { 450 return new JSONException(message + this); 451 } 452 453 /** 454 * Returns the current position and the entire input string. 455 */ 456 @Override public String toString() { 457 // consistent with the original implementation 458 return " at character " + pos + " of " + in; 459 } 460 461 /* 462 * Legacy APIs. 463 * 464 * None of the methods below are on the critical path of parsing JSON 465 * documents. They exist only because they were exposed by the original 466 * implementation and may be used by some clients. 467 */ 468 469 /** 470 * Returns true until the input has been exhausted. 471 */ 472 public boolean more() { 473 return pos < in.length(); 474 } 475 476 /** 477 * Returns the next available character, or the null character '\0' if all 478 * input has been exhausted. The return value of this method is ambiguous 479 * for JSON strings that contain the character '\0'. 480 */ 481 public char next() { 482 return pos < in.length() ? in.charAt(pos++) : '\0'; 483 } 484 485 /** 486 * Returns the next available character if it equals {@code c}. Otherwise an 487 * exception is thrown. 488 */ 489 public char next(char c) throws JSONException { 490 char result = next(); 491 if (result != c) { 492 throw syntaxError("Expected " + c + " but was " + result); 493 } 494 return result; 495 } 496 497 /** 498 * Returns the next character that is not whitespace and does not belong to 499 * a comment. If the input is exhausted before such a character can be 500 * found, the null character '\0' is returned. The return value of this 501 * method is ambiguous for JSON strings that contain the character '\0'. 502 */ 503 public char nextClean() throws JSONException { 504 int nextCleanInt = nextCleanInternal(); 505 return nextCleanInt == -1 ? '\0' : (char) nextCleanInt; 506 } 507 508 /** 509 * Returns the next {@code length} characters of the input. 510 * 511 * <p>The returned string shares its backing character array with this 512 * tokener's input string. If a reference to the returned string may be held 513 * indefinitely, you should use {@code new String(result)} to copy it first 514 * to avoid memory leaks. 515 * 516 * @throws JSONException if the remaining input is not long enough to 517 * satisfy this request. 518 */ 519 public String next(int length) throws JSONException { 520 if (pos + length > in.length()) { 521 throw syntaxError(length + " is out of bounds"); 522 } 523 String result = in.substring(pos, pos + length); 524 pos += length; 525 return result; 526 } 527 528 /** 529 * Returns the {@link String#trim trimmed} string holding the characters up 530 * to but not including the first of: 531 * <ul> 532 * <li>any character in {@code excluded} 533 * <li>a newline character '\n' 534 * <li>a carriage return '\r' 535 * </ul> 536 * 537 * <p>The returned string shares its backing character array with this 538 * tokener's input string. If a reference to the returned string may be held 539 * indefinitely, you should use {@code new String(result)} to copy it first 540 * to avoid memory leaks. 541 * 542 * @return a possibly-empty string 543 */ 544 public String nextTo(String excluded) { 545 if (excluded == null) { 546 throw new NullPointerException(); 547 } 548 return nextToInternal(excluded).trim(); 549 } 550 551 /** 552 * Equivalent to {@code nextTo(String.valueOf(excluded))}. 553 */ 554 public String nextTo(char excluded) { 555 return nextToInternal(String.valueOf(excluded)).trim(); 556 } 557 558 /** 559 * Advances past all input up to and including the next occurrence of 560 * {@code thru}. If the remaining input doesn't contain {@code thru}, the 561 * input is exhausted. 562 */ 563 public void skipPast(String thru) { 564 int thruStart = in.indexOf(thru, pos); 565 pos = thruStart == -1 ? in.length() : (thruStart + thru.length()); 566 } 567 568 /** 569 * Advances past all input up to but not including the next occurrence of 570 * {@code to}. If the remaining input doesn't contain {@code to}, the input 571 * is unchanged. 572 */ 573 public char skipTo(char to) { 574 int index = in.indexOf(to, pos); 575 if (index != -1) { 576 pos = index; 577 return to; 578 } else { 579 return '\0'; 580 } 581 } 582 583 /** 584 * Unreads the most recent character of input. If no input characters have 585 * been read, the input is unchanged. 586 */ 587 public void back() { 588 if (--pos == -1) { 589 pos = 0; 590 } 591 } 592 593 /** 594 * Returns the integer [0..15] value for the given hex character, or -1 595 * for non-hex input. 596 * 597 * @param hex a character in the ranges [0-9], [A-F] or [a-f]. Any other 598 * character will yield a -1 result. 599 */ 600 public static int dehexchar(char hex) { 601 if (hex >= '0' && hex <= '9') { 602 return hex - '0'; 603 } else if (hex >= 'A' && hex <= 'F') { 604 return hex - 'A' + 10; 605 } else if (hex >= 'a' && hex <= 'f') { 606 return hex - 'a' + 10; 607 } else { 608 return -1; 609 } 610 } 611 } 612