1 // Copyright (c) 2003-2004 Brian Wellington (bwelling (at) xbill.org) 2 // 3 // Copyright (C) 2003-2004 Nominum, Inc. 4 // 5 // Permission to use, copy, modify, and distribute this software for any 6 // purpose with or without fee is hereby granted, provided that the above 7 // copyright notice and this permission notice appear in all copies. 8 // 9 // THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES 10 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR ANY 12 // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 15 // OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 // 17 18 package org.xbill.DNS; 19 20 import java.io.*; 21 import java.net.*; 22 23 import org.xbill.DNS.utils.*; 24 25 /** 26 * Tokenizer is used to parse DNS records and zones from text format, 27 * 28 * @author Brian Wellington 29 * @author Bob Halley 30 */ 31 32 public class Tokenizer { 33 34 private static String delim = " \t\n;()\""; 35 private static String quotes = "\""; 36 37 /** End of file */ 38 public static final int EOF = 0; 39 40 /** End of line */ 41 public static final int EOL = 1; 42 43 /** Whitespace; only returned when wantWhitespace is set */ 44 public static final int WHITESPACE = 2; 45 46 /** An identifier (unquoted string) */ 47 public static final int IDENTIFIER = 3; 48 49 /** A quoted string */ 50 public static final int QUOTED_STRING = 4; 51 52 /** A comment; only returned when wantComment is set */ 53 public static final int COMMENT = 5; 54 55 private PushbackInputStream is; 56 private boolean ungottenToken; 57 private int multiline; 58 private boolean quoting; 59 private String delimiters; 60 private Token current; 61 private StringBuffer sb; 62 private boolean wantClose; 63 64 private String filename; 65 private int line; 66 67 public static class Token { 68 /** The type of token. */ 69 public int type; 70 71 /** The value of the token, or null for tokens without values. */ 72 public String value; 73 74 private 75 Token() { 76 type = -1; 77 value = null; 78 } 79 80 private Token 81 set(int type, StringBuffer value) { 82 if (type < 0) 83 throw new IllegalArgumentException(); 84 this.type = type; 85 this.value = value == null ? null : value.toString(); 86 return this; 87 } 88 89 /** 90 * Converts the token to a string containing a representation useful 91 * for debugging. 92 */ 93 public String 94 toString() { 95 switch (type) { 96 case EOF: 97 return "<eof>"; 98 case EOL: 99 return "<eol>"; 100 case WHITESPACE: 101 return "<whitespace>"; 102 case IDENTIFIER: 103 return "<identifier: " + value + ">"; 104 case QUOTED_STRING: 105 return "<quoted_string: " + value + ">"; 106 case COMMENT: 107 return "<comment: " + value + ">"; 108 default: 109 return "<unknown>"; 110 } 111 } 112 113 /** Indicates whether this token contains a string. */ 114 public boolean 115 isString() { 116 return (type == IDENTIFIER || type == QUOTED_STRING); 117 } 118 119 /** Indicates whether this token contains an EOL or EOF. */ 120 public boolean 121 isEOL() { 122 return (type == EOL || type == EOF); 123 } 124 } 125 126 static class TokenizerException extends TextParseException { 127 String message; 128 129 public 130 TokenizerException(String filename, int line, String message) { 131 super(filename + ":" + line + ": " + message); 132 this.message = message; 133 } 134 135 public String 136 getBaseMessage() { 137 return message; 138 } 139 } 140 141 /** 142 * Creates a Tokenizer from an arbitrary input stream. 143 * @param is The InputStream to tokenize. 144 */ 145 public 146 Tokenizer(InputStream is) { 147 if (!(is instanceof BufferedInputStream)) 148 is = new BufferedInputStream(is); 149 this.is = new PushbackInputStream(is, 2); 150 ungottenToken = false; 151 multiline = 0; 152 quoting = false; 153 delimiters = delim; 154 current = new Token(); 155 sb = new StringBuffer(); 156 filename = "<none>"; 157 line = 1; 158 } 159 160 /** 161 * Creates a Tokenizer from a string. 162 * @param s The String to tokenize. 163 */ 164 public 165 Tokenizer(String s) { 166 this(new ByteArrayInputStream(s.getBytes())); 167 } 168 169 /** 170 * Creates a Tokenizer from a file. 171 * @param f The File to tokenize. 172 */ 173 public 174 Tokenizer(File f) throws FileNotFoundException { 175 this(new FileInputStream(f)); 176 wantClose = true; 177 filename = f.getName(); 178 } 179 180 private int 181 getChar() throws IOException { 182 int c = is.read(); 183 if (c == '\r') { 184 int next = is.read(); 185 if (next != '\n') 186 is.unread(next); 187 c = '\n'; 188 } 189 if (c == '\n') 190 line++; 191 return c; 192 } 193 194 private void 195 ungetChar(int c) throws IOException { 196 if (c == -1) 197 return; 198 is.unread(c); 199 if (c == '\n') 200 line--; 201 } 202 203 private int 204 skipWhitespace() throws IOException { 205 int skipped = 0; 206 while (true) { 207 int c = getChar(); 208 if (c != ' ' && c != '\t') { 209 if (!(c == '\n' && multiline > 0)) { 210 ungetChar(c); 211 return skipped; 212 } 213 } 214 skipped++; 215 } 216 } 217 218 private void 219 checkUnbalancedParens() throws TextParseException { 220 if (multiline > 0) 221 throw exception("unbalanced parentheses"); 222 } 223 224 /** 225 * Gets the next token from a tokenizer. 226 * @param wantWhitespace If true, leading whitespace will be returned as a 227 * token. 228 * @param wantComment If true, comments are returned as tokens. 229 * @return The next token in the stream. 230 * @throws TextParseException The input was invalid. 231 * @throws IOException An I/O error occurred. 232 */ 233 public Token 234 get(boolean wantWhitespace, boolean wantComment) throws IOException { 235 int type; 236 int c; 237 238 if (ungottenToken) { 239 ungottenToken = false; 240 if (current.type == WHITESPACE) { 241 if (wantWhitespace) 242 return current; 243 } else if (current.type == COMMENT) { 244 if (wantComment) 245 return current; 246 } else { 247 if (current.type == EOL) 248 line++; 249 return current; 250 } 251 } 252 int skipped = skipWhitespace(); 253 if (skipped > 0 && wantWhitespace) 254 return current.set(WHITESPACE, null); 255 type = IDENTIFIER; 256 sb.setLength(0); 257 while (true) { 258 c = getChar(); 259 if (c == -1 || delimiters.indexOf(c) != -1) { 260 if (c == -1) { 261 if (quoting) 262 throw exception("EOF in " + 263 "quoted string"); 264 else if (sb.length() == 0) 265 return current.set(EOF, null); 266 else 267 return current.set(type, sb); 268 } 269 if (sb.length() == 0 && type != QUOTED_STRING) { 270 if (c == '(') { 271 multiline++; 272 skipWhitespace(); 273 continue; 274 } else if (c == ')') { 275 if (multiline <= 0) 276 throw exception("invalid " + 277 "close " + 278 "parenthesis"); 279 multiline--; 280 skipWhitespace(); 281 continue; 282 } else if (c == '"') { 283 if (!quoting) { 284 quoting = true; 285 delimiters = quotes; 286 type = QUOTED_STRING; 287 } else { 288 quoting = false; 289 delimiters = delim; 290 skipWhitespace(); 291 } 292 continue; 293 } else if (c == '\n') { 294 return current.set(EOL, null); 295 } else if (c == ';') { 296 while (true) { 297 c = getChar(); 298 if (c == '\n' || c == -1) 299 break; 300 sb.append((char)c); 301 } 302 if (wantComment) { 303 ungetChar(c); 304 return current.set(COMMENT, sb); 305 } else if (c == -1 && 306 type != QUOTED_STRING) 307 { 308 checkUnbalancedParens(); 309 return current.set(EOF, null); 310 } else if (multiline > 0) { 311 skipWhitespace(); 312 sb.setLength(0); 313 continue; 314 } else 315 return current.set(EOL, null); 316 } else 317 throw new IllegalStateException(); 318 } else 319 ungetChar(c); 320 break; 321 } else if (c == '\\') { 322 c = getChar(); 323 if (c == -1) 324 throw exception("unterminated escape sequence"); 325 sb.append('\\'); 326 } else if (quoting && c == '\n') { 327 throw exception("newline in quoted string"); 328 } 329 sb.append((char)c); 330 } 331 if (sb.length() == 0 && type != QUOTED_STRING) { 332 checkUnbalancedParens(); 333 return current.set(EOF, null); 334 } 335 return current.set(type, sb); 336 } 337 338 /** 339 * Gets the next token from a tokenizer, ignoring whitespace and comments. 340 * @return The next token in the stream. 341 * @throws TextParseException The input was invalid. 342 * @throws IOException An I/O error occurred. 343 */ 344 public Token 345 get() throws IOException { 346 return get(false, false); 347 } 348 349 /** 350 * Returns a token to the stream, so that it will be returned by the next call 351 * to get(). 352 * @throws IllegalStateException There are already ungotten tokens. 353 */ 354 public void 355 unget() { 356 if (ungottenToken) 357 throw new IllegalStateException 358 ("Cannot unget multiple tokens"); 359 if (current.type == EOL) 360 line--; 361 ungottenToken = true; 362 } 363 364 /** 365 * Gets the next token from a tokenizer and converts it to a string. 366 * @return The next token in the stream, as a string. 367 * @throws TextParseException The input was invalid or not a string. 368 * @throws IOException An I/O error occurred. 369 */ 370 public String 371 getString() throws IOException { 372 Token next = get(); 373 if (!next.isString()) { 374 throw exception("expected a string"); 375 } 376 return next.value; 377 } 378 379 private String 380 _getIdentifier(String expected) throws IOException { 381 Token next = get(); 382 if (next.type != IDENTIFIER) 383 throw exception("expected " + expected); 384 return next.value; 385 } 386 387 /** 388 * Gets the next token from a tokenizer, ensures it is an unquoted string, 389 * and converts it to a string. 390 * @return The next token in the stream, as a string. 391 * @throws TextParseException The input was invalid or not an unquoted string. 392 * @throws IOException An I/O error occurred. 393 */ 394 public String 395 getIdentifier() throws IOException { 396 return _getIdentifier("an identifier"); 397 } 398 399 /** 400 * Gets the next token from a tokenizer and converts it to a long. 401 * @return The next token in the stream, as a long. 402 * @throws TextParseException The input was invalid or not a long. 403 * @throws IOException An I/O error occurred. 404 */ 405 public long 406 getLong() throws IOException { 407 String next = _getIdentifier("an integer"); 408 if (!Character.isDigit(next.charAt(0))) 409 throw exception("expected an integer"); 410 try { 411 return Long.parseLong(next); 412 } catch (NumberFormatException e) { 413 throw exception("expected an integer"); 414 } 415 } 416 417 /** 418 * Gets the next token from a tokenizer and converts it to an unsigned 32 bit 419 * integer. 420 * @return The next token in the stream, as an unsigned 32 bit integer. 421 * @throws TextParseException The input was invalid or not an unsigned 32 422 * bit integer. 423 * @throws IOException An I/O error occurred. 424 */ 425 public long 426 getUInt32() throws IOException { 427 long l = getLong(); 428 if (l < 0 || l > 0xFFFFFFFFL) 429 throw exception("expected an 32 bit unsigned integer"); 430 return l; 431 } 432 433 /** 434 * Gets the next token from a tokenizer and converts it to an unsigned 16 bit 435 * integer. 436 * @return The next token in the stream, as an unsigned 16 bit integer. 437 * @throws TextParseException The input was invalid or not an unsigned 16 438 * bit integer. 439 * @throws IOException An I/O error occurred. 440 */ 441 public int 442 getUInt16() throws IOException { 443 long l = getLong(); 444 if (l < 0 || l > 0xFFFFL) 445 throw exception("expected an 16 bit unsigned integer"); 446 return (int) l; 447 } 448 449 /** 450 * Gets the next token from a tokenizer and converts it to an unsigned 8 bit 451 * integer. 452 * @return The next token in the stream, as an unsigned 8 bit integer. 453 * @throws TextParseException The input was invalid or not an unsigned 8 454 * bit integer. 455 * @throws IOException An I/O error occurred. 456 */ 457 public int 458 getUInt8() throws IOException { 459 long l = getLong(); 460 if (l < 0 || l > 0xFFL) 461 throw exception("expected an 8 bit unsigned integer"); 462 return (int) l; 463 } 464 465 /** 466 * Gets the next token from a tokenizer and parses it as a TTL. 467 * @return The next token in the stream, as an unsigned 32 bit integer. 468 * @throws TextParseException The input was not valid. 469 * @throws IOException An I/O error occurred. 470 * @see TTL 471 */ 472 public long 473 getTTL() throws IOException { 474 String next = _getIdentifier("a TTL value"); 475 try { 476 return TTL.parseTTL(next); 477 } 478 catch (NumberFormatException e) { 479 throw exception("expected a TTL value"); 480 } 481 } 482 483 /** 484 * Gets the next token from a tokenizer and parses it as if it were a TTL. 485 * @return The next token in the stream, as an unsigned 32 bit integer. 486 * @throws TextParseException The input was not valid. 487 * @throws IOException An I/O error occurred. 488 * @see TTL 489 */ 490 public long 491 getTTLLike() throws IOException { 492 String next = _getIdentifier("a TTL-like value"); 493 try { 494 return TTL.parse(next, false); 495 } 496 catch (NumberFormatException e) { 497 throw exception("expected a TTL-like value"); 498 } 499 } 500 501 /** 502 * Gets the next token from a tokenizer and converts it to a name. 503 * @param origin The origin to append to relative names. 504 * @return The next token in the stream, as a name. 505 * @throws TextParseException The input was invalid or not a valid name. 506 * @throws IOException An I/O error occurred. 507 * @throws RelativeNameException The parsed name was relative, even with the 508 * origin. 509 * @see Name 510 */ 511 public Name 512 getName(Name origin) throws IOException { 513 String next = _getIdentifier("a name"); 514 try { 515 Name name = Name.fromString(next, origin); 516 if (!name.isAbsolute()) 517 throw new RelativeNameException(name); 518 return name; 519 } 520 catch (TextParseException e) { 521 throw exception(e.getMessage()); 522 } 523 } 524 525 /** 526 * Gets the next token from a tokenizer and converts it to an IP Address. 527 * @param family The address family. 528 * @return The next token in the stream, as an InetAddress 529 * @throws TextParseException The input was invalid or not a valid address. 530 * @throws IOException An I/O error occurred. 531 * @see Address 532 */ 533 public InetAddress 534 getAddress(int family) throws IOException { 535 String next = _getIdentifier("an address"); 536 try { 537 return Address.getByAddress(next, family); 538 } 539 catch (UnknownHostException e) { 540 throw exception(e.getMessage()); 541 } 542 } 543 544 /** 545 * Gets the next token from a tokenizer, which must be an EOL or EOF. 546 * @throws TextParseException The input was invalid or not an EOL or EOF token. 547 * @throws IOException An I/O error occurred. 548 */ 549 public void 550 getEOL() throws IOException { 551 Token next = get(); 552 if (next.type != EOL && next.type != EOF) { 553 throw exception("expected EOL or EOF"); 554 } 555 } 556 557 /** 558 * Returns a concatenation of the remaining strings from a Tokenizer. 559 */ 560 private String 561 remainingStrings() throws IOException { 562 StringBuffer buffer = null; 563 while (true) { 564 Tokenizer.Token t = get(); 565 if (!t.isString()) 566 break; 567 if (buffer == null) 568 buffer = new StringBuffer(); 569 buffer.append(t.value); 570 } 571 unget(); 572 if (buffer == null) 573 return null; 574 return buffer.toString(); 575 } 576 577 /** 578 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates 579 * them together, and converts the base64 encoded data to a byte array. 580 * @param required If true, an exception will be thrown if no strings remain; 581 * otherwise null be be returned. 582 * @return The byte array containing the decoded strings, or null if there 583 * were no strings to decode. 584 * @throws TextParseException The input was invalid. 585 * @throws IOException An I/O error occurred. 586 */ 587 public byte [] 588 getBase64(boolean required) throws IOException { 589 String s = remainingStrings(); 590 if (s == null) { 591 if (required) 592 throw exception("expected base64 encoded string"); 593 else 594 return null; 595 } 596 byte [] array = base64.fromString(s); 597 if (array == null) 598 throw exception("invalid base64 encoding"); 599 return array; 600 } 601 602 /** 603 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates 604 * them together, and converts the base64 encoded data to a byte array. 605 * @return The byte array containing the decoded strings, or null if there 606 * were no strings to decode. 607 * @throws TextParseException The input was invalid. 608 * @throws IOException An I/O error occurred. 609 */ 610 public byte [] 611 getBase64() throws IOException { 612 return getBase64(false); 613 } 614 615 /** 616 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates 617 * them together, and converts the hex encoded data to a byte array. 618 * @param required If true, an exception will be thrown if no strings remain; 619 * otherwise null be be returned. 620 * @return The byte array containing the decoded strings, or null if there 621 * were no strings to decode. 622 * @throws TextParseException The input was invalid. 623 * @throws IOException An I/O error occurred. 624 */ 625 public byte [] 626 getHex(boolean required) throws IOException { 627 String s = remainingStrings(); 628 if (s == null) { 629 if (required) 630 throw exception("expected hex encoded string"); 631 else 632 return null; 633 } 634 byte [] array = base16.fromString(s); 635 if (array == null) 636 throw exception("invalid hex encoding"); 637 return array; 638 } 639 640 /** 641 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates 642 * them together, and converts the hex encoded data to a byte array. 643 * @return The byte array containing the decoded strings, or null if there 644 * were no strings to decode. 645 * @throws TextParseException The input was invalid. 646 * @throws IOException An I/O error occurred. 647 */ 648 public byte [] 649 getHex() throws IOException { 650 return getHex(false); 651 } 652 653 /** 654 * Gets the next token from a tokenizer and decodes it as hex. 655 * @return The byte array containing the decoded string. 656 * @throws TextParseException The input was invalid. 657 * @throws IOException An I/O error occurred. 658 */ 659 public byte [] 660 getHexString() throws IOException { 661 String next = _getIdentifier("a hex string"); 662 byte [] array = base16.fromString(next); 663 if (array == null) 664 throw exception("invalid hex encoding"); 665 return array; 666 } 667 668 /** 669 * Gets the next token from a tokenizer and decodes it as base32. 670 * @param b32 The base32 context to decode with. 671 * @return The byte array containing the decoded string. 672 * @throws TextParseException The input was invalid. 673 * @throws IOException An I/O error occurred. 674 */ 675 public byte [] 676 getBase32String(base32 b32) throws IOException { 677 String next = _getIdentifier("a base32 string"); 678 byte [] array = b32.fromString(next); 679 if (array == null) 680 throw exception("invalid base32 encoding"); 681 return array; 682 } 683 684 /** 685 * Creates an exception which includes the current state in the error message 686 * @param s The error message to include. 687 * @return The exception to be thrown 688 */ 689 public TextParseException 690 exception(String s) { 691 return new TokenizerException(filename, line, s); 692 } 693 694 /** 695 * Closes any files opened by this tokenizer. 696 */ 697 public void 698 close() { 699 if (wantClose) { 700 try { 701 is.close(); 702 } 703 catch (IOException e) { 704 } 705 } 706 } 707 708 protected void 709 finalize() { 710 close(); 711 } 712 713 } 714