Home | History | Annotate | Download | only in DNS
      1 // Copyright (c) 2003-2004 Brian Wellington (bwelling (at) xbill.org)
      2 //
      3 // Copyright (C) 2003-2004 Nominum, Inc.
      4 //
      5 // Permission to use, copy, modify, and distribute this software for any
      6 // purpose with or without fee is hereby granted, provided that the above
      7 // copyright notice and this permission notice appear in all copies.
      8 //
      9 // THE SOFTWARE IS PROVIDED "AS IS" AND NOMINUM DISCLAIMS ALL WARRANTIES
     10 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     11 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL NOMINUM BE LIABLE FOR ANY
     12 // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     13 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     14 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
     15 // OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     16 //
     17 
     18 package org.xbill.DNS;
     19 
     20 import java.io.*;
     21 import java.net.*;
     22 
     23 import org.xbill.DNS.utils.*;
     24 
     25 /**
     26  * Tokenizer is used to parse DNS records and zones from text format.
     27  *
     28  * @author Brian Wellington
     29  * @author Bob Halley
     30  */
     31 
     32 public class Tokenizer {
     33 
     34 private static String delim = " \t\n;()\"";
     35 private static String quotes = "\"";
     36 
     37 /** End of file */
     38 public static final int EOF		= 0;
     39 
     40 /** End of line */
     41 public static final int EOL		= 1;
     42 
     43 /** Whitespace; only returned when wantWhitespace is set */
     44 public static final int WHITESPACE	= 2;
     45 
     46 /** An identifier (unquoted string) */
     47 public static final int IDENTIFIER	= 3;
     48 
     49 /** A quoted string */
     50 public static final int QUOTED_STRING	= 4;
     51 
     52 /** A comment; only returned when wantComment is set */
     53 public static final int COMMENT		= 5;
     54 
     55 private PushbackInputStream is;
     56 private boolean ungottenToken;
     57 private int multiline;
     58 private boolean quoting;
     59 private String delimiters;
     60 private Token current;
     61 private StringBuffer sb;
     62 private boolean wantClose;
     63 
     64 private String filename;
     65 private int line;
     66 
     67 public static class Token {
     68 	/** The type of token. */
     69 	public int type;
     70 
     71 	/** The value of the token, or null for tokens without values. */
     72 	public String value;
     73 
     74 	private
     75 	Token() {
     76 		type = -1;
     77 		value = null;
     78 	}
     79 
     80 	private Token
     81 	set(int type, StringBuffer value) {
     82 		if (type < 0)
     83 			throw new IllegalArgumentException();
     84 		this.type = type;
     85 		this.value = value == null ? null : value.toString();
     86 		return this;
     87 	}
     88 
     89 	/**
     90 	 * Converts the token to a string containing a representation useful
     91 	 * for debugging.
     92 	 */
     93 	public String
     94 	toString() {
     95 		switch (type) {
     96 		case EOF:
     97 			return "<eof>";
     98 		case EOL:
     99 			return "<eol>";
    100 		case WHITESPACE:
    101 			return "<whitespace>";
    102 		case IDENTIFIER:
    103 			return "<identifier: " + value + ">";
    104 		case QUOTED_STRING:
    105 			return "<quoted_string: " + value + ">";
    106 		case COMMENT:
    107 			return "<comment: " + value + ">";
    108 		default:
    109 			return "<unknown>";
    110 		}
    111 	}
    112 
    113 	/** Indicates whether this token contains a string. */
    114 	public boolean
    115 	isString() {
    116 		return (type == IDENTIFIER || type == QUOTED_STRING);
    117 	}
    118 
    119 	/** Indicates whether this token contains an EOL or EOF. */
    120 	public boolean
    121 	isEOL() {
    122 		return (type == EOL || type == EOF);
    123 	}
    124 }
    125 
/**
 * The exception created by Tokenizer.exception().  Its detail message is the
 * supplied message prefixed with "filename:line: "; the unprefixed message
 * remains available through getBaseMessage().
 */
static class TokenizerException extends TextParseException {
	/** The message as supplied by the caller, without the location prefix. */
	String message;

	/**
	 * @param filename The name of the input being tokenized.
	 * @param line The line number at which the problem was found.
	 * @param message The description of the problem.
	 */
	public
	TokenizerException(String filename, int line, String message) {
		super(filename + ":" + line + ": " + message);
		this.message = message;
	}

	/** Returns the message without the file name and line number. */
	public String
	getBaseMessage() {
		return message;
	}
}
    140 
    141 /**
    142  * Creates a Tokenizer from an arbitrary input stream.
    143  * @param is The InputStream to tokenize.
    144  */
    145 public
    146 Tokenizer(InputStream is) {
    147 	if (!(is instanceof BufferedInputStream))
    148 		is = new BufferedInputStream(is);
    149 	this.is = new PushbackInputStream(is, 2);
    150 	ungottenToken = false;
    151 	multiline = 0;
    152 	quoting = false;
    153 	delimiters = delim;
    154 	current = new Token();
    155 	sb = new StringBuffer();
    156 	filename = "<none>";
    157 	line = 1;
    158 }
    159 
/**
 * Creates a Tokenizer from a string.  The string is converted to bytes using
 * the platform's default charset before being tokenized.
 * @param s The String to tokenize.
 */
public
Tokenizer(String s) {
	this(new ByteArrayInputStream(s.getBytes()));
}
    168 
    169 /**
    170  * Creates a Tokenizer from a file.
    171  * @param f The File to tokenize.
    172  */
    173 public
    174 Tokenizer(File f) throws FileNotFoundException {
    175 	this(new FileInputStream(f));
    176 	wantClose = true;
    177 	filename = f.getName();
    178 }
    179 
    180 private int
    181 getChar() throws IOException {
    182 	int c = is.read();
    183 	if (c == '\r') {
    184 		int next = is.read();
    185 		if (next != '\n')
    186 			is.unread(next);
    187 		c = '\n';
    188 	}
    189 	if (c == '\n')
    190 		line++;
    191 	return c;
    192 }
    193 
    194 private void
    195 ungetChar(int c) throws IOException {
    196 	if (c == -1)
    197 		return;
    198 	is.unread(c);
    199 	if (c == '\n')
    200 		line--;
    201 }
    202 
    203 private int
    204 skipWhitespace() throws IOException {
    205 	int skipped = 0;
    206 	while (true) {
    207 		int c = getChar();
    208 		if (c != ' ' && c != '\t') {
    209 	                if (!(c == '\n' && multiline > 0)) {
    210 				ungetChar(c);
    211 				return skipped;
    212 			}
    213 		}
    214 		skipped++;
    215 	}
    216 }
    217 
    218 private void
    219 checkUnbalancedParens() throws TextParseException {
    220 	if (multiline > 0)
    221 		throw exception("unbalanced parentheses");
    222 }
    223 
/**
 * Gets the next token from a tokenizer.
 * <p>
 * The returned Token is the tokenizer's single reusable instance; its
 * contents are overwritten by the next call.  Parentheses are consumed
 * silently and only change how newlines are treated: while at least one is
 * open, newlines count as whitespace.  A backslash keeps itself and the
 * character that follows it in the token, so an escaped delimiter does not
 * end the token.
 * @param wantWhitespace If true, leading whitespace will be returned as a
 * token.
 * @param wantComment If true, comments are returned as tokens.
 * @return The next token in the stream.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public Token
get(boolean wantWhitespace, boolean wantComment) throws IOException {
	int type;
	int c;

	if (ungottenToken) {
		ungottenToken = false;
		// A pushed-back whitespace or comment token is only handed out
		// again if the caller asked for that kind of token; otherwise it
		// is dropped and the next token is read from the input.
		if (current.type == WHITESPACE) {
			if (wantWhitespace)
				return current;
		} else if (current.type == COMMENT) {
			if (wantComment)
				return current;
		} else {
			// unget() decremented the line count for an EOL token;
			// restore it now that the token is consumed again.
			if (current.type == EOL)
				line++;
			return current;
		}
	}
	int skipped = skipWhitespace();
	if (skipped > 0 && wantWhitespace)
		return current.set(WHITESPACE, null);
	// Assume an unquoted token until an opening quote is seen.
	type = IDENTIFIER;
	sb.setLength(0);
	while (true) {
		c = getChar();
		if (c == -1 || delimiters.indexOf(c) != -1) {
			if (c == -1) {
				if (quoting)
					throw exception("EOF in " +
							"quoted string");
				else if (sb.length() == 0)
					// NOTE(review): open parentheses are not
					// checked on this path, unlike EOF inside
					// a comment below - confirm intended.
					return current.set(EOF, null);
				else
					return current.set(type, sb);
			}
			// A delimiter at the very start of a token is handled
			// here; one that follows token text ends the token.
			if (sb.length() == 0 && type != QUOTED_STRING) {
				if (c == '(') {
					multiline++;
					skipWhitespace();
					continue;
				} else if (c == ')') {
					if (multiline <= 0)
						throw exception("invalid " +
								"close " +
								"parenthesis");
					multiline--;
					skipWhitespace();
					continue;
				} else if (c == '"') {
					if (!quoting) {
						// Opening quote: from now on only
						// a quote ends the token.
						quoting = true;
						delimiters = quotes;
						type = QUOTED_STRING;
					} else {
						// Closing quote, pushed back when
						// the quoted string was returned by
						// the previous call.
						quoting = false;
						delimiters = delim;
						skipWhitespace();
					}
					continue;
				} else if (c == '\n') {
					return current.set(EOL, null);
				} else if (c == ';') {
					// Collect the comment text up to, but not
					// including, the end of the line.
					while (true) {
						c = getChar();
						if (c == '\n' || c == -1)
							break;
						sb.append((char)c);
					}
					if (wantComment) {
						// Leave the line end for the next
						// call.
						ungetChar(c);
						return current.set(COMMENT, sb);
					} else if (c == -1 &&
						   type != QUOTED_STRING)
					{
						checkUnbalancedParens();
						return current.set(EOF, null);
					} else if (multiline > 0) {
						// Inside parentheses the line end
						// after a comment does not end the
						// record; discard the comment and
						// keep reading.
						skipWhitespace();
						sb.setLength(0);
						continue;
					} else
						return current.set(EOL, null);
				} else
					throw new IllegalStateException();
			} else
				// Leave the delimiter for the next call.
				ungetChar(c);
			break;
		} else if (c == '\\') {
			c = getChar();
			if (c == -1)
				throw exception("unterminated escape sequence");
			// Keep the backslash; the escaped character itself is
			// appended below without being checked as a delimiter.
			sb.append('\\');
		} else if (quoting && c == '\n') {
			throw exception("newline in quoted string");
		}
		sb.append((char)c);
	}
	// NOTE(review): the loop is only left through the break above, which
	// is reached when this condition is false, so this branch looks
	// unreachable.
	if (sb.length() == 0 && type != QUOTED_STRING) {
		checkUnbalancedParens();
		return current.set(EOF, null);
	}
	return current.set(type, sb);
}
    337 
/**
 * Gets the next token from a tokenizer, ignoring whitespace and comments.
 * Equivalent to get(false, false).
 * @return The next token in the stream.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public Token
get() throws IOException {
	return get(false, false);
}
    348 
    349 /**
    350  * Returns a token to the stream, so that it will be returned by the next call
    351  * to get().
    352  * @throws IllegalStateException There are already ungotten tokens.
    353  */
    354 public void
    355 unget() {
    356 	if (ungottenToken)
    357 		throw new IllegalStateException
    358 				("Cannot unget multiple tokens");
    359 	if (current.type == EOL)
    360 		line--;
    361 	ungottenToken = true;
    362 }
    363 
    364 /**
    365  * Gets the next token from a tokenizer and converts it to a string.
    366  * @return The next token in the stream, as a string.
    367  * @throws TextParseException The input was invalid or not a string.
    368  * @throws IOException An I/O error occurred.
    369  */
    370 public String
    371 getString() throws IOException {
    372 	Token next = get();
    373 	if (!next.isString()) {
    374 		throw exception("expected a string");
    375 	}
    376 	return next.value;
    377 }
    378 
    379 private String
    380 _getIdentifier(String expected) throws IOException {
    381 	Token next = get();
    382 	if (next.type != IDENTIFIER)
    383 		throw exception("expected " + expected);
    384 	return next.value;
    385 }
    386 
/**
 * Gets the next token from a tokenizer, ensures it is an unquoted string,
 * and returns its value.
 * @return The next token in the stream, as a string.
 * @throws TextParseException The input was invalid or not an unquoted string.
 * @throws IOException An I/O error occurred.
 */
public String
getIdentifier() throws IOException {
	return _getIdentifier("an identifier");
}
    398 
    399 /**
    400  * Gets the next token from a tokenizer and converts it to a long.
    401  * @return The next token in the stream, as a long.
    402  * @throws TextParseException The input was invalid or not a long.
    403  * @throws IOException An I/O error occurred.
    404  */
    405 public long
    406 getLong() throws IOException {
    407 	String next = _getIdentifier("an integer");
    408 	if (!Character.isDigit(next.charAt(0)))
    409 		throw exception("expected an integer");
    410 	try {
    411 		return Long.parseLong(next);
    412 	} catch (NumberFormatException e) {
    413 		throw exception("expected an integer");
    414 	}
    415 }
    416 
    417 /**
    418  * Gets the next token from a tokenizer and converts it to an unsigned 32 bit
    419  * integer.
    420  * @return The next token in the stream, as an unsigned 32 bit integer.
    421  * @throws TextParseException The input was invalid or not an unsigned 32
    422  * bit integer.
    423  * @throws IOException An I/O error occurred.
    424  */
    425 public long
    426 getUInt32() throws IOException {
    427 	long l = getLong();
    428 	if (l < 0 || l > 0xFFFFFFFFL)
    429 		throw exception("expected an 32 bit unsigned integer");
    430 	return l;
    431 }
    432 
    433 /**
    434  * Gets the next token from a tokenizer and converts it to an unsigned 16 bit
    435  * integer.
    436  * @return The next token in the stream, as an unsigned 16 bit integer.
    437  * @throws TextParseException The input was invalid or not an unsigned 16
    438  * bit integer.
    439  * @throws IOException An I/O error occurred.
    440  */
    441 public int
    442 getUInt16() throws IOException {
    443 	long l = getLong();
    444 	if (l < 0 || l > 0xFFFFL)
    445 		throw exception("expected an 16 bit unsigned integer");
    446 	return (int) l;
    447 }
    448 
    449 /**
    450  * Gets the next token from a tokenizer and converts it to an unsigned 8 bit
    451  * integer.
    452  * @return The next token in the stream, as an unsigned 8 bit integer.
    453  * @throws TextParseException The input was invalid or not an unsigned 8
    454  * bit integer.
    455  * @throws IOException An I/O error occurred.
    456  */
    457 public int
    458 getUInt8() throws IOException {
    459 	long l = getLong();
    460 	if (l < 0 || l > 0xFFL)
    461 		throw exception("expected an 8 bit unsigned integer");
    462 	return (int) l;
    463 }
    464 
    465 /**
    466  * Gets the next token from a tokenizer and parses it as a TTL.
    467  * @return The next token in the stream, as an unsigned 32 bit integer.
    468  * @throws TextParseException The input was not valid.
    469  * @throws IOException An I/O error occurred.
    470  * @see TTL
    471  */
    472 public long
    473 getTTL() throws IOException {
    474 	String next = _getIdentifier("a TTL value");
    475 	try {
    476 		return TTL.parseTTL(next);
    477 	}
    478 	catch (NumberFormatException e) {
    479 		throw exception("expected a TTL value");
    480 	}
    481 }
    482 
    483 /**
    484  * Gets the next token from a tokenizer and parses it as if it were a TTL.
    485  * @return The next token in the stream, as an unsigned 32 bit integer.
    486  * @throws TextParseException The input was not valid.
    487  * @throws IOException An I/O error occurred.
    488  * @see TTL
    489  */
    490 public long
    491 getTTLLike() throws IOException {
    492 	String next = _getIdentifier("a TTL-like value");
    493 	try {
    494 		return TTL.parse(next, false);
    495 	}
    496 	catch (NumberFormatException e) {
    497 		throw exception("expected a TTL-like value");
    498 	}
    499 }
    500 
    501 /**
    502  * Gets the next token from a tokenizer and converts it to a name.
    503  * @param origin The origin to append to relative names.
    504  * @return The next token in the stream, as a name.
    505  * @throws TextParseException The input was invalid or not a valid name.
    506  * @throws IOException An I/O error occurred.
    507  * @throws RelativeNameException The parsed name was relative, even with the
    508  * origin.
    509  * @see Name
    510  */
    511 public Name
    512 getName(Name origin) throws IOException {
    513 	String next = _getIdentifier("a name");
    514 	try {
    515 		Name name = Name.fromString(next, origin);
    516 		if (!name.isAbsolute())
    517 			throw new RelativeNameException(name);
    518 		return name;
    519 	}
    520 	catch (TextParseException e) {
    521 		throw exception(e.getMessage());
    522 	}
    523 }
    524 
    525 /**
    526  * Gets the next token from a tokenizer and converts it to an IP Address.
    527  * @param family The address family.
    528  * @return The next token in the stream, as an InetAddress
    529  * @throws TextParseException The input was invalid or not a valid address.
    530  * @throws IOException An I/O error occurred.
    531  * @see Address
    532  */
    533 public InetAddress
    534 getAddress(int family) throws IOException {
    535 	String next = _getIdentifier("an address");
    536 	try {
    537 		return Address.getByAddress(next, family);
    538 	}
    539 	catch (UnknownHostException e) {
    540 		throw exception(e.getMessage());
    541 	}
    542 }
    543 
    544 /**
    545  * Gets the next token from a tokenizer, which must be an EOL or EOF.
    546  * @throws TextParseException The input was invalid or not an EOL or EOF token.
    547  * @throws IOException An I/O error occurred.
    548  */
    549 public void
    550 getEOL() throws IOException {
    551 	Token next = get();
    552 	if (next.type != EOL && next.type != EOF) {
    553 		throw exception("expected EOL or EOF");
    554 	}
    555 }
    556 
    557 /**
    558  * Returns a concatenation of the remaining strings from a Tokenizer.
    559  */
    560 private String
    561 remainingStrings() throws IOException {
    562         StringBuffer buffer = null;
    563         while (true) {
    564                 Tokenizer.Token t = get();
    565                 if (!t.isString())
    566                         break;
    567                 if (buffer == null)
    568                         buffer = new StringBuffer();
    569                 buffer.append(t.value);
    570         }
    571         unget();
    572         if (buffer == null)
    573                 return null;
    574         return buffer.toString();
    575 }
    576 
    577 /**
    578  * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
    579  * them together, and converts the base64 encoded data to a byte array.
    580  * @param required If true, an exception will be thrown if no strings remain;
    581  * otherwise null be be returned.
    582  * @return The byte array containing the decoded strings, or null if there
    583  * were no strings to decode.
    584  * @throws TextParseException The input was invalid.
    585  * @throws IOException An I/O error occurred.
    586  */
    587 public byte []
    588 getBase64(boolean required) throws IOException {
    589 	String s = remainingStrings();
    590 	if (s == null) {
    591 		if (required)
    592 			throw exception("expected base64 encoded string");
    593 		else
    594 			return null;
    595 	}
    596 	byte [] array = base64.fromString(s);
    597 	if (array == null)
    598 		throw exception("invalid base64 encoding");
    599 	return array;
    600 }
    601 
/**
 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
 * them together, and converts the base64 encoded data to a byte array.
 * Equivalent to getBase64(false).
 * @return The byte array containing the decoded strings, or null if there
 * were no strings to decode.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public byte []
getBase64() throws IOException {
	return getBase64(false);
}
    614 
    615 /**
    616  * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
    617  * them together, and converts the hex encoded data to a byte array.
    618  * @param required If true, an exception will be thrown if no strings remain;
    619  * otherwise null be be returned.
    620  * @return The byte array containing the decoded strings, or null if there
    621  * were no strings to decode.
    622  * @throws TextParseException The input was invalid.
    623  * @throws IOException An I/O error occurred.
    624  */
    625 public byte []
    626 getHex(boolean required) throws IOException {
    627 	String s = remainingStrings();
    628 	if (s == null) {
    629 		if (required)
    630 			throw exception("expected hex encoded string");
    631 		else
    632 			return null;
    633 	}
    634 	byte [] array = base16.fromString(s);
    635 	if (array == null)
    636 		throw exception("invalid hex encoding");
    637 	return array;
    638 }
    639 
/**
 * Gets the remaining string tokens until an EOL/EOF is seen, concatenates
 * them together, and converts the hex encoded data to a byte array.
 * Equivalent to getHex(false).
 * @return The byte array containing the decoded strings, or null if there
 * were no strings to decode.
 * @throws TextParseException The input was invalid.
 * @throws IOException An I/O error occurred.
 */
public byte []
getHex() throws IOException {
	return getHex(false);
}
    652 
    653 /**
    654  * Gets the next token from a tokenizer and decodes it as hex.
    655  * @return The byte array containing the decoded string.
    656  * @throws TextParseException The input was invalid.
    657  * @throws IOException An I/O error occurred.
    658  */
    659 public byte []
    660 getHexString() throws IOException {
    661 	String next = _getIdentifier("a hex string");
    662 	byte [] array = base16.fromString(next);
    663 	if (array == null)
    664 		throw exception("invalid hex encoding");
    665 	return array;
    666 }
    667 
    668 /**
    669  * Gets the next token from a tokenizer and decodes it as base32.
    670  * @param b32 The base32 context to decode with.
    671  * @return The byte array containing the decoded string.
    672  * @throws TextParseException The input was invalid.
    673  * @throws IOException An I/O error occurred.
    674  */
    675 public byte []
    676 getBase32String(base32 b32) throws IOException {
    677 	String next = _getIdentifier("a base32 string");
    678 	byte [] array = b32.fromString(next);
    679 	if (array == null)
    680 		throw exception("invalid base32 encoding");
    681 	return array;
    682 }
    683 
/**
 * Creates an exception whose message includes the current file name and
 * line number.  The exception is returned, not thrown; the caller is
 * expected to throw it.
 * @param s The error message to include.
 * @return The exception to be thrown
 */
public TextParseException
exception(String s) {
	return new TokenizerException(filename, line, s);
}
    693 
    694 /**
    695  * Closes any files opened by this tokenizer.
    696  */
    697 public void
    698 close() {
    699 	if (wantClose) {
    700 		try {
    701 			is.close();
    702 		}
    703 		catch (IOException e) {
    704 		}
    705 	}
    706 }
    707 
/**
 * Calls close() when the tokenizer is finalized, so that a file opened by
 * this tokenizer is released even if close() was never called explicitly.
 */
protected void
finalize() {
	close();
}
    712 
    713 }
    714