/*
 * Copyright (C) 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.clearsilver.jsilver.data;

import com.google.clearsilver.jsilver.resourceloader.ResourceLoader;

import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Stack;

/**
 * Parser for HDF based on the following grammar by Brandon Long.
 *
 * <pre>
 * COMMAND := (INCLUDE | COMMENT | HDF_SET | HDF_DESCEND | HDF_ASCEND)
 * INCLUDE := #include "FILENAME" EOL
 * COMMENT := # .* EOL
 * HDF_DESCEND := HDF_NAME_ATTRS { EOL
 * HDF_ASCEND := } EOL
 * HDF_SET := (HDF_ASSIGN | HDF_MULTILINE_ASSIGN | HDF_COPY | HDF_LINK)
 * HDF_ASSIGN := HDF_NAME_ATTRS = .* EOL
 * HDF_MULTILINE_ASSIGN := HDF_NAME_ATTRS &lt;&lt; EOM_MARKER EOL (.* EOL)* EOM_MARKER EOL
 * HDF_COPY := HDF_NAME_ATTRS := HDF_NAME EOL
 * HDF_LINK := HDF_NAME_ATTRS : HDF_NAME EOL
 * HDF_NAME_ATTRS := (HDF_NAME | HDF_NAME [HDF_ATTRS])
 * HDF_ATTRS := (HDF_ATTR | HDF_ATTR, HDF_ATTRS)
 * HDF_ATTR := (HDF_ATTR_KEY | HDF_ATTR_KEY = [^\s,\]]+ | HDF_ATTR_KEY = DQUOTED_STRING)
 * HDF_ATTR_KEY := [0-9a-zA-Z]+
 * DQUOTED_STRING := "([^\\"]|\\[ntr]|\\.)*"
 * HDF_NAME := (HDF_SUB_NAME | HDF_SUB_NAME\.HDF_NAME)
 * HDF_SUB_NAME := [0-9a-zA-Z_]+
 * EOM_MARKER := \S.*\S
 * EOL := \n
 * </pre>
 */
public class NewHdfParser implements Parser {

  private final StringInternStrategy internStrategy;

  /**
   * Special exception used to detect when we unexpectedly run out of characters on the line.
   */
  private static class OutOfCharsException extends Exception {}

  /**
   * Object used to hold the name and attributes of an HDF node before we are ready to commit it to
   * the Data object.
   */
  private static class HdfNameAttrs {
    String name;
    // Flat list of alternating key, value entries; allocated lazily on the first attribute.
    ArrayList<String> attrs = null;
    // Index in the parsed line just past the name and optional attribute list.
    int endOfSequence;

    /** Prepares this holder for a new element, discarding any previously collected attributes. */
    void reset(String newname) {
      // TODO: think about moving interning here instead of parser code
      this.name = newname;
      if (attrs != null) {
        attrs.clear();
      }
      endOfSequence = 0;
    }

    /** Records one attribute as a consecutive key/value pair. */
    void addAttribute(String key, String value) {
      if (attrs == null) {
        attrs = new ArrayList<String>(10);
      }
      // TODO: think about moving interning here instead of parser code
      attrs.add(key);
      attrs.add(value);
    }

    /**
     * Creates the child named {@code name} under {@code data} and sets every collected attribute
     * on it.
     *
     * @return the child returned by {@code data.createChild(name)}.
     */
    Data toData(Data data) {
      Data child = data.createChild(name);
      if (attrs != null) {
        // Entries were added in pairs by addAttribute, so two next() calls per iteration are safe.
        Iterator<String> it = attrs.iterator();
        while (it.hasNext()) {
          String key = it.next();
          String value = it.next();
          child.setAttribute(key, value);
        }
      }
      return child;
    }
  }

  static final String UNNAMED_INPUT = "[UNNAMED_INPUT]";

  /**
   * State information that we pass through the parse methods. Allows parser to be reentrant as all
   * the state is passed through method calls.
   */
  static class ParseState {
    final Stack<Data> context = new Stack<Data>();
    final Data output;
    final LineNumberReader lineReader;
    final ErrorHandler errorHandler;
    final ResourceLoader resourceLoader;
    final NewHdfParser hdfParser;
    final boolean ignoreAttributes;
    final HdfNameAttrs hdfNameAttrs;
    final UniqueStack<String> includeStack;
    final String parsedFileName;

    String line;
    Data currentNode;

    private ParseState(Data output, LineNumberReader lineReader, ErrorHandler errorHandler,
        ResourceLoader resourceLoader, NewHdfParser hdfParser, String parsedFileName,
        boolean ignoreAttributes, HdfNameAttrs hdfNameAttrs, UniqueStack<String> includeStack) {
      this.lineReader = lineReader;
      this.errorHandler = errorHandler;
      this.output = output;
      currentNode = output;
      this.resourceLoader = resourceLoader;
      this.hdfParser = hdfParser;
      this.parsedFileName = parsedFileName;
      this.ignoreAttributes = ignoreAttributes;
      this.hdfNameAttrs = hdfNameAttrs;
      this.includeStack = includeStack;
    }

    /**
     * Creates the state for a top-level parse. A {@code null} file name is replaced by
     * {@link #UNNAMED_INPUT}, and the name is pushed as the first entry of a fresh include stack.
     */
    public static ParseState createNewParseState(Data output, Reader reader,
        ErrorHandler errorHandler, ResourceLoader resourceLoader, NewHdfParser hdfParser,
        String parsedFileName, boolean ignoreAttributes) {

      if (parsedFileName == null) {
        parsedFileName = UNNAMED_INPUT;
      }
      UniqueStack<String> includeStack = new UniqueStack<String>();
      includeStack.push(parsedFileName);

      return new ParseState(output, new LineNumberReader(reader), errorHandler, resourceLoader,
          hdfParser, parsedFileName, ignoreAttributes, new HdfNameAttrs(), includeStack);
    }

    /**
     * Creates the state for parsing an included file. Output, error handler, resource loader,
     * parser, attribute setting and include stack are shared with {@code originalState}; the line
     * reader and element holder are new.
     *
     * @param includeFileName name of the included file; used as the file name in error reports so
     *        that reported line numbers refer to the right file.
     */
    public static ParseState createParseStateForIncludedFile(ParseState originalState,
        String includeFileName, Reader includeFileReader) {
      return new ParseState(originalState.output, new LineNumberReader(includeFileReader),
          originalState.errorHandler, originalState.resourceLoader, originalState.hdfParser,
          includeFileName, originalState.ignoreAttributes, new HdfNameAttrs(),
          originalState.includeStack);
    }
  }


  /**
   * Constructor for {@link NewHdfParser}.
   *
   * @param internPool - {@link StringInternStrategy} instance used to optimize the HDF parsing.
   */
  public NewHdfParser(StringInternStrategy internPool) {
    this.internStrategy = internPool;
  }

  private static class NewHdfParserFactory implements ParserFactory {
    private final StringInternStrategy stringInternStrategy;

    public NewHdfParserFactory(StringInternStrategy stringInternStrategy) {
      this.stringInternStrategy = stringInternStrategy;
    }

    @Override
    public Parser newInstance() {
      return new NewHdfParser(stringInternStrategy);
    }
  }

  /**
   * Creates a {@link ParserFactory} instance.
   *
   * <p>
   * The provided {@code stringInternStrategy} instance will be shared by all {@link Parser}
   * objects created by the factory and used to optimize the HDF parsing process by reusing the
   * String for keys and values.
   *
   * @param stringInternStrategy - {@link StringInternStrategy} instance used to optimize the HDF
   *        parsing.
   * @return an instance of {@link ParserFactory} implementation.
   */
  public static ParserFactory newFactory(StringInternStrategy stringInternStrategy) {
    return new NewHdfParserFactory(stringInternStrategy);
  }

  /**
   * Parses HDF text from {@code reader} into {@code output}.
   *
   * @param errorHandler receives parse errors; when {@code null}, a parse error is thrown as a
   *        {@link RuntimeException} instead.
   * @param resourceLoader used to open files named by {@code #include} commands.
   * @param dataFileName name reported in errors; may be {@code null}.
   * @param ignoreAttributes when {@code true}, attribute lists are skipped rather than parsed.
   * @throws IOException if reading from {@code reader} or an included file fails.
   */
  public void parse(Reader reader, Data output, Parser.ErrorHandler errorHandler,
      ResourceLoader resourceLoader, String dataFileName, boolean ignoreAttributes)
      throws IOException {

    parse(ParseState.createNewParseState(output, reader, errorHandler, resourceLoader, this,
        dataFileName, ignoreAttributes));
  }

  /** Reads the input line by line, handing each whitespace-stripped line to the command parser. */
  private void parse(ParseState state) throws IOException {
    while ((state.line = state.lineReader.readLine()) != null) {
      String seq = stripWhitespace(state.line);
      try {
        parseCommand(seq, state);
      } catch (OutOfCharsException e) {
        reportError(state, "End of line was prematurely reached. Parse error.");
      }
    }
  }

  private static final String INCLUDE_WS = "#include ";

  /** Dispatches one stripped line to the include, ascend or element handling. */
  private void parseCommand(String seq, ParseState state) throws IOException, OutOfCharsException {
    if (seq.length() == 0) {
      // Empty line.
      return;
    }
    char first = charAt(seq, 0);
    if (first == '#') {
      // If there isn't a match on include then this is a comment and we do nothing.
      if (matches(seq, 0, INCLUDE_WS)) {
        int start = skipLeadingWhitespace(seq, INCLUDE_WS.length());
        parseInclude(seq, start, state);
      }
    } else if (first == '}') {
      if (skipLeadingWhitespace(seq, 1) != seq.length()) {
        reportError(state, "Extra chars after '}'");
        return;
      }
      handleAscend(state);
    } else {
      parseHdfElement(seq, state);
    }
  }

  /**
   * Extracts the file name of an include command, removing surrounding double quotes if present,
   * and processes the include.
   */
  private void parseInclude(String seq, int start, ParseState state) throws IOException,
      OutOfCharsException {
    int end = seq.length();
    if (charAt(seq, start) == '"') {
      // The closing quote must be a different character than the opening one; otherwise a lone
      // '"' would match itself and substring(start, end) would be called with start > end.
      if (end - 1 > start && seq.charAt(end - 1) == '"') {
        start++;
        end--;
      } else {
        reportError(state, "Missing '\"' at end of include");
        return;
      }
    }
    handleInclude(seq.substring(start, end), state);
  }

  private static final int NO_MATCH = -1;

  /**
   * Parses a line that starts with an HDF name: descend, assignment, copy, link or multi-line
   * assignment, selected by the operator that follows the name and optional attributes.
   */
  private void parseHdfElement(String seq, ParseState state) throws IOException,
      OutOfCharsException {
    // Re-use a single element to avoid repeated allocations/trashing (serious
    // performance impact, 5% of real service performance)
    HdfNameAttrs element = state.hdfNameAttrs;
    if (!parseHdfNameAttrs(element, seq, 0, state)) {
      return;
    }
    int index = skipLeadingWhitespace(seq, element.endOfSequence);
    switch (charAt(seq, index)) {
      case '{': {
        // Descend
        if (index + 1 != seq.length()) {
          reportError(state, "No characters expected after '{'");
          return;
        }
        handleDescend(state, element);
        return;
      }
      case '=': {
        // Assignment
        index = skipLeadingWhitespace(seq, index + 1);
        String value = internStrategy.intern(seq.substring(index, seq.length()));
        handleAssign(state, element, value);
        return;
      }
      case ':': {
        // ":=" is a copy, a bare ":" is a link; both are followed by a source HDF name.
        boolean isCopy = charAt(seq, index + 1) == '=';
        index = skipLeadingWhitespace(seq, index + (isCopy ? 2 : 1));
        String src = parseHdfName(seq, index);
        if (src == null) {
          reportError(state, "Invalid HDF name");
          return;
        }
        if (index + src.length() != seq.length()) {
          reportError(state, "No characters expected after HDF name");
          return;
        }
        if (isCopy) {
          handleCopy(state, element, src);
        } else {
          handleLink(state, element, src);
        }
        return;
      }
      case '<': {
        if (charAt(seq, index + 1) != '<') {
          reportError(state, "Expected '<<'");
          // Do not go on to swallow the following lines as a multi-line value.
          return;
        }
        index = skipLeadingWhitespace(seq, index + 2);
        String eomMarker = seq.substring(index, seq.length());
        String rawValue = parseMultilineValue(state, eomMarker);
        if (rawValue == null) {
          // Error already reported; never hand null to the intern strategy.
          return;
        }
        // TODO: think about moving interning to handleAssign()
        handleAssign(state, element, internStrategy.intern(rawValue));
        return;
      }
      default:
        reportError(state, "No valid operator");
        return;
    }
  }

  /**
   * This method parses out an HDF element name and any optional attributes into a caller-supplied
   * HdfNameAttrs object. It returns a {@code boolean} with whether it succeeded to parse.
   */
  private boolean parseHdfNameAttrs(HdfNameAttrs destination, String seq, int index,
      ParseState state) throws OutOfCharsException {
    String hdfName = parseHdfName(seq, index);
    if (hdfName == null) {
      reportError(state, "Invalid HDF name");
      return false;
    }
    destination.reset(hdfName);
    index = skipLeadingWhitespace(seq, index + hdfName.length());
    int end = parseAttributes(seq, index, state, destination);
    if (end == NO_MATCH) {
      // Error already reported by parseAttributes.
      return false;
    }
    destination.endOfSequence = end;
    return true;
  }

  /**
   * Parses a valid hdf path name.
   *
   * @return the interned name starting at {@code index}, or {@code null} if no name character is
   *         found there.
   */
  private String parseHdfName(String seq, int index) throws OutOfCharsException {
    int end = index;
    while (end < seq.length() && isHdfNameChar(seq.charAt(end))) {
      end++;
    }
    if (end == index) {
      return null;
    }
    return internStrategy.intern(seq.substring(index, end));
  }

  /**
   * Looks for optional attributes and adds them to the HdfNameAttrs object passed into the method.
   *
   * @return the index just past the closing {@code ']'}, {@code index} unchanged when there is no
   *         attribute list, or {@link #NO_MATCH} after a reported error.
   */
  private int parseAttributes(String seq, int index, ParseState state, HdfNameAttrs element)
      throws OutOfCharsException {
    if (charAt(seq, index) != '[') {
      // No attributes to parse
      return index;
    }
    index = skipLeadingWhitespace(seq, index + 1);

    // If we don't care about attributes, just skip over them.
    if (state.ignoreAttributes) {
      while (charAt(seq, index) != ']') {
        index++;
      }
      return index + 1;
    }

    boolean first = true;
    do {
      if (first) {
        first = false;
      } else if (charAt(seq, index) == ',') {
        index = skipLeadingWhitespace(seq, index + 1);
      } else {
        // Attributes must be comma separated; stop instead of committing a malformed list.
        reportError(state, "Error parsing attribute list");
        return NO_MATCH;
      }
      index = parseAttribute(seq, index, state, element);
      if (index == NO_MATCH) {
        // reportError called by parseAttribute already.
        return NO_MATCH;
      }
      index = skipLeadingWhitespace(seq, index);
    } while (charAt(seq, index) != ']');
    return index + 1;
  }

  private static final String DEFAULT_ATTR_VALUE = "1";

  /**
   * Parse out a single HDF attribute. If there is no explicit value, use default value of "1" like
   * in C clearsilver. Returns NO_MATCH if it fails to parse an attribute.
   */
  private int parseAttribute(String seq, int index, ParseState state, HdfNameAttrs element)
      throws OutOfCharsException {
    int end = parseAttributeKey(seq, index);
    if (index == end) {
      reportError(state, "No valid attribute key");
      return NO_MATCH;
    }
    String attrKey = internStrategy.intern(seq.substring(index, end));
    index = skipLeadingWhitespace(seq, end);
    if (charAt(seq, index) != '=') {
      // No value for this attribute key. Use default value of "1"
      element.addAttribute(attrKey, DEFAULT_ATTR_VALUE);
      return index;
    }
    // We need to parse out the attribute value.
    index = skipLeadingWhitespace(seq, index + 1);
    if (charAt(seq, index) == '"') {
      StringBuilder unescaped = new StringBuilder();
      end = parseQuotedAttributeValue(seq, index + 1, unescaped);
      if (end == NO_MATCH) {
        reportError(state, "Unable to parse quoted attribute value");
        return NO_MATCH;
      }
      element.addAttribute(attrKey, internStrategy.intern(unescaped.toString()));
      // Step past the closing quote.
      return end + 1;
    }
    // Simple attribute that has no whitespace.
    String attrValue = parseAttributeValue(seq, index, state);
    if (attrValue == null || attrValue.length() == 0) {
      reportError(state, "No attribute for key " + attrKey);
      return NO_MATCH;
    }
    attrValue = internStrategy.intern(attrValue);
    element.addAttribute(attrKey, attrValue);
    return index + attrValue.length();
  }

  /**
   * Returns the index just past the run of alphanumeric characters starting at {@code index},
   * i.e. the end of a valid attribute key.
   */
  private int parseAttributeKey(String seq, int index) throws OutOfCharsException {
    while (isAlphaNumericChar(charAt(seq, index))) {
      index++;
    }
    return index;
  }

  /**
   * Parses a quoted attribute value. Unescapes octal characters and \n, \r, \t, \", etc.
   *
   * @param index position of the first character after the opening quote.
   * @param sb receives the unescaped value.
   * @return the index of the closing quote.
   */
  private int parseQuotedAttributeValue(String seq, int index, StringBuilder sb)
      throws OutOfCharsException {
    char c;
    while ((c = charAt(seq, index)) != '"') {
      if (c == '\\') {
        // Escaped character. Look for 1 to 3 digits in a row as octal or n,t,r.
        index++;
        char next = charAt(seq, index);
        if (isNumericChar(next)) {
          // Parse the next 1 to 3 characters if they are digits. Treat it as an octal code.
          int val = next - '0';
          if (isNumericChar(charAt(seq, index + 1))) {
            index++;
            val = val * 8 + (charAt(seq, index) - '0');
            if (isNumericChar(charAt(seq, index + 1))) {
              index++;
              val = val * 8 + (charAt(seq, index) - '0');
            }
          }
          c = (char) val;
        } else if (next == 'n') {
          c = '\n';
        } else if (next == 't') {
          c = '\t';
        } else if (next == 'r') {
          c = '\r';
        } else {
          // Regular escaped char like " or /
          c = next;
        }
      }
      sb.append(c);
      index++;
    }
    return index;
  }

  /**
   * Parses a simple attribute value that cannot have any whitespace or specific punctuation
   * reserved by the HDF grammar.
   */
  private String parseAttributeValue(String seq, int index, ParseState state)
      throws OutOfCharsException {
    int end = index;
    char c = charAt(seq, end);
    while (c != ',' && c != ']' && c != '"' && !Character.isWhitespace(c)) {
      end++;
      c = charAt(seq, end);
    }
    return seq.substring(index, end);
  }

  /**
   * Reads lines until one starts with {@code eomMarker} followed only by whitespace, and returns
   * the lines before it, each terminated by a newline.
   *
   * @return the collected value, or {@code null} (after reporting an error) if the input ends
   *         before the marker is found.
   */
  private String parseMultilineValue(ParseState state, String eomMarker) throws IOException {
    StringBuilder sb = new StringBuilder(256);
    String line;
    while ((line = state.lineReader.readLine()) != null) {
      if (line.startsWith(eomMarker)
          && skipLeadingWhitespace(line, eomMarker.length()) == line.length()) {
        return sb.toString();
      }
      sb.append(line).append('\n');
    }
    reportError(state, "EOM " + eomMarker + " never found");
    return null;
  }

  // //////////////////////////////////////////////////////////////////////////
  //
  // Handlers

  /** Creates the child for {@code element} and makes it the current node. */
  private void handleDescend(ParseState state, HdfNameAttrs element) {
    Data child = handleNodeCreation(state.currentNode, element);
    state.context.push(state.currentNode);
    state.currentNode = child;
  }

  private Data handleNodeCreation(Data node, HdfNameAttrs element) {
    return element.toData(node);
  }

  /** Creates the child for {@code element} under the current node and sets its value. */
  private void handleAssign(ParseState state, HdfNameAttrs element, String value) {
    // TODO: think about moving interning here
    Data child = handleNodeCreation(state.currentNode, element);
    child.setValue(value);
  }

  /**
   * Creates the child for {@code element} and gives it the value of the node {@code srcName}
   * looked up from the output root, or the empty string if that node does not exist.
   */
  private void handleCopy(ParseState state, HdfNameAttrs element, String srcName) {
    Data child = handleNodeCreation(state.currentNode, element);
    Data src = state.output.getChild(srcName);
    child.setValue(src != null ? src.getValue() : "");
  }

  /** Creates the child for {@code element} and symlinks it to {@code srcName} under the root. */
  private void handleLink(ParseState state, HdfNameAttrs element, String srcName) {
    Data child = handleNodeCreation(state.currentNode, element);
    child.setSymlink(state.output.createChild(srcName));
  }

  /** Returns to the enclosing node, reporting an error for an unmatched closing brace. */
  private void handleAscend(ParseState state) {
    if (state.context.isEmpty()) {
      reportError(state, "Too many '}'");
      return;
    }
    state.currentNode = state.context.pop();
  }

  /**
   * Opens the named file through the resource loader and parses it into the same output, guarding
   * against include loops. The reader opened here is always closed before returning.
   */
  private void handleInclude(String seq, ParseState state) throws IOException {
    String includeFileName = internStrategy.intern(seq);

    // Load the file
    Reader reader = state.resourceLoader.open(includeFileName);
    if (reader == null) {
      reportError(state, "Unable to find file " + includeFileName);
      return;
    }

    // NOTE(review): if ResourceLoader offers its own hook for releasing readers, prefer it over
    // closing the reader directly.
    try {
      // Check whether we are in include loop
      if (!state.includeStack.push(includeFileName)) {
        reportError(state, createIncludeStackTraceMessage(state.includeStack, includeFileName));
        return;
      }

      // Parse the file
      state.hdfParser.parse(ParseState
          .createParseStateForIncludedFile(state, includeFileName, reader));
    } finally {
      reader.close();
    }

    if (!includeFileName.equals(state.includeStack.pop())) {
      // Include stack trace is corrupted
      throw new IllegalStateException("Unable to find on include stack: " + includeFileName);
    }
  }

  /** Builds the error message listing the chain of includes that led to a repeated file. */
  private String createIncludeStackTraceMessage(UniqueStack<String> includeStack,
      String includeFileName) {
    StringBuilder message = new StringBuilder();
    message.append("File included twice: ");
    message.append(includeFileName);

    message.append(" Include stack: ");
    for (String fileName : includeStack) {
      message.append(fileName);
      message.append(" -> ");
    }
    message.append(includeFileName);
    return message.toString();
  }

  // /////////////////////////////////////////////////////////////////////////
  //
  // Character values

  private static boolean isNumericChar(char c) {
    return '0' <= c && c <= '9';
  }

  private static boolean isAlphaNumericChar(char c) {
    return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
  }

  private static boolean isHdfNameChar(char c) {
    return isAlphaNumericChar(c) || c == '_' || c == '.';
  }

  /**
   * Returns {@code seq} without leading and trailing whitespace; returns {@code seq} itself when
   * there is nothing to strip.
   */
  private static String stripWhitespace(String seq) {
    int start = skipLeadingWhitespace(seq, 0);
    int end = seq.length() - 1;
    while (end > start && Character.isWhitespace(seq.charAt(end))) {
      --end;
    }
    if (start == 0 && end == seq.length() - 1) {
      return seq;
    }
    return seq.substring(start, end + 1);
  }

  /** Returns the index of the first non-whitespace character at or after {@code index}. */
  private static int skipLeadingWhitespace(String seq, int index) {
    while (index < seq.length() && Character.isWhitespace(seq.charAt(index))) {
      index++;
    }
    return index;
  }

  /**
   * Determines if a character sequence appears in the given sequence starting at a specified index.
   *
   * @param seq the sequence that we want to see if it contains the string match.
   * @param start the index into seq where we want to check for match
   * @param match the String we want to look for in the sequence.
   * @return {@code true} if the string match appears in seq starting at the index start, {@code
   *         false} otherwise.
   */
  private static boolean matches(String seq, int start, String match) {
    return seq.startsWith(match, start);
  }

  /**
   * Reads the character at the specified index in the given String. Throws an exception to be
   * caught above if the index is out of range.
   */
  private static char charAt(String seq, int index) throws OutOfCharsException {
    if (0 <= index && index < seq.length()) {
      return seq.charAt(index);
    }
    throw new OutOfCharsException();
  }


  /**
   * Reports a parse error for the current line to the error handler, or throws a
   * {@link RuntimeException} when no handler was supplied.
   */
  private static void reportError(ParseState state, String errorMessage) {
    if (state.errorHandler != null) {
      state.errorHandler.error(state.lineReader.getLineNumber(), state.line, state.parsedFileName,
          errorMessage);
    } else {
      throw new RuntimeException("Parse Error on line " + state.lineReader.getLineNumber() + ": "
          + errorMessage + " : " + state.line);
    }
  }
}