Home | History | Annotate | Download | only in syntax
      1 /*
      2  * Copyright (C) 2010 Google Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.clearsilver.jsilver.syntax;
     18 
     19 import com.google.clearsilver.jsilver.syntax.analysis.DepthFirstAdapter;
     20 import com.google.clearsilver.jsilver.syntax.node.AAltCommand;
     21 import com.google.clearsilver.jsilver.syntax.node.ACallCommand;
     22 import com.google.clearsilver.jsilver.syntax.node.ADataCommand;
     23 import com.google.clearsilver.jsilver.syntax.node.ADefCommand;
     24 import com.google.clearsilver.jsilver.syntax.node.AEachCommand;
     25 import com.google.clearsilver.jsilver.syntax.node.AEscapeCommand;
     26 import com.google.clearsilver.jsilver.syntax.node.AEvarCommand;
     27 import com.google.clearsilver.jsilver.syntax.node.AIfCommand;
     28 import com.google.clearsilver.jsilver.syntax.node.ALoopCommand;
     29 import com.google.clearsilver.jsilver.syntax.node.ALoopIncCommand;
     30 import com.google.clearsilver.jsilver.syntax.node.ALoopToCommand;
     31 import com.google.clearsilver.jsilver.syntax.node.ALvarCommand;
     32 import com.google.clearsilver.jsilver.syntax.node.ANameCommand;
     33 import com.google.clearsilver.jsilver.syntax.node.ANoopCommand;
     34 import com.google.clearsilver.jsilver.syntax.node.ASetCommand;
     35 import com.google.clearsilver.jsilver.syntax.node.AUvarCommand;
     36 import com.google.clearsilver.jsilver.syntax.node.AVarCommand;
     37 import com.google.clearsilver.jsilver.syntax.node.AWithCommand;
     38 import com.google.clearsilver.jsilver.syntax.node.Start;
     39 import com.google.clearsilver.jsilver.syntax.node.TData;
     40 
     41 import java.util.ArrayList;
     42 import java.util.List;
     43 import java.util.regex.Matcher;
     44 import java.util.regex.Pattern;
     45 
     46 /**
     47  * Detects sequences of commands corresponding to a line in the template containing only structural
     48  * commands, comments or whitespace and rewrites the syntax tree to effectively remove any data
     49  * (text) associated with that line (including the trailing whitespace).
     50  * <p>
     51  * A structural command is any command that never emits any output. These come in three types:
     52  * <ul>
     53  * <li>Commands that can contain other commands (eg, "alt", "each", "escape", "if", "loop", "with",
     54  * etc...).
     55  * <li>Commands that operate on the template itself (eg, "include", "autoescape", etc...).
     56  * <li>Comments.
     57  * </ul>
     58  * <p>
     59  * This makes it much easier to write human readable templates in cases where the output format is
     60  * whitespace sensitive.
     61  * <p>
     62  * Thus the input:
     63  *
     64  * <pre>
     65  * {@literal
     66  * ----------------
     67  * Value is:
     68  * <?cs if:x>0 ?>
     69  *   positive
     70  * <?cs elif:x<0 ?>
     71  *   negative
     72  * <?cs else ?>
     73  *   zero
     74  * <?cs /if ?>.
     75  * ----------------
     76  * }
     77  * </pre>
     78  * is equivalent to:
     79  *
     80  * <pre>
     81  * {@literal
     82  * ----------------
     83  * Value is:
     84  * <?cs if:x>0 ?>  positive
     85  * <?cs elif:x<0 ?>  negative
     86  * <?cs else ?>  zero
     87  * <?cs /if ?>.
     88  * ----------------
     89  * }
     90  * </pre>
     91  * but is much easier to read.
     92  * <p>
     93  * Where data commands become empty they are replaced with Noop commands, which effectively removes
     94  * them from the tree. These can be removed (if needed) by a later optimization step but shouldn't
     95  * cause any issues.
     96  */
     97 public class StructuralWhitespaceStripper extends DepthFirstAdapter {
     98   /**
     99    * A regex snippet to match sequences of inline whitespace. The easiest way to define this is as
    100    * "not (non-space or newline)".
    101    */
    102   private static final String IWS = "[^\\S\\n]*";
    103 
    104   /** Pattern to match strings that consist only of inline whitespace. */
    105   private static final Pattern INLINE_WHITESPACE = Pattern.compile(IWS);
    106 
    107   /**
    108    * Pattern to match strings that start with arbitrary (inline) whitespace, followed by a newline.
    109    */
    110   private static final Pattern STARTS_WITH_NEWLINE = Pattern.compile("^" + IWS + "\\n");
    111 
    112   /**
    113    * Pattern to match strings that end with a newline, followed by trailing (inline) whitespace.
    114    */
    115   private static final Pattern ENDS_WITH_NEWLINE = Pattern.compile("\\n" + IWS + "$");
    116 
    117   /**
    118    * Pattern to capture the content of a string after a leading newline. Only ever used on input
    119    * that previously matched STARTS_WITH_NEWLINE.
    120    */
    121   private static final Pattern LEADING_WHITESPACE_AND_NEWLINE =
    122       Pattern.compile("^" + IWS + "\\n(.*)$", Pattern.DOTALL);
    123 
    124   /**
    125    * Pattern to capture the content of a string before a trailing newline. Note that this may have
    126    * to match text that has already had the final newline removed so we must greedily match the
    127    * whitespace rather than the content.
    128    */
    129   private static final Pattern TRAILING_WHITESPACE =
    130       Pattern.compile("^(.*?)" + IWS + "$", Pattern.DOTALL);
    131 
    132   /**
    133    * Flag to tell us if we are in whitespace chomping mode. By default we start in this mode because
    134    * the content of the first line in a template is not preceded by a newline (but should behave as
    135    * if it was). Once this flag has been set to false, it remains unset until a new line is
    136    * encountered.
    137    * <p>
    138    * Note that we only actually remove whitespace when we find the terminating condition rather than
    139    * when as visit the nodes (ie, this mode can be aborted and any visited whitespace will be left
    140    * untouched).
    141    */
    142   private boolean maybeChompWhitespace = true;
    143 
    144   /**
    145    * Flag to tell us if the line we are processing has an inline command in it.
    146    * <p>
    147    * An inline command is a complex command (eg. 'if', 'loop') where both the start and end of the
    148    * command exists on the same line. Non-complex commands (eg. 'var', 'name') cannot be considered
    149    * inline.
    150    * <p>
    151    * This flag is set when we process the start of a complex command and unset when we finish
    152    * processing a line. Thus if the flag is still true when we encounter the end of a complex
    153    * command, it tells us that (at least one) complex command was entirely contained within the
    154    * current line and that we should stop chomping whitespace for the current line.
    155    * <p>
    156    * This means we can detect input such as:
    157    *
    158    * <pre>
    159    * {@literal <?cs if:x?>   <?cs /if?>}
    160    * </pre>
    161    * for which the trailing newline and surrounding whitespace should not be removed, as opposed to:
    162    *
    163    * <pre>
    164    * {@literal <?cs if:x?>
    165    *    something
    166    *  <?cs /if?>
    167    * }
    168    * </pre>
    169    * where the trailing newlines for both the opening and closing of the 'if' command should be
    170    * removed.
    171    */
    172   private boolean currentLineContainsInlineComplexCommand = false;
    173 
    174   /**
    175    * First data command we saw when we started 'chomping' whitespace (note that this can be null if
    176    * we are at the beginning of a file or when we have chomped a previous data command down to
    177    * nothing).
    178    */
    179   private ADataCommand firstChompedData = null;
    180 
    181   /**
    182    * Intermediate whitespace-only data commands that we may need to remove.
    183    * <p>
    184    * This list is built up as we visit commands and is either processed when we need to remove
    185    * structural whitespace or cleared if we encounter situations that prohibit whitespace removal.
    186    */
    187   private List<ADataCommand> whitespaceData = new ArrayList<ADataCommand>();
    188 
    189   private static boolean isInlineWhitespace(String text) {
    190     return INLINE_WHITESPACE.matcher(text).matches();
    191   }
    192 
    193   private static boolean startsWithNewline(String text) {
    194     return STARTS_WITH_NEWLINE.matcher(text).find();
    195   }
    196 
    197   private static boolean endsWithNewline(String text) {
    198     return ENDS_WITH_NEWLINE.matcher(text).find();
    199   }
    200 
    201   /**
    202    * Removes leading whitespace (including first newline) from the given string. The text must start
    203    * with optional whitespace followed by a newline.
    204    */
    205   private static String stripLeadingWhitespaceAndNewline(String text) {
    206     Matcher matcher = LEADING_WHITESPACE_AND_NEWLINE.matcher(text);
    207     if (!matcher.matches()) {
    208       throw new IllegalStateException("Text '" + text + "' should have leading whitespace/newline.");
    209     }
    210     return matcher.group(1);
    211   }
    212 
    213   /**
    214    * Removes trailing whitespace (if present) from the given string.
    215    */
    216   private static String stripTrailingWhitespace(String text) {
    217     Matcher matcher = TRAILING_WHITESPACE.matcher(text);
    218     if (!matcher.matches()) {
    219       // The trailing whitespace regex should never fail to match a string.
    220       throw new AssertionError("Error in regular expression");
    221     }
    222     return matcher.group(1);
    223   }
    224 
    225   /**
    226    * Remove whitespace (including first newline) from the start of the given data command (replacing
    227    * it with a Noop command if it becomes empty). Returns a modified data command, or null if all
    228    * text was removed.
    229    * <p>
    230    * The given command can be null at the beginning of the file or if the original data command was
    231    * entirely consumed by a previous strip operation (remember that data commands can be processed
    232    * twice, at both the start and end of a whitespace sequence).
    233    */
    234   private static ADataCommand stripLeadingWhitespaceAndNewline(ADataCommand data) {
    235     if (data != null) {
    236       String text = stripLeadingWhitespaceAndNewline(data.getData().getText());
    237       if (text.isEmpty()) {
    238         data.replaceBy(new ANoopCommand());
    239         // Returning null just means we have chomped the whitespace to nothing.
    240         data = null;
    241       } else {
    242         data.setData(new TData(text));
    243       }
    244     }
    245     return data;
    246   }
    247 
    248   /**
    249    * Removes whitespace from the end of the given data command (replacing it with a Noop command if
    250    * it becomes empty).
    251    */
    252   private static void stripTrailingWhitespace(ADataCommand data) {
    253     if (data != null) {
    254       String text = stripTrailingWhitespace(data.getData().getText());
    255       if (text.isEmpty()) {
    256         data.replaceBy(new ANoopCommand());
    257       } else {
    258         data.setData(new TData(text));
    259       }
    260     }
    261   }
    262 
    263   /**
    264    * Removes all data commands collected while chomping the current line and clears the given list.
    265    */
    266   private static void removeWhitespace(List<ADataCommand> whitespaceData) {
    267     for (ADataCommand data : whitespaceData) {
    268       data.replaceBy(new ANoopCommand());
    269     }
    270     whitespaceData.clear();
    271   }
    272 
    273   @Override
    274   public void caseStart(Start node) {
    275     // Process the hierarchy.
    276     super.caseStart(node);
    277     // We might end after processing a non-data node, so deal with any
    278     // unprocessed whitespace before we exit.
    279     if (maybeChompWhitespace) {
    280       stripTrailingWhitespace(firstChompedData);
    281       removeWhitespace(whitespaceData);
    282       firstChompedData = null;
    283     }
    284     // Verify we have consumed (and cleared) any object references.
    285     if (firstChompedData != null) {
    286       throw new IllegalStateException("Unexpected first data node.");
    287     }
    288     if (!whitespaceData.isEmpty()) {
    289       throw new IllegalStateException("Unexpected data nodes.");
    290     }
    291   }
    292 
    293   @Override
    294   public void caseADataCommand(ADataCommand data) {
    295     final String originalText = data.getData().getText();
    296     if (maybeChompWhitespace) {
    297       if (isInlineWhitespace(originalText)) {
    298         // This data command is whitespace between two commands on the same
    299         // line, simply chomp it and continue ("Om-nom-nom").
    300         whitespaceData.add(data);
    301         return;
    302       }
    303       if (startsWithNewline(originalText)) {
    304         // This data command is at the end of a line that contains only
    305         // structural commands and whitespace. We remove all whitespace
    306         // associated with this line by:
    307         // * Stripping whitespace from the end of the data command at the start
    308         // of this line.
    309         // * Removing all intermediate (whitespace only) data commands.
    310         // * Stripping whitespace from the start of the current data command.
    311         stripTrailingWhitespace(firstChompedData);
    312         removeWhitespace(whitespaceData);
    313         data = stripLeadingWhitespaceAndNewline(data);
    314         currentLineContainsInlineComplexCommand = false;
    315       } else {
    316         // This data command contains some non-whitespace text so we must abort
    317         // the chomping of this line and output it normally.
    318         abortWhitespaceChompingForCurrentLine();
    319       }
    320     }
    321     // Test to see if we should start chomping on the next line.
    322     maybeChompWhitespace = endsWithNewline(originalText);
    323     // Note that data can be null here if we stripped all the whitespace from
    324     // it (which means that firstChompedData can be null next time around).
    325     firstChompedData = maybeChompWhitespace ? data : null;
    326   }
    327 
    328   /**
    329    * Helper method to abort whitespace processing for the current line. This method is idempotent on
    330    * a per line basis, and once it has been called the state is only reset at the start of the next
    331    * line.
    332    */
    333   private void abortWhitespaceChompingForCurrentLine() {
    334     maybeChompWhitespace = false;
    335     currentLineContainsInlineComplexCommand = false;
    336     whitespaceData.clear();
    337   }
    338 
    339   // ---- Inline commands that prohibit whitespace removal. ----
    340 
    341   @Override
    342   public void inAAltCommand(AAltCommand node) {
    343     abortWhitespaceChompingForCurrentLine();
    344   }
    345 
    346   @Override
    347   public void inACallCommand(ACallCommand node) {
    348     abortWhitespaceChompingForCurrentLine();
    349   }
    350 
    351   @Override
    352   public void inAEvarCommand(AEvarCommand node) {
    353     abortWhitespaceChompingForCurrentLine();
    354   }
    355 
    356   @Override
    357   public void inALvarCommand(ALvarCommand node) {
    358     abortWhitespaceChompingForCurrentLine();
    359   }
    360 
    361   @Override
    362   public void inANameCommand(ANameCommand node) {
    363     abortWhitespaceChompingForCurrentLine();
    364   }
    365 
    366   @Override
    367   public void inASetCommand(ASetCommand node) {
    368     abortWhitespaceChompingForCurrentLine();
    369   }
    370 
    371   @Override
    372   public void inAUvarCommand(AUvarCommand node) {
    373     abortWhitespaceChompingForCurrentLine();
    374   }
    375 
    376   @Override
    377   public void inAVarCommand(AVarCommand node) {
    378     abortWhitespaceChompingForCurrentLine();
    379   }
    380 
    381   // ---- Two part (open/close) commands that can have child commands. ----
    382 
    383   public void enterComplexCommand() {
    384     currentLineContainsInlineComplexCommand = true;
    385   }
    386 
    387   public void exitComplexCommand() {
    388     if (currentLineContainsInlineComplexCommand) {
    389       abortWhitespaceChompingForCurrentLine();
    390     }
    391   }
    392 
    393   @Override
    394   public void caseAAltCommand(AAltCommand node) {
    395     enterComplexCommand();
    396     super.caseAAltCommand(node);
    397     exitComplexCommand();
    398   }
    399 
    400   @Override
    401   public void caseADefCommand(ADefCommand node) {
    402     enterComplexCommand();
    403     super.caseADefCommand(node);
    404     exitComplexCommand();
    405   }
    406 
    407   @Override
    408   public void caseAEachCommand(AEachCommand node) {
    409     enterComplexCommand();
    410     super.caseAEachCommand(node);
    411     exitComplexCommand();
    412   }
    413 
    414   @Override
    415   public void caseAEscapeCommand(AEscapeCommand node) {
    416     enterComplexCommand();
    417     super.caseAEscapeCommand(node);
    418     exitComplexCommand();
    419   }
    420 
    421   @Override
    422   public void caseAIfCommand(AIfCommand node) {
    423     enterComplexCommand();
    424     super.caseAIfCommand(node);
    425     exitComplexCommand();
    426   }
    427 
    428   @Override
    429   public void caseALoopCommand(ALoopCommand node) {
    430     enterComplexCommand();
    431     super.caseALoopCommand(node);
    432     exitComplexCommand();
    433   }
    434 
    435   @Override
    436   public void caseALoopIncCommand(ALoopIncCommand node) {
    437     enterComplexCommand();
    438     super.caseALoopIncCommand(node);
    439     exitComplexCommand();
    440   }
    441 
    442   @Override
    443   public void caseALoopToCommand(ALoopToCommand node) {
    444     enterComplexCommand();
    445     super.caseALoopToCommand(node);
    446     exitComplexCommand();
    447   }
    448 
    449   @Override
    450   public void caseAWithCommand(AWithCommand node) {
    451     enterComplexCommand();
    452     super.caseAWithCommand(node);
    453     exitComplexCommand();
    454   }
    455 }
    456