Home | History | Annotate | Download | only in util
      1 /*
      2  * Copyright (C) 2010 Google Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.google.streamhtmlparser.util;
     18 
     19 import com.google.common.base.Preconditions;
     20 
     21 import java.util.Arrays;
     22 
     23 /**
     24  * Implements a circular (ring) buffer of characters with specialized
     25  * application logic in order to determine the context of some
     26  * Javascript content that is being parsed.
     27  *
     28  * This is a specialized class - of no use to external code -
     29  * which aims to be 100% compatible with the corresponding logic
     30  * in the C-version of the HtmlParser, specifically
     31  * <code>jsparser.c</code>. In particular:
     32  * <ul>
     33  *   <li> The API is odd, using negative indexes to access content in
     34  *        the buffer. Changing the API would mean changing the test
     35  *        cases and have more difficulty determining whether we are
     36  *        remaining compatible with the C-version. It is left as an
     37  *        exercise for once the code is very stable and proven.
     38  *   <li> Repeated whitespace is folded into just one character to
     39  *        use the space available efficiently.
     40  *   <li> The buffer size is fixed. There is currently no need to
     41  *        make it variable so we avoid the need for constructors.
     42  * </ul>
     43  */
     44 public class JavascriptTokenBuffer {
     45 
     46   /**
     47    * Size of the ring buffer used to lookup the last token in the javascript
     48    * stream. The size is somewhat arbitrary but must be larger than
     49    * the biggest token we want to lookup plus three: Two delimiters plus
     50    * an empty ring buffer slot.
     51    */
     52   private static final int BUFFER_SIZE = 18;
     53 
     54   /** Storage implementing the circular buffer. */
     55   private final char[] buffer;
     56 
     57   /** Index of the first item in our circular buffer. */
     58   private int startIndex;
     59 
     60   /** Index of the last item in our circular buffer. */
     61   private int endIndex;
     62 
     63   /**
     64    * Constructs an empty javascript token buffer. The size is fixed,
     65    * see {@link #BUFFER_SIZE}.
     66    */
     67   public JavascriptTokenBuffer() {
     68     buffer = new char[BUFFER_SIZE];
     69     startIndex = 0;
     70     endIndex = 0;
     71   }
     72 
     73   /**
     74    * Constructs a javascript token buffer that is identical to
     75    * the one given. In particular, it has the same size and contents.
     76    *
     77    * @param aJavascriptTokenBuffer the {@code JavascriptTokenBuffer} to copy
     78    */
     79   public JavascriptTokenBuffer(JavascriptTokenBuffer aJavascriptTokenBuffer) {
     80     buffer = Arrays.copyOf(aJavascriptTokenBuffer.buffer,
     81                            aJavascriptTokenBuffer.buffer.length);
     82     startIndex = aJavascriptTokenBuffer.startIndex;
     83     endIndex = aJavascriptTokenBuffer.endIndex;
     84   }
     85 
     86   /**
     87    * A simple wrapper over <code>appendChar</code>, it appends a string
     88    * to the buffer. Sequences of whitespace and newlines
     89    * are folded into one character to save space. Null strings are
     90    * not allowed.
     91    *
     92    * @param input the {@code String} to append, cannot be {@code null}
     93    */
     94   // TODO: Move to testing since not used in code.
     95   public void appendString(String input) {
     96     if (input == null) {
     97       throw new NullPointerException("input == null is not allowed");
     98     }
     99     for (int i = 0; i < input.length(); i++) {
    100       appendChar(input.charAt(i));
    101     }
    102   }
    103 
    104   /**
    105    * Appends a character to the buffer. We fold sequences of whitespace and
    106    * newlines into one to save space.
    107    *
    108    * @param input the {@code char} to append
    109    */
    110   public void appendChar(char input) {
    111     if (HtmlUtils.isJavascriptWhitespace(input) &&
    112         HtmlUtils.isJavascriptWhitespace(getChar(-1))) {
    113       return;
    114     }
    115     buffer[endIndex] = input;
    116     endIndex = (endIndex + 1) % buffer.length;
    117     if (endIndex == startIndex) {
    118       startIndex = (endIndex + 1) % buffer.length;
    119     }
    120   }
    121 
    122   /**
    123    * Returns the last character in the buffer and removes it from the buffer
    124    * or the NUL character '\0' if the buffer is empty.
    125    *
    126    * @return last character in the buffer or '\0' if the buffer is empty
    127    */
    128   public char popChar() {
    129     if (startIndex == endIndex) {
    130       return '\0';
    131     }
    132     endIndex--;
    133     if (endIndex < 0) {
    134       endIndex += buffer.length;
    135     }
    136     return buffer[endIndex];
    137   }
    138 
    139   /**
    140    * Returns the character at a given index in the buffer or nul ('\0')
    141    * if the index is outside the range of the buffer. Such could happen
    142    * if the buffer is not filled enough or the index is larger than the
    143    * size of the buffer.
    144    *
    145    * <p>Position must be negative where -1 is the index of the last
    146    * character in the buffer.
    147    *
    148    * @param position The index into the buffer
    149    *
    150    * @return character at the requested index
    151    */
    152   public char getChar(int position) {
    153     assert(position < 0);   // Developer error if it triggers.
    154 
    155     int absolutePosition = getAbsolutePosition(position);
    156     if (absolutePosition < 0) {
    157       return '\0';
    158     }
    159 
    160     return buffer[absolutePosition];
    161   }
    162 
    163   /**
    164    * Sets the given {@code input} at the given {@code position} of the buffer.
    165    * Returns {@code true} if we succeeded or {@code false} if we
    166    * failed (i.e. the write was beyond the buffer boundary).
    167    *
    168    * <p>Index positions are negative where -1 is the index of the
    169    * last character in the buffer.
    170    *
    171    * @param position The index at which to set the character
    172    * @param input The character to set in the buffer
    173    * @return {@code true} if we succeeded, {@code false} otherwise
    174    */
    175   public boolean setChar(int position, char input) {
    176     assert(position < 0);   // Developer error if it triggers.
    177 
    178     int absolutePosition = getAbsolutePosition(position);
    179     if (absolutePosition < 0) {
    180       return false;
    181     }
    182 
    183     buffer[absolutePosition] = input;
    184     return true;
    185   }
    186 
    187 
    188   /**
    189    * Returns the last javascript identifier/keyword in the buffer.
    190    *
    191    * @return the last identifier or {@code null} if none was found
    192    */
    193   public String getLastIdentifier() {
    194     int end = -1;
    195 
    196     if (HtmlUtils.isJavascriptWhitespace(getChar(-1))) {
    197       end--;
    198     }
    199     int position;
    200     for (position = end; HtmlUtils.isJavascriptIdentifier(getChar(position));
    201          position--) {
    202     }
    203     if ((position + 1) >= end) {
    204       return null;
    205     }
    206     return slice(position + 1, end);
    207   }
    208 
    209   /**
    210    * Returns a slice of the buffer delimited by the given indices.
    211    *
    212    * The start and end indexes represent the start and end of the
    213    * slice to copy. If the start argument extends beyond the beginning
    214    * of the buffer, the slice will only contain characters
    215    * starting from the beginning of the buffer.
    216    *
    217    * @param start The index of the first character the copy
    218    * @param end the index of the last character to copy
    219    *
    220    * @return {@code String} between the given indices
    221    */
    222   public String slice(int start, int end) {
    223     // Developer error if any of the asserts below fail.
    224     Preconditions.checkArgument(start <= end);
    225     Preconditions.checkArgument(start < 0);
    226     Preconditions.checkArgument(end < 0);
    227 
    228     StringBuffer output = new StringBuffer();
    229     for (int position = start; position <= end; position++) {
    230       char c = getChar(position);
    231       if (c != '\0') {
    232         output.append(c);
    233       }
    234     }
    235     return new String(output);
    236   }
    237 
    238   /**
    239    * Returns the position relative to the start of the buffer or -1
    240    * if the position is past the size of the buffer.
    241    *
    242    * @param position the index to be translated
    243    * @return the position relative to the start of the buffer
    244    */
    245   private int getAbsolutePosition(int position) {
    246     assert (position < 0);   // Developer error if it triggers.
    247     if (position <= -buffer.length) {
    248       return -1;
    249     }
    250     int len = endIndex - startIndex;
    251     if (len < 0) {
    252       len += buffer.length;
    253     }
    254     if (position < -len) {
    255       return -1;
    256     }
    257     int absolutePosition = (position + endIndex) % buffer.length;
    258     if (absolutePosition < 0) {
    259       absolutePosition += buffer.length;
    260     }
    261     return absolutePosition;
    262   }
    263 }
    264