Home | History | Annotate | Download | only in reader
      1 /**
      2  * Copyright (c) 2008, http://www.snakeyaml.org
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 package org.yaml.snakeyaml.reader;
     17 
     18 import java.io.IOException;
     19 import java.io.Reader;
     20 import java.nio.charset.Charset;
     21 import java.util.regex.Matcher;
     22 import java.util.regex.Pattern;
     23 
     24 import org.yaml.snakeyaml.error.Mark;
     25 import org.yaml.snakeyaml.error.YAMLException;
     26 import org.yaml.snakeyaml.scanner.Constant;
     27 
     28 /**
     29  * Reader: checks if characters are in allowed range, adds '\0' to the end.
     30  */
     31 public class StreamReader {
     32     public final static Pattern NON_PRINTABLE = Pattern
     33             .compile("[^\t\n\r\u0020-\u007E\u0085\u00A0-\uD7FF\uE000-\uFFFD]");
     34     private String name;
     35     private final Reader stream;
     36     private int pointer = 0;
     37     private boolean eof = true;
     38     private String buffer;
     39     private int index = 0;
     40     private int line = 0;
     41     private int column = 0;
     42     private char[] data;
     43 
     44     public StreamReader(String stream) {
     45         this.name = "'string'";
     46         this.buffer = ""; // to set length to 0
     47         checkPrintable(stream);
     48         this.buffer = stream + "\0";
     49         this.stream = null;
     50         this.eof = true;
     51         this.data = null;
     52     }
     53 
     54     public StreamReader(Reader reader) {
     55         this.name = "'reader'";
     56         this.buffer = "";
     57         this.stream = reader;
     58         this.eof = false;
     59         this.data = new char[1024];
     60         this.update();
     61     }
     62 
     63     void checkPrintable(CharSequence data) {
     64         Matcher em = NON_PRINTABLE.matcher(data);
     65         if (em.find()) {
     66             int position = this.index + this.buffer.length() - this.pointer + em.start();
     67             throw new ReaderException(name, position, em.group().charAt(0),
     68                     "special characters are not allowed");
     69         }
     70     }
     71 
     72     /**
     73      * Checks <code>chars</chars> for the non-printable characters.
     74      *
     75      * @param chars
     76      *            the array where to search.
     77      * @param begin
     78      *            the beginning index, inclusive.
     79      * @param end
     80      *            the ending index, exclusive.
     81      * @throws ReaderException
     82      *             if <code>chars</code> contains non-printable character(s).
     83      */
     84     void checkPrintable(final char[] chars, final int begin, final int end) {
     85         for (int i = begin; i < end; i++) {
     86             final char c = chars[i];
     87 
     88             if (isPrintable(c)) {
     89                 continue;
     90             }
     91 
     92             int position = this.index + this.buffer.length() - this.pointer + i;
     93             throw new ReaderException(name, position, c, "special characters are not allowed");
     94         }
     95     }
     96 
     97     public static boolean isPrintable(final char c) {
     98         return (c >= '\u0020' && c <= '\u007E') || c == '\n' || c == '\r' || c == '\t'
     99                 || c == '\u0085' || (c >= '\u00A0' && c <= '\uD7FF')
    100                 || (c >= '\uE000' && c <= '\uFFFD');
    101     }
    102 
    103     public Mark getMark() {
    104         return new Mark(name, this.index, this.line, this.column, this.buffer, this.pointer);
    105     }
    106 
    107     public void forward() {
    108         forward(1);
    109     }
    110 
    111     /**
    112      * read the next length characters and move the pointer.
    113      *
    114      * @param length
    115      */
    116     public void forward(int length) {
    117         if (this.pointer + length + 1 >= this.buffer.length()) {
    118             update();
    119         }
    120         char ch = 0;
    121         for (int i = 0; i < length; i++) {
    122             ch = this.buffer.charAt(this.pointer);
    123             this.pointer++;
    124             this.index++;
    125             if (Constant.LINEBR.has(ch) || (ch == '\r' && buffer.charAt(pointer) != '\n')) {
    126                 this.line++;
    127                 this.column = 0;
    128             } else if (ch != '\uFEFF') {
    129                 this.column++;
    130             }
    131         }
    132     }
    133 
    134     public char peek() {
    135         return this.buffer.charAt(this.pointer);
    136     }
    137 
    138     /**
    139      * Peek the next index-th character
    140      *
    141      * @param index
    142      * @return the next index-th character
    143      */
    144     public char peek(int index) {
    145         if (this.pointer + index + 1 > this.buffer.length()) {
    146             update();
    147         }
    148         return this.buffer.charAt(this.pointer + index);
    149     }
    150 
    151     /**
    152      * peek the next length characters
    153      *
    154      * @param length
    155      * @return the next length characters
    156      */
    157     public String prefix(int length) {
    158         if (this.pointer + length >= this.buffer.length()) {
    159             update();
    160         }
    161         if (this.pointer + length > this.buffer.length()) {
    162             return this.buffer.substring(this.pointer);
    163         }
    164         return this.buffer.substring(this.pointer, this.pointer + length);
    165     }
    166 
    167     /**
    168      * prefix(length) immediately followed by forward(length)
    169      */
    170     public String prefixForward(int length) {
    171         final String prefix = prefix(length);
    172         this.pointer += length;
    173         this.index += length;
    174         // prefix never contains new line characters
    175         this.column += length;
    176         return prefix;
    177     }
    178 
    179     private void update() {
    180         if (!this.eof) {
    181             this.buffer = buffer.substring(this.pointer);
    182             this.pointer = 0;
    183             try {
    184                 int converted = this.stream.read(data);
    185                 if (converted > 0) {
    186                     /*
    187                      * Let's create StringBuilder manually. Anyway str1 + str2
    188                      * generates new StringBuilder(str1).append(str2).toSting()
    189                      * Giving correct capacity to the constructor prevents
    190                      * unnecessary operations in appends.
    191                      */
    192                     checkPrintable(data, 0, converted);
    193                     this.buffer = new StringBuilder(buffer.length() + converted).append(buffer)
    194                             .append(data, 0, converted).toString();
    195                 } else {
    196                     this.eof = true;
    197                     this.buffer += "\0";
    198                 }
    199             } catch (IOException ioe) {
    200                 throw new YAMLException(ioe);
    201             }
    202         }
    203     }
    204 
    205     public int getColumn() {
    206         return column;
    207     }
    208 
    209     public Charset getEncoding() {
    210         return Charset.forName(((UnicodeReader) this.stream).getEncoding());
    211     }
    212 
    213     public int getIndex() {
    214         return index;
    215     }
    216 
    217     public int getLine() {
    218         return line;
    219     }
    220 }
    221