Home | History | Annotate | Download | only in pyyaml
      1 /**
      2  * Copyright (c) 2008, http://www.snakeyaml.org
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 package org.pyyaml;
     17 
     18 import java.util.ArrayList;
     19 import java.util.List;
     20 import java.util.Map;
     21 
     22 import org.yaml.snakeyaml.error.Mark;
     23 import org.yaml.snakeyaml.nodes.Tag;
     24 import org.yaml.snakeyaml.scanner.Scanner;
     25 import org.yaml.snakeyaml.scanner.ScannerImpl;
     26 import org.yaml.snakeyaml.tokens.AliasToken;
     27 import org.yaml.snakeyaml.tokens.AnchorToken;
     28 import org.yaml.snakeyaml.tokens.DirectiveToken;
     29 import org.yaml.snakeyaml.tokens.DocumentStartToken;
     30 import org.yaml.snakeyaml.tokens.FlowEntryToken;
     31 import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
     32 import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
     33 import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
     34 import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
     35 import org.yaml.snakeyaml.tokens.KeyToken;
     36 import org.yaml.snakeyaml.tokens.ScalarToken;
     37 import org.yaml.snakeyaml.tokens.StreamEndToken;
     38 import org.yaml.snakeyaml.tokens.StreamStartToken;
     39 import org.yaml.snakeyaml.tokens.TagToken;
     40 import org.yaml.snakeyaml.tokens.TagTuple;
     41 import org.yaml.snakeyaml.tokens.Token;
     42 import org.yaml.snakeyaml.tokens.ValueToken;
     43 
     44 public class CanonicalScanner implements Scanner {
     45     private static final String DIRECTIVE = "%YAML 1.1";
     46     private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES;
     47 
     48     private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS;
     49 
     50     private String data;
     51     private int index;
     52     public ArrayList<Token> tokens;
     53     private boolean scanned;
     54     private Mark mark;
     55 
     56     public CanonicalScanner(String data) {
     57         this.data = data + "\0";
     58         this.index = 0;
     59         this.tokens = new ArrayList<Token>();
     60         this.scanned = false;
     61         this.mark = new Mark("test", 0, 0, 0, data, 0);
     62     }
     63 
     64     public boolean checkToken(Token.ID... choices) {
     65         if (!scanned) {
     66             scan();
     67         }
     68         if (!tokens.isEmpty()) {
     69             if (choices.length == 0) {
     70                 return true;
     71             }
     72             Token first = this.tokens.get(0);
     73             for (Token.ID choice : choices) {
     74                 if (first.getTokenId() == choice) {
     75                     return true;
     76                 }
     77             }
     78         }
     79         return false;
     80     }
     81 
     82     public Token peekToken() {
     83         if (!scanned) {
     84             scan();
     85         }
     86         if (!tokens.isEmpty()) {
     87             return this.tokens.get(0);
     88         }
     89         return null;
     90     }
     91 
     92     public Token getToken() {
     93         if (!scanned) {
     94             scan();
     95         }
     96         return this.tokens.remove(0);
     97     }
     98 
     99     public Token getToken(Token.ID choice) {
    100         Token token = getToken();
    101         if (choice != null && token.getTokenId() != choice) {
    102             throw new CanonicalException("unexpected token " + token);
    103         }
    104         return token;
    105     }
    106 
    107     private void scan() {
    108         this.tokens.add(new StreamStartToken(mark, mark));
    109         boolean stop = false;
    110         while (!stop) {
    111             findToken();
    112             char ch = data.charAt(index);
    113             switch (ch) {
    114             case '\0':
    115                 tokens.add(new StreamEndToken(mark, mark));
    116                 stop = true;
    117                 break;
    118 
    119             case '%':
    120                 tokens.add(scanDirective());
    121                 break;
    122 
    123             case '-':
    124                 if ("---".equals(data.substring(index, index + 3))) {
    125                     index += 3;
    126                     tokens.add(new DocumentStartToken(mark, mark));
    127                 }
    128                 break;
    129 
    130             case '[':
    131                 index++;
    132                 tokens.add(new FlowSequenceStartToken(mark, mark));
    133                 break;
    134 
    135             case '{':
    136                 index++;
    137                 tokens.add(new FlowMappingStartToken(mark, mark));
    138                 break;
    139 
    140             case ']':
    141                 index++;
    142                 tokens.add(new FlowSequenceEndToken(mark, mark));
    143                 break;
    144 
    145             case '}':
    146                 index++;
    147                 tokens.add(new FlowMappingEndToken(mark, mark));
    148                 break;
    149 
    150             case '?':
    151                 index++;
    152                 tokens.add(new KeyToken(mark, mark));
    153                 break;
    154 
    155             case ':':
    156                 index++;
    157                 tokens.add(new ValueToken(mark, mark));
    158                 break;
    159 
    160             case ',':
    161                 index++;
    162                 tokens.add(new FlowEntryToken(mark, mark));
    163                 break;
    164 
    165             case '*':
    166                 tokens.add(scanAlias());
    167                 break;
    168 
    169             case '&':
    170                 tokens.add(scanAlias());
    171                 break;
    172 
    173             case '!':
    174                 tokens.add(scanTag());
    175                 break;
    176 
    177             case '"':
    178                 tokens.add(scanScalar());
    179                 break;
    180 
    181             default:
    182                 throw new CanonicalException("invalid token");
    183             }
    184         }
    185         scanned = true;
    186     }
    187 
    188     private Token scanDirective() {
    189         String chunk1 = data.substring(index, index + DIRECTIVE.length());
    190         char chunk2 = data.charAt(index + DIRECTIVE.length());
    191         if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) {
    192             index += DIRECTIVE.length();
    193             List<Integer> implicit = new ArrayList<Integer>(2);
    194             implicit.add(new Integer(1));
    195             implicit.add(new Integer(1));
    196             return new DirectiveToken<Integer>("YAML", implicit, mark, mark);
    197         } else {
    198             throw new CanonicalException("invalid directive");
    199         }
    200     }
    201 
    202     private Token scanAlias() {
    203         boolean isTokenClassAlias;
    204         if (data.charAt(index) == '*') {
    205             isTokenClassAlias = true;
    206         } else {
    207             isTokenClassAlias = false;
    208         }
    209         index++;
    210         int start = index;
    211         while (", \n\0".indexOf(data.charAt(index)) == -1) {
    212             index++;
    213         }
    214         String value = data.substring(start, index);
    215         Token token;
    216         if (isTokenClassAlias) {
    217             token = new AliasToken(value, mark, mark);
    218         } else {
    219             token = new AnchorToken(value, mark, mark);
    220         }
    221         return token;
    222     }
    223 
    224     private Token scanTag() {
    225         index++;
    226         int start = index;
    227         while (" \n\0".indexOf(data.charAt(index)) == -1) {
    228             index++;
    229         }
    230         String value = data.substring(start, index);
    231         if (value.length() == 0) {
    232             value = "!";
    233         } else if (value.charAt(0) == '!') {
    234             value = Tag.PREFIX + value.substring(1);
    235         } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') {
    236             value = value.substring(1, value.length() - 1);
    237         } else {
    238             value = "!" + value;
    239         }
    240         return new TagToken(new TagTuple("", value), mark, mark);
    241     }
    242 
    243     private Token scanScalar() {
    244         index++;
    245         StringBuilder chunks = new StringBuilder();
    246         int start = index;
    247         boolean ignoreSpaces = false;
    248         while (data.charAt(index) != '"') {
    249             if (data.charAt(index) == '\\') {
    250                 ignoreSpaces = false;
    251                 chunks.append(data.substring(start, index));
    252                 index++;
    253                 char ch = data.charAt(index);
    254                 index++;
    255                 if (ch == '\n') {
    256                     ignoreSpaces = true;
    257                 } else if (QUOTE_CODES.keySet().contains(ch)) {
    258                     int length = QUOTE_CODES.get(ch);
    259                     int code = Integer.parseInt(data.substring(index, index + length), 16);
    260                     chunks.append(String.valueOf((char) code));
    261                     index += length;
    262                 } else {
    263                     if (!QUOTE_REPLACES.keySet().contains(ch)) {
    264                         throw new CanonicalException("invalid escape code");
    265                     }
    266                     chunks.append(QUOTE_REPLACES.get(ch));
    267                 }
    268                 start = index;
    269             } else if (data.charAt(index) == '\n') {
    270                 chunks.append(data.substring(start, index));
    271                 chunks.append(" ");
    272                 index++;
    273                 start = index;
    274                 ignoreSpaces = true;
    275             } else if (ignoreSpaces && data.charAt(index) == ' ') {
    276                 index++;
    277                 start = index;
    278             } else {
    279                 ignoreSpaces = false;
    280                 index++;
    281             }
    282         }
    283         chunks.append(data.substring(start, index));
    284         index++;
    285         return new ScalarToken(chunks.toString(), mark, mark, false);
    286     }
    287 
    288     private void findToken() {
    289         boolean found = false;
    290         while (!found) {
    291             while (" \t".indexOf(data.charAt(index)) != -1) {
    292                 index++;
    293             }
    294             if (data.charAt(index) == '#') {
    295                 while (data.charAt(index) != '\n') {
    296                     index++;
    297                 }
    298             }
    299             if (data.charAt(index) == '\n') {
    300                 index++;
    301             } else {
    302                 found = true;
    303             }
    304         }
    305     }
    306 }
    307