Home | History | Annotate | Download | only in front-end
      1 /*
      2  * Copyright (C) 2009 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 // Generate js file as follows:
     32 //
     33 // re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \
     34 // | sed 's|^yy\([^:]*\)*\:|case \1:|' \
     35 // | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
     36 // | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \
     37 // | sed 's|[*]cursor|this._charAt(cursor)|' \
     38 // | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
     39 // | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
     40 // | sed 's|unsigned\ int|var|' \
     41 // | sed 's|var\ yych|case 1: var yych|'
     42 
     43 WebInspector.SourceHTMLTokenizer = function()
     44 {
     45     WebInspector.SourceTokenizer.call(this);
     46 
     47     // The order is determined by the generated code.
     48     this._lexConditions = {
     49         INITIAL: 0,
     50         COMMENT: 1,
     51         DOCTYPE: 2,
     52         TAG: 3,
     53         DSTRING: 4,
     54         SSTRING: 5
     55     };
     56     this.case_INITIAL = 1000;
     57     this.case_COMMENT = 1001;
     58     this.case_DOCTYPE = 1002;
     59     this.case_TAG = 1003;
     60     this.case_DSTRING = 1004;
     61     this.case_SSTRING = 1005;
     62 
     63     this._parseConditions = {
     64         INITIAL: 0,
     65         ATTRIBUTE: 1,
     66         ATTRIBUTE_VALUE: 2,
     67         LINKIFY: 4,
     68         A_NODE: 8,
     69         SCRIPT: 16,
     70         STYLE: 32
     71     };
     72 
     73     this.initialCondition = { lexCondition: this._lexConditions.INITIAL, parseCondition: this._parseConditions.INITIAL };
     74     this.condition = this.initialCondition;
     75 }
     76 
     77 WebInspector.SourceHTMLTokenizer.prototype = {
     78     set line(line) {
     79         if (this._internalJavaScriptTokenizer) {
     80             var match = /<\/script/i.exec(line);
     81             if (match) {
     82                 this._internalJavaScriptTokenizer.line = line.substring(0, match.index);
     83             } else
     84                 this._internalJavaScriptTokenizer.line = line;
     85         } else if (this._internalCSSTokenizer) {
     86             var match = /<\/style/i.exec(line);
     87             if (match) {
     88                 this._internalCSSTokenizer.line = line.substring(0, match.index);
     89             } else
     90                 this._internalCSSTokenizer.line = line;
     91         }
     92         this._line = line;
     93     },
     94 
     95     _isExpectingAttribute: function()
     96     {
     97         return this._condition.parseCondition & this._parseConditions.ATTRIBUTE;
     98     },
     99 
    100     _isExpectingAttributeValue: function()
    101     {
    102         return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE;
    103     },
    104 
    105     _setExpectingAttribute: function()
    106     {
    107         if (this._isExpectingAttributeValue())
    108             this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE;
    109         this._condition.parseCondition |= this._parseConditions.ATTRIBUTE;
    110     },
    111 
    112     _setExpectingAttributeValue: function()
    113     {
    114         if (this._isExpectingAttribute())
    115             this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE;
    116         this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE;
    117     },
    118 
    119     _stringToken: function(cursor, stringEnds)
    120     {
    121         if (!this._isExpectingAttributeValue()) {
    122             this.tokenType = null;
    123             return cursor;
    124         }
    125         this.tokenType = this._attrValueTokenType();
    126         if (stringEnds)
    127             this._setExpectingAttribute();
    128         return cursor;
    129     },
    130 
    131     _attrValueTokenType: function()
    132     {
    133         if (this._condition.parseCondition & this._parseConditions.LINKIFY) {
    134             if (this._condition.parseCondition & this._parseConditions.A_NODE)
    135                 return "html-external-link";
    136             return "html-resource-link";
    137         }
    138         return "html-attribute-value";
    139     },
    140 
    141     nextToken: function(cursor)
    142     {
    143         if (this._internalJavaScriptTokenizer) {
    144             // Re-set line to force </script> detection first.
    145             this.line = this._line;
    146             if (cursor !== this._internalJavaScriptTokenizer._line.length) {
    147                 // Tokenizer is stateless, so restore its condition before tokenizing and save it after.
    148                 this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition;
    149                 var result = this._internalJavaScriptTokenizer.nextToken(cursor);
    150                 this.tokenType = this._internalJavaScriptTokenizer.tokenType;
    151                 this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition;
    152                 return result;
    153             } else if (cursor !== this._line.length)
    154                 delete this._internalJavaScriptTokenizer;
    155         } else if (this._internalCSSTokenizer) {
    156             // Re-set line to force </style> detection first.
    157             this.line = this._line;
    158             if (cursor !== this._internalCSSTokenizer._line.length) {
    159                 // Tokenizer is stateless, so restore its condition before tokenizing and save it after.
    160                 this._internalCSSTokenizer.condition = this._condition.internalCSSTokenizerCondition;
    161                 var result = this._internalCSSTokenizer.nextToken(cursor);
    162                 this.tokenType = this._internalCSSTokenizer.tokenType;
    163                 this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.condition;
    164                 return result;
    165             } else if (cursor !== this._line.length)
    166                 delete this._internalCSSTokenizer;
    167         }
    168 
    169         var cursorOnEnter = cursor;
    170         var gotoCase = 1;
    171         while (1) {
    172             switch (gotoCase)
    173             // Following comment is replaced with generated state machine.
    174             /*!re2c
    175                 re2c:define:YYCTYPE  = "var";
    176                 re2c:define:YYCURSOR = cursor;
    177                 re2c:define:YYGETCONDITION = "this.getLexCondition";
    178                 re2c:define:YYSETCONDITION = "this.setLexCondition";
    179                 re2c:condprefix = "case this.case_";
    180                 re2c:condenumprefix = "this._lexConditions.";
    181                 re2c:yyfill:enable = 0;
    182                 re2c:labelprefix = "case ";
    183                 re2c:indent:top = 2;
    184                 re2c:indent:string = "    ";
    185 
    186                 CommentContent = ([^-\r\n] | ("--" [^>]))*;
    187                 Comment = "<!--" CommentContent "-->";
    188                 CommentStart = "<!--" CommentContent [\r\n];
    189                 CommentEnd = CommentContent "-->";
    190 
    191                 DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee];
    192                 DocTypeContent = [^\r\n>]*;
    193 
    194                 ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
    195                 ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
    196 
    197                 StyleStart = "<" [Ss] [Tt] [Yy] [Ll] [Ee];
    198                 StyleEnd = "</" [Ss] [Tt] [Yy] [Ll] [Ee];
    199 
    200                 LT = "<" | "</";
    201                 GT = ">";
    202                 EqualSign = "=";
    203 
    204                 DoubleStringContent = [^\r\n\"]*;
    205                 SingleStringContent = [^\r\n\']*;
    206                 StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
    207                 DoubleStringStart = "\"" DoubleStringContent [\r\n];
    208                 DoubleStringEnd = DoubleStringContent "\"";
    209                 SingleStringStart = "'" SingleStringContent [\r\n];
    210                 SingleStringEnd = SingleStringContent "'";
    211 
    212                 Identifier = [^ \r\n"'<>\[\]=]+;
    213 
    214                 <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; }
    215                 <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; }
    216                 <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; }
    217                 <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; }
    218 
    219                 <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
    220                 <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
    221                 <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; }
    222 
    223                 <INITIAL> ScriptStart => TAG
    224                 {
    225                     if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
    226                         // Do not tokenize script tag contents, keep lexer state, even though processing "<".
    227                         this.setLexCondition(this._lexConditions.INITIAL);
    228                         this.tokenType = null;
    229                         return cursor;
    230                     }
    231                     this.tokenType = "html-tag";
    232                     this._condition.parseCondition = this._parseConditions.SCRIPT;
    233                     this._setExpectingAttribute();
    234                     return cursor;
    235                 }
    236 
    237                 <INITIAL> ScriptEnd => TAG
    238                 {
    239                     this.tokenType = "html-tag";
    240                     this._condition.parseCondition = this._parseConditions.INITIAL;
    241                     return cursor;
    242                 }
    243 
    244                 <INITIAL> StyleStart => TAG
    245                 {
    246                     if (this._condition.parseCondition & this._parseConditions.STYLE) {
    247                         // Do not tokenize style tag contents, keep lexer state, even though processing "<".
    248                         this.setLexCondition(this._lexConditions.INITIAL);
    249                         this.tokenType = null;
    250                         return cursor;
    251                     }
    252                     this.tokenType = "html-tag";
    253                     this._condition.parseCondition = this._parseConditions.STYLE;
    254                     this._setExpectingAttribute();
    255                     return cursor;
    256                 }
    257 
    258                 <INITIAL> StyleEnd => TAG
    259                 {
    260                     this.tokenType = "html-tag";
    261                     this._condition.parseCondition = this._parseConditions.INITIAL;
    262                     return cursor;
    263                 }
    264 
    265                 <INITIAL> LT => TAG
    266                 {
    267                     if (this._condition.parseCondition & (this._parseConditions.SCRIPT | this._parseConditions.STYLE)) {
    268                         // Do not tokenize script and style tag contents, keep lexer state, even though processing "<".
    269                         this.setLexCondition(this._lexConditions.INITIAL);
    270                         this.tokenType = null;
    271                         return cursor;
    272                     }
    273 
    274                     this._condition.parseCondition = this._parseConditions.INITIAL;
    275                     this.tokenType = "html-tag";
    276                     return cursor;
    277                 }
    278 
    279                 <TAG> GT => INITIAL
    280                 {
    281                     this.tokenType = "html-tag";
    282                     if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
    283                         if (!this._internalJavaScriptTokenizer) {
    284                             this._internalJavaScriptTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript");
    285                             this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.initialCondition;
    286                         }
    287                         // Do not tokenize script tag contents.
    288                         return cursor;
    289                     }
    290 
    291                     if (this._condition.parseCondition & this._parseConditions.STYLE) {
    292                         if (!this._internalCSSTokenizer) {
    293                             this._internalCSSTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/css");
    294                             this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.initialCondition;
    295                         }
    296                         // Do not tokenize style tag contents.
    297                         return cursor;
    298                     }
    299 
    300                     this._condition.parseCondition = this._parseConditions.INITIAL;
    301                     return cursor;
    302                 }
    303 
    304                 <TAG> StringLiteral { return this._stringToken(cursor, true); }
    305                 <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); }
    306                 <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); }
    307                 <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); }
    308                 <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); }
    309                 <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); }
    310                 <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); }
    311 
    312                 <TAG> EqualSign => TAG
    313                 {
    314                     if (this._isExpectingAttribute())
    315                         this._setExpectingAttributeValue();
    316                     this.tokenType = null;
    317                     return cursor;
    318                 }
    319 
    320                 <TAG> Identifier
    321                 {
    322                     if (this._condition.parseCondition === this._parseConditions.SCRIPT || this._condition.parseCondition === this._parseConditions.STYLE) {
    323                         // Fall through if expecting attributes.
    324                         this.tokenType = null;
    325                         return cursor;
    326                     }
    327 
    328                     if (this._condition.parseCondition === this._parseConditions.INITIAL) {
    329                         this.tokenType = "html-tag";
    330                         this._setExpectingAttribute();
    331                         var token = this._line.substring(cursorOnEnter, cursor);
    332                         if (token === "a")
    333                             this._condition.parseCondition |= this._parseConditions.A_NODE;
    334                         else if (this._condition.parseCondition & this._parseConditions.A_NODE)
    335                             this._condition.parseCondition ^= this._parseConditions.A_NODE;
    336                     } else if (this._isExpectingAttribute()) {
    337                         var token = this._line.substring(cursorOnEnter, cursor);
    338                         if (token === "href" || token === "src")
    339                             this._condition.parseCondition |= this._parseConditions.LINKIFY;
    340                         else if (this._condition.parseCondition |= this._parseConditions.LINKIFY)
    341                             this._condition.parseCondition ^= this._parseConditions.LINKIFY;
    342                         this.tokenType = "html-attribute-name";
    343                     } else if (this._isExpectingAttributeValue())
    344                         this.tokenType = this._attrValueTokenType();
    345                     else
    346                         this.tokenType = null;
    347                     return cursor;
    348                 }
    349                 <*> [^] { this.tokenType = null; return cursor; }
    350             */
    351         }
    352     }
    353 }
    354 
    355 WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype;
    356