Home | History | Annotate | Download | only in front-end
      1 /*
      2  * Copyright (C) 2009 Google Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Google Inc. nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 // Generate js file as follows:
     32 //
     33 // re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \
     34 // | sed 's|^yy\([^:]*\)*\:|case \1:|' \
     35 // | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
     36 // | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \
     37 // | sed 's|[*]cursor|this._charAt(cursor)|' \
     38 // | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
     39 // | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
     40 // | sed 's|unsigned\ int|var|' \
     41 // | sed 's|var\ yych|case 1: var yych|'
     42 
     43 WebInspector.SourceHTMLTokenizer = function()
     44 {
     45     WebInspector.SourceTokenizer.call(this);
     46 
     47     // The order is determined by the generated code.
     48     this._lexConditions = {
     49         INITIAL: 0,
     50         COMMENT: 1,
     51         DOCTYPE: 2,
     52         TAG: 3,
     53         DSTRING: 4,
     54         SSTRING: 5
     55     };
     56     this.case_INITIAL = 1000;
     57     this.case_COMMENT = 1001;
     58     this.case_DOCTYPE = 1002;
     59     this.case_TAG = 1003;
     60     this.case_DSTRING = 1004;
     61     this.case_SSTRING = 1005;
     62 
     63     this._parseConditions = {
     64         INITIAL: 0,
     65         ATTRIBUTE: 1,
     66         ATTRIBUTE_VALUE: 2,
     67         LINKIFY: 4,
     68         A_NODE: 8,
     69         SCRIPT: 16
     70     };
     71 
     72     this.initialCondition = { lexCondition: this._lexConditions.INITIAL, parseCondition: this._parseConditions.INITIAL };
     73 }
     74 
     75 WebInspector.SourceHTMLTokenizer.prototype = {
     76     _isExpectingAttribute: function()
     77     {
     78         return this._parseCondition & this._parseConditions.ATTRIBUTE;
     79     },
     80 
     81     _isExpectingAttributeValue: function()
     82     {
     83         return this._parseCondition & this._parseConditions.ATTRIBUTE_VALUE;
     84     },
     85 
     86     _setExpectingAttribute: function()
     87     {
     88         if (this._isExpectingAttributeValue())
     89             this._parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE;
     90         this._parseCondition |= this._parseConditions.ATTRIBUTE;
     91     },
     92 
     93     _setExpectingAttributeValue: function()
     94     {
     95         if (this._isExpectingAttribute())
     96             this._parseCondition ^= this._parseConditions.ATTRIBUTE;
     97         this._parseCondition |= this._parseConditions.ATTRIBUTE_VALUE;
     98     },
     99 
    100     _stringToken: function(cursor, stringEnds)
    101     {
    102         if (!this._isExpectingAttributeValue()) {
    103             this.tokenType = null;
    104             return cursor;
    105         }
    106         this.tokenType = this._attrValueTokenType();
    107         if (stringEnds)
    108             this._setExpectingAttribute();
    109         return cursor;
    110     },
    111 
    112     _attrValueTokenType: function()
    113     {
    114         if (this._parseCondition & this._parseConditions.LINKIFY) {
    115             if (this._parseCondition & this._parseConditions.A_NODE)
    116                 return "html-external-link";
    117             return "html-resource-link";
    118         }
    119         return "html-attribute-value";
    120     },
    121 
    122     nextToken: function(cursor)
    123     {
    124         var cursorOnEnter = cursor;
    125         var gotoCase = 1;
    126         while (1) {
    127             switch (gotoCase)
    128             // Following comment is replaced with generated state machine.
    129             /*!re2c
    130                 re2c:define:YYCTYPE  = "var";
    131                 re2c:define:YYCURSOR = cursor;
    132                 re2c:define:YYGETCONDITION = "this.getLexCondition";
    133                 re2c:define:YYSETCONDITION = "this.setLexCondition";
    134                 re2c:condprefix = "case this.case_";
    135                 re2c:condenumprefix = "this._lexConditions.";
    136                 re2c:yyfill:enable = 0;
    137                 re2c:labelprefix = "case ";
    138                 re2c:indent:top = 2;
    139                 re2c:indent:string = "    ";
    140 
    141                 CommentContent = ([^-\r\n] | ("--" [^>]))*;
    142                 Comment = "<!--" CommentContent "-->";
    143                 CommentStart = "<!--" CommentContent [\r\n];
    144                 CommentEnd = CommentContent "-->";
    145 
    146                 DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee];
    147                 DocTypeContent = [^\r\n>]*;
    148 
    149                 ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
    150                 ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
    151 
    152                 LT = "<" | "</";
    153                 GT = ">";
    154                 EqualSign = "=";
    155 
    156                 DoubleStringContent = [^\r\n\"]*;
    157                 SingleStringContent = [^\r\n\']*;
    158                 StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
    159                 DoubleStringStart = "\"" DoubleStringContent [\r\n];
    160                 DoubleStringEnd = DoubleStringContent "\"";
    161                 SingleStringStart = "'" SingleStringContent [\r\n];
    162                 SingleStringEnd = SingleStringContent "'";
    163 
    164                 Identifier = [^ \r\n"'<>\[\]=]+;
    165 
    166                 <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; }
    167                 <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; }
    168                 <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; }
    169                 <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; }
    170 
    171                 <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
    172                 <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
    173                 <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; }
    174 
    175                 <INITIAL> ScriptStart => TAG
    176                 {
    177                     this.tokenType = "html-tag";
    178                     this._parseCondition = this._parseConditions.SCRIPT;
    179                     this._setExpectingAttribute();
    180                     return cursor;
    181                 }
    182 
    183                 <INITIAL> ScriptEnd => TAG
    184                 {
    185                     this.tokenType = "html-tag";
    186                     this._parseCondition = this._parseConditions.INITIAL;
    187                     return cursor;
    188                 }
    189 
    190                 <INITIAL> LT => TAG
    191                 {
    192                     if (this._parseCondition & this._parseConditions.SCRIPT) {
    193                         // Do not tokenize script tag contents, keep lexer state although processing "<".
    194                         this.setLexCondition(this._lexConditions.INITIAL);
    195                         this.tokenType = null;
    196                         return cursor;
    197                     }
    198 
    199                     this._parseCondition = this._parseConditions.INITIAL;
    200                     this.tokenType = "html-tag";
    201                     return cursor;
    202                 }
    203 
    204                 <TAG> GT => INITIAL
    205                 {
    206                     if (this._parseCondition & this._parseConditions.SCRIPT) {
    207                         // Do not tokenize script tag contents.
    208                         this.tokenType = null;
    209                         return cursor;
    210                     }
    211 
    212                     this._parseCondition = this._parseConditions.INITIAL;
    213                     this.tokenType = "html-tag";
    214                     return cursor;
    215                 }
    216 
    217                 <TAG> StringLiteral { return this._stringToken(cursor, true); }
    218                 <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); }
    219                 <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); }
    220                 <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); }
    221                 <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); }
    222                 <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); }
    223                 <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); }
    224 
    225                 <TAG> EqualSign => TAG
    226                 {
    227                     if (this._isExpectingAttribute())
    228                         this._setExpectingAttributeValue();
    229                     this.tokenType = null;
    230                     return cursor;
    231                 }
    232 
    233                 <TAG> Identifier
    234                 {
    235                     if (this._parseCondition === this._parseConditions.SCRIPT) {
    236                         // Fall through if expecting attributes.
    237                         this.tokenType = null;
    238                         return cursor;
    239                     }
    240 
    241                     if (this._parseCondition === this._parseConditions.INITIAL) {
    242                         this.tokenType = "html-tag";
    243                         this._setExpectingAttribute();
    244                         var token = this._line.substring(cursorOnEnter, cursor);
    245                         if (token === "a")
    246                             this._parseCondition |= this._parseConditions.A_NODE;
    247                         else if (this._parseCondition & this._parseConditions.A_NODE)
    248                             this._parseCondition ^= this._parseConditions.A_NODE;
    249                     } else if (this._isExpectingAttribute()) {
    250                         var token = this._line.substring(cursorOnEnter, cursor);
    251                         if (token === "href" || token === "src")
    252                             this._parseCondition |= this._parseConditions.LINKIFY;
    253                         else if (this._parseCondition |= this._parseConditions.LINKIFY)
    254                             this._parseCondition ^= this._parseConditions.LINKIFY;
    255                         this.tokenType = "html-attribute-name";
    256                     } else if (this._isExpectingAttributeValue())
    257                         this.tokenType = this._attrValueTokenType();
    258                     else
    259                         this.tokenType = null;
    260                     return cursor;
    261                 }
    262                 <*> [^] { this.tokenType = null; return cursor; }
    263             */
    264         }
    265     }
    266 }
    267 
    268 WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype;
    269