1 /* 2 * Copyright (C) 2009 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 // Generate js file as follows: 32 // 33 // re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \ 34 // | sed 's|^yy\([^:]*\)*\:|case \1:|' \ 35 // | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \ 36 // | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \ 37 // | sed 's|[*]cursor|this._charAt(cursor)|' \ 38 // | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \ 39 // | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \ 40 // | sed 's|unsigned\ int|var|' \ 41 // | sed 's|var\ yych|case 1: var yych|' 42 43 WebInspector.SourceHTMLTokenizer = function() 44 { 45 WebInspector.SourceTokenizer.call(this); 46 47 // The order is determined by the generated code. 48 this._lexConditions = { 49 INITIAL: 0, 50 COMMENT: 1, 51 DOCTYPE: 2, 52 TAG: 3, 53 DSTRING: 4, 54 SSTRING: 5 55 }; 56 this.case_INITIAL = 1000; 57 this.case_COMMENT = 1001; 58 this.case_DOCTYPE = 1002; 59 this.case_TAG = 1003; 60 this.case_DSTRING = 1004; 61 this.case_SSTRING = 1005; 62 63 this._parseConditions = { 64 INITIAL: 0, 65 ATTRIBUTE: 1, 66 ATTRIBUTE_VALUE: 2, 67 LINKIFY: 4, 68 A_NODE: 8, 69 SCRIPT: 16, 70 STYLE: 32 71 }; 72 73 this.initialCondition = { lexCondition: this._lexConditions.INITIAL, parseCondition: this._parseConditions.INITIAL }; 74 this.condition = this.initialCondition; 75 } 76 77 WebInspector.SourceHTMLTokenizer.prototype = { 78 set line(line) { 79 if (this._internalJavaScriptTokenizer) { 80 var match = /<\/script/i.exec(line); 81 if (match) { 82 this._internalJavaScriptTokenizer.line = line.substring(0, match.index); 83 } else 84 this._internalJavaScriptTokenizer.line = line; 85 } else if (this._internalCSSTokenizer) { 86 var match = /<\/style/i.exec(line); 87 if (match) { 88 this._internalCSSTokenizer.line = line.substring(0, match.index); 89 } else 90 this._internalCSSTokenizer.line = line; 91 } 92 this._line = line; 93 }, 94 95 _isExpectingAttribute: function() 96 { 97 return this._condition.parseCondition & this._parseConditions.ATTRIBUTE; 98 }, 99 100 _isExpectingAttributeValue: function() 101 { 102 return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE; 103 }, 104 105 _setExpectingAttribute: function() 106 { 107 if (this._isExpectingAttributeValue()) 108 this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE; 109 this._condition.parseCondition |= this._parseConditions.ATTRIBUTE; 110 }, 111 112 _setExpectingAttributeValue: function() 113 { 114 if (this._isExpectingAttribute()) 115 this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE; 116 this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE; 117 }, 118 119 _stringToken: function(cursor, stringEnds) 120 { 121 if (!this._isExpectingAttributeValue()) { 122 this.tokenType = null; 123 return cursor; 124 } 125 this.tokenType = this._attrValueTokenType(); 126 if (stringEnds) 127 this._setExpectingAttribute(); 128 return cursor; 129 }, 130 131 _attrValueTokenType: function() 132 { 133 if (this._condition.parseCondition & this._parseConditions.LINKIFY) { 134 if (this._condition.parseCondition & this._parseConditions.A_NODE) 135 return "html-external-link"; 136 return "html-resource-link"; 137 } 138 return "html-attribute-value"; 139 }, 140 141 nextToken: function(cursor) 142 { 143 if (this._internalJavaScriptTokenizer) { 144 // Re-set line to force </script> detection first. 145 this.line = this._line; 146 if (cursor !== this._internalJavaScriptTokenizer._line.length) { 147 // Tokenizer is stateless, so restore its condition before tokenizing and save it after. 148 this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition; 149 var result = this._internalJavaScriptTokenizer.nextToken(cursor); 150 this.tokenType = this._internalJavaScriptTokenizer.tokenType; 151 this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition; 152 return result; 153 } else if (cursor !== this._line.length) 154 delete this._internalJavaScriptTokenizer; 155 } else if (this._internalCSSTokenizer) { 156 // Re-set line to force </style> detection first. 157 this.line = this._line; 158 if (cursor !== this._internalCSSTokenizer._line.length) { 159 // Tokenizer is stateless, so restore its condition before tokenizing and save it after. 160 this._internalCSSTokenizer.condition = this._condition.internalCSSTokenizerCondition; 161 var result = this._internalCSSTokenizer.nextToken(cursor); 162 this.tokenType = this._internalCSSTokenizer.tokenType; 163 this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.condition; 164 return result; 165 } else if (cursor !== this._line.length) 166 delete this._internalCSSTokenizer; 167 } 168 169 var cursorOnEnter = cursor; 170 var gotoCase = 1; 171 while (1) { 172 switch (gotoCase) 173 // Following comment is replaced with generated state machine. 174 /*!re2c 175 re2c:define:YYCTYPE = "var"; 176 re2c:define:YYCURSOR = cursor; 177 re2c:define:YYGETCONDITION = "this.getLexCondition"; 178 re2c:define:YYSETCONDITION = "this.setLexCondition"; 179 re2c:condprefix = "case this.case_"; 180 re2c:condenumprefix = "this._lexConditions."; 181 re2c:yyfill:enable = 0; 182 re2c:labelprefix = "case "; 183 re2c:indent:top = 2; 184 re2c:indent:string = " "; 185 186 CommentContent = ([^-\r\n] | ("--" [^>]))*; 187 Comment = "<!--" CommentContent "-->"; 188 CommentStart = "<!--" CommentContent [\r\n]; 189 CommentEnd = CommentContent "-->"; 190 191 DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee]; 192 DocTypeContent = [^\r\n>]*; 193 194 ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt]; 195 ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt]; 196 197 StyleStart = "<" [Ss] [Tt] [Yy] [Ll] [Ee]; 198 StyleEnd = "</" [Ss] [Tt] [Yy] [Ll] [Ee]; 199 200 LT = "<" | "</"; 201 GT = ">"; 202 EqualSign = "="; 203 204 DoubleStringContent = [^\r\n\"]*; 205 SingleStringContent = [^\r\n\']*; 206 StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'"; 207 DoubleStringStart = "\"" DoubleStringContent [\r\n]; 208 DoubleStringEnd = DoubleStringContent "\""; 209 SingleStringStart = "'" SingleStringContent [\r\n]; 210 SingleStringEnd = SingleStringContent "'"; 211 212 Identifier = [^ \r\n"'<>\[\]=]+; 213 214 <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; } 215 <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; } 216 <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; } 217 <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; } 218 219 <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } 220 <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } 221 <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; } 222 223 <INITIAL> ScriptStart => TAG 224 { 225 if (this._condition.parseCondition & this._parseConditions.SCRIPT) { 226 // Do not tokenize script tag contents, keep lexer state, even though processing "<". 227 this.setLexCondition(this._lexConditions.INITIAL); 228 this.tokenType = null; 229 return cursor; 230 } 231 this.tokenType = "html-tag"; 232 this._condition.parseCondition = this._parseConditions.SCRIPT; 233 this._setExpectingAttribute(); 234 return cursor; 235 } 236 237 <INITIAL> ScriptEnd => TAG 238 { 239 this.tokenType = "html-tag"; 240 this._condition.parseCondition = this._parseConditions.INITIAL; 241 return cursor; 242 } 243 244 <INITIAL> StyleStart => TAG 245 { 246 if (this._condition.parseCondition & this._parseConditions.STYLE) { 247 // Do not tokenize style tag contents, keep lexer state, even though processing "<". 248 this.setLexCondition(this._lexConditions.INITIAL); 249 this.tokenType = null; 250 return cursor; 251 } 252 this.tokenType = "html-tag"; 253 this._condition.parseCondition = this._parseConditions.STYLE; 254 this._setExpectingAttribute(); 255 return cursor; 256 } 257 258 <INITIAL> StyleEnd => TAG 259 { 260 this.tokenType = "html-tag"; 261 this._condition.parseCondition = this._parseConditions.INITIAL; 262 return cursor; 263 } 264 265 <INITIAL> LT => TAG 266 { 267 if (this._condition.parseCondition & (this._parseConditions.SCRIPT | this._parseConditions.STYLE)) { 268 // Do not tokenize script and style tag contents, keep lexer state, even though processing "<". 269 this.setLexCondition(this._lexConditions.INITIAL); 270 this.tokenType = null; 271 return cursor; 272 } 273 274 this._condition.parseCondition = this._parseConditions.INITIAL; 275 this.tokenType = "html-tag"; 276 return cursor; 277 } 278 279 <TAG> GT => INITIAL 280 { 281 this.tokenType = "html-tag"; 282 if (this._condition.parseCondition & this._parseConditions.SCRIPT) { 283 if (!this._internalJavaScriptTokenizer) { 284 this._internalJavaScriptTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript"); 285 this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.initialCondition; 286 } 287 // Do not tokenize script tag contents. 288 return cursor; 289 } 290 291 if (this._condition.parseCondition & this._parseConditions.STYLE) { 292 if (!this._internalCSSTokenizer) { 293 this._internalCSSTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/css"); 294 this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.initialCondition; 295 } 296 // Do not tokenize style tag contents. 297 return cursor; 298 } 299 300 this._condition.parseCondition = this._parseConditions.INITIAL; 301 return cursor; 302 } 303 304 <TAG> StringLiteral { return this._stringToken(cursor, true); } 305 <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); } 306 <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); } 307 <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); } 308 <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); } 309 <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); } 310 <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); } 311 312 <TAG> EqualSign => TAG 313 { 314 if (this._isExpectingAttribute()) 315 this._setExpectingAttributeValue(); 316 this.tokenType = null; 317 return cursor; 318 } 319 320 <TAG> Identifier 321 { 322 if (this._condition.parseCondition === this._parseConditions.SCRIPT || this._condition.parseCondition === this._parseConditions.STYLE) { 323 // Fall through if expecting attributes. 324 this.tokenType = null; 325 return cursor; 326 } 327 328 if (this._condition.parseCondition === this._parseConditions.INITIAL) { 329 this.tokenType = "html-tag"; 330 this._setExpectingAttribute(); 331 var token = this._line.substring(cursorOnEnter, cursor); 332 if (token === "a") 333 this._condition.parseCondition |= this._parseConditions.A_NODE; 334 else if (this._condition.parseCondition & this._parseConditions.A_NODE) 335 this._condition.parseCondition ^= this._parseConditions.A_NODE; 336 } else if (this._isExpectingAttribute()) { 337 var token = this._line.substring(cursorOnEnter, cursor); 338 if (token === "href" || token === "src") 339 this._condition.parseCondition |= this._parseConditions.LINKIFY; 340 else if (this._condition.parseCondition |= this._parseConditions.LINKIFY) 341 this._condition.parseCondition ^= this._parseConditions.LINKIFY; 342 this.tokenType = "html-attribute-name"; 343 } else if (this._isExpectingAttributeValue()) 344 this.tokenType = this._attrValueTokenType(); 345 else 346 this.tokenType = null; 347 return cursor; 348 } 349 <*> [^] { this.tokenType = null; return cursor; } 350 */ 351 } 352 } 353 } 354 355 WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype; 356