/* ***** BEGIN LICENSE BLOCK *****
 * Version: NPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is JavaScript Engine testing utilities.
 *
 * The Initial Developer of the Original Code is Netscape Communications Corp.
 * Portions created by the Initial Developer are Copyright (C) 2002
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s): rogerl (at) netscape.com, pschwartau (at) netscape.com
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the NPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the NPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK *****
 *
 *
 * Date: 14 Feb 2002
 * SUMMARY: Performance: Regexp performance degraded from 4.7
 * See http://bugzilla.mozilla.org/show_bug.cgi?id=85721
 *
 * Adjust this testcase if necessary. The FAST constant defines
 * an upper bound in milliseconds for any execution to take.
 *
 * NOTE: printBugNumber, printStatus, inSection, reportCompare and testRegExp
 * are not defined in this file; they are expected to be supplied by the
 * surrounding test harness.
 */
//-----------------------------------------------------------------------------
var bug = 85721;
var summary = 'Performance: execution of regular expression';
var FAST = 100; // execution should be 100 ms or less to pass the test
var MSG_FAST = 'Execution took less than ' + FAST + ' ms';
var MSG_SLOW = 'Execution took ';
var MSG_MS = ' ms';
var str = '';
var re = '';
var status = '';
var actual = '';
var expect = '';

printBugNumber(bug);
printStatus(summary);


/**
 * Returns the number of milliseconds that have passed since startTime.
 *
 * @param {Date} startTime - the moment the measurement began
 * @returns {number} elapsed time in milliseconds
 */
function elapsedTime(startTime)
{
  return new Date() - startTime;
}


/**
 * Turns a duration into the message that is compared by reportCompare().
 * Every duration within the FAST bound maps to the same MSG_FAST text, so a
 * fast run always compares equal to isThisFast(FAST); a slow run produces a
 * message that contains the measured time and therefore differs.
 *
 * @param {number} ms - measured duration in milliseconds
 * @returns {string} MSG_FAST when ms <= FAST, otherwise "Execution took <ms> ms"
 */
function isThisFast(ms)
{
  if (ms <= FAST) {
    return MSG_FAST;
  }
  return MSG_SLOW + ms + MSG_MS;
}



/*
 * The first regexp. We'll test for performance (Section 1) and accuracy (Section 2).
 */
str = '<sql:connection id="conn1"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>';
re = /<sql:connection id="([^\r\n]*?)">\s*<sql:url>\s*([^\r\n]*?)\s*<\/sql:url>\s*<sql:driver>\s*([^\r\n]*?)\s*<\/sql:driver>\s*(\s*<sql:userId>\s*([^\r\n]*?)\s*<\/sql:userId>\s*)?\s*(\s*<sql:password>\s*([^\r\n]*?)\s*<\/sql:password>\s*)?\s*<\/sql:connection>/;
expect = [
  '<sql:connection id="conn1"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>',
  'conn1',
  'www.m.com',
  'drive.class',
  '<sql:userId>foo</sql:userId> ',
  'foo',
  '<sql:password>goo</sql:password> ',
  'goo'
];

/*
 * Check performance -
 */
status = inSection(1);
var start = new Date();
var result = re.exec(str);
actual = elapsedTime(start);
reportCompare(isThisFast(FAST), isThisFast(actual), status);

/*
 * Check accuracy -
 */
status = inSection(2);
testRegExp([status], [re], [str], [result], [expect]);



/*
 * The second regexp (HUGE!). We'll test for performance (Section 3) and accuracy (Section 4).
 * It comes from the O'Reilly book "Mastering Regular Expressions" by Jeffrey Friedl, Appendix B
 *
 * The pattern is assembled from string fragments that are later handed to
 * new RegExp(), so anything that must reach the regexp engine as a backslash
 * escape needs a doubled backslash in the string literal.
 */

//# Some things for avoiding backslashitis later on.
var $esc = '\\\\';
// Doubled backslash: a bare '\.' would collapse to '.' in the string and the
// resulting pattern would accept ANY character where a period is required.
var $Period = '\\.';
// \x.. escapes are used instead of legacy octal escapes (which are rejected
// in strict mode); the resulting characters are the same.
var $space = '\x20';
var $tab = '\t';
var $OpenBR = '\\[';
var $CloseBR = '\\]';
var $OpenParen = '\\(';
var $CloseParen = '\\)';
var $NonASCII = '\x80-\xff';
var $ctrl = '\x00-\x1f';
var $CRlist = '\n\r'; //# note: this should really be only \015.
// Items 19, 20, 21
var $qtext = '[^' + $esc + $NonASCII + $CRlist + '"]'; // # for within "..."
var $dtext = '[^' + $esc + $NonASCII + $CRlist + $OpenBR + $CloseBR + ']'; // # for within [...]
var $quoted_pair = $esc + '[^' + $NonASCII + ']'; // # an escaped character

//##############################################################################
//# Items 22 and 23, comment.
//# Impossible to do properly with a regex, I make do by allowing at most one level of nesting.
var $ctext = '[^' + $esc + $NonASCII + $CRlist + '()]';

//# $Cnested matches one non-nested comment.
//# It is unrolled, with normal of $ctext, special of $quoted_pair.
var $Cnested =
  $OpenParen +                                 // # (
  $ctext + '*' +                               // #     normal*
  '(?:' + $quoted_pair + $ctext + '*)*' +      // #     (special normal*)*
  $CloseParen;                                 // # )


//# $comment allows one level of nested parentheses
//# It is unrolled, with normal of $ctext, special of ($quoted_pair|$Cnested)
var $comment =
  $OpenParen +                                   // # (
  $ctext + '*' +                                 // #     normal*
  '(?:' +                                        // #     (
  '(?:' + $quoted_pair + '|' + $Cnested + ')' +  // #         special
  $ctext + '*' +                                 // #         normal*
  ')*' +                                         // #     )*
  $CloseParen;                                   // # )


//##############################################################################
//# $X is optional whitespace/comments.
var $X =
  '[' + $space + $tab + ']*' +                       // # Nab whitespace.
  '(?:' + $comment + '[' + $space + $tab + ']*)*';   // # If comment found, allow more spaces.


//# Item 10: atom
// Both parentheses are excluded, matching $phrase_char below (which is
// "like atom-char"); without ')' in the list a closing parenthesis would be
// accepted as part of an atom.
var $atom_char = '[^(' + $space + ')<>@,;:".' + $esc + $OpenBR + $CloseBR + $ctrl + $NonASCII + ']';
var $atom =
  $atom_char + '+' +          // # some number of atom characters...
  '(?!' + $atom_char + ')';   // # ..not followed by something that could be part of an atom

// # Item 11: doublequoted string, unrolled.
var $quoted_str =
  '"' +                                       // # "
  $qtext + '*' +                              // #   normal
  '(?:' + $quoted_pair + $qtext + '*)*' +     // #   ( special normal* )*
  '"';                                        // # "

//# Item 7: word is an atom or quoted string
var $word =
  '(?:' +
  $atom +           // # Atom
  '|' +             // #  or
  $quoted_str +     // # Quoted string
  ')';

//# Item 12: domain-ref is just an atom
var $domain_ref = $atom;

//# Item 13: domain-literal is like a quoted string, but [...] instead of "..."
var $domain_lit =
  $OpenBR +                                       // # [
  '(?:' + $dtext + '|' + $quoted_pair + ')*' +    // #    stuff
  $CloseBR;                                       // #           ]

// # Item 9: sub-domain is a domain-ref or domain-literal
var $sub_domain =
  '(?:' +
  $domain_ref +
  '|' +
  $domain_lit +
  ')' +
  $X;   // # optional trailing comments

// # Item 6: domain is a list of subdomains separated by dots.
var $domain =
  $sub_domain +
  '(?:' +
  $Period + $X + $sub_domain +
  ')*';

//# Item 8: a route. A bunch of "@ $domain" separated by commas, followed by a colon.
var $route =
  '@' + $X + $domain +
  '(?:,' + $X + '@' + $X + $domain + ')*' +   // # additional domains
  ':' +
  $X;                                         // # optional trailing comments

//# Item 6: local-part is a bunch of $word separated by periods
// The '+' after $X is essential: without it automatic semicolon insertion
// ends the assignment at "$word + $X" and the repetition group below becomes
// a separate statement with no effect.
var $local_part =
  $word + $X +
  '(?:' +
  $Period + $X + $word + $X +   // # additional words
  ')*';

// # Item 2: addr-spec is local@domain
var $addr_spec =
  $local_part + '@' + $X + $domain;

//# Item 4: route-addr is <route? addr-spec>
var $route_addr =
  '<' + $X +                  // # <
  '(?:' + $route + ')?' +     // #       optional route
  $addr_spec +                // #                  address spec
  '>';                        // #                               >

//# Item 3: phrase........
var $phrase_ctrl = '\x00-\x08\x0a-\x1f'; // # like ctrl, but without tab

//# Like atom-char, but without listing space, and uses phrase_ctrl.
//# Since the class is negated, this matches the same as atom-char plus space and tab
var $phrase_char =
  '[^()<>@,;:".' + $esc + $OpenBR + $CloseBR + $NonASCII + $phrase_ctrl + ']';

// # We've worked it so that $word, $comment, and $quoted_str do not consume trailing $X
// # because we take care of it manually.
var $phrase =
  $word +                                          // # leading word
  $phrase_char + '*' +                             // # "normal" atoms and/or spaces
  '(?:' +
  '(?:' + $comment + '|' + $quoted_str + ')' +     // # "special" comment or quoted string
  $phrase_char + '*' +                             // #  more "normal"
  ')*';

// ## Item #1: mailbox is an addr_spec or a phrase/route_addr
var $mailbox =
  $X +                          // # optional leading comment
  '(?:' +
  $phrase + $route_addr +       // # name and address
  '|' +                         // #  or
  $addr_spec +                  // # address
  ')';


//###########################################################################


re = new RegExp($mailbox, "g");
str = 'Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>';
expect = ['Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>'];

/*
 * Check performance -
 */
status = inSection(3);
start = new Date();
result = re.exec(str);
actual = elapsedTime(start);
reportCompare(isThisFast(FAST), isThisFast(actual), status);

/*
 * Check accuracy -
 */
status = inSection(4);
testRegExp([status], [re], [str], [result], [expect]);