Home | History | Annotate | Download | only in RegExp
      1 /* ***** BEGIN LICENSE BLOCK *****
      2 * Version: NPL 1.1/GPL 2.0/LGPL 2.1
      3 *
      4 * The contents of this file are subject to the Netscape Public License
      5 * Version 1.1 (the "License"); you may not use this file except in
      6 * compliance with the License. You may obtain a copy of the License at
      7 * http://www.mozilla.org/NPL/
      8 *
      9 * Software distributed under the License is distributed on an "AS IS" basis,
     10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
     11 * for the specific language governing rights and limitations under the
     12 * License.
     13 *
     14 * The Original Code is JavaScript Engine testing utilities.
     15 *
     16 * The Initial Developer of the Original Code is Netscape Communications Corp.
     17 * Portions created by the Initial Developer are Copyright (C) 2002
     18 * the Initial Developer. All Rights Reserved.
     19 *
     20 * Contributor(s): rogerl (at) netscape.com, pschwartau (at) netscape.com
     21 *
     22 * Alternatively, the contents of this file may be used under the terms of
     23 * either the GNU General Public License Version 2 or later (the "GPL"), or
     24 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
     25 * in which case the provisions of the GPL or the LGPL are applicable instead
     26 * of those above. If you wish to allow use of your version of this file only
     27 * under the terms of either the GPL or the LGPL, and not to allow others to
     28 * use your version of this file under the terms of the NPL, indicate your
     29 * decision by deleting the provisions above and replace them with the notice
     30 * and other provisions required by the GPL or the LGPL. If you do not delete
     31 * the provisions above, a recipient may use your version of this file under
     32 * the terms of any one of the NPL, the GPL or the LGPL.
     33 *
     34 * ***** END LICENSE BLOCK *****
     35 *
     36 *
     37 * Date:    14 Feb 2002
     38 * SUMMARY: Performance: Regexp performance degraded from 4.7
     39 * See http://bugzilla.mozilla.org/show_bug.cgi?id=85721
     40 *
     41 * Adjust this testcase if necessary. The FAST constant defines
     42 * an upper bound in milliseconds for any execution to take.
     43 *
     44 */
     45 //-----------------------------------------------------------------------------
     46 var bug = 85721;
     47 var summary = 'Performance: execution of regular expression';
     48 var FAST = 100; // execution should be 100 ms or less to pass the test
     49 var MSG_FAST = 'Execution took less than ' + FAST + ' ms';
     50 var MSG_SLOW = 'Execution took ';
     51 var MSG_MS = ' ms';
     52 var str = '';
     53 var re = '';
     54 var status = '';
     55 var actual = '';
     56 var expect= '';
     57 
     58 printBugNumber (bug);
     59 printStatus (summary);
     60 
     61 
     62 function elapsedTime(startTime)
     63 {
     64   return new Date() - startTime;
     65 }
     66 
     67 
     68 function isThisFast(ms)
     69 {
     70   if (ms <= FAST)
     71     return MSG_FAST;
     72   return MSG_SLOW + ms + MSG_MS;
     73 }
     74 
     75 
     76 
     77 /*
     78  * The first regexp. We'll test for performance (Section 1) and accuracy (Section 2).
     79  */
     80 str='<sql:connection id="conn1"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>';
     81 re = /<sql:connection id="([^\r\n]*?)">\s*<sql:url>\s*([^\r\n]*?)\s*<\/sql:url>\s*<sql:driver>\s*([^\r\n]*?)\s*<\/sql:driver>\s*(\s*<sql:userId>\s*([^\r\n]*?)\s*<\/sql:userId>\s*)?\s*(\s*<sql:password>\s*([^\r\n]*?)\s*<\/sql:password>\s*)?\s*<\/sql:connection>/;
     82 expect = Array("<sql:connection id=\"conn1\"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>","conn1","www.m.com","drive.class","<sql:userId>foo</sql:userId> ","foo","<sql:password>goo</sql:password> ","goo");
     83 
     84 /*
     85  *  Check performance -
     86  */
     87 status = inSection(1);
     88 var start = new Date();
     89 var result = re.exec(str);
     90 actual = elapsedTime(start);
     91 reportCompare(isThisFast(FAST), isThisFast(actual), status);
     92 
     93 /*
     94  *  Check accuracy -
     95  */
     96 status = inSection(2);
     97 testRegExp([status], [re], [str], [result], [expect]);
     98 
     99 
    100 
    101 /*
    102  * The second regexp (HUGE!). We'll test for performance (Section 3) and accuracy (Section 4).
    103  * It comes from the O'Reilly book "Mastering Regular Expressions" by Jeffrey Friedl, Appendix B
    104  */
    105 
    106 //# Some things for avoiding backslashitis later on.
    107 $esc        = '\\\\';
    108 $Period      = '\.';
    109 $space      = '\040';              $tab         = '\t';
    110 $OpenBR     = '\\[';               $CloseBR     = '\\]';
    111 $OpenParen  = '\\(';               $CloseParen  = '\\)';
    112 $NonASCII   = '\x80-\xff';         $ctrl        = '\000-\037';
    113 $CRlist     = '\n\015';  //# note: this should really be only \015.
    114 // Items 19, 20, 21
    115 $qtext = '[^' + $esc + $NonASCII + $CRlist + '\"]';						  // # for within "..."
    116 $dtext = '[^' + $esc + $NonASCII + $CRlist + $OpenBR + $CloseBR + ']';    // # for within [...]
    117 $quoted_pair = $esc + '[^' + $NonASCII + ']';							  // # an escaped character
    118 
    119 //##############################################################################
    120 //# Items 22 and 23, comment.
    121 //# Impossible to do properly with a regex, I make do by allowing at most one level of nesting.
    122 $ctext   =  '[^' + $esc + $NonASCII + $CRlist + '()]';
    123 
    124 //# $Cnested matches one non-nested comment.
    125 //# It is unrolled, with normal of $ctext, special of $quoted_pair.
    126 $Cnested =
    127    $OpenParen +                                 // #  (
    128       $ctext + '*' +                            // #     normal*
    129       '(?:' + $quoted_pair + $ctext + '*)*' +   // #     (special normal*)*
    130    $CloseParen;                                 // #                       )
    131 
    132 
    133 //# $comment allows one level of nested parentheses
    134 //# It is unrolled, with normal of $ctext, special of ($quoted_pair|$Cnested)
    135 $comment =
    136    $OpenParen +                                           // #  (
    137        $ctext + '*' +                                     // #     normal*
    138        '(?:' +                                            // #       (
    139           '(?:' + $quoted_pair + '|' + $Cnested + ')' +   // #         special
    140            $ctext + '*' +                                 // #         normal*
    141        ')*' +                                             // #            )*
    142    $CloseParen;                                           // #                )
    143 
    144 
    145 //##############################################################################
    146 //# $X is optional whitespace/comments.
    147 $X =
    148    '[' + $space + $tab + ']*' +					       // # Nab whitespace.
    149    '(?:' + $comment + '[' + $space + $tab + ']*)*';    // # If comment found, allow more spaces.
    150 
    151 
    152 //# Item 10: atom
    153 $atom_char   = '[^(' + $space + '<>\@,;:\".' + $esc + $OpenBR + $CloseBR + $ctrl + $NonASCII + ']';
    154 $atom =
    155   $atom_char + '+' +            // # some number of atom characters...
    156   '(?!' + $atom_char + ')';     // # ..not followed by something that could be part of an atom
    157 
    158 // # Item 11: doublequoted string, unrolled.
    159 $quoted_str =
    160     '\"' +                                         // # "
    161        $qtext + '*' +                              // #   normal
    162        '(?:' + $quoted_pair + $qtext + '*)*' +     // #   ( special normal* )*
    163     '\"';                                          // # "
    164 
    165 //# Item 7: word is an atom or quoted string
    166 $word =
    167     '(?:' +
    168        $atom +                // # Atom
    169        '|' +                  //     #  or
    170        $quoted_str +          // # Quoted string
    171      ')'
    172 
    173 //# Item 12: domain-ref is just an atom
    174 $domain_ref  = $atom;
    175 
    176 //# Item 13: domain-literal is like a quoted string, but [...] instead of  "..."
    177 $domain_lit  =
    178     $OpenBR +								   	     // # [
    179     '(?:' + $dtext + '|' + $quoted_pair + ')*' +     // #    stuff
    180     $CloseBR;                                        // #           ]
    181 
    182 // # Item 9: sub-domain is a domain-ref or domain-literal
    183 $sub_domain  =
    184   '(?:' +
    185     $domain_ref +
    186     '|' +
    187     $domain_lit +
    188    ')' +
    189    $X;                 // # optional trailing comments
    190 
    191 // # Item 6: domain is a list of subdomains separated by dots.
    192 $domain =
    193      $sub_domain +
    194      '(?:' +
    195         $Period + $X + $sub_domain +
    196      ')*';
    197 
    198 //# Item 8: a route. A bunch of "@ $domain" separated by commas, followed by a colon.
    199 $route =
    200     '\@' + $X + $domain +
    201     '(?:,' + $X + '\@' + $X + $domain + ')*' +  // # additional domains
    202     ':' +
    203     $X;					// # optional trailing comments
    204 
    205 //# Item 6: local-part is a bunch of $word separated by periods
    206 $local_part =
    207     $word + $X
    208     '(?:' +
    209         $Period + $X + $word + $X +		// # additional words
    210     ')*';
    211 
    212 // # Item 2: addr-spec is local@domain
    213 $addr_spec  =
    214   $local_part + '\@' + $X + $domain;
    215 
    216 //# Item 4: route-addr is <route? addr-spec>
    217 $route_addr =
    218     '<' + $X +                     // # <
    219        '(?:' + $route + ')?' +     // #       optional route
    220        $addr_spec +                // #       address spec
    221     '>';                           // #                 >
    222 
    223 //# Item 3: phrase........
    224 $phrase_ctrl = '\000-\010\012-\037'; // # like ctrl, but without tab
    225 
    226 //# Like atom-char, but without listing space, and uses phrase_ctrl.
    227 //# Since the class is negated, this matches the same as atom-char plus space and tab
    228 $phrase_char =
    229    '[^()<>\@,;:\".' + $esc + $OpenBR + $CloseBR + $NonASCII + $phrase_ctrl + ']';
    230 
    231 // # We've worked it so that $word, $comment, and $quoted_str to not consume trailing $X
    232 // # because we take care of it manually.
    233 $phrase =
    234    $word +                                                  // # leading word
    235    $phrase_char + '*' +                                     // # "normal" atoms and/or spaces
    236    '(?:' +
    237       '(?:' + $comment + '|' + $quoted_str + ')' +          // # "special" comment or quoted string
    238       $phrase_char + '*' +                                  // #  more "normal"
    239    ')*';
    240 
    241 // ## Item #1: mailbox is an addr_spec or a phrase/route_addr
    242 $mailbox =
    243     $X +                                // # optional leading comment
    244     '(?:' +
    245             $phrase + $route_addr +     // # name and address
    246             '|' +                       //     #  or
    247             $addr_spec +                // # address
    248      ')';
    249 
    250 
    251 //###########################################################################
    252 
    253 
    254 re = new RegExp($mailbox, "g");
    255 str = 'Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>';
    256 expect = Array('Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>');
    257 
    258 /*
    259  *  Check performance -
    260  */
    261 status = inSection(3);
    262 var start = new Date();
    263 var result = re.exec(str);
    264 actual = elapsedTime(start);
    265 reportCompare(isThisFast(FAST), isThisFast(actual), status);
    266 
    267 /*
    268  *  Check accuracy -
    269  */
    270 status = inSection(4);
    271 testRegExp([status], [re], [str], [result], [expect]);
    272