#!/usr/bin/perl -w
#
# This is JavaScriptCore's variant of the PCRE library. While this library
# started out as a copy of PCRE, many of the features of PCRE have been
# removed. This library now supports only the regular expression features
# required by the JavaScript language specification, and has only the functions
# needed by JavaScriptCore and the rest of WebKit.
#
# Originally written by Philip Hazel
# Copyright (c) 1997-2006 University of Cambridge
# Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
#
# -----------------------------------------------------------------------------
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# * Neither the name of the University of Cambridge nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------

# This is a freestanding support program to generate a file containing
# character tables. The tables are built according to the default C
# locale.

use strict;

use File::Basename;
use File::Spec;
use File::Temp qw(tempfile);
use Getopt::Long;

sub readHeaderValues();
sub checkTableLayout();

# Constants extracted from pcre_internal.h; filled in by readHeaderValues().
my %pcre_internal;

if (scalar(@ARGV) < 1) {
    print STDERR "Usage: ", basename($0), " [--preprocessor=program] output-file\n";
    exit 1;
}

my $outputFile;
my $preprocessor;
GetOptions('preprocessor=s' => \$preprocessor);
if (not $preprocessor) {
    $preprocessor = "cpp";
}

$outputFile = $ARGV[0];
die('Must specify output file.') unless defined($outputFile);

readHeaderValues();

# Validate the layout before the output file is touched, so that a failed
# check cannot leave a truncated or partial output file behind.
checkTableLayout();

open(my $out, ">", $outputFile) or die "Cannot open $outputFile for writing: $!";
binmode($out);

print {$out}
    "/*************************************************\n" .
    "* Perl-Compatible Regular Expressions *\n" .
    "*************************************************/\n\n" .
    "/* This file is automatically written by the dftables auxiliary \n" .
    "program. If you edit it by hand, you might like to edit the Makefile to \n" .
    "prevent its ever being regenerated.\n\n";
printf {$out}
    "This file contains the default tables for characters with codes less than\n" .
    "128 (ASCII characters). These tables are used when no external tables are\n" .
    "passed to PCRE. */\n\n" .
    "const unsigned char jsc_pcre_default_tables[%d] = {\n\n" .
    "/* This table is a lower casing table. */\n\n", $pcre_internal{tables_length};

# Lower casing table: 128 entries, eight per output row.
print {$out} " ";
for my $i (0 .. 127) {
    if (($i & 7) == 0 && $i != 0) {
        print {$out} "\n ";
    }
    printf {$out} "0x%02X", ord(lc(chr($i)));
    if ($i != 127) {
        print {$out} ", ";
    }
}
print {$out} ",\n\n";

print {$out} "/* This table is a case flipping table. */\n\n";

# Case flipping table: lower-case letters map to upper case, everything else
# goes through lc().
print {$out} " ";
for my $i (0 .. 127) {
    if (($i & 7) == 0 && $i != 0) {
        print {$out} "\n ";
    }
    my $c = chr($i);
    printf {$out} "0x%02X", $c =~ /[[:lower:]]/ ? ord(uc($c)) : ord(lc($c));
    if ($i != 127) {
        print {$out} ", ";
    }
}
print {$out} ",\n\n";

print {$out}
    "/* This table contains bit maps for various character classes.\n" .
    "Each map is 32 bytes long and the bits run from the least\n" .
    "significant end of each byte. The classes are: space, digit, word. */\n\n";

# Build the class bit maps. Character $i lives in byte ($i >> 3) of its map,
# at bit ($i & 7); the cbit_* values are the byte offsets of each map.
my @cbit_table = (0) x $pcre_internal{cbit_length};
for my $i (ord('0') .. ord('9')) {
    $cbit_table[$pcre_internal{cbit_digit} + ($i >> 3)] |= 1 << ($i & 7);
}
$cbit_table[$pcre_internal{cbit_word} + (ord('_') >> 3)] |= 1 << (ord('_') & 7);
for my $i (0 .. 127) {
    my $c = chr($i);
    if ($c =~ /[[:alnum:]]/) {
        $cbit_table[$pcre_internal{cbit_word} + ($i >> 3)] |= 1 << ($i & 7);
    }
    if ($c =~ /[[:space:]]/) {
        $cbit_table[$pcre_internal{cbit_space} + ($i >> 3)] |= 1 << ($i & 7);
    }
}

# Eight bytes per row, with an extra blank line after every 32 bytes.
print {$out} " ";
for my $i (0 .. $pcre_internal{cbit_length} - 1) {
    if (($i & 7) == 0 && $i != 0) {
        if (($i & 31) == 0) {
            print {$out} "\n";
        }
        print {$out} "\n ";
    }
    printf {$out} "0x%02X", $cbit_table[$i];
    if ($i != $pcre_internal{cbit_length} - 1) {
        print {$out} ", ";
    }
}
print {$out} ",\n\n";

printf {$out}
    "/* This table identifies various classes of character by individual bits:\n" .
    " 0x%02x white space character\n" .
    " 0x%02x hexadecimal digit\n" .
    " 0x%02x alphanumeric or '_'\n*/\n\n",
    $pcre_internal{ctype_space}, $pcre_internal{ctype_xdigit}, $pcre_internal{ctype_word};

# Character type table: one flag byte per character, eight per row, each row
# followed by a comment naming the first and last character of the row.
print {$out} " ";
for my $i (0 .. 127) {
    my $x = 0;
    my $c = chr($i);
    if ($c =~ /[[:space:]]/) {
        $x += $pcre_internal{ctype_space};
    }
    if ($c =~ /[[:xdigit:]]/) {
        $x += $pcre_internal{ctype_xdigit};
    }
    if ($c =~ /[[:alnum:]_]/) {
        $x += $pcre_internal{ctype_word};
    }
    printf {$out} "0x%02X", $x;
    if ($i != 127) {
        print {$out} ", ";
    } else {
        # The last entry closes the C array initializer.
        print {$out} "};";
    }
    if (($i & 7) == 7) {
        print {$out} " /* ";
        my $d = chr($i - 7);
        # Printable characters are shown literally, the rest as decimal codes.
        if ($d =~ /[[:print:]]/) {
            printf {$out} " %c -", $i - 7;
        } else {
            printf {$out} "%3d-", $i - 7;
        }
        if ($c =~ m/[[:print:]]/) {
            printf {$out} " %c ", $i;
        } else {
            printf {$out} "%3d", $i;
        }
        print {$out} " */\n";
        if ($i != 127) {
            print {$out} " ";
        }
    }
}

print {$out} "\n\n/* End of chartables.c */\n";

# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot write $outputFile: $!";

exit 0;

# Dies unless the offsets and lengths read from pcre_internal.h agree with the
# order and sizes in which the tables are written above: lower casing table
# (128 bytes), case flipping table (128 bytes), class bit maps (cbit_length
# bytes), character type table (128 bytes).
sub checkTableLayout()
{
    if ($pcre_internal{lcc_offset} != 0) {
        die "lcc_offset != 0";
    }

    if ($pcre_internal{fcc_offset} != 128) {
        die "fcc_offset != 128";
    }

    if ($pcre_internal{cbits_offset} != $pcre_internal{fcc_offset} + 128) {
        die "cbits_offset != fcc_offset + 128";
    }

    if ($pcre_internal{ctypes_offset} != $pcre_internal{cbits_offset} + $pcre_internal{cbit_length}) {
        die "ctypes_offset != cbits_offset + cbit_length";
    }

    if ($pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128) {
        die "tables_length != ctypes_offset + 128";
    }
}

# Fills %pcre_internal with the constants listed in @variables.
#
# A temporary file is built from "#define DFTABLES", the text of
# pcre_internal.h (looked up next to this script) and one Perl assignment per
# constant. The file is run through $preprocessor and the preprocessor's
# output is evaluated as Perl, which performs the assignments.
#
# Dies if the header cannot be read, the temporary file cannot be written, the
# preprocessor cannot be started, the evaluated output fails, or any listed
# constant is still undefined afterwards. The temporary file is removed on
# every path.
sub readHeaderValues()
{
    my @variables = qw(
        cbit_digit
        cbit_length
        cbit_space
        cbit_word
        cbits_offset
        ctype_space
        ctype_word
        ctype_xdigit
        ctypes_offset
        fcc_offset
        lcc_offset
        tables_length
    );

    # Slurp mode: the header and the preprocessor output are each read whole.
    local $/ = undef;

    my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");

    # Read the header before creating the temporary file, so that a missing
    # header does not leave a stray temporary file behind.
    open(my $header, "<", $headerPath) or die "Cannot open $headerPath: $!";
    my $headerText = <$header>;
    close($header);
    $headerText = "" unless defined($headerText);

    my ($fh, $tempFile) = tempfile(
        basename($0) . "-XXXXXXXX",
        DIR => File::Spec->tmpdir(),
        SUFFIX => ".in",
        UNLINK => 0,
    );

    my $content;
    my $cppStatus = 0;
    my $ok = eval {
        print $fh "#define DFTABLES\n\n";
        print $fh $headerText;
        print $fh "\n\n";

        for my $v (@variables) {
            print $fh "\$pcre_internal{\"$v\"} = $v;\n";
        }

        close($fh) or die "Cannot write $tempFile: $!";

        # The command is deliberately passed as a single string: the value of
        # --preprocessor may carry its own arguments.
        open(my $cpp, "-|", "$preprocessor \"$tempFile\"") or die "Cannot run $preprocessor: $!";
        $content = <$cpp>;
        close($cpp);
        $cppStatus = $?;
        1;
    };
    my $error = $@;

    # Remove the temporary file whether or not the steps above succeeded.
    unlink $tempFile;
    die $error unless $ok;

    # NOTE(review): this evaluates preprocessor output as Perl code. The input
    # is the project's own header, so it is treated as trusted build input.
    $content = "" unless defined($content);
    eval $content;
    die "$@" if $@;

    # An unusable preprocessor yields no assignments at all; report that here
    # instead of failing later with a misleading offset mismatch.
    my @missing = grep { !defined($pcre_internal{$_}) } @variables;
    if (@missing) {
        die "Preprocessor \"$preprocessor\" (exit status " . ($cppStatus >> 8) . ") produced no value for: @missing\n";
    }
}