Home | History | Annotate | Download | only in pcre
      1 #!/usr/bin/perl -w
      2 #
      3 # This is JavaScriptCore's variant of the PCRE library. While this library
      4 # started out as a copy of PCRE, many of the features of PCRE have been
      5 # removed. This library now supports only the regular expression features
      6 # required by the JavaScript language specification, and has only the functions
      7 # needed by JavaScriptCore and the rest of WebKit.
      8 # 
      9 #                  Originally written by Philip Hazel
     10 #            Copyright (c) 1997-2006 University of Cambridge
     11 #  Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc.  All rights reserved.
     12 # 
     13 # -----------------------------------------------------------------------------
     14 # Redistribution and use in source and binary forms, with or without
     15 # modification, are permitted provided that the following conditions are met:
     16 # 
     17 #     * Redistributions of source code must retain the above copyright notice,
     18 #       this list of conditions and the following disclaimer.
     19 # 
     20 #     * Redistributions in binary form must reproduce the above copyright
     21 #       notice, this list of conditions and the following disclaimer in the
     22 #       documentation and/or other materials provided with the distribution.
     23 # 
     24 #     * Neither the name of the University of Cambridge nor the names of its
     25 #       contributors may be used to endorse or promote products derived from
     26 #       this software without specific prior written permission.
     27 # 
     28 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     29 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     32 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     33 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     34 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     35 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     36 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     37 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     38 # POSSIBILITY OF SUCH DAMAGE.
     39 # -----------------------------------------------------------------------------
     40 
     41 # This is a freestanding support program to generate a file containing
     42 # character tables. The tables are built according to the default C
     43 # locale.
     44 
     45 use strict;
     46 
     47 use File::Basename;
     48 use File::Spec;
     49 use File::Temp qw(tempfile);
     50 use Getopt::Long;
     51 
     52 sub readHeaderValues();
     53 
     54 my %pcre_internal;
     55 
     56 if (scalar(@ARGV) < 1) {
     57     print STDERR "Usage: ", basename($0), " [--preprocessor=program] output-file\n";
     58     exit 1;
     59 }
     60 
     61 my $outputFile;
     62 my $preprocessor;
     63 GetOptions('preprocessor=s' => \$preprocessor);
     64 if (not $preprocessor) {
     65     $preprocessor = "cpp";
     66 }
     67 
     68 $outputFile = $ARGV[0];
     69 die('Must specify output file.') unless defined($outputFile);
     70 
     71 readHeaderValues();
     72 
     73 open(OUT, ">", $outputFile) or die "$!";
     74 binmode(OUT);
     75 
     76 printf(OUT
     77     "/*************************************************\n" .
     78     "*      Perl-Compatible Regular Expressions       *\n" .
     79     "*************************************************/\n\n" .
     80     "/* This file is automatically written by the dftables auxiliary \n" .
     81     "program. If you edit it by hand, you might like to edit the Makefile to \n" .
     82     "prevent its ever being regenerated.\n\n");
     83 printf(OUT
     84     "This file contains the default tables for characters with codes less than\n" .
     85     "128 (ASCII characters). These tables are used when no external tables are\n" .
     86     "passed to PCRE. */\n\n" .
     87     "const unsigned char jsc_pcre_default_tables[%d] = {\n\n" .
     88     "/* This table is a lower casing table. */\n\n", $pcre_internal{tables_length});
     89 
     90 if ($pcre_internal{lcc_offset} != 0) {
     91     die "lcc_offset != 0";
     92 }
     93 
     94 printf(OUT "  ");
     95 for (my $i = 0; $i < 128; $i++) {
     96     if (($i & 7) == 0 && $i != 0) {
     97         printf(OUT "\n  ");
     98     }
     99     printf(OUT "0x%02X", ord(lc(chr($i))));
    100     if ($i != 127) {
    101         printf(OUT ", ");
    102     }
    103 }
    104 printf(OUT ",\n\n");
    105 
    106 printf(OUT "/* This table is a case flipping table. */\n\n");
    107 
    108 if ($pcre_internal{fcc_offset} != 128) {
    109   die "fcc_offset != 128";
    110 }
    111 
    112 printf(OUT "  ");
    113 for (my $i = 0; $i < 128; $i++) {
    114     if (($i & 7) == 0 && $i != 0) {
    115         printf(OUT "\n  ");
    116     }
    117     my $c = chr($i);
    118     printf(OUT "0x%02X", $c =~ /[[:lower:]]/ ? ord(uc($c)) : ord(lc($c)));
    119     if ($i != 127) {
    120         printf(OUT ", ");
    121     }
    122 }
    123 printf(OUT ",\n\n");
    124 
    125 printf(OUT
    126     "/* This table contains bit maps for various character classes.\n" .
    127     "Each map is 32 bytes long and the bits run from the least\n" .
    128     "significant end of each byte. The classes are: space, digit, word. */\n\n");
    129 
    130 if ($pcre_internal{cbits_offset} != $pcre_internal{fcc_offset} + 128) {
    131     die "cbits_offset != fcc_offset + 128";
    132 }
    133 
    134 my @cbit_table = (0) x $pcre_internal{cbit_length};
    135 for (my $i = ord('0'); $i <= ord('9'); $i++) {
    136     $cbit_table[$pcre_internal{cbit_digit} + $i / 8] |= 1 << ($i & 7);
    137 }
    138 $cbit_table[$pcre_internal{cbit_word} + ord('_') / 8] |= 1 << (ord('_') & 7);
    139 for (my $i = 0; $i < 128; $i++) {
    140     my $c = chr($i);
    141     if ($c =~ /[[:alnum:]]/) {
    142         $cbit_table[$pcre_internal{cbit_word} + $i / 8] |= 1 << ($i & 7);
    143     }
    144     if ($c =~ /[[:space:]]/) {
    145         $cbit_table[$pcre_internal{cbit_space} + $i / 8] |= 1 << ($i & 7);
    146     }
    147 }
    148 
    149 printf(OUT "  ");
    150 for (my $i = 0; $i < $pcre_internal{cbit_length}; $i++) {
    151     if (($i & 7) == 0 && $i != 0) {
    152         if (($i & 31) == 0) {
    153             printf(OUT "\n");
    154         }
    155         printf(OUT "\n  ");
    156     }
    157     printf(OUT "0x%02X", $cbit_table[$i]);
    158     if ($i != $pcre_internal{cbit_length} - 1) {
    159         printf(OUT ", ");
    160     }
    161 }
    162 printf(OUT ",\n\n");
    163 
    164 printf(OUT
    165     "/* This table identifies various classes of character by individual bits:\n" .
    166     "  0x%02x   white space character\n" .
    167     "  0x%02x   hexadecimal digit\n" .
    168     "  0x%02x   alphanumeric or '_'\n*/\n\n",
    169     $pcre_internal{ctype_space}, $pcre_internal{ctype_xdigit}, $pcre_internal{ctype_word});
    170 
    171 if ($pcre_internal{ctypes_offset} != $pcre_internal{cbits_offset} + $pcre_internal{cbit_length}) {
    172     die "ctypes_offset != cbits_offset + cbit_length";
    173 }
    174 
    175 printf(OUT "  ");
    176 for (my $i = 0; $i < 128; $i++) {
    177     my $x = 0;
    178     my $c = chr($i);
    179     if ($c =~ /[[:space:]]/) {
    180         $x += $pcre_internal{ctype_space};
    181     }
    182     if ($c =~ /[[:xdigit:]]/) {
    183         $x += $pcre_internal{ctype_xdigit};
    184     }
    185     if ($c =~ /[[:alnum:]_]/) {
    186         $x += $pcre_internal{ctype_word};
    187     }
    188     printf(OUT "0x%02X", $x);
    189     if ($i != 127) {
    190         printf(OUT ", ");
    191     } else {
    192         printf(OUT "};");
    193     }
    194     if (($i & 7) == 7) {
    195         printf(OUT " /* ");
    196         my $d = chr($i - 7);
    197         if ($d =~ /[[:print:]]/) {
    198             printf(OUT " %c -", $i - 7);
    199         } else {
    200             printf(OUT "%3d-", $i - 7);
    201         }
    202         if ($c =~ m/[[:print:]]/) {
    203             printf(OUT " %c ", $i);
    204         } else {
    205             printf(OUT "%3d", $i);
    206         }
    207         printf(OUT " */\n");
    208         if ($i != 127) {
    209             printf(OUT "  ");
    210         }
    211     }
    212 }
    213 
    214 if ($pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128) {
    215     die "tables_length != ctypes_offset + 128";
    216 }
    217 
    218 printf(OUT "\n\n/* End of chartables.c */\n");
    219 
    220 close(OUT);
    221 
    222 exit 0;
    223 
    224 sub readHeaderValues()
    225 {
    226     my @variables = qw(
    227         cbit_digit
    228         cbit_length
    229         cbit_space
    230         cbit_word
    231         cbits_offset
    232         ctype_space
    233         ctype_word
    234         ctype_xdigit
    235         ctypes_offset
    236         fcc_offset
    237         lcc_offset
    238         tables_length
    239     );
    240 
    241     local $/ = undef;
    242 
    243     my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");
    244  
    245     my ($fh, $tempFile) = tempfile(
    246         basename($0) . "-XXXXXXXX",
    247         DIR => File::Spec->tmpdir(),
    248         SUFFIX => ".in",
    249         UNLINK => 0,
    250     );
    251 
    252     print $fh "#define DFTABLES\n\n";
    253 
    254     open(HEADER, "<", $headerPath) or die "$!";
    255     print $fh <HEADER>;
    256     close(HEADER);
    257 
    258     print $fh "\n\n";
    259 
    260     for my $v (@variables) {
    261         print $fh "\$pcre_internal{\"$v\"} = $v;\n";
    262     }
    263 
    264     close($fh);
    265 
    266     open(CPP, "$preprocessor \"$tempFile\" |") or die "$!";
    267     my $content = <CPP>;
    268     close(CPP);
    269     
    270     eval $content;
    271     die "$@" if $@;
    272     unlink $tempFile;
    273 }
    274