#!/usr/bin/perl
# Copyright 2008 The RE2 Authors.  All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# Generate table entries giving character ranges
# for POSIX/Perl character classes.  Rather than
# work out each definition by hand, it is easier to ask
# Perl about each code point from 0 to 128 and write down
# its answer.
     11 
     12 @posixclasses = (
     13 	"[:alnum:]",
     14 	"[:alpha:]",
     15 	"[:ascii:]",
     16 	"[:blank:]",
     17 	"[:cntrl:]",
     18 	"[:digit:]",
     19 	"[:graph:]",
     20 	"[:lower:]",
     21 	"[:print:]",
     22 	"[:punct:]",
     23 	"[:space:]",
     24 	"[:upper:]",
     25 	"[:word:]",
     26 	"[:xdigit:]",
     27 );
     28 
     29 @perlclasses = (
     30 	"\\d",
     31 	"\\s",
     32 	"\\w",
     33 );
     34 
     35 sub ComputeClass($) {
     36   my @ranges;
     37   my ($class) = @_;
     38   my $regexp = "[$class]";
     39   my $start = -1;
     40   for (my $i=0; $i<=129; $i++) {
     41     if ($i == 129) { $i = 256; }
     42     if ($i <= 128 && chr($i) =~ $regexp) {
     43       if ($start < 0) {
     44         $start = $i;
     45       }
     46     } else {
     47       if ($start >= 0) {
     48         push @ranges, [$start, $i-1];
     49       }
     50       $start = -1;
     51     }
     52   }
     53   return @ranges;
     54 }
     55 
     56 sub PrintClass($$@) {
     57   my ($cname, $name, @ranges) = @_;
     58   print "static URange16 code${cname}[] = {  /* $name */\n";
     59   for (my $i=0; $i<@ranges; $i++) {
     60     my @a = @{$ranges[$i]};
     61     printf "\t{ 0x%x, 0x%x },\n", $a[0], $a[1];
     62   }
     63   print "};\n";
     64   my $n = @ranges;
     65   my $escname = $name;
     66   $escname =~ s/\\/\\\\/g;
     67   $negname = $escname;
     68   if ($negname =~ /:/) {
     69     $negname =~ s/:/:^/;
     70   } else {
     71     $negname =~ y/a-z/A-Z/;
     72   }
     73   return "{ \"$escname\", +1, code$cname, $n }", "{ \"$negname\", -1, code$cname, $n }";
     74 }
     75 
     76 my $gen = 0;
     77 
     78 sub PrintClasses($@) {
     79   my ($cname, @classes) = @_;
     80   my @entries;
     81   foreach my $cl (@classes) {
     82     my @ranges = ComputeClass($cl);
     83     push @entries, PrintClass(++$gen, $cl, @ranges);
     84   }
     85   print "UGroup ${cname}_groups[] = {\n";
     86   foreach my $e (@entries) {
     87     print "\t$e,\n";
     88   }
     89   print "};\n";
     90   my $count = @entries;
     91   print "int num_${cname}_groups = $count;\n";
     92 }
     93 
     94 print <<EOF;
     95 // GENERATED BY make_perl_groups.pl; DO NOT EDIT.
     96 // make_perl_groups.pl >perl_groups.cc
     97 
     98 #include "re2/unicode_groups.h"
     99 
    100 namespace re2 {
    101 
    102 EOF
    103 
    104 PrintClasses("perl", @perlclasses);
    105 PrintClasses("posix", @posixclasses);
    106 
    107 print <<EOF;
    108 
    109 }  // namespace re2
    110 EOF
    111