#!/usr/bin/perl
# Copyright 2008 The RE2 Authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# Generate table entries giving character ranges
# for POSIX/Perl character classes. Rather than
# figure out what the definition is, it is easier to ask
# Perl about each letter from 0-128 and write down
# its answer.
#
# The generated C++ source is written to standard output.

# NOTE: only strict/warnings are enabled on purpose. A `use v5.12`-style
# feature bundle would turn on unicode_strings, which could change how
# chr(128) is classified by the bracketed classes probed below.
use strict;
use warnings;

# POSIX bracket class names; each one is probed as "[[:name:]]".
my @posixclasses = (
    "[:alnum:]",
    "[:alpha:]",
    "[:ascii:]",
    "[:blank:]",
    "[:cntrl:]",
    "[:digit:]",
    "[:graph:]",
    "[:lower:]",
    "[:print:]",
    "[:punct:]",
    "[:space:]",
    "[:upper:]",
    "[:word:]",
    "[:xdigit:]",
);

# Perl escape classes; each one is probed as "[\x]".
my @perlclasses = (
    "\\d",
    "\\s",
    "\\w",
);

# ComputeClass($class)
#
# Probes the characters chr(0) .. chr(128) against the bracket expression
# "[$class]" and collapses consecutive matches into inclusive ranges.
#
# Returns a list of [$lo, $hi] array refs in ascending order. A run of
# matches that is still open after code point 128 is closed at 255.
sub ComputeClass {
    my ($class) = @_;

    # Compile once instead of re-interpreting the string on every probe.
    my $regexp = qr/[${class}]/;

    my @ranges;
    my $start = -1;    # first code point of the open run, or -1 if none
    for my $i (0 .. 128) {
        if (chr($i) =~ $regexp) {
            $start = $i if $start < 0;
        }
        elsif ($start >= 0) {
            push @ranges, [ $start, $i - 1 ];
            $start = -1;
        }
    }

    # A run that includes 128 is taken to extend through the rest of the
    # 8-bit range, hence the fixed upper bound of 255.
    push @ranges, [ $start, 255 ] if $start >= 0;

    return @ranges;
}

# PrintClass($cname, $name, @ranges)
#
# Prints a "static URange16 code<cname>[]" table holding @ranges (a list of
# [$lo, $hi] array refs) to standard output; $name appears in a C comment.
#
# Returns two UGroup initializer strings: one for the class itself (+1) and
# one for its negation (-1). The negated name is formed by inserting "^"
# after the first ":" when the name contains one, and by upper-casing the
# name otherwise.
sub PrintClass {
    my ($cname, $name, @ranges) = @_;

    print "static URange16 code${cname}[] = { /* $name */\n";
    for my $range (@ranges) {
        printf "\t{ 0x%x, 0x%x },\n", $range->[0], $range->[1];
    }
    print "};\n";

    my $n = @ranges;

    # Double the backslashes so the name survives inside a C string literal.
    my $escname = $name;
    $escname =~ s/\\/\\\\/g;

    my $negname = $escname;
    if ($negname =~ /:/) {
        $negname =~ s/:/:^/;
    }
    else {
        $negname =~ tr/a-z/A-Z/;
    }

    return (
        "{ \"$escname\", +1, code$cname, $n }",
        "{ \"$negname\", -1, code$cname, $n }",
    );
}

# Counter used to give every emitted range table a unique "code<N>" name;
# it is shared across all PrintClasses calls.
my $gen = 0;

# PrintClasses($cname, @classes)
#
# For each class in @classes, prints its range table, then prints the
# "UGroup <cname>_groups[]" array that references those tables and the
# matching "num_<cname>_groups" count. All output goes to standard output.
sub PrintClasses {
    my ($cname, @classes) = @_;

    my @entries;
    for my $cl (@classes) {
        my @ranges = ComputeClass($cl);
        push @entries, PrintClass(++$gen, $cl, @ranges);
    }

    print "UGroup ${cname}_groups[] = {\n";
    for my $entry (@entries) {
        print "\t$entry,\n";
    }
    print "};\n";

    my $count = @entries;
    print "int num_${cname}_groups = $count;\n";

    return;
}

print <<EOF;
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

EOF

PrintClasses("perl", @perlclasses);
PrintClasses("posix", @posixclasses);

print <<EOF;

} // namespace re2
EOF