Home | History | Annotate | Download | only in mac
      1 #!/usr/bin/perl -w
      2 
      3 # Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved.
      4 #
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions
      7 # are met:
      8 #
      9 # 1.  Redistributions of source code must retain the above copyright
     10 #     notice, this list of conditions and the following disclaimer. 
     11 # 2.  Redistributions in binary form must reproduce the above copyright
     12 #     notice, this list of conditions and the following disclaimer in the
     13 #     documentation and/or other materials provided with the distribution. 
     14 # 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     15 #     its contributors may be used to endorse or promote products derived
     16 #     from this software without specific prior written permission. 
     17 #
     18 # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     19 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     21 # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     22 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     23 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     24 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     25 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28 
     29 use strict;
     30 
     31 my %aliasesFromCharsetsFile;
     32 my %namesWritten;
     33 
     34 my $output = "";
     35 
     36 my $error = 0;
     37 
     38 sub error ($)
     39 {
     40     print STDERR @_, "\n";
     41     $error = 1;
     42 }
     43 
     44 sub emit_line
     45 {
     46     my ($name, $prefix, $encoding, $flags) = @_;
     47  
     48     error "$name shows up twice in output" if $namesWritten{$name};
     49     $namesWritten{$name} = 1;
     50     
     51     $output .= "        { \"$name\", $prefix$encoding },\n";
     52 }
     53 
     54 sub process_platform_encodings
     55 {
     56     my ($filename, $PlatformPrefix) = @_;
     57     my $baseFilename = $filename;
     58     $baseFilename =~ s|.*/||;
     59     
     60     my %seenPlatformNames;
     61     my %seenIANANames;
     62     
     63     open PLATFORM_ENCODINGS, $filename or die;
     64     
     65     while (<PLATFORM_ENCODINGS>) {
     66         chomp;
     67         s/\#.*$//;
     68         s/\s+$//;
     69         if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) {
     70             my %aliases;
     71             
     72             my $PlatformNameWithFlags = $PlatformName;
     73             if ($flags) {
     74                 $PlatformNameWithFlags .= ", " . $flags;
     75             } else {
     76                 $flags = "NoEncodingFlags";
     77             }
     78             error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags};
     79             $seenPlatformNames{$PlatformNameWithFlags} = 1;
     80 
     81             # Build the aliases list.
     82             # Also check that no two names are part of the same entry in the charsets file.
     83             my @IANANames = split ", ", $IANANames;
     84             my $firstName = "";
     85             my $canonicalFirstName = "";
     86             my $prevName = "";
     87             for my $name (@IANANames) {
     88                 if ($firstName eq "") {
     89                     if ($name !~ /^[-A-Za-z0-9_]+$/) {
     90                         error "$name, in $baseFilename, has illegal characters in it";
     91                         next;
     92                     }
     93                     $firstName = $name;
     94                 } else {
     95                     if ($name !~ /^[a-z0-9]+$/) {
     96                         error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)";
     97                         next;
     98                     }
     99                     if ($name le $prevName) {
    100                         error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order";
    101                     }
    102                     $prevName = $name;
    103                 }
    104                 
    105                 my $canonicalName = lc $name;
    106                 $canonicalName =~ tr/-_//d;
    107                 
    108                 $canonicalFirstName = $canonicalName if $canonicalFirstName eq "";
    109                 
    110                 error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName};
    111                 $seenIANANames{$canonicalName} = 1;
    112                 
    113                 $aliases{$canonicalName} = 1;
    114                 next if !$aliasesFromCharsetsFile{$canonicalName};
    115                 for my $alias (@{$aliasesFromCharsetsFile{$canonicalName}}) {
    116                     $aliases{$alias} = 1;
    117                 }
    118                 for my $otherName (@IANANames) {
    119                     next if $canonicalName eq $otherName;
    120                     if ($aliasesFromCharsetsFile{$otherName}
    121                         && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$otherName}
    122                         && $canonicalName le $otherName) {
    123                         error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt";
    124                     }
    125                 }
    126             }
    127             
    128             # write out
    129             emit_line($firstName, $PlatformPrefix, $PlatformName, $flags);
    130             for my $alias (sort keys %aliases) {
    131                 emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne $canonicalFirstName;
    132             }
    133         } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) {
    134             my $PlatformName = $1;
    135             
    136             error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName};
    137             $seenPlatformNames{$PlatformName} = 1;
    138         } elsif (/./) {
    139             error "syntax error in $baseFilename, line $.";
    140         }
    141     }
    142     
    143     close PLATFORM_ENCODINGS;
    144 }
    145 
    146 sub process_iana_charset 
    147 {
    148     my ($canonical_name, @aliases) = @_;
    149     
    150     return if !$canonical_name;
    151     
    152     my @names = sort $canonical_name, @aliases;
    153     
    154     for my $name (@names) {
    155         $aliasesFromCharsetsFile{$name} = \@names;
    156     }
    157 }
    158 
    159 sub process_iana_charsets
    160 {
    161     my ($filename) = @_;
    162     
    163     open CHARSETS, $filename or die;
    164     
    165     my %seen;
    166     
    167     my $canonical_name;
    168     my @aliases;
    169     
    170     my %exceptions = ( isoir91 => 1, isoir92 => 1 );
    171     
    172     while (<CHARSETS>) {
    173         chomp;
    174         if ((my $new_canonical_name) = /Name: ([^ \t]*).*/) {
    175             $new_canonical_name = lc $new_canonical_name;
    176             $new_canonical_name =~ tr/a-z0-9//cd;
    177             
    178             error "saw $new_canonical_name twice in character-sets.txt", if $seen{$new_canonical_name};
    179             $seen{$new_canonical_name} = $new_canonical_name;
    180             
    181             process_iana_charset $canonical_name, @aliases;
    182 
    183             $canonical_name = $new_canonical_name;
    184             @aliases = ();
    185         } elsif ((my $new_alias) = /Alias: ([^ \t]*).*/) {
    186             $new_alias = lc $new_alias;
    187             $new_alias =~ tr/a-z0-9//cd;
    188             
    189             # do this after normalizing the alias, sometimes character-sets.txt
    190             # has weird escape characters, e.g. \b after None
    191             next if $new_alias eq "none";
    192 
    193             error "saw $new_alias twice in character-sets.txt $seen{$new_alias}, $canonical_name", if $seen{$new_alias} && $seen{$new_alias} ne $canonical_name && !$exceptions{$new_alias};
    194             push @aliases, $new_alias if !$seen{$new_alias};
    195             $seen{$new_alias} = $canonical_name;            
    196         }
    197     }
    198     
    199     process_iana_charset $canonical_name, @aliases;
    200     
    201     close CHARSETS;
    202 }
    203 
    204 # Program body
    205 
    206 process_iana_charsets($ARGV[0]);
    207 process_platform_encodings($ARGV[1], $ARGV[2]);
    208 
    209 exit 1 if $error;
    210 
    211 print <<EOF
    212 // File generated by make-charset-table.pl. Do not edit!
    213 
    214 #include "config.h"
    215 #include "CharsetData.h"
    216 
    217 namespace WebCore {
    218 
    219     const CharsetEntry CharsetTable[] = {
    220 $output
    221         { 0, 0 }
    222     };
    223 
    224 }
    225 EOF
    226