Home | History | Annotate | Download | only in genpname
      1 #!/usr/bin/perl
      2 #*
      3 #*******************************************************************************
      4 #*   Copyright (C) 2006, International Business Machines
      5 #*   Corporation and others.  All Rights Reserved.
      6 #*******************************************************************************
      7 #*
      8 #*   file name:  genspva.pl
      9 #*   encoding:   US-ASCII
     10 #*   tab size:   8 (not used)
     11 #*   indentation:4
     12 #*
     13 #*   Created by: Ram Viswanadha
     14 #*
     15 #* This file filters iso15924-utf8-<date>.txt
     16 #*
     17 
     18 use File::Find;
     19 use File::Basename;
     20 use IO::File;
     21 use Cwd;
     22 use File::Copy;
     23 use Getopt::Long;
     24 use File::Path;
     25 use File::Copy;
     26 
     27 #run the program
     28 main();
     29 
     30 #---------------------------------------------------------------------
     31 # The main program
     32 
     33 sub main(){
     34     GetOptions(
     35            "--destdir=s" => \$destdir,
     36            "--iso15924=s"  => \$iso,
     37            "--prop=s"  => \$prop,
     38            "--code-start=s"  => \$code,
     39            );
     40     usage() unless defined $destdir;
     41     usage() unless defined $iso;
     42     usage() unless defined $prop;
     43     
     44     $outfile = "$destdir/SyntheticPropertyValueAliases.txt";
     45     $propFH = IO::File->new($prop,"r")
     46             or die  "could not open the file $prop for reading: $! \n";
     47     $isoFH = IO::File->new($iso,"r")
     48             or die  "could not open the file $iso for reading: $! \n";
     49     $outFH = IO::File->new($outfile,"w")
     50             or die  "could not open the file $outfile for reading: $! \n";
     51     my @propLines;
     52     while (<$propFH>) {
     53         next if(!($_ =~/sc ; /));
     54         push(@propLines, $_);
     55     }
     56     printHeader($outFH);
     57     if(defined $code){
     58         print "Please add the following to UScriptCode enum in uscript.h.\n";
     59         print "#ifndef U_HIDE_DRAFT_API\n";
     60     }
     61     while (<$isoFH>) {
     62         next if($_=~/^#/);#skip if the line starts with a comment char
     63         ($script, $t, $name, $rest) = split(/;/,$_,4);
     64         #sc ; Arab
     65         $outstr = "sc ; $script";
     66         $encoded = 0; #false
     67         
     68         # seach the propLines to make sure that this scipt code is not 
     69         # encoded in Unicode
     70         foreach $key (@propLines){
     71             if($key =~ /$outstr/){
     72                 $encoded = 1;
     73             }
     74         }
     75         next if($encoded == 1);
     76         #ignore private use codes 
     77         next if($script =~ /Qa[ab][a-z]/);
     78         
     79         #if($script eq "Qaaa"){
     80         #    $outstr = $outstr." ; Private_Use_Start\n";
     81         #}elsif($script eq  "Qabx"){
     82         #    $outstr = $outstr." ; Private_Use_End\n";
     83         #}else{
     84         #    $outstr = $outstr." ; $script \n";
     85         #} 
     86         
     87         $outstr = $outstr." ; $script \n";
     88         print $outFH $outstr;
     89         
     90         #print to console
     91         if(defined $code){
     92             if($name =~ /[(\s,\x80-\xFF]/){
     93                 $name = $script;
     94             }
     95             $name =~s/-/_/g;
     96         
     97             $scriptcode =  "USCRIPT_".uc($name);
     98             print "      $scriptcode          = $code, /* $script */\n";
     99             $code++;
    100         }
    101         
    102     }
    103     if(defined $code){
    104         print "#endif /* U_HIDE_DRAFT_API */\n";
    105     }
    106     for($i=0; $i<2; $i++){
    107         
    108     }
    109     close($isoFH);
    110     close($propFH);
    111     close($outFH);
    112 }
    113 #-----------------------------------------------------------------------
    114 sub printHeader{
    115     ($outFH) = @_;
    116     ($DAY, $MONTH, $YEAR) = (localtime)[3,4,5];
    117     $YEAR += 1900;
    118     #We will print our copyright here + warnings
    119 print $outFH <<END_HEADER_COMMENT;
    120 ########################################################################
    121 # Copyright (c) 2006-$YEAR, International Business Machines
    122 # Corporation and others.  All Rights Reserved.
    123 ########################################################################
    124 #   file name:      SyntheticPropertyValueAliases.txt
    125 #   encoding:       US-ASCII
    126 #   tab size:       8 (not used)
    127 #   indentation:    4
    128 #   created by:     gensvpa.pl
    129 ########################################################################
    130 
    131 # This file follows the format of PropertyValueAliases.txt
    132 # It contains synthetic property value aliases not present
    133 # in the UCD.  Unlike PropertyValueAliases.txt, it should
    134 # NOT contain a version number.
    135 
    136 ########################################################################
    137 #  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW
    138 #  WHAT YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
    139 ########################################################################
    140 
    141 # set the same names as short and long names to fit the syntax without 
    142 # inventing names that we would have to support forever
    143 
    144 # Script (sc)
    145 
    146 END_HEADER_COMMENT
    147 }
    148 #-----------------------------------------------------------------------
    149 sub usage {
    150     print << "END";
    151 Usage:
    152 gensvpa.pl
    153 Options:
    154         --destdir=<directory>
    155         --iso15924=<file name>
    156         --prop=<PropertyValueAliases.txt>
    157         --code-start=s
    158 e.g.: gensvpa.pl  --destdir=<icu>/source/tools/genpname --iso15924=iso15924-utf8-20041025.txt --prop=<icu>/source/data/unidata --code-start=60
    159 END
    160     exit(0);
    161 }