#!/usr/bin/perl
#*
#*******************************************************************************
#* Copyright (C) 2006, International Business Machines
#* Corporation and others. All Rights Reserved.
#*******************************************************************************
#*
#* file name: genspva.pl
#* encoding: US-ASCII
#* tab size: 8 (not used)
#* indentation:4
#*
#* Created by: Ram Viswanadha
#*
#* This file filters iso15924-utf8-<date>.txt
#*

use strict;
use warnings;

use File::Find;
use File::Basename;
use IO::File;
use Cwd;
use File::Copy;
use Getopt::Long;
use File::Path;
use File::Copy;

#run the program
main();

#---------------------------------------------------------------------
# The main program
#
# Reads the ISO 15924 script list (--iso15924) and the property value
# aliases file (--prop), then writes
# <destdir>/SyntheticPropertyValueAliases.txt containing one
# "sc ; <code> ; <code> " line for every ISO script code that
#   * has no "sc ; <code>" entry among the --prop lines, and
#   * is not a private-use code (matches Qa[ab][a-z]).
#
# When --code-start is given, a matching UScriptCode enum entry is also
# printed to STDOUT for every emitted script, numbered consecutively
# starting at the supplied value.
#
# Dies if any of the three files cannot be opened, or if the output
# file cannot be closed cleanly.
sub main {
    my ($destdir, $iso, $prop, $code);

    GetOptions(
        "--destdir=s"    => \$destdir,
        "--iso15924=s"   => \$iso,
        "--prop=s"       => \$prop,
        "--code-start=s" => \$code,
    );
    usage() unless defined $destdir;
    usage() unless defined $iso;
    usage() unless defined $prop;

    my $outfile = "$destdir/SyntheticPropertyValueAliases.txt";
    my $propFH = IO::File->new($prop, "r")
        or die "could not open the file $prop for reading: $! \n";
    my $isoFH = IO::File->new($iso, "r")
        or die "could not open the file $iso for reading: $! \n";
    my $outFH = IO::File->new($outfile, "w")
        or die "could not open the file $outfile for writing: $! \n";

    # Keep only the script ("sc") alias lines; nothing else in the
    # property file is consulted.
    my @propLines;
    while (my $propLine = <$propFH>) {
        push(@propLines, $propLine) if $propLine =~ /sc ; /;
    }
    close($propFH);

    printHeader($outFH);
    if (defined $code) {
        print "Please add the following to UScriptCode enum in uscript.h.\n";
        print "#ifndef U_HIDE_DRAFT_API\n";
    }

    ISO_LINE:
    while (my $isoLine = <$isoFH>) {
        # skip if the line starts with a comment char
        next ISO_LINE if $isoLine =~ /^#/;

        # Drop the line terminator so blank lines yield no fields at all.
        $isoLine =~ s/\r?\n\z//;

        # Fields used: script code ; (second field, ignored) ; name ; rest
        my ($script, undef, $name) = split(/;/, $isoLine, 4);

        # Blank or malformed records carry no script code.
        next ISO_LINE unless defined $script && length $script;

        # ignore private use codes
        next ISO_LINE if $script =~ /Qa[ab][a-z]/;

        # e.g. "sc ; Arab"
        my $alias = "sc ; $script";

        # Search the property lines to make sure that this script code
        # is not already encoded in Unicode.  A literal substring test
        # is used so the code is never interpreted as a regex.
        next ISO_LINE if grep { index($_, $alias) >= 0 } @propLines;

        # Short and long alias are deliberately identical.
        print $outFH "$alias ; $script \n";

        # print to console
        if (defined $code) {
            # Names that are missing, or that contain a parenthesis,
            # whitespace, a comma or a non-ASCII byte, cannot form an
            # identifier; fall back to the four-letter code.
            if (!defined $name || $name =~ /[(\s,\x80-\xFF]/) {
                $name = $script;
            }
            $name =~ s/-/_/g;

            my $scriptcode = "USCRIPT_" . uc($name);
            print " $scriptcode = $code, /* $script */\n";
            $code++;
        }
    }

    if (defined $code) {
        print "#endif /* U_HIDE_DRAFT_API */\n";
    }

    close($isoFH);
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
        or die "could not close the file $outfile: $! \n";
}

#-----------------------------------------------------------------------
# Writes the copyright banner and explanatory comments that start the
# generated file.
#
#   $outFH - output handle the header is printed to
#
# The copyright range ends at the current local year.
sub printHeader {
    my ($outFH) = @_;
    my $YEAR = (localtime)[5] + 1900;

    #We will print our copyright here + warnings
    print $outFH <<END_HEADER_COMMENT;
########################################################################
# Copyright (c) 2006-$YEAR, International Business Machines
# Corporation and others. All Rights Reserved.
########################################################################
# file name: SyntheticPropertyValueAliases.txt
# encoding: US-ASCII
# tab size: 8 (not used)
# indentation: 4
# created by: gensvpa.pl
########################################################################

# This file follows the format of PropertyValueAliases.txt
# It contains synthetic property value aliases not present
# in the UCD. Unlike PropertyValueAliases.txt, it should
# NOT contain a version number.

########################################################################
# THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW
# WHAT YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
########################################################################

# set the same names as short and long names to fit the syntax without
# inventing names that we would have to support forever

# Script (sc)

END_HEADER_COMMENT
}

#-----------------------------------------------------------------------
# Prints the command-line help to STDOUT and terminates the program.
#
# NOTE(review): the exit status is 0 even though this is reached when a
# required option is missing; kept as-is so existing callers see the
# same status -- confirm whether a non-zero status is wanted.
sub usage {
    print <<"END";
Usage:
gensvpa.pl
Options:
 --destdir=<directory>
 --iso15924=<file name>
 --prop=<PropertyValueAliases.txt>
 --code-start=s
e.g.: gensvpa.pl --destdir=<icu>/source/tools/genpname --iso15924=iso15924-utf8-20041025.txt --prop=<icu>/source/data/unidata --code-start=60
END
    exit(0);
}