Home | History | Annotate | Download | only in gensprep
      1 #!/usr/bin/perl
      2 # Copyright (c) 2001-2009 International Business Machines
      3 # Corporation and others. All Rights Reserved.
      4 
      5 ####################################################################################
      6 # filterRFC3454.pl:
      7 # This tool filters the RFC-3454 txt file for StringPrep tables and creates a table
      8 # to be used in NamePrepProfile
      9 #
     10 # Author: Ram Viswanadha
     11 #        
     12 ####################################################################################
     13 
     14 use File::Find;
     15 use File::Basename;
     16 use IO::File;
     17 use Cwd;
     18 use File::Copy;
     19 use Getopt::Long;
     20 use File::Path;
     21 use File::Copy;
     22 use Time::localtime;
     23 
# Header templates written at the top of the generated file by main().
# $icu_copyright is a printf format: main() supplies the current year for %d.
$icu_copyright = "#####################################################################\n# Copyright (c) %d, International Business Machines Corporation and\n# others. All Rights Reserved.\n#####################################################################\n\n";
# Attribution notice for the RFC 3454 text the tables are extracted from.
$copyright = "###################\n# This file was generated from RFC 3454 (http://www.ietf.org/rfc/rfc3454.txt)\n# Copyright (C) The Internet Society (2002).  All Rights Reserved. \n###################\n\n";
# @ARGV is interpolated here, at file scope, i.e. before main() hands the
# command line to GetOptions, so the banner records the options as given.
$warning = "###################\n# WARNING: This table is generated by filterRFC3454.pl tool with\n# options: @ARGV \n###################\n\n";
# Run the program.
main();
     29 
     30 #---------------------------------------------------------------------
     31 # The main program
     32 
     33 sub main(){
     34   GetOptions(
     35            "--sourcedir=s" => \$sourceDir,
     36            "--destdir=s" => \$destDir,
     37            "--src-filename=s" => \$srcFileName,
     38            "--dest-filename=s" => \$destFileName,
     39            "--A1"  => \$a1,
     40            "--B1"  => \$b1,
     41            "--B2"  => \$b2,
     42            "--B3"  => \$b3,
     43            "--C11" => \$c11,
     44            "--C12" => \$c12,
     45            "--C21" => \$c21,
     46            "--C22" => \$c22,
     47            "--C3"  => \$c3,
     48            "--C4"  => \$c4,
     49            "--C5"  => \$c5,
     50            "--C6"  => \$c6,
     51            "--C7"  => \$c7,
     52            "--C8"  => \$c8,
     53            "--C9"  => \$c9,
     54            "--iscsi" => \$writeISCSIProhibitedExtra,
     55            "--xmpp-node" => \$writeXMPPNodeProhibitedExtra,
     56            "--sasl" => \$writeSASLMap,
     57            "--ldap" => \$writeLDAPMap,
     58            "--normalize" => \$norm,
     59            "--check-bidi" => \$checkBidi,
     60            );
     61   usage() unless defined $sourceDir;
     62   usage() unless defined $destDir;
     63   usage() unless defined $srcFileName;
     64   usage() unless defined $destFileName;
     65 
     66   $infile = $sourceDir."/".$srcFileName;
     67   $inFH = IO::File->new($infile,"r")
     68             or die  "could not open the file $infile for reading: $! \n";
     69   $outfile = $destDir."/".$destFileName;
     70 
     71   unlink($outfile);
     72   $outFH = IO::File->new($outfile,"a")
     73             or die  "could not open the file $outfile for writing: $! \n";
     74 
     75   printf $outFH  $icu_copyright, localtime->year()+1900;
     76   print $outFH  $copyright;
     77   print $outFH  $warning;
     78 
     79   if(defined $norm) {
     80       print $outFH "\@normalize;;\n";
     81   }
     82   if(defined $checkBidi) {
     83       print $outFH "\@check-bidi;;\n";
     84   }
     85   print $outFH "\n";
     86   close($outFH);
     87 
     88   if(defined $b2 && defined $b3){
     89       die "ERROR: --B2 and --B3 are both specified\!\n";
     90   }
     91 
     92   while(defined ($line=<$inFH>)){
     93       next unless $line=~ /Start\sTable/;
     94       if($line =~ /A.1/){
     95             createUnassignedTable($inFH,$outfile);
     96       }
     97       if($line =~ /B.1/ && defined $b1){
     98             createMapToNothing($inFH,$outfile);
     99       }
    100       if($line =~ /B.2/ && defined $b2){
    101             createCaseMapNorm($inFH,$outfile);
    102       }
    103       if($line =~ /B.3/ && defined $b3){
    104             createCaseMapNoNorm($inFH,$outfile);
    105       }
    106       if($line =~ /C.1.1/ && defined $c11 ){
    107             createProhibitedTable($inFH,$outfile,$line);
    108       }
    109       if($line =~ /C.1.2/ && defined $c12 ){
    110             createProhibitedTable($inFH,$outfile,$line);
    111       }
    112       if($line =~ /C.2.1/ && defined $c21 ){
    113             createProhibitedTable($inFH,$outfile,$line);
    114       }
    115       if($line =~ /C.2.2/ && defined $c22 ){
    116             createProhibitedTable($inFH,$outfile,$line);
    117       }
    118       if($line =~ /C.3/ && defined $c3 ){
    119             createProhibitedTable($inFH,$outfile,$line);
    120       }
    121       if($line =~ /C.4/ && defined $c4 ){
    122             createProhibitedTable($inFH,$outfile,$line);
    123       }
    124       if($line =~ /C.5/ && defined $c5 ){
    125             createProhibitedTable($inFH,$outfile,$line);
    126       }
    127       if($line =~ /C.6/ && defined $c6 ){
    128             createProhibitedTable($inFH,$outfile,$line);
    129       }
    130       if($line =~ /C.7/ && defined $c7 ){
    131             createProhibitedTable($inFH,$outfile,$line);
    132       }
    133       if($line =~ /C.8/ && defined $c8 ){
    134             createProhibitedTable($inFH,$outfile,$line);
    135       }
    136       if($line =~ /C.9/ && defined $c9 ){
    137             createProhibitedTable($inFH,$outfile,$line);
    138       }
    139   }
    140   if( defined $writeISCSIProhibitedExtra){
    141       create_iSCSIExtraProhibitedTable($inFH, $outfile);
    142   }
    143   if( defined $writeXMPPNodeProhitedExtra){
    144       create_XMPPNodeExtraProhibitedTable($inFH, $outfile);
    145   }
    146   if( defined $writeSASLMap){
    147       create_SASLMapTable($inFH, $outfile);
    148   }
    149   if( defined $writeLDAPMap){
    150       create_LDAPMapTable($inFH, $outfile);
    151   }
    152   close($inFH);
    153 }
    154 
    155 #-----------------------------------------------------------------------
    156 sub readPrint{
    157     local ($inFH, $outFH,$comment, $table) = @_;
    158     $count = 0;
    159     print $outFH $comment."\n";
    160     while(defined ($line = <$inFH>)){
    161         next if $line =~ /Hoffman\s\&\sBlanchet/;  # ignore heading
    162         next if $line =~ /RFC\s3454/; # ignore heading
    163         next if $line =~ /\f/;  # ignore form feed
    164         next if $line eq "\n";  # ignore blank lines
    165         # break if "End Table" is found
    166         if( $line =~ /End\sTable/){
    167             print $outFH "\n# Total code points $count\n\n";
    168             return;
    169         }
    170         if($print==1){
    171             print $line;
    172         }
    173         $line =~ s/-/../;
    174         $line =~ s/^\s+//;
    175         if($line =~ /\;/){
    176         }else{
    177             $line =~ s/$/;/;
    178         }
    179         if($table =~ /A/ ){
    180             ($code, $noise) = split /;/ , $line;
    181             $line = $code."; ; UNASSIGNED\n";
    182         }elsif ( $table =~ /B\.1/ ){
    183             $line =~ s/Map to nothing/MAP/;
    184         }elsif ( $table =~ /B\.[23]/ ){
    185             $line =~ s/Case map/MAP/;
    186             $line =~ s/Additional folding/MAP/;
    187         }elsif ( $table =~ /C/ ) {
    188             ($code, $noise) = split /;/ , $line;   
    189             $line = $code."; ; PROHIBITED\n";
    190         }
    191         if($line =~ /\.\./){
    192             ($code, $noise) = split /;/ , $line;
    193             ($startStr, $endStr ) = split /\.\./, $code;
    194             $start = atoi($startStr);
    195             $end   = atoi($endStr);
    196             #print $start."     ".$end."\n";
    197             while($start <= $end){
    198                 $count++;
    199                 $start++;
    200             }
    201         }else{
    202               $count++;
    203         }
    204         print $outFH $line;
    205     }
    206 }
    207 #-----------------------------------------------------------------------
    208 sub atoi {
    209     my $t;
    210     foreach my $d (split(//, shift())) {
    211         $t = $t * 16 + $d;
    212     }
    213     return $t;
    214 }
    215 #-----------------------------------------------------------------------
    216 sub createUnassignedTable{
    217     ($inFH,$outfile) = @_;
    218     $outFH = IO::File->new($outfile,"a")
    219             or die  "could not open the file $outfile for writing: $! \n";
    220     $comment = "# This table contains code points from Table A.1 from RFC 3454\n";
    221     readPrint($inFH,$outFH, $comment, "A");
    222     close($outFH);
    223 }
    224 #-----------------------------------------------------------------------
    225 sub createMapToNothing{
    226     ($inFH,$outfile) = @_;
    227     $outFH = IO::File->new($outfile,"a")
    228             or die  "could not open the file $outfile for writing: $! \n";
    229     $comment = "# This table contains code points from Table B.1 from RFC 3454\n";
    230     readPrint($inFH,$outFH,$comment, "B.1");
    231     close($outFH);
    232 }
    233 #-----------------------------------------------------------------------
    234 sub createCaseMapNorm{
    235     ($inFH,$outfile) = @_;
    236     $outFH = IO::File->new($outfile,"a")
    237             or die  "could not open the file $outfile for writing: $! \n";
    238     $comment = $warning."# This table contains code points from Table B.2 from RFC 3454\n";
    239     readPrint($inFH,$outFH,$comment, "B.2");
    240     close($outFH);
    241 }
    242 #-----------------------------------------------------------------------
    243 sub createCaseMapNoNorm{
    244     ($inFH,$outfile) = @_;
    245     $outFH = IO::File->new($outfile,"a")
    246             or die  "could not open the file $outfile for writing: $! \n";
    247     $comment = $warning."# This table contains code points from Table B.3 from RFC 3454\n";
    248     readPrint($inFH,$outFH,$comment, "B.3");
    249     close($outFH);
    250 }
    251 #-----------------------------------------------------------------------
    252 sub createProhibitedTable{
    253     ($inFH,$outfile,$line) = @_;
    254     $line =~ s/Start//;
    255     $line =~ s/-//g;
    256     $comment = "# code points from $line";
    257 
    258     $outFH = IO::File->new($outfile, "a")
    259             or die  "could not open the file $outfile for writing: $! \n";
    260     readPrint($inFH,$outFH,$comment, "C");
    261     close($outFH);
    262 }
    263 
    264 #-----------------------------------------------------------------------
    265 sub create_iSCSIExtraProhibitedTable{
    266     ($inFH,$outfile,$line) = @_;
    267     $comment ="# Additional prohibitions from iSCSI profile (rfc3722.txt)\n\n";
    268 
    269     $outFH = IO::File->new($outfile, "a")
    270             or die  "could not open the file $outfile for writing: $! \n";
    271     print $outFH $comment;
    272     print $outFH "0021..002C; ; PROHIBITED\n";
    273     print $outFH "002F; ; PROHIBITED\n";
    274     print $outFH "003B..0040; ; PROHIBITED\n";
    275     print $outFH "005B..0060; ; PROHIBITED\n";
    276     print $outFH "007B..007E; ; PROHIBITED\n";
    277     print $outFH "3002; ; PROHIBITED\n";
    278     print $outFH "\n# Total code points 30\n";
    279     close($outFH);
    280 }
    281 #-----------------------------------------------------------------------
    282 sub create_XMPPNodeExtraProhibitedTable{
    283     ($inFH,$outfile,$line) = @_;
    284     $comment ="# Additional prohibitions from XMPP Nodeprep profile (rfc3920.txt)\n\n";
    285 
    286     $outFH = IO::File->new($outfile, "a")
    287             or die  "could not open the file $outfile for writing: $! \n";
    288     print $outFH $comment;
    289     print $outFH "0022; ; PROHIBITED\n";
    290     print $outFH "0026; ; PROHIBITED\n";
    291     print $outFH "0027; ; PROHIBITED\n";
    292     print $outFH "002F; ; PROHIBITED\n";
    293     print $outFH "003A; ; PROHIBITED\n";
    294     print $outFH "003C; ; PROHIBITED\n";
    295     print $outFH "003E; ; PROHIBITED\n";
    296     print $outFH "0040; ; PROHIBITED\n";
    297     print $outFH "\n# Total code points 8\n";
    298     close($outFH);
    299 }
    300 #-----------------------------------------------------------------------
    301 sub create_SASLMapTable{
    302     ($inFH,$outfile,$line) = @_;
    303     $comment ="# Map table for SASL profile (rfc4013.txt)\n\n";
    304 
    305     $outFH = IO::File->new($outfile, "a")
    306             or die  "could not open the file $outfile for writing: $! \n";
    307     print $outFH $comment;
    308     # non-ASCII space characters [C.1.2] to SPACE
    309     print $outFH "00A0; 0020; MAP\n";
    310     print $outFH "1680; 0020; MAP\n";
    311     print $outFH "2000; 0020; MAP\n";
    312     print $outFH "2001; 0020; MAP\n";
    313     print $outFH "2002; 0020; MAP\n";
    314     print $outFH "2003; 0020; MAP\n";
    315     print $outFH "2004; 0020; MAP\n";
    316     print $outFH "2005; 0020; MAP\n";
    317     print $outFH "2006; 0020; MAP\n";
    318     print $outFH "2007; 0020; MAP\n";
    319     print $outFH "2008; 0020; MAP\n";
    320     print $outFH "2009; 0020; MAP\n";
    321     print $outFH "200A; 0020; MAP\n";
    322     print $outFH "200B; 0020; MAP\n";
    323     print $outFH "202F; 0020; MAP\n";
    324     print $outFH "205F; 0020; MAP\n";
    325     print $outFH "3000; 0020; MAP\n";
    326 
    327     # commonly mapped to nothing characters except U+200B to nothing
    328     print $outFH "00AD; ; MAP\n";
    329     print $outFH "034F; ; MAP\n";
    330     print $outFH "1806; ; MAP\n";
    331     print $outFH "180B; ; MAP\n";
    332     print $outFH "180C; ; MAP\n";
    333     print $outFH "180D; ; MAP\n";
    334     print $outFH "200C; ; MAP\n";
    335     print $outFH "200D; ; MAP\n";
    336     print $outFH "2060; ; MAP\n";
    337     print $outFH "FE00; ; MAP\n";
    338     print $outFH "FE01; ; MAP\n";
    339     print $outFH "FE02; ; MAP\n";
    340     print $outFH "FE03; ; MAP\n";
    341     print $outFH "FE04; ; MAP\n";
    342     print $outFH "FE05; ; MAP\n";
    343     print $outFH "FE06; ; MAP\n";
    344     print $outFH "FE07; ; MAP\n";
    345     print $outFH "FE08; ; MAP\n";
    346     print $outFH "FE09; ; MAP\n";
    347     print $outFH "FE0A; ; MAP\n";
    348     print $outFH "FE0B; ; MAP\n";
    349     print $outFH "FE0C; ; MAP\n";
    350     print $outFH "FE0D; ; MAP\n";
    351     print $outFH "FE0E; ; MAP\n";
    352     print $outFH "FE0F; ; MAP\n";
    353     print $outFH "FEFF; ; MAP\n";
    354     print $outFH "\n# Total code points 43\n";
    355     close($outFH);
    356 }
    357 #-----------------------------------------------------------------------
    358 sub create_LDAPMapTable{
    359     ($inFH,$outfile,$line) = @_;
    360     $comment ="# Map table for LDAP profile (rfc4518.txt)\n\n";
    361 
    362     $outFH = IO::File->new($outfile, "a")
    363             or die  "could not open the file $outfile for writing: $! \n";
    364     print $outFH $comment;
    365 
    366     #   SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
    367     #   points are mapped to nothing.  COMBINING GRAPHEME JOINER (U+034F) and
    368     #   VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also
    369     #   mapped to nothing.  The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
    370     #   mapped to nothing.
    371 
    372     print $outFH "00AD; ; MAP\n";
    373     print $outFH "034F; ; MAP\n";
    374     print $outFH "1806; ; MAP\n";
    375     print $outFH "180B; ; MAP\n";
    376     print $outFH "180C; ; MAP\n";
    377     print $outFH "180D; ; MAP\n";
    378     print $outFH "FE00; ; MAP\n";
    379     print $outFH "FE01; ; MAP\n";
    380     print $outFH "FE02; ; MAP\n";
    381     print $outFH "FE03; ; MAP\n";
    382     print $outFH "FE04; ; MAP\n";
    383     print $outFH "FE05; ; MAP\n";
    384     print $outFH "FE06; ; MAP\n";
    385     print $outFH "FE07; ; MAP\n";
    386     print $outFH "FE08; ; MAP\n";
    387     print $outFH "FE09; ; MAP\n";
    388     print $outFH "FE0A; ; MAP\n";
    389     print $outFH "FE0B; ; MAP\n";
    390     print $outFH "FE0C; ; MAP\n";
    391     print $outFH "FE0D; ; MAP\n";
    392     print $outFH "FE0E; ; MAP\n";
    393     print $outFH "FE0F; ; MAP\n";
    394     print $outFH "FFFC; ; MAP\n";
    395 
    396 #   CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
    397 #   TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
    398 #   (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
    399 
    400     print $outFH "0009; 0020; MAP\n";
    401     print $outFH "000A; 0020; MAP\n";
    402     print $outFH "000B; 0020; MAP\n";
    403     print $outFH "000C; 0020; MAP\n";
    404     print $outFH "000D; 0020; MAP\n";
    405     print $outFH "0085; 0020; MAP\n";
    406 
    407     #   All other control code (e.g., Cc) points or code points with a
    408     #   control function (e.g., Cf) are mapped to nothing.  The following is
    409     #   a complete list of these code points: U+0000-0008, 000E-001F, 007F-
    410     #   0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E, 2060-2063,
    411     #   206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001, E0020-E007F.
    412 
    413     print $outFH "0000; ; MAP\n";
    414     print $outFH "0001; ; MAP\n";
    415     print $outFH "0002; ; MAP\n";
    416     print $outFH "0003; ; MAP\n";
    417     print $outFH "0004; ; MAP\n";
    418     print $outFH "0005; ; MAP\n";
    419     print $outFH "0006; ; MAP\n";
    420     print $outFH "0007; ; MAP\n";
    421     print $outFH "0008; ; MAP\n";
    422     print $outFH "000E; ; MAP\n";
    423     print $outFH "000F; ; MAP\n";
    424     print $outFH "0010; ; MAP\n";
    425     print $outFH "0011; ; MAP\n";
    426     print $outFH "0012; ; MAP\n";
    427     print $outFH "0013; ; MAP\n";
    428     print $outFH "0014; ; MAP\n";
    429     print $outFH "0015; ; MAP\n";
    430     print $outFH "0016; ; MAP\n";
    431     print $outFH "0017; ; MAP\n";
    432     print $outFH "0018; ; MAP\n";
    433     print $outFH "0019; ; MAP\n";
    434     print $outFH "001A; ; MAP\n";
    435     print $outFH "001B; ; MAP\n";
    436     print $outFH "001C; ; MAP\n";
    437     print $outFH "001D; ; MAP\n";
    438     print $outFH "001E; ; MAP\n";
    439     print $outFH "001F; ; MAP\n";
    440     print $outFH "007F; ; MAP\n";
    441     print $outFH "0080; ; MAP\n";
    442     print $outFH "0081; ; MAP\n";
    443     print $outFH "0082; ; MAP\n";
    444     print $outFH "0083; ; MAP\n";
    445     print $outFH "0084; ; MAP\n";
    446     print $outFH "0086; ; MAP\n";
    447     print $outFH "0087; ; MAP\n";
    448     print $outFH "0088; ; MAP\n";
    449     print $outFH "0089; ; MAP\n";
    450     print $outFH "008A; ; MAP\n";
    451     print $outFH "008B; ; MAP\n";
    452     print $outFH "008C; ; MAP\n";
    453     print $outFH "008D; ; MAP\n";
    454     print $outFH "008E; ; MAP\n";
    455     print $outFH "008F; ; MAP\n";
    456     print $outFH "0090; ; MAP\n";
    457     print $outFH "0091; ; MAP\n";
    458     print $outFH "0092; ; MAP\n";
    459     print $outFH "0093; ; MAP\n";
    460     print $outFH "0094; ; MAP\n";
    461     print $outFH "0095; ; MAP\n";
    462     print $outFH "0096; ; MAP\n";
    463     print $outFH "0097; ; MAP\n";
    464     print $outFH "0098; ; MAP\n";
    465     print $outFH "0099; ; MAP\n";
    466     print $outFH "009A; ; MAP\n";
    467     print $outFH "009B; ; MAP\n";
    468     print $outFH "009C; ; MAP\n";
    469     print $outFH "009D; ; MAP\n";
    470     print $outFH "009E; ; MAP\n";
    471     print $outFH "009F; ; MAP\n";
    472     print $outFH "06DD; ; MAP\n";
    473     print $outFH "070F; ; MAP\n";
    474     print $outFH "180E; ; MAP\n";
    475     print $outFH "200C; ; MAP\n";
    476     print $outFH "200D; ; MAP\n";
    477     print $outFH "200E; ; MAP\n";
    478     print $outFH "200F; ; MAP\n";
    479     print $outFH "202A; ; MAP\n";
    480     print $outFH "202B; ; MAP\n";
    481     print $outFH "202C; ; MAP\n";
    482     print $outFH "202D; ; MAP\n";
    483     print $outFH "202E; ; MAP\n";
    484     print $outFH "2060; ; MAP\n";
    485     print $outFH "2061; ; MAP\n";
    486     print $outFH "2062; ; MAP\n";
    487     print $outFH "2063; ; MAP\n";
    488     print $outFH "206A; ; MAP\n";
    489     print $outFH "206B; ; MAP\n";
    490     print $outFH "206C; ; MAP\n";
    491     print $outFH "206D; ; MAP\n";
    492     print $outFH "206E; ; MAP\n";
    493     print $outFH "206F; ; MAP\n";
    494     print $outFH "FEFF; ; MAP\n";
    495     print $outFH "FFF9; ; MAP\n";
    496     print $outFH "FFFA; ; MAP\n";
    497     print $outFH "FFFB; ; MAP\n";
    498     print $outFH "1D173; ; MAP\n";
    499     print $outFH "1D174; ; MAP\n";
    500     print $outFH "1D175; ; MAP\n";
    501     print $outFH "1D176; ; MAP\n";
    502     print $outFH "1D177; ; MAP\n";
    503     print $outFH "1D178; ; MAP\n";
    504     print $outFH "1D179; ; MAP\n";
    505     print $outFH "1D17A; ; MAP\n";
    506     print $outFH "E0001; ; MAP\n";
    507     print $outFH "E0020; ; MAP\n";
    508     print $outFH "E0021; ; MAP\n";
    509     print $outFH "E0022; ; MAP\n";
    510     print $outFH "E0023; ; MAP\n";
    511     print $outFH "E0024; ; MAP\n";
    512     print $outFH "E0025; ; MAP\n";
    513     print $outFH "E0026; ; MAP\n";
    514     print $outFH "E0027; ; MAP\n";
    515     print $outFH "E0028; ; MAP\n";
    516     print $outFH "E0029; ; MAP\n";
    517     print $outFH "E002A; ; MAP\n";
    518     print $outFH "E002B; ; MAP\n";
    519     print $outFH "E002C; ; MAP\n";
    520     print $outFH "E002D; ; MAP\n";
    521     print $outFH "E002E; ; MAP\n";
    522     print $outFH "E002F; ; MAP\n";
    523     print $outFH "E0030; ; MAP\n";
    524     print $outFH "E0031; ; MAP\n";
    525     print $outFH "E0032; ; MAP\n";
    526     print $outFH "E0033; ; MAP\n";
    527     print $outFH "E0034; ; MAP\n";
    528     print $outFH "E0035; ; MAP\n";
    529     print $outFH "E0036; ; MAP\n";
    530     print $outFH "E0037; ; MAP\n";
    531     print $outFH "E0038; ; MAP\n";
    532     print $outFH "E0039; ; MAP\n";
    533     print $outFH "E003A; ; MAP\n";
    534     print $outFH "E003B; ; MAP\n";
    535     print $outFH "E003C; ; MAP\n";
    536     print $outFH "E003D; ; MAP\n";
    537     print $outFH "E003E; ; MAP\n";
    538     print $outFH "E003F; ; MAP\n";
    539     print $outFH "E0040; ; MAP\n";
    540     print $outFH "E0041; ; MAP\n";
    541     print $outFH "E0042; ; MAP\n";
    542     print $outFH "E0043; ; MAP\n";
    543     print $outFH "E0044; ; MAP\n";
    544     print $outFH "E0045; ; MAP\n";
    545     print $outFH "E0046; ; MAP\n";
    546     print $outFH "E0047; ; MAP\n";
    547     print $outFH "E0048; ; MAP\n";
    548     print $outFH "E0049; ; MAP\n";
    549     print $outFH "E004A; ; MAP\n";
    550     print $outFH "E004B; ; MAP\n";
    551     print $outFH "E004C; ; MAP\n";
    552     print $outFH "E004D; ; MAP\n";
    553     print $outFH "E004E; ; MAP\n";
    554     print $outFH "E004F; ; MAP\n";
    555     print $outFH "E0050; ; MAP\n";
    556     print $outFH "E0051; ; MAP\n";
    557     print $outFH "E0052; ; MAP\n";
    558     print $outFH "E0053; ; MAP\n";
    559     print $outFH "E0054; ; MAP\n";
    560     print $outFH "E0055; ; MAP\n";
    561     print $outFH "E0056; ; MAP\n";
    562     print $outFH "E0057; ; MAP\n";
    563     print $outFH "E0058; ; MAP\n";
    564     print $outFH "E0059; ; MAP\n";
    565     print $outFH "E005A; ; MAP\n";
    566     print $outFH "E005B; ; MAP\n";
    567     print $outFH "E005C; ; MAP\n";
    568     print $outFH "E005D; ; MAP\n";
    569     print $outFH "E005E; ; MAP\n";
    570     print $outFH "E005F; ; MAP\n";
    571     print $outFH "E0060; ; MAP\n";
    572     print $outFH "E0061; ; MAP\n";
    573     print $outFH "E0062; ; MAP\n";
    574     print $outFH "E0063; ; MAP\n";
    575     print $outFH "E0064; ; MAP\n";
    576     print $outFH "E0065; ; MAP\n";
    577     print $outFH "E0066; ; MAP\n";
    578     print $outFH "E0067; ; MAP\n";
    579     print $outFH "E0068; ; MAP\n";
    580     print $outFH "E0069; ; MAP\n";
    581     print $outFH "E006A; ; MAP\n";
    582     print $outFH "E006B; ; MAP\n";
    583     print $outFH "E006C; ; MAP\n";
    584     print $outFH "E006D; ; MAP\n";
    585     print $outFH "E006E; ; MAP\n";
    586     print $outFH "E006F; ; MAP\n";
    587     print $outFH "E0070; ; MAP\n";
    588     print $outFH "E0071; ; MAP\n";
    589     print $outFH "E0072; ; MAP\n";
    590     print $outFH "E0073; ; MAP\n";
    591     print $outFH "E0074; ; MAP\n";
    592     print $outFH "E0075; ; MAP\n";
    593     print $outFH "E0076; ; MAP\n";
    594     print $outFH "E0077; ; MAP\n";
    595     print $outFH "E0078; ; MAP\n";
    596     print $outFH "E0079; ; MAP\n";
    597     print $outFH "E007A; ; MAP\n";
    598     print $outFH "E007B; ; MAP\n";
    599     print $outFH "E007C; ; MAP\n";
    600     print $outFH "E007D; ; MAP\n";
    601     print $outFH "E007E; ; MAP\n";
    602     print $outFH "E007F; ; MAP\n";
    603 
    604     #   ZERO WIDTH SPACE (U+200B) is mapped to nothing.  All other code
    605     #   points with Separator (space, line, or paragraph) property (e.g., Zs,
    606     #   Zl, or Zp) are mapped to SPACE (U+0020).  The following is a complete
    607     #   list of these code points: U+0020, 00A0, 1680, 2000-200A, 2028-2029,
    608     #   202F, 205F, 3000.
    609 
    610     print $outFH "200B; ; MAP\n";
    611     print $outFH "00A0; 0020; MAP\n";
    612     print $outFH "1680; 0020; MAP\n";
    613     print $outFH "2000; 0020; MAP\n";
    614     print $outFH "2001; 0020; MAP\n";
    615     print $outFH "2002; 0020; MAP\n";
    616     print $outFH "2003; 0020; MAP\n";
    617     print $outFH "2004; 0020; MAP\n";
    618     print $outFH "2005; 0020; MAP\n";
    619     print $outFH "2006; 0020; MAP\n";
    620     print $outFH "2007; 0020; MAP\n";
    621     print $outFH "2008; 0020; MAP\n";
    622     print $outFH "2009; 0020; MAP\n";
    623     print $outFH "200A; 0020; MAP\n";
    624     print $outFH "2028; 0020; MAP\n";
    625     print $outFH "2029; 0020; MAP\n";
    626     print $outFH "202F; 0020; MAP\n";
    627     print $outFH "205F; 0020; MAP\n";
    628     print $outFH "3000; 0020; MAP\n";
    629 
    630     print $outFH "\n# Total code points 238\n";
    631     close($outFH);
    632 }
    633 #-----------------------------------------------------------------------
    634 sub usage {
    635     print << "END";
    636 Usage:
    637 filterRFC3454.pl
    638 Options:
    639         --sourcedir=<directory>
    640         --destdir=<directory>
    641         --src-filename=<name of RFC file>
    642         --dest-filename=<name of destination file>
    643         --A1             Generate data for table A.1
    644         --B1             Generate data for table B.1
    645         --B2             Generate data for table B.2
    646         --B3             Generate data for table B.3
    647         --C11            Generate data for table C.1.1
    648         --C12            Generate data for table C.1.2
    649         --C21            Generate data for table C.2.1
    650         --C22            Generate data for table C.2.2
    651         --C3             Generate data for table C.3
    652         --C4             Generate data for table C.4
    653         --C5             Generate data for table C.5
    654         --C6             Generate data for table C.6
    655         --C7             Generate data for table C.7
    656         --C8             Generate data for table C.8
    657         --C9             Generate data for table C.9
    658         --iscsi          Generate data for iSCSI extra prohibited table
    659         --xmpp-node      Generate data for XMPP extra prohibited table
    660         --sasl           Generate data for SASL map table
    661         --ldap           Generate data for LDAP map table
    662         --normalize      Embed the normalization directive in the output file
    663         --check-bidi     Embed the check bidi directove in the output file
    664 
    665 Note, --B2 and --B3 are mutually exclusive.
    666 
    667 e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt  --dest-filename=NamePrepProfile.txt --A1 --B1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --normalize --check-bidi
    668 
    669 filterRFC3454.pl filters the RFC file and creates String prep table files.
    670 The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt
    671 
    672 END
    673   exit(0);
    674 }
    675 
    676 
    677