Home | History | Annotate | Download | only in gensprep
      1 #!/usr/bin/perl
      2 # Copyright (C) 2016 and later: Unicode, Inc. and others.
      3 # License & terms of use: http://www.unicode.org/copyright.html
      4 # Copyright (c) 2001-2015 International Business Machines
      5 # Corporation and others. All Rights Reserved.
      6 
      7 ####################################################################################
      8 # filterRFC3454.pl:
      9 # This tool filters the RFC-3454 txt file for StringPrep tables and creates a table
     10 # to be used in NamePrepProfile
     11 #
     12 # Author: Ram Viswanadha
     13 #        
     14 ####################################################################################
     15 
     16 use File::Find;
     17 use File::Basename;
     18 use IO::File;
     19 use Cwd;
     20 use File::Copy;
     21 use Getopt::Long;
     22 use File::Path;
     23 use File::Copy;
     24 use Time::localtime;
     25 
     26 $icu_copyright = "#####################################################################\n# Copyright (c) %d, International Business Machines Corporation and\n# others. All Rights Reserved.\n#####################################################################\n\n";
     27 $copyright = "###################\n# This file was generated from RFC 3454 (http://www.ietf.org/rfc/rfc3454.txt)\n# Copyright (C) The Internet Society (2002).  All Rights Reserved. \n###################\n\n";
     28 $warning = "###################\n# WARNING: This table is generated by filterRFC3454.pl tool with\n# options: @ARGV \n###################\n\n";
     29 #run the program)
     30 main();
     31 
     32 #---------------------------------------------------------------------
     33 # The main program
     34 
     35 sub main(){
     36   GetOptions(
     37            "--sourcedir=s" => \$sourceDir,
     38            "--destdir=s" => \$destDir,
     39            "--src-filename=s" => \$srcFileName,
     40            "--dest-filename=s" => \$destFileName,
     41            "--A1"  => \$a1,
     42            "--B1"  => \$b1,
     43            "--B2"  => \$b2,
     44            "--B3"  => \$b3,
     45            "--C11" => \$c11,
     46            "--C12" => \$c12,
     47            "--C21" => \$c21,
     48            "--C22" => \$c22,
     49            "--C3"  => \$c3,
     50            "--C4"  => \$c4,
     51            "--C5"  => \$c5,
     52            "--C6"  => \$c6,
     53            "--C7"  => \$c7,
     54            "--C8"  => \$c8,
     55            "--C9"  => \$c9,
     56            "--iscsi" => \$writeISCSIProhibitedExtra,
     57            "--xmpp-node" => \$writeXMPPNodeProhibitedExtra,
     58            "--sasl" => \$writeSASLMap,
     59            "--ldap" => \$writeLDAPMap,
     60            "--normalize" => \$norm,
     61            "--check-bidi" => \$checkBidi,
     62            );
     63   usage() unless defined $sourceDir;
     64   usage() unless defined $destDir;
     65   usage() unless defined $srcFileName;
     66   usage() unless defined $destFileName;
     67 
     68   $infile = $sourceDir."/".$srcFileName;
     69   $inFH = IO::File->new($infile,"r")
     70             or die  "could not open the file $infile for reading: $! \n";
     71   $outfile = $destDir."/".$destFileName;
     72 
     73   unlink($outfile);
     74   $outFH = IO::File->new($outfile,"a")
     75             or die  "could not open the file $outfile for writing: $! \n";
     76 
     77   printf $outFH  $icu_copyright, localtime->year()+1900;
     78   print $outFH  $copyright;
     79   print $outFH  $warning;
     80 
     81   if(defined $norm) {
     82       print $outFH "\@normalize;;\n";
     83   }
     84   if(defined $checkBidi) {
     85       print $outFH "\@check-bidi;;\n";
     86   }
     87   print $outFH "\n";
     88   close($outFH);
     89 
     90   if(defined $b2 && defined $b3){
     91       die "ERROR: --B2 and --B3 are both specified\!\n";
     92   }
     93 
     94   while(defined ($line=<$inFH>)){
     95       next unless $line=~ /Start\sTable/;
     96       if($line =~ /A.1/){
     97             createUnassignedTable($inFH,$outfile);
     98       }
     99       if($line =~ /B.1/ && defined $b1){
    100             createMapToNothing($inFH,$outfile);
    101       }
    102       if($line =~ /B.2/ && defined $b2){
    103             createCaseMapNorm($inFH,$outfile);
    104       }
    105       if($line =~ /B.3/ && defined $b3){
    106             createCaseMapNoNorm($inFH,$outfile);
    107       }
    108       if($line =~ /C.1.1/ && defined $c11 ){
    109             createProhibitedTable($inFH,$outfile,$line);
    110       }
    111       if($line =~ /C.1.2/ && defined $c12 ){
    112             createProhibitedTable($inFH,$outfile,$line);
    113       }
    114       if($line =~ /C.2.1/ && defined $c21 ){
    115             createProhibitedTable($inFH,$outfile,$line);
    116       }
    117       if($line =~ /C.2.2/ && defined $c22 ){
    118             createProhibitedTable($inFH,$outfile,$line);
    119       }
    120       if($line =~ /C.3/ && defined $c3 ){
    121             createProhibitedTable($inFH,$outfile,$line);
    122       }
    123       if($line =~ /C.4/ && defined $c4 ){
    124             createProhibitedTable($inFH,$outfile,$line);
    125       }
    126       if($line =~ /C.5/ && defined $c5 ){
    127             createProhibitedTable($inFH,$outfile,$line);
    128       }
    129       if($line =~ /C.6/ && defined $c6 ){
    130             createProhibitedTable($inFH,$outfile,$line);
    131       }
    132       if($line =~ /C.7/ && defined $c7 ){
    133             createProhibitedTable($inFH,$outfile,$line);
    134       }
    135       if($line =~ /C.8/ && defined $c8 ){
    136             createProhibitedTable($inFH,$outfile,$line);
    137       }
    138       if($line =~ /C.9/ && defined $c9 ){
    139             createProhibitedTable($inFH,$outfile,$line);
    140       }
    141   }
    142   if( defined $writeISCSIProhibitedExtra){
    143       create_iSCSIExtraProhibitedTable($inFH, $outfile);
    144   }
    145   if( defined $writeXMPPNodeProhibitedExtra){
    146       create_XMPPNodeExtraProhibitedTable($inFH, $outfile);
    147   }
    148   if( defined $writeSASLMap){
    149       create_SASLMapTable($inFH, $outfile);
    150   }
    151   if( defined $writeLDAPMap){
    152       create_LDAPMapTable($inFH, $outfile);
    153   }
    154   close($inFH);
    155 }
    156 
    157 #-----------------------------------------------------------------------
    158 sub readPrint{
    159     local ($inFH, $outFH,$comment, $table) = @_;
    160     $count = 0;
    161     print $outFH $comment."\n";
    162     while(defined ($line = <$inFH>)){
    163         next if $line =~ /Hoffman\s\&\sBlanchet/;  # ignore heading
    164         next if $line =~ /RFC\s3454/; # ignore heading
    165         next if $line =~ /\f/;  # ignore form feed
    166         next if $line eq "\n";  # ignore blank lines
    167         # break if "End Table" is found
    168         if( $line =~ /End\sTable/){
    169             print $outFH "\n# Total code points $count\n\n";
    170             return;
    171         }
    172         if($print==1){
    173             print $line;
    174         }
    175         $line =~ s/-/../;
    176         $line =~ s/^\s+//;
    177         if($line =~ /\;/){
    178         }else{
    179             $line =~ s/$/;/;
    180         }
    181         if($table =~ /A/ ){
    182             ($code, $noise) = split /;/ , $line;
    183             $line = $code."; ; UNASSIGNED\n";
    184         }elsif ( $table =~ /B\.1/ ){
    185             $line =~ s/Map to nothing/MAP/;
    186         }elsif ( $table =~ /B\.[23]/ ){
    187             $line =~ s/Case map/MAP/;
    188             $line =~ s/Additional folding/MAP/;
    189         }elsif ( $table =~ /C/ ) {
    190             ($code, $noise) = split /;/ , $line;   
    191             $line = $code."; ; PROHIBITED\n";
    192         }
    193         if($line =~ /\.\./){
    194             ($code, $noise) = split /;/ , $line;
    195             ($startStr, $endStr ) = split /\.\./, $code;
    196             $start = atoi($startStr);
    197             $end   = atoi($endStr);
    198             #print $start."     ".$end."\n";
    199             while($start <= $end){
    200                 $count++;
    201                 $start++;
    202             }
    203         }else{
    204               $count++;
    205         }
    206         print $outFH $line;
    207     }
    208 }
    209 #-----------------------------------------------------------------------
    210 sub atoi {
    211     my $t;
    212     foreach my $d (split(//, shift())) {
    213         $t = $t * 16 + $d;
    214     }
    215     return $t;
    216 }
    217 #-----------------------------------------------------------------------
    218 sub createUnassignedTable{
    219     ($inFH,$outfile) = @_;
    220     $outFH = IO::File->new($outfile,"a")
    221             or die  "could not open the file $outfile for writing: $! \n";
    222     $comment = "# This table contains code points from Table A.1 from RFC 3454\n";
    223     readPrint($inFH,$outFH, $comment, "A");
    224     close($outFH);
    225 }
    226 #-----------------------------------------------------------------------
    227 sub createMapToNothing{
    228     ($inFH,$outfile) = @_;
    229     $outFH = IO::File->new($outfile,"a")
    230             or die  "could not open the file $outfile for writing: $! \n";
    231     $comment = "# This table contains code points from Table B.1 from RFC 3454\n";
    232     readPrint($inFH,$outFH,$comment, "B.1");
    233     close($outFH);
    234 }
    235 #-----------------------------------------------------------------------
    236 sub createCaseMapNorm{
    237     ($inFH,$outfile) = @_;
    238     $outFH = IO::File->new($outfile,"a")
    239             or die  "could not open the file $outfile for writing: $! \n";
    240     $comment = $warning."# This table contains code points from Table B.2 from RFC 3454\n";
    241     readPrint($inFH,$outFH,$comment, "B.2");
    242     close($outFH);
    243 }
    244 #-----------------------------------------------------------------------
    245 sub createCaseMapNoNorm{
    246     ($inFH,$outfile) = @_;
    247     $outFH = IO::File->new($outfile,"a")
    248             or die  "could not open the file $outfile for writing: $! \n";
    249     $comment = $warning."# This table contains code points from Table B.3 from RFC 3454\n";
    250     readPrint($inFH,$outFH,$comment, "B.3");
    251     close($outFH);
    252 }
    253 #-----------------------------------------------------------------------
    254 sub createProhibitedTable{
    255     ($inFH,$outfile,$line) = @_;
    256     $line =~ s/Start//;
    257     $line =~ s/-//g;
    258     $comment = "# code points from $line";
    259 
    260     $outFH = IO::File->new($outfile, "a")
    261             or die  "could not open the file $outfile for writing: $! \n";
    262     readPrint($inFH,$outFH,$comment, "C");
    263     close($outFH);
    264 }
    265 
    266 #-----------------------------------------------------------------------
    267 sub create_iSCSIExtraProhibitedTable{
    268     ($inFH,$outfile,$line) = @_;
    269     $comment ="# Additional prohibitions from iSCSI profile (rfc3722.txt)\n\n";
    270 
    271     $outFH = IO::File->new($outfile, "a")
    272             or die  "could not open the file $outfile for writing: $! \n";
    273     print $outFH $comment;
    274     print $outFH "0021..002C; ; PROHIBITED\n";
    275     print $outFH "002F; ; PROHIBITED\n";
    276     print $outFH "003B..0040; ; PROHIBITED\n";
    277     print $outFH "005B..0060; ; PROHIBITED\n";
    278     print $outFH "007B..007E; ; PROHIBITED\n";
    279     print $outFH "3002; ; PROHIBITED\n";
    280     print $outFH "\n# Total code points 30\n";
    281     close($outFH);
    282 }
    283 #-----------------------------------------------------------------------
    284 sub create_XMPPNodeExtraProhibitedTable{
    285     ($inFH,$outfile,$line) = @_;
    286     $comment ="# Additional prohibitions from XMPP Nodeprep profile (rfc3920.txt)\n\n";
    287 
    288     $outFH = IO::File->new($outfile, "a")
    289             or die  "could not open the file $outfile for writing: $! \n";
    290     print $outFH $comment;
    291     print $outFH "0022; ; PROHIBITED\n";
    292     print $outFH "0026; ; PROHIBITED\n";
    293     print $outFH "0027; ; PROHIBITED\n";
    294     print $outFH "002F; ; PROHIBITED\n";
    295     print $outFH "003A; ; PROHIBITED\n";
    296     print $outFH "003C; ; PROHIBITED\n";
    297     print $outFH "003E; ; PROHIBITED\n";
    298     print $outFH "0040; ; PROHIBITED\n";
    299     print $outFH "\n# Total code points 8\n";
    300     close($outFH);
    301 }
    302 #-----------------------------------------------------------------------
    303 sub create_SASLMapTable{
    304     ($inFH,$outfile,$line) = @_;
    305     $comment ="# Map table for SASL profile (rfc4013.txt)\n\n";
    306 
    307     $outFH = IO::File->new($outfile, "a")
    308             or die  "could not open the file $outfile for writing: $! \n";
    309     print $outFH $comment;
    310     # non-ASCII space characters [C.1.2] to SPACE
    311     print $outFH "00A0; 0020; MAP\n";
    312     print $outFH "1680; 0020; MAP\n";
    313     print $outFH "2000; 0020; MAP\n";
    314     print $outFH "2001; 0020; MAP\n";
    315     print $outFH "2002; 0020; MAP\n";
    316     print $outFH "2003; 0020; MAP\n";
    317     print $outFH "2004; 0020; MAP\n";
    318     print $outFH "2005; 0020; MAP\n";
    319     print $outFH "2006; 0020; MAP\n";
    320     print $outFH "2007; 0020; MAP\n";
    321     print $outFH "2008; 0020; MAP\n";
    322     print $outFH "2009; 0020; MAP\n";
    323     print $outFH "200A; 0020; MAP\n";
    324     print $outFH "200B; 0020; MAP\n";
    325     print $outFH "202F; 0020; MAP\n";
    326     print $outFH "205F; 0020; MAP\n";
    327     print $outFH "3000; 0020; MAP\n";
    328 
    329     # commonly mapped to nothing characters except U+200B to nothing
    330     print $outFH "00AD; ; MAP\n";
    331     print $outFH "034F; ; MAP\n";
    332     print $outFH "1806; ; MAP\n";
    333     print $outFH "180B; ; MAP\n";
    334     print $outFH "180C; ; MAP\n";
    335     print $outFH "180D; ; MAP\n";
    336     print $outFH "200C; ; MAP\n";
    337     print $outFH "200D; ; MAP\n";
    338     print $outFH "2060; ; MAP\n";
    339     print $outFH "FE00; ; MAP\n";
    340     print $outFH "FE01; ; MAP\n";
    341     print $outFH "FE02; ; MAP\n";
    342     print $outFH "FE03; ; MAP\n";
    343     print $outFH "FE04; ; MAP\n";
    344     print $outFH "FE05; ; MAP\n";
    345     print $outFH "FE06; ; MAP\n";
    346     print $outFH "FE07; ; MAP\n";
    347     print $outFH "FE08; ; MAP\n";
    348     print $outFH "FE09; ; MAP\n";
    349     print $outFH "FE0A; ; MAP\n";
    350     print $outFH "FE0B; ; MAP\n";
    351     print $outFH "FE0C; ; MAP\n";
    352     print $outFH "FE0D; ; MAP\n";
    353     print $outFH "FE0E; ; MAP\n";
    354     print $outFH "FE0F; ; MAP\n";
    355     print $outFH "FEFF; ; MAP\n";
    356     print $outFH "\n# Total code points 43\n";
    357     close($outFH);
    358 }
    359 #-----------------------------------------------------------------------
    360 sub create_LDAPMapTable{
    361     ($inFH,$outfile,$line) = @_;
    362     $comment ="# Map table for LDAP profile (rfc4518.txt)\n\n";
    363 
    364     $outFH = IO::File->new($outfile, "a")
    365             or die  "could not open the file $outfile for writing: $! \n";
    366     print $outFH $comment;
    367 
    368     #   SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
    369     #   points are mapped to nothing.  COMBINING GRAPHEME JOINER (U+034F) and
    370     #   VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also
    371     #   mapped to nothing.  The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
    372     #   mapped to nothing.
    373 
    374     print $outFH "00AD; ; MAP\n";
    375     print $outFH "034F; ; MAP\n";
    376     print $outFH "1806; ; MAP\n";
    377     print $outFH "180B; ; MAP\n";
    378     print $outFH "180C; ; MAP\n";
    379     print $outFH "180D; ; MAP\n";
    380     print $outFH "FE00; ; MAP\n";
    381     print $outFH "FE01; ; MAP\n";
    382     print $outFH "FE02; ; MAP\n";
    383     print $outFH "FE03; ; MAP\n";
    384     print $outFH "FE04; ; MAP\n";
    385     print $outFH "FE05; ; MAP\n";
    386     print $outFH "FE06; ; MAP\n";
    387     print $outFH "FE07; ; MAP\n";
    388     print $outFH "FE08; ; MAP\n";
    389     print $outFH "FE09; ; MAP\n";
    390     print $outFH "FE0A; ; MAP\n";
    391     print $outFH "FE0B; ; MAP\n";
    392     print $outFH "FE0C; ; MAP\n";
    393     print $outFH "FE0D; ; MAP\n";
    394     print $outFH "FE0E; ; MAP\n";
    395     print $outFH "FE0F; ; MAP\n";
    396     print $outFH "FFFC; ; MAP\n";
    397 
    398 #   CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
    399 #   TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
    400 #   (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
    401 
    402     print $outFH "0009; 0020; MAP\n";
    403     print $outFH "000A; 0020; MAP\n";
    404     print $outFH "000B; 0020; MAP\n";
    405     print $outFH "000C; 0020; MAP\n";
    406     print $outFH "000D; 0020; MAP\n";
    407     print $outFH "0085; 0020; MAP\n";
    408 
    409     #   All other control code (e.g., Cc) points or code points with a
    410     #   control function (e.g., Cf) are mapped to nothing.  The following is
    411     #   a complete list of these code points: U+0000-0008, 000E-001F, 007F-
    412     #   0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E, 2060-2063,
    413     #   206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001, E0020-E007F.
    414 
    415     print $outFH "0000; ; MAP\n";
    416     print $outFH "0001; ; MAP\n";
    417     print $outFH "0002; ; MAP\n";
    418     print $outFH "0003; ; MAP\n";
    419     print $outFH "0004; ; MAP\n";
    420     print $outFH "0005; ; MAP\n";
    421     print $outFH "0006; ; MAP\n";
    422     print $outFH "0007; ; MAP\n";
    423     print $outFH "0008; ; MAP\n";
    424     print $outFH "000E; ; MAP\n";
    425     print $outFH "000F; ; MAP\n";
    426     print $outFH "0010; ; MAP\n";
    427     print $outFH "0011; ; MAP\n";
    428     print $outFH "0012; ; MAP\n";
    429     print $outFH "0013; ; MAP\n";
    430     print $outFH "0014; ; MAP\n";
    431     print $outFH "0015; ; MAP\n";
    432     print $outFH "0016; ; MAP\n";
    433     print $outFH "0017; ; MAP\n";
    434     print $outFH "0018; ; MAP\n";
    435     print $outFH "0019; ; MAP\n";
    436     print $outFH "001A; ; MAP\n";
    437     print $outFH "001B; ; MAP\n";
    438     print $outFH "001C; ; MAP\n";
    439     print $outFH "001D; ; MAP\n";
    440     print $outFH "001E; ; MAP\n";
    441     print $outFH "001F; ; MAP\n";
    442     print $outFH "007F; ; MAP\n";
    443     print $outFH "0080; ; MAP\n";
    444     print $outFH "0081; ; MAP\n";
    445     print $outFH "0082; ; MAP\n";
    446     print $outFH "0083; ; MAP\n";
    447     print $outFH "0084; ; MAP\n";
    448     print $outFH "0086; ; MAP\n";
    449     print $outFH "0087; ; MAP\n";
    450     print $outFH "0088; ; MAP\n";
    451     print $outFH "0089; ; MAP\n";
    452     print $outFH "008A; ; MAP\n";
    453     print $outFH "008B; ; MAP\n";
    454     print $outFH "008C; ; MAP\n";
    455     print $outFH "008D; ; MAP\n";
    456     print $outFH "008E; ; MAP\n";
    457     print $outFH "008F; ; MAP\n";
    458     print $outFH "0090; ; MAP\n";
    459     print $outFH "0091; ; MAP\n";
    460     print $outFH "0092; ; MAP\n";
    461     print $outFH "0093; ; MAP\n";
    462     print $outFH "0094; ; MAP\n";
    463     print $outFH "0095; ; MAP\n";
    464     print $outFH "0096; ; MAP\n";
    465     print $outFH "0097; ; MAP\n";
    466     print $outFH "0098; ; MAP\n";
    467     print $outFH "0099; ; MAP\n";
    468     print $outFH "009A; ; MAP\n";
    469     print $outFH "009B; ; MAP\n";
    470     print $outFH "009C; ; MAP\n";
    471     print $outFH "009D; ; MAP\n";
    472     print $outFH "009E; ; MAP\n";
    473     print $outFH "009F; ; MAP\n";
    474     print $outFH "06DD; ; MAP\n";
    475     print $outFH "070F; ; MAP\n";
    476     print $outFH "180E; ; MAP\n";
    477     print $outFH "200C; ; MAP\n";
    478     print $outFH "200D; ; MAP\n";
    479     print $outFH "200E; ; MAP\n";
    480     print $outFH "200F; ; MAP\n";
    481     print $outFH "202A; ; MAP\n";
    482     print $outFH "202B; ; MAP\n";
    483     print $outFH "202C; ; MAP\n";
    484     print $outFH "202D; ; MAP\n";
    485     print $outFH "202E; ; MAP\n";
    486     print $outFH "2060; ; MAP\n";
    487     print $outFH "2061; ; MAP\n";
    488     print $outFH "2062; ; MAP\n";
    489     print $outFH "2063; ; MAP\n";
    490     print $outFH "206A; ; MAP\n";
    491     print $outFH "206B; ; MAP\n";
    492     print $outFH "206C; ; MAP\n";
    493     print $outFH "206D; ; MAP\n";
    494     print $outFH "206E; ; MAP\n";
    495     print $outFH "206F; ; MAP\n";
    496     print $outFH "FEFF; ; MAP\n";
    497     print $outFH "FFF9; ; MAP\n";
    498     print $outFH "FFFA; ; MAP\n";
    499     print $outFH "FFFB; ; MAP\n";
    500     print $outFH "1D173; ; MAP\n";
    501     print $outFH "1D174; ; MAP\n";
    502     print $outFH "1D175; ; MAP\n";
    503     print $outFH "1D176; ; MAP\n";
    504     print $outFH "1D177; ; MAP\n";
    505     print $outFH "1D178; ; MAP\n";
    506     print $outFH "1D179; ; MAP\n";
    507     print $outFH "1D17A; ; MAP\n";
    508     print $outFH "E0001; ; MAP\n";
    509     print $outFH "E0020; ; MAP\n";
    510     print $outFH "E0021; ; MAP\n";
    511     print $outFH "E0022; ; MAP\n";
    512     print $outFH "E0023; ; MAP\n";
    513     print $outFH "E0024; ; MAP\n";
    514     print $outFH "E0025; ; MAP\n";
    515     print $outFH "E0026; ; MAP\n";
    516     print $outFH "E0027; ; MAP\n";
    517     print $outFH "E0028; ; MAP\n";
    518     print $outFH "E0029; ; MAP\n";
    519     print $outFH "E002A; ; MAP\n";
    520     print $outFH "E002B; ; MAP\n";
    521     print $outFH "E002C; ; MAP\n";
    522     print $outFH "E002D; ; MAP\n";
    523     print $outFH "E002E; ; MAP\n";
    524     print $outFH "E002F; ; MAP\n";
    525     print $outFH "E0030; ; MAP\n";
    526     print $outFH "E0031; ; MAP\n";
    527     print $outFH "E0032; ; MAP\n";
    528     print $outFH "E0033; ; MAP\n";
    529     print $outFH "E0034; ; MAP\n";
    530     print $outFH "E0035; ; MAP\n";
    531     print $outFH "E0036; ; MAP\n";
    532     print $outFH "E0037; ; MAP\n";
    533     print $outFH "E0038; ; MAP\n";
    534     print $outFH "E0039; ; MAP\n";
    535     print $outFH "E003A; ; MAP\n";
    536     print $outFH "E003B; ; MAP\n";
    537     print $outFH "E003C; ; MAP\n";
    538     print $outFH "E003D; ; MAP\n";
    539     print $outFH "E003E; ; MAP\n";
    540     print $outFH "E003F; ; MAP\n";
    541     print $outFH "E0040; ; MAP\n";
    542     print $outFH "E0041; ; MAP\n";
    543     print $outFH "E0042; ; MAP\n";
    544     print $outFH "E0043; ; MAP\n";
    545     print $outFH "E0044; ; MAP\n";
    546     print $outFH "E0045; ; MAP\n";
    547     print $outFH "E0046; ; MAP\n";
    548     print $outFH "E0047; ; MAP\n";
    549     print $outFH "E0048; ; MAP\n";
    550     print $outFH "E0049; ; MAP\n";
    551     print $outFH "E004A; ; MAP\n";
    552     print $outFH "E004B; ; MAP\n";
    553     print $outFH "E004C; ; MAP\n";
    554     print $outFH "E004D; ; MAP\n";
    555     print $outFH "E004E; ; MAP\n";
    556     print $outFH "E004F; ; MAP\n";
    557     print $outFH "E0050; ; MAP\n";
    558     print $outFH "E0051; ; MAP\n";
    559     print $outFH "E0052; ; MAP\n";
    560     print $outFH "E0053; ; MAP\n";
    561     print $outFH "E0054; ; MAP\n";
    562     print $outFH "E0055; ; MAP\n";
    563     print $outFH "E0056; ; MAP\n";
    564     print $outFH "E0057; ; MAP\n";
    565     print $outFH "E0058; ; MAP\n";
    566     print $outFH "E0059; ; MAP\n";
    567     print $outFH "E005A; ; MAP\n";
    568     print $outFH "E005B; ; MAP\n";
    569     print $outFH "E005C; ; MAP\n";
    570     print $outFH "E005D; ; MAP\n";
    571     print $outFH "E005E; ; MAP\n";
    572     print $outFH "E005F; ; MAP\n";
    573     print $outFH "E0060; ; MAP\n";
    574     print $outFH "E0061; ; MAP\n";
    575     print $outFH "E0062; ; MAP\n";
    576     print $outFH "E0063; ; MAP\n";
    577     print $outFH "E0064; ; MAP\n";
    578     print $outFH "E0065; ; MAP\n";
    579     print $outFH "E0066; ; MAP\n";
    580     print $outFH "E0067; ; MAP\n";
    581     print $outFH "E0068; ; MAP\n";
    582     print $outFH "E0069; ; MAP\n";
    583     print $outFH "E006A; ; MAP\n";
    584     print $outFH "E006B; ; MAP\n";
    585     print $outFH "E006C; ; MAP\n";
    586     print $outFH "E006D; ; MAP\n";
    587     print $outFH "E006E; ; MAP\n";
    588     print $outFH "E006F; ; MAP\n";
    589     print $outFH "E0070; ; MAP\n";
    590     print $outFH "E0071; ; MAP\n";
    591     print $outFH "E0072; ; MAP\n";
    592     print $outFH "E0073; ; MAP\n";
    593     print $outFH "E0074; ; MAP\n";
    594     print $outFH "E0075; ; MAP\n";
    595     print $outFH "E0076; ; MAP\n";
    596     print $outFH "E0077; ; MAP\n";
    597     print $outFH "E0078; ; MAP\n";
    598     print $outFH "E0079; ; MAP\n";
    599     print $outFH "E007A; ; MAP\n";
    600     print $outFH "E007B; ; MAP\n";
    601     print $outFH "E007C; ; MAP\n";
    602     print $outFH "E007D; ; MAP\n";
    603     print $outFH "E007E; ; MAP\n";
    604     print $outFH "E007F; ; MAP\n";
    605 
    606     #   ZERO WIDTH SPACE (U+200B) is mapped to nothing.  All other code
    607     #   points with Separator (space, line, or paragraph) property (e.g., Zs,
    608     #   Zl, or Zp) are mapped to SPACE (U+0020).  The following is a complete
    609     #   list of these code points: U+0020, 00A0, 1680, 2000-200A, 2028-2029,
    610     #   202F, 205F, 3000.
    611 
    612     print $outFH "200B; ; MAP\n";
    613     print $outFH "00A0; 0020; MAP\n";
    614     print $outFH "1680; 0020; MAP\n";
    615     print $outFH "2000; 0020; MAP\n";
    616     print $outFH "2001; 0020; MAP\n";
    617     print $outFH "2002; 0020; MAP\n";
    618     print $outFH "2003; 0020; MAP\n";
    619     print $outFH "2004; 0020; MAP\n";
    620     print $outFH "2005; 0020; MAP\n";
    621     print $outFH "2006; 0020; MAP\n";
    622     print $outFH "2007; 0020; MAP\n";
    623     print $outFH "2008; 0020; MAP\n";
    624     print $outFH "2009; 0020; MAP\n";
    625     print $outFH "200A; 0020; MAP\n";
    626     print $outFH "2028; 0020; MAP\n";
    627     print $outFH "2029; 0020; MAP\n";
    628     print $outFH "202F; 0020; MAP\n";
    629     print $outFH "205F; 0020; MAP\n";
    630     print $outFH "3000; 0020; MAP\n";
    631 
    632     print $outFH "\n# Total code points 238\n";
    633     close($outFH);
    634 }
    635 #-----------------------------------------------------------------------
    636 sub usage {
    637     print << "END";
    638 Usage:
    639 filterRFC3454.pl
    640 Options:
    641         --sourcedir=<directory>
    642         --destdir=<directory>
    643         --src-filename=<name of RFC file>
    644         --dest-filename=<name of destination file>
    645         --A1             Generate data for table A.1
    646         --B1             Generate data for table B.1
    647         --B2             Generate data for table B.2
    648         --B3             Generate data for table B.3
    649         --C11            Generate data for table C.1.1
    650         --C12            Generate data for table C.1.2
    651         --C21            Generate data for table C.2.1
    652         --C22            Generate data for table C.2.2
    653         --C3             Generate data for table C.3
    654         --C4             Generate data for table C.4
    655         --C5             Generate data for table C.5
    656         --C6             Generate data for table C.6
    657         --C7             Generate data for table C.7
    658         --C8             Generate data for table C.8
    659         --C9             Generate data for table C.9
    660         --iscsi          Generate data for iSCSI extra prohibited table
    661         --xmpp-node      Generate data for XMPP extra prohibited table
    662         --sasl           Generate data for SASL map table
    663         --ldap           Generate data for LDAP map table
    664         --normalize      Embed the normalization directive in the output file
    665         --check-bidi     Embed the check bidi directove in the output file
    666 
    667 Note, --B2 and --B3 are mutually exclusive.
    668 
    669 e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --src-filename=rfc3454.txt  --dest-filename=NamePrepProfile.txt --A1 --B1 --B2 --C12 --C22 --C3 --C4 --C5 --C6 --C7 --C8 --C9 --normalize --check-bidi
    670 
    671 filterRFC3454.pl filters the RFC file and creates String prep table files.
    672 The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt
    673 
    674 END
    675   exit(0);
    676 }
    677 
    678 
    679