Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/perl
      2     eval 'exec perl -S $0 "$@"'
      3     if $runnning_under_some_shell;
      4 #
      5 # txt2html.pl
      6 # Convert raw text to something with a little HTML formatting
      7 #
# Written by Seth Golub <seth@cs.wustl.edu>
      9 #            http://www.cs.wustl.edu/~seth/txt2html/
     10 #
     11 # $Revision: 1.15 $
     12 # $Date: 2004/10/05 20:30:33 $
     13 # $Author: cristy $
     14 #
     15 #
     16 # $Log: txt2html,v $
     17 # Revision 1.15  2004/10/05 20:30:33  cristy
     18 # *** empty log message ***
     19 #
     20 # Revision 1.14  2004/04/26 19:53:42  cristy
     21 # *** empty log message ***
     22 #
     23 # Revision 1.13  2004/04/26 15:03:00  cristy
     24 # *** empty log message ***
     25 #
     26 # Revision 1.12  2004/04/24 13:48:50  cristy
     27 # *** empty log message ***
     28 #
     29 # Revision 1.11  2003/10/28 18:40:59  cristy
     30 # *** empty log message ***
     31 #
     32 # Revision 1.10  2003/10/28 03:44:38  cristy
     33 # *** empty log message ***
     34 #
     35 # Revision 1.9  2003/10/17 13:59:12  cristy
     36 # *** empty log message ***
     37 #
     38 # Revision 1.8  2003/10/16 22:26:06  cristy
     39 # *** empty log message ***
     40 #
     41 # Revision 1.7  2003/10/12 04:10:15  cristy
     42 # *** empty log message ***
     43 #
     44 # Revision 1.6  2003/07/20 03:39:50  cristy
     45 # *** empty log message ***
     46 #
     47 # Revision 1.5  2003/07/19 19:44:20  cristy
     48 # *** empty log message ***
     49 #
     50 # Revision 1.4  2003/04/07 23:35:40  cristy
     51 # *** empty log message ***
     52 #
     53 # Revision 1.3  2003/04/05 02:52:42  cristy
     54 # *** empty log message ***
     55 #
     56 # Revision 1.2  2003/04/04 20:50:50  cristy
     57 # *** empty log message ***
     58 #
     59 # Revision 1.1  2003/03/25 15:10:23  cristy
     60 # genesis
     61 #
     62 # Revision 1.1  2003/03/22 17:02:00  cristy
     63 # *** empty log message ***
     64 #
     65 # Revision 1.10  1994/12/28  20:10:25  seth
     66 #  * Added --extract, etc.
     67 #
     68 # Revision 1.9  94/12/13  15:16:23  15:16:23  seth (Seth Golub)
     69 #  * Changed from #!/usr/local/bin/perl to the more clever version in
     70 #    the man page.  (How did I manage not to read this for so long?)
     71 #  * Swapped hrule & header back to handle double lines.  Why should
     72 #    this order screw up headers?
     73 #
     74 # Revision 1.8  1994/11/30  21:07:03  seth
     75 #  * put mail_anchor back in.  (Why did I take this out?)
     76 #  * Finally added handling of lettered lists (ordered lists marked with
     77 #    letters)
     78 #  * Added title option (--title, -t)
     79 #  * Shortline now looks at how long the line was before txt2html
     80 #    started adding tags.   ($line_length)
     81 #  * Changed list references to scalars where appropriate.  (@foo[0] -> $foo[0])
     82 #  * Added untabify() to homogenize leading indentation for list
     83 #    prefixes and functions that use line length
     84 #  * Added "underline tolerance" for when underlines are not exactly the
     85 #    same length as what they underline.
     86 #  * Added error message for unrecognized options
     87 #  * removed \w matching on --capstag
     88 #  * Tagline now removes leading & trailing whitespace before tagging
     89 #  * swapped order of caps & heading in main loop
     90 #  * Cleaned up code for speed and to get rid of warnings
     91 #  * Added more restrictions to something being a mail header
     92 #  * Added indentation for lists, just to make the output more readable.
     93 #  * Fixed major bug in lists: $OL and $UL were never set, so when a
     94 #    list was ended "</UL>" was *always* used!
     95 #  * swapped order of hrule & header to properly handle long underlines
     96 #
     97 # Revision 1.7  94/10/28  13:16:11  13:16:11  seth (Seth Golub)
     98 #  * Added to comments in options section
     99 #  * renamed blank to is_blank
    100 #  * Page break is converted to horizontal rule <HR>
    101 #  * moved usage subroutine up top so people who look through code see
    102 #    it sooner
    103 #
    104 # Revision 1.6  94/10/28  12:43:46  12:43:46  seth (Seth Golub)
    105 #  * Creates anchors at each heading
    106 #
    107 # Revision 1.5  94/07/14  17:43:59  17:43:59  seth (Seth Golub)
    108 #  * Fixed minor bug in Headers
    109 #  * Preformatting can be set to only start/stop when TWO lines of
    110 #    [non]formatted-looking-text are encountered.  Old behavior is still
    111 #    possible through command line options (-pb 1 -pe 1).
    112 #  * Can preformat entire document (-pb 0) or disable preformatting
    113 #    completely (-pe 0).
    114 #  * Fixed minor bug in CAPS handling (paragraph breaks broke)
    115 #  * Puts paragraph tags *before* paragraphs, not just between them.
    116 #
    117 # Revision 1.4  94/06/20  16:42:55  16:42:55  seth (Seth Golub)
    118 #  * Allow ':' for numbered lists (e.g. "1: Figs")
    119 #  * Whitespace at end of line will not start or end preformatting
    120 #  * Mailmode is now off by default
    121 #  * Doesn't break short lines if they are the first line in a list
    122 #    item.  It *should* break them anyway if the next line is a
    123 #    continuation of the list item, but I haven't dealt with this yet.
    124 #  * Added action on lines that are all capital letters.  You can change
#    how these lines get tagged, as well as the minimum number of
    126 #    consecutive capital letters required to fire off this action.
    127 #
    128 # Revision 1.3  94/05/17  15:58:58  15:58:58  seth (Seth Golub)
    129 # * Tiny bugfix in unhyphenation
    130 #
    131 # Revision 1.2  94/05/16  18:15:16  18:15:16  seth (Seth Golub)
    132 #  * Added unhyphenation
    133 #
    134 # Revision 1.1  94/05/16  16:19:03  16:19:03  seth (Seth Golub)
    135 # Initial revision
    136 #
    137 #
    138 # 1.02  Allow '-' in mail headers
    139 #       Added handling for multiline mail headers
    140 #
    141 #
    142 #
    143 # Oscar Nierstrasz has a nice script for hypertextifying URLs.
    144 # It is available at:
    145 #   http://cui_www.unige.ch/ftp/PUBLIC/oscar/scripts/html.pl
    146 #
    147 
    148 #########################
    149 # Configurable options
    150 #
    151 
# Each setting below is a default; the bracketed line above it shows the
# command-line switch that overrides it.

# [-s <n>    ] | [--shortline <n>                 ]
$short_line_length = 40;        # Lines shorter than this are assumed to
                                # be intentionally broken and are kept
                                # that short by appending <BR>.

# [-p <n>    ] | [--prewhite <n>                  ]
$preformat_whitespace_min = 5;  # Minimum number of consecutive leading
                                # whitespace characters to trigger
                                # preformatting.
                                # NOTE: Tabs are now expanded to
                                # spaces before this check is made.
                                # That means if $tab_width is 8 and
                                # this is 5, then one tab is expanded
                                # to 8 spaces, which is enough to
                                # trigger preformatting.

# [-pb <n>   ] | [--prebegin <n>                  ]
$preformat_trigger_lines = 2;   # How many lines of preformatted-looking
                                # text are needed to switch to <PRE>
                                # <= 0 : Preformat entire document
                                #    1 : one line triggers
                                # >= 2 : two lines trigger

# [-pe <n>   ] | [--preend <n>                    ]
$endpreformat_trigger_lines = 2; # How many lines of unpreformatted-looking
                                 # text are needed to switch from <PRE>
                                 # <= 0 : Never preformat within document
                                 #    1 : one line triggers
                                 # >= 2 : two lines trigger
# NOTE for --prebegin and --preend:
# A zero takes precedence.  If one is zero, the other is ignored.
# If both are zero, entire document is preformatted.


# [-r <n>    ] | [--hrule <n>                     ]
$hrule_min = 4;                 # Min number of ---s for an HRule.

# [-c <n>    ] | [--caps <n>                      ]
$min_caps_length = 3;           # min sequential CAPS for an all-caps line

# [-ct <tag> ] | [--capstag <tag>                 ]
$caps_tag = "STRONG";           # Tag to put around all-caps lines

# [-m/+m     ] | [--mail        / --nomail        ]
$mailmode = 0;                  # Deal with mail headers & quoted text

# [-u/+u     ] | [--unhyphen    / --nounhyphen    ]
$unhyphenation = 1;             # Enables unhyphenation of text.

# [-a <file> ] | [--append <file>                 ]
# [+a        ] | [--noappend                      ]
$append_file = 0;               # If you want something appended by
                                # default, put the filename here.
                                # The appended text will not be
                                # processed at all, so make sure it's
                                # plain text or decent HTML.  i.e. do
                                # not have things like:
                                #   Seth Golub <seth@cs.wustl.edu>
                                # but instead, have:
                                #   Seth Golub &lt;seth@cs.wustl.edu&gt;

# [-t <title>] | [--title <title>                 ]
$title = 0;                     # You can specify a title.
                                # Otherwise it won't put one in.

# [-ul <n>   ] | [--underlinelong <n>             ]
$underline_tolerance_long = 1;  # How much longer can underlines
                                # be and still be underlines?

# [-us <n>   ] | [--underlineshort <n>            ]
$underline_tolerance_short = 1; # How much shorter can underlines
                                # be and still be underlines?

# [-tw <n>   ] | [--tabwidth <n>                  ]
$tab_width = 8;                 # How many spaces equal a tab?


# [-iw <n>   ] | [--indentwidth <n>               ]
$indent_width = 2;              # Indents this many spaces for each
                                # level of a list

# [-e/+e     ] | [--extract / --noextract         ]
$extract = 0;                   # Extract Mode (suitable for inserting)
    235 
    236 # END OF CONFIGURABLE OPTIONS
    237 ########################################
    238 
    239 
    240 ########################################
    241 # Definitions  (Don't change these)
    242 #
    243 $NONE       =   0;
    244 $LIST       =   1;
    245 $HRULE      =   2;
    246 $PAR        =   4;
    247 $PRE        =   8;
    248 $END        =  16;
    249 $BREAK      =  32;
    250 $HEADER     =  64;
    251 $MAILHEADER = 128;
    252 $MAILQUOTE  = 256;
    253 $CAPS       = 512;
    254 
    255 $OL = 1;
    256 $UL = 2;
    257 
    258 sub usage
    259 {
    260     $0 =~ s#.*/##;
    261     local($s) = " " x length($0);
    262     print STDERR <<EOF;
    263 
    264 Usage: $0 [options]
    265 
    266 where options are:
    267        $s [-v        ] | [--version                       ]
    268        $s [-h        ] | [--help                          ]
    269        $s [-s <n>    ] | [--shortline <n>                 ]
    270        $s [-p <n>    ] | [--prewhite <n>                  ]
    271        $s [-pb <n>   ] | [--prebegin <n>                  ]
    272        $s [-pe <n>   ] | [--preend <n>                    ]
    273        $s [-e/+e     ] | [--extract / --noextract         ]
    274        $s [-r <n>    ] | [--hrule <n>                     ]
    275        $s [-c <n>    ] | [--caps <n>                      ]
    276        $s [-ct <tag> ] | [--capstag <tag>                 ]
    277        $s [-m/+m     ] | [--mail     / --nomail           ]
    278        $s [-u/+u     ] | [--unhyphen / --nounhyphen       ]
    279        $s [-a <file> ] | [--append <file>                 ]
    280        $s [+a        ] | [--noappend                      ]
    281        $s [-t <title>] | [--title <title>                 ]
    282        $s [-tw <n>   ] | [--tabwidth <n>                  ]
    283        $s [-iw <n>   ] | [--indent <n>                    ]
    284        $s [-ul <n>   ] | [--underlinelong <n>             ]
    285        $s [-us <n>   ] | [--underlineshort <n>            ]
    286 
    287   More complete explanations of these options can be found in
    288   comments near the beginning of the script.
    289 
    290 EOF
    291 }
    292 
    293 
    294 sub deal_with_options
    295 {
    296     while ($ARGV[0] =~ /^[-+].+/)
    297     {
    298         if (($ARGV[0] eq "-r" || $ARGV[0] eq "--hrule") &&
    299             $ARGV[1] =~ /^%d+$/)
    300         {
    301             $hrule_min = $ARGV[1];
    302             shift @ARGV;
    303             next;
    304         }
    305 
    306         if (($ARGV[0] eq "-s" || $ARGV[0] eq "--shortline") &&
    307             $ARGV[1] =~ /^\d+$/)
    308         {
    309             $short_line_length = $ARGV[1];
    310             shift @ARGV;
    311             next;
    312         }
    313 
    314         if (($ARGV[0] eq "-p" || $ARGV[0] eq "--prewhite") &&
    315             $ARGV[1] =~ /^\d+$/)
    316         {
    317             $preformat_whitespace_min = $ARGV[1];
    318             shift @ARGV;
    319             next;
    320         }
    321 
    322         if (($ARGV[0] eq "-pb" || $ARGV[0] eq "--prebegin") &&
    323             $ARGV[1] =~ /^\d+$/)
    324         {
    325             $preformat_trigger_lines = $ARGV[1];
    326             shift @ARGV;
    327             next;
    328         }
    329 
    330         if (($ARGV[0] eq "-pe" || $ARGV[0] eq "--preend") &&
    331             $ARGV[1] =~ /^\d+$/)
    332         {
    333             $endpreformat_trigger_lines = $ARGV[1];
    334             shift @ARGV;
    335             next;
    336         }
    337 
    338         if (($ARGV[0] eq "-e" || $ARGV[0] eq "--extract"))
    339         {
    340             $extract = 1;
    341             shift @ARGV;
    342             next;
    343         }
    344 
    345         if (($ARGV[0] eq "+e" || $ARGV[0] eq "--noextract"))
    346         {
    347             $extract = 0;
    348             shift @ARGV;
    349             next;
    350         }
    351 
    352         if (($ARGV[0] eq "-c" || $ARGV[0] eq "--caps") &&
    353             $ARGV[1] =~ /^\d+$/)
    354         {
    355             $min_caps_length = $ARGV[1];
    356             shift @ARGV;
    357             next;
    358         }
    359 
    360         if (($ARGV[0] eq "-ct" || $ARGV[0] eq "--capstag") &&
    361             $ARGV[1])
    362         {
    363             $caps_tag = $ARGV[1];
    364             shift @ARGV;
    365             next;
    366         }
    367 
    368         if ($ARGV[0] eq "-m" || $ARGV[0] eq "--mail")
    369         {
    370             $mailmode = 1;
    371             next;
    372         }
    373 
    374         if ($ARGV[0] eq "+m" || $ARGV[0] eq "--nomail")
    375         {
    376             $mailmode = 0;
    377             next;
    378         }
    379 
    380         if ($ARGV[0] eq "-u" || $ARGV[0] eq "--unhyphen")
    381         {
    382             $unhyphenation = 1;
    383             next;
    384         }
    385 
    386         if ($ARGV[0] eq "+u" || $ARGV[0] eq "--nounhyphen")
    387         {
    388             $unhyphenation = 0;
    389             next;
    390         }
    391 
    392         if (($ARGV[0] eq "-a" || $ARGV[0] eq "--append") &&
    393             $ARGV[1])
    394         {
    395             if (-r $ARGV[1]) {
    396                 $append_file = $ARGV[1];
    397             } else {
    398                 print STDERR "Can't find or read $ARGV[1].\n";
    399             }
    400             shift @ARGV;
    401             next;
    402         }
    403 
    404         if ($ARGV[0] eq "+a" || $ARGV[0] eq "--noappend")
    405         {
    406             $append_file = 0;
    407             next;
    408         }
    409 
    410         if (($ARGV[0] eq "-t" || $ARGV[0] eq "--title") &&
    411             $ARGV[1])
    412         {
    413             $title = $ARGV[1];
    414             shift @ARGV;
    415             next;
    416         }
    417 
    418         if (($ARGV[0] eq "-ul" || $ARGV[0] eq "--underlinelong") &&
    419             $ARGV[1] =~ /^\d+$/)
    420         {
    421             $underline_tolerance_long = $ARGV[1];
    422             shift @ARGV;
    423             next;
    424         }
    425 
    426         if (($ARGV[0] eq "-us" || $ARGV[0] eq "--underlineshort") &&
    427             $ARGV[1] =~ /^\d+$/)
    428         {
    429             $underline_tolerance_short = $ARGV[1];
    430             shift @ARGV;
    431             next;
    432         }
    433 
    434         if (($ARGV[0] eq "-tw" || $ARGV[0] eq "--tabwidth") &&
    435             $ARGV[1] =~ /^\d+$/)
    436         {
    437             $tab_width = $ARGV[1];
    438             shift @ARGV;
    439             next;
    440         }
    441 
    442         if (($ARGV[0] eq "-iw" || $ARGV[0] eq "--indentwidth") &&
    443             $ARGV[1] =~ /^\d+$/)
    444         {
    445             $indent_width = $ARGV[1];
    446             shift @ARGV;
    447             next;
    448         }
    449 
    450         if ($ARGV[0] eq "-v" || $ARGV[0] eq "--version")
    451         {
    452             print '$Header: /users/hilco/seth/projects/txt2html/txt2html.pl,v 1
    453 .10 1994/12/28 20:10:25 seth Exp seth $ ';
    454             print "\n";
    455             exit;
    456         }
    457 
    458         if ($ARGV[0] eq "-h" || $ARGV[0] eq "--help")
    459         {
    460             &usage;
    461             exit;
    462         }
    463 
    464         print STDERR "Unrecognized option: $ARGV[0]\n";
    465         print STDERR " or bad paramater: $ARGV[1]\n" if($ARGV[1]);
    466 
    467         &usage;
    468         exit(1);
    469 
    470     } continue {
    471 
    472         shift @ARGV;
    473     }
    474 
    475     $preformat_trigger_lines = 0 if ($preformat_trigger_lines < 0);
    476     $preformat_trigger_lines = 2 if ($preformat_trigger_lines > 2);
    477 
    478     $endpreformat_trigger_lines = 1 if ($preformat_trigger_lines == 0);
    479     $endpreformat_trigger_lines = 0 if ($endpreformat_trigger_lines < 0);
    480     $endpreformat_trigger_lines = 2 if ($endpreformat_trigger_lines > 2);
    481 
    482     $underline_tolerance_long  = 0 if $underline_tolerance_long < 0;
    483     $underline_tolerance_short = 0 if $underline_tolerance_short < 0;
    484 }
    485 
    486 sub is_blank
    487 {
    488     return $_[0] =~ /^\s*$/;
    489 }
    490 
    491 sub escape
    492 {
    493     $line =~ s/&/&amp;/g;
    494     $line =~ s/>/&gt;/g;
    495     $line =~ s/</&lt;/g;
    496     $line =~ s/\014/\n<HR>\n/g; # Linefeeds become horizontal rules
    497 }
    498 
    499 sub hrule
    500 {
    501     if ($line =~ /^\s*([-_~=\*]\s*){$hrule_min,}$/)
    502     {
    503         $line = "<HR>\n";
    504         $prev =~ s/<p>//;
    505         $line_action |= $HRULE;
    506     }
    507 }
    508 
    509 sub shortline
    510 {
    511     if (!($mode & $PRE) &&
    512         !&is_blank($line) &&
    513         ($line_length < $short_line_length) &&
    514         !&is_blank($nextline) &&
    515         !($line_action & ($HEADER | $HRULE | $BREAK | $LIST)))
    516     {
    517         $line =~ s/$/<BR>/;
    518         $line_action |= $BREAK;
    519     }
    520 }
    521 
    522 sub mailstuff
    523 {
    524     if ((($line =~ /^\w*&gt/) || # Handle "FF> Werewolves."
    525          ($line =~ /^\w*\|/))&&  # Handle "Igor| There wolves."
    526         !&is_blank($nextline))
    527     {
    528         $line =~ s/$/<BR>/;
    529         $line_action |= $BREAK | $MAILQUOTE;
    530     } elsif (($line =~ /^[\w\-]*:/) # Handle "Some-Header: blah"
    531              && (($previous_action & $MAILHEADER) || &is_blank($prev))
    532              && !&is_blank($nextline))
    533     {
    534         &anchor_mail if !($previous_action & $MAILHEADER);
    535         $line =~ s/$/<BR>/;
    536         $line_action |= $BREAK | $MAILHEADER;
    537     } elsif (($line =~ /^\s+\S/) &&   # Handle multi-line mail headers
    538              ($previous_action & $MAILHEADER) &&
    539              !&is_blank($nextline))
    540     {
    541         $line =~ s/$/<BR>/;
    542         $line_action |= $BREAK | $MAILHEADER;
    543     }
    544 }
    545 
    546 sub paragraph
    547 {
    548     $prev .= "<p>\n";
    549     $line_action |= $PAR;
    550 }
    551 
    552 sub listprefix
    553 {
    554     local($line) = @_;
    555     local($prefix, $number, $rawprefix);
    556 
    557     return (0,0,0) if (!($line =~ /^\s*[-=\*o]\s+\S/ ) &&
    558                        !($line =~ /^\s*(\d+|[a-zA-Z])[\.\)\]:]\s+\S/ ));
    559 
    560     ($number) = $line =~ /^\s*(\d+|[a-zA-Z])/;
    561 
    562     # That slippery exception of "o" as a bullet
    563     # (This ought to be determined more through the context of what lists
    564     #  we have in progress, but this will probably work well enough.)
    565     if($line =~ /^\s*o\s/)
    566     {
    567         $number = 0;
    568     }
    569 
    570     if ($number)
    571     {
    572         ($rawprefix) = $line =~ /^(\s*(\d+|[a-zA-Z]).)/;
    573         $prefix = $rawprefix;
    574         $prefix =~ s/(\d+|[a-zA-Z])//;  # Take the number out
    575     } else {
    576         ($rawprefix) = $line =~ /^(\s*[-=o\*].)/;
    577         $prefix = $rawprefix;
    578     }
    579     ($prefix, $number, $rawprefix);
    580 }
    581 
    582 sub startlist
    583 {
    584     local($prefix, $number, $rawprefix) = @_;
    585 
    586     $listprefix[$listnum] = $prefix;
    587     if($number)
    588     {
    589         # It doesn't start with 1,a,A.  Let's not screw with it.
    590         if (($number != 1) && ($number ne "a") && ($number ne "A"))
    591         {
    592             return;
    593         }
    594         $prev .= "$list_indent<OL>\n";
    595         $list[$listnum] = $OL;
    596     } else {
    597         $prev .= "$list_indent<font size=-2><UL>\n";
    598         $list[$listnum] = $UL;
    599     }
    600     $listnum++;
    601     $list_indent = " " x $listnum x $indent_width;
    602     $line_action |= $LIST;
    603     $mode |= $LIST;
    604 }
    605 
    606 
    607 sub endlist                     # End N lists
    608 {
    609     local($n) = @_;
    610     for(; $n > 0; $n--, $listnum--)
    611     {
    612         $list_indent = " " x ($listnum-1) x $indent_width;
    613         if($list[$listnum-1] == $UL)
    614         {
    615             $prev .= "$list_indent</UL></font>\n";
    616         } elsif($list[$listnum-1] == $OL)
    617         {
    618             $prev .= "$list_indent</OL>\n";
    619         } else
    620         {
    621             print STDERR "Encountered list of unknown type\n";
    622         }
    623     }
    624     $line_action |= $END;
    625     $mode ^= ($LIST & $mode) if (!$listnum);
    626 }
    627 
    628 sub continuelist
    629 {
    630     $line =~ s/^\s*[-=o\*]\s*/$list_indent<LI> / if $list[$listnum-1] == $UL;
    631     $line =~ s/^\s*(\d+|[a-zA-Z]).\s*/$list_indent<LI> /    if $list[$listnum-1
    632 ] == $OL;
    633     $line_action |= $LIST;
    634 }
    635 
# Decide how the current line relates to the lists that are open and then
# start, end or continue lists accordingly.
#
# Reads the globals $line, $prev, $mode, $listnum and @listprefix, and
# delegates the actual work to listprefix, startlist, endlist and
# continuelist.
sub liststuff
{
    local($i);

    local($prefix, $number, $rawprefix) = &listprefix($line);

    # $i ends up as the number of list levels that should remain open.
    $i = $listnum;
    if (!$prefix)
    {
        return if !&is_blank($prev); # inside a list item

        # This ain't no list.  We'll want to end all of them.
        return if !($mode & $LIST);     # This just speeds up the inevitable
        $i = 0;
    } else
    {
        # Maybe we're going back up to a previous list
        # Walk outwards until a level with the same prefix is found; $i
        # drops to -1 when no level matches.
        # NOTE(review): at $i == 0 this reads $listprefix[-1], which Perl
        # resolves to the LAST element - confirm that is intended.
        $i-- while (($prefix ne $listprefix[$i-1]) && ($i >= 0));
    }

    if (($i >= 0) && ($i != $listnum))
    {
        # A matching outer level (or "no list at all"): close the levels
        # nested below it.
        &endlist($listnum - $i);
    } elsif (!$listnum || $i != $listnum)
    {
        # No list is open yet, or no open level uses this prefix.
        &startlist($prefix, $number, $rawprefix);
    }

    # continuelist ignores its arguments and works on the global $line.
    &continuelist($prefix, $number, $rawprefix) if ($mode & $LIST);
}
    666 
    667 sub endpreformat
    668 {
    669     if(!($line =~ /\s{$preformat_whitespace_min,}\S+/) &&
    670        ($endpreformat_trigger_lines == 1 ||
    671         !($nextline =~ /\s{$preformat_whitespace_min,}\S+/)))
    672     {
    673         $prev =~ s#$#\n</PRE></font>#;
    674         $mode ^= ($PRE & $mode);
    675         $line_action |= $END;
    676     }
    677 }
    678 
    679 sub preformat
    680 {
    681     if($preformat_trigger_lines == 0 ||
    682        (($line =~ /\s{$preformat_whitespace_min,}\S+/) &&
    683         ($preformat_trigger_lines == 1 ||
    684          $nextline =~ /\s{$preformat_whitespace_min,}\S+/)))
    685     {
    686         $line =~ s/^/<font size=-1><PRE>\n/;
    687         $prev =~ s/<p>//;
    688         $mode |= $PRE;
    689         $line_action |= $PRE;
    690     }
    691 }
    692 
    693 sub make_new_anchor
    694 {
    695     $anchor++;
    696     $anchor;
    697 }
    698 
    699 sub anchor_mail
    700 {
    701     local($text) = $line =~ /\S+: *(.*) *$/;
    702     local($anchor) = &make_new_anchor($text);
    703     $line =~ s/(.*)/<A NAME="$anchor">$1<\/A>/;
    704 }
    705 
    706 sub anchor_heading
    707 {
    708     local($heading) = @_;
    709     local($anchor) = &make_new_anchor($heading);
    710     $line =~ s/(<H.>.*<\/H.>)/<A NAME="$anchor">$1<\/A>/;
    711 }
    712 
    713 sub heading
    714 {
    715     local($hindent, $heading) = $line =~ /^(\s*)(.+)$/;
    716     $hindent = 0;               # This isn't used yet, but Perl warns of
    717                                 # "possible typo" if I declare a var
    718                                 # and never reference it.
    719 
    720     # This is now taken care of in main()
    721 #    $heading =~ s/\s+$//;      # get rid of trailing whitespace.
    722 
    723     local($underline) = $nextline =~ /^\s*(\S+)\s*$/;
    724 
    725     if((length($heading) > (length($underline) + $underline_tolerance_short))
    726        || (length($heading) < (length($underline) -$underline_tolerance_long)))
    727     {
    728         return;
    729     }
    730 
    731 #    $underline =~ s/(^.).*/$1/;     # Could I do this any less efficiently?
    732     $underline = substr($underline,0,1);
    733 
    734     local($hlevel);
    735     $hlevel = 1 if $underline eq "*";
    736     $hlevel = 2 if $underline eq "=";
    737     $hlevel = 3 if $underline eq "+";
    738     $hlevel = 4 if $underline eq "-";
    739     $hlevel = 5 if $underline eq "~";
    740     $hlevel = 6 if $underline eq ".";
    741     return if !$hlevel;
    742 
    743     $nextline = <STDIN>;        # Eat the underline
    744     &tagline("H${hlevel}");
    745     &anchor_heading($heading);
    746     $line_action |= $HEADER;
    747 }
    748 
    749 sub unhyphenate
    750 {
    751     local($second);
    752 
    753     # This looks hairy because of all the quoted characters.
    754     # All I'm doing is pulling out the word that begins the next line.
    755     # Along with it, I pull out any punctuation that follows.
    756     # Preceding whitespace is preserved.  We don't want to screw up
    757     # our own guessing systems that rely on indentation.
    758     ($second) = $nextline =~ /^\s*([a-zA-Z]+[\)\}\]\.,:;\'\"\>]*\s*)/; # "
    759     $nextline =~ s/^(\s*)[a-zA-Z]+[\)\}\]\.,:;\'\"\>]*\s*/$1/; # "
    760     # (The silly comments are for my less-than-perfect code hilighter)
    761 
    762     $line =~ s/\-\s*$/$second/;
    763     $line .= "\n";
    764 }
    765 
    766 sub untabify
    767 {
    768     local($oldws) = $line =~ /^([ \011]+)/;
    769     local($oldlen) = (length($oldws));
    770 
    771     local($i, $column);
    772     for($i=0, $column = 0; $i < $oldlen; $i++)
    773     {
    774         if(substr($oldws, $i, 1) eq " ")
    775         {                       # Space
    776             $column++;
    777         } else {                # Tab
    778             $column += $tab_width - ($column % $tab_width);
    779         }
    780     }
    781     $line = (" " x $column) . substr($line, $oldlen);
    782 }
    783 
    784 sub tagline
    785 {
    786     local($tag) = @_;
    787     $line =~ s/^\s*(.*)\s*$/<$tag>$1<\/$tag>\n/;
    788 }
    789 
    790 sub caps
    791 {
    792     if($line =~ /^[^a-z<]*[A-Z]{$min_caps_length,}[^a-z<]*$/)
    793     {
    794         &tagline($caps_tag);
    795         $line_action |= $CAPS;
    796     }
    797 }
    798 
    799 
    800 
    # main -- top-level driver: convert text on STDIN to HTML on STDOUT.
    #
    # Steps, in order:
    #   1. Process options via deal_with_options().
    #   2. Print the fixed page header (masthead, navigation, search form) and,
    #      when $title is set, a <TITLE> element and an <h3> heading.
    #   3. Read STDIN through a three-line window ($prev, $line, $nextline).
    #      Each pass runs the formatting helpers for the current line and then
    #      prints $prev, so output trails the input by one line.  The helpers
    #      are defined elsewhere in the file; presumably they operate on these
    #      same globals -- verify against their definitions.
    #   4. Close any open list and any open preformatted section.
    #   5. Print the contents of $append_file if that option is set, otherwise
    #      the built-in page footer.
    #   6. Print the closing </BODY></HTML> tags unless $extract is set.
    #
    # Takes no arguments; communicates through the script's global variables.
    sub main
    {
        &deal_with_options;

        # NOTE(review): the header is emitted unconditionally, whereas the
        # closing tags at the end of this sub are guarded by !$extract.
        # Confirm that the asymmetry is intended.
        if(1)
        {
            # NOTE(review): this literal already opens <html>, <head> and
            # <body>; the prints that follow open <HTML>, <HEAD> and a second
            # <body>.  Left as-is so the generated pages do not change.
            print q(
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
  "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en-US">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Description" content="ImageMagick - a robust collection of tools and libraries to read, write and manipulate an image in any of the popular image formats. ImageMagick allows dynamic creation of GIFs, making it suitable for Web applications.">
<meta name="Keywords" content="ImageMagick,Image Magick,Image Magic,PerlMagick,Perl Magick,Perl Magic,WebMagick,Web Magic,image processing,software development,simulation,image software,AniMagick,Animagic,Magick++">
<meta name="Resource-type" content="document">
<meta name="Robots" content="ALL">
<link rel="stylesheet" type="text/css" href="../www/magick.css">
</head>

<body marginheight=1 marginwidth=1 topmargin=1 leftmargin=1>
<a name="top"></a>
<table border="0" cellpadding="0" cellspacing="0" summary="Masthead" width="100%">
<tbody>
<tr>
<td bgcolor="#003399" width="25%" height="118" background="../images/background.gif"><a href="http://www.imagemagick.org/"><img src="../images/script.gif" width="278" height="118" border="0" alt="" /></a></td>
<td bgcolor="#003399" width="60%" height="118" background="../images/background.gif"><a href="http://www.networkeleven.com/direct.php?magick_all"><img src="../images/promote.png" border="0" width="186" height="52" vspace="29" alt="Powered by NetworkEleven" /></a></td>
<td bgcolor="#003399" width="114" height="118" align="right"><img src="../images/sprite.png" width="114" height="118" alt="" /></td>
<td bgcolor="#003399" width="114" height="118" align="right"><a href="http://www.imagemagick.net"><img src="../images/logo.png" width="114" height="118" border="0" alt="ImageMagick logo" /></a></td>
</tr></tbody></table>
</table><table align="left" border=0 cellpadding=2 cellspacing=2 summary="Navigation buttons" width="20%">
 <tr>
  <td>
   <form target="_self" action="../index.html"><input type="submit" title="ImageMagick Home" value=" Home " style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
  </td><td>
   <form target="_self" action="../www/apis.html"><input type="submit" title="ImageMagick API" value=" API " style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
  </td><td>
   <form target="_self" action="../www/archives.html"><input type="submit" title="ImageMagick Download" value="Download" style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
  </td>
 </tr>
</table>
<div align="right" style="margin-top:3px; padding-right:4px">
  <form action="http://studio.imagemagick.org/Sage/scripts/Sage.cgi">
	<input type="TEXT" name="query" size=32 maxlength=255>
	<input type="SUBMIT" name="sa" value="Search" style="background-image:url('../images/background.gif'); bgcolor:#003399; color:#fbc713; font-weight:bold"> 
 </form><br>
</div>
<table align="left" border=0 cellpadding=10 cellspacing=0 style="margin-top:-17px" width="100%"><tr><td>

<br>&nbsp;<br>
) . "\n";
            print "<HTML>\n";
            print "<HEAD>\n";

            # It'd be nice if we could guess a title from the first header,
            # but even that would be too late if we're doing this in one pass.
            print "<TITLE>$title</TITLE>\n" if($title);

            print "</HEAD>\n";

            print q(<body text="#000000" bgcolor="#fbc713" link="#1F00FF" vlink="#9900DD" alink="#FF0000">) . "\n";

            if ($title) {
                print "<h3>$title</h3>\n";
            }
        }

        # Prime the three-line window: $prev is what gets printed, $line is
        # the line being formatted, $nextline is look-ahead.
        $prev     = "";
        $line     = <STDIN>;
        $nextline = <STDIN>;
        do
        {
            $line =~ s/[ \011]*$//; # Chop trailing whitespace

            &untabify;              # Change leading whitespace into spaces

            $line_length = length($line); # Do this before tags go in

            &escape;

            &endpreformat if (($mode & $PRE) && ($preformat_trigger_lines != 0));

            &hrule if !($mode & $PRE);

            # A line followed by a row of underline characters is a heading.
            &heading   if (!($mode & $PRE) &&
                           $nextline =~ /^\s*[=\-\*\.~\+]+$/);

            &caps if  !($mode & $PRE);

            &liststuff if (!($mode & $PRE) &&
                           !&is_blank($line));

            &mailstuff if ($mailmode &&
                           !($mode & $PRE) &&
                           !($line_action & $HEADER));

            &preformat if (!($line_action & ($HEADER | $LIST | $MAILHEADER)) &&
                           !($mode & ($LIST | $PRE)) &&
                           ($endpreformat_trigger_lines != 0));

            &paragraph if ((&is_blank($prev) || ($line_action & $END)) &&
                           !&is_blank($line) &&
                           !($mode & ($LIST | $PRE)) && # paragraphs in lists
                                                        # *should* be allowed.
                           (!$line_action ||
                            ($line_action & ($CAPS | $END | $MAILQUOTE))));

            &shortline;

            &unhyphenate if ($unhyphenation &&
                             ($line =~ /[a-zA-Z]\-$/) && # ends in hyphen
                             # next line starts w/letters
                             ($nextline =~ /^\s*[a-zA-Z]/) &&
                             !($mode & ($PRE | $HEADER | $MAILHEADER | $BREAK)));


            # Print it out and move on.

            print $prev;

            # The action flags are carried over a blank look-ahead line and
            # reset only when the next line has content.
            if (!&is_blank($nextline))
            {
                $previous_action = $line_action;
                $line_action     = $NONE;
            }

            # Slide the window forward by one line.
            $prev = $line;
            $line = $nextline;
            $nextline = <STDIN>;
        } until (!$nextline && !$line && !$prev);

        # $prev is cleared before endlist() and printed right after it;
        # presumably endlist() writes its closing tags into $prev -- verify.
        $prev = "";
        &endlist($listnum) if ($mode & $LIST); # End all lists
        print $prev;

        print "\n";

        print "</PRE></font>\n" if ($mode & $PRE);

        if ($append_file)
        {
            # Three-argument open on a lexical handle: the name is always
            # treated as a plain file opened for reading (never as a mode
            # prefix or pipe), and an open that fails after the -r test is
            # reported instead of being silently ignored.
            if(-r $append_file && open(my $append_fh, '<', $append_file))
            {
                print while <$append_fh>;
                close($append_fh);
            } else {
                print STDERR "Can't find or read file $append_file to append.\n";
            }
        } else {
            # No append file given: emit the built-in page footer.
            print q(<hr>) . "\n";
            print q(
<a href="#top"><img src="../images/top.gif" border=0 width="35" height="46" align="right" alt="Top of page"></a>
<form action="http://studio.imagemagick.org/magick/" style="margin-top:5px">
 <input type="submit" title="Help!" value="Help!" style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold">
  <small>&quot;Image manipulation software that works like magick&quot;</small>
	</form></td>
	</tr></table>
) . "\n";
        }

        if(!$extract)
        {
            print "</BODY>\n";
            print "</HTML>\n";
        }
    }
    966 
    # Script entry point: run the conversion.
    main();
    968 
    969 
    970