#!/usr/bin/perl
eval 'exec perl -S $0 "$@"'
    if $running_under_some_shell;
#
# txt2html.pl
# Convert raw text to something with a little HTML formatting
#
# Written by Seth Golub <seth (at] cs.wustl.edu>
# http://www.cs.wustl.edu/~seth/txt2html/
#
# $Revision: 1.15 $
# $Date: 2004/10/05 20:30:33 $
# $Author: cristy $
#
#
# $Log: txt2html,v $
# Revision 1.15 2004/10/05 20:30:33 cristy
# *** empty log message ***
#
# Revision 1.14 2004/04/26 19:53:42 cristy
# *** empty log message ***
#
# Revision 1.13 2004/04/26 15:03:00 cristy
# *** empty log message ***
#
# Revision 1.12 2004/04/24 13:48:50 cristy
# *** empty log message ***
#
# Revision 1.11 2003/10/28 18:40:59 cristy
# *** empty log message ***
#
# Revision 1.10 2003/10/28 03:44:38 cristy
# *** empty log message ***
#
# Revision 1.9 2003/10/17 13:59:12 cristy
# *** empty log message ***
#
# Revision 1.8 2003/10/16 22:26:06 cristy
# *** empty log message ***
#
# Revision 1.7 2003/10/12 04:10:15 cristy
# *** empty log message ***
#
# Revision 1.6 2003/07/20 03:39:50 cristy
# *** empty log message ***
#
# Revision 1.5 2003/07/19 19:44:20 cristy
# *** empty log message ***
#
# Revision 1.4 2003/04/07 23:35:40 cristy
# *** empty log message ***
#
# Revision 1.3 2003/04/05 02:52:42 cristy
# *** empty log message ***
#
# Revision 1.2 2003/04/04 20:50:50 cristy
# *** empty log message ***
#
# Revision 1.1 2003/03/25 15:10:23 cristy
# genesis
#
# Revision 1.1 2003/03/22 17:02:00 cristy
# *** empty log message ***
#
# Revision 1.10 1994/12/28 20:10:25 seth
# * Added --extract, etc.
#
# Revision 1.9 94/12/13 15:16:23 15:16:23 seth (Seth Golub)
# * Changed from #!/usr/local/bin/perl to the more clever version in
#   the man page. (How did I manage not to read this for so long?)
# * Swapped hrule & header back to handle double lines. Why should
#   this order screw up headers?
#
# Revision 1.8 1994/11/30 21:07:03 seth
# * put mail_anchor back in. (Why did I take this out?)
# * Finally added handling of lettered lists (ordered lists marked with
#   letters)
# * Added title option (--title, -t)
# * Shortline now looks at how long the line was before txt2html
#   started adding tags. ($line_length)
# * Changed list references to scalars where appropriate. (@foo[0] -> $foo[0])
# * Added untabify() to homogenize leading indentation for list
#   prefixes and functions that use line length
# * Added "underline tolerance" for when underlines are not exactly the
#   same length as what they underline.
# * Added error message for unrecognized options
# * removed \w matching on --capstag
# * Tagline now removes leading & trailing whitespace before tagging
# * swapped order of caps & heading in main loop
# * Cleaned up code for speed and to get rid of warnings
# * Added more restrictions to something being a mail header
# * Added indentation for lists, just to make the output more readable.
# * Fixed major bug in lists: $OL and $UL were never set, so when a
#   list was ended "</UL>" was *always* used!
# * swapped order of hrule & header to properly handle long underlines
#
# Revision 1.7 94/10/28 13:16:11 13:16:11 seth (Seth Golub)
# * Added to comments in options section
# * renamed blank to is_blank
# * Page break is converted to horizontal rule <HR>
# * moved usage subroutine up top so people who look through code see
#   it sooner
#
# Revision 1.6 94/10/28 12:43:46 12:43:46 seth (Seth Golub)
# * Creates anchors at each heading
#
# Revision 1.5 94/07/14 17:43:59 17:43:59 seth (Seth Golub)
# * Fixed minor bug in Headers
# * Preformatting can be set to only start/stop when TWO lines of
#   [non]formatted-looking-text are encountered. Old behavior is still
#   possible through command line options (-pb 1 -pe 1).
# * Can preformat entire document (-pb 0) or disable preformatting
#   completely (-pe 0).
# * Fixed minor bug in CAPS handling (paragraph breaks broke)
# * Puts paragraph tags *before* paragraphs, not just between them.
#
# Revision 1.4 94/06/20 16:42:55 16:42:55 seth (Seth Golub)
# * Allow ':' for numbered lists (e.g. "1: Figs")
# * Whitespace at end of line will not start or end preformatting
# * Mailmode is now off by default
# * Doesn't break short lines if they are the first line in a list
#   item. It *should* break them anyway if the next line is a
#   continuation of the list item, but I haven't dealt with this yet.
# * Added action on lines that are all capital letters. You can change
#   how these lines get tagged, as well as the minimum number of
#   consecutive capital letters required to fire off this action.
#
# Revision 1.3 94/05/17 15:58:58 15:58:58 seth (Seth Golub)
# * Tiny bugfix in unhyphenation
#
# Revision 1.2 94/05/16 18:15:16 18:15:16 seth (Seth Golub)
# * Added unhyphenation
#
# Revision 1.1 94/05/16 16:19:03 16:19:03 seth (Seth Golub)
# Initial revision
#
#
# 1.02 Allow '-' in mail headers
#      Added handling for multiline mail headers
#
#
#
# Oscar Nierstrasz has a nice script for hypertextifying URLs.
# It is available at:
# http://cui_www.unige.ch/ftp/PUBLIC/oscar/scripts/html.pl
#

#########################
# Configurable options
#
# All of these are package globals that the rest of the script reads
# directly; deal_with_options() overrides them from the command line.
#

# [-s <n>    ] | [--shortline <n>           ]
$short_line_length = 40;          # Lines this short (or shorter) must be
                                  # intentionally broken and are kept
                                  # that short. <BR>

# [-p <n>    ] | [--prewhite <n>            ]
$preformat_whitespace_min = 5;    # Minimum number of consecutive leading
                                  # whitespace characters to trigger
                                  # preformatting.
                                  # NOTE: Tabs are now expanded to
                                  # spaces before this check is made.
                                  # That means if $tab_width is 8 and
                                  # this is 5, then one tab is expanded
                                  # to 8 spaces, which is enough to
                                  # trigger preformatting.

# [-pb <n>   ] | [--prebegin <n>            ]
$preformat_trigger_lines = 2;     # How many lines of preformatted-looking
                                  # text are needed to switch to <PRE>
                                  # <= 0 : Preformat entire document
                                  #    1 : one line triggers
                                  # >= 2 : two lines trigger

# [-pe <n>   ] | [--preend <n>              ]
$endpreformat_trigger_lines = 2;  # How many lines of unpreformatted-looking
                                  # text are needed to switch from <PRE>
                                  # <= 0 : Never preformat within document
                                  #    1 : one line triggers
                                  # >= 2 : two lines trigger
# NOTE for --prebegin and --preend:
# A zero takes precedence.  If one is zero, the other is ignored.
# If both are zero, entire document is preformatted.


# [-r <n>    ] | [--hrule <n>               ]
$hrule_min = 4;                   # Min number of ---s for an HRule.

# [-c <n>    ] | [--caps <n>                ]
$min_caps_length = 3;             # min sequential CAPS for an all-caps line

# [-ct <tag> ] | [--capstag <tag>           ]
$caps_tag = "STRONG";             # Tag to put around all-caps lines

# [-m/+m     ] | [--mail / --nomail         ]
$mailmode = 0;                    # Deal with mail headers & quoted text

# [-u/+u     ] | [--unhyphen / --nounhyphen ]
# (--unhyphenate / --nounhyphenate are accepted as synonyms)
$unhyphenation = 1;               # Enables unhyphenation of text.

# [-a <file> ] | [--append <file>           ]
# [+a        ] | [--noappend                ]
$append_file = 0;                 # If you want something appended by
                                  # default, put the filename here.
                                  # The appended text will not be
                                  # processed at all, so make sure it's
                                  # plain text or decent HTML.  i.e. do
                                  # not have things like:
                                  #   Seth Golub <seth (at] cs.wustl.edu>
                                  # but instead, have:
                                  #   Seth Golub &lt;seth (at] cs.wustl.edu&gt;

# [-t <title>] | [--title <title>           ]
$title = 0;                       # You can specify a title.
                                  # Otherwise it won't put one in.

# [-ul <n>   ] | [--underlinelong <n>       ]
$underline_tolerance_long = 1;    # How much longer can underlines
                                  # be and still be underlines?

# [-us <n>   ] | [--underlineshort <n>      ]
$underline_tolerance_short = 1;   # How much shorter can underlines
                                  # be and still be underlines?

# [-tw <n>   ] | [--tabwidth <n>            ]
$tab_width = 8;                   # How many spaces equal a tab?


# [-iw <n>   ] | [--indent <n>              ]
# (--indentwidth is accepted as a synonym)
$indent_width = 2;                # Indents this many spaces for each
                                  # level of a list

# [-e/+e     ] | [--extract / --noextract   ]
$extract = 0;                     # Extract Mode (suitable for inserting)

# END OF CONFIGURABLE OPTIONS
########################################


########################################
# Definitions  (Don't change these)
#
# Bit flags.  They are OR-ed together into $mode (state that persists
# across lines), $line_action (what was done to the current line) and
# $previous_action (what was done to the line before it).
#
$NONE       =   0;
$LIST       =   1;
$HRULE      =   2;
$PAR        =   4;
$PRE        =   8;
$END        =  16;
$BREAK      =  32;
$HEADER     =  64;
$MAILHEADER = 128;
$MAILQUOTE  = 256;
$CAPS       = 512;

# List types stored in @list for each open list level.
$OL = 1;
$UL = 2;

# usage
#
# Print the option summary to STDERR.  As a side effect $0 is reduced to
# the script's basename.  Takes no arguments and does not exit; callers
# decide the exit status.
sub usage
{
    $0 =~ s#.*/##;
    local($s) = " " x length($0);   # blanks as wide as the program name
    print STDERR <<EOF;

Usage: $0 [options]

where options are:
$s [-v        ] | [--version                 ]
$s [-h        ] | [--help                    ]
$s [-s <n>    ] | [--shortline <n>           ]
$s [-p <n>    ] | [--prewhite <n>            ]
$s [-pb <n>   ] | [--prebegin <n>            ]
$s [-pe <n>   ] | [--preend <n>              ]
$s [-e/+e     ] | [--extract / --noextract   ]
$s [-r <n>    ] | [--hrule <n>               ]
$s [-c <n>    ] | [--caps <n>                ]
$s [-ct <tag> ] | [--capstag <tag>           ]
$s [-m/+m     ] | [--mail / --nomail         ]
$s [-u/+u     ] | [--unhyphen / --nounhyphen ]
$s [-a <file> ] | [--append <file>           ]
$s [+a        ] | [--noappend                ]
$s [-t <title>] | [--title <title>           ]
$s [-tw <n>   ] | [--tabwidth <n>            ]
$s [-iw <n>   ] | [--indent <n>              ]
$s [-ul <n>   ] | [--underlinelong <n>       ]
$s [-us <n>   ] | [--underlineshort <n>      ]

More complete explanations of these options can be found in
comments near the beginning of the script.

EOF
}


# deal_with_options
#
# Consume leading command-line options from @ARGV (anything starting with
# '-' or '+') and store their values in the configuration globals above.
# Processing stops at the first argument that does not look like an
# option; that argument and everything after it are left in @ARGV.
#
# Shifting convention: the "continue" block at the bottom of the loop
# removes the option word itself.  A handler for an option that takes a
# parameter does one extra "shift @ARGV" for the parameter; a handler for
# a plain flag must NOT shift, or it would swallow the next argument.
#
# -v/--version and -h/--help print and exit.  An unknown option, or a
# known option with a missing/ill-formed parameter, prints a diagnostic
# plus the usage text to STDERR and exits with status 1.
#
# After the loop the preformat trigger counts are clamped to 0..2 and the
# underline tolerances to >= 0.
sub deal_with_options
{
    while ($ARGV[0] =~ /^[-+].+/)
    {
        # Was /^%d+$/ (a printf-ism), which never matched a number and
        # made -r/--hrule unusable.
        if (($ARGV[0] eq "-r" || $ARGV[0] eq "--hrule") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $hrule_min = $ARGV[1];
            shift @ARGV;
            next;
        }

        if (($ARGV[0] eq "-s" || $ARGV[0] eq "--shortline") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $short_line_length = $ARGV[1];
            shift @ARGV;
            next;
        }

        if (($ARGV[0] eq "-p" || $ARGV[0] eq "--prewhite") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $preformat_whitespace_min = $ARGV[1];
            shift @ARGV;
            next;
        }

        if (($ARGV[0] eq "-pb" || $ARGV[0] eq "--prebegin") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $preformat_trigger_lines = $ARGV[1];
            shift @ARGV;
            next;
        }

        if (($ARGV[0] eq "-pe" || $ARGV[0] eq "--preend") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $endpreformat_trigger_lines = $ARGV[1];
            shift @ARGV;
            next;
        }

        # -e / +e are plain flags: no extra shift (the original shifted
        # here and silently discarded the following argument).
        if ($ARGV[0] eq "-e" || $ARGV[0] eq "--extract")
        {
            $extract = 1;
            next;
        }

        if ($ARGV[0] eq "+e" || $ARGV[0] eq "--noextract")
        {
            $extract = 0;
            next;
        }

        if (($ARGV[0] eq "-c" || $ARGV[0] eq "--caps") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $min_caps_length = $ARGV[1];
            shift @ARGV;
            next;
        }

        if (($ARGV[0] eq "-ct" || $ARGV[0] eq "--capstag") &&
            $ARGV[1])
        {
            $caps_tag = $ARGV[1];
            shift @ARGV;
            next;
        }

        if ($ARGV[0] eq "-m" || $ARGV[0] eq "--mail")
        {
            $mailmode = 1;
            next;
        }

        if ($ARGV[0] eq "+m" || $ARGV[0] eq "--nomail")
        {
            $mailmode = 0;
            next;
        }

        # The options comment documents --unhyphenate while usage()
        # documents --unhyphen; accept both spellings.
        if ($ARGV[0] eq "-u" || $ARGV[0] eq "--unhyphen" ||
            $ARGV[0] eq "--unhyphenate")
        {
            $unhyphenation = 1;
            next;
        }

        if ($ARGV[0] eq "+u" || $ARGV[0] eq "--nounhyphen" ||
            $ARGV[0] eq "--nounhyphenate")
        {
            $unhyphenation = 0;
            next;
        }

        if (($ARGV[0] eq "-a" || $ARGV[0] eq "--append") &&
            $ARGV[1])
        {
            if (-r $ARGV[1]) {
                $append_file = $ARGV[1];
            } else {
                print STDERR "Can't find or read $ARGV[1].\n";
            }
            shift @ARGV;
            next;
        }

        if ($ARGV[0] eq "+a" || $ARGV[0] eq "--noappend")
        {
            $append_file = 0;
            next;
        }

        if (($ARGV[0] eq "-t" || $ARGV[0] eq "--title") &&
            $ARGV[1])
        {
            $title = $ARGV[1];
            shift @ARGV;
            next;
        }

        if (($ARGV[0] eq "-ul" || $ARGV[0] eq "--underlinelong") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $underline_tolerance_long = $ARGV[1];
            shift @ARGV;
            next;
        }

        if (($ARGV[0] eq "-us" || $ARGV[0] eq "--underlineshort") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $underline_tolerance_short = $ARGV[1];
            shift @ARGV;
            next;
        }

        if (($ARGV[0] eq "-tw" || $ARGV[0] eq "--tabwidth") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $tab_width = $ARGV[1];
            shift @ARGV;
            next;
        }

        # usage() advertises --indent; the code historically only took
        # --indentwidth.  Accept both.
        if (($ARGV[0] eq "-iw" || $ARGV[0] eq "--indent" ||
             $ARGV[0] eq "--indentwidth") &&
            $ARGV[1] =~ /^\d+$/)
        {
            $indent_width = $ARGV[1];
            shift @ARGV;
            next;
        }

        if ($ARGV[0] eq "-v" || $ARGV[0] eq "--version")
        {
            # Single-quoted on purpose: $Header must not interpolate.
            print '$Header: /users/hilco/seth/projects/txt2html/txt2html.pl,v 1.10 1994/12/28 20:10:25 seth Exp seth $ ';
            print "\n";
            exit;
        }

        if ($ARGV[0] eq "-h" || $ARGV[0] eq "--help")
        {
            &usage;
            exit;
        }

        # Nothing above matched (or its parameter check failed).
        print STDERR "Unrecognized option: $ARGV[0]\n";
        print STDERR " or bad paramater: $ARGV[1]\n" if($ARGV[1]);

        &usage;
        exit(1);

    } continue {

        shift @ARGV;    # drop the option word just handled
    }

    $preformat_trigger_lines = 0 if ($preformat_trigger_lines < 0);
    $preformat_trigger_lines = 2 if ($preformat_trigger_lines > 2);

    # "Preformat everything" (-pb 0) wins over any --preend setting.
    $endpreformat_trigger_lines = 1 if ($preformat_trigger_lines == 0);
    $endpreformat_trigger_lines = 0 if ($endpreformat_trigger_lines < 0);
    $endpreformat_trigger_lines = 2 if ($endpreformat_trigger_lines > 2);

    $underline_tolerance_long  = 0 if $underline_tolerance_long < 0;
    $underline_tolerance_short = 0 if $underline_tolerance_short < 0;
}

# is_blank(STRING)
#
# True when STRING is empty or contains only whitespace.
sub is_blank
{
    return $_[0] =~ /^\s*$/;
}

# escape
#
# HTML-escape the global $line in place.  '&' is handled first so the
# ampersands introduced by the other two entities are not escaped again.
sub escape
{
    $line =~ s/&/&amp;/g;
    $line =~ s/>/&gt;/g;
    $line =~ s/</&lt;/g;
    $line =~ s/\014/\n<HR>\n/g;   # Form feeds (page breaks) become horizontal rules
}

# hrule
#
# If $line consists of at least $hrule_min rule characters (- _ ~ = *),
# optionally separated by whitespace, replace it with <HR> and drop a
# paragraph tag that was already appended to $prev.
sub hrule
{
    if ($line =~ /^\s*([-_~=\*]\s*){$hrule_min,}$/)
    {
        $line = "<HR>\n";
        $prev =~ s/<p>//;
        $line_action |= $HRULE;
    }
}

# shortline
#
# Append <BR> to a non-blank line that was shorter than
# $short_line_length before tagging ($line_length), provided we are not
# in <PRE>, the next line is not blank, and the line has not already been
# treated as a header, rule, break or list item.
sub shortline
{
    if (!($mode & $PRE) &&
        !&is_blank($line) &&
        ($line_length < $short_line_length) &&
        !&is_blank($nextline) &&
        !($line_action & ($HEADER | $HRULE | $BREAK | $LIST)))
    {
        $line =~ s/$/<BR>/;
        $line_action |= $BREAK;
    }
}

# mailstuff
#
# Mail-mode handling of $line: quoted text, header lines, and
# continuation lines of multi-line headers each get a trailing <BR>.
# The first header of a run is additionally wrapped in an anchor.
sub mailstuff
{
    if ((($line =~ /^\w*>/) ||        # Handle "FF> Werewolves."
         ($line =~ /^\w*\|/)) &&      # Handle "Igor| There wolves."
        !&is_blank($nextline))
    {
        $line =~ s/$/<BR>/;
        $line_action |= $BREAK | $MAILQUOTE;
    } elsif (($line =~ /^[\w\-]*:/)   # Handle "Some-Header: blah"
             && (($previous_action & $MAILHEADER) || &is_blank($prev))
             && !&is_blank($nextline))
    {
        &anchor_mail if !($previous_action & $MAILHEADER);
        $line =~ s/$/<BR>/;
        $line_action |= $BREAK | $MAILHEADER;
    } elsif (($line =~ /^\s+\S/) &&   # Handle multi-line mail headers
             ($previous_action & $MAILHEADER) &&
             !&is_blank($nextline))
    {
        $line =~ s/$/<BR>/;
        $line_action |= $BREAK | $MAILHEADER;
    }
}

# paragraph
#
# Start a paragraph: the tag is appended to $prev so that it is printed
# before the current line.
sub paragraph
{
    $prev .= "<p>\n";
    $line_action |= $PAR;
}

# listprefix(LINE)
#
# Decide whether LINE begins a list item.  Returns the list
# ($prefix, $number, $rawprefix):
#   $rawprefix - leading whitespace, the bullet or number/letter, and
#                the one character that follows it
#   $number    - the number or letter of an ordered item; false for
#                bullets
#   $prefix    - $rawprefix with the number/letter removed (for bullets
#                it equals $rawprefix); used to recognise items of the
#                same list
# Returns (0,0,0) when LINE is not a list item.
sub listprefix
{
    local($line) = @_;
    local($prefix, $number, $rawprefix);

    return (0,0,0) if (!($line =~ /^\s*[-=\*o]\s+\S/ ) &&
                       !($line =~ /^\s*(\d+|[a-zA-Z])[\.\)\]:]\s+\S/ ));

    ($number) = $line =~ /^\s*(\d+|[a-zA-Z])/;

    # That slippery exception of "o" as a bullet
    # (This ought to be determined more through the context of what lists
    #  we have in progress, but this will probably work well enough.)
    if($line =~ /^\s*o\s/)
    {
        $number = 0;
    }

    if ($number)
    {
        ($rawprefix) = $line =~ /^(\s*(\d+|[a-zA-Z]).)/;
        $prefix = $rawprefix;
        $prefix =~ s/(\d+|[a-zA-Z])//;   # Take the number out
    } else {
        ($rawprefix) = $line =~ /^(\s*[-=o\*].)/;
        $prefix = $rawprefix;
    }
    ($prefix, $number, $rawprefix);
}

# startlist(PREFIX, NUMBER, RAWPREFIX)
#
# Open a new list level.  The opening tag is appended to $prev, the
# level's prefix and type are recorded in @listprefix / @list, and
# $listnum and $list_indent are advanced.  An ordered list is only
# opened when its first item is numbered 1, a or A.
sub startlist
{
    local($prefix, $number, $rawprefix) = @_;

    $listprefix[$listnum] = $prefix;
    if($number)
    {
        # It doesn't start with 1,a,A.  Let's not screw with it.
        if (($number != 1) && ($number ne "a") && ($number ne "A"))
        {
            return;
        }
        $prev .= "$list_indent<OL>\n";
        $list[$listnum] = $OL;
    } else {
        $prev .= "$list_indent<font size=-2><UL>\n";
        $list[$listnum] = $UL;
    }
    $listnum++;
    $list_indent = " " x $listnum x $indent_width;
    $line_action |= $LIST;
    $mode |= $LIST;
}


# endlist(N)
#
# Close the N innermost open lists, appending the closing tags to $prev.
# The $LIST bit is cleared from $mode once no list remains open.
sub endlist                     # End N lists
{
    local($n) = @_;
    for(; $n > 0; $n--, $listnum--)
    {
        $list_indent = " " x ($listnum-1) x $indent_width;
        if($list[$listnum-1] == $UL)
        {
            $prev .= "$list_indent</UL></font>\n";
        } elsif($list[$listnum-1] == $OL)
        {
            $prev .= "$list_indent</OL>\n";
        } else
        {
            print STDERR "Encountered list of unknown type\n";
        }
    }
    $line_action |= $END;
    $mode ^= ($LIST & $mode) if (!$listnum);
}

# continuelist
#
# Replace the bullet or number at the start of $line with an indented
# <LI>, according to the type of the innermost open list.  Arguments
# passed by the caller are ignored.
sub continuelist
{
    $line =~ s/^\s*[-=o\*]\s*/$list_indent<LI> /
        if $list[$listnum-1] == $UL;
    $line =~ s/^\s*(\d+|[a-zA-Z]).\s*/$list_indent<LI> /
        if $list[$listnum-1] == $OL;
    $line_action |= $LIST;
}

# liststuff
#
# List driver for the current $line: works out whether the line starts a
# new (possibly nested) list, continues an open one, returns to an outer
# one, or ends all lists, and calls startlist / endlist / continuelist
# accordingly.
sub liststuff
{
    local($i);

    local($prefix, $number, $rawprefix) = &listprefix($line);

    $i = $listnum;
    if (!$prefix)
    {
        return if !&is_blank($prev);   # inside a list item

        # This ain't no list.  We'll want to end all of them.
        return if !($mode & $LIST);    # This just speeds up the inevitable
        $i = 0;
    } else
    {
        # Maybe we're going back up to a previous list
        $i-- while (($prefix ne $listprefix[$i-1]) && ($i >= 0));
    }

    if (($i >= 0) && ($i != $listnum))
    {
        &endlist($listnum - $i);
    } elsif (!$listnum || $i != $listnum)
    {
        &startlist($prefix, $number, $rawprefix);
    }

    &continuelist($prefix, $number, $rawprefix) if ($mode & $LIST);
}

# endpreformat
#
# Leave <PRE> mode when $line no longer looks preformatted (and, unless
# $endpreformat_trigger_lines is 1, $nextline does not either).  The
# closing tags are appended to $prev.
sub endpreformat
{
    if(!($line =~ /\s{$preformat_whitespace_min,}\S+/) &&
       ($endpreformat_trigger_lines == 1 ||
        !($nextline =~ /\s{$preformat_whitespace_min,}\S+/)))
    {
        $prev =~ s#$#\n</PRE></font>#;
        $mode ^= ($PRE & $mode);
        $line_action |= $END;
    }
}

# preformat
#
# Enter <PRE> mode when $preformat_trigger_lines is 0, or when $line
# contains a run of at least $preformat_whitespace_min whitespace
# characters followed by text (and, unless the trigger count is 1,
# $nextline does too).  The opening tags are prepended to $line.
sub preformat
{
    if($preformat_trigger_lines == 0 ||
       (($line =~ /\s{$preformat_whitespace_min,}\S+/) &&
        ($preformat_trigger_lines == 1 ||
         $nextline =~ /\s{$preformat_whitespace_min,}\S+/)))
    {
        $line =~ s/^/<font size=-1><PRE>\n/;
        $prev =~ s/<p>//;
        $mode |= $PRE;
        $line_action |= $PRE;
    }
}

# make_new_anchor
#
# Increment the global anchor counter and return its new value.  Any
# argument (callers pass the heading text) is currently ignored.
sub make_new_anchor
{
    $anchor++;
    $anchor;
}

# anchor_mail
#
# Wrap the current mail-header line in a named anchor.
sub anchor_mail
{
    local($text) = $line =~ /\S+: *(.*) *$/;
    local($anchor) = &make_new_anchor($text);
    $line =~ s/(.*)/<A NAME="$anchor">$1<\/A>/;
}

# anchor_heading(HEADING)
#
# Wrap the <Hn>...</Hn> element already present in $line in a named
# anchor.
sub anchor_heading
{
    local($heading) = @_;
    local($anchor) = &make_new_anchor($heading);
    $line =~ s/(<H.>.*<\/H.>)/<A NAME="$anchor">$1<\/A>/;
}

# heading
#
# Turn $line into a heading when $nextline is an underline of about the
# same length (within the two underline tolerances).  The underline
# character selects the level:  * = + - ~ .  ->  H1 .. H6.  The
# underline itself is consumed from STDIN.
sub heading
{
    local($hindent, $heading) = $line =~ /^(\s*)(.+)$/;
    $hindent = 0;               # This isn't used yet, but Perl warns of
                                # "possible typo" if I declare a var
                                # and never reference it.

    # This is now taken care of in main()
    # $heading =~ s/\s+$//;     # get rid of trailing whitespace.

    local($underline) = $nextline =~ /^\s*(\S+)\s*$/;

    if((length($heading) > (length($underline) + $underline_tolerance_short))
       || (length($heading) < (length($underline) - $underline_tolerance_long)))
    {
        return;
    }

    # $underline =~ s/(^.).*/$1/;   # Could I do this any less efficiently?
    $underline = substr($underline,0,1);

    local($hlevel);
    $hlevel = 1 if $underline eq "*";
    $hlevel = 2 if $underline eq "=";
    $hlevel = 3 if $underline eq "+";
    $hlevel = 4 if $underline eq "-";
    $hlevel = 5 if $underline eq "~";
    $hlevel = 6 if $underline eq ".";
    return if !$hlevel;

    $nextline = <STDIN>;        # Eat the underline
    &tagline("H${hlevel}");
    &anchor_heading($heading);
    $line_action |= $HEADER;
}

# unhyphenate
#
# Join a word that was hyphenated across $line and $nextline: the first
# word of $nextline (with trailing punctuation and whitespace) is moved
# onto the end of $line in place of the hyphen.
sub unhyphenate
{
    local($second);

    # This looks hairy because of all the quoted characters.
    # All I'm doing is pulling out the word that begins the next line.
    # Along with it, I pull out any punctuation that follows.
    # Preceding whitespace is preserved.  We don't want to screw up
    # our own guessing systems that rely on indentation.
    ($second) = $nextline =~ /^\s*([a-zA-Z]+[\)\}\]\.,:;\'\"\>]*\s*)/; # "
    $nextline =~ s/^(\s*)[a-zA-Z]+[\)\}\]\.,:;\'\"\>]*\s*/$1/;          # "
    # (The silly comments are for my less-than-perfect code hilighter)

    $line =~ s/\-\s*$/$second/;
    $line .= "\n";
}

# untabify
#
# Expand the leading run of spaces and tabs in $line to spaces only,
# advancing to the next multiple of $tab_width for each tab.
sub untabify
{
    local($oldws) = $line =~ /^([ \011]+)/;
    local($oldlen) = (length($oldws));

    local($i, $column);
    for($i=0, $column = 0; $i < $oldlen; $i++)
    {
        if(substr($oldws, $i, 1) eq " ")
        {                       # Space
            $column++;
        } else {                # Tab
            $column += $tab_width - ($column % $tab_width);
        }
    }
    $line = (" " x $column) . substr($line, $oldlen);
}

# tagline(TAG)
#
# Strip leading whitespace from $line and wrap it in <TAG>...</TAG>
# followed by a newline.
sub tagline
{
    local($tag) = @_;
    $line =~ s/^\s*(.*)\s*$/<$tag>$1<\/$tag>\n/;
}

# caps
#
# Wrap $line in $caps_tag when it contains a run of at least
# $min_caps_length capital letters and no lowercase letter or '<'.
sub caps
{
    if($line =~ /^[^a-z<]*[A-Z]{$min_caps_length,}[^a-z<]*$/)
    {
        &tagline($caps_tag);
        $line_action |= $CAPS;
    }
}



# main
#
# Parse the options, print the page header, then convert STDIN to HTML on
# STDOUT.  The conversion keeps a three-line window ($prev, $line,
# $nextline): the handlers edit $line (and sometimes $prev or $nextline),
# after which $prev is printed and the window slides down by one line.
# Finally open lists and <PRE> are closed and either the append file or
# the default footer is printed.
sub main
{
    &deal_with_options;

    # NOTE(review): this guard is a constant, so the page header is
    # printed even in extract mode, whereas the closing </BODY></HTML>
    # below honour $extract.  Looks like it may once have been
    # "if(!$extract)" -- confirm before changing, since the generated
    # pages depend on it.
    if(1)
    {
        print q(
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html lang="en-US">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="Description" content="ImageMagick - a robust collection of tools and libraries to read, write and manipulate an image in any of the popular image formats. ImageMagick allows dynamic creation of GIFs, making it suitable for Web applications.">
<meta name="Keywords" content="ImageMagick,Image Magick,Image Magic,PerlMagick,Perl Magick,Perl Magic,WebMagick,Web Magic,image processing,software development,simulation,image software,AniMagick,Animagic,Magick++">
<meta name="Resource-type" content="document">
<meta name="Robots" content="ALL">
<link rel="stylesheet" type="text/css" href="../www/magick.css">
</head>

<body marginheight=1 marginwidth=1 topmargin=1 leftmargin=1>
<a name="top"></a>
<table border="0" cellpadding="0" cellspacing="0" summary="Masthead" width="100%">
<tbody>
<tr>
<td bgcolor="#003399" width="25%" height="118" background="../images/background.gif"><a href="http://www.imagemagick.org/"><img src="../images/script.gif" width="278" height="118" border="0" alt="" /></a></td>
<td bgcolor="#003399" width="60%" height="118" background="../images/background.gif"><a href="http://www.networkeleven.com/direct.php?magick_all"><img src="../images/promote.png" border="0" width="186" height="52" vspace="29" alt="Powered by NetworkEleven" /></a></td>
<td bgcolor="#003399" width="114" height="118" align="right"><img src="../images/sprite.png" width="114" height="118" alt="" /></td>
<td bgcolor="#003399" width="114" height="118" align="right"><a href="http://www.imagemagick.net"><img src="../images/logo.png" width="114" height="118" border="0" alt="ImageMagick logo" /></a></td>
</tr></tbody></table>
</table><table align="left" border=0 cellpadding=2 cellspacing=2 summary="Navigation buttons" width="20%">
<tr>
<td>
<form target="_self" action="../index.html"><input type="submit" title="ImageMagick Home" value=" Home " style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
</td><td>
<form target="_self" action="../www/apis.html"><input type="submit" title="ImageMagick API" value=" API " style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
</td><td>
<form target="_self" action="../www/archives.html"><input type="submit" title="ImageMagick Download" value="Download" style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold"></form>
</td>
</tr>
</table>
<div align="right" style="margin-top:3px; padding-right:4px">
<form action="http://studio.imagemagick.org/Sage/scripts/Sage.cgi">
<input type="TEXT" name="query" size=32 maxlength=255>
<input type="SUBMIT" name="sa" value="Search" style="background-image:url('../images/background.gif'); bgcolor:#003399; color:#fbc713; font-weight:bold">
</form><br>
</div>
<table align="left" border=0 cellpadding=10 cellspacing=0 style="margin-top:-17px" width="100%"><tr><td>

<br> <br>
) . "\n";
        print "<HTML>\n";
        print "<HEAD>\n";

        # It'd be nice if we could guess a title from the first header,
        # but even that would be too late if we're doing this in one pass.
        print "<TITLE>$title</TITLE>\n" if($title);

        print "</HEAD>\n";

        print q(<body text="#000000" bgcolor="#fbc713" link="#1F00FF" vlink="#9900DD" alink="#FF0000">) . "\n";

        if ($title) {
            print "<h3>$title</h3>\n";
        }
    }

    $prev     = "";
    $line     = <STDIN>;
    $nextline = <STDIN>;
    do
    {
        $line =~ s/[ \011]*$//;        # Chop trailing whitespace

        &untabify;                     # Change leading whitespace into spaces

        $line_length = length($line);  # Do this before tags go in

        &escape;

        &endpreformat if (($mode & $PRE) && ($preformat_trigger_lines != 0));

        &hrule if !($mode & $PRE);

        &heading if (!($mode & $PRE) &&
                     $nextline =~ /^\s*[=\-\*\.~\+]+$/);

        &caps if !($mode & $PRE);

        &liststuff if (!($mode & $PRE) &&
                       !&is_blank($line));

        &mailstuff if ($mailmode &&
                       !($mode & $PRE) &&
                       !($line_action & $HEADER));

        &preformat if (!($line_action & ($HEADER | $LIST | $MAILHEADER)) &&
                       !($mode & ($LIST | $PRE)) &&
                       ($endpreformat_trigger_lines != 0));

        &paragraph if ((&is_blank($prev) || ($line_action & $END)) &&
                       !&is_blank($line) &&
                       !($mode & ($LIST | $PRE)) &&  # paragraphs in lists
                                                     # *should* be allowed.
                       (!$line_action ||
                        ($line_action & ($CAPS | $END | $MAILQUOTE))));

        &shortline;

        &unhyphenate if ($unhyphenation &&
                         ($line =~ /[a-zA-Z]\-$/) &&      # ends in hyphen
                         # next line starts w/letters
                         ($nextline =~ /^\s*[a-zA-Z]/) &&
                         !($mode & ($PRE | $HEADER | $MAILHEADER | $BREAK)));


        # Print it out and move on.

        print $prev;

        if (!&is_blank($nextline))
        {
            $previous_action = $line_action;
            $line_action     = $NONE;
        }

        $prev     = $line;
        $line     = $nextline;
        $nextline = <STDIN>;
    } until (!$nextline && !$line && !$prev);

    $prev = "";
    &endlist($listnum) if ($mode & $LIST);   # End all lists
    print $prev;

    print "\n";

    print "</PRE></font>\n" if ($mode & $PRE);

    if ($append_file)
    {
        # Three-argument open so that the file name can never be taken
        # as an open mode; the result is checked because -r and open can
        # disagree.
        if((-r $append_file) && open(APPEND, '<', $append_file))
        {
            print while <APPEND>;
            close(APPEND);
        } else {
            print STDERR "Can't find or read file $append_file to append.\n";
        }
    } else {
        print q(<hr>) . "\n";
        print q(
<a href="#top"><img src="../images/top.gif" border=0 width="35" height="46" align="right" alt="Top of page"></a>
<form action="http://studio.imagemagick.org/magick/" style="margin-top:5px">
<input type="submit" title="Help!" value="Help!" style="background-image:url('../images/background.gif'); color:#fbc713; font-weight:bold">
<small>"Image manipulation software that works like magick"</small>
</form></td>
</tr></table>
) . "\n";
    }

    if(!$extract)
    {
        print "</BODY>\n";
        print "</HTML>\n";
    }
}

&main();

