#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.

use strict;
use warnings;

# Return a copy of one input line with terminal decoration removed: first the
# screen controls "ESC [ number m", then every "char, backspace" pair.
sub _strip_terminal_codes {
    my ($text) = @_;
    $text =~ s/\x1b\[\d+m//g;    # Remove screen controls "ESC [ number m"
    $text =~ s/.\x8//g;          # Remove "char, backspace"
    return $text;
}

# Return the leading whitespace of a line (the empty string if there is none).
sub _indent_of {
    my ($text) = @_;
    my ($indent) = $text =~ /^(\s*)/;
    return $indent;
}

# Read nroff -man output from the filehandle $in and write the cleaned text to
# the filehandle $out.
#
#   * Only the first "PCREname(n)   PCREname(n)" header line is kept; the line
#     following any header is discarded.
#   * Chunks of the form <3 blanks><non-blank><3 blanks> (page footers) are
#     cut out.
#   * Headings (lines starting in column one, other than "Last updated" and
#     "Copyright" lines) are preceded by exactly two blank lines.
#   * Blank lines pending at end of input are not written.
sub clean_nroff_stream {
    my ($in, $out) = @_;

    my $blankcount  = 0;    # Blank lines seen but not yet written
    my $lastwascut  = 0;    # True straight after a footer chunk was removed
    my $firstheader = 1;    # True until the first header line has been kept
    my $lastprinted;        # Most recent non-blank line written, if any

    while (defined(my $line = <$in>)) {
        $line = _strip_terminal_codes($line);

        # Handle header lines. Retain only the first one we encounter, but
        # remove the blank line that follows. Any others (e.g. at end of
        # document) and the following blank line are dropped.

        if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
            if ($firstheader) {
                $firstheader = 0;
                print {$out} $line;
                $lastprinted = $line;
                $lastwascut  = 0;
            }

            # The line after a header is assumed to be blank; it is discarded
            # without being inspected.
            my $discarded = <$in>;
            next;
        }

        # Count runs of empty lines

        if ($line =~ /^\s*$/) {
            $blankcount++;
            $lastwascut = 0;
            next;
        }

        # If a chunk of lines has been cut out (page footer) and the next line
        # has a different indentation, put back one blank line.

        if ($lastwascut && $blankcount < 1 && defined $lastprinted) {
            $blankcount++ if _indent_of($lastprinted) ne _indent_of($line);
        }

        # We get here only when we have a non-blank line in hand. If it was
        # preceded by 3 or more blank lines, read the next 3 lines and see if
        # they are blank. If so, remove all 7 lines, and remember that we have
        # just done a cut.

        if ($blankcount >= 3) {
            my @next;
            for my $i (0 .. 2) {
                my $ahead = <$in>;
                $ahead = "" if !defined $ahead;    # End of input counts as blank
                $next[$i] = _strip_terminal_codes($ahead);
            }

            # Cut out chunks of the form <3 blanks><non-blank><3 blanks>

            if (   $next[0] =~ /^\s*$/
                && $next[1] =~ /^\s*$/
                && $next[2] =~ /^\s*$/)
            {
                $blankcount -= 3;
                $lastwascut = 1;
            }

            # Otherwise output the saved blanks, the current, and the next
            # three lines. Remember the last non-blank line that was printed,
            # so that a later indentation check compares against the text
            # that really precedes the cut.

            else {
                print {$out} "\n" x $blankcount;
                print {$out} $line;
                $lastprinted = $line;

                for my $ahead (@next) {
                    # Lookahead lines get a second backspace pass, as this
                    # script has always given them; it only has an effect when
                    # the input contained consecutive backspaces.
                    $ahead =~ s/.\x8//g;
                    print {$out} $ahead;
                    $lastprinted = $ahead if $ahead =~ /\S/;
                }

                $lastwascut = 0;
                $blankcount = 0;
            }
        }

        # This non-blank line is not preceded by 3 or more blank lines. Output
        # any blanks there are, and the line. Remember it. Force two blank
        # lines before headings.

        else {
            $blankcount = 2
                if $line =~ /^\S/
                && $line !~ /^Last updated/
                && $line !~ /^Copyright/
                && defined $lastprinted;

            print {$out} "\n" x $blankcount;
            print {$out} $line;
            $lastprinted = $line;
            $lastwascut  = 0;
            $blankcount  = 0;
        }
    }

    return;
}

# Input on STDIN; output to STDOUT. The filter runs only when this file is
# executed as a program, not when it is loaded by other Perl code.

clean_nroff_stream(\*STDIN, \*STDOUT) unless caller;

# Return true so that the file can also be loaded with require.
1;

# End