Home | History | Annotate | Download | only in tabwriter
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package tabwriter implements a write filter (tabwriter.Writer) that
      6 // translates tabbed columns in input into properly aligned text.
      7 //
      8 // The package is using the Elastic Tabstops algorithm described at
      9 // http://nickgravgaard.com/elastictabstops/index.html.
     10 //
     11 package tabwriter
     12 
     13 import (
     14 	"bytes"
     15 	"io"
     16 	"unicode/utf8"
     17 )
     18 
     19 // ----------------------------------------------------------------------------
     20 // Filter implementation
     21 
     22 // A cell represents a segment of text terminated by tabs or line breaks.
     23 // The text itself is stored in a separate buffer; cell only describes the
     24 // segment's size in bytes, its width in runes, and whether it's an htab
     25 // ('\t') terminated cell.
     26 //
     27 type cell struct {
     28 	size  int  // cell size in bytes
     29 	width int  // cell width in runes
     30 	htab  bool // true if the cell is terminated by an htab ('\t')
     31 }
     32 
     33 // A Writer is a filter that inserts padding around tab-delimited
     34 // columns in its input to align them in the output.
     35 //
     36 // The Writer treats incoming bytes as UTF-8 encoded text consisting
     37 // of cells terminated by (horizontal or vertical) tabs or line
     38 // breaks (newline or formfeed characters). Cells in adjacent lines
     39 // constitute a column. The Writer inserts padding as needed to
     40 // make all cells in a column have the same width, effectively
     41 // aligning the columns. It assumes that all characters have the
     42 // same width except for tabs for which a tabwidth must be specified.
     43 // Note that cells are tab-terminated, not tab-separated: trailing
     44 // non-tab text at the end of a line does not form a column cell.
     45 //
     46 // The Writer assumes that all Unicode code points have the same width;
     47 // this may not be true in some fonts.
     48 //
     49 // If DiscardEmptyColumns is set, empty columns that are terminated
     50 // entirely by vertical (or "soft") tabs are discarded. Columns
     51 // terminated by horizontal (or "hard") tabs are not affected by
     52 // this flag.
     53 //
     54 // If a Writer is configured to filter HTML, HTML tags and entities
     55 // are passed through. The widths of tags and entities are
     56 // assumed to be zero (tags) and one (entities) for formatting purposes.
     57 //
     58 // A segment of text may be escaped by bracketing it with Escape
     59 // characters. The tabwriter passes escaped text segments through
     60 // unchanged. In particular, it does not interpret any tabs or line
     61 // breaks within the segment. If the StripEscape flag is set, the
     62 // Escape characters are stripped from the output; otherwise they
     63 // are passed through as well. For the purpose of formatting, the
     64 // width of the escaped text is always computed excluding the Escape
     65 // characters.
     66 //
     67 // The formfeed character ('\f') acts like a newline but it also
     68 // terminates all columns in the current line (effectively calling
     69 // Flush). Cells in the next line start new columns. Unless found
     70 // inside an HTML tag or inside an escaped text segment, formfeed
     71 // characters appear as newlines in the output.
     72 //
     73 // The Writer must buffer input internally, because proper spacing
     74 // of one line may depend on the cells in future lines. Clients must
     75 // call Flush when done calling Write.
     76 //
     77 type Writer struct {
     78 	// configuration
     79 	output   io.Writer
     80 	minwidth int
     81 	tabwidth int
     82 	padding  int
     83 	padbytes [8]byte
     84 	flags    uint
     85 
     86 	// current state
     87 	buf     bytes.Buffer // collected text excluding tabs or line breaks
     88 	pos     int          // buffer position up to which cell.width of incomplete cell has been computed
     89 	cell    cell         // current incomplete cell; cell.width is up to buf[pos] excluding ignored sections
     90 	endChar byte         // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
     91 	lines   [][]cell     // list of lines; each line is a list of cells
     92 	widths  []int        // list of column widths in runes - re-used during formatting
     93 }
     94 
     95 func (b *Writer) addLine() { b.lines = append(b.lines, []cell{}) }
     96 
     97 // Reset the current state.
     98 func (b *Writer) reset() {
     99 	b.buf.Reset()
    100 	b.pos = 0
    101 	b.cell = cell{}
    102 	b.endChar = 0
    103 	b.lines = b.lines[0:0]
    104 	b.widths = b.widths[0:0]
    105 	b.addLine()
    106 }
    107 
    108 // Internal representation (current state):
    109 //
    110 // - all text written is appended to buf; tabs and line breaks are stripped away
    111 // - at any given time there is a (possibly empty) incomplete cell at the end
    112 //   (the cell starts after a tab or line break)
    113 // - cell.size is the number of bytes belonging to the cell so far
    114 // - cell.width is text width in runes of that cell from the start of the cell to
    115 //   position pos; html tags and entities are excluded from this width if html
    116 //   filtering is enabled
    117 // - the sizes and widths of processed text are kept in the lines list
    118 //   which contains a list of cells for each line
    119 // - the widths list is a temporary list with current widths used during
    120 //   formatting; it is kept in Writer because it's re-used
    121 //
    122 //                    |<---------- size ---------->|
    123 //                    |                            |
    124 //                    |<- width ->|<- ignored ->|  |
    125 //                    |           |             |  |
    126 // [---processed---tab------------<tag>...</tag>...]
    127 // ^                  ^                         ^
    128 // |                  |                         |
    129 // buf                start of incomplete cell  pos
    130 
    131 // Formatting can be controlled with these flags.
    132 const (
    133 	// Ignore html tags and treat entities (starting with '&'
    134 	// and ending in ';') as single characters (width = 1).
    135 	FilterHTML uint = 1 << iota
    136 
    137 	// Strip Escape characters bracketing escaped text segments
    138 	// instead of passing them through unchanged with the text.
    139 	StripEscape
    140 
    141 	// Force right-alignment of cell content.
    142 	// Default is left-alignment.
    143 	AlignRight
    144 
    145 	// Handle empty columns as if they were not present in
    146 	// the input in the first place.
    147 	DiscardEmptyColumns
    148 
    149 	// Always use tabs for indentation columns (i.e., padding of
    150 	// leading empty cells on the left) independent of padchar.
    151 	TabIndent
    152 
    153 	// Print a vertical bar ('|') between columns (after formatting).
    154 	// Discarded columns appear as zero-width columns ("||").
    155 	Debug
    156 )
    157 
    158 // A Writer must be initialized with a call to Init. The first parameter (output)
    159 // specifies the filter output. The remaining parameters control the formatting:
    160 //
    161 //	minwidth	minimal cell width including any padding
    162 //	tabwidth	width of tab characters (equivalent number of spaces)
    163 //	padding		padding added to a cell before computing its width
    164 //	padchar		ASCII char used for padding
    165 //			if padchar == '\t', the Writer will assume that the
    166 //			width of a '\t' in the formatted output is tabwidth,
    167 //			and cells are left-aligned independent of align_left
    168 //			(for correct-looking results, tabwidth must correspond
    169 //			to the tab width in the viewer displaying the result)
    170 //	flags		formatting control
    171 //
    172 func (b *Writer) Init(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
    173 	if minwidth < 0 || tabwidth < 0 || padding < 0 {
    174 		panic("negative minwidth, tabwidth, or padding")
    175 	}
    176 	b.output = output
    177 	b.minwidth = minwidth
    178 	b.tabwidth = tabwidth
    179 	b.padding = padding
    180 	for i := range b.padbytes {
    181 		b.padbytes[i] = padchar
    182 	}
    183 	if padchar == '\t' {
    184 		// tab padding enforces left-alignment
    185 		flags &^= AlignRight
    186 	}
    187 	b.flags = flags
    188 
    189 	b.reset()
    190 
    191 	return b
    192 }
    193 
    194 // debugging support (keep code around)
    195 func (b *Writer) dump() {
    196 	pos := 0
    197 	for i, line := range b.lines {
    198 		print("(", i, ") ")
    199 		for _, c := range line {
    200 			print("[", string(b.buf.Bytes()[pos:pos+c.size]), "]")
    201 			pos += c.size
    202 		}
    203 		print("\n")
    204 	}
    205 	print("\n")
    206 }
    207 
    208 // local error wrapper so we can distinguish errors we want to return
    209 // as errors from genuine panics (which we don't want to return as errors)
    210 type osError struct {
    211 	err error
    212 }
    213 
    214 func (b *Writer) write0(buf []byte) {
    215 	n, err := b.output.Write(buf)
    216 	if n != len(buf) && err == nil {
    217 		err = io.ErrShortWrite
    218 	}
    219 	if err != nil {
    220 		panic(osError{err})
    221 	}
    222 }
    223 
    224 func (b *Writer) writeN(src []byte, n int) {
    225 	for n > len(src) {
    226 		b.write0(src)
    227 		n -= len(src)
    228 	}
    229 	b.write0(src[0:n])
    230 }
    231 
    232 var (
    233 	newline = []byte{'\n'}
    234 	tabs    = []byte("\t\t\t\t\t\t\t\t")
    235 )
    236 
    237 func (b *Writer) writePadding(textw, cellw int, useTabs bool) {
    238 	if b.padbytes[0] == '\t' || useTabs {
    239 		// padding is done with tabs
    240 		if b.tabwidth == 0 {
    241 			return // tabs have no width - can't do any padding
    242 		}
    243 		// make cellw the smallest multiple of b.tabwidth
    244 		cellw = (cellw + b.tabwidth - 1) / b.tabwidth * b.tabwidth
    245 		n := cellw - textw // amount of padding
    246 		if n < 0 {
    247 			panic("internal error")
    248 		}
    249 		b.writeN(tabs, (n+b.tabwidth-1)/b.tabwidth)
    250 		return
    251 	}
    252 
    253 	// padding is done with non-tab characters
    254 	b.writeN(b.padbytes[0:], cellw-textw)
    255 }
    256 
    257 var vbar = []byte{'|'}
    258 
    259 func (b *Writer) writeLines(pos0 int, line0, line1 int) (pos int) {
    260 	pos = pos0
    261 	for i := line0; i < line1; i++ {
    262 		line := b.lines[i]
    263 
    264 		// if TabIndent is set, use tabs to pad leading empty cells
    265 		useTabs := b.flags&TabIndent != 0
    266 
    267 		for j, c := range line {
    268 			if j > 0 && b.flags&Debug != 0 {
    269 				// indicate column break
    270 				b.write0(vbar)
    271 			}
    272 
    273 			if c.size == 0 {
    274 				// empty cell
    275 				if j < len(b.widths) {
    276 					b.writePadding(c.width, b.widths[j], useTabs)
    277 				}
    278 			} else {
    279 				// non-empty cell
    280 				useTabs = false
    281 				if b.flags&AlignRight == 0 { // align left
    282 					b.write0(b.buf.Bytes()[pos : pos+c.size])
    283 					pos += c.size
    284 					if j < len(b.widths) {
    285 						b.writePadding(c.width, b.widths[j], false)
    286 					}
    287 				} else { // align right
    288 					if j < len(b.widths) {
    289 						b.writePadding(c.width, b.widths[j], false)
    290 					}
    291 					b.write0(b.buf.Bytes()[pos : pos+c.size])
    292 					pos += c.size
    293 				}
    294 			}
    295 		}
    296 
    297 		if i+1 == len(b.lines) {
    298 			// last buffered line - we don't have a newline, so just write
    299 			// any outstanding buffered data
    300 			b.write0(b.buf.Bytes()[pos : pos+b.cell.size])
    301 			pos += b.cell.size
    302 		} else {
    303 			// not the last line - write newline
    304 			b.write0(newline)
    305 		}
    306 	}
    307 	return
    308 }
    309 
    310 // Format the text between line0 and line1 (excluding line1); pos
    311 // is the buffer position corresponding to the beginning of line0.
    312 // Returns the buffer position corresponding to the beginning of
    313 // line1 and an error, if any.
    314 //
    315 func (b *Writer) format(pos0 int, line0, line1 int) (pos int) {
    316 	pos = pos0
    317 	column := len(b.widths)
    318 	for this := line0; this < line1; this++ {
    319 		line := b.lines[this]
    320 
    321 		if column < len(line)-1 {
    322 			// cell exists in this column => this line
    323 			// has more cells than the previous line
    324 			// (the last cell per line is ignored because cells are
    325 			// tab-terminated; the last cell per line describes the
    326 			// text before the newline/formfeed and does not belong
    327 			// to a column)
    328 
    329 			// print unprinted lines until beginning of block
    330 			pos = b.writeLines(pos, line0, this)
    331 			line0 = this
    332 
    333 			// column block begin
    334 			width := b.minwidth // minimal column width
    335 			discardable := true // true if all cells in this column are empty and "soft"
    336 			for ; this < line1; this++ {
    337 				line = b.lines[this]
    338 				if column < len(line)-1 {
    339 					// cell exists in this column
    340 					c := line[column]
    341 					// update width
    342 					if w := c.width + b.padding; w > width {
    343 						width = w
    344 					}
    345 					// update discardable
    346 					if c.width > 0 || c.htab {
    347 						discardable = false
    348 					}
    349 				} else {
    350 					break
    351 				}
    352 			}
    353 			// column block end
    354 
    355 			// discard empty columns if necessary
    356 			if discardable && b.flags&DiscardEmptyColumns != 0 {
    357 				width = 0
    358 			}
    359 
    360 			// format and print all columns to the right of this column
    361 			// (we know the widths of this column and all columns to the left)
    362 			b.widths = append(b.widths, width) // push width
    363 			pos = b.format(pos, line0, this)
    364 			b.widths = b.widths[0 : len(b.widths)-1] // pop width
    365 			line0 = this
    366 		}
    367 	}
    368 
    369 	// print unprinted lines until end
    370 	return b.writeLines(pos, line0, line1)
    371 }
    372 
    373 // Append text to current cell.
    374 func (b *Writer) append(text []byte) {
    375 	b.buf.Write(text)
    376 	b.cell.size += len(text)
    377 }
    378 
    379 // Update the cell width.
    380 func (b *Writer) updateWidth() {
    381 	b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos:b.buf.Len()])
    382 	b.pos = b.buf.Len()
    383 }
    384 
    385 // To escape a text segment, bracket it with Escape characters.
    386 // For instance, the tab in this string "Ignore this tab: \xff\t\xff"
    387 // does not terminate a cell and constitutes a single character of
    388 // width one for formatting purposes.
    389 //
    390 // The value 0xff was chosen because it cannot appear in a valid UTF-8 sequence.
    391 //
    392 const Escape = '\xff'
    393 
    394 // Start escaped mode.
    395 func (b *Writer) startEscape(ch byte) {
    396 	switch ch {
    397 	case Escape:
    398 		b.endChar = Escape
    399 	case '<':
    400 		b.endChar = '>'
    401 	case '&':
    402 		b.endChar = ';'
    403 	}
    404 }
    405 
    406 // Terminate escaped mode. If the escaped text was an HTML tag, its width
    407 // is assumed to be zero for formatting purposes; if it was an HTML entity,
    408 // its width is assumed to be one. In all other cases, the width is the
    409 // unicode width of the text.
    410 //
    411 func (b *Writer) endEscape() {
    412 	switch b.endChar {
    413 	case Escape:
    414 		b.updateWidth()
    415 		if b.flags&StripEscape == 0 {
    416 			b.cell.width -= 2 // don't count the Escape chars
    417 		}
    418 	case '>': // tag of zero width
    419 	case ';':
    420 		b.cell.width++ // entity, count as one rune
    421 	}
    422 	b.pos = b.buf.Len()
    423 	b.endChar = 0
    424 }
    425 
    426 // Terminate the current cell by adding it to the list of cells of the
    427 // current line. Returns the number of cells in that line.
    428 //
    429 func (b *Writer) terminateCell(htab bool) int {
    430 	b.cell.htab = htab
    431 	line := &b.lines[len(b.lines)-1]
    432 	*line = append(*line, b.cell)
    433 	b.cell = cell{}
    434 	return len(*line)
    435 }
    436 
    437 func handlePanic(err *error, op string) {
    438 	if e := recover(); e != nil {
    439 		if nerr, ok := e.(osError); ok {
    440 			*err = nerr.err
    441 			return
    442 		}
    443 		panic("tabwriter: panic during " + op)
    444 	}
    445 }
    446 
    447 // Flush should be called after the last call to Write to ensure
    448 // that any data buffered in the Writer is written to output. Any
    449 // incomplete escape sequence at the end is considered
    450 // complete for formatting purposes.
    451 //
    452 func (b *Writer) Flush() (err error) {
    453 	defer b.reset() // even in the presence of errors
    454 	defer handlePanic(&err, "Flush")
    455 
    456 	// add current cell if not empty
    457 	if b.cell.size > 0 {
    458 		if b.endChar != 0 {
    459 			// inside escape - terminate it even if incomplete
    460 			b.endEscape()
    461 		}
    462 		b.terminateCell(false)
    463 	}
    464 
    465 	// format contents of buffer
    466 	b.format(0, 0, len(b.lines))
    467 
    468 	return
    469 }
    470 
    471 var hbar = []byte("---\n")
    472 
    473 // Write writes buf to the writer b.
    474 // The only errors returned are ones encountered
    475 // while writing to the underlying output stream.
    476 //
    477 func (b *Writer) Write(buf []byte) (n int, err error) {
    478 	defer handlePanic(&err, "Write")
    479 
    480 	// split text into cells
    481 	n = 0
    482 	for i, ch := range buf {
    483 		if b.endChar == 0 {
    484 			// outside escape
    485 			switch ch {
    486 			case '\t', '\v', '\n', '\f':
    487 				// end of cell
    488 				b.append(buf[n:i])
    489 				b.updateWidth()
    490 				n = i + 1 // ch consumed
    491 				ncells := b.terminateCell(ch == '\t')
    492 				if ch == '\n' || ch == '\f' {
    493 					// terminate line
    494 					b.addLine()
    495 					if ch == '\f' || ncells == 1 {
    496 						// A '\f' always forces a flush. Otherwise, if the previous
    497 						// line has only one cell which does not have an impact on
    498 						// the formatting of the following lines (the last cell per
    499 						// line is ignored by format()), thus we can flush the
    500 						// Writer contents.
    501 						if err = b.Flush(); err != nil {
    502 							return
    503 						}
    504 						if ch == '\f' && b.flags&Debug != 0 {
    505 							// indicate section break
    506 							b.write0(hbar)
    507 						}
    508 					}
    509 				}
    510 
    511 			case Escape:
    512 				// start of escaped sequence
    513 				b.append(buf[n:i])
    514 				b.updateWidth()
    515 				n = i
    516 				if b.flags&StripEscape != 0 {
    517 					n++ // strip Escape
    518 				}
    519 				b.startEscape(Escape)
    520 
    521 			case '<', '&':
    522 				// possibly an html tag/entity
    523 				if b.flags&FilterHTML != 0 {
    524 					// begin of tag/entity
    525 					b.append(buf[n:i])
    526 					b.updateWidth()
    527 					n = i
    528 					b.startEscape(ch)
    529 				}
    530 			}
    531 
    532 		} else {
    533 			// inside escape
    534 			if ch == b.endChar {
    535 				// end of tag/entity
    536 				j := i + 1
    537 				if ch == Escape && b.flags&StripEscape != 0 {
    538 					j = i // strip Escape
    539 				}
    540 				b.append(buf[n:j])
    541 				n = i + 1 // ch consumed
    542 				b.endEscape()
    543 			}
    544 		}
    545 	}
    546 
    547 	// append leftover text
    548 	b.append(buf[n:])
    549 	n = len(buf)
    550 	return
    551 }
    552 
    553 // NewWriter allocates and initializes a new tabwriter.Writer.
    554 // The parameters are the same as for the Init function.
    555 //
    556 func NewWriter(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
    557 	return new(Writer).Init(output, minwidth, tabwidth, padding, padchar, flags)
    558 }
    559