Home | History | Annotate | Download | only in ascii85
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package ascii85 implements the ascii85 data encoding
      6 // as used in the btoa tool and Adobe's PostScript and PDF document formats.
      7 package ascii85
      8 
      9 import (
     10 	"io"
     11 	"strconv"
     12 )
     13 
     14 /*
     15  * Encoder
     16  */
     17 
     18 // Encode encodes src into at most MaxEncodedLen(len(src))
     19 // bytes of dst, returning the actual number of bytes written.
     20 //
     21 // The encoding handles 4-byte chunks, using a special encoding
     22 // for the last fragment, so Encode is not appropriate for use on
     23 // individual blocks of a large data stream.  Use NewEncoder() instead.
     24 //
     25 // Often, ascii85-encoded data is wrapped in <~ and ~> symbols.
     26 // Encode does not add these.
     27 func Encode(dst, src []byte) int {
     28 	if len(src) == 0 {
     29 		return 0
     30 	}
     31 
     32 	n := 0
     33 	for len(src) > 0 {
     34 		dst[0] = 0
     35 		dst[1] = 0
     36 		dst[2] = 0
     37 		dst[3] = 0
     38 		dst[4] = 0
     39 
     40 		// Unpack 4 bytes into uint32 to repack into base 85 5-byte.
     41 		var v uint32
     42 		switch len(src) {
     43 		default:
     44 			v |= uint32(src[3])
     45 			fallthrough
     46 		case 3:
     47 			v |= uint32(src[2]) << 8
     48 			fallthrough
     49 		case 2:
     50 			v |= uint32(src[1]) << 16
     51 			fallthrough
     52 		case 1:
     53 			v |= uint32(src[0]) << 24
     54 		}
     55 
     56 		// Special case: zero (!!!!!) shortens to z.
     57 		if v == 0 && len(src) >= 4 {
     58 			dst[0] = 'z'
     59 			dst = dst[1:]
     60 			src = src[4:]
     61 			n++
     62 			continue
     63 		}
     64 
     65 		// Otherwise, 5 base 85 digits starting at !.
     66 		for i := 4; i >= 0; i-- {
     67 			dst[i] = '!' + byte(v%85)
     68 			v /= 85
     69 		}
     70 
     71 		// If src was short, discard the low destination bytes.
     72 		m := 5
     73 		if len(src) < 4 {
     74 			m -= 4 - len(src)
     75 			src = nil
     76 		} else {
     77 			src = src[4:]
     78 		}
     79 		dst = dst[m:]
     80 		n += m
     81 	}
     82 	return n
     83 }
     84 
     85 // MaxEncodedLen returns the maximum length of an encoding of n source bytes.
     86 func MaxEncodedLen(n int) int { return (n + 3) / 4 * 5 }
     87 
     88 // NewEncoder returns a new ascii85 stream encoder.  Data written to
     89 // the returned writer will be encoded and then written to w.
     90 // Ascii85 encodings operate in 32-bit blocks; when finished
     91 // writing, the caller must Close the returned encoder to flush any
     92 // trailing partial block.
     93 func NewEncoder(w io.Writer) io.WriteCloser { return &encoder{w: w} }
     94 
// encoder is the io.WriteCloser implementation returned by NewEncoder.
type encoder struct {
	err  error      // error from a failed write to w; once set, Write and Close return it
	w    io.Writer  // destination for the encoded output
	buf  [4]byte    // buffered data waiting to be encoded
	nbuf int        // number of bytes in buf
	out  [1024]byte // output buffer
}
    102 
    103 func (e *encoder) Write(p []byte) (n int, err error) {
    104 	if e.err != nil {
    105 		return 0, e.err
    106 	}
    107 
    108 	// Leading fringe.
    109 	if e.nbuf > 0 {
    110 		var i int
    111 		for i = 0; i < len(p) && e.nbuf < 4; i++ {
    112 			e.buf[e.nbuf] = p[i]
    113 			e.nbuf++
    114 		}
    115 		n += i
    116 		p = p[i:]
    117 		if e.nbuf < 4 {
    118 			return
    119 		}
    120 		nout := Encode(e.out[0:], e.buf[0:])
    121 		if _, e.err = e.w.Write(e.out[0:nout]); e.err != nil {
    122 			return n, e.err
    123 		}
    124 		e.nbuf = 0
    125 	}
    126 
    127 	// Large interior chunks.
    128 	for len(p) >= 4 {
    129 		nn := len(e.out) / 5 * 4
    130 		if nn > len(p) {
    131 			nn = len(p)
    132 		}
    133 		nn -= nn % 4
    134 		if nn > 0 {
    135 			nout := Encode(e.out[0:], p[0:nn])
    136 			if _, e.err = e.w.Write(e.out[0:nout]); e.err != nil {
    137 				return n, e.err
    138 			}
    139 		}
    140 		n += nn
    141 		p = p[nn:]
    142 	}
    143 
    144 	// Trailing fringe.
    145 	for i := 0; i < len(p); i++ {
    146 		e.buf[i] = p[i]
    147 	}
    148 	e.nbuf = len(p)
    149 	n += len(p)
    150 	return
    151 }
    152 
    153 // Close flushes any pending output from the encoder.
    154 // It is an error to call Write after calling Close.
    155 func (e *encoder) Close() error {
    156 	// If there's anything left in the buffer, flush it out
    157 	if e.err == nil && e.nbuf > 0 {
    158 		nout := Encode(e.out[0:], e.buf[0:e.nbuf])
    159 		e.nbuf = 0
    160 		_, e.err = e.w.Write(e.out[0:nout])
    161 	}
    162 	return e.err
    163 }
    164 
    165 /*
    166  * Decoder
    167  */
    168 
    169 type CorruptInputError int64
    170 
    171 func (e CorruptInputError) Error() string {
    172 	return "illegal ascii85 data at input byte " + strconv.FormatInt(int64(e), 10)
    173 }
    174 
    175 // Decode decodes src into dst, returning both the number
    176 // of bytes written to dst and the number consumed from src.
    177 // If src contains invalid ascii85 data, Decode will return the
    178 // number of bytes successfully written and a CorruptInputError.
    179 // Decode ignores space and control characters in src.
    180 // Often, ascii85-encoded data is wrapped in <~ and ~> symbols.
    181 // Decode expects these to have been stripped by the caller.
    182 //
    183 // If flush is true, Decode assumes that src represents the
    184 // end of the input stream and processes it completely rather
    185 // than wait for the completion of another 32-bit block.
    186 //
    187 // NewDecoder wraps an io.Reader interface around Decode.
    188 //
    189 func Decode(dst, src []byte, flush bool) (ndst, nsrc int, err error) {
    190 	var v uint32
    191 	var nb int
    192 	for i, b := range src {
    193 		if len(dst)-ndst < 4 {
    194 			return
    195 		}
    196 		switch {
    197 		case b <= ' ':
    198 			continue
    199 		case b == 'z' && nb == 0:
    200 			nb = 5
    201 			v = 0
    202 		case '!' <= b && b <= 'u':
    203 			v = v*85 + uint32(b-'!')
    204 			nb++
    205 		default:
    206 			return 0, 0, CorruptInputError(i)
    207 		}
    208 		if nb == 5 {
    209 			nsrc = i + 1
    210 			dst[ndst] = byte(v >> 24)
    211 			dst[ndst+1] = byte(v >> 16)
    212 			dst[ndst+2] = byte(v >> 8)
    213 			dst[ndst+3] = byte(v)
    214 			ndst += 4
    215 			nb = 0
    216 			v = 0
    217 		}
    218 	}
    219 	if flush {
    220 		nsrc = len(src)
    221 		if nb > 0 {
    222 			// The number of output bytes in the last fragment
    223 			// is the number of leftover input bytes - 1:
    224 			// the extra byte provides enough bits to cover
    225 			// the inefficiency of the encoding for the block.
    226 			if nb == 1 {
    227 				return 0, 0, CorruptInputError(len(src))
    228 			}
    229 			for i := nb; i < 5; i++ {
    230 				// The short encoding truncated the output value.
    231 				// We have to assume the worst case values (digit 84)
    232 				// in order to ensure that the top bits are correct.
    233 				v = v*85 + 84
    234 			}
    235 			for i := 0; i < nb-1; i++ {
    236 				dst[ndst] = byte(v >> 24)
    237 				v <<= 8
    238 				ndst++
    239 			}
    240 		}
    241 	}
    242 	return
    243 }
    244 
    245 // NewDecoder constructs a new ascii85 stream decoder.
    246 func NewDecoder(r io.Reader) io.Reader { return &decoder{r: r} }
    247 
// decoder is the io.Reader implementation returned by NewDecoder.
type decoder struct {
	err     error      // sticky error; once set, every Read returns it
	readErr error      // error from r.Read, held back until buffered input is decoded
	r       io.Reader  // source of encoded input
	buf     [1024]byte // leftover input
	nbuf    int        // number of valid bytes in buf
	out     []byte     // leftover decoded output
	outbuf  [1024]byte // backing storage that out is sliced from
}
    257 
    258 func (d *decoder) Read(p []byte) (n int, err error) {
    259 	if len(p) == 0 {
    260 		return 0, nil
    261 	}
    262 	if d.err != nil {
    263 		return 0, d.err
    264 	}
    265 
    266 	for {
    267 		// Copy leftover output from last decode.
    268 		if len(d.out) > 0 {
    269 			n = copy(p, d.out)
    270 			d.out = d.out[n:]
    271 			return
    272 		}
    273 
    274 		// Decode leftover input from last read.
    275 		var nn, nsrc, ndst int
    276 		if d.nbuf > 0 {
    277 			ndst, nsrc, d.err = Decode(d.outbuf[0:], d.buf[0:d.nbuf], d.readErr != nil)
    278 			if ndst > 0 {
    279 				d.out = d.outbuf[0:ndst]
    280 				d.nbuf = copy(d.buf[0:], d.buf[nsrc:d.nbuf])
    281 				continue // copy out and return
    282 			}
    283 			if ndst == 0 && d.err == nil {
    284 				// Special case: input buffer is mostly filled with non-data bytes.
    285 				// Filter out such bytes to make room for more input.
    286 				off := 0
    287 				for i := 0; i < d.nbuf; i++ {
    288 					if d.buf[i] > ' ' {
    289 						d.buf[off] = d.buf[i]
    290 						off++
    291 					}
    292 				}
    293 				d.nbuf = off
    294 			}
    295 		}
    296 
    297 		// Out of input, out of decoded output.  Check errors.
    298 		if d.err != nil {
    299 			return 0, d.err
    300 		}
    301 		if d.readErr != nil {
    302 			d.err = d.readErr
    303 			return 0, d.err
    304 		}
    305 
    306 		// Read more data.
    307 		nn, d.readErr = d.r.Read(d.buf[d.nbuf:])
    308 		d.nbuf += nn
    309 	}
    310 }
    311