Home | History | Annotate | Download | only in base32
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package base32 implements base32 encoding as specified by RFC 4648.
      6 package base32
      7 
      8 import (
      9 	"bytes"
     10 	"io"
     11 	"strconv"
     12 	"strings"
     13 )
     14 
     15 /*
     16  * Encodings
     17  */
     18 
     19 // An Encoding is a radix 32 encoding/decoding scheme, defined by a
     20 // 32-character alphabet. The most common is the "base32" encoding
     21 // introduced for SASL GSSAPI and standardized in RFC 4648.
     22 // The alternate "base32hex" encoding is used in DNSSEC.
     23 type Encoding struct {
     24 	encode    string
     25 	decodeMap [256]byte
     26 }
     27 
     28 const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
     29 const encodeHex = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
     30 
     31 // NewEncoding returns a new Encoding defined by the given alphabet,
     32 // which must be a 32-byte string.
     33 func NewEncoding(encoder string) *Encoding {
     34 	e := new(Encoding)
     35 	e.encode = encoder
     36 	for i := 0; i < len(e.decodeMap); i++ {
     37 		e.decodeMap[i] = 0xFF
     38 	}
     39 	for i := 0; i < len(encoder); i++ {
     40 		e.decodeMap[encoder[i]] = byte(i)
     41 	}
     42 	return e
     43 }
     44 
     45 // StdEncoding is the standard base32 encoding, as defined in
     46 // RFC 4648.
     47 var StdEncoding = NewEncoding(encodeStd)
     48 
     49 // HexEncoding is the ``Extended Hex Alphabet'' defined in RFC 4648.
     50 // It is typically used in DNS.
     51 var HexEncoding = NewEncoding(encodeHex)
     52 
     53 var removeNewlinesMapper = func(r rune) rune {
     54 	if r == '\r' || r == '\n' {
     55 		return -1
     56 	}
     57 	return r
     58 }
     59 
     60 /*
     61  * Encoder
     62  */
     63 
     64 // Encode encodes src using the encoding enc, writing
     65 // EncodedLen(len(src)) bytes to dst.
     66 //
     67 // The encoding pads the output to a multiple of 8 bytes,
     68 // so Encode is not appropriate for use on individual blocks
     69 // of a large data stream. Use NewEncoder() instead.
     70 func (enc *Encoding) Encode(dst, src []byte) {
     71 	if len(src) == 0 {
     72 		return
     73 	}
     74 
     75 	for len(src) > 0 {
     76 		var b0, b1, b2, b3, b4, b5, b6, b7 byte
     77 
     78 		// Unpack 8x 5-bit source blocks into a 5 byte
     79 		// destination quantum
     80 		switch len(src) {
     81 		default:
     82 			b7 = src[4] & 0x1F
     83 			b6 = src[4] >> 5
     84 			fallthrough
     85 		case 4:
     86 			b6 |= (src[3] << 3) & 0x1F
     87 			b5 = (src[3] >> 2) & 0x1F
     88 			b4 = src[3] >> 7
     89 			fallthrough
     90 		case 3:
     91 			b4 |= (src[2] << 1) & 0x1F
     92 			b3 = (src[2] >> 4) & 0x1F
     93 			fallthrough
     94 		case 2:
     95 			b3 |= (src[1] << 4) & 0x1F
     96 			b2 = (src[1] >> 1) & 0x1F
     97 			b1 = (src[1] >> 6) & 0x1F
     98 			fallthrough
     99 		case 1:
    100 			b1 |= (src[0] << 2) & 0x1F
    101 			b0 = src[0] >> 3
    102 		}
    103 
    104 		// Encode 5-bit blocks using the base32 alphabet
    105 		dst[0] = enc.encode[b0]
    106 		dst[1] = enc.encode[b1]
    107 		dst[2] = enc.encode[b2]
    108 		dst[3] = enc.encode[b3]
    109 		dst[4] = enc.encode[b4]
    110 		dst[5] = enc.encode[b5]
    111 		dst[6] = enc.encode[b6]
    112 		dst[7] = enc.encode[b7]
    113 
    114 		// Pad the final quantum
    115 		if len(src) < 5 {
    116 			dst[7] = '='
    117 			if len(src) < 4 {
    118 				dst[6] = '='
    119 				dst[5] = '='
    120 				if len(src) < 3 {
    121 					dst[4] = '='
    122 					if len(src) < 2 {
    123 						dst[3] = '='
    124 						dst[2] = '='
    125 					}
    126 				}
    127 			}
    128 			break
    129 		}
    130 		src = src[5:]
    131 		dst = dst[8:]
    132 	}
    133 }
    134 
    135 // EncodeToString returns the base32 encoding of src.
    136 func (enc *Encoding) EncodeToString(src []byte) string {
    137 	buf := make([]byte, enc.EncodedLen(len(src)))
    138 	enc.Encode(buf, src)
    139 	return string(buf)
    140 }
    141 
    142 type encoder struct {
    143 	err  error
    144 	enc  *Encoding
    145 	w    io.Writer
    146 	buf  [5]byte    // buffered data waiting to be encoded
    147 	nbuf int        // number of bytes in buf
    148 	out  [1024]byte // output buffer
    149 }
    150 
    151 func (e *encoder) Write(p []byte) (n int, err error) {
    152 	if e.err != nil {
    153 		return 0, e.err
    154 	}
    155 
    156 	// Leading fringe.
    157 	if e.nbuf > 0 {
    158 		var i int
    159 		for i = 0; i < len(p) && e.nbuf < 5; i++ {
    160 			e.buf[e.nbuf] = p[i]
    161 			e.nbuf++
    162 		}
    163 		n += i
    164 		p = p[i:]
    165 		if e.nbuf < 5 {
    166 			return
    167 		}
    168 		e.enc.Encode(e.out[0:], e.buf[0:])
    169 		if _, e.err = e.w.Write(e.out[0:8]); e.err != nil {
    170 			return n, e.err
    171 		}
    172 		e.nbuf = 0
    173 	}
    174 
    175 	// Large interior chunks.
    176 	for len(p) >= 5 {
    177 		nn := len(e.out) / 8 * 5
    178 		if nn > len(p) {
    179 			nn = len(p)
    180 			nn -= nn % 5
    181 		}
    182 		e.enc.Encode(e.out[0:], p[0:nn])
    183 		if _, e.err = e.w.Write(e.out[0 : nn/5*8]); e.err != nil {
    184 			return n, e.err
    185 		}
    186 		n += nn
    187 		p = p[nn:]
    188 	}
    189 
    190 	// Trailing fringe.
    191 	for i := 0; i < len(p); i++ {
    192 		e.buf[i] = p[i]
    193 	}
    194 	e.nbuf = len(p)
    195 	n += len(p)
    196 	return
    197 }
    198 
    199 // Close flushes any pending output from the encoder.
    200 // It is an error to call Write after calling Close.
    201 func (e *encoder) Close() error {
    202 	// If there's anything left in the buffer, flush it out
    203 	if e.err == nil && e.nbuf > 0 {
    204 		e.enc.Encode(e.out[0:], e.buf[0:e.nbuf])
    205 		e.nbuf = 0
    206 		_, e.err = e.w.Write(e.out[0:8])
    207 	}
    208 	return e.err
    209 }
    210 
    211 // NewEncoder returns a new base32 stream encoder. Data written to
    212 // the returned writer will be encoded using enc and then written to w.
    213 // Base32 encodings operate in 5-byte blocks; when finished
    214 // writing, the caller must Close the returned encoder to flush any
    215 // partially written blocks.
    216 func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser {
    217 	return &encoder{enc: enc, w: w}
    218 }
    219 
    220 // EncodedLen returns the length in bytes of the base32 encoding
    221 // of an input buffer of length n.
    222 func (enc *Encoding) EncodedLen(n int) int { return (n + 4) / 5 * 8 }
    223 
    224 /*
    225  * Decoder
    226  */
    227 
    228 type CorruptInputError int64
    229 
    230 func (e CorruptInputError) Error() string {
    231 	return "illegal base32 data at input byte " + strconv.FormatInt(int64(e), 10)
    232 }
    233 
    234 // decode is like Decode but returns an additional 'end' value, which
    235 // indicates if end-of-message padding was encountered and thus any
    236 // additional data is an error. This method assumes that src has been
    237 // stripped of all supported whitespace ('\r' and '\n').
    238 func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
    239 	olen := len(src)
    240 	for len(src) > 0 && !end {
    241 		// Decode quantum using the base32 alphabet
    242 		var dbuf [8]byte
    243 		dlen := 8
    244 
    245 		for j := 0; j < 8; {
    246 			if len(src) == 0 {
    247 				return n, false, CorruptInputError(olen - len(src) - j)
    248 			}
    249 			in := src[0]
    250 			src = src[1:]
    251 			if in == '=' && j >= 2 && len(src) < 8 {
    252 				// We've reached the end and there's padding
    253 				if len(src)+j < 8-1 {
    254 					// not enough padding
    255 					return n, false, CorruptInputError(olen)
    256 				}
    257 				for k := 0; k < 8-1-j; k++ {
    258 					if len(src) > k && src[k] != '=' {
    259 						// incorrect padding
    260 						return n, false, CorruptInputError(olen - len(src) + k - 1)
    261 					}
    262 				}
    263 				dlen, end = j, true
    264 				// 7, 5 and 2 are not valid padding lengths, and so 1, 3 and 6 are not
    265 				// valid dlen values. See RFC 4648 Section 6 "Base 32 Encoding" listing
    266 				// the five valid padding lengths, and Section 9 "Illustrations and
    267 				// Examples" for an illustration for how the 1st, 3rd and 6th base32
    268 				// src bytes do not yield enough information to decode a dst byte.
    269 				if dlen == 1 || dlen == 3 || dlen == 6 {
    270 					return n, false, CorruptInputError(olen - len(src) - 1)
    271 				}
    272 				break
    273 			}
    274 			dbuf[j] = enc.decodeMap[in]
    275 			if dbuf[j] == 0xFF {
    276 				return n, false, CorruptInputError(olen - len(src) - 1)
    277 			}
    278 			j++
    279 		}
    280 
    281 		// Pack 8x 5-bit source blocks into 5 byte destination
    282 		// quantum
    283 		switch dlen {
    284 		case 8:
    285 			dst[4] = dbuf[6]<<5 | dbuf[7]
    286 			fallthrough
    287 		case 7:
    288 			dst[3] = dbuf[4]<<7 | dbuf[5]<<2 | dbuf[6]>>3
    289 			fallthrough
    290 		case 5:
    291 			dst[2] = dbuf[3]<<4 | dbuf[4]>>1
    292 			fallthrough
    293 		case 4:
    294 			dst[1] = dbuf[1]<<6 | dbuf[2]<<1 | dbuf[3]>>4
    295 			fallthrough
    296 		case 2:
    297 			dst[0] = dbuf[0]<<3 | dbuf[1]>>2
    298 		}
    299 		dst = dst[5:]
    300 		switch dlen {
    301 		case 2:
    302 			n += 1
    303 		case 4:
    304 			n += 2
    305 		case 5:
    306 			n += 3
    307 		case 7:
    308 			n += 4
    309 		case 8:
    310 			n += 5
    311 		}
    312 	}
    313 	return n, end, nil
    314 }
    315 
    316 // Decode decodes src using the encoding enc. It writes at most
    317 // DecodedLen(len(src)) bytes to dst and returns the number of bytes
    318 // written. If src contains invalid base32 data, it will return the
    319 // number of bytes successfully written and CorruptInputError.
    320 // New line characters (\r and \n) are ignored.
    321 func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
    322 	src = bytes.Map(removeNewlinesMapper, src)
    323 	n, _, err = enc.decode(dst, src)
    324 	return
    325 }
    326 
    327 // DecodeString returns the bytes represented by the base32 string s.
    328 func (enc *Encoding) DecodeString(s string) ([]byte, error) {
    329 	s = strings.Map(removeNewlinesMapper, s)
    330 	dbuf := make([]byte, enc.DecodedLen(len(s)))
    331 	n, _, err := enc.decode(dbuf, []byte(s))
    332 	return dbuf[:n], err
    333 }
    334 
    335 type decoder struct {
    336 	err    error
    337 	enc    *Encoding
    338 	r      io.Reader
    339 	end    bool       // saw end of message
    340 	buf    [1024]byte // leftover input
    341 	nbuf   int
    342 	out    []byte // leftover decoded output
    343 	outbuf [1024 / 8 * 5]byte
    344 }
    345 
    346 func (d *decoder) Read(p []byte) (n int, err error) {
    347 	if d.err != nil {
    348 		return 0, d.err
    349 	}
    350 
    351 	// Use leftover decoded output from last read.
    352 	if len(d.out) > 0 {
    353 		n = copy(p, d.out)
    354 		d.out = d.out[n:]
    355 		return n, nil
    356 	}
    357 
    358 	// Read a chunk.
    359 	nn := len(p) / 5 * 8
    360 	if nn < 8 {
    361 		nn = 8
    362 	}
    363 	if nn > len(d.buf) {
    364 		nn = len(d.buf)
    365 	}
    366 	nn, d.err = io.ReadAtLeast(d.r, d.buf[d.nbuf:nn], 8-d.nbuf)
    367 	d.nbuf += nn
    368 	if d.nbuf < 8 {
    369 		return 0, d.err
    370 	}
    371 
    372 	// Decode chunk into p, or d.out and then p if p is too small.
    373 	nr := d.nbuf / 8 * 8
    374 	nw := d.nbuf / 8 * 5
    375 	if nw > len(p) {
    376 		nw, d.end, d.err = d.enc.decode(d.outbuf[0:], d.buf[0:nr])
    377 		d.out = d.outbuf[0:nw]
    378 		n = copy(p, d.out)
    379 		d.out = d.out[n:]
    380 	} else {
    381 		n, d.end, d.err = d.enc.decode(p, d.buf[0:nr])
    382 	}
    383 	d.nbuf -= nr
    384 	for i := 0; i < d.nbuf; i++ {
    385 		d.buf[i] = d.buf[i+nr]
    386 	}
    387 
    388 	if d.err == nil {
    389 		d.err = err
    390 	}
    391 	return n, d.err
    392 }
    393 
    394 type newlineFilteringReader struct {
    395 	wrapped io.Reader
    396 }
    397 
    398 func (r *newlineFilteringReader) Read(p []byte) (int, error) {
    399 	n, err := r.wrapped.Read(p)
    400 	for n > 0 {
    401 		offset := 0
    402 		for i, b := range p[0:n] {
    403 			if b != '\r' && b != '\n' {
    404 				if i != offset {
    405 					p[offset] = b
    406 				}
    407 				offset++
    408 			}
    409 		}
    410 		if offset > 0 {
    411 			return offset, err
    412 		}
    413 		// Previous buffer entirely whitespace, read again
    414 		n, err = r.wrapped.Read(p)
    415 	}
    416 	return n, err
    417 }
    418 
    419 // NewDecoder constructs a new base32 stream decoder.
    420 func NewDecoder(enc *Encoding, r io.Reader) io.Reader {
    421 	return &decoder{enc: enc, r: &newlineFilteringReader{r}}
    422 }
    423 
    424 // DecodedLen returns the maximum length in bytes of the decoded data
    425 // corresponding to n bytes of base32-encoded data.
    426 func (enc *Encoding) DecodedLen(n int) int { return n / 8 * 5 }
    427