1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package base32 implements base32 encoding as specified by RFC 4648. 6 package base32 7 8 import ( 9 "bytes" 10 "io" 11 "strconv" 12 "strings" 13 ) 14 15 /* 16 * Encodings 17 */ 18 19 // An Encoding is a radix 32 encoding/decoding scheme, defined by a 20 // 32-character alphabet. The most common is the "base32" encoding 21 // introduced for SASL GSSAPI and standardized in RFC 4648. 22 // The alternate "base32hex" encoding is used in DNSSEC. 23 type Encoding struct { 24 encode string 25 decodeMap [256]byte 26 } 27 28 const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567" 29 const encodeHex = "0123456789ABCDEFGHIJKLMNOPQRSTUV" 30 31 // NewEncoding returns a new Encoding defined by the given alphabet, 32 // which must be a 32-byte string. 33 func NewEncoding(encoder string) *Encoding { 34 e := new(Encoding) 35 e.encode = encoder 36 for i := 0; i < len(e.decodeMap); i++ { 37 e.decodeMap[i] = 0xFF 38 } 39 for i := 0; i < len(encoder); i++ { 40 e.decodeMap[encoder[i]] = byte(i) 41 } 42 return e 43 } 44 45 // StdEncoding is the standard base32 encoding, as defined in 46 // RFC 4648. 47 var StdEncoding = NewEncoding(encodeStd) 48 49 // HexEncoding is the ``Extended Hex Alphabet'' defined in RFC 4648. 50 // It is typically used in DNS. 51 var HexEncoding = NewEncoding(encodeHex) 52 53 var removeNewlinesMapper = func(r rune) rune { 54 if r == '\r' || r == '\n' { 55 return -1 56 } 57 return r 58 } 59 60 /* 61 * Encoder 62 */ 63 64 // Encode encodes src using the encoding enc, writing 65 // EncodedLen(len(src)) bytes to dst. 66 // 67 // The encoding pads the output to a multiple of 8 bytes, 68 // so Encode is not appropriate for use on individual blocks 69 // of a large data stream. Use NewEncoder() instead. 70 func (enc *Encoding) Encode(dst, src []byte) { 71 if len(src) == 0 { 72 return 73 } 74 75 for len(src) > 0 { 76 var b0, b1, b2, b3, b4, b5, b6, b7 byte 77 78 // Unpack 8x 5-bit source blocks into a 5 byte 79 // destination quantum 80 switch len(src) { 81 default: 82 b7 = src[4] & 0x1F 83 b6 = src[4] >> 5 84 fallthrough 85 case 4: 86 b6 |= (src[3] << 3) & 0x1F 87 b5 = (src[3] >> 2) & 0x1F 88 b4 = src[3] >> 7 89 fallthrough 90 case 3: 91 b4 |= (src[2] << 1) & 0x1F 92 b3 = (src[2] >> 4) & 0x1F 93 fallthrough 94 case 2: 95 b3 |= (src[1] << 4) & 0x1F 96 b2 = (src[1] >> 1) & 0x1F 97 b1 = (src[1] >> 6) & 0x1F 98 fallthrough 99 case 1: 100 b1 |= (src[0] << 2) & 0x1F 101 b0 = src[0] >> 3 102 } 103 104 // Encode 5-bit blocks using the base32 alphabet 105 dst[0] = enc.encode[b0] 106 dst[1] = enc.encode[b1] 107 dst[2] = enc.encode[b2] 108 dst[3] = enc.encode[b3] 109 dst[4] = enc.encode[b4] 110 dst[5] = enc.encode[b5] 111 dst[6] = enc.encode[b6] 112 dst[7] = enc.encode[b7] 113 114 // Pad the final quantum 115 if len(src) < 5 { 116 dst[7] = '=' 117 if len(src) < 4 { 118 dst[6] = '=' 119 dst[5] = '=' 120 if len(src) < 3 { 121 dst[4] = '=' 122 if len(src) < 2 { 123 dst[3] = '=' 124 dst[2] = '=' 125 } 126 } 127 } 128 break 129 } 130 src = src[5:] 131 dst = dst[8:] 132 } 133 } 134 135 // EncodeToString returns the base32 encoding of src. 136 func (enc *Encoding) EncodeToString(src []byte) string { 137 buf := make([]byte, enc.EncodedLen(len(src))) 138 enc.Encode(buf, src) 139 return string(buf) 140 } 141 142 type encoder struct { 143 err error 144 enc *Encoding 145 w io.Writer 146 buf [5]byte // buffered data waiting to be encoded 147 nbuf int // number of bytes in buf 148 out [1024]byte // output buffer 149 } 150 151 func (e *encoder) Write(p []byte) (n int, err error) { 152 if e.err != nil { 153 return 0, e.err 154 } 155 156 // Leading fringe. 157 if e.nbuf > 0 { 158 var i int 159 for i = 0; i < len(p) && e.nbuf < 5; i++ { 160 e.buf[e.nbuf] = p[i] 161 e.nbuf++ 162 } 163 n += i 164 p = p[i:] 165 if e.nbuf < 5 { 166 return 167 } 168 e.enc.Encode(e.out[0:], e.buf[0:]) 169 if _, e.err = e.w.Write(e.out[0:8]); e.err != nil { 170 return n, e.err 171 } 172 e.nbuf = 0 173 } 174 175 // Large interior chunks. 176 for len(p) >= 5 { 177 nn := len(e.out) / 8 * 5 178 if nn > len(p) { 179 nn = len(p) 180 nn -= nn % 5 181 } 182 e.enc.Encode(e.out[0:], p[0:nn]) 183 if _, e.err = e.w.Write(e.out[0 : nn/5*8]); e.err != nil { 184 return n, e.err 185 } 186 n += nn 187 p = p[nn:] 188 } 189 190 // Trailing fringe. 191 for i := 0; i < len(p); i++ { 192 e.buf[i] = p[i] 193 } 194 e.nbuf = len(p) 195 n += len(p) 196 return 197 } 198 199 // Close flushes any pending output from the encoder. 200 // It is an error to call Write after calling Close. 201 func (e *encoder) Close() error { 202 // If there's anything left in the buffer, flush it out 203 if e.err == nil && e.nbuf > 0 { 204 e.enc.Encode(e.out[0:], e.buf[0:e.nbuf]) 205 e.nbuf = 0 206 _, e.err = e.w.Write(e.out[0:8]) 207 } 208 return e.err 209 } 210 211 // NewEncoder returns a new base32 stream encoder. Data written to 212 // the returned writer will be encoded using enc and then written to w. 213 // Base32 encodings operate in 5-byte blocks; when finished 214 // writing, the caller must Close the returned encoder to flush any 215 // partially written blocks. 216 func NewEncoder(enc *Encoding, w io.Writer) io.WriteCloser { 217 return &encoder{enc: enc, w: w} 218 } 219 220 // EncodedLen returns the length in bytes of the base32 encoding 221 // of an input buffer of length n. 222 func (enc *Encoding) EncodedLen(n int) int { return (n + 4) / 5 * 8 } 223 224 /* 225 * Decoder 226 */ 227 228 type CorruptInputError int64 229 230 func (e CorruptInputError) Error() string { 231 return "illegal base32 data at input byte " + strconv.FormatInt(int64(e), 10) 232 } 233 234 // decode is like Decode but returns an additional 'end' value, which 235 // indicates if end-of-message padding was encountered and thus any 236 // additional data is an error. This method assumes that src has been 237 // stripped of all supported whitespace ('\r' and '\n'). 238 func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { 239 olen := len(src) 240 for len(src) > 0 && !end { 241 // Decode quantum using the base32 alphabet 242 var dbuf [8]byte 243 dlen := 8 244 245 for j := 0; j < 8; { 246 if len(src) == 0 { 247 return n, false, CorruptInputError(olen - len(src) - j) 248 } 249 in := src[0] 250 src = src[1:] 251 if in == '=' && j >= 2 && len(src) < 8 { 252 // We've reached the end and there's padding 253 if len(src)+j < 8-1 { 254 // not enough padding 255 return n, false, CorruptInputError(olen) 256 } 257 for k := 0; k < 8-1-j; k++ { 258 if len(src) > k && src[k] != '=' { 259 // incorrect padding 260 return n, false, CorruptInputError(olen - len(src) + k - 1) 261 } 262 } 263 dlen, end = j, true 264 // 7, 5 and 2 are not valid padding lengths, and so 1, 3 and 6 are not 265 // valid dlen values. See RFC 4648 Section 6 "Base 32 Encoding" listing 266 // the five valid padding lengths, and Section 9 "Illustrations and 267 // Examples" for an illustration for how the 1st, 3rd and 6th base32 268 // src bytes do not yield enough information to decode a dst byte. 269 if dlen == 1 || dlen == 3 || dlen == 6 { 270 return n, false, CorruptInputError(olen - len(src) - 1) 271 } 272 break 273 } 274 dbuf[j] = enc.decodeMap[in] 275 if dbuf[j] == 0xFF { 276 return n, false, CorruptInputError(olen - len(src) - 1) 277 } 278 j++ 279 } 280 281 // Pack 8x 5-bit source blocks into 5 byte destination 282 // quantum 283 switch dlen { 284 case 8: 285 dst[4] = dbuf[6]<<5 | dbuf[7] 286 fallthrough 287 case 7: 288 dst[3] = dbuf[4]<<7 | dbuf[5]<<2 | dbuf[6]>>3 289 fallthrough 290 case 5: 291 dst[2] = dbuf[3]<<4 | dbuf[4]>>1 292 fallthrough 293 case 4: 294 dst[1] = dbuf[1]<<6 | dbuf[2]<<1 | dbuf[3]>>4 295 fallthrough 296 case 2: 297 dst[0] = dbuf[0]<<3 | dbuf[1]>>2 298 } 299 dst = dst[5:] 300 switch dlen { 301 case 2: 302 n += 1 303 case 4: 304 n += 2 305 case 5: 306 n += 3 307 case 7: 308 n += 4 309 case 8: 310 n += 5 311 } 312 } 313 return n, end, nil 314 } 315 316 // Decode decodes src using the encoding enc. It writes at most 317 // DecodedLen(len(src)) bytes to dst and returns the number of bytes 318 // written. If src contains invalid base32 data, it will return the 319 // number of bytes successfully written and CorruptInputError. 320 // New line characters (\r and \n) are ignored. 321 func (enc *Encoding) Decode(dst, src []byte) (n int, err error) { 322 src = bytes.Map(removeNewlinesMapper, src) 323 n, _, err = enc.decode(dst, src) 324 return 325 } 326 327 // DecodeString returns the bytes represented by the base32 string s. 328 func (enc *Encoding) DecodeString(s string) ([]byte, error) { 329 s = strings.Map(removeNewlinesMapper, s) 330 dbuf := make([]byte, enc.DecodedLen(len(s))) 331 n, _, err := enc.decode(dbuf, []byte(s)) 332 return dbuf[:n], err 333 } 334 335 type decoder struct { 336 err error 337 enc *Encoding 338 r io.Reader 339 end bool // saw end of message 340 buf [1024]byte // leftover input 341 nbuf int 342 out []byte // leftover decoded output 343 outbuf [1024 / 8 * 5]byte 344 } 345 346 func (d *decoder) Read(p []byte) (n int, err error) { 347 if d.err != nil { 348 return 0, d.err 349 } 350 351 // Use leftover decoded output from last read. 352 if len(d.out) > 0 { 353 n = copy(p, d.out) 354 d.out = d.out[n:] 355 return n, nil 356 } 357 358 // Read a chunk. 359 nn := len(p) / 5 * 8 360 if nn < 8 { 361 nn = 8 362 } 363 if nn > len(d.buf) { 364 nn = len(d.buf) 365 } 366 nn, d.err = io.ReadAtLeast(d.r, d.buf[d.nbuf:nn], 8-d.nbuf) 367 d.nbuf += nn 368 if d.nbuf < 8 { 369 return 0, d.err 370 } 371 372 // Decode chunk into p, or d.out and then p if p is too small. 373 nr := d.nbuf / 8 * 8 374 nw := d.nbuf / 8 * 5 375 if nw > len(p) { 376 nw, d.end, d.err = d.enc.decode(d.outbuf[0:], d.buf[0:nr]) 377 d.out = d.outbuf[0:nw] 378 n = copy(p, d.out) 379 d.out = d.out[n:] 380 } else { 381 n, d.end, d.err = d.enc.decode(p, d.buf[0:nr]) 382 } 383 d.nbuf -= nr 384 for i := 0; i < d.nbuf; i++ { 385 d.buf[i] = d.buf[i+nr] 386 } 387 388 if d.err == nil { 389 d.err = err 390 } 391 return n, d.err 392 } 393 394 type newlineFilteringReader struct { 395 wrapped io.Reader 396 } 397 398 func (r *newlineFilteringReader) Read(p []byte) (int, error) { 399 n, err := r.wrapped.Read(p) 400 for n > 0 { 401 offset := 0 402 for i, b := range p[0:n] { 403 if b != '\r' && b != '\n' { 404 if i != offset { 405 p[offset] = b 406 } 407 offset++ 408 } 409 } 410 if offset > 0 { 411 return offset, err 412 } 413 // Previous buffer entirely whitespace, read again 414 n, err = r.wrapped.Read(p) 415 } 416 return n, err 417 } 418 419 // NewDecoder constructs a new base32 stream decoder. 420 func NewDecoder(enc *Encoding, r io.Reader) io.Reader { 421 return &decoder{enc: enc, r: &newlineFilteringReader{r}} 422 } 423 424 // DecodedLen returns the maximum length in bytes of the decoded data 425 // corresponding to n bytes of base32-encoded data. 426 func (enc *Encoding) DecodedLen(n int) int { return n / 8 * 5 } 427