Home | History | Annotate | Download | only in quotedprintable
      1 // Copyright 2012 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package quotedprintable implements quoted-printable encoding as specified by
      6 // RFC 2045.
      7 package quotedprintable
      8 
      9 import (
     10 	"bufio"
     11 	"bytes"
     12 	"fmt"
     13 	"io"
     14 )
     15 
// Reader is a quoted-printable decoder.
//
// It pulls encoded input from br one line at a time, keeps the not yet
// decoded remainder of that line in line, and remembers the most recent
// read error in rerr so Read can report it once line has been used up.
type Reader struct {
	br   *bufio.Reader
	rerr error  // last read error; Read returns it once line is empty
	line []byte // rest of the current line, to be consumed before more of br
}
     22 
     23 // NewReader returns a quoted-printable reader, decoding from r.
     24 func NewReader(r io.Reader) *Reader {
     25 	return &Reader{
     26 		br: bufio.NewReader(r),
     27 	}
     28 }
     29 
     30 func fromHex(b byte) (byte, error) {
     31 	switch {
     32 	case b >= '0' && b <= '9':
     33 		return b - '0', nil
     34 	case b >= 'A' && b <= 'F':
     35 		return b - 'A' + 10, nil
     36 	// Accept badly encoded bytes.
     37 	case b >= 'a' && b <= 'f':
     38 		return b - 'a' + 10, nil
     39 	}
     40 	return 0, fmt.Errorf("quotedprintable: invalid hex byte 0x%02x", b)
     41 }
     42 
     43 func readHexByte(v []byte) (b byte, err error) {
     44 	if len(v) < 2 {
     45 		return 0, io.ErrUnexpectedEOF
     46 	}
     47 	var hb, lb byte
     48 	if hb, err = fromHex(v[0]); err != nil {
     49 		return 0, err
     50 	}
     51 	if lb, err = fromHex(v[1]); err != nil {
     52 		return 0, err
     53 	}
     54 	return hb<<4 | lb, nil
     55 }
     56 
     57 func isQPDiscardWhitespace(r rune) bool {
     58 	switch r {
     59 	case '\n', '\r', ' ', '\t':
     60 		return true
     61 	}
     62 	return false
     63 }
     64 
     65 var (
     66 	crlf       = []byte("\r\n")
     67 	lf         = []byte("\n")
     68 	softSuffix = []byte("=")
     69 )
     70 
     71 // Read reads and decodes quoted-printable data from the underlying reader.
     72 func (r *Reader) Read(p []byte) (n int, err error) {
     73 	// Deviations from RFC 2045:
     74 	// 1. in addition to "=\r\n", "=\n" is also treated as soft line break.
     75 	// 2. it will pass through a '\r' or '\n' not preceded by '=', consistent
     76 	//    with other broken QP encoders & decoders.
     77 	// 3. it accepts soft line-break (=) at end of message (issue 15486); i.e.
     78 	//    the final byte read from the underlying reader is allowed to be '=',
     79 	//    and it will be silently ignored.
     80 	// 4. it takes = as literal = if not followed by two hex digits
     81 	//    but not at end of line (issue 13219).
     82 	for len(p) > 0 {
     83 		if len(r.line) == 0 {
     84 			if r.rerr != nil {
     85 				return n, r.rerr
     86 			}
     87 			r.line, r.rerr = r.br.ReadSlice('\n')
     88 
     89 			// Does the line end in CRLF instead of just LF?
     90 			hasLF := bytes.HasSuffix(r.line, lf)
     91 			hasCR := bytes.HasSuffix(r.line, crlf)
     92 			wholeLine := r.line
     93 			r.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace)
     94 			if bytes.HasSuffix(r.line, softSuffix) {
     95 				rightStripped := wholeLine[len(r.line):]
     96 				r.line = r.line[:len(r.line)-1]
     97 				if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) &&
     98 					!(len(rightStripped) == 0 && len(r.line) > 0 && r.rerr == io.EOF) {
     99 					r.rerr = fmt.Errorf("quotedprintable: invalid bytes after =: %q", rightStripped)
    100 				}
    101 			} else if hasLF {
    102 				if hasCR {
    103 					r.line = append(r.line, '\r', '\n')
    104 				} else {
    105 					r.line = append(r.line, '\n')
    106 				}
    107 			}
    108 			continue
    109 		}
    110 		b := r.line[0]
    111 
    112 		switch {
    113 		case b == '=':
    114 			b, err = readHexByte(r.line[1:])
    115 			if err != nil {
    116 				if len(r.line) >= 2 && r.line[1] != '\r' && r.line[1] != '\n' {
    117 					// Take the = as a literal =.
    118 					b = '='
    119 					break
    120 				}
    121 				return n, err
    122 			}
    123 			r.line = r.line[2:] // 2 of the 3; other 1 is done below
    124 		case b == '\t' || b == '\r' || b == '\n':
    125 			break
    126 		case b < ' ' || b > '~':
    127 			return n, fmt.Errorf("quotedprintable: invalid unescaped byte 0x%02x in body", b)
    128 		}
    129 		p[0] = b
    130 		p = p[1:]
    131 		r.line = r.line[1:]
    132 		n++
    133 	}
    134 	return n, nil
    135 }
    136