Home | History | Annotate | Download | only in email
      1 // Copyright 2017 syzkaller project authors. All rights reserved.
      2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
      3 
      4 package email
      5 
      6 import (
      7 	"bytes"
      8 	"encoding/base64"
      9 	"fmt"
     10 	"io"
     11 	"io/ioutil"
     12 	"mime"
     13 	"mime/multipart"
     14 	"mime/quotedprintable"
     15 	"net/mail"
     16 	"regexp"
     17 	"sort"
     18 	"strings"
     19 )
     20 
     21 type Email struct {
     22 	BugID       string
     23 	MessageID   string
     24 	Link        string
     25 	Subject     string
     26 	From        string
     27 	Cc          []string
     28 	Body        string // text/plain part
     29 	Patch       string // attached patch, if any
     30 	Command     string // command to bot (#syz is stripped)
     31 	CommandArgs string // arguments for the command
     32 }
     33 
     34 const commandPrefix = "#syz "
     35 
     36 var groupsLinkRe = regexp.MustCompile("\nTo view this discussion on the web visit" +
     37 	" (https://groups\\.google\\.com/.*?)\\.(?:\r)?\n")
     38 
     39 func Parse(r io.Reader, ownEmails []string) (*Email, error) {
     40 	msg, err := mail.ReadMessage(r)
     41 	if err != nil {
     42 		return nil, fmt.Errorf("failed to read email: %v", err)
     43 	}
     44 	from, err := msg.Header.AddressList("From")
     45 	if err != nil {
     46 		return nil, fmt.Errorf("failed to parse email header 'From': %v", err)
     47 	}
     48 	if len(from) == 0 {
     49 		return nil, fmt.Errorf("failed to parse email header 'To': no senders")
     50 	}
     51 	// Ignore errors since To: header may not be present (we've seen such case).
     52 	to, _ := msg.Header.AddressList("To")
     53 	// AddressList fails if the header is not present.
     54 	cc, _ := msg.Header.AddressList("Cc")
     55 	bugID := ""
     56 	var ccList []string
     57 	ownAddrs := make(map[string]bool)
     58 	for _, email := range ownEmails {
     59 		ownAddrs[email] = true
     60 		if addr, err := mail.ParseAddress(email); err == nil {
     61 			ownAddrs[addr.Address] = true
     62 		}
     63 	}
     64 	fromMe := false
     65 	for _, addr := range from {
     66 		cleaned, _, _ := RemoveAddrContext(addr.Address)
     67 		if addr, err := mail.ParseAddress(cleaned); err == nil && ownAddrs[addr.Address] {
     68 			fromMe = true
     69 		}
     70 	}
     71 	for _, addr := range append(append(cc, to...), from...) {
     72 		cleaned, context, _ := RemoveAddrContext(addr.Address)
     73 		if addr, err := mail.ParseAddress(cleaned); err == nil {
     74 			cleaned = addr.Address
     75 		}
     76 		if ownAddrs[cleaned] {
     77 			if bugID == "" {
     78 				bugID = context
     79 			}
     80 		} else {
     81 			ccList = append(ccList, cleaned)
     82 		}
     83 	}
     84 	ccList = MergeEmailLists(ccList)
     85 	body, attachments, err := parseBody(msg.Body, msg.Header)
     86 	if err != nil {
     87 		return nil, err
     88 	}
     89 	bodyStr := string(body)
     90 	patch, cmd, cmdArgs := "", "", ""
     91 	if !fromMe {
     92 		for _, a := range attachments {
     93 			_, patch, _ = ParsePatch(string(a))
     94 			if patch != "" {
     95 				break
     96 			}
     97 		}
     98 		if patch == "" {
     99 			_, patch, _ = ParsePatch(bodyStr)
    100 		}
    101 		cmd, cmdArgs = extractCommand(body)
    102 	}
    103 	link := ""
    104 	if match := groupsLinkRe.FindStringSubmatchIndex(bodyStr); match != nil {
    105 		link = bodyStr[match[2]:match[3]]
    106 	}
    107 	email := &Email{
    108 		BugID:       bugID,
    109 		MessageID:   msg.Header.Get("Message-ID"),
    110 		Link:        link,
    111 		Subject:     msg.Header.Get("Subject"),
    112 		From:        from[0].String(),
    113 		Cc:          ccList,
    114 		Body:        string(body),
    115 		Patch:       patch,
    116 		Command:     cmd,
    117 		CommandArgs: cmdArgs,
    118 	}
    119 	return email, nil
    120 }
    121 
    122 // AddAddrContext embeds context into local part of the provided email address using '+'.
    123 // Returns the resulting email address.
    124 func AddAddrContext(email, context string) (string, error) {
    125 	addr, err := mail.ParseAddress(email)
    126 	if err != nil {
    127 		return "", fmt.Errorf("failed to parse %q as email: %v", email, err)
    128 	}
    129 	at := strings.IndexByte(addr.Address, '@')
    130 	if at == -1 {
    131 		return "", fmt.Errorf("failed to parse %q as email: no @", email)
    132 	}
    133 	result := addr.Address[:at] + "+" + context + addr.Address[at:]
    134 	if addr.Name != "" {
    135 		addr.Address = result
    136 		result = addr.String()
    137 	}
    138 	return result, nil
    139 }
    140 
    141 // RemoveAddrContext extracts context after '+' from the local part of the provided email address.
    142 // Returns address without the context and the context.
    143 func RemoveAddrContext(email string) (string, string, error) {
    144 	addr, err := mail.ParseAddress(email)
    145 	if err != nil {
    146 		return "", "", fmt.Errorf("failed to parse %q as email: %v", email, err)
    147 	}
    148 	at := strings.IndexByte(addr.Address, '@')
    149 	if at == -1 {
    150 		return "", "", fmt.Errorf("failed to parse %q as email: no @", email)
    151 	}
    152 	plus := strings.LastIndexByte(addr.Address[:at], '+')
    153 	if plus == -1 {
    154 		return email, "", nil
    155 	}
    156 	context := addr.Address[plus+1 : at]
    157 	addr.Address = addr.Address[:plus] + addr.Address[at:]
    158 	return addr.String(), context, nil
    159 }
    160 
    161 func CanonicalEmail(email string) string {
    162 	addr, err := mail.ParseAddress(email)
    163 	if err != nil {
    164 		return email
    165 	}
    166 	at := strings.IndexByte(addr.Address, '@')
    167 	if at == -1 {
    168 		return email
    169 	}
    170 	if plus := strings.IndexByte(addr.Address[:at], '+'); plus != -1 {
    171 		addr.Address = addr.Address[:plus] + addr.Address[at:]
    172 	}
    173 	return strings.ToLower(addr.Address)
    174 }
    175 
    176 // extractCommand extracts command to syzbot from email body.
    177 // Commands are of the following form:
    178 // ^#syz cmd args...
    179 func extractCommand(body []byte) (cmd, args string) {
    180 	cmdPos := bytes.Index(append([]byte{'\n'}, body...), []byte("\n"+commandPrefix))
    181 	if cmdPos == -1 {
    182 		return
    183 	}
    184 	cmdPos += len(commandPrefix)
    185 	for cmdPos < len(body) && body[cmdPos] == ' ' {
    186 		cmdPos++
    187 	}
    188 	cmdEnd := bytes.IndexByte(body[cmdPos:], '\n')
    189 	if cmdEnd == -1 {
    190 		cmdEnd = len(body) - cmdPos
    191 	}
    192 	if cmdEnd1 := bytes.IndexByte(body[cmdPos:], '\r'); cmdEnd1 != -1 && cmdEnd1 < cmdEnd {
    193 		cmdEnd = cmdEnd1
    194 	}
    195 	if cmdEnd1 := bytes.IndexByte(body[cmdPos:], ' '); cmdEnd1 != -1 && cmdEnd1 < cmdEnd {
    196 		cmdEnd = cmdEnd1
    197 	}
    198 	cmd = string(body[cmdPos : cmdPos+cmdEnd])
    199 	// Some email clients split text emails at 80 columns are the transformation is irrevesible.
    200 	// We try hard to restore what was there before.
    201 	// For "test:" command we know that there must be 2 tokens without spaces.
    202 	// For "fix:"/"dup:" we need a whole non-empty line of text.
    203 	switch cmd {
    204 	case "test:":
    205 		args = extractArgsTokens(body[cmdPos+cmdEnd:], 2)
    206 	case "test_5_arg_cmd":
    207 		args = extractArgsTokens(body[cmdPos+cmdEnd:], 5)
    208 	case "fix:", "dup:":
    209 		args = extractArgsLine(body[cmdPos+cmdEnd:])
    210 	}
    211 	return
    212 }
    213 
    214 func extractArgsTokens(body []byte, num int) string {
    215 	var args []string
    216 	for pos := 0; len(args) < num && pos < len(body); {
    217 		lineEnd := bytes.IndexByte(body[pos:], '\n')
    218 		if lineEnd == -1 {
    219 			lineEnd = len(body) - pos
    220 		}
    221 		line := strings.TrimSpace(string(body[pos : pos+lineEnd]))
    222 		for {
    223 			line1 := strings.Replace(line, "  ", " ", -1)
    224 			if line == line1 {
    225 				break
    226 			}
    227 			line = line1
    228 		}
    229 		if line != "" {
    230 			args = append(args, strings.Split(line, " ")...)
    231 		}
    232 		pos += lineEnd + 1
    233 	}
    234 	return strings.TrimSpace(strings.Join(args, " "))
    235 }
    236 
    237 func extractArgsLine(body []byte) string {
    238 	pos := 0
    239 	for pos < len(body) && (body[pos] == ' ' || body[pos] == '\t' ||
    240 		body[pos] == '\n' || body[pos] == '\r') {
    241 		pos++
    242 	}
    243 	lineEnd := bytes.IndexByte(body[pos:], '\n')
    244 	if lineEnd == -1 {
    245 		lineEnd = len(body) - pos
    246 	}
    247 	return strings.TrimSpace(string(body[pos : pos+lineEnd]))
    248 }
    249 
    250 func parseBody(r io.Reader, headers mail.Header) ([]byte, [][]byte, error) {
    251 	// git-send-email sends emails without Content-Type, let's assume it's text.
    252 	mediaType := "text/plain"
    253 	var params map[string]string
    254 	if contentType := headers.Get("Content-Type"); contentType != "" {
    255 		var err error
    256 		mediaType, params, err = mime.ParseMediaType(headers.Get("Content-Type"))
    257 		if err != nil {
    258 			return nil, nil, fmt.Errorf("failed to parse email header 'Content-Type': %v", err)
    259 		}
    260 	}
    261 	switch strings.ToLower(headers.Get("Content-Transfer-Encoding")) {
    262 	case "quoted-printable":
    263 		r = quotedprintable.NewReader(r)
    264 	case "base64":
    265 		r = base64.NewDecoder(base64.StdEncoding, r)
    266 	}
    267 	disp, _, _ := mime.ParseMediaType(headers.Get("Content-Disposition"))
    268 	if disp == "attachment" {
    269 		attachment, err := ioutil.ReadAll(r)
    270 		if err != nil {
    271 			return nil, nil, fmt.Errorf("failed to read email body: %v", err)
    272 		}
    273 		return nil, [][]byte{attachment}, nil
    274 	}
    275 	if mediaType == "text/plain" {
    276 		body, err := ioutil.ReadAll(r)
    277 		if err != nil {
    278 			return nil, nil, fmt.Errorf("failed to read email body: %v", err)
    279 		}
    280 		return body, nil, nil
    281 	}
    282 	if !strings.HasPrefix(mediaType, "multipart/") {
    283 		return nil, nil, nil
    284 	}
    285 	var body []byte
    286 	var attachments [][]byte
    287 	mr := multipart.NewReader(r, params["boundary"])
    288 	for {
    289 		p, err := mr.NextPart()
    290 		if err == io.EOF {
    291 			return body, attachments, nil
    292 		}
    293 		if err != nil {
    294 			return nil, nil, fmt.Errorf("failed to parse MIME parts: %v", err)
    295 		}
    296 		body1, attachments1, err1 := parseBody(p, mail.Header(p.Header))
    297 		if err1 != nil {
    298 			return nil, nil, err1
    299 		}
    300 		if body == nil {
    301 			body = body1
    302 		}
    303 		attachments = append(attachments, attachments1...)
    304 	}
    305 }
    306 
    307 // MergeEmailLists merges several email lists removing duplicates and invalid entries.
    308 func MergeEmailLists(lists ...[]string) []string {
    309 	const (
    310 		maxEmailLen = 1000
    311 		maxEmails   = 50
    312 	)
    313 	merged := make(map[string]bool)
    314 	for _, list := range lists {
    315 		for _, email := range list {
    316 			addr, err := mail.ParseAddress(email)
    317 			if err != nil || len(addr.Address) > maxEmailLen {
    318 				continue
    319 			}
    320 			merged[addr.Address] = true
    321 		}
    322 	}
    323 	var result []string
    324 	for e := range merged {
    325 		result = append(result, e)
    326 	}
    327 	sort.Strings(result)
    328 	if len(result) > maxEmails {
    329 		result = result[:maxEmails]
    330 	}
    331 	return result
    332 }
    333