Home | History | Annotate | Download | only in vm
      1 // Copyright 2015 syzkaller project authors. All rights reserved.
      2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
      3 
      4 // Package vm provides an abstract test machine (VM, physical machine, etc)
      5 // interface for the rest of the system.
      6 // For convenience test machines are subsequently collectively called VMs.
      7 // Package wraps vmimpl package interface with some common functionality
      8 // and higher-level interface.
      9 package vm
     10 
     11 import (
     12 	"bytes"
     13 	"fmt"
     14 	"os"
     15 	"time"
     16 
     17 	"github.com/google/syzkaller/pkg/mgrconfig"
     18 	"github.com/google/syzkaller/pkg/osutil"
     19 	"github.com/google/syzkaller/pkg/report"
     20 	"github.com/google/syzkaller/vm/vmimpl"
     21 
     22 	// Import all VM implementations, so that users only need to import vm.
     23 	_ "github.com/google/syzkaller/vm/adb"
     24 	_ "github.com/google/syzkaller/vm/gce"
     25 	_ "github.com/google/syzkaller/vm/gvisor"
     26 	_ "github.com/google/syzkaller/vm/isolated"
     27 	_ "github.com/google/syzkaller/vm/kvm"
     28 	_ "github.com/google/syzkaller/vm/odroid"
     29 	_ "github.com/google/syzkaller/vm/qemu"
     30 )
     31 
// Pool wraps a vmimpl.Pool and is the factory for test machine instances
// (see Pool.Create).
type Pool struct {
	// impl is the backend pool returned by vmimpl.Create.
	impl    vmimpl.Pool
	// workdir is the base directory under which per-instance temp dirs are created.
	workdir string
}
     36 
// Instance wraps a single vmimpl.Instance created by Pool.Create.
type Instance struct {
	// impl is the backend instance all operations are delegated to.
	impl    vmimpl.Instance
	// workdir is the instance's private temp dir; it is removed in Close.
	workdir string
	// index is the index that was passed to Pool.Create.
	index   int
}
     42 
// Re-exports of vmimpl values, so that users of this package do not need to
// import vmimpl directly.
var (
	// Shutdown is received from by MonitorExecution, which returns nil
	// as soon as that receive succeeds.
	Shutdown   = vmimpl.Shutdown
	// ErrTimeout, when delivered on errc, makes MonitorExecution return nil
	// (it is not treated as a crash).
	ErrTimeout = vmimpl.ErrTimeout
)
     47 
// BootErrorer is implemented by values that can describe a boot failure.
// NOTE(review): presumably implemented by errors returned from instance
// creation, with the string being a short title and the bytes the captured
// console output — confirm against the vmimpl implementations.
type BootErrorer interface {
	BootError() (string, []byte)
}
     51 
     52 func Create(cfg *mgrconfig.Config, debug bool) (*Pool, error) {
     53 	env := &vmimpl.Env{
     54 		Name:    cfg.Name,
     55 		OS:      cfg.TargetOS,
     56 		Arch:    cfg.TargetVMArch,
     57 		Workdir: cfg.Workdir,
     58 		Image:   cfg.Image,
     59 		SSHKey:  cfg.SSHKey,
     60 		SSHUser: cfg.SSHUser,
     61 		Debug:   debug,
     62 		Config:  cfg.VM,
     63 	}
     64 	impl, err := vmimpl.Create(cfg.Type, env)
     65 	if err != nil {
     66 		return nil, err
     67 	}
     68 	return &Pool{
     69 		impl:    impl,
     70 		workdir: env.Workdir,
     71 	}, nil
     72 }
     73 
// Count returns the number of instances reported by the underlying pool.
// Pool.Create accepts only indices in the range [0, Count()).
func (pool *Pool) Count() int {
	return pool.impl.Count()
}
     77 
     78 func (pool *Pool) Create(index int) (*Instance, error) {
     79 	if index < 0 || index >= pool.Count() {
     80 		return nil, fmt.Errorf("invalid VM index %v (count %v)", index, pool.Count())
     81 	}
     82 	workdir, err := osutil.ProcessTempDir(pool.workdir)
     83 	if err != nil {
     84 		return nil, fmt.Errorf("failed to create instance temp dir: %v", err)
     85 	}
     86 	impl, err := pool.impl.Create(workdir, index)
     87 	if err != nil {
     88 		os.RemoveAll(workdir)
     89 		return nil, err
     90 	}
     91 	return &Instance{
     92 		impl:    impl,
     93 		workdir: workdir,
     94 		index:   index,
     95 	}, nil
     96 }
     97 
// Copy delegates to the underlying instance's Copy with hostSrc unchanged.
// NOTE(review): presumably copies the host file hostSrc into the test machine
// and returns its path there — confirm against vmimpl.Instance.
func (inst *Instance) Copy(hostSrc string) (string, error) {
	return inst.impl.Copy(hostSrc)
}
    101 
// Forward delegates to the underlying instance's Forward with port unchanged.
// NOTE(review): presumably sets up forwarding of a host port and returns the
// address to use from inside the test machine — confirm against vmimpl.Instance.
func (inst *Instance) Forward(port int) (string, error) {
	return inst.impl.Forward(port)
}
    105 
// Run delegates to the underlying instance's Run with the same arguments.
// The returned outc/errc channels are the ones MonitorExecution expects.
// NOTE(review): presumably command is executed inside the test machine for at
// most timeout, and stop aborts it early — confirm against vmimpl.Instance.
func (inst *Instance) Run(timeout time.Duration, stop <-chan bool, command string) (
	outc <-chan []byte, errc <-chan error, err error) {
	return inst.impl.Run(timeout, stop, command)
}
    110 
// Diagnose delegates to the underlying instance's Diagnose and returns its result.
// MonitorExecution waits for additional output when the result is true.
func (inst *Instance) Diagnose() bool {
	return inst.impl.Diagnose()
}
    114 
// Close closes the underlying instance and then removes the instance temp dir.
// The error from removing the directory is ignored.
func (inst *Instance) Close() {
	inst.impl.Close()
	os.RemoveAll(inst.workdir)
}
    119 
    120 // MonitorExecution monitors execution of a program running inside of a VM.
    121 // It detects kernel oopses in output, lost connections, hangs, etc.
    122 // outc/errc is what vm.Instance.Run returns, reporter parses kernel output for oopses.
    123 // If canExit is false and the program exits, it is treated as an error.
    124 // Returns a non-symbolized crash report, or nil if no error happens.
    125 func (inst *Instance) MonitorExecution(outc <-chan []byte, errc <-chan error,
    126 	reporter report.Reporter, canExit bool) (rep *report.Report) {
    127 	mon := &monitor{
    128 		inst:     inst,
    129 		outc:     outc,
    130 		errc:     errc,
    131 		reporter: reporter,
    132 		canExit:  canExit,
    133 	}
    134 	lastExecuteTime := time.Now()
    135 	ticker := time.NewTicker(10 * time.Second)
    136 	defer ticker.Stop()
    137 	for {
    138 		select {
    139 		case err := <-errc:
    140 			switch err {
    141 			case nil:
    142 				// The program has exited without errors,
    143 				// but wait for kernel output in case there is some delayed oops.
    144 				return mon.extractError("")
    145 			case ErrTimeout:
    146 				return nil
    147 			default:
    148 				// Note: connection lost can race with a kernel oops message.
    149 				// In such case we want to return the kernel oops.
    150 				return mon.extractError("lost connection to test machine")
    151 			}
    152 		case out := <-outc:
    153 			lastPos := len(mon.output)
    154 			mon.output = append(mon.output, out...)
    155 			if bytes.Contains(mon.output[lastPos:], executingProgram1) ||
    156 				bytes.Contains(mon.output[lastPos:], executingProgram2) {
    157 				lastExecuteTime = time.Now()
    158 			}
    159 			if reporter.ContainsCrash(mon.output[mon.matchPos:]) {
    160 				return mon.extractError("unknown error")
    161 			}
    162 			if len(mon.output) > 2*beforeContext {
    163 				copy(mon.output, mon.output[len(mon.output)-beforeContext:])
    164 				mon.output = mon.output[:beforeContext]
    165 			}
    166 			mon.matchPos = len(mon.output) - maxErrorLength
    167 			if mon.matchPos < 0 {
    168 				mon.matchPos = 0
    169 			}
    170 		case <-ticker.C:
    171 			// Detect both "not output whatsoever" and "kernel episodically prints
    172 			// something to console, but fuzzer is not actually executing programs".
    173 			// The timeout used to be 3 mins for a long time.
    174 			// But (1) we were seeing flakes on linux where net namespace
    175 			// destruction can be really slow, and (2) gVisor watchdog timeout
    176 			// is 3 mins + 1/4 of that for checking period = 3m45s.
    177 			// Current linux max timeout is CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=140
    178 			// and workqueue.watchdog_thresh=140 which both actually result
    179 			// in 140-280s detection delay.
    180 			// So the current timeout is 5 mins (300s).
    181 			// We don't want it to be too long too because it will waste time on real hangs.
    182 			if time.Since(lastExecuteTime) < 5*time.Minute {
    183 				break
    184 			}
    185 			if inst.Diagnose() {
    186 				mon.waitForOutput()
    187 			}
    188 			rep := &report.Report{
    189 				Title:      "no output from test machine",
    190 				Output:     mon.output,
    191 				Suppressed: report.IsSuppressed(mon.reporter, mon.output),
    192 			}
    193 			return rep
    194 		case <-Shutdown:
    195 			return nil
    196 		}
    197 	}
    198 }
    199 
// monitor holds the state of a single MonitorExecution call.
type monitor struct {
	// inst is the instance being monitored; used to call Diagnose.
	inst     *Instance
	// outc and errc are the channels passed to MonitorExecution.
	outc     <-chan []byte
	errc     <-chan error
	// reporter detects and parses crashes in the accumulated output.
	reporter report.Reporter
	// canExit tells whether a clean program exit without a crash is acceptable.
	canExit  bool
	// output is the console output accumulated so far (truncated from the
	// front by MonitorExecution when it grows too large).
	output   []byte
	// matchPos is the offset in output from which crash matching starts.
	matchPos int
}
    209 
    210 func (mon *monitor) extractError(defaultError string) *report.Report {
    211 	// Give it some time to finish writing the error message.
    212 	mon.inst.Diagnose()
    213 	mon.waitForOutput()
    214 	if bytes.Contains(mon.output, []byte("SYZ-FUZZER: PREEMPTED")) {
    215 		return nil
    216 	}
    217 	if !mon.reporter.ContainsCrash(mon.output[mon.matchPos:]) {
    218 		if defaultError == "" {
    219 			if mon.canExit {
    220 				return nil
    221 			}
    222 			defaultError = "lost connection to test machine"
    223 		}
    224 		rep := &report.Report{
    225 			Title:      defaultError,
    226 			Output:     mon.output,
    227 			Suppressed: report.IsSuppressed(mon.reporter, mon.output),
    228 		}
    229 		return rep
    230 	}
    231 	rep := mon.reporter.Parse(mon.output[mon.matchPos:])
    232 	if rep == nil {
    233 		panic(fmt.Sprintf("reporter.ContainsCrash/Parse disagree:\n%s", mon.output[mon.matchPos:]))
    234 	}
    235 	start := mon.matchPos + rep.StartPos - beforeContext
    236 	if start < 0 {
    237 		start = 0
    238 	}
    239 	end := mon.matchPos + rep.EndPos + afterContext
    240 	if end > len(mon.output) {
    241 		end = len(mon.output)
    242 	}
    243 	rep.Output = mon.output[start:end]
    244 	rep.StartPos += mon.matchPos - start
    245 	rep.EndPos += mon.matchPos - start
    246 	return rep
    247 }
    248 
    249 func (mon *monitor) waitForOutput() {
    250 	timer := time.NewTimer(10 * time.Second)
    251 	for {
    252 		select {
    253 		case out, ok := <-mon.outc:
    254 			if !ok {
    255 				timer.Stop()
    256 				return
    257 			}
    258 			mon.output = append(mon.output, out...)
    259 		case <-timer.C:
    260 			return
    261 		}
    262 	}
    263 }
    264 
const (
	// beforeContext is the number of output bytes kept before the start of a
	// crash in a report (1 MiB). It is also the size the output buffer is cut
	// down to when it grows beyond twice this value.
	beforeContext  = 1024 << 10
	// afterContext is the number of output bytes kept after the end of a crash
	// in a report (128 KiB).
	afterContext   = 128 << 10
	// maxErrorLength is how many trailing bytes of already seen output are
	// re-scanned for a crash when new output arrives.
	maxErrorLength = 512
)
    270 
// Markers in the output that show programs are still being executed;
// MonitorExecution resets its hang timer whenever new output contains one.
var (
	executingProgram1 = []byte("executing program")  // syz-fuzzer output
	executingProgram2 = []byte("executed programs:") // syz-execprog output
)
    275