Home | History | Annotate | Download | only in cgi
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // This file implements the host side of CGI (being the webserver
      6 // parent process).
      7 
      8 // Package cgi implements CGI (Common Gateway Interface) as specified
      9 // in RFC 3875.
     10 //
     11 // Note that using CGI means starting a new process to handle each
     12 // request, which is typically less efficient than using a
     13 // long-running server. This package is intended primarily for
     14 // compatibility with existing systems.
     15 package cgi
     16 
     17 import (
     18 	"bufio"
     19 	"fmt"
     20 	"io"
     21 	"log"
     22 	"net"
     23 	"net/http"
     24 	"os"
     25 	"os/exec"
     26 	"path/filepath"
     27 	"regexp"
     28 	"runtime"
     29 	"strconv"
     30 	"strings"
     31 )
     32 
// trailingPort matches a ":<digits>" suffix at the very end of a string.
// Submatch 1 holds the digits without the colon. ServeHTTP applies it to
// req.Host to derive SERVER_PORT.
var trailingPort = regexp.MustCompile(`:([0-9]+)$`)
     34 
// osDefaultInheritEnv lists, per operating system name, environment
// variables that ServeHTTP copies from the host process into the child's
// environment in addition to Handler.InheritEnv. It is indexed by
// runtime.GOOS; a variable is only copied when it is set to a non-empty
// value in the host.
var osDefaultInheritEnv = map[string][]string{
	"darwin":  {"DYLD_LIBRARY_PATH"},
	"freebsd": {"LD_LIBRARY_PATH"},
	"hpux":    {"LD_LIBRARY_PATH", "SHLIB_PATH"},
	"irix":    {"LD_LIBRARY_PATH", "LD_LIBRARYN32_PATH", "LD_LIBRARY64_PATH"},
	"linux":   {"LD_LIBRARY_PATH"},
	"openbsd": {"LD_LIBRARY_PATH"},
	"solaris": {"LD_LIBRARY_PATH", "LD_LIBRARY_PATH_32", "LD_LIBRARY_PATH_64"},
	"windows": {"SystemRoot", "COMSPEC", "PATHEXT", "WINDIR"},
}
     45 
// Handler runs an executable in a subprocess with a CGI environment.
//
// Handler implements http.Handler: each call to ServeHTTP starts the
// executable named by Path, describes the request to it through
// environment variables, and relays the headers and body the child
// writes on its standard output back to the client.
type Handler struct {
	Path string // path to the CGI executable
	Root string // root URI prefix of handler or empty for "/"

	// Dir specifies the CGI executable's working directory.
	// If Dir is empty, the base directory of Path is used.
	// If Path has no base directory, the current working
	// directory is used.
	Dir string

	// Env entries are appended after the variables generated by
	// ServeHTTP, and an earlier "key=value" entry is dropped when a
	// later one has the same key, so Env can override generated
	// variables such as SCRIPT_FILENAME.
	Env        []string    // extra environment variables to set, if any, as "key=value"
	InheritEnv []string    // environment variables to inherit from host, as "key"
	Logger     *log.Logger // optional log for errors or nil to use log.Print
	Args       []string    // optional arguments to pass to child process
	Stderr     io.Writer   // optional stderr for the child process; nil means os.Stderr

	// PathLocationHandler specifies the root http Handler that
	// should handle internal redirects when the CGI process
	// returns a Location header value starting with a "/", as
	// specified in RFC 3875  6.3.2. This will likely be
	// http.DefaultServeMux.
	//
	// If nil, a CGI response with a local URI path is instead sent
	// back to the client and not redirected internally.
	PathLocationHandler http.Handler
}
     73 
     74 func (h *Handler) stderr() io.Writer {
     75 	if h.Stderr != nil {
     76 		return h.Stderr
     77 	}
     78 	return os.Stderr
     79 }
     80 
     81 // removeLeadingDuplicates remove leading duplicate in environments.
     82 // It's possible to override environment like following.
     83 //    cgi.Handler{
     84 //      ...
     85 //      Env: []string{"SCRIPT_FILENAME=foo.php"},
     86 //    }
     87 func removeLeadingDuplicates(env []string) (ret []string) {
     88 	for i, e := range env {
     89 		found := false
     90 		if eq := strings.IndexByte(e, '='); eq != -1 {
     91 			keq := e[:eq+1] // "key="
     92 			for _, e2 := range env[i+1:] {
     93 				if strings.HasPrefix(e2, keq) {
     94 					found = true
     95 					break
     96 				}
     97 			}
     98 		}
     99 		if !found {
    100 			ret = append(ret, e)
    101 		}
    102 	}
    103 	return
    104 }
    105 
    106 func (h *Handler) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
    107 	root := h.Root
    108 	if root == "" {
    109 		root = "/"
    110 	}
    111 
    112 	if len(req.TransferEncoding) > 0 && req.TransferEncoding[0] == "chunked" {
    113 		rw.WriteHeader(http.StatusBadRequest)
    114 		rw.Write([]byte("Chunked request bodies are not supported by CGI."))
    115 		return
    116 	}
    117 
    118 	pathInfo := req.URL.Path
    119 	if root != "/" && strings.HasPrefix(pathInfo, root) {
    120 		pathInfo = pathInfo[len(root):]
    121 	}
    122 
    123 	port := "80"
    124 	if matches := trailingPort.FindStringSubmatch(req.Host); len(matches) != 0 {
    125 		port = matches[1]
    126 	}
    127 
    128 	env := []string{
    129 		"SERVER_SOFTWARE=go",
    130 		"SERVER_NAME=" + req.Host,
    131 		"SERVER_PROTOCOL=HTTP/1.1",
    132 		"HTTP_HOST=" + req.Host,
    133 		"GATEWAY_INTERFACE=CGI/1.1",
    134 		"REQUEST_METHOD=" + req.Method,
    135 		"QUERY_STRING=" + req.URL.RawQuery,
    136 		"REQUEST_URI=" + req.URL.RequestURI(),
    137 		"PATH_INFO=" + pathInfo,
    138 		"SCRIPT_NAME=" + root,
    139 		"SCRIPT_FILENAME=" + h.Path,
    140 		"SERVER_PORT=" + port,
    141 	}
    142 
    143 	if remoteIP, remotePort, err := net.SplitHostPort(req.RemoteAddr); err == nil {
    144 		env = append(env, "REMOTE_ADDR="+remoteIP, "REMOTE_HOST="+remoteIP, "REMOTE_PORT="+remotePort)
    145 	} else {
    146 		// could not parse ip:port, let's use whole RemoteAddr and leave REMOTE_PORT undefined
    147 		env = append(env, "REMOTE_ADDR="+req.RemoteAddr, "REMOTE_HOST="+req.RemoteAddr)
    148 	}
    149 
    150 	if req.TLS != nil {
    151 		env = append(env, "HTTPS=on")
    152 	}
    153 
    154 	for k, v := range req.Header {
    155 		k = strings.Map(upperCaseAndUnderscore, k)
    156 		if k == "PROXY" {
    157 			// See Issue 16405
    158 			continue
    159 		}
    160 		joinStr := ", "
    161 		if k == "COOKIE" {
    162 			joinStr = "; "
    163 		}
    164 		env = append(env, "HTTP_"+k+"="+strings.Join(v, joinStr))
    165 	}
    166 
    167 	if req.ContentLength > 0 {
    168 		env = append(env, fmt.Sprintf("CONTENT_LENGTH=%d", req.ContentLength))
    169 	}
    170 	if ctype := req.Header.Get("Content-Type"); ctype != "" {
    171 		env = append(env, "CONTENT_TYPE="+ctype)
    172 	}
    173 
    174 	envPath := os.Getenv("PATH")
    175 	if envPath == "" {
    176 		envPath = "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin"
    177 	}
    178 	env = append(env, "PATH="+envPath)
    179 
    180 	for _, e := range h.InheritEnv {
    181 		if v := os.Getenv(e); v != "" {
    182 			env = append(env, e+"="+v)
    183 		}
    184 	}
    185 
    186 	for _, e := range osDefaultInheritEnv[runtime.GOOS] {
    187 		if v := os.Getenv(e); v != "" {
    188 			env = append(env, e+"="+v)
    189 		}
    190 	}
    191 
    192 	if h.Env != nil {
    193 		env = append(env, h.Env...)
    194 	}
    195 
    196 	env = removeLeadingDuplicates(env)
    197 
    198 	var cwd, path string
    199 	if h.Dir != "" {
    200 		path = h.Path
    201 		cwd = h.Dir
    202 	} else {
    203 		cwd, path = filepath.Split(h.Path)
    204 	}
    205 	if cwd == "" {
    206 		cwd = "."
    207 	}
    208 
    209 	internalError := func(err error) {
    210 		rw.WriteHeader(http.StatusInternalServerError)
    211 		h.printf("CGI error: %v", err)
    212 	}
    213 
    214 	cmd := &exec.Cmd{
    215 		Path:   path,
    216 		Args:   append([]string{h.Path}, h.Args...),
    217 		Dir:    cwd,
    218 		Env:    env,
    219 		Stderr: h.stderr(),
    220 	}
    221 	if req.ContentLength != 0 {
    222 		cmd.Stdin = req.Body
    223 	}
    224 	stdoutRead, err := cmd.StdoutPipe()
    225 	if err != nil {
    226 		internalError(err)
    227 		return
    228 	}
    229 
    230 	err = cmd.Start()
    231 	if err != nil {
    232 		internalError(err)
    233 		return
    234 	}
    235 	if hook := testHookStartProcess; hook != nil {
    236 		hook(cmd.Process)
    237 	}
    238 	defer cmd.Wait()
    239 	defer stdoutRead.Close()
    240 
    241 	linebody := bufio.NewReaderSize(stdoutRead, 1024)
    242 	headers := make(http.Header)
    243 	statusCode := 0
    244 	headerLines := 0
    245 	sawBlankLine := false
    246 	for {
    247 		line, isPrefix, err := linebody.ReadLine()
    248 		if isPrefix {
    249 			rw.WriteHeader(http.StatusInternalServerError)
    250 			h.printf("cgi: long header line from subprocess.")
    251 			return
    252 		}
    253 		if err == io.EOF {
    254 			break
    255 		}
    256 		if err != nil {
    257 			rw.WriteHeader(http.StatusInternalServerError)
    258 			h.printf("cgi: error reading headers: %v", err)
    259 			return
    260 		}
    261 		if len(line) == 0 {
    262 			sawBlankLine = true
    263 			break
    264 		}
    265 		headerLines++
    266 		parts := strings.SplitN(string(line), ":", 2)
    267 		if len(parts) < 2 {
    268 			h.printf("cgi: bogus header line: %s", string(line))
    269 			continue
    270 		}
    271 		header, val := parts[0], parts[1]
    272 		header = strings.TrimSpace(header)
    273 		val = strings.TrimSpace(val)
    274 		switch {
    275 		case header == "Status":
    276 			if len(val) < 3 {
    277 				h.printf("cgi: bogus status (short): %q", val)
    278 				return
    279 			}
    280 			code, err := strconv.Atoi(val[0:3])
    281 			if err != nil {
    282 				h.printf("cgi: bogus status: %q", val)
    283 				h.printf("cgi: line was %q", line)
    284 				return
    285 			}
    286 			statusCode = code
    287 		default:
    288 			headers.Add(header, val)
    289 		}
    290 	}
    291 	if headerLines == 0 || !sawBlankLine {
    292 		rw.WriteHeader(http.StatusInternalServerError)
    293 		h.printf("cgi: no headers")
    294 		return
    295 	}
    296 
    297 	if loc := headers.Get("Location"); loc != "" {
    298 		if strings.HasPrefix(loc, "/") && h.PathLocationHandler != nil {
    299 			h.handleInternalRedirect(rw, req, loc)
    300 			return
    301 		}
    302 		if statusCode == 0 {
    303 			statusCode = http.StatusFound
    304 		}
    305 	}
    306 
    307 	if statusCode == 0 && headers.Get("Content-Type") == "" {
    308 		rw.WriteHeader(http.StatusInternalServerError)
    309 		h.printf("cgi: missing required Content-Type in headers")
    310 		return
    311 	}
    312 
    313 	if statusCode == 0 {
    314 		statusCode = http.StatusOK
    315 	}
    316 
    317 	// Copy headers to rw's headers, after we've decided not to
    318 	// go into handleInternalRedirect, which won't want its rw
    319 	// headers to have been touched.
    320 	for k, vv := range headers {
    321 		for _, v := range vv {
    322 			rw.Header().Add(k, v)
    323 		}
    324 	}
    325 
    326 	rw.WriteHeader(statusCode)
    327 
    328 	_, err = io.Copy(rw, linebody)
    329 	if err != nil {
    330 		h.printf("cgi: copy error: %v", err)
    331 		// And kill the child CGI process so we don't hang on
    332 		// the deferred cmd.Wait above if the error was just
    333 		// the client (rw) going away. If it was a read error
    334 		// (because the child died itself), then the extra
    335 		// kill of an already-dead process is harmless (the PID
    336 		// won't be reused until the Wait above).
    337 		cmd.Process.Kill()
    338 	}
    339 }
    340 
    341 func (h *Handler) printf(format string, v ...interface{}) {
    342 	if h.Logger != nil {
    343 		h.Logger.Printf(format, v...)
    344 	} else {
    345 		log.Printf(format, v...)
    346 	}
    347 }
    348 
    349 func (h *Handler) handleInternalRedirect(rw http.ResponseWriter, req *http.Request, path string) {
    350 	url, err := req.URL.Parse(path)
    351 	if err != nil {
    352 		rw.WriteHeader(http.StatusInternalServerError)
    353 		h.printf("cgi: error resolving local URI path %q: %v", path, err)
    354 		return
    355 	}
    356 	// TODO: RFC 3875 isn't clear if only GET is supported, but it
    357 	// suggests so: "Note that any message-body attached to the
    358 	// request (such as for a POST request) may not be available
    359 	// to the resource that is the target of the redirect."  We
    360 	// should do some tests against Apache to see how it handles
    361 	// POST, HEAD, etc. Does the internal redirect get the same
    362 	// method or just GET? What about incoming headers?
    363 	// (e.g. Cookies) Which headers, if any, are copied into the
    364 	// second request?
    365 	newReq := &http.Request{
    366 		Method:     "GET",
    367 		URL:        url,
    368 		Proto:      "HTTP/1.1",
    369 		ProtoMajor: 1,
    370 		ProtoMinor: 1,
    371 		Header:     make(http.Header),
    372 		Host:       url.Host,
    373 		RemoteAddr: req.RemoteAddr,
    374 		TLS:        req.TLS,
    375 	}
    376 	h.PathLocationHandler.ServeHTTP(rw, newReq)
    377 }
    378 
    379 func upperCaseAndUnderscore(r rune) rune {
    380 	switch {
    381 	case r >= 'a' && r <= 'z':
    382 		return r - ('a' - 'A')
    383 	case r == '-':
    384 		return '_'
    385 	case r == '=':
    386 		// Maybe not part of the CGI 'spec' but would mess up
    387 		// the environment in any case, as Go represents the
    388 		// environment as a slice of "key=value" strings.
    389 		return '_'
    390 	}
    391 	// TODO: other transformations in spec or practice?
    392 	return r
    393 }
    394 
// testHookStartProcess, when non-nil, is called by ServeHTTP with the
// child's *os.Process right after the CGI subprocess has been started
// successfully.
var testHookStartProcess func(*os.Process) // nil except for some tests
    396