Home | History | Annotate | Download | only in cgi
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // This file implements the host side of CGI (being the webserver
      6 // parent process).
      7 
      8 // Package cgi implements CGI (Common Gateway Interface) as specified
      9 // in RFC 3875.
     10 //
     11 // Note that using CGI means starting a new process to handle each
     12 // request, which is typically less efficient than using a
     13 // long-running server.  This package is intended primarily for
     14 // compatibility with existing systems.
     15 package cgi
     16 
     17 import (
     18 	"bufio"
     19 	"fmt"
     20 	"io"
     21 	"log"
     22 	"net"
     23 	"net/http"
     24 	"os"
     25 	"os/exec"
     26 	"path/filepath"
     27 	"regexp"
     28 	"runtime"
     29 	"strconv"
     30 	"strings"
     31 )
     32 
// trailingPort matches a ":" followed by one or more decimal digits at
// the end of a string and captures the digits. ServeHTTP applies it to
// the request's Host to derive the SERVER_PORT variable.
var trailingPort = regexp.MustCompile(`:([0-9]+)$`)

// osDefaultInheritEnv maps an operating system name, as reported by
// runtime.GOOS, to the names of host environment variables that
// ServeHTTP copies into the child's environment whenever they are set
// to a non-empty value. These are inherited in addition to the names
// listed in Handler.InheritEnv.
var osDefaultInheritEnv = map[string][]string{
	"darwin":  {"DYLD_LIBRARY_PATH"},
	"freebsd": {"LD_LIBRARY_PATH"},
	"hpux":    {"LD_LIBRARY_PATH", "SHLIB_PATH"},
	"irix":    {"LD_LIBRARY_PATH", "LD_LIBRARYN32_PATH", "LD_LIBRARY64_PATH"},
	"linux":   {"LD_LIBRARY_PATH"},
	"openbsd": {"LD_LIBRARY_PATH"},
	"solaris": {"LD_LIBRARY_PATH", "LD_LIBRARY_PATH_32", "LD_LIBRARY_PATH_64"},
	"windows": {"SystemRoot", "COMSPEC", "PATHEXT", "WINDIR"},
}
     45 
// Handler runs an executable in a subprocess with a CGI environment.
// Its ServeHTTP method starts one child process per call.
type Handler struct {
	Path string // path to the CGI executable
	Root string // root URI prefix of handler or empty for "/"

	// Dir specifies the CGI executable's working directory.
	// If Dir is empty, the base directory of Path is used.
	// If Path has no base directory, the current working
	// directory is used.
	Dir string

	// Env entries are appended after the variables ServeHTTP
	// generates from the request, and only the last definition of
	// each key is kept, so an entry here replaces a generated
	// variable with the same key. PATH and the inherited variables
	// are appended later still and therefore win over Env.
	Env        []string    // extra environment variables to set, if any, as "key=value"
	InheritEnv []string    // environment variables to inherit from host, as "key"; unset or empty ones are skipped
	Logger     *log.Logger // optional log for errors or nil to use log.Printf
	Args       []string    // optional arguments to pass to child process, following Path as the first argument

	// PathLocationHandler specifies the root http Handler that
	// should handle internal redirects when the CGI process
	// returns a Location header value starting with a "/", as
	// specified in RFC 3875, section 6.3.2. This will likely be
	// http.DefaultServeMux.
	//
	// If nil, a CGI response with a local URI path is instead sent
	// back to the client and not redirected internally.
	PathLocationHandler http.Handler
}
     72 
     73 // removeLeadingDuplicates remove leading duplicate in environments.
     74 // It's possible to override environment like following.
     75 //    cgi.Handler{
     76 //      ...
     77 //      Env: []string{"SCRIPT_FILENAME=foo.php"},
     78 //    }
     79 func removeLeadingDuplicates(env []string) (ret []string) {
     80 	n := len(env)
     81 	for i := 0; i < n; i++ {
     82 		e := env[i]
     83 		s := strings.SplitN(e, "=", 2)[0]
     84 		found := false
     85 		for j := i + 1; j < n; j++ {
     86 			if s == strings.SplitN(env[j], "=", 2)[0] {
     87 				found = true
     88 				break
     89 			}
     90 		}
     91 		if !found {
     92 			ret = append(ret, e)
     93 		}
     94 	}
     95 	return
     96 }
     97 
     98 func (h *Handler) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
     99 	root := h.Root
    100 	if root == "" {
    101 		root = "/"
    102 	}
    103 
    104 	if len(req.TransferEncoding) > 0 && req.TransferEncoding[0] == "chunked" {
    105 		rw.WriteHeader(http.StatusBadRequest)
    106 		rw.Write([]byte("Chunked request bodies are not supported by CGI."))
    107 		return
    108 	}
    109 
    110 	pathInfo := req.URL.Path
    111 	if root != "/" && strings.HasPrefix(pathInfo, root) {
    112 		pathInfo = pathInfo[len(root):]
    113 	}
    114 
    115 	port := "80"
    116 	if matches := trailingPort.FindStringSubmatch(req.Host); len(matches) != 0 {
    117 		port = matches[1]
    118 	}
    119 
    120 	env := []string{
    121 		"SERVER_SOFTWARE=go",
    122 		"SERVER_NAME=" + req.Host,
    123 		"SERVER_PROTOCOL=HTTP/1.1",
    124 		"HTTP_HOST=" + req.Host,
    125 		"GATEWAY_INTERFACE=CGI/1.1",
    126 		"REQUEST_METHOD=" + req.Method,
    127 		"QUERY_STRING=" + req.URL.RawQuery,
    128 		"REQUEST_URI=" + req.URL.RequestURI(),
    129 		"PATH_INFO=" + pathInfo,
    130 		"SCRIPT_NAME=" + root,
    131 		"SCRIPT_FILENAME=" + h.Path,
    132 		"SERVER_PORT=" + port,
    133 	}
    134 
    135 	if remoteIP, remotePort, err := net.SplitHostPort(req.RemoteAddr); err == nil {
    136 		env = append(env, "REMOTE_ADDR="+remoteIP, "REMOTE_HOST="+remoteIP, "REMOTE_PORT="+remotePort)
    137 	} else {
    138 		// could not parse ip:port, let's use whole RemoteAddr and leave REMOTE_PORT undefined
    139 		env = append(env, "REMOTE_ADDR="+req.RemoteAddr, "REMOTE_HOST="+req.RemoteAddr)
    140 	}
    141 
    142 	if req.TLS != nil {
    143 		env = append(env, "HTTPS=on")
    144 	}
    145 
    146 	for k, v := range req.Header {
    147 		k = strings.Map(upperCaseAndUnderscore, k)
    148 		joinStr := ", "
    149 		if k == "COOKIE" {
    150 			joinStr = "; "
    151 		}
    152 		env = append(env, "HTTP_"+k+"="+strings.Join(v, joinStr))
    153 	}
    154 
    155 	if req.ContentLength > 0 {
    156 		env = append(env, fmt.Sprintf("CONTENT_LENGTH=%d", req.ContentLength))
    157 	}
    158 	if ctype := req.Header.Get("Content-Type"); ctype != "" {
    159 		env = append(env, "CONTENT_TYPE="+ctype)
    160 	}
    161 
    162 	if h.Env != nil {
    163 		env = append(env, h.Env...)
    164 	}
    165 
    166 	envPath := os.Getenv("PATH")
    167 	if envPath == "" {
    168 		envPath = "/bin:/usr/bin:/usr/ucb:/usr/bsd:/usr/local/bin"
    169 	}
    170 	env = append(env, "PATH="+envPath)
    171 
    172 	for _, e := range h.InheritEnv {
    173 		if v := os.Getenv(e); v != "" {
    174 			env = append(env, e+"="+v)
    175 		}
    176 	}
    177 
    178 	for _, e := range osDefaultInheritEnv[runtime.GOOS] {
    179 		if v := os.Getenv(e); v != "" {
    180 			env = append(env, e+"="+v)
    181 		}
    182 	}
    183 
    184 	env = removeLeadingDuplicates(env)
    185 
    186 	var cwd, path string
    187 	if h.Dir != "" {
    188 		path = h.Path
    189 		cwd = h.Dir
    190 	} else {
    191 		cwd, path = filepath.Split(h.Path)
    192 	}
    193 	if cwd == "" {
    194 		cwd = "."
    195 	}
    196 
    197 	internalError := func(err error) {
    198 		rw.WriteHeader(http.StatusInternalServerError)
    199 		h.printf("CGI error: %v", err)
    200 	}
    201 
    202 	cmd := &exec.Cmd{
    203 		Path:   path,
    204 		Args:   append([]string{h.Path}, h.Args...),
    205 		Dir:    cwd,
    206 		Env:    env,
    207 		Stderr: os.Stderr, // for now
    208 	}
    209 	if req.ContentLength != 0 {
    210 		cmd.Stdin = req.Body
    211 	}
    212 	stdoutRead, err := cmd.StdoutPipe()
    213 	if err != nil {
    214 		internalError(err)
    215 		return
    216 	}
    217 
    218 	err = cmd.Start()
    219 	if err != nil {
    220 		internalError(err)
    221 		return
    222 	}
    223 	if hook := testHookStartProcess; hook != nil {
    224 		hook(cmd.Process)
    225 	}
    226 	defer cmd.Wait()
    227 	defer stdoutRead.Close()
    228 
    229 	linebody := bufio.NewReaderSize(stdoutRead, 1024)
    230 	headers := make(http.Header)
    231 	statusCode := 0
    232 	headerLines := 0
    233 	sawBlankLine := false
    234 	for {
    235 		line, isPrefix, err := linebody.ReadLine()
    236 		if isPrefix {
    237 			rw.WriteHeader(http.StatusInternalServerError)
    238 			h.printf("cgi: long header line from subprocess.")
    239 			return
    240 		}
    241 		if err == io.EOF {
    242 			break
    243 		}
    244 		if err != nil {
    245 			rw.WriteHeader(http.StatusInternalServerError)
    246 			h.printf("cgi: error reading headers: %v", err)
    247 			return
    248 		}
    249 		if len(line) == 0 {
    250 			sawBlankLine = true
    251 			break
    252 		}
    253 		headerLines++
    254 		parts := strings.SplitN(string(line), ":", 2)
    255 		if len(parts) < 2 {
    256 			h.printf("cgi: bogus header line: %s", string(line))
    257 			continue
    258 		}
    259 		header, val := parts[0], parts[1]
    260 		header = strings.TrimSpace(header)
    261 		val = strings.TrimSpace(val)
    262 		switch {
    263 		case header == "Status":
    264 			if len(val) < 3 {
    265 				h.printf("cgi: bogus status (short): %q", val)
    266 				return
    267 			}
    268 			code, err := strconv.Atoi(val[0:3])
    269 			if err != nil {
    270 				h.printf("cgi: bogus status: %q", val)
    271 				h.printf("cgi: line was %q", line)
    272 				return
    273 			}
    274 			statusCode = code
    275 		default:
    276 			headers.Add(header, val)
    277 		}
    278 	}
    279 	if headerLines == 0 || !sawBlankLine {
    280 		rw.WriteHeader(http.StatusInternalServerError)
    281 		h.printf("cgi: no headers")
    282 		return
    283 	}
    284 
    285 	if loc := headers.Get("Location"); loc != "" {
    286 		if strings.HasPrefix(loc, "/") && h.PathLocationHandler != nil {
    287 			h.handleInternalRedirect(rw, req, loc)
    288 			return
    289 		}
    290 		if statusCode == 0 {
    291 			statusCode = http.StatusFound
    292 		}
    293 	}
    294 
    295 	if statusCode == 0 && headers.Get("Content-Type") == "" {
    296 		rw.WriteHeader(http.StatusInternalServerError)
    297 		h.printf("cgi: missing required Content-Type in headers")
    298 		return
    299 	}
    300 
    301 	if statusCode == 0 {
    302 		statusCode = http.StatusOK
    303 	}
    304 
    305 	// Copy headers to rw's headers, after we've decided not to
    306 	// go into handleInternalRedirect, which won't want its rw
    307 	// headers to have been touched.
    308 	for k, vv := range headers {
    309 		for _, v := range vv {
    310 			rw.Header().Add(k, v)
    311 		}
    312 	}
    313 
    314 	rw.WriteHeader(statusCode)
    315 
    316 	_, err = io.Copy(rw, linebody)
    317 	if err != nil {
    318 		h.printf("cgi: copy error: %v", err)
    319 		// And kill the child CGI process so we don't hang on
    320 		// the deferred cmd.Wait above if the error was just
    321 		// the client (rw) going away. If it was a read error
    322 		// (because the child died itself), then the extra
    323 		// kill of an already-dead process is harmless (the PID
    324 		// won't be reused until the Wait above).
    325 		cmd.Process.Kill()
    326 	}
    327 }
    328 
    329 func (h *Handler) printf(format string, v ...interface{}) {
    330 	if h.Logger != nil {
    331 		h.Logger.Printf(format, v...)
    332 	} else {
    333 		log.Printf(format, v...)
    334 	}
    335 }
    336 
    337 func (h *Handler) handleInternalRedirect(rw http.ResponseWriter, req *http.Request, path string) {
    338 	url, err := req.URL.Parse(path)
    339 	if err != nil {
    340 		rw.WriteHeader(http.StatusInternalServerError)
    341 		h.printf("cgi: error resolving local URI path %q: %v", path, err)
    342 		return
    343 	}
    344 	// TODO: RFC 3875 isn't clear if only GET is supported, but it
    345 	// suggests so: "Note that any message-body attached to the
    346 	// request (such as for a POST request) may not be available
    347 	// to the resource that is the target of the redirect."  We
    348 	// should do some tests against Apache to see how it handles
    349 	// POST, HEAD, etc. Does the internal redirect get the same
    350 	// method or just GET? What about incoming headers?
    351 	// (e.g. Cookies) Which headers, if any, are copied into the
    352 	// second request?
    353 	newReq := &http.Request{
    354 		Method:     "GET",
    355 		URL:        url,
    356 		Proto:      "HTTP/1.1",
    357 		ProtoMajor: 1,
    358 		ProtoMinor: 1,
    359 		Header:     make(http.Header),
    360 		Host:       url.Host,
    361 		RemoteAddr: req.RemoteAddr,
    362 		TLS:        req.TLS,
    363 	}
    364 	h.PathLocationHandler.ServeHTTP(rw, newReq)
    365 }
    366 
    367 func upperCaseAndUnderscore(r rune) rune {
    368 	switch {
    369 	case r >= 'a' && r <= 'z':
    370 		return r - ('a' - 'A')
    371 	case r == '-':
    372 		return '_'
    373 	case r == '=':
    374 		// Maybe not part of the CGI 'spec' but would mess up
    375 		// the environment in any case, as Go represents the
    376 		// environment as a slice of "key=value" strings.
    377 		return '_'
    378 	}
    379 	// TODO: other transformations in spec or practice?
    380 	return r
    381 }
    382 
// testHookStartProcess, when non-nil, is called by ServeHTTP with the
// child process right after it has been started successfully. It is
// nil except for some tests.
var testHookStartProcess func(*os.Process)
    384