Home | History | Annotate | Download | only in http
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // HTTP file system request handler
      6 
      7 package http
      8 
      9 import (
     10 	"errors"
     11 	"fmt"
     12 	"io"
     13 	"mime"
     14 	"mime/multipart"
     15 	"net/textproto"
     16 	"net/url"
     17 	"os"
     18 	"path"
     19 	"path/filepath"
     20 	"strconv"
     21 	"strings"
     22 	"time"
     23 )
     24 
     25 // A Dir implements FileSystem using the native file system restricted to a
     26 // specific directory tree.
     27 //
     28 // While the FileSystem.Open method takes '/'-separated paths, a Dir's string
     29 // value is a filename on the native file system, not a URL, so it is separated
     30 // by filepath.Separator, which isn't necessarily '/'.
     31 //
     32 // An empty Dir is treated as ".".
     33 type Dir string
     34 
     35 func (d Dir) Open(name string) (File, error) {
     36 	if filepath.Separator != '/' && strings.IndexRune(name, filepath.Separator) >= 0 ||
     37 		strings.Contains(name, "\x00") {
     38 		return nil, errors.New("http: invalid character in file path")
     39 	}
     40 	dir := string(d)
     41 	if dir == "" {
     42 		dir = "."
     43 	}
     44 	f, err := os.Open(filepath.Join(dir, filepath.FromSlash(path.Clean("/"+name))))
     45 	if err != nil {
     46 		return nil, err
     47 	}
     48 	return f, nil
     49 }
     50 
     51 // A FileSystem implements access to a collection of named files.
     52 // The elements in a file path are separated by slash ('/', U+002F)
     53 // characters, regardless of host operating system convention.
     54 type FileSystem interface {
     55 	Open(name string) (File, error)
     56 }
     57 
     58 // A File is returned by a FileSystem's Open method and can be
     59 // served by the FileServer implementation.
     60 //
     61 // The methods should behave the same as those on an *os.File.
     62 type File interface {
     63 	io.Closer
     64 	io.Reader
     65 	Readdir(count int) ([]os.FileInfo, error)
     66 	Seek(offset int64, whence int) (int64, error)
     67 	Stat() (os.FileInfo, error)
     68 }
     69 
     70 func dirList(w ResponseWriter, f File) {
     71 	w.Header().Set("Content-Type", "text/html; charset=utf-8")
     72 	fmt.Fprintf(w, "<pre>\n")
     73 	for {
     74 		dirs, err := f.Readdir(100)
     75 		if err != nil || len(dirs) == 0 {
     76 			break
     77 		}
     78 		for _, d := range dirs {
     79 			name := d.Name()
     80 			if d.IsDir() {
     81 				name += "/"
     82 			}
     83 			// name may contain '?' or '#', which must be escaped to remain
     84 			// part of the URL path, and not indicate the start of a query
     85 			// string or fragment.
     86 			url := url.URL{Path: name}
     87 			fmt.Fprintf(w, "<a href=\"%s\">%s</a>\n", url.String(), htmlReplacer.Replace(name))
     88 		}
     89 	}
     90 	fmt.Fprintf(w, "</pre>\n")
     91 }
     92 
     93 // ServeContent replies to the request using the content in the
     94 // provided ReadSeeker.  The main benefit of ServeContent over io.Copy
     95 // is that it handles Range requests properly, sets the MIME type, and
     96 // handles If-Modified-Since requests.
     97 //
     98 // If the response's Content-Type header is not set, ServeContent
     99 // first tries to deduce the type from name's file extension and,
    100 // if that fails, falls back to reading the first block of the content
    101 // and passing it to DetectContentType.
    102 // The name is otherwise unused; in particular it can be empty and is
    103 // never sent in the response.
    104 //
    105 // If modtime is not the zero time or Unix epoch, ServeContent
    106 // includes it in a Last-Modified header in the response.  If the
    107 // request includes an If-Modified-Since header, ServeContent uses
    108 // modtime to decide whether the content needs to be sent at all.
    109 //
    110 // The content's Seek method must work: ServeContent uses
    111 // a seek to the end of the content to determine its size.
    112 //
    113 // If the caller has set w's ETag header, ServeContent uses it to
    114 // handle requests using If-Range and If-None-Match.
    115 //
    116 // Note that *os.File implements the io.ReadSeeker interface.
    117 func ServeContent(w ResponseWriter, req *Request, name string, modtime time.Time, content io.ReadSeeker) {
    118 	sizeFunc := func() (int64, error) {
    119 		size, err := content.Seek(0, os.SEEK_END)
    120 		if err != nil {
    121 			return 0, errSeeker
    122 		}
    123 		_, err = content.Seek(0, os.SEEK_SET)
    124 		if err != nil {
    125 			return 0, errSeeker
    126 		}
    127 		return size, nil
    128 	}
    129 	serveContent(w, req, name, modtime, sizeFunc, content)
    130 }
    131 
    132 // errSeeker is returned by ServeContent's sizeFunc when the content
    133 // doesn't seek properly. The underlying Seeker's error text isn't
    134 // included in the sizeFunc reply so it's not sent over HTTP to end
    135 // users.
    136 var errSeeker = errors.New("seeker can't seek")
    137 
    138 // if name is empty, filename is unknown. (used for mime type, before sniffing)
    139 // if modtime.IsZero(), modtime is unknown.
    140 // content must be seeked to the beginning of the file.
    141 // The sizeFunc is called at most once. Its error, if any, is sent in the HTTP response.
    142 func serveContent(w ResponseWriter, r *Request, name string, modtime time.Time, sizeFunc func() (int64, error), content io.ReadSeeker) {
    143 	if checkLastModified(w, r, modtime) {
    144 		return
    145 	}
    146 	rangeReq, done := checkETag(w, r, modtime)
    147 	if done {
    148 		return
    149 	}
    150 
    151 	code := StatusOK
    152 
    153 	// If Content-Type isn't set, use the file's extension to find it, but
    154 	// if the Content-Type is unset explicitly, do not sniff the type.
    155 	ctypes, haveType := w.Header()["Content-Type"]
    156 	var ctype string
    157 	if !haveType {
    158 		ctype = mime.TypeByExtension(filepath.Ext(name))
    159 		if ctype == "" {
    160 			// read a chunk to decide between utf-8 text and binary
    161 			var buf [sniffLen]byte
    162 			n, _ := io.ReadFull(content, buf[:])
    163 			ctype = DetectContentType(buf[:n])
    164 			_, err := content.Seek(0, os.SEEK_SET) // rewind to output whole file
    165 			if err != nil {
    166 				Error(w, "seeker can't seek", StatusInternalServerError)
    167 				return
    168 			}
    169 		}
    170 		w.Header().Set("Content-Type", ctype)
    171 	} else if len(ctypes) > 0 {
    172 		ctype = ctypes[0]
    173 	}
    174 
    175 	size, err := sizeFunc()
    176 	if err != nil {
    177 		Error(w, err.Error(), StatusInternalServerError)
    178 		return
    179 	}
    180 
    181 	// handle Content-Range header.
    182 	sendSize := size
    183 	var sendContent io.Reader = content
    184 	if size >= 0 {
    185 		ranges, err := parseRange(rangeReq, size)
    186 		if err != nil {
    187 			Error(w, err.Error(), StatusRequestedRangeNotSatisfiable)
    188 			return
    189 		}
    190 		if sumRangesSize(ranges) > size {
    191 			// The total number of bytes in all the ranges
    192 			// is larger than the size of the file by
    193 			// itself, so this is probably an attack, or a
    194 			// dumb client.  Ignore the range request.
    195 			ranges = nil
    196 		}
    197 		switch {
    198 		case len(ranges) == 1:
    199 			// RFC 2616, Section 14.16:
    200 			// "When an HTTP message includes the content of a single
    201 			// range (for example, a response to a request for a
    202 			// single range, or to a request for a set of ranges
    203 			// that overlap without any holes), this content is
    204 			// transmitted with a Content-Range header, and a
    205 			// Content-Length header showing the number of bytes
    206 			// actually transferred.
    207 			// ...
    208 			// A response to a request for a single range MUST NOT
    209 			// be sent using the multipart/byteranges media type."
    210 			ra := ranges[0]
    211 			if _, err := content.Seek(ra.start, os.SEEK_SET); err != nil {
    212 				Error(w, err.Error(), StatusRequestedRangeNotSatisfiable)
    213 				return
    214 			}
    215 			sendSize = ra.length
    216 			code = StatusPartialContent
    217 			w.Header().Set("Content-Range", ra.contentRange(size))
    218 		case len(ranges) > 1:
    219 			sendSize = rangesMIMESize(ranges, ctype, size)
    220 			code = StatusPartialContent
    221 
    222 			pr, pw := io.Pipe()
    223 			mw := multipart.NewWriter(pw)
    224 			w.Header().Set("Content-Type", "multipart/byteranges; boundary="+mw.Boundary())
    225 			sendContent = pr
    226 			defer pr.Close() // cause writing goroutine to fail and exit if CopyN doesn't finish.
    227 			go func() {
    228 				for _, ra := range ranges {
    229 					part, err := mw.CreatePart(ra.mimeHeader(ctype, size))
    230 					if err != nil {
    231 						pw.CloseWithError(err)
    232 						return
    233 					}
    234 					if _, err := content.Seek(ra.start, os.SEEK_SET); err != nil {
    235 						pw.CloseWithError(err)
    236 						return
    237 					}
    238 					if _, err := io.CopyN(part, content, ra.length); err != nil {
    239 						pw.CloseWithError(err)
    240 						return
    241 					}
    242 				}
    243 				mw.Close()
    244 				pw.Close()
    245 			}()
    246 		}
    247 
    248 		w.Header().Set("Accept-Ranges", "bytes")
    249 		if w.Header().Get("Content-Encoding") == "" {
    250 			w.Header().Set("Content-Length", strconv.FormatInt(sendSize, 10))
    251 		}
    252 	}
    253 
    254 	w.WriteHeader(code)
    255 
    256 	if r.Method != "HEAD" {
    257 		io.CopyN(w, sendContent, sendSize)
    258 	}
    259 }
    260 
    261 var unixEpochTime = time.Unix(0, 0)
    262 
    263 // modtime is the modification time of the resource to be served, or IsZero().
    264 // return value is whether this request is now complete.
    265 func checkLastModified(w ResponseWriter, r *Request, modtime time.Time) bool {
    266 	if modtime.IsZero() || modtime.Equal(unixEpochTime) {
    267 		// If the file doesn't have a modtime (IsZero), or the modtime
    268 		// is obviously garbage (Unix time == 0), then ignore modtimes
    269 		// and don't process the If-Modified-Since header.
    270 		return false
    271 	}
    272 
    273 	// The Date-Modified header truncates sub-second precision, so
    274 	// use mtime < t+1s instead of mtime <= t to check for unmodified.
    275 	if t, err := time.Parse(TimeFormat, r.Header.Get("If-Modified-Since")); err == nil && modtime.Before(t.Add(1*time.Second)) {
    276 		h := w.Header()
    277 		delete(h, "Content-Type")
    278 		delete(h, "Content-Length")
    279 		w.WriteHeader(StatusNotModified)
    280 		return true
    281 	}
    282 	w.Header().Set("Last-Modified", modtime.UTC().Format(TimeFormat))
    283 	return false
    284 }
    285 
    286 // checkETag implements If-None-Match and If-Range checks.
    287 //
    288 // The ETag or modtime must have been previously set in the
    289 // ResponseWriter's headers.  The modtime is only compared at second
    290 // granularity and may be the zero value to mean unknown.
    291 //
    292 // The return value is the effective request "Range" header to use and
    293 // whether this request is now considered done.
    294 func checkETag(w ResponseWriter, r *Request, modtime time.Time) (rangeReq string, done bool) {
    295 	etag := w.Header().get("Etag")
    296 	rangeReq = r.Header.get("Range")
    297 
    298 	// Invalidate the range request if the entity doesn't match the one
    299 	// the client was expecting.
    300 	// "If-Range: version" means "ignore the Range: header unless version matches the
    301 	// current file."
    302 	// We only support ETag versions.
    303 	// The caller must have set the ETag on the response already.
    304 	if ir := r.Header.get("If-Range"); ir != "" && ir != etag {
    305 		// The If-Range value is typically the ETag value, but it may also be
    306 		// the modtime date. See golang.org/issue/8367.
    307 		timeMatches := false
    308 		if !modtime.IsZero() {
    309 			if t, err := ParseTime(ir); err == nil && t.Unix() == modtime.Unix() {
    310 				timeMatches = true
    311 			}
    312 		}
    313 		if !timeMatches {
    314 			rangeReq = ""
    315 		}
    316 	}
    317 
    318 	if inm := r.Header.get("If-None-Match"); inm != "" {
    319 		// Must know ETag.
    320 		if etag == "" {
    321 			return rangeReq, false
    322 		}
    323 
    324 		// TODO(bradfitz): non-GET/HEAD requests require more work:
    325 		// sending a different status code on matches, and
    326 		// also can't use weak cache validators (those with a "W/
    327 		// prefix).  But most users of ServeContent will be using
    328 		// it on GET or HEAD, so only support those for now.
    329 		if r.Method != "GET" && r.Method != "HEAD" {
    330 			return rangeReq, false
    331 		}
    332 
    333 		// TODO(bradfitz): deal with comma-separated or multiple-valued
    334 		// list of If-None-match values.  For now just handle the common
    335 		// case of a single item.
    336 		if inm == etag || inm == "*" {
    337 			h := w.Header()
    338 			delete(h, "Content-Type")
    339 			delete(h, "Content-Length")
    340 			w.WriteHeader(StatusNotModified)
    341 			return "", true
    342 		}
    343 	}
    344 	return rangeReq, false
    345 }
    346 
    347 // name is '/'-separated, not filepath.Separator.
    348 func serveFile(w ResponseWriter, r *Request, fs FileSystem, name string, redirect bool) {
    349 	const indexPage = "/index.html"
    350 
    351 	// redirect .../index.html to .../
    352 	// can't use Redirect() because that would make the path absolute,
    353 	// which would be a problem running under StripPrefix
    354 	if strings.HasSuffix(r.URL.Path, indexPage) {
    355 		localRedirect(w, r, "./")
    356 		return
    357 	}
    358 
    359 	f, err := fs.Open(name)
    360 	if err != nil {
    361 		msg, code := toHTTPError(err)
    362 		Error(w, msg, code)
    363 		return
    364 	}
    365 	defer f.Close()
    366 
    367 	d, err1 := f.Stat()
    368 	if err1 != nil {
    369 		msg, code := toHTTPError(err)
    370 		Error(w, msg, code)
    371 		return
    372 	}
    373 
    374 	if redirect {
    375 		// redirect to canonical path: / at end of directory url
    376 		// r.URL.Path always begins with /
    377 		url := r.URL.Path
    378 		if d.IsDir() {
    379 			if url[len(url)-1] != '/' {
    380 				localRedirect(w, r, path.Base(url)+"/")
    381 				return
    382 			}
    383 		} else {
    384 			if url[len(url)-1] == '/' {
    385 				localRedirect(w, r, "../"+path.Base(url))
    386 				return
    387 			}
    388 		}
    389 	}
    390 
    391 	// use contents of index.html for directory, if present
    392 	if d.IsDir() {
    393 		index := strings.TrimSuffix(name, "/") + indexPage
    394 		ff, err := fs.Open(index)
    395 		if err == nil {
    396 			defer ff.Close()
    397 			dd, err := ff.Stat()
    398 			if err == nil {
    399 				name = index
    400 				d = dd
    401 				f = ff
    402 			}
    403 		}
    404 	}
    405 
    406 	// Still a directory? (we didn't find an index.html file)
    407 	if d.IsDir() {
    408 		if checkLastModified(w, r, d.ModTime()) {
    409 			return
    410 		}
    411 		dirList(w, f)
    412 		return
    413 	}
    414 
    415 	// serveContent will check modification time
    416 	sizeFunc := func() (int64, error) { return d.Size(), nil }
    417 	serveContent(w, r, d.Name(), d.ModTime(), sizeFunc, f)
    418 }
    419 
    420 // toHTTPError returns a non-specific HTTP error message and status code
    421 // for a given non-nil error value. It's important that toHTTPError does not
    422 // actually return err.Error(), since msg and httpStatus are returned to users,
    423 // and historically Go's ServeContent always returned just "404 Not Found" for
    424 // all errors. We don't want to start leaking information in error messages.
    425 func toHTTPError(err error) (msg string, httpStatus int) {
    426 	if os.IsNotExist(err) {
    427 		return "404 page not found", StatusNotFound
    428 	}
    429 	if os.IsPermission(err) {
    430 		return "403 Forbidden", StatusForbidden
    431 	}
    432 	// Default:
    433 	return "500 Internal Server Error", StatusInternalServerError
    434 }
    435 
    436 // localRedirect gives a Moved Permanently response.
    437 // It does not convert relative paths to absolute paths like Redirect does.
    438 func localRedirect(w ResponseWriter, r *Request, newPath string) {
    439 	if q := r.URL.RawQuery; q != "" {
    440 		newPath += "?" + q
    441 	}
    442 	w.Header().Set("Location", newPath)
    443 	w.WriteHeader(StatusMovedPermanently)
    444 }
    445 
    446 // ServeFile replies to the request with the contents of the named
    447 // file or directory.
    448 //
    449 // As a special case, ServeFile redirects any request where r.URL.Path
    450 // ends in "/index.html" to the same path, without the final
    451 // "index.html". To avoid such redirects either modify the path or
    452 // use ServeContent.
    453 func ServeFile(w ResponseWriter, r *Request, name string) {
    454 	dir, file := filepath.Split(name)
    455 	serveFile(w, r, Dir(dir), file, false)
    456 }
    457 
    458 type fileHandler struct {
    459 	root FileSystem
    460 }
    461 
    462 // FileServer returns a handler that serves HTTP requests
    463 // with the contents of the file system rooted at root.
    464 //
    465 // To use the operating system's file system implementation,
    466 // use http.Dir:
    467 //
    468 //     http.Handle("/", http.FileServer(http.Dir("/tmp")))
    469 //
    470 // As a special case, the returned file server redirects any request
    471 // ending in "/index.html" to the same path, without the final
    472 // "index.html".
    473 func FileServer(root FileSystem) Handler {
    474 	return &fileHandler{root}
    475 }
    476 
    477 func (f *fileHandler) ServeHTTP(w ResponseWriter, r *Request) {
    478 	upath := r.URL.Path
    479 	if !strings.HasPrefix(upath, "/") {
    480 		upath = "/" + upath
    481 		r.URL.Path = upath
    482 	}
    483 	serveFile(w, r, f.root, path.Clean(upath), true)
    484 }
    485 
    486 // httpRange specifies the byte range to be sent to the client.
    487 type httpRange struct {
    488 	start, length int64
    489 }
    490 
    491 func (r httpRange) contentRange(size int64) string {
    492 	return fmt.Sprintf("bytes %d-%d/%d", r.start, r.start+r.length-1, size)
    493 }
    494 
    495 func (r httpRange) mimeHeader(contentType string, size int64) textproto.MIMEHeader {
    496 	return textproto.MIMEHeader{
    497 		"Content-Range": {r.contentRange(size)},
    498 		"Content-Type":  {contentType},
    499 	}
    500 }
    501 
    502 // parseRange parses a Range header string as per RFC 2616.
    503 func parseRange(s string, size int64) ([]httpRange, error) {
    504 	if s == "" {
    505 		return nil, nil // header not present
    506 	}
    507 	const b = "bytes="
    508 	if !strings.HasPrefix(s, b) {
    509 		return nil, errors.New("invalid range")
    510 	}
    511 	var ranges []httpRange
    512 	for _, ra := range strings.Split(s[len(b):], ",") {
    513 		ra = strings.TrimSpace(ra)
    514 		if ra == "" {
    515 			continue
    516 		}
    517 		i := strings.Index(ra, "-")
    518 		if i < 0 {
    519 			return nil, errors.New("invalid range")
    520 		}
    521 		start, end := strings.TrimSpace(ra[:i]), strings.TrimSpace(ra[i+1:])
    522 		var r httpRange
    523 		if start == "" {
    524 			// If no start is specified, end specifies the
    525 			// range start relative to the end of the file.
    526 			i, err := strconv.ParseInt(end, 10, 64)
    527 			if err != nil {
    528 				return nil, errors.New("invalid range")
    529 			}
    530 			if i > size {
    531 				i = size
    532 			}
    533 			r.start = size - i
    534 			r.length = size - r.start
    535 		} else {
    536 			i, err := strconv.ParseInt(start, 10, 64)
    537 			if err != nil || i >= size || i < 0 {
    538 				return nil, errors.New("invalid range")
    539 			}
    540 			r.start = i
    541 			if end == "" {
    542 				// If no end is specified, range extends to end of the file.
    543 				r.length = size - r.start
    544 			} else {
    545 				i, err := strconv.ParseInt(end, 10, 64)
    546 				if err != nil || r.start > i {
    547 					return nil, errors.New("invalid range")
    548 				}
    549 				if i >= size {
    550 					i = size - 1
    551 				}
    552 				r.length = i - r.start + 1
    553 			}
    554 		}
    555 		ranges = append(ranges, r)
    556 	}
    557 	return ranges, nil
    558 }
    559 
    560 // countingWriter counts how many bytes have been written to it.
    561 type countingWriter int64
    562 
    563 func (w *countingWriter) Write(p []byte) (n int, err error) {
    564 	*w += countingWriter(len(p))
    565 	return len(p), nil
    566 }
    567 
    568 // rangesMIMESize returns the number of bytes it takes to encode the
    569 // provided ranges as a multipart response.
    570 func rangesMIMESize(ranges []httpRange, contentType string, contentSize int64) (encSize int64) {
    571 	var w countingWriter
    572 	mw := multipart.NewWriter(&w)
    573 	for _, ra := range ranges {
    574 		mw.CreatePart(ra.mimeHeader(contentType, contentSize))
    575 		encSize += ra.length
    576 	}
    577 	mw.Close()
    578 	encSize += int64(w)
    579 	return
    580 }
    581 
    582 func sumRangesSize(ranges []httpRange) (size int64) {
    583 	for _, ra := range ranges {
    584 		size += ra.length
    585 	}
    586 	return
    587 }
    588