Home | History | Annotate | Download | only in get
      1 // Copyright 2012 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package get
      6 
      7 import (
      8 	"bytes"
      9 	"encoding/json"
     10 	"errors"
     11 	"fmt"
     12 	"internal/singleflight"
     13 	"log"
     14 	"net/url"
     15 	"os"
     16 	"os/exec"
     17 	"path/filepath"
     18 	"regexp"
     19 	"strings"
     20 	"sync"
     21 
     22 	"cmd/go/internal/base"
     23 	"cmd/go/internal/cfg"
     24 	"cmd/go/internal/web"
     25 )
     26 
// A vcsCmd describes how to use a version control system
// like Mercurial, Git, or Subversion.
//
// Command strings may contain {key} placeholders (such as {repo}, {dir},
// {tag} and {scheme}); run1 splits a command into arguments and then
// expands the placeholders in each argument.
type vcsCmd struct {
	// name is the human-readable name; it is what String returns.
	name string
	cmd  string // name of binary to invoke command

	createCmd   []string // commands to download a fresh copy of a repository
	downloadCmd []string // commands to download updates into an existing repository

	tagCmd         []tagCmd // commands to list tags
	tagLookupCmd   []tagCmd // commands to lookup tags before running tagSyncCmd
	tagSyncCmd     []string // commands to sync to specific tag
	tagSyncDefault []string // commands to sync to default tag

	// scheme lists the URL schemes this VCS understands, in the order
	// they are tried when pinging a repository.
	// pingCmd is the command ping runs to test a {scheme}://{repo} URL.
	scheme  []string
	pingCmd string

	// remoteRepo reports the remote repository recorded in the checkout
	// rooted at rootDir.
	// resolveRepo, when set, maps remoteRepo to the repository's real
	// location (only Bazaar sets it in this file).
	remoteRepo  func(v *vcsCmd, rootDir string) (remoteRepo string, err error)
	resolveRepo func(v *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error)
}
     47 
// defaultSecureScheme is the set of URL schemes that isSecureScheme
// treats as secure when no VCS-specific override (Git's
// GIT_ALLOW_PROTOCOL) applies. Schemes absent from the map are insecure.
var defaultSecureScheme = map[string]bool{
	"https":   true,
	"git+ssh": true,
	"bzr+ssh": true,
	"svn+ssh": true,
	"ssh":     true,
}
     55 
     56 func (v *vcsCmd) isSecure(repo string) bool {
     57 	u, err := url.Parse(repo)
     58 	if err != nil {
     59 		// If repo is not a URL, it's not secure.
     60 		return false
     61 	}
     62 	return v.isSecureScheme(u.Scheme)
     63 }
     64 
     65 func (v *vcsCmd) isSecureScheme(scheme string) bool {
     66 	switch v.cmd {
     67 	case "git":
     68 		// GIT_ALLOW_PROTOCOL is an environment variable defined by Git. It is a
     69 		// colon-separated list of schemes that are allowed to be used with git
     70 		// fetch/clone. Any scheme not mentioned will be considered insecure.
     71 		if allow := os.Getenv("GIT_ALLOW_PROTOCOL"); allow != "" {
     72 			for _, s := range strings.Split(allow, ":") {
     73 				if s == scheme {
     74 					return true
     75 				}
     76 			}
     77 			return false
     78 		}
     79 	}
     80 	return defaultSecureScheme[scheme]
     81 }
     82 
// A tagCmd describes a command to list available tags
// that can be passed to tagSyncCmd.
//
// The pattern is compiled with a "(?m-s)" prefix by tags and tagSync,
// and the first capture group of each match is taken as the tag.
// Values of this type are written as positional literals in this file,
// so the field order matters.
type tagCmd struct {
	cmd     string // command to list tags
	pattern string // regexp to extract tags from list
}
     89 
// vcsList lists the known version control systems.
// vcsByCmd, vcsFromDir and checkNestedVCS all scan it in this order.
var vcsList = []*vcsCmd{
	vcsHg,
	vcsGit,
	vcsSvn,
	vcsBzr,
	vcsFossil,
}
     98 
     99 // vcsByCmd returns the version control system for the given
    100 // command name (hg, git, svn, bzr).
    101 func vcsByCmd(cmd string) *vcsCmd {
    102 	for _, vcs := range vcsList {
    103 		if vcs.cmd == cmd {
    104 			return vcs
    105 		}
    106 	}
    107 	return nil
    108 }
    109 
// vcsHg describes how to use Mercurial.
var vcsHg = &vcsCmd{
	name: "Mercurial",
	cmd:  "hg",

	createCmd:   []string{"clone -U {repo} {dir}"},
	downloadCmd: []string{"pull"},

	// We allow both tag and branch names as 'tags'
	// for selecting a version. This lets people have
	// a go.release.r60 branch and a go1 branch
	// and make changes in both, without constantly
	// editing .hgtags.
	// Each pattern captures the first whitespace-delimited word of a line.
	tagCmd: []tagCmd{
		{"tags", `^(\S+)`},
		{"branches", `^(\S+)`},
	},
	tagSyncCmd:     []string{"update -r {tag}"},
	tagSyncDefault: []string{"update default"},

	// Schemes are listed in the order they are tried when pinging.
	scheme:     []string{"https", "http", "ssh"},
	pingCmd:    "identify {scheme}://{repo}",
	remoteRepo: hgRemoteRepo,
}
    134 
    135 func hgRemoteRepo(vcsHg *vcsCmd, rootDir string) (remoteRepo string, err error) {
    136 	out, err := vcsHg.runOutput(rootDir, "paths default")
    137 	if err != nil {
    138 		return "", err
    139 	}
    140 	return strings.TrimSpace(string(out)), nil
    141 }
    142 
// vcsGit describes how to use Git.
var vcsGit = &vcsCmd{
	name: "Git",
	cmd:  "git",

	// The "-go-internal-cd {dir}" prefix is interpreted by run1: it makes
	// the rest of the command run inside {dir} instead of being passed to git.
	createCmd:   []string{"clone {repo} {dir}", "-go-internal-cd {dir} submodule update --init --recursive"},
	downloadCmd: []string{"pull --ff-only", "submodule update --init --recursive"},

	tagCmd: []tagCmd{
		// tags/xxx matches a git tag named xxx
		// origin/xxx matches a git branch named xxx on the default remote repository
		{"show-ref", `(?:tags|origin)/(\S+)$`},
	},
	// The lookup captures the whole "tags/xxx" or "origin/xxx" ref name,
	// which tagSync then substitutes for the requested tag.
	tagLookupCmd: []tagCmd{
		{"show-ref tags/{tag} origin/{tag}", `((?:tags|origin)/\S+)$`},
	},
	tagSyncCmd: []string{"checkout {tag}", "submodule update --init --recursive"},
	// both createCmd and downloadCmd update the working dir.
	// No need to do more here. We used to 'checkout master'
	// but that doesn't work if the default branch is not named master.
	// DO NOT add 'checkout master' here.
	// See golang.org/issue/9032.
	tagSyncDefault: []string{"submodule update --init --recursive"},

	// Schemes are listed in the order they are tried when pinging.
	scheme:     []string{"git", "https", "http", "git+ssh", "ssh"},
	pingCmd:    "ls-remote {scheme}://{repo}",
	remoteRepo: gitRemoteRepo,
}
    171 
// scpSyntaxRe matches the SCP-like addresses used by Git to access
// repositories by SSH.
// Its three capture groups are the user, the host, and the path;
// gitRemoteRepo uses them to build an equivalent ssh:// URL.
var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)
    175 
    176 func gitRemoteRepo(vcsGit *vcsCmd, rootDir string) (remoteRepo string, err error) {
    177 	cmd := "config remote.origin.url"
    178 	errParse := errors.New("unable to parse output of git " + cmd)
    179 	errRemoteOriginNotFound := errors.New("remote origin not found")
    180 	outb, err := vcsGit.run1(rootDir, cmd, nil, false)
    181 	if err != nil {
    182 		// if it doesn't output any message, it means the config argument is correct,
    183 		// but the config value itself doesn't exist
    184 		if outb != nil && len(outb) == 0 {
    185 			return "", errRemoteOriginNotFound
    186 		}
    187 		return "", err
    188 	}
    189 	out := strings.TrimSpace(string(outb))
    190 
    191 	var repoURL *url.URL
    192 	if m := scpSyntaxRe.FindStringSubmatch(out); m != nil {
    193 		// Match SCP-like syntax and convert it to a URL.
    194 		// Eg, "git (a] github.com:user/repo" becomes
    195 		// "ssh://git (a] github.com/user/repo".
    196 		repoURL = &url.URL{
    197 			Scheme: "ssh",
    198 			User:   url.User(m[1]),
    199 			Host:   m[2],
    200 			Path:   m[3],
    201 		}
    202 	} else {
    203 		repoURL, err = url.Parse(out)
    204 		if err != nil {
    205 			return "", err
    206 		}
    207 	}
    208 
    209 	// Iterate over insecure schemes too, because this function simply
    210 	// reports the state of the repo. If we can't see insecure schemes then
    211 	// we can't report the actual repo URL.
    212 	for _, s := range vcsGit.scheme {
    213 		if repoURL.Scheme == s {
    214 			return repoURL.String(), nil
    215 		}
    216 	}
    217 	return "", errParse
    218 }
    219 
// vcsBzr describes how to use Bazaar.
// It is the only VCS in this file that sets resolveRepo.
var vcsBzr = &vcsCmd{
	name: "Bazaar",
	cmd:  "bzr",

	createCmd: []string{"branch {repo} {dir}"},

	// Without --overwrite bzr will not pull tags that changed.
	// Replace by --overwrite-tags after http://pad.lv/681792 goes in.
	downloadCmd: []string{"pull --overwrite"},

	// The pattern captures the first whitespace-delimited word of each line.
	tagCmd:         []tagCmd{{"tags", `^(\S+)`}},
	tagSyncCmd:     []string{"update -r {tag}"},
	tagSyncDefault: []string{"update -r revno:-1"},

	// Schemes are listed in the order they are tried when pinging.
	scheme:      []string{"https", "http", "bzr", "bzr+ssh"},
	pingCmd:     "info {scheme}://{repo}",
	remoteRepo:  bzrRemoteRepo,
	resolveRepo: bzrResolveRepo,
}
    240 
    241 func bzrRemoteRepo(vcsBzr *vcsCmd, rootDir string) (remoteRepo string, err error) {
    242 	outb, err := vcsBzr.runOutput(rootDir, "config parent_location")
    243 	if err != nil {
    244 		return "", err
    245 	}
    246 	return strings.TrimSpace(string(outb)), nil
    247 }
    248 
    249 func bzrResolveRepo(vcsBzr *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) {
    250 	outb, err := vcsBzr.runOutput(rootDir, "info "+remoteRepo)
    251 	if err != nil {
    252 		return "", err
    253 	}
    254 	out := string(outb)
    255 
    256 	// Expect:
    257 	// ...
    258 	//   (branch root|repository branch): <URL>
    259 	// ...
    260 
    261 	found := false
    262 	for _, prefix := range []string{"\n  branch root: ", "\n  repository branch: "} {
    263 		i := strings.Index(out, prefix)
    264 		if i >= 0 {
    265 			out = out[i+len(prefix):]
    266 			found = true
    267 			break
    268 		}
    269 	}
    270 	if !found {
    271 		return "", fmt.Errorf("unable to parse output of bzr info")
    272 	}
    273 
    274 	i := strings.Index(out, "\n")
    275 	if i < 0 {
    276 		return "", fmt.Errorf("unable to parse output of bzr info")
    277 	}
    278 	out = out[:i]
    279 	return strings.TrimSpace(out), nil
    280 }
    281 
// vcsSvn describes how to use Subversion.
// It defines no tag commands, so tagSync is a no-op for Subversion.
var vcsSvn = &vcsCmd{
	name: "Subversion",
	cmd:  "svn",

	createCmd:   []string{"checkout {repo} {dir}"},
	downloadCmd: []string{"update"},

	// There is no tag command in subversion.
	// The branch information is all in the path names.

	// Schemes are listed in the order they are tried when pinging.
	scheme:     []string{"https", "http", "svn", "svn+ssh"},
	pingCmd:    "info {scheme}://{repo}",
	remoteRepo: svnRemoteRepo,
}
    297 
    298 func svnRemoteRepo(vcsSvn *vcsCmd, rootDir string) (remoteRepo string, err error) {
    299 	outb, err := vcsSvn.runOutput(rootDir, "info")
    300 	if err != nil {
    301 		return "", err
    302 	}
    303 	out := string(outb)
    304 
    305 	// Expect:
    306 	//
    307 	//	 ...
    308 	// 	URL: <URL>
    309 	// 	...
    310 	//
    311 	// Note that we're not using the Repository Root line,
    312 	// because svn allows checking out subtrees.
    313 	// The URL will be the URL of the subtree (what we used with 'svn co')
    314 	// while the Repository Root may be a much higher parent.
    315 	i := strings.Index(out, "\nURL: ")
    316 	if i < 0 {
    317 		return "", fmt.Errorf("unable to parse output of svn info")
    318 	}
    319 	out = out[i+len("\nURL: "):]
    320 	i = strings.Index(out, "\n")
    321 	if i < 0 {
    322 		return "", fmt.Errorf("unable to parse output of svn info")
    323 	}
    324 	out = out[:i]
    325 	return strings.TrimSpace(out), nil
    326 }
    327 
// fossilRepoName is the name go get associates with a fossil repository. In the
// real world the file can be named anything.
// vcsFossil's create commands clone into a file of this name inside the
// target directory.
const fossilRepoName = ".fossil"
    331 
    332 // vcsFossil describes how to use Fossil (fossil-scm.org)
    333 var vcsFossil = &vcsCmd{
    334 	name: "Fossil",
    335 	cmd:  "fossil",
    336 
    337 	createCmd:   []string{"-go-internal-mkdir {dir} clone {repo} " + filepath.Join("{dir}", fossilRepoName), "-go-internal-cd {dir} open .fossil"},
    338 	downloadCmd: []string{"up"},
    339 
    340 	tagCmd:         []tagCmd{{"tag ls", `(.*)`}},
    341 	tagSyncCmd:     []string{"up tag:{tag}"},
    342 	tagSyncDefault: []string{"up trunk"},
    343 
    344 	scheme:     []string{"https", "http"},
    345 	remoteRepo: fossilRemoteRepo,
    346 }
    347 
    348 func fossilRemoteRepo(vcsFossil *vcsCmd, rootDir string) (remoteRepo string, err error) {
    349 	out, err := vcsFossil.runOutput(rootDir, "remote-url")
    350 	if err != nil {
    351 		return "", err
    352 	}
    353 	return strings.TrimSpace(string(out)), nil
    354 }
    355 
// String returns the human-readable name of the version control system
// (for example "Git"), not the name of its command binary.
func (v *vcsCmd) String() string {
	return v.name
}
    359 
    360 // run runs the command line cmd in the given directory.
    361 // keyval is a list of key, value pairs. run expands
    362 // instances of {key} in cmd into value, but only after
    363 // splitting cmd into individual arguments.
    364 // If an error occurs, run prints the command line and the
    365 // command's combined stdout+stderr to standard error.
    366 // Otherwise run discards the command's output.
    367 func (v *vcsCmd) run(dir string, cmd string, keyval ...string) error {
    368 	_, err := v.run1(dir, cmd, keyval, true)
    369 	return err
    370 }
    371 
    372 // runVerboseOnly is like run but only generates error output to standard error in verbose mode.
    373 func (v *vcsCmd) runVerboseOnly(dir string, cmd string, keyval ...string) error {
    374 	_, err := v.run1(dir, cmd, keyval, false)
    375 	return err
    376 }
    377 
    378 // runOutput is like run but returns the output of the command.
    379 func (v *vcsCmd) runOutput(dir string, cmd string, keyval ...string) ([]byte, error) {
    380 	return v.run1(dir, cmd, keyval, true)
    381 }
    382 
    383 // run1 is the generalized implementation of run and runOutput.
    384 func (v *vcsCmd) run1(dir string, cmdline string, keyval []string, verbose bool) ([]byte, error) {
    385 	m := make(map[string]string)
    386 	for i := 0; i < len(keyval); i += 2 {
    387 		m[keyval[i]] = keyval[i+1]
    388 	}
    389 	args := strings.Fields(cmdline)
    390 	for i, arg := range args {
    391 		args[i] = expand(m, arg)
    392 	}
    393 
    394 	if len(args) >= 2 && args[0] == "-go-internal-mkdir" {
    395 		var err error
    396 		if filepath.IsAbs(args[1]) {
    397 			err = os.Mkdir(args[1], os.ModePerm)
    398 		} else {
    399 			err = os.Mkdir(filepath.Join(dir, args[1]), os.ModePerm)
    400 		}
    401 		if err != nil {
    402 			return nil, err
    403 		}
    404 		args = args[2:]
    405 	}
    406 
    407 	if len(args) >= 2 && args[0] == "-go-internal-cd" {
    408 		if filepath.IsAbs(args[1]) {
    409 			dir = args[1]
    410 		} else {
    411 			dir = filepath.Join(dir, args[1])
    412 		}
    413 		args = args[2:]
    414 	}
    415 
    416 	_, err := exec.LookPath(v.cmd)
    417 	if err != nil {
    418 		fmt.Fprintf(os.Stderr,
    419 			"go: missing %s command. See https://golang.org/s/gogetcmd\n",
    420 			v.name)
    421 		return nil, err
    422 	}
    423 
    424 	cmd := exec.Command(v.cmd, args...)
    425 	cmd.Dir = dir
    426 	cmd.Env = base.EnvForDir(cmd.Dir, os.Environ())
    427 	if cfg.BuildX {
    428 		fmt.Printf("cd %s\n", dir)
    429 		fmt.Printf("%s %s\n", v.cmd, strings.Join(args, " "))
    430 	}
    431 	var buf bytes.Buffer
    432 	cmd.Stdout = &buf
    433 	cmd.Stderr = &buf
    434 	err = cmd.Run()
    435 	out := buf.Bytes()
    436 	if err != nil {
    437 		if verbose || cfg.BuildV {
    438 			fmt.Fprintf(os.Stderr, "# cd %s; %s %s\n", dir, v.cmd, strings.Join(args, " "))
    439 			os.Stderr.Write(out)
    440 		}
    441 		return out, err
    442 	}
    443 	return out, nil
    444 }
    445 
    446 // ping pings to determine scheme to use.
    447 func (v *vcsCmd) ping(scheme, repo string) error {
    448 	return v.runVerboseOnly(".", v.pingCmd, "scheme", scheme, "repo", repo)
    449 }
    450 
    451 // create creates a new copy of repo in dir.
    452 // The parent of dir must exist; dir must not.
    453 func (v *vcsCmd) create(dir, repo string) error {
    454 	for _, cmd := range v.createCmd {
    455 		if err := v.run(".", cmd, "dir", dir, "repo", repo); err != nil {
    456 			return err
    457 		}
    458 	}
    459 	return nil
    460 }
    461 
    462 // download downloads any new changes for the repo in dir.
    463 func (v *vcsCmd) download(dir string) error {
    464 	for _, cmd := range v.downloadCmd {
    465 		if err := v.run(dir, cmd); err != nil {
    466 			return err
    467 		}
    468 	}
    469 	return nil
    470 }
    471 
    472 // tags returns the list of available tags for the repo in dir.
    473 func (v *vcsCmd) tags(dir string) ([]string, error) {
    474 	var tags []string
    475 	for _, tc := range v.tagCmd {
    476 		out, err := v.runOutput(dir, tc.cmd)
    477 		if err != nil {
    478 			return nil, err
    479 		}
    480 		re := regexp.MustCompile(`(?m-s)` + tc.pattern)
    481 		for _, m := range re.FindAllStringSubmatch(string(out), -1) {
    482 			tags = append(tags, m[1])
    483 		}
    484 	}
    485 	return tags, nil
    486 }
    487 
    488 // tagSync syncs the repo in dir to the named tag,
    489 // which either is a tag returned by tags or is v.tagDefault.
    490 func (v *vcsCmd) tagSync(dir, tag string) error {
    491 	if v.tagSyncCmd == nil {
    492 		return nil
    493 	}
    494 	if tag != "" {
    495 		for _, tc := range v.tagLookupCmd {
    496 			out, err := v.runOutput(dir, tc.cmd, "tag", tag)
    497 			if err != nil {
    498 				return err
    499 			}
    500 			re := regexp.MustCompile(`(?m-s)` + tc.pattern)
    501 			m := re.FindStringSubmatch(string(out))
    502 			if len(m) > 1 {
    503 				tag = m[1]
    504 				break
    505 			}
    506 		}
    507 	}
    508 
    509 	if tag == "" && v.tagSyncDefault != nil {
    510 		for _, cmd := range v.tagSyncDefault {
    511 			if err := v.run(dir, cmd); err != nil {
    512 				return err
    513 			}
    514 		}
    515 		return nil
    516 	}
    517 
    518 	for _, cmd := range v.tagSyncCmd {
    519 		if err := v.run(dir, cmd, "tag", tag); err != nil {
    520 			return err
    521 		}
    522 	}
    523 	return nil
    524 }
    525 
// A vcsPath describes how to convert an import path into a
// version control system and repository name.
//
// repoRootFromVCSPaths applies an entry when the import path starts with
// prefix and matches regexp; the named subexpressions of the match are
// then available for expanding repo and vcs.
type vcsPath struct {
	prefix string                              // prefix this description applies to
	re     string                              // pattern for import path
	repo   string                              // repository to use (expand with match of re)
	vcs    string                              // version control system to use (expand with match of re)
	check  func(match map[string]string) error // additional checks
	ping   bool                                // ping for scheme to use to download repo

	regexp *regexp.Regexp // cached compiled form of re
}
    538 
    539 // vcsFromDir inspects dir and its parents to determine the
    540 // version control system and code repository to use.
    541 // On return, root is the import path
    542 // corresponding to the root of the repository.
    543 func vcsFromDir(dir, srcRoot string) (vcs *vcsCmd, root string, err error) {
    544 	// Clean and double-check that dir is in (a subdirectory of) srcRoot.
    545 	dir = filepath.Clean(dir)
    546 	srcRoot = filepath.Clean(srcRoot)
    547 	if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator {
    548 		return nil, "", fmt.Errorf("directory %q is outside source root %q", dir, srcRoot)
    549 	}
    550 
    551 	var vcsRet *vcsCmd
    552 	var rootRet string
    553 
    554 	origDir := dir
    555 	for len(dir) > len(srcRoot) {
    556 		for _, vcs := range vcsList {
    557 			if _, err := os.Stat(filepath.Join(dir, "."+vcs.cmd)); err == nil {
    558 				root := filepath.ToSlash(dir[len(srcRoot)+1:])
    559 				// Record first VCS we find, but keep looking,
    560 				// to detect mistakes like one kind of VCS inside another.
    561 				if vcsRet == nil {
    562 					vcsRet = vcs
    563 					rootRet = root
    564 					continue
    565 				}
    566 				// Allow .git inside .git, which can arise due to submodules.
    567 				if vcsRet == vcs && vcs.cmd == "git" {
    568 					continue
    569 				}
    570 				// Otherwise, we have one VCS inside a different VCS.
    571 				return nil, "", fmt.Errorf("directory %q uses %s, but parent %q uses %s",
    572 					filepath.Join(srcRoot, rootRet), vcsRet.cmd, filepath.Join(srcRoot, root), vcs.cmd)
    573 			}
    574 		}
    575 
    576 		// Move to parent.
    577 		ndir := filepath.Dir(dir)
    578 		if len(ndir) >= len(dir) {
    579 			// Shouldn't happen, but just in case, stop.
    580 			break
    581 		}
    582 		dir = ndir
    583 	}
    584 
    585 	if vcsRet != nil {
    586 		return vcsRet, rootRet, nil
    587 	}
    588 
    589 	return nil, "", fmt.Errorf("directory %q is not using a known version control system", origDir)
    590 }
    591 
    592 // checkNestedVCS checks for an incorrectly-nested VCS-inside-VCS
    593 // situation for dir, checking parents up until srcRoot.
    594 func checkNestedVCS(vcs *vcsCmd, dir, srcRoot string) error {
    595 	if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator {
    596 		return fmt.Errorf("directory %q is outside source root %q", dir, srcRoot)
    597 	}
    598 
    599 	otherDir := dir
    600 	for len(otherDir) > len(srcRoot) {
    601 		for _, otherVCS := range vcsList {
    602 			if _, err := os.Stat(filepath.Join(otherDir, "."+otherVCS.cmd)); err == nil {
    603 				// Allow expected vcs in original dir.
    604 				if otherDir == dir && otherVCS == vcs {
    605 					continue
    606 				}
    607 				// Allow .git inside .git, which can arise due to submodules.
    608 				if otherVCS == vcs && vcs.cmd == "git" {
    609 					continue
    610 				}
    611 				// Otherwise, we have one VCS inside a different VCS.
    612 				return fmt.Errorf("directory %q uses %s, but parent %q uses %s", dir, vcs.cmd, otherDir, otherVCS.cmd)
    613 			}
    614 		}
    615 		// Move to parent.
    616 		newDir := filepath.Dir(otherDir)
    617 		if len(newDir) >= len(otherDir) {
    618 			// Shouldn't happen, but just in case, stop.
    619 			break
    620 		}
    621 		otherDir = newDir
    622 	}
    623 
    624 	return nil
    625 }
    626 
// repoRoot represents a version control system, a repo, and a root of
// where to put it on disk.
// Values are produced by repoRootFromVCSPaths (static mappings) and
// repoRootForImportDynamic (HTML meta tag discovery).
type repoRoot struct {
	// vcs is the version control system used to fetch the repository.
	vcs *vcsCmd

	// repo is the repository URL, including scheme
	repo string

	// root is the import path corresponding to the root of the
	// repository
	root string

	// isCustom is true for custom import paths (those defined by HTML meta tags)
	isCustom bool
}
    642 
// httpPrefixRE matches a leading "http:" or "https:" in an import path.
// repoRootFromVCSPaths uses it to reject import paths written as URLs.
var httpPrefixRE = regexp.MustCompile(`^https?:`)
    644 
    645 // repoRootForImportPath analyzes importPath to determine the
    646 // version control system, and code repository to use.
    647 func repoRootForImportPath(importPath string, security web.SecurityMode) (*repoRoot, error) {
    648 	rr, err := repoRootFromVCSPaths(importPath, "", security, vcsPaths)
    649 	if err == errUnknownSite {
    650 		// If there are wildcards, look up the thing before the wildcard,
    651 		// hoping it applies to the wildcarded parts too.
    652 		// This makes 'go get rsc.io/pdf/...' work in a fresh GOPATH.
    653 		lookup := strings.TrimSuffix(importPath, "/...")
    654 		if i := strings.Index(lookup, "/.../"); i >= 0 {
    655 			lookup = lookup[:i]
    656 		}
    657 		rr, err = repoRootForImportDynamic(lookup, security)
    658 		if err != nil {
    659 			err = fmt.Errorf("unrecognized import path %q (%v)", importPath, err)
    660 		}
    661 	}
    662 	if err != nil {
    663 		rr1, err1 := repoRootFromVCSPaths(importPath, "", security, vcsPathsAfterDynamic)
    664 		if err1 == nil {
    665 			rr = rr1
    666 			err = nil
    667 		}
    668 	}
    669 
    670 	if err == nil && strings.Contains(importPath, "...") && strings.Contains(rr.root, "...") {
    671 		// Do not allow wildcards in the repo root.
    672 		rr = nil
    673 		err = fmt.Errorf("cannot expand ... in %q", importPath)
    674 	}
    675 	return rr, err
    676 }
    677 
// errUnknownSite is returned by repoRootFromVCSPaths when none of the
// given static mappings applies to the import path.
// repoRootForImportPath reacts to it by trying dynamic lookup.
var errUnknownSite = errors.New("dynamic lookup required to find mapping")
    679 
    680 // repoRootFromVCSPaths attempts to map importPath to a repoRoot
    681 // using the mappings defined in vcsPaths.
    682 // If scheme is non-empty, that scheme is forced.
    683 func repoRootFromVCSPaths(importPath, scheme string, security web.SecurityMode, vcsPaths []*vcsPath) (*repoRoot, error) {
    684 	// A common error is to use https://packagepath because that's what
    685 	// hg and git require. Diagnose this helpfully.
    686 	if loc := httpPrefixRE.FindStringIndex(importPath); loc != nil {
    687 		// The importPath has been cleaned, so has only one slash. The pattern
    688 		// ignores the slashes; the error message puts them back on the RHS at least.
    689 		return nil, fmt.Errorf("%q not allowed in import path", importPath[loc[0]:loc[1]]+"//")
    690 	}
    691 	for _, srv := range vcsPaths {
    692 		if !strings.HasPrefix(importPath, srv.prefix) {
    693 			continue
    694 		}
    695 		m := srv.regexp.FindStringSubmatch(importPath)
    696 		if m == nil {
    697 			if srv.prefix != "" {
    698 				return nil, fmt.Errorf("invalid %s import path %q", srv.prefix, importPath)
    699 			}
    700 			continue
    701 		}
    702 
    703 		// Build map of named subexpression matches for expand.
    704 		match := map[string]string{
    705 			"prefix": srv.prefix,
    706 			"import": importPath,
    707 		}
    708 		for i, name := range srv.regexp.SubexpNames() {
    709 			if name != "" && match[name] == "" {
    710 				match[name] = m[i]
    711 			}
    712 		}
    713 		if srv.vcs != "" {
    714 			match["vcs"] = expand(match, srv.vcs)
    715 		}
    716 		if srv.repo != "" {
    717 			match["repo"] = expand(match, srv.repo)
    718 		}
    719 		if srv.check != nil {
    720 			if err := srv.check(match); err != nil {
    721 				return nil, err
    722 			}
    723 		}
    724 		vcs := vcsByCmd(match["vcs"])
    725 		if vcs == nil {
    726 			return nil, fmt.Errorf("unknown version control system %q", match["vcs"])
    727 		}
    728 		if srv.ping {
    729 			if scheme != "" {
    730 				match["repo"] = scheme + "://" + match["repo"]
    731 			} else {
    732 				for _, scheme := range vcs.scheme {
    733 					if security == web.Secure && !vcs.isSecureScheme(scheme) {
    734 						continue
    735 					}
    736 					if vcs.ping(scheme, match["repo"]) == nil {
    737 						match["repo"] = scheme + "://" + match["repo"]
    738 						break
    739 					}
    740 				}
    741 			}
    742 		}
    743 		rr := &repoRoot{
    744 			vcs:  vcs,
    745 			repo: match["repo"],
    746 			root: match["root"],
    747 		}
    748 		return rr, nil
    749 	}
    750 	return nil, errUnknownSite
    751 }
    752 
    753 // repoRootForImportDynamic finds a *repoRoot for a custom domain that's not
    754 // statically known by repoRootForImportPathStatic.
    755 //
    756 // This handles custom import paths like "name.tld/pkg/foo" or just "name.tld".
    757 func repoRootForImportDynamic(importPath string, security web.SecurityMode) (*repoRoot, error) {
    758 	slash := strings.Index(importPath, "/")
    759 	if slash < 0 {
    760 		slash = len(importPath)
    761 	}
    762 	host := importPath[:slash]
    763 	if !strings.Contains(host, ".") {
    764 		return nil, errors.New("import path does not begin with hostname")
    765 	}
    766 	urlStr, body, err := web.GetMaybeInsecure(importPath, security)
    767 	if err != nil {
    768 		msg := "https fetch: %v"
    769 		if security == web.Insecure {
    770 			msg = "http/" + msg
    771 		}
    772 		return nil, fmt.Errorf(msg, err)
    773 	}
    774 	defer body.Close()
    775 	imports, err := parseMetaGoImports(body)
    776 	if err != nil {
    777 		return nil, fmt.Errorf("parsing %s: %v", importPath, err)
    778 	}
    779 	// Find the matched meta import.
    780 	mmi, err := matchGoImport(imports, importPath)
    781 	if err != nil {
    782 		if _, ok := err.(ImportMismatchError); !ok {
    783 			return nil, fmt.Errorf("parse %s: %v", urlStr, err)
    784 		}
    785 		return nil, fmt.Errorf("parse %s: no go-import meta tags (%s)", urlStr, err)
    786 	}
    787 	if cfg.BuildV {
    788 		log.Printf("get %q: found meta tag %#v at %s", importPath, mmi, urlStr)
    789 	}
    790 	// If the import was "uni.edu/bob/project", which said the
    791 	// prefix was "uni.edu" and the RepoRoot was "evilroot.com",
    792 	// make sure we don't trust Bob and check out evilroot.com to
    793 	// "uni.edu" yet (possibly overwriting/preempting another
    794 	// non-evil student). Instead, first verify the root and see
    795 	// if it matches Bob's claim.
    796 	if mmi.Prefix != importPath {
    797 		if cfg.BuildV {
    798 			log.Printf("get %q: verifying non-authoritative meta tag", importPath)
    799 		}
    800 		urlStr0 := urlStr
    801 		var imports []metaImport
    802 		urlStr, imports, err = metaImportsForPrefix(mmi.Prefix, security)
    803 		if err != nil {
    804 			return nil, err
    805 		}
    806 		metaImport2, err := matchGoImport(imports, importPath)
    807 		if err != nil || mmi != metaImport2 {
    808 			return nil, fmt.Errorf("%s and %s disagree about go-import for %s", urlStr0, urlStr, mmi.Prefix)
    809 		}
    810 	}
    811 
    812 	if !strings.Contains(mmi.RepoRoot, "://") {
    813 		return nil, fmt.Errorf("%s: invalid repo root %q; no scheme", urlStr, mmi.RepoRoot)
    814 	}
    815 	rr := &repoRoot{
    816 		vcs:      vcsByCmd(mmi.VCS),
    817 		repo:     mmi.RepoRoot,
    818 		root:     mmi.Prefix,
    819 		isCustom: true,
    820 	}
    821 	if rr.vcs == nil {
    822 		return nil, fmt.Errorf("%s: unknown vcs %q", urlStr, mmi.VCS)
    823 	}
    824 	return rr, nil
    825 }
    826 
// fetchGroup is the singleflight group through which
// metaImportsForPrefix issues its fetches, keyed by import prefix.
var fetchGroup singleflight.Group

// fetchCacheMu guards fetchCache, which remembers the outcome
// (including errors) of every fetch made by metaImportsForPrefix.
var (
	fetchCacheMu sync.Mutex
	fetchCache   = map[string]fetchResult{} // key is metaImportsForPrefix's importPrefix
)
    832 
    833 // metaImportsForPrefix takes a package's root import path as declared in a <meta> tag
    834 // and returns its HTML discovery URL and the parsed metaImport lines
    835 // found on the page.
    836 //
    837 // The importPath is of the form "golang.org/x/tools".
    838 // It is an error if no imports are found.
    839 // urlStr will still be valid if err != nil.
    840 // The returned urlStr will be of the form "https://golang.org/x/tools?go-get=1"
    841 func metaImportsForPrefix(importPrefix string, security web.SecurityMode) (urlStr string, imports []metaImport, err error) {
    842 	setCache := func(res fetchResult) (fetchResult, error) {
    843 		fetchCacheMu.Lock()
    844 		defer fetchCacheMu.Unlock()
    845 		fetchCache[importPrefix] = res
    846 		return res, nil
    847 	}
    848 
    849 	resi, _, _ := fetchGroup.Do(importPrefix, func() (resi interface{}, err error) {
    850 		fetchCacheMu.Lock()
    851 		if res, ok := fetchCache[importPrefix]; ok {
    852 			fetchCacheMu.Unlock()
    853 			return res, nil
    854 		}
    855 		fetchCacheMu.Unlock()
    856 
    857 		urlStr, body, err := web.GetMaybeInsecure(importPrefix, security)
    858 		if err != nil {
    859 			return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("fetch %s: %v", urlStr, err)})
    860 		}
    861 		imports, err := parseMetaGoImports(body)
    862 		if err != nil {
    863 			return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("parsing %s: %v", urlStr, err)})
    864 		}
    865 		if len(imports) == 0 {
    866 			err = fmt.Errorf("fetch %s: no go-import meta tag", urlStr)
    867 		}
    868 		return setCache(fetchResult{urlStr: urlStr, imports: imports, err: err})
    869 	})
    870 	res := resi.(fetchResult)
    871 	return res.urlStr, res.imports, res.err
    872 }
    873 
    874 type fetchResult struct {
    875 	urlStr  string // e.g. "https://foo.com/x/bar?go-get=1"
    876 	imports []metaImport
    877 	err     error
    878 }
    879 
    880 // metaImport represents the parsed <meta name="go-import"
    881 // content="prefix vcs reporoot" /> tags from HTML files.
    882 type metaImport struct {
    883 	Prefix, VCS, RepoRoot string
    884 }
    885 
    886 func splitPathHasPrefix(path, prefix []string) bool {
    887 	if len(path) < len(prefix) {
    888 		return false
    889 	}
    890 	for i, p := range prefix {
    891 		if path[i] != p {
    892 			return false
    893 		}
    894 	}
    895 	return true
    896 }
    897 
    898 // A ImportMismatchError is returned where metaImport/s are present
    899 // but none match our import path.
    900 type ImportMismatchError struct {
    901 	importPath string
    902 	mismatches []string // the meta imports that were discarded for not matching our importPath
    903 }
    904 
    905 func (m ImportMismatchError) Error() string {
    906 	formattedStrings := make([]string, len(m.mismatches))
    907 	for i, pre := range m.mismatches {
    908 		formattedStrings[i] = fmt.Sprintf("meta tag %s did not match import path %s", pre, m.importPath)
    909 	}
    910 	return strings.Join(formattedStrings, ", ")
    911 }
    912 
    913 // matchGoImport returns the metaImport from imports matching importPath.
    914 // An error is returned if there are multiple matches.
    915 // errNoMatch is returned if none match.
    916 func matchGoImport(imports []metaImport, importPath string) (metaImport, error) {
    917 	match := -1
    918 	imp := strings.Split(importPath, "/")
    919 
    920 	errImportMismatch := ImportMismatchError{importPath: importPath}
    921 	for i, im := range imports {
    922 		pre := strings.Split(im.Prefix, "/")
    923 
    924 		if !splitPathHasPrefix(imp, pre) {
    925 			errImportMismatch.mismatches = append(errImportMismatch.mismatches, im.Prefix)
    926 			continue
    927 		}
    928 
    929 		if match != -1 {
    930 			return metaImport{}, fmt.Errorf("multiple meta tags match import path %q", importPath)
    931 		}
    932 		match = i
    933 	}
    934 
    935 	if match == -1 {
    936 		return metaImport{}, errImportMismatch
    937 	}
    938 	return imports[match], nil
    939 }
    940 
    941 // expand rewrites s to replace {k} with match[k] for each key k in match.
    942 func expand(match map[string]string, s string) string {
    943 	for k, v := range match {
    944 		s = strings.Replace(s, "{"+k+"}", v, -1)
    945 	}
    946 	return s
    947 }
    948 
    949 // vcsPaths defines the meaning of import paths referring to
    950 // commonly-used VCS hosting sites (github.com/user/dir)
    951 // and import paths referring to a fully-qualified importPath
    952 // containing a VCS type (foo.com/repo.git/dir)
    953 var vcsPaths = []*vcsPath{
    954 	// Github
    955 	{
    956 		prefix: "github.com/",
    957 		re:     `^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[\p{L}0-9_.\-]+)*$`,
    958 		vcs:    "git",
    959 		repo:   "https://{root}",
    960 		check:  noVCSSuffix,
    961 	},
    962 
    963 	// Bitbucket
    964 	{
    965 		prefix: "bitbucket.org/",
    966 		re:     `^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
    967 		repo:   "https://{root}",
    968 		check:  bitbucketVCS,
    969 	},
    970 
    971 	// IBM DevOps Services (JazzHub)
    972 	{
    973 		prefix: "hub.jazz.net/git/",
    974 		re:     `^(?P<root>hub.jazz.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`,
    975 		vcs:    "git",
    976 		repo:   "https://{root}",
    977 		check:  noVCSSuffix,
    978 	},
    979 
    980 	// Git at Apache
    981 	{
    982 		prefix: "git.apache.org/",
    983 		re:     `^(?P<root>git.apache.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`,
    984 		vcs:    "git",
    985 		repo:   "https://{root}",
    986 	},
    987 
    988 	// Git at OpenStack
    989 	{
    990 		prefix: "git.openstack.org/",
    991 		re:     `^(?P<root>git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(\.git)?(/[A-Za-z0-9_.\-]+)*$`,
    992 		vcs:    "git",
    993 		repo:   "https://{root}",
    994 	},
    995 
    996 	// chiselapp.com for fossil
    997 	{
    998 		prefix: "chiselapp.com/",
    999 		re:     `^(?P<root>chiselapp\.com/user/[A-Za-z0-9]+/repository/[A-Za-z0-9_.\-]+)$`,
   1000 		vcs:    "fossil",
   1001 		repo:   "https://{root}",
   1002 	},
   1003 
   1004 	// General syntax for any server.
   1005 	// Must be last.
   1006 	{
   1007 		re:   `^(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$`,
   1008 		ping: true,
   1009 	},
   1010 }
   1011 
// vcsPathsAfterDynamic gives additional vcsPaths entries
// to try after the dynamic HTML check.
// This gives those sites a chance to introduce <meta> tags
// as part of a graceful transition away from the hard-coded logic.
var vcsPathsAfterDynamic = []*vcsPath{
	// Launchpad. See golang.org/issue/11436.
	//
	// The pattern accepts either launchpad.net/project with an optional
	// /series element, or launchpad.net/~user/(+junk|project)/branch,
	// in both cases followed by any number of further path elements.
	// launchpadVCS settles whether the element after the project is a
	// series or a directory.
	{
		prefix: "launchpad.net/",
		re:     `^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
		vcs:    "bzr",
		repo:   "https://{root}",
		check:  launchpadVCS,
	},
}
   1026 
   1027 func init() {
   1028 	// fill in cached regexps.
   1029 	// Doing this eagerly discovers invalid regexp syntax
   1030 	// without having to run a command that needs that regexp.
   1031 	for _, srv := range vcsPaths {
   1032 		srv.regexp = regexp.MustCompile(srv.re)
   1033 	}
   1034 	for _, srv := range vcsPathsAfterDynamic {
   1035 		srv.regexp = regexp.MustCompile(srv.re)
   1036 	}
   1037 }
   1038 
   1039 // noVCSSuffix checks that the repository name does not
   1040 // end in .foo for any version control system foo.
   1041 // The usual culprit is ".git".
   1042 func noVCSSuffix(match map[string]string) error {
   1043 	repo := match["repo"]
   1044 	for _, vcs := range vcsList {
   1045 		if strings.HasSuffix(repo, "."+vcs.cmd) {
   1046 			return fmt.Errorf("invalid version control suffix in %s path", match["prefix"])
   1047 		}
   1048 	}
   1049 	return nil
   1050 }
   1051 
   1052 // bitbucketVCS determines the version control system for a
   1053 // Bitbucket repository, by using the Bitbucket API.
   1054 func bitbucketVCS(match map[string]string) error {
   1055 	if err := noVCSSuffix(match); err != nil {
   1056 		return err
   1057 	}
   1058 
   1059 	var resp struct {
   1060 		SCM string `json:"scm"`
   1061 	}
   1062 	url := expand(match, "https://api.bitbucket.org/2.0/repositories/{bitname}?fields=scm")
   1063 	data, err := web.Get(url)
   1064 	if err != nil {
   1065 		if httpErr, ok := err.(*web.HTTPError); ok && httpErr.StatusCode == 403 {
   1066 			// this may be a private repository. If so, attempt to determine which
   1067 			// VCS it uses. See issue 5375.
   1068 			root := match["root"]
   1069 			for _, vcs := range []string{"git", "hg"} {
   1070 				if vcsByCmd(vcs).ping("https", root) == nil {
   1071 					resp.SCM = vcs
   1072 					break
   1073 				}
   1074 			}
   1075 		}
   1076 
   1077 		if resp.SCM == "" {
   1078 			return err
   1079 		}
   1080 	} else {
   1081 		if err := json.Unmarshal(data, &resp); err != nil {
   1082 			return fmt.Errorf("decoding %s: %v", url, err)
   1083 		}
   1084 	}
   1085 
   1086 	if vcsByCmd(resp.SCM) != nil {
   1087 		match["vcs"] = resp.SCM
   1088 		if resp.SCM == "git" {
   1089 			match["repo"] += ".git"
   1090 		}
   1091 		return nil
   1092 	}
   1093 
   1094 	return fmt.Errorf("unable to detect version control system for bitbucket.org/ path")
   1095 }
   1096 
   1097 // launchpadVCS solves the ambiguity for "lp.net/project/foo". In this case,
   1098 // "foo" could be a series name registered in Launchpad with its own branch,
   1099 // and it could also be the name of a directory within the main project
   1100 // branch one level up.
   1101 func launchpadVCS(match map[string]string) error {
   1102 	if match["project"] == "" || match["series"] == "" {
   1103 		return nil
   1104 	}
   1105 	_, err := web.Get(expand(match, "https://code.launchpad.net/{project}{series}/.bzr/branch-format"))
   1106 	if err != nil {
   1107 		match["root"] = expand(match, "launchpad.net/{project}")
   1108 		match["repo"] = expand(match, "https://{root}")
   1109 	}
   1110 	return nil
   1111 }
   1112