cmd/go: terminate TestScript commands more aggressively when the test times out

- Avoid starting subprocesses when the test is already very close to timing out. The overhead of starting and stopping processes may cause the test to exceed its deadline even if each individual process is signaled soon after it is started. - If a command does not shut down quickly enough after receiving os.Interrupt, send it os.Kill using the same style of grace period as in CL 228438. - Fail the test if a background command whose exit status is not ignored is left running at the end of the test. We have no reliable way to distinguish a failure due to the termination signal from an unexpected failure, and the termination signal varies across platforms (so may cause failure on one platform but success on another). For #38797 Change-Id: I767898cf551dca45579bf01a9d1bb312e12d6193 Reviewed-on: https://go-review.googlesource.com/c/go/+/233526 Run-TryBot: Bryan C. Mills <bcmills@google.com> Reviewed-by: Jay Conrod <jayconrod@google.com>
2025-05-05 23:53:05 +00:00 · 2020-05-12 23:06:14 -04:00 · 2020-05-12 23:06:14 -04:00 · e3ccf40623
commit e3ccf40623
parent a88c26eb28
4 changed files with 202 additions and 113 deletions
--- a/src/cmd/go/go_test.go
+++ b/src/cmd/go/go_test.go
@ -6,7 +6,6 @@ package main_test
 import (
 	"bytes"
 	"context"
 	"debug/elf"
 	"debug/macho"
 	"debug/pe"
@ -114,12 +113,6 @@ var testGo string
 var testTmpDir string
 var testBin string
 // testCtx is canceled when the test binary is about to time out.
 //
 // If https://golang.org/issue/28135 is accepted, uses of this variable in test
 // functions should be replaced by t.Context().
 var testCtx = context.Background()
 // The TestMain function creates a go command for testing purposes and
 // deletes it after the tests have been run.
 func TestMain(m *testing.M) {
@ -135,19 +128,6 @@ func TestMain(m *testing.M) {
 	flag.Parse()
 	timeoutFlag := flag.Lookup("test.timeout")
 	if timeoutFlag != nil {
 		// TODO(golang.org/issue/28147): The go command does not pass the
 		// test.timeout flag unless either -timeout or -test.timeout is explicitly
 		// set on the command line.
 		if d := timeoutFlag.Value.(flag.Getter).Get().(time.Duration); d != 0 {
 			aBitShorter := d * 95 / 100
 			var cancel context.CancelFunc
 			testCtx, cancel = context.WithTimeout(testCtx, aBitShorter)
 			defer cancel()
 		}
 	}
 	if *proxyAddr != "" {
 		StartProxy()
 		select {}
--- a/src/cmd/go/script_test.go
+++ b/src/cmd/go/script_test.go
@ -10,6 +10,7 @@ package main_test
 import (
 	"bytes"
 	"context"
 	"errors"
 	"fmt"
 	"go/build"
 	"internal/testenv"
@ -77,13 +78,16 @@ type testScript struct {
 	stderr     string            // standard error from last 'go' command; for 'stderr' command
 	stopped    bool              // test wants to stop early
 	start      time.Time         // time phase started
-	background []backgroundCmd   // backgrounded 'exec' and 'go' commands
+	background []*backgroundCmd  // backgrounded 'exec' and 'go' commands
 }
 type backgroundCmd struct {
-	cmd  *exec.Cmd
+	want           simpleStatus
-	wait <-chan struct{}
+	args           []string
-	want simpleStatus
+	cancel         context.CancelFunc
 	done           <-chan struct{}
 	err            error
 	stdout, stderr strings.Builder
 }
 type simpleStatus string
@ -193,10 +197,10 @@ func (ts *testScript) run() {
 		// before we print PASS. If we return early (e.g., due to a test failure),
 		// don't print anything about the processes that were still running.
 		for _, bg := range ts.background {
-			interruptProcess(bg.cmd.Process)
+			bg.cancel()
 		}
 		for _, bg := range ts.background {
-			<-bg.wait
+			<-bg.done
 		}
 		ts.background = nil
@ -347,7 +351,7 @@ Script:
 	}
 	for _, bg := range ts.background {
-		interruptProcess(bg.cmd.Process)
+		bg.cancel()
 	}
 	ts.cmdWait(success, nil)
@ -633,40 +637,35 @@ func (ts *testScript) cmdExec(want simpleStatus, args []string) {
 		ts.fatalf("usage: exec program [args...] [&]")
 	}
-	var err error
+	background := false
 	if len(args) > 0 && args[len(args)-1] == "&" {
-		var cmd *exec.Cmd
+		background = true
-		cmd, err = ts.execBackground(args[0], args[1:len(args)-1]...)
+		args = args[:len(args)-1]
 		if err == nil {
 			wait := make(chan struct{})
 			go func() {
 				ctxWait(testCtx, cmd)
 				close(wait)
 			}()
 			ts.background = append(ts.background, backgroundCmd{cmd, wait, want})
 		}
 		ts.stdout, ts.stderr = "", ""
 	} else {
 		ts.stdout, ts.stderr, err = ts.exec(args[0], args[1:]...)
 		if ts.stdout != "" {
 			fmt.Fprintf(&ts.log, "[stdout]\n%s", ts.stdout)
 		}
 		if ts.stderr != "" {
 			fmt.Fprintf(&ts.log, "[stderr]\n%s", ts.stderr)
 		}
 		if err == nil && want == failure {
 			ts.fatalf("unexpected command success")
 		}
 	}
 	bg, err := ts.startBackground(want, args[0], args[1:]...)
 	if err != nil {
-		fmt.Fprintf(&ts.log, "[%v]\n", err)
+		ts.fatalf("unexpected error starting command: %v", err)
 		if testCtx.Err() != nil {
 			ts.fatalf("test timed out while running command")
 		} else if want == success {
 			ts.fatalf("unexpected command failure")
 		}
 	}
 	if background {
 		ts.stdout, ts.stderr = "", ""
 		ts.background = append(ts.background, bg)
 		return
 	}
 	<-bg.done
 	ts.stdout = bg.stdout.String()
 	ts.stderr = bg.stderr.String()
 	if ts.stdout != "" {
 		fmt.Fprintf(&ts.log, "[stdout]\n%s", ts.stdout)
 	}
 	if ts.stderr != "" {
 		fmt.Fprintf(&ts.log, "[stderr]\n%s", ts.stderr)
 	}
 	if bg.err != nil {
 		fmt.Fprintf(&ts.log, "[%v]\n", bg.err)
 	}
 	ts.checkCmd(bg)
 }
 // exists checks that the list of files exists.
@ -759,7 +758,7 @@ func (ts *testScript) cmdSkip(want simpleStatus, args []string) {
 	// Before we mark the test as skipped, shut down any background processes and
 	// make sure they have returned the correct status.
 	for _, bg := range ts.background {
-		interruptProcess(bg.cmd.Process)
+		bg.cancel()
 	}
 	ts.cmdWait(success, nil)
@ -932,34 +931,24 @@ func (ts *testScript) cmdWait(want simpleStatus, args []string) {
 	var stdouts, stderrs []string
 	for _, bg := range ts.background {
-		<-bg.wait
+		<-bg.done
-		args := append([]string{filepath.Base(bg.cmd.Args[0])}, bg.cmd.Args[1:]...)
+		args := append([]string{filepath.Base(bg.args[0])}, bg.args[1:]...)
-		fmt.Fprintf(&ts.log, "[background] %s: %v\n", strings.Join(args, " "), bg.cmd.ProcessState)
+		fmt.Fprintf(&ts.log, "[background] %s: %v\n", strings.Join(args, " "), bg.err)
-		cmdStdout := bg.cmd.Stdout.(*strings.Builder).String()
+		cmdStdout := bg.stdout.String()
 		if cmdStdout != "" {
 			fmt.Fprintf(&ts.log, "[stdout]\n%s", cmdStdout)
 			stdouts = append(stdouts, cmdStdout)
 		}
-		cmdStderr := bg.cmd.Stderr.(*strings.Builder).String()
+		cmdStderr := bg.stderr.String()
 		if cmdStderr != "" {
 			fmt.Fprintf(&ts.log, "[stderr]\n%s", cmdStderr)
 			stderrs = append(stderrs, cmdStderr)
 		}
-		if bg.cmd.ProcessState.Success() {
+		ts.checkCmd(bg)
 			if bg.want == failure {
 				ts.fatalf("unexpected command success")
 			}
 		} else {
 			if testCtx.Err() != nil {
 				ts.fatalf("test timed out while running command")
 			} else if bg.want == success {
 				ts.fatalf("unexpected command failure")
 			}
 		}
 	}
 	ts.stdout = strings.Join(stdouts, "")
@ -987,58 +976,176 @@ func (ts *testScript) check(err error) {
 	}
 }
 func (ts *testScript) checkCmd(bg *backgroundCmd) {
 	select {
 	case <-bg.done:
 	default:
 		panic("checkCmd called when not done")
 	}
 	if bg.err == nil {
 		if bg.want == failure {
 			ts.fatalf("unexpected command success")
 		}
 		return
 	}
 	if errors.Is(bg.err, context.DeadlineExceeded) {
 		ts.fatalf("test timed out while running command")
 	}
 	if errors.Is(bg.err, context.Canceled) {
 		// The process was still running at the end of the test.
 		// The test must not depend on its exit status.
 		if bg.want != successOrFailure {
 			ts.fatalf("unexpected background command remaining at test end")
 		}
 		return
 	}
 	if bg.want == success {
 		ts.fatalf("unexpected command failure")
 	}
 }
 // exec runs the given command line (an actual subprocess, not simulated)
 // in ts.cd with environment ts.env and then returns collected standard output and standard error.
 func (ts *testScript) exec(command string, args ...string) (stdout, stderr string, err error) {
-	cmd := exec.Command(command, args...)
+	bg, err := ts.startBackground(success, command, args...)
-	cmd.Dir = ts.cd
+	if err != nil {
-	cmd.Env = append(ts.env, "PWD="+ts.cd)
+		return "", "", err
 	var stdoutBuf, stderrBuf strings.Builder
 	cmd.Stdout = &stdoutBuf
 	cmd.Stderr = &stderrBuf
 	if err = cmd.Start(); err == nil {
 		err = ctxWait(testCtx, cmd)
 	}
-	return stdoutBuf.String(), stderrBuf.String(), err
+	<-bg.done
 	return bg.stdout.String(), bg.stderr.String(), bg.err
 }
-// execBackground starts the given command line (an actual subprocess, not simulated)
+// startBackground starts the given command line (an actual subprocess, not simulated)
 // in ts.cd with environment ts.env.
-func (ts *testScript) execBackground(command string, args ...string) (*exec.Cmd, error) {
+func (ts *testScript) startBackground(want simpleStatus, command string, args ...string) (*backgroundCmd, error) {
 	done := make(chan struct{})
 	bg := &backgroundCmd{
 		want:   want,
 		args:   append([]string{command}, args...),
 		done:   done,
 		cancel: func() {},
 	}
 	ctx := context.Background()
 	gracePeriod := 100 * time.Millisecond
 	if deadline, ok := ts.t.Deadline(); ok {
 		timeout := time.Until(deadline)
 		// If time allows, increase the termination grace period to 5% of the
 		// remaining time.
 		if gp := timeout / 20; gp > gracePeriod {
 			gracePeriod = gp
 		}
 		// Send the first termination signal with two grace periods remaining.
 		// If it still hasn't finished after the first period has elapsed,
 		// we'll escalate to os.Kill with a second period remaining until the
 		// test deadline..
 		timeout -= 2 * gracePeriod
 		if timeout <= 0 {
 			// The test has less than the grace period remaining. There is no point in
 			// even starting the command, because it will be terminated immediately.
 			// Save the expense of starting it in the first place.
 			bg.err = context.DeadlineExceeded
 			close(done)
 			return bg, nil
 		}
 		ctx, bg.cancel = context.WithTimeout(ctx, timeout)
 	}
 	cmd := exec.Command(command, args...)
 	cmd.Dir = ts.cd
 	cmd.Env = append(ts.env, "PWD="+ts.cd)
-	var stdoutBuf, stderrBuf strings.Builder
+	cmd.Stdout = &bg.stdout
-	cmd.Stdout = &stdoutBuf
+	cmd.Stderr = &bg.stderr
-	cmd.Stderr = &stderrBuf
+	if err := cmd.Start(); err != nil {
-	return cmd, cmd.Start()
+		bg.cancel()
-}
+		return nil, err
 // ctxWait is like cmd.Wait, but terminates cmd with os.Interrupt if ctx becomes done.
 //
 // This differs from exec.CommandContext in that it prefers os.Interrupt over os.Kill.
 // (See https://golang.org/issue/21135.)
 func ctxWait(ctx context.Context, cmd *exec.Cmd) error {
 	errc := make(chan error, 1)
 	go func() { errc <- cmd.Wait() }()
 	select {
 	case err := <-errc:
 		return err
 	case <-ctx.Done():
 		interruptProcess(cmd.Process)
 		return <-errc
 	}
 	go func() {
 		bg.err = waitOrStop(ctx, cmd, stopSignal(), gracePeriod)
 		close(done)
 	}()
 	return bg, nil
 }
-// interruptProcess sends os.Interrupt to p if supported, or os.Kill otherwise.
+// stopSignal returns the appropriate signal to use to request that a process
-func interruptProcess(p *os.Process) {
+// stop execution.
-	if err := p.Signal(os.Interrupt); err != nil {
+func stopSignal() os.Signal {
 	if runtime.GOOS == "windows" {
 		// Per https://golang.org/pkg/os/#Signal, “Interrupt is not implemented on
 		// Windows; using it with os.Process.Signal will return an error.”
 		// Fall back to Kill instead.
-		p.Kill()
+		return os.Kill
 	}
 	return os.Interrupt
 }
 // waitOrStop waits for the already-started command cmd by calling its Wait method.
 //
 // If cmd does not return before ctx is done, waitOrStop sends it the given interrupt signal.
 // If killDelay is positive, waitOrStop waits that additional period for Wait to return before sending os.Kill.
 //
 // This function is copied from the one added to x/playground/internal in
 // http://golang.org/cl/228438.
 func waitOrStop(ctx context.Context, cmd *exec.Cmd, interrupt os.Signal, killDelay time.Duration) error {
 	if cmd.Process == nil {
 		panic("waitOrStop called with a nil cmd.Process — missing Start call?")
 	}
 	if interrupt == nil {
 		panic("waitOrStop requires a non-nil interrupt signal")
 	}
 	errc := make(chan error)
 	go func() {
 		select {
 		case errc <- nil:
 			return
 		case <-ctx.Done():
 		}
 		err := cmd.Process.Signal(interrupt)
 		if err == nil {
 			err = ctx.Err() // Report ctx.Err() as the reason we interrupted.
 		} else if err.Error() == "os: process already finished" {
 			errc <- nil
 			return
 		}
 		if killDelay > 0 {
 			timer := time.NewTimer(killDelay)
 			select {
 			// Report ctx.Err() as the reason we interrupted the process...
 			case errc <- ctx.Err():
 				timer.Stop()
 				return
 			// ...but after killDelay has elapsed, fall back to a stronger signal.
 			case <-timer.C:
 			}
 			// Wait still hasn't returned.
 			// Kill the process harder to make sure that it exits.
 			//
 			// Ignore any error: if cmd.Process has already terminated, we still
 			// want to send ctx.Err() (or the error from the Interrupt call)
 			// to properly attribute the signal that may have terminated it.
 			_ = cmd.Process.Kill()
 		}
 		errc <- err
 	}()
 	waitErr := cmd.Wait()
 	if interruptErr := <-errc; interruptErr != nil {
 		return interruptErr
 	}
 	return waitErr
 }
 // expand applies environment variable expansion to the string s.
--- a/src/cmd/go/testdata/script/README
+++ b/src/cmd/go/testdata/script/README
@ -138,8 +138,9 @@ The commands are:
  output and standard error of the previous command is cleared, but the output
  of the background process is buffered — and checking of its exit status is
  delayed — until the next call to 'wait', 'skip', or 'stop' or the end of the
-  test. At the end of the test, any remaining background processes are
+  test. If any background processes remain at the end of the test, they
-  terminated using os.Interrupt (if supported) or os.Kill.
+  are terminated using os.Interrupt (if supported) or os.Kill and the test
  must not depend upon their exit status.
 - [!] exists [-readonly] [-exec] file...
  Each of the listed files or directories must (or must not) exist.
--- a/src/cmd/go/testdata/script/script_wait.txt
+++ b/src/cmd/go/testdata/script/script_wait.txt
@ -19,6 +19,7 @@ wait
 stdout 'foo\nbar'
 # The end of the test should interrupt or kill any remaining background
-# programs.
+# programs, but that should not cause the test to fail if it does not
-[!exec:sleep] skip
+# care about the exit status of those programs.
-! exec sleep 86400 &
+[!exec:sleep] stop
 ? exec sleep 86400 &