1
0
mirror of https://github.com/golang/go synced 2024-09-30 22:08:32 -06:00

net/http: scale rstAvoidanceDelay to reduce test flakiness

As far as I can tell, some flakiness is unavoidable in tests
that race a large client request write against a server's response
when the server doesn't read the full request.
It does not appear to be possible to simultaneously ensure that
well-behaved clients see EOF instead of ECONNRESET and also prevent
misbehaving clients from consuming arbitrary server resources.
(See RFC 7230 §6.6 for more detail.)

Since there doesn't appear to be a way to cleanly eliminate
this source of flakiness, we can instead work around it:
we can allow the test to adjust the hard-coded delay if it
sees a plausibly-related failure, so that the test can retry
with a longer delay.

As a nice side benefit, this also allows the tests to run more quickly
in the typical case: since the test will retry in case of spurious
failures, we can start with an aggressively short delay, and only back
off to a longer one if it is really needed on the specific machine
running the test.

Fixes #57084.
Fixes #51104.
For #58398.

Change-Id: Ia4050679f0777e5eeba7670307a77d93cfce856f
Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest-race,gotip-linux-amd64-race,gotip-windows-amd64-race
Reviewed-on: https://go-review.googlesource.com/c/go/+/527196
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Damien Neil <dneil@google.com>
Auto-Submit: Bryan Mills <bcmills@google.com>
This commit is contained in:
Bryan C. Mills 2023-09-11 16:17:03 -04:00 committed by Gopher Robot
parent 545e4f38e0
commit 6760f20ef5
4 changed files with 419 additions and 289 deletions

View File

@ -315,3 +315,21 @@ func ResponseWriterConnForTesting(w ResponseWriter) (c net.Conn, ok bool) {
}
return nil, false
}
func init() {
// Set the default rstAvoidanceDelay to the minimum possible value to shake
// out tests that unexpectedly depend on it. Such tests should use
// runTimeSensitiveTest and SetRSTAvoidanceDelay to explicitly raise the delay
// if needed.
rstAvoidanceDelay = 1 * time.Nanosecond
}
// SetRSTAvoidanceDelay sets how long we are willing to wait between calling
// CloseWrite on a connection and fully closing the connection.
func SetRSTAvoidanceDelay(t *testing.T, d time.Duration) {
prevDelay := rstAvoidanceDelay
t.Cleanup(func() {
rstAvoidanceDelay = prevDelay
})
rstAvoidanceDelay = d
}

View File

@ -646,19 +646,15 @@ func benchmarkServeMux(b *testing.B, runHandler bool) {
func TestServerTimeouts(t *testing.T) { run(t, testServerTimeouts, []testMode{http1Mode}) }
func testServerTimeouts(t *testing.T, mode testMode) {
// Try three times, with increasing timeouts.
tries := []time.Duration{250 * time.Millisecond, 500 * time.Millisecond, 1 * time.Second}
for i, timeout := range tries {
err := testServerTimeoutsWithTimeout(t, timeout, mode)
if err == nil {
return
}
t.Logf("failed at %v: %v", timeout, err)
if i != len(tries)-1 {
t.Logf("retrying at %v ...", tries[i+1])
}
}
t.Fatal("all attempts failed")
runTimeSensitiveTest(t, []time.Duration{
10 * time.Millisecond,
50 * time.Millisecond,
100 * time.Millisecond,
500 * time.Millisecond,
1 * time.Second,
}, func(t *testing.T, timeout time.Duration) error {
return testServerTimeoutsWithTimeout(t, timeout, mode)
})
}
func testServerTimeoutsWithTimeout(t *testing.T, timeout time.Duration, mode testMode) error {
@ -3101,23 +3097,42 @@ func TestServerBufferedChunking(t *testing.T) {
// closing the TCP connection, causing the client to get a RST.
// See https://golang.org/issue/3595
func TestServerGracefulClose(t *testing.T) {
run(t, testServerGracefulClose, []testMode{http1Mode})
// Not parallel: modifies the global rstAvoidanceDelay.
run(t, testServerGracefulClose, []testMode{http1Mode}, testNotParallel)
}
func testServerGracefulClose(t *testing.T, mode testMode) {
ts := newClientServerTest(t, mode, HandlerFunc(func(w ResponseWriter, r *Request) {
Error(w, "bye", StatusUnauthorized)
})).ts
runTimeSensitiveTest(t, []time.Duration{
1 * time.Millisecond,
5 * time.Millisecond,
10 * time.Millisecond,
50 * time.Millisecond,
100 * time.Millisecond,
500 * time.Millisecond,
time.Second,
5 * time.Second,
}, func(t *testing.T, timeout time.Duration) error {
SetRSTAvoidanceDelay(t, timeout)
t.Logf("set RST avoidance delay to %v", timeout)
conn, err := net.Dial("tcp", ts.Listener.Addr().String())
if err != nil {
t.Fatal(err)
}
defer conn.Close()
const bodySize = 5 << 20
req := []byte(fmt.Sprintf("POST / HTTP/1.1\r\nHost: foo.com\r\nContent-Length: %d\r\n\r\n", bodySize))
for i := 0; i < bodySize; i++ {
req = append(req, 'x')
}
cst := newClientServerTest(t, mode, HandlerFunc(func(w ResponseWriter, r *Request) {
Error(w, "bye", StatusUnauthorized)
}))
// We need to close cst explicitly here so that in-flight server
// requests don't race with the call to SetRSTAvoidanceDelay for a retry.
defer cst.close()
ts := cst.ts
conn, err := net.Dial("tcp", ts.Listener.Addr().String())
if err != nil {
return err
}
defer conn.Close()
writeErr := make(chan error)
go func() {
_, err := conn.Write(req)
@ -3131,7 +3146,7 @@ func testServerGracefulClose(t *testing.T, mode testMode) {
break
}
if err != nil {
t.Fatalf("ReadLine: %v", err)
return fmt.Errorf("ReadLine: %v", err)
}
lineNum++
if lineNum == 1 && !strings.Contains(line, "401 Unauthorized") {
@ -3142,6 +3157,8 @@ func testServerGracefulClose(t *testing.T, mode testMode) {
// Darwin and Linux, but checking this isn't the point of
// the test.
<-writeErr
return nil
})
}
func TestCaseSensitiveMethod(t *testing.T) { run(t, testCaseSensitiveMethod) }
@ -3923,46 +3940,37 @@ func TestContentTypeOkayOn204(t *testing.T) {
// and the http client), and both think they can close it on failure.
// Therefore, all incoming server requests Bodies need to be thread-safe.
func TestTransportAndServerSharedBodyRace(t *testing.T) {
run(t, testTransportAndServerSharedBodyRace)
run(t, testTransportAndServerSharedBodyRace, testNotParallel)
}
func testTransportAndServerSharedBodyRace(t *testing.T, mode testMode) {
// The proxy server in the middle of the stack for this test potentially
// from its handler after only reading half of the body.
// That can trigger https://go.dev/issue/3595, which is otherwise
// irrelevant to this test.
runTimeSensitiveTest(t, []time.Duration{
1 * time.Millisecond,
5 * time.Millisecond,
10 * time.Millisecond,
50 * time.Millisecond,
100 * time.Millisecond,
500 * time.Millisecond,
time.Second,
5 * time.Second,
}, func(t *testing.T, timeout time.Duration) error {
SetRSTAvoidanceDelay(t, timeout)
t.Logf("set RST avoidance delay to %v", timeout)
const bodySize = 1 << 20
// errorf is like t.Errorf, but also writes to println. When
// this test fails, it hangs. This helps debugging and I've
// added this enough times "temporarily". It now gets added
// full time.
errorf := func(format string, args ...any) {
v := fmt.Sprintf(format, args...)
println(v)
t.Error(v)
}
unblockBackend := make(chan bool)
backend := newClientServerTest(t, mode, HandlerFunc(func(rw ResponseWriter, req *Request) {
gone := rw.(CloseNotifier).CloseNotify()
didCopy := make(chan any)
go func() {
n, err := io.CopyN(rw, req.Body, bodySize)
didCopy <- []any{n, err}
}()
isGone := false
Loop:
for {
select {
case <-didCopy:
break Loop
case <-gone:
isGone = true
case <-time.After(time.Second):
println("1 second passes in backend, proxygone=", isGone)
}
}
<-unblockBackend
t.Logf("backend CopyN: %v, %v", n, err)
<-req.Context().Done()
}))
// We need to close explicitly here so that in-flight server
// requests don't race with the call to SetRSTAvoidanceDelay for a retry.
defer backend.close()
backendRespc := make(chan *Response, 1)
var proxy *clientServerTest
proxy = newClientServerTest(t, mode, HandlerFunc(func(rw ResponseWriter, req *Request) {
req2, _ := NewRequest("POST", backend.ts.URL, req.Body)
@ -3972,18 +3980,21 @@ func testTransportAndServerSharedBodyRace(t *testing.T, mode testMode) {
bresp, err := proxy.c.Do(req2)
if err != nil {
errorf("Proxy outbound request: %v", err)
t.Errorf("Proxy outbound request: %v", err)
return
}
_, err = io.CopyN(io.Discard, bresp.Body, bodySize/2)
if err != nil {
errorf("Proxy copy error: %v", err)
t.Errorf("Proxy copy error: %v", err)
return
}
backendRespc <- bresp // to close later
t.Cleanup(func() { bresp.Body.Close() })
// Try to cause a race: Both the Transport and the proxy handler's Server
// will try to read/close req.Body (aka req2.Body)
// Try to cause a race. Canceling the client request will cause the client
// transport to close req2.Body. Returning from the server handler will
// cause the server to close req.Body. Since they are the same underlying
// ReadCloser, that will result in concurrent calls to Close (and possibly a
// Read concurrent with a Close).
if mode == http2Mode {
close(cancel)
} else {
@ -3993,21 +4004,14 @@ func testTransportAndServerSharedBodyRace(t *testing.T, mode testMode) {
}))
defer proxy.close()
defer close(unblockBackend)
req, _ := NewRequest("POST", proxy.ts.URL, io.LimitReader(neverEnding('a'), bodySize))
res, err := proxy.c.Do(req)
if err != nil {
t.Fatalf("Original request: %v", err)
return fmt.Errorf("original request: %v", err)
}
// Cleanup, so we don't leak goroutines.
res.Body.Close()
select {
case res := <-backendRespc:
res.Body.Close()
default:
// We failed earlier. (e.g. on proxy.c.Do(req2))
}
return nil
})
}
// Test that a hanging Request.Body.Read from another goroutine can't
@ -4342,7 +4346,8 @@ func (c *closeWriteTestConn) CloseWrite() error {
}
func TestCloseWrite(t *testing.T) {
setParallel(t)
SetRSTAvoidanceDelay(t, 1*time.Millisecond)
var srv Server
var testConn closeWriteTestConn
c := ExportServerNewConn(&srv, &testConn)
@ -5382,49 +5387,73 @@ func testServerIdleTimeout(t *testing.T, mode testMode) {
if testing.Short() {
t.Skip("skipping in short mode")
}
runTimeSensitiveTest(t, []time.Duration{
10 * time.Millisecond,
100 * time.Millisecond,
1 * time.Second,
10 * time.Second,
}, func(t *testing.T, readHeaderTimeout time.Duration) error {
ts := newClientServerTest(t, mode, HandlerFunc(func(w ResponseWriter, r *Request) {
io.Copy(io.Discard, r.Body)
io.WriteString(w, r.RemoteAddr)
}), func(ts *httptest.Server) {
ts.Config.ReadHeaderTimeout = 1 * time.Second
ts.Config.IdleTimeout = 2 * time.Second
ts.Config.ReadHeaderTimeout = readHeaderTimeout
ts.Config.IdleTimeout = 2 * readHeaderTimeout
}).ts
t.Logf("ReadHeaderTimeout = %v", ts.Config.ReadHeaderTimeout)
t.Logf("IdleTimeout = %v", ts.Config.IdleTimeout)
c := ts.Client()
get := func() string {
get := func() (string, error) {
res, err := c.Get(ts.URL)
if err != nil {
t.Fatal(err)
return "", err
}
defer res.Body.Close()
slurp, err := io.ReadAll(res.Body)
if err != nil {
// If we're at this point the headers have definitely already been
// read and the server is not idle, so neither timeout applies:
// this should never fail.
t.Fatal(err)
}
return string(slurp)
return string(slurp), nil
}
a1, a2 := get(), get()
if a1 != a2 {
t.Fatalf("did requests on different connections")
a1, err := get()
if err != nil {
return err
}
a2, err := get()
if err != nil {
return err
}
if a1 != a2 {
return fmt.Errorf("did requests on different connections")
}
time.Sleep(ts.Config.IdleTimeout * 3 / 2)
a3, err := get()
if err != nil {
return err
}
time.Sleep(3 * time.Second)
a3 := get()
if a2 == a3 {
t.Fatal("request three unexpectedly on same connection")
return fmt.Errorf("request three unexpectedly on same connection")
}
// And test that ReadHeaderTimeout still works:
conn, err := net.Dial("tcp", ts.Listener.Addr().String())
if err != nil {
t.Fatal(err)
return err
}
defer conn.Close()
conn.Write([]byte("GET / HTTP/1.1\r\nHost: foo.com\r\n"))
time.Sleep(2 * time.Second)
time.Sleep(ts.Config.ReadHeaderTimeout * 2)
if _, err := io.CopyN(io.Discard, conn, 1); err == nil {
t.Fatal("copy byte succeeded; want err")
return fmt.Errorf("copy byte succeeded; want err")
}
return nil
})
}
func get(t *testing.T, c *Client, url string) string {
@ -5773,9 +5802,10 @@ func runTimeSensitiveTest(t *testing.T, durations []time.Duration, test func(t *
if err == nil {
return
}
if i == len(durations)-1 {
if i == len(durations)-1 || t.Failed() {
t.Fatalf("failed with duration %v: %v", d, err)
}
t.Logf("retrying after error with duration %v: %v", d, err)
}
}
@ -6620,7 +6650,7 @@ func testQuerySemicolon(t *testing.T, mode testMode, query string, wantX string,
}
func TestMaxBytesHandler(t *testing.T) {
setParallel(t)
// Not parallel: modifies the global rstAvoidanceDelay.
defer afterTest(t)
for _, maxSize := range []int64{100, 1_000, 1_000_000} {
@ -6629,13 +6659,26 @@ func TestMaxBytesHandler(t *testing.T) {
func(t *testing.T) {
run(t, func(t *testing.T, mode testMode) {
testMaxBytesHandler(t, mode, maxSize, requestSize)
})
}, testNotParallel)
})
}
}
}
func testMaxBytesHandler(t *testing.T, mode testMode, maxSize, requestSize int64) {
runTimeSensitiveTest(t, []time.Duration{
1 * time.Millisecond,
5 * time.Millisecond,
10 * time.Millisecond,
50 * time.Millisecond,
100 * time.Millisecond,
500 * time.Millisecond,
time.Second,
5 * time.Second,
}, func(t *testing.T, timeout time.Duration) error {
SetRSTAvoidanceDelay(t, timeout)
t.Logf("set RST avoidance delay to %v", timeout)
var (
handlerN int64
handlerErr error
@ -6646,9 +6689,11 @@ func testMaxBytesHandler(t *testing.T, mode testMode, maxSize, requestSize int64
io.Copy(w, &buf)
})
ts := newClientServerTest(t, mode, MaxBytesHandler(echo, maxSize)).ts
defer ts.Close()
cst := newClientServerTest(t, mode, MaxBytesHandler(echo, maxSize))
// We need to close cst explicitly here so that in-flight server
// requests don't race with the call to SetRSTAvoidanceDelay for a retry.
defer cst.close()
ts := cst.ts
c := ts.Client()
body := strings.Repeat("a", int(requestSize))
@ -6675,14 +6720,18 @@ func testMaxBytesHandler(t *testing.T, mode testMode, maxSize, requestSize int64
var buf strings.Builder
res, err := c.Do(req)
if err != nil {
t.Errorf("unexpected connection error: %v", err)
return fmt.Errorf("unexpected connection error: %v", err)
} else {
_, err = io.Copy(&buf, res.Body)
res.Body.Close()
if err != nil {
t.Errorf("unexpected read error: %v", err)
return fmt.Errorf("unexpected read error: %v", err)
}
}
// We don't expect any of the errors after this point to occur due
// to rstAvoidanceDelay being too short, so we use t.Errorf for those
// instead of returning a (retriable) error.
if handlerN > maxSize {
t.Errorf("expected max request body %d; got %d", maxSize, handlerN)
}
@ -6700,6 +6749,9 @@ func testMaxBytesHandler(t *testing.T, mode testMode, maxSize, requestSize int64
if buf.Len() != int(handlerN) {
t.Errorf("expected echo of size %d; got %d", handlerN, buf.Len())
}
return nil
})
}
func TestEarlyHints(t *testing.T) {

View File

@ -1750,8 +1750,12 @@ func (c *conn) close() {
// and processes its final data before they process the subsequent RST
// from closing a connection with known unread data.
// This RST seems to occur mostly on BSD systems. (And Windows?)
// This timeout is somewhat arbitrary (~latency around the planet).
const rstAvoidanceDelay = 500 * time.Millisecond
// This timeout is somewhat arbitrary (~latency around the planet),
// and may be modified by tests.
//
// TODO(bcmills): This should arguably be a server configuration parameter,
// not a hard-coded value.
var rstAvoidanceDelay = 500 * time.Millisecond
type closeWriter interface {
CloseWrite() error
@ -1770,6 +1774,27 @@ func (c *conn) closeWriteAndWait() {
if tcp, ok := c.rwc.(closeWriter); ok {
tcp.CloseWrite()
}
// When we return from closeWriteAndWait, the caller will fully close the
// connection. If client is still writing to the connection, this will cause
// the write to fail with ECONNRESET or similar. Unfortunately, many TCP
// implementations will also drop unread packets from the client's read buffer
// when a write fails, causing our final response to be truncated away too.
//
// As a result, https://www.rfc-editor.org/rfc/rfc7230#section-6.6 recommends
// that “[t]he server … continues to read from the connection until it
// receives a corresponding close by the client, or until the server is
// reasonably certain that its own TCP stack has received the client's
// acknowledgement of the packet(s) containing the server's last response.”
//
// Unfortunately, we have no straightforward way to be “reasonably certain”
// that we have received the client's ACK, and at any rate we don't want to
// allow a misbehaving client to soak up server connections indefinitely by
// withholding an ACK, nor do we want to go through the complexity or overhead
// of using low-level APIs to figure out when a TCP round-trip has completed.
//
// Instead, we declare that we are “reasonably certain” that we received the
// ACK if maxRSTAvoidanceDelay has elapsed.
time.Sleep(rstAvoidanceDelay)
}

View File

@ -2099,25 +2099,50 @@ func testIssue3644(t *testing.T, mode testMode) {
// Test that a client receives a server's reply, even if the server doesn't read
// the entire request body.
func TestIssue3595(t *testing.T) { run(t, testIssue3595) }
func TestIssue3595(t *testing.T) {
// Not parallel: modifies the global rstAvoidanceDelay.
run(t, testIssue3595, testNotParallel)
}
func testIssue3595(t *testing.T, mode testMode) {
runTimeSensitiveTest(t, []time.Duration{
1 * time.Millisecond,
5 * time.Millisecond,
10 * time.Millisecond,
50 * time.Millisecond,
100 * time.Millisecond,
500 * time.Millisecond,
time.Second,
5 * time.Second,
}, func(t *testing.T, timeout time.Duration) error {
SetRSTAvoidanceDelay(t, timeout)
t.Logf("set RST avoidance delay to %v", timeout)
const deniedMsg = "sorry, denied."
ts := newClientServerTest(t, mode, HandlerFunc(func(w ResponseWriter, r *Request) {
cst := newClientServerTest(t, mode, HandlerFunc(func(w ResponseWriter, r *Request) {
Error(w, deniedMsg, StatusUnauthorized)
})).ts
}))
// We need to close cst explicitly here so that in-flight server
// requests don't race with the call to SetRSTAvoidanceDelay for a retry.
defer cst.close()
ts := cst.ts
c := ts.Client()
res, err := c.Post(ts.URL, "application/octet-stream", neverEnding('a'))
if err != nil {
t.Errorf("Post: %v", err)
return
return fmt.Errorf("Post: %v", err)
}
got, err := io.ReadAll(res.Body)
if err != nil {
t.Fatalf("Body ReadAll: %v", err)
return fmt.Errorf("Body ReadAll: %v", err)
}
t.Logf("server response:\n%s", got)
if !strings.Contains(string(got), deniedMsg) {
// If we got an RST packet too early, we should have seen an error
// from io.ReadAll, not a silently-truncated body.
t.Errorf("Known bug: response %q does not contain %q", got, deniedMsg)
}
return nil
})
}
// From https://golang.org/issue/4454 ,
@ -4327,24 +4352,42 @@ func (c *wgReadCloser) Close() error {
// Issue 11745.
func TestTransportPrefersResponseOverWriteError(t *testing.T) {
run(t, testTransportPrefersResponseOverWriteError)
// Not parallel: modifies the global rstAvoidanceDelay.
run(t, testTransportPrefersResponseOverWriteError, testNotParallel)
}
func testTransportPrefersResponseOverWriteError(t *testing.T, mode testMode) {
if testing.Short() {
t.Skip("skipping in short mode")
}
runTimeSensitiveTest(t, []time.Duration{
1 * time.Millisecond,
5 * time.Millisecond,
10 * time.Millisecond,
50 * time.Millisecond,
100 * time.Millisecond,
500 * time.Millisecond,
time.Second,
5 * time.Second,
}, func(t *testing.T, timeout time.Duration) error {
SetRSTAvoidanceDelay(t, timeout)
t.Logf("set RST avoidance delay to %v", timeout)
const contentLengthLimit = 1024 * 1024 // 1MB
ts := newClientServerTest(t, mode, HandlerFunc(func(w ResponseWriter, r *Request) {
cst := newClientServerTest(t, mode, HandlerFunc(func(w ResponseWriter, r *Request) {
if r.ContentLength >= contentLengthLimit {
w.WriteHeader(StatusBadRequest)
r.Body.Close()
return
}
w.WriteHeader(StatusOK)
})).ts
}))
// We need to close cst explicitly here so that in-flight server
// requests don't race with the call to SetRSTAvoidanceDelay for a retry.
defer cst.close()
ts := cst.ts
c := ts.Client()
fail := 0
count := 100
bigBody := strings.Repeat("a", contentLengthLimit*2)
@ -4371,14 +4414,7 @@ func testTransportPrefersResponseOverWriteError(t *testing.T, mode testMode) {
resp, err := c.Do(req)
if err != nil {
fail++
t.Logf("%d = %#v", i, err)
if ue, ok := err.(*url.Error); ok {
t.Logf("urlErr = %#v", ue.Err)
if ne, ok := ue.Err.(*net.OpError); ok {
t.Logf("netOpError = %#v", ne.Err)
}
}
return fmt.Errorf("Do %d: %v", i, err)
} else {
resp.Body.Close()
if resp.StatusCode != 400 {
@ -4386,9 +4422,8 @@ func testTransportPrefersResponseOverWriteError(t *testing.T, mode testMode) {
}
}
}
if fail > 0 {
t.Errorf("Failed %v out of %v\n", fail, count)
}
return nil
})
}
func TestTransportAutomaticHTTP2(t *testing.T) {