
net/http: reject client-side retries in server timeout tests

This breaks an unbounded client-side retry loop if the server's
timeout happens to fire during its final read of the TLS handshake.

The retry loop was observed on wasm platforms at CL 557437.
I was also able to reproduce chains of dozens of retries on my
linux/amd64 workstation by adjusting some timeouts and adding a couple
of sleeps, as in this patch:
https://gist.github.com/bcmills/d0a0a57e5f64eebc24e8211d8ea502b3
However, on my linux/amd64 workstation the test always eventually
breaks out of the retry loop due to timing jitter.

I couldn't find a retry-specific hook in the http.Client,
http.Transport, or tls.Config structs, so I have instead abused the
Transport.Proxy hook for this purpose. Separately, we may want to
consider adding a retry-specific hook, or changing the net/http
implementation to avoid transparently retrying in this case.
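
For illustration only (not part of this CL; the standalone httptest server
and handler below are hypothetical), here is a minimal sketch of the trick:
the Transport consults its Proxy function on each attempt, including
transparent internal retries, so returning an error after the first call
turns a silent retry into a visible failure.

package main

import (
	"errors"
	"fmt"
	"net/http"
	"net/http/httptest"
	"net/url"
	"sync/atomic"
)

func main() {
	// Hypothetical server standing in for the test's client/server pair.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprintln(w, "ok")
	}))
	defer srv.Close()

	// Proxy is called once per attempt, so counting calls exposes any
	// internal retry; rejecting the second call breaks a retry loop.
	var attempts atomic.Int32
	tr := &http.Transport{
		Proxy: func(*http.Request) (*url.URL, error) {
			if attempts.Add(1) != 1 {
				return nil, errors.New("too many retries")
			}
			return nil, nil // no proxy; dial directly
		},
	}
	client := &http.Client{Transport: tr}

	res, err := client.Get(srv.URL)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()
	fmt.Println("first (and only) attempt succeeded:", res.Status)
}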

Fixes #65410.
Updates #65178.

Change-Id: I0e43c039615fe815f0a4ba99a8813c48b1fdc7e6
Reviewed-on: https://go-review.googlesource.com/c/go/+/559835
Reviewed-by: Damien Neil <dneil@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Bryan Mills <bcmills@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Commit: fc0d9a4b7d (parent f8b4653500)
Author: Bryan C. Mills
Date: 2024-01-31 11:43:38 -05:00
Committed by: Gopher Robot


@@ -764,7 +764,17 @@ func testServerReadTimeout(t *testing.T, mode testMode) {
	}), func(ts *httptest.Server) {
		ts.Config.ReadHeaderTimeout = -1 // don't time out while reading headers
		ts.Config.ReadTimeout = timeout
		t.Logf("Server.Config.ReadTimeout = %v", timeout)
	})
	var retries atomic.Int32
	cst.c.Transport.(*Transport).Proxy = func(*Request) (*url.URL, error) {
		if retries.Add(1) != 1 {
			return nil, errors.New("too many retries")
		}
		return nil, nil
	}
	pr, pw := io.Pipe()
	res, err := cst.c.Post(cst.ts.URL, "text/apocryphal", pr)
	if err != nil {
@@ -792,7 +802,34 @@ func testServerWriteTimeout(t *testing.T, mode testMode) {
		errc <- err
	}), func(ts *httptest.Server) {
		ts.Config.WriteTimeout = timeout
		t.Logf("Server.Config.WriteTimeout = %v", timeout)
	})
	// The server's WriteTimeout parameter also applies to reads during the TLS
	// handshake. The client makes the last write during the handshake, and if
	// the server happens to time out during the read of that write, the client
	// may think that the connection was accepted even though the server thinks
	// it timed out.
	//
	// The client only notices that the server connection is gone when it goes
	// to actually write the request — and when that fails, it retries
	// internally (the same as if the server had closed the connection due to a
	// racing idle-timeout).
	//
	// With unlucky and very stable scheduling (as may be the case with the fake wasm
	// net stack), this can result in an infinite retry loop that doesn't
	// propagate the error up far enough for us to adjust the WriteTimeout.
	//
	// To avoid that problem, we explicitly forbid internal retries by rejecting
	// them in a Proxy hook in the transport.
	var retries atomic.Int32
	cst.c.Transport.(*Transport).Proxy = func(*Request) (*url.URL, error) {
		if retries.Add(1) != 1 {
			return nil, errors.New("too many retries")
		}
		return nil, nil
	}
	res, err := cst.c.Get(cst.ts.URL)
	if err != nil {
		// Probably caused by the write timeout expiring before the handler runs.
@@ -5778,10 +5815,19 @@ func testServerCancelsReadTimeoutWhenIdle(t *testing.T, mode testMode) {
		}
	}), func(ts *httptest.Server) {
		ts.Config.ReadTimeout = timeout
		t.Logf("Server.Config.ReadTimeout = %v", timeout)
	})
	defer cst.close()
	ts := cst.ts
	var retries atomic.Int32
	cst.c.Transport.(*Transport).Proxy = func(*Request) (*url.URL, error) {
		if retries.Add(1) != 1 {
			return nil, errors.New("too many retries")
		}
		return nil, nil
	}
	c := ts.Client()
	res, err := c.Get(ts.URL)