1
0
mirror of https://github.com/golang/go synced 2024-11-06 04:16:11 -07:00
go/src/net/tcpsock_posix.go

177 lines
5.3 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || (js && wasm) || windows
package net
import (
"context"
"io"
"os"
"syscall"
)
func sockaddrToTCP(sa syscall.Sockaddr) Addr {
switch sa := sa.(type) {
case *syscall.SockaddrInet4:
return &TCPAddr{IP: sa.Addr[0:], Port: sa.Port}
case *syscall.SockaddrInet6:
return &TCPAddr{IP: sa.Addr[0:], Port: sa.Port, Zone: zoneCache.name(int(sa.ZoneId))}
}
return nil
}
func (a *TCPAddr) family() int {
if a == nil || len(a.IP) <= IPv4len {
return syscall.AF_INET
}
if a.IP.To4() != nil {
return syscall.AF_INET
}
return syscall.AF_INET6
}
func (a *TCPAddr) sockaddr(family int) (syscall.Sockaddr, error) {
if a == nil {
return nil, nil
}
return ipToSockaddr(family, a.IP, a.Port, a.Zone)
}
func (a *TCPAddr) toLocal(net string) sockaddr {
return &TCPAddr{loopbackIP(net), a.Port, a.Zone}
}
func (c *TCPConn) readFrom(r io.Reader) (int64, error) {
net: add support for splice(2) in (*TCPConn).ReadFrom on Linux This change adds support for the splice system call on Linux, for the purpose of optimizing (*TCPConn).ReadFrom by reducing copies of data from and to userspace. It does so by creating a temporary pipe and splicing data from the source connection to the pipe, then from the pipe to the destination connection. The pipe serves as an in-kernel buffer for the data transfer. No new API is added to package net, but a new Splice function is added to package internal/poll, because using splice requires help from the network poller. Users of the net package should benefit from the change transparently. This change only enables the optimization if the Reader in ReadFrom is a TCP connection. Since splice is a more general interface, it could, in theory, also be enabled if the Reader were a unix socket, or the read half of a pipe. However, benchmarks show that enabling it for unix sockets is most likely not a net performance gain. The tcp <- unix case is also fairly unlikely to be used very much by users of package net. Enabling the optimization for pipes is also problematic from an implementation perspective, since package net cannot easily get at the *poll.FD of an *os.File. A possible solution to this would be to dup the pipe file descriptor, register the duped descriptor with the network poller, and work on that *poll.FD instead of the original. However, this seems too intrusive, so it has not been done. If there was a clean way to do it, it would probably be worth doing, since splicing from a pipe to a socket can be done directly. Therefore, this patch only enables the optimization for what is likely the most common use case: tcp <- tcp. The following benchmark compares the performance of the previous userspace genericReadFrom code path to the new optimized code path. The sub-benchmarks represent chunk sizes used by the writer on the other end of the Reader passed to ReadFrom. benchmark old ns/op new ns/op delta BenchmarkTCPReadFrom/1024-4 4727 4954 +4.80% BenchmarkTCPReadFrom/2048-4 4389 4301 -2.01% BenchmarkTCPReadFrom/4096-4 4606 4534 -1.56% BenchmarkTCPReadFrom/8192-4 5219 4779 -8.43% BenchmarkTCPReadFrom/16384-4 8708 8008 -8.04% BenchmarkTCPReadFrom/32768-4 16349 14973 -8.42% BenchmarkTCPReadFrom/65536-4 35246 27406 -22.24% BenchmarkTCPReadFrom/131072-4 72920 52382 -28.17% BenchmarkTCPReadFrom/262144-4 149311 95094 -36.31% BenchmarkTCPReadFrom/524288-4 306704 181856 -40.71% BenchmarkTCPReadFrom/1048576-4 674174 357406 -46.99% benchmark old MB/s new MB/s speedup BenchmarkTCPReadFrom/1024-4 216.62 206.69 0.95x BenchmarkTCPReadFrom/2048-4 466.61 476.08 1.02x BenchmarkTCPReadFrom/4096-4 889.09 903.31 1.02x BenchmarkTCPReadFrom/8192-4 1569.40 1714.06 1.09x BenchmarkTCPReadFrom/16384-4 1881.42 2045.84 1.09x BenchmarkTCPReadFrom/32768-4 2004.18 2188.41 1.09x BenchmarkTCPReadFrom/65536-4 1859.38 2391.25 1.29x BenchmarkTCPReadFrom/131072-4 1797.46 2502.21 1.39x BenchmarkTCPReadFrom/262144-4 1755.69 2756.68 1.57x BenchmarkTCPReadFrom/524288-4 1709.42 2882.98 1.69x BenchmarkTCPReadFrom/1048576-4 1555.35 2933.84 1.89x Fixes #10948 Change-Id: I3ce27f21f7adda8b696afdc48a91149998ae16a5 Reviewed-on: https://go-review.googlesource.com/107715 Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
2018-04-18 02:56:06 -06:00
if n, err, handled := splice(c.fd, r); handled {
return n, err
}
if n, err, handled := sendFile(c.fd, r); handled {
return n, err
}
return genericReadFrom(c, r)
}
func (sd *sysDialer) dialTCP(ctx context.Context, laddr, raddr *TCPAddr) (*TCPConn, error) {
if h := sd.testHookDialTCP; h != nil {
return h(ctx, sd.network, laddr, raddr)
}
if h := testHookDialTCP; h != nil {
return h(ctx, sd.network, laddr, raddr)
}
return sd.doDialTCP(ctx, laddr, raddr)
}
func (sd *sysDialer) doDialTCP(ctx context.Context, laddr, raddr *TCPAddr) (*TCPConn, error) {
fd, err := internetSocket(ctx, sd.network, laddr, raddr, syscall.SOCK_STREAM, 0, "dial", sd.Dialer.Control)
// TCP has a rarely used mechanism called a 'simultaneous connection' in
// which Dial("tcp", addr1, addr2) run on the machine at addr1 can
// connect to a simultaneous Dial("tcp", addr2, addr1) run on the machine
// at addr2, without either machine executing Listen. If laddr == nil,
// it means we want the kernel to pick an appropriate originating local
// address. Some Linux kernels cycle blindly through a fixed range of
// local ports, regardless of destination port. If a kernel happens to
// pick local port 50001 as the source for a Dial("tcp", "", "localhost:50001"),
// then the Dial will succeed, having simultaneously connected to itself.
// This can only happen when we are letting the kernel pick a port (laddr == nil)
// and when there is no listener for the destination address.
// It's hard to argue this is anything other than a kernel bug. If we
// see this happen, rather than expose the buggy effect to users, we
// close the fd and try again. If it happens twice more, we relent and
// use the result. See also:
// https://golang.org/issue/2690
// https://stackoverflow.com/questions/4949858/
//
// The opposite can also happen: if we ask the kernel to pick an appropriate
// originating local address, sometimes it picks one that is already in use.
// So if the error is EADDRNOTAVAIL, we have to try again too, just for
// a different reason.
//
// The kernel socket code is no doubt enjoying watching us squirm.
for i := 0; i < 2 && (laddr == nil || laddr.Port == 0) && (selfConnect(fd, err) || spuriousENOTAVAIL(err)); i++ {
if err == nil {
fd.Close()
}
fd, err = internetSocket(ctx, sd.network, laddr, raddr, syscall.SOCK_STREAM, 0, "dial", sd.Dialer.Control)
}
if err != nil {
return nil, err
}
return newTCPConn(fd), nil
}
func selfConnect(fd *netFD, err error) bool {
// If the connect failed, we clearly didn't connect to ourselves.
if err != nil {
return false
}
// The socket constructor can return an fd with raddr nil under certain
// unknown conditions. The errors in the calls there to Getpeername
// are discarded, but we can't catch the problem there because those
// calls are sometimes legally erroneous with a "socket not connected".
// Since this code (selfConnect) is already trying to work around
// a problem, we make sure if this happens we recognize trouble and
// ask the DialTCP routine to try again.
// TODO: try to understand what's really going on.
if fd.laddr == nil || fd.raddr == nil {
return true
}
l := fd.laddr.(*TCPAddr)
r := fd.raddr.(*TCPAddr)
return l.Port == r.Port && l.IP.Equal(r.IP)
}
func spuriousENOTAVAIL(err error) bool {
if op, ok := err.(*OpError); ok {
err = op.Err
}
if sys, ok := err.(*os.SyscallError); ok {
err = sys.Err
}
return err == syscall.EADDRNOTAVAIL
}
func (ln *TCPListener) ok() bool { return ln != nil && ln.fd != nil }
func (ln *TCPListener) accept() (*TCPConn, error) {
fd, err := ln.fd.accept()
if err != nil {
return nil, err
}
tc := newTCPConn(fd)
if ln.lc.KeepAlive >= 0 {
setKeepAlive(fd, true)
ka := ln.lc.KeepAlive
if ln.lc.KeepAlive == 0 {
ka = defaultTCPKeepAlive
}
setKeepAlivePeriod(fd, ka)
}
return tc, nil
}
func (ln *TCPListener) close() error {
return ln.fd.Close()
}
func (ln *TCPListener) file() (*os.File, error) {
f, err := ln.fd.dup()
if err != nil {
return nil, err
}
return f, nil
}
func (sl *sysListener) listenTCP(ctx context.Context, laddr *TCPAddr) (*TCPListener, error) {
fd, err := internetSocket(ctx, sl.network, laddr, nil, syscall.SOCK_STREAM, 0, "listen", sl.ListenConfig.Control)
if err != nil {
return nil, err
}
return &TCPListener{fd: fd, lc: sl.ListenConfig}, nil
}