tcp: add new tcp_mtu_probe_floor sysctl
author Josh Hunt <johunt@akamai.com>
Wed, 7 Aug 2019 23:52:29 +0000 (19:52 -0400)
committer David S. Miller <davem@davemloft.net>
Fri, 9 Aug 2019 20:03:30 +0000 (13:03 -0700)
The current implementation of TCP MTU probing can considerably
underestimate the MTU on lossy connections, allowing the MSS to get down to
48. We have found that in almost all of these cases on our networks, these
paths can handle much larger MTUs, meaning the connections are being
artificially limited. Even though TCP MTU probing can raise the MSS back up,
we have seen this not to be the case, causing connections to be "stuck" with
an MSS of 48 when heavy loss is present.

Prior to pushing out this change we could not keep TCP MTU probing enabled
because of the above reasons. Now with a reasonable floor set we've had it
enabled for the past 6 months.

The new sysctl will still default to TCP_MIN_SND_MSS (48), but gives
administrators the ability to control the floor of MSS probing.

Signed-off-by: Josh Hunt <johunt@akamai.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/ip-sysctl.txt
include/net/netns/ipv4.h
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_timer.c

index df33674799b5260933fe6bcef73dcb60f44096e3..49e95f438ed7571a93bceffdc17846c35dd64fca 100644 (file)
@@ -256,6 +256,12 @@ tcp_base_mss - INTEGER
        Path MTU discovery (MTU probing).  If MTU probing is enabled,
        this is the initial MSS used by the connection.
 
+tcp_mtu_probe_floor - INTEGER
+       If MTU probing is enabled this caps the minimum MSS used for search_low
+       for the connection.
+
+       Default : 48
+
 tcp_min_snd_mss - INTEGER
        TCP SYN and SYNACK messages usually advertise an ADVMSS option,
        as described in RFC 1122 and RFC 6691.
index bc24a8ec1ce5197d65ac0cbb3b93077d325ec7ec..c0c0791b191232b16355aa538650e4a4815b1c3a 100644 (file)
@@ -116,6 +116,7 @@ struct netns_ipv4 {
        int sysctl_tcp_l3mdev_accept;
 #endif
        int sysctl_tcp_mtu_probing;
+       int sysctl_tcp_mtu_probe_floor;
        int sysctl_tcp_base_mss;
        int sysctl_tcp_min_snd_mss;
        int sysctl_tcp_probe_threshold;
index 0b980e84192738362e19ce3a9e921cff23a1449b..59ded25acd045d90573eb144381df4381ecba837 100644 (file)
@@ -819,6 +819,15 @@ static struct ctl_table ipv4_net_table[] = {
                .extra1         = &tcp_min_snd_mss_min,
                .extra2         = &tcp_min_snd_mss_max,
        },
+       {
+               .procname       = "tcp_mtu_probe_floor",
+               .data           = &init_net.ipv4.sysctl_tcp_mtu_probe_floor,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &tcp_min_snd_mss_min,
+               .extra2         = &tcp_min_snd_mss_max,
+       },
        {
                .procname       = "tcp_probe_threshold",
                .data           = &init_net.ipv4.sysctl_tcp_probe_threshold,
index d57641cb3477d0c573e6648fccfc77ce52f52660..e0a372676329fe8c46dc10a6eeb8466077764173 100644 (file)
@@ -2637,6 +2637,7 @@ static int __net_init tcp_sk_init(struct net *net)
        net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
        net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
        net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
+       net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
 
        net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
        net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
index c801cd37cc2a9c11f2dd4b9681137755e501a538..dbd9d2d0ee63aa46ad2dda417da6ec9409442b77 100644 (file)
@@ -154,7 +154,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
        } else {
                mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
                mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
-               mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
+               mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
                mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
                icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
        }