net-tcp: extend tcp_tw_reuse sysctl to enable loopback only optimization

author Maciej Żenczykowski <maze@google.com>

Sun, 3 Jun 2018 17:41:17 +0000 (10:41 -0700)

committer David S. Miller <davem@davemloft.net>

Mon, 4 Jun 2018 21:13:35 +0000 (17:13 -0400)
author Maciej Żenczykowski <maze@google.com>
Sun, 3 Jun 2018 17:41:17 +0000 (10:41 -0700)
committer David S. Miller <davem@davemloft.net>
Mon, 4 Jun 2018 21:13:35 +0000 (17:13 -0400)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt

index 924bd51327b7a8dff3503d7afccdd54e1eb5c29b..6841c74eac007f9c80073c449406d4cc5dcad82d 100644 (file)
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -667,11 +667,15 @@ tcp_tso_win_divisor - INTEGER
         building larger TSO frames.
         Default: 3
  
-tcp_tw_reuse - BOOLEAN
-       Allow to reuse TIME-WAIT sockets for new connections when it is
-       safe from protocol viewpoint. Default value is 0.
+tcp_tw_reuse - INTEGER
+       Enable reuse of TIME-WAIT sockets for new connections when it is
+       safe from protocol viewpoint.
+       0 - disable
+       1 - global enable
+       2 - enable for loopback traffic only
         It should not be changed without advice/request of technical
         experts.
+       Default: 2
  
  tcp_window_scaling - BOOLEAN
         Enable window scaling as defined in RFC1323.
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c

index d2eed3ddcb0a1ad9778d96d46c685f6c60b93d8d..d06247ba08b2667b1049329e8921af9388545c54 100644 (file)
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -30,6 +30,7 @@
  
  static int zero;
  static int one = 1;
+static int two = 2;
  static int four = 4;
  static int thousand = 1000;
  static int gso_max_segs = GSO_MAX_SEGS;
@@ -845,7 +846,9 @@ static struct ctl_table ipv4_net_table[] = {
                 .data           = &init_net.ipv4.sysctl_tcp_tw_reuse,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &two,
         },
         {
                 .procname       = "tcp_max_tw_buckets",
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c

index 749b0ef9f405fdcda4ec957342ab73c2fc9bf2b9..633963e228bcd931eafb3cf71f1bd113aedede7e 100644 (file)
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -110,8 +110,38 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
  
  int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
  {
+       const struct inet_timewait_sock *tw = inet_twsk(sktw);
         const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
         struct tcp_sock *tp = tcp_sk(sk);
+       int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
+
+       if (reuse == 2) {
+               /* Still does not detect *everything* that goes through
+                * lo, since we require a loopback src or dst address
+                * or direct binding to 'lo' interface.
+                */
+               bool loopback = false;
+               if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
+                       loopback = true;
+#if IS_ENABLED(CONFIG_IPV6)
+               if (tw->tw_family == AF_INET6) {
+                       if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
+                           (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
+                            (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
+                           ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
+                           (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
+                            (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
+                               loopback = true;
+               } else
+#endif
+               {
+                       if (ipv4_is_loopback(tw->tw_daddr) ||
+                           ipv4_is_loopback(tw->tw_rcv_saddr))
+                               loopback = true;
+               }
+               if (!loopback)
+                       reuse = 0;
+       }
  
         /* With PAWS, it is safe from the viewpoint
            of data integrity. Even without PAWS it is safe provided sequence
@@ -125,8 +155,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
            and use initial timestamp retrieved from peer table.
          */
         if (tcptw->tw_ts_recent_stamp &&
-           (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
-                            get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
+           (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
                 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
                 if (tp->write_seq == 0)
                         tp->write_seq = 1;
@@ -2529,7 +2558,7 @@ static int __net_init tcp_sk_init(struct net *net)
         net->ipv4.sysctl_tcp_orphan_retries = 0;
         net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
         net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
-       net->ipv4.sysctl_tcp_tw_reuse = 0;
+       net->ipv4.sysctl_tcp_tw_reuse = 2;
  
         cnt = tcp_hashinfo.ehash_mask + 1;
         net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
author	Maciej Żenczykowski <maze@google.com>
	Sun, 3 Jun 2018 17:41:17 +0000 (10:41 -0700)
committer	David S. Miller <davem@davemloft.net>
	Mon, 4 Jun 2018 21:13:35 +0000 (17:13 -0400)
Documentation/networking/ip-sysctl.txt		patch | blob | history
net/ipv4/sysctl_net_ipv4.c		patch | blob | history
net/ipv4/tcp_ipv4.c		patch | blob | history