tcp: sndbuf autotuning improvements
author Eric Dumazet <edumazet@google.com>
Tue, 1 Oct 2013 17:23:44 +0000 (10:23 -0700)
committer David S. Miller <davem@davemloft.net>
Wed, 2 Oct 2013 20:45:17 +0000 (16:45 -0400)
tcp_fixup_sndbuf() is underestimating initial send buffer requirements.

It was not noticed because big GSO packets were escaping the limitation,
but with smaller TSO packets (or with TSO/GSO/SG off), the application hits
sk_sndbuf before it has a chance to queue enough packets in the socket
write queue.

- initial cwnd can be bigger than 10 for specific routes

- SKB_TRUESIZE() is a bit under real needs in some cases,
  because of power-of-two rounding in kmalloc()

- Fast Recovery (RFC 5681 3.2) : Cubic needs a 1.7 factor (70% extra)

- Extra cushion (application might react slowly to POLLOUT)

tcp_v4_conn_req_fastopen() needs to call tcp_init_metrics() before
calling tcp_init_buffer_space()

Then we realize tcp_new_space() should call tcp_fixup_sndbuf()
instead of duplicating this stuff.

Rename tcp_fixup_sndbuf() to tcp_sndbuf_expand() to be more
descriptive.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c

index 66aa816ad30be89daf2ad9b0c96a37553bc0d77e..cd65674ece92ef7a3d8373b4f01011bc84e93cd9 100644 (file)
@@ -267,11 +267,31 @@ static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
  * 1. Tuning sk->sk_sndbuf, when connection enters established state.
  */
 
-static void tcp_fixup_sndbuf(struct sock *sk)
+static void tcp_sndbuf_expand(struct sock *sk)
 {
-       int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
+       const struct tcp_sock *tp = tcp_sk(sk);
+       int sndmem, per_mss;
+       u32 nr_segs;
+
+       /* Worst case is non GSO/TSO : each frame consumes one skb
+        * and skb->head is kmalloced using power of two area of memory
+        */
+       per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
+                 MAX_TCP_HEADER +
+                 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+       per_mss = roundup_pow_of_two(per_mss) +
+                 SKB_DATA_ALIGN(sizeof(struct sk_buff));
+
+       nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
+       nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
+
+       /* Fast Recovery (RFC 5681 3.2) :
+        * Cubic needs 1.7 factor, rounded to 2 to include
+        * extra cushion (application might react slowly to POLLOUT)
+        */
+       sndmem = 2 * nr_segs * per_mss;
 
-       sndmem *= TCP_INIT_CWND;
        if (sk->sk_sndbuf < sndmem)
                sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
 }
@@ -376,7 +396,7 @@ void tcp_init_buffer_space(struct sock *sk)
        if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
                tcp_fixup_rcvbuf(sk);
        if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
-               tcp_fixup_sndbuf(sk);
+               tcp_sndbuf_expand(sk);
 
        tp->rcvq_space.space = tp->rcv_wnd;
        tp->rcvq_space.time = tcp_time_stamp;
@@ -4723,15 +4743,7 @@ static void tcp_new_space(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);
 
        if (tcp_should_expand_sndbuf(sk)) {
-               int sndmem = SKB_TRUESIZE(max_t(u32,
-                                               tp->rx_opt.mss_clamp,
-                                               tp->mss_cache) +
-                                         MAX_TCP_HEADER);
-               int demanded = max_t(unsigned int, tp->snd_cwnd,
-                                    tp->reordering + 1);
-               sndmem *= 2 * demanded;
-               if (sndmem > sk->sk_sndbuf)
-                       sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+               tcp_sndbuf_expand(sk);
                tp->snd_cwnd_stamp = tcp_time_stamp;
        }
 
index b14266bb91eb5e3b1f43f3329dc2510c8be26a19..5d6b1a609da858cd29386c573ecb4169f04e5bc7 100644 (file)
@@ -1410,8 +1410,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
        inet_csk(child)->icsk_af_ops->rebuild_header(child);
        tcp_init_congestion_control(child);
        tcp_mtup_init(child);
-       tcp_init_buffer_space(child);
        tcp_init_metrics(child);
+       tcp_init_buffer_space(child);
 
        /* Queue the data carried in the SYN packet. We need to first
         * bump skb's refcnt because the caller will attempt to free it.