tcp: prefer packet timing to TS-ECR for RTT

author Yuchung Cheng <ycheng@google.com>

Mon, 22 Jul 2013 23:20:46 +0000 (16:20 -0700)

committer David S. Miller <davem@davemloft.net>

Tue, 23 Jul 2013 00:53:42 +0000 (17:53 -0700)
author Yuchung Cheng <ycheng@google.com>
Mon, 22 Jul 2013 23:20:46 +0000 (16:20 -0700)
committer David S. Miller <davem@davemloft.net>
Tue, 23 Jul 2013 00:53:42 +0000 (17:53 -0700)
diff --git a/include/net/tcp.h b/include/net/tcp.h

index f9777dbede759b5ff0a8e81c9ace2cf06ccc3154..c5868471abae93b1d64573e52a2932f9aaad20ec 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -591,7 +591,6 @@ extern void tcp_initialize_rcv_mss(struct sock *sk);
  extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
  extern int tcp_mss_to_mtu(struct sock *sk, int mss);
  extern void tcp_mtup_init(struct sock *sk);
-extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
  extern void tcp_init_buffer_space(struct sock *sk);
  
  static inline void tcp_bound_rto(const struct sock *sk)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index b531710596eceeba856f38aae04890dbfa0bcfd0..c7398f05d12b995da1a2857df2bda538abe1261d 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2792,65 +2792,36 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
         tcp_xmit_retransmit_queue(sk);
  }
  
-void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
+static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
+                                     s32 seq_rtt)
  {
-       tcp_rtt_estimator(sk, seq_rtt);
-       tcp_set_rto(sk);
-       inet_csk(sk)->icsk_backoff = 0;
-}
-EXPORT_SYMBOL(tcp_valid_rtt_meas);
+       const struct tcp_sock *tp = tcp_sk(sk);
+
+       /* Prefer RTT measured from ACK's timing to TS-ECR. This is because
+        * broken middle-boxes or peers may corrupt TS-ECR fields. But
+        * Karn's algorithm forbids taking RTT if some retransmitted data
+        * is acked (RFC6298).
+        */
+       if (flag & FLAG_RETRANS_DATA_ACKED)
+               seq_rtt = -1;
  
-/* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Supersedes RFC1323)
- */
-static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
-{
         /* RTTM Rule: A TSecr value received in a segment is used to
          * update the averaged RTT measurement only if the segment
          * acknowledges some new data, i.e., only if it advances the
          * left edge of the send window.
-        *
          * See draft-ietf-tcplw-high-performance-00, section 3.3.
-        * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
-        *
-        * Changed: reset backoff as soon as we see the first valid sample.
-        * If we do not, we get strongly overestimated rto. With timestamps
-        * samples are accepted even from very old segments: f.e., when rtt=1
-        * increases to 8, we retransmit 5 times and after 8 seconds delayed
-        * answer arrives rto becomes 120 seconds! If at least one of segments
-        * in window is lost... Voila.                          --ANK (010210)
          */
-       struct tcp_sock *tp = tcp_sk(sk);
+       if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+               seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
  
-       tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
-}
-
-static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
-{
-       /* We don't have a timestamp. Can only use
-        * packets that are not retransmitted to determine
-        * rtt estimates. Also, we must not reset the
-        * backoff for rto until we get a non-retransmitted
-        * packet. This allows us to deal with a situation
-        * where the network delay has increased suddenly.
-        * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
-        */
-
-       if (flag & FLAG_RETRANS_DATA_ACKED)
+       if (seq_rtt < 0)
                 return;
  
-       tcp_valid_rtt_meas(sk, seq_rtt);
-}
+       tcp_rtt_estimator(sk, seq_rtt);
+       tcp_set_rto(sk);
  
-static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
-                                     const s32 seq_rtt)
-{
-       const struct tcp_sock *tp = tcp_sk(sk);
-       /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
-       if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
-               tcp_ack_saw_tstamp(sk, flag);
-       else if (seq_rtt >= 0)
-               tcp_ack_no_tstamp(sk, seq_rtt, flag);
+       /* RFC6298: only reset backoff on valid RTT measurement. */
+       inet_csk(sk)->icsk_backoff = 0;
  }
  
  /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
@@ -2989,8 +2960,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                         if (sacked & TCPCB_SACKED_RETRANS)
                                 tp->retrans_out -= acked_pcount;
                         flag |= FLAG_RETRANS_DATA_ACKED;
-                       ca_seq_rtt = -1;
-                       seq_rtt = -1;
                 } else {
                         ca_seq_rtt = now - scb->when;
                         last_ackt = skb->tstamp;
author	Yuchung Cheng <ycheng@google.com>
	Mon, 22 Jul 2013 23:20:46 +0000 (16:20 -0700)
committer	David S. Miller <davem@davemloft.net>
	Tue, 23 Jul 2013 00:53:42 +0000 (17:53 -0700)
include/net/tcp.h		patch \| blob \| history
net/ipv4/tcp_input.c		patch \| blob \| history