tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING
author Francis Yan <francisyyan@gmail.com>
Mon, 28 Nov 2016 07:07:18 +0000 (23:07 -0800)
committer David S. Miller <davem@davemloft.net>
Wed, 30 Nov 2016 15:04:25 +0000 (10:04 -0500)
This patch exports the sender chronograph stats via the socket
SO_TIMESTAMPING channel. Currently we can instrument how long a
particular application unit of data was queued in TCP by tracking
SOF_TIMESTAMPING_TX_SOFTWARE and SOF_TIMESTAMPING_TX_SCHED. Having
these sender chronograph stats exported simultaneously along with
these timestamps allows further breaking down the various sender
limitations.  For example, a video server can tell if a particular
chunk of video on a connection takes a long time to deliver because
TCP was experiencing a small receive window. Before this patch, it
was not possible to tell this without packet traces.

To prepare these stats, the user needs to set
SOF_TIMESTAMPING_OPT_STATS and SOF_TIMESTAMPING_OPT_TSONLY flags
while requesting other SOF_TIMESTAMPING TX timestamps. When the
timestamps are available in the error queue, the stats are returned
in a separate control message of type SCM_TIMESTAMPING_OPT_STATS,
in a list of TLVs (struct nlattr) of types: TCP_NLA_BUSY,
TCP_NLA_RWND_LIMITED, TCP_NLA_SNDBUF_LIMITED. The unit is microseconds.

Signed-off-by: Francis Yan <francisyyan@gmail.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
20 files changed:
Documentation/networking/timestamping.txt
arch/alpha/include/uapi/asm/socket.h
arch/frv/include/uapi/asm/socket.h
arch/ia64/include/uapi/asm/socket.h
arch/m32r/include/uapi/asm/socket.h
arch/mips/include/uapi/asm/socket.h
arch/mn10300/include/uapi/asm/socket.h
arch/parisc/include/uapi/asm/socket.h
arch/powerpc/include/uapi/asm/socket.h
arch/s390/include/uapi/asm/socket.h
arch/sparc/include/uapi/asm/socket.h
arch/xtensa/include/uapi/asm/socket.h
include/linux/tcp.h
include/uapi/asm-generic/socket.h
include/uapi/linux/net_tstamp.h
include/uapi/linux/tcp.h
net/core/skbuff.c
net/core/sock.c
net/ipv4/tcp.c
net/socket.c

index 671cccf0dcd2677eaa04789f25de11700c6fd65b..96f50694a74837735a60a5b83d3b83a58ae72c23 100644 (file)
@@ -182,6 +182,16 @@ SOF_TIMESTAMPING_OPT_TSONLY:
   the timestamp even if sysctl net.core.tstamp_allow_data is 0.
   This option disables SOF_TIMESTAMPING_OPT_CMSG.
 
+SOF_TIMESTAMPING_OPT_STATS:
+
+  Optional stats that are obtained along with the transmit timestamps.
+  It must be used together with SOF_TIMESTAMPING_OPT_TSONLY. When the
+  transmit timestamp is available, the stats are available in a
+  separate control message of type SCM_TIMESTAMPING_OPT_STATS, as a
+  list of TLVs (struct nlattr) of the TCP_NLA_* types. These stats
+  allow the application to associate various transport layer stats
+  with the transmit timestamps, such as how long a certain block of
+  data was limited by the peer's receive window.
 
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
index 9e46d6e656d978cd203abe4202f8b1ee353bea90..afc901b7a6f6e68c819aec1ab9199806f24fc1c1 100644 (file)
@@ -97,4 +97,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _UAPI_ASM_SOCKET_H */
index afbc98f02d278d097d31add71104f5e4fe4040ea..81e03530ed39ee7e3b25b7442361f64aa883c179 100644 (file)
@@ -90,5 +90,7 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_SOCKET_H */
 
index 0018fad9039f4bd9435d2b6a5976093e56d38db7..57feb0c1f7d707dd51ce20ffba0a418f5b5687ff 100644 (file)
@@ -99,4 +99,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_IA64_SOCKET_H */
index 5fe42fc7b6c5dd29e15edbcfbb8ea4cb370ea25c..5853f8e92c20cda02450346d839b3f0b466359ee 100644 (file)
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_M32R_SOCKET_H */
index 2027240aafbb8432f0cc67148b70ae57f48ed2a6..566ecdcb5b4bcb2cd4d5888a1ce787b8fcbd0b97 100644 (file)
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _UAPI_ASM_SOCKET_H */
index 5129f23a9ee1008fc4b7203d2af689b0bc915a46..0e12527c4b0e6de154efaa91fe197eb995a1535c 100644 (file)
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_SOCKET_H */
index 9c935d717df94c998dce3dc66ad8eefd1b71d066..7a109b73ddf7e814f9c29ffa1ec9b99977ead5d1 100644 (file)
@@ -89,4 +89,6 @@
 
 #define SO_CNX_ADVICE          0x402E
 
+#define SCM_TIMESTAMPING_OPT_STATS     0x402F
+
 #endif /* _UAPI_ASM_SOCKET_H */
index 1672e3398270bf50a740895c6ad534689eb84cf5..44583a52f882540986928cc48a63971251226a0f 100644 (file)
@@ -97,4 +97,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_POWERPC_SOCKET_H */
index 41b51c2f4f1ba98055f7f75ac36915665b62392c..b24a64cbfeb10a91274a59117f2e76ea3c583e00 100644 (file)
@@ -96,4 +96,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_SOCKET_H */
index 31aede3af088034934c4205f2ab3bf7003956370..a25dc32f5d6a163c1b0e7b7ae775f43898ba4d58 100644 (file)
@@ -86,6 +86,8 @@
 
 #define SO_CNX_ADVICE          0x0037
 
+#define SCM_TIMESTAMPING_OPT_STATS     0x0038
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION             0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT       0x5002
index 81435d995e1183d07ed0aac8903503e174a5c21e..9fdbe1fe0473802caaf04782f9a5c05ca813f013 100644 (file)
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _XTENSA_SOCKET_H */
index d5d3bd814338744f0414063e033c769651418451..00e0ee8f001f46068642c4c323e1b5f99a55a740 100644 (file)
@@ -428,4 +428,6 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp)
        tp->saved_syn = NULL;
 }
 
+struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk);
+
 #endif /* _LINUX_TCP_H */
index 67d632f1743ddd9ce351962fd7326f7320743efc..2c748ddad5f875711ed66f91eae9bc69b9a41fe0 100644 (file)
@@ -92,4 +92,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* __ASM_GENERIC_SOCKET_H */
index 264e515de16f6ede07c962344ae0ac92f641d161..464dcca5ed68304bac7c80ab5fd62192d81b3970 100644 (file)
@@ -25,8 +25,9 @@ enum {
        SOF_TIMESTAMPING_TX_ACK = (1<<9),
        SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
        SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
+       SOF_TIMESTAMPING_OPT_STATS = (1<<12),
 
-       SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY,
+       SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS,
        SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
                                 SOF_TIMESTAMPING_LAST
 };
index 2863b661d6e1c1d217b9832834b21c0fffda8c70..c53de2691cecec43f4dbde55dd69da3c07da719f 100644 (file)
@@ -220,6 +220,14 @@ struct tcp_info {
        __u64   tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */
 };
 
+/* Netlink attribute types carried in an SCM_TIMESTAMPING_OPT_STATS cmsg */
+enum {
+       TCP_NLA_PAD,            /* Pad attribute type passed to nla_put_u64_64bit() */
+       TCP_NLA_BUSY,           /* Time (usec) busy sending data */
+       TCP_NLA_RWND_LIMITED,   /* Time (usec) limited by receive window */
+       TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */
+};
+
 /* for TCP_MD5SIG socket option */
 #define TCP_MD5SIG_MAXKEYLEN   80
 
index d1d1a5a5ad24ded523fc12ffba8c602b03bd0830..ea6fa954c7a0d63551bbf5804014a0230919d3d4 100644 (file)
@@ -3839,10 +3839,18 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
        if (!skb_may_tx_timestamp(sk, tsonly))
                return;
 
-       if (tsonly)
-               skb = alloc_skb(0, GFP_ATOMIC);
-       else
+       if (tsonly) {
+#ifdef CONFIG_INET
+               if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+                   sk->sk_protocol == IPPROTO_TCP &&
+                   sk->sk_type == SOCK_STREAM)
+                       skb = tcp_get_timestamping_opt_stats(sk);
+               else
+#endif
+                       skb = alloc_skb(0, GFP_ATOMIC);
+       } else {
                skb = skb_clone(orig_skb, GFP_ATOMIC);
+       }
        if (!skb)
                return;
 
index 14e6145be33b9b19e5796063dc9c546427a6542d..d8c7f8c877ca3c4fd3ace2a43ffa6de4a07da4be 100644 (file)
@@ -854,6 +854,13 @@ set_rcvbuf:
                                sk->sk_tskey = 0;
                        }
                }
+
+               if (val & SOF_TIMESTAMPING_OPT_STATS &&
+                   !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
+                       ret = -EINVAL;
+                       break;
+               }
+
                sk->sk_tsflags = val;
                if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
                        sock_enable_timestamp(sk,
index cdde20f49999cc3c068efc5a8b9f257f72ad5813..1149b48700a125b03359b4b65fbb3f1d8494e0c4 100644 (file)
@@ -2841,6 +2841,26 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
+struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct sk_buff *stats;
+       struct tcp_info info;
+       /* Build an skb holding the three chrono stats as u64 attributes. */
+       stats = alloc_skb(3 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);
+       if (!stats)
+               return NULL;
+       /* Helper is expected to fill the info.tcpi_* fields read below. */
+       tcp_get_info_chrono_stats(tp, &info);
+       nla_put_u64_64bit(stats, TCP_NLA_BUSY,
+                         info.tcpi_busy_time, TCP_NLA_PAD);
+       nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED,
+                         info.tcpi_rwnd_limited, TCP_NLA_PAD);
+       nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED,
+                         info.tcpi_sndbuf_limited, TCP_NLA_PAD);
+       return stats;
+}
+
 static int do_tcp_getsockopt(struct sock *sk, int level,
                int optname, char __user *optval, int __user *optlen)
 {
index e2584c51aa1fe9d7b786b4ce203eb63c7a9202e8..e6318943ad07045f88e36c77787d41c5e28c16df 100644 (file)
@@ -693,9 +693,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
            (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
            ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
                empty = 0;
-       if (!empty)
+       if (!empty) {
                put_cmsg(msg, SOL_SOCKET,
                         SCM_TIMESTAMPING, sizeof(tss), &tss);
+
+               if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
+                       put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
+                                skb->len, skb->data);
+       }
 }
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);