2f03074bc9ac8f135594a229df33d19a284aa24f
[openwrt/staging/stintel.git] /
1 From: Jakub Sitnicki <jakub@cloudflare.com>
2 Date: Wed, 26 Jun 2024 19:51:26 +0200
3 Subject: [PATCH] udp: Allow GSO transmit from devices with no checksum offload
4
5 Today sending a UDP GSO packet from a TUN device results in an EIO error:
6
7 import fcntl, os, struct
8 from socket import *
9
10 TUNSETIFF = 0x400454CA
11 IFF_TUN = 0x0001
12 IFF_NO_PI = 0x1000
13 UDP_SEGMENT = 103
14
15 tun_fd = os.open("/dev/net/tun", os.O_RDWR)
16 ifr = struct.pack("16sH", b"tun0", IFF_TUN | IFF_NO_PI)
17 fcntl.ioctl(tun_fd, TUNSETIFF, ifr)
18
19 os.system("ip addr add 192.0.2.1/24 dev tun0")
20 os.system("ip link set dev tun0 up")
21
22 s = socket(AF_INET, SOCK_DGRAM)
23 s.setsockopt(SOL_UDP, UDP_SEGMENT, 1200)
24 s.sendto(b"x" * 3000, ("192.0.2.2", 9)) # EIO
25
26 This is due to a check in the UDP stack for whether the egress device offers
27 checksum offload. TUN/TAP devices, by default, don't advertise this
28 capability because it requires support from the TUN/TAP reader.
29
30 However, the GSO stack has a software fallback for checksum calculation,
31 which we can use. This way we don't force UDP_SEGMENT users to handle the
32 EIO error and implement a segmentation fallback.
33
34 Lift the restriction so that UDP_SEGMENT can be used with any egress
35 device. We also need to adjust the UDP GSO code to match the GSO stack
36 expectation about ip_summed field, as set in commit 8d63bee643f1 ("net:
37 avoid skb_warn_bad_offload false positives on UFO"). Otherwise we will hit
38 the bad offload check.
39
40 Users should, however, expect a potential performance impact when
41 batch-sending packets with UDP_SEGMENT without checksum offload on the
42 egress device. In such a case the packet payload is read twice: first during
43 the sendmsg syscall when copying data from user memory, and then in the GSO
44 stack for checksum computation. This double memory read can be less
45 efficient than a regular sendmsg where the checksum is calculated during
46 the initial data copy from user memory.
47
48 Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
49 Reviewed-by: Willem de Bruijn <willemb@google.com>
50 Link: https://patch.msgid.link/20240626-linux-udpgso-v2-1-422dfcbd6b48@cloudflare.com
51 Signed-off-by: Jakub Kicinski <kuba@kernel.org>
52 ---
53
54 --- a/net/ipv4/udp.c
55 +++ b/net/ipv4/udp.c
56 @@ -942,8 +942,7 @@ static int udp_send_skb(struct sk_buff *
57 kfree_skb(skb);
58 return -EINVAL;
59 }
60 - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
61 - dst_xfrm(skb_dst(skb))) {
62 + if (is_udplite || dst_xfrm(skb_dst(skb))) {
63 kfree_skb(skb);
64 return -EIO;
65 }
66 --- a/net/ipv4/udp_offload.c
67 +++ b/net/ipv4/udp_offload.c
68 @@ -380,6 +380,14 @@ struct sk_buff *__udp_gso_segment(struct
69 else
70 uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0;
71
72 + /* On the TX path, CHECKSUM_NONE and CHECKSUM_UNNECESSARY have the same
73 + * meaning. However, check for bad offloads in the GSO stack expects the
74 + * latter, if the checksum was calculated in software. To vouch for the
75 + * segment skbs we actually need to set it on the gso_skb.
76 + */
77 + if (gso_skb->ip_summed == CHECKSUM_NONE)
78 + gso_skb->ip_summed = CHECKSUM_UNNECESSARY;
79 +
80 /* update refcount for the packet */
81 if (copy_dtor) {
82 int delta = sum_truesize - gso_skb->truesize;
83 --- a/net/ipv6/udp.c
84 +++ b/net/ipv6/udp.c
85 @@ -1261,8 +1261,7 @@ static int udp_v6_send_skb(struct sk_buf
86 kfree_skb(skb);
87 return -EINVAL;
88 }
89 - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
90 - dst_xfrm(skb_dst(skb))) {
91 + if (is_udplite || dst_xfrm(skb_dst(skb))) {
92 kfree_skb(skb);
93 return -EIO;
94 }