--- /dev/null
+From: Jakub Sitnicki <jakub@cloudflare.com>
+Date: Wed, 26 Jun 2024 19:51:26 +0200
+Subject: [PATCH] udp: Allow GSO transmit from devices with no checksum offload
+
+Today sending a UDP GSO packet from a TUN device results in an EIO error:
+
+ import fcntl, os, struct
+ from socket import *
+
+ TUNSETIFF = 0x400454CA
+ IFF_TUN = 0x0001
+ IFF_NO_PI = 0x1000
+ UDP_SEGMENT = 103
+
+ tun_fd = os.open("/dev/net/tun", os.O_RDWR)
+ ifr = struct.pack("16sH", b"tun0", IFF_TUN | IFF_NO_PI)
+ fcntl.ioctl(tun_fd, TUNSETIFF, ifr)
+
+ os.system("ip addr add 192.0.2.1/24 dev tun0")
+ os.system("ip link set dev tun0 up")
+
+ s = socket(AF_INET, SOCK_DGRAM)
+ s.setsockopt(SOL_UDP, UDP_SEGMENT, 1200)
+ s.sendto(b"x" * 3000, ("192.0.2.2", 9)) # EIO
+
+This is due to a check in the UDP stack that requires the egress device to
+offer checksum offload. TUN/TAP devices, by default, don't advertise this
+capability because it requires support from the TUN/TAP reader.
+
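+A TUN/TAP reader prepared to handle packets whose checksum has not been
+filled in can advertise that itself. A rough sketch, continuing the snippet
+above, with the TUNSETOFFLOAD ioctl number and TUN_F_CSUM flag assumed to
+match <linux/if_tun.h>:
+
+ TUNSETOFFLOAD = 0x400454D0  # _IOW('T', 208, unsigned int)
+ TUN_F_CSUM = 0x01           # reader accepts partial checksums
+ fcntl.ioctl(tun_fd, TUNSETOFFLOAD, TUN_F_CSUM)
+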
+However, the GSO stack has a software fallback for checksum calculation,
+which we can use. This way we don't force UDP_SEGMENT users to handle the
+EIO error and implement a segmentation fallback.
+
+Lift the restriction so that UDP_SEGMENT can be used with any egress
+device. We also need to adjust the UDP GSO code to match the GSO stack's
+expectation about the ip_summed field, as set in commit 8d63bee643f1 ("net:
+avoid skb_warn_bad_offload false positives on UFO"). Otherwise we will hit
+the bad offload check.
+
+Users should, however, expect a potential performance impact when
+batch-sending packets with UDP_SEGMENT without checksum offload on the
+egress device. In that case the packet payload is read twice: first during
+the sendmsg syscall when copying data from user memory, and then in the GSO
+stack for checksum computation. This double memory read can be less
+efficient than a regular sendmsg where the checksum is calculated during
+the initial data copy from user memory.
+
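+For reference, when batch-sending with UDP_SEGMENT, the segment size can
+also be passed per call as ancillary data instead of a socket option. A
+sketch, reusing the reproducer's socket and constants (the kernel expects
+a 16-bit value in the cmsg):
+
+ # per-call segment size of 1200 bytes via a UDP_SEGMENT cmsg
+ s.sendmsg([b"x" * 3000],
+           [(SOL_UDP, UDP_SEGMENT, struct.pack("H", 1200))],
+           0, ("192.0.2.2", 9))
+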
+Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://patch.msgid.link/20240626-linux-udpgso-v2-1-422dfcbd6b48@cloudflare.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+---
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -942,8 +942,7 @@ static int udp_send_skb(struct sk_buff *
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
+- dst_xfrm(skb_dst(skb))) {
++ if (is_udplite || dst_xfrm(skb_dst(skb))) {
+ kfree_skb(skb);
+ return -EIO;
+ }
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -357,6 +357,14 @@ struct sk_buff *__udp_gso_segment(struct
+ else
+ uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0;
+
++ /* On the TX path, CHECKSUM_NONE and CHECKSUM_UNNECESSARY have the same
++ * meaning. However, check for bad offloads in the GSO stack expects the
++ * latter, if the checksum was calculated in software. To vouch for the
++ * segment skbs we actually need to set it on the gso_skb.
++ */
++ if (gso_skb->ip_summed == CHECKSUM_NONE)
++ gso_skb->ip_summed = CHECKSUM_UNNECESSARY;
++
+ /* update refcount for the packet */
+ if (copy_dtor) {
+ int delta = sum_truesize - gso_skb->truesize;
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1261,8 +1261,7 @@ static int udp_v6_send_skb(struct sk_buf
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
+- dst_xfrm(skb_dst(skb))) {
++ if (is_udplite || dst_xfrm(skb_dst(skb))) {
+ kfree_skb(skb);
+ return -EIO;
+ }
--- /dev/null
+From: Jakub Sitnicki <jakub@cloudflare.com>
+Date: Thu, 8 Aug 2024 11:56:21 +0200
+Subject: [PATCH] net: Make USO depend on CSUM offload
+
+UDP segmentation offload inherently depends on checksum offload. It should
+not be possible to disable checksum offload while leaving USO enabled.
+Enforce this dependency in code.
+
+There is a single tx-udp-segmentation feature flag to indicate support for
+both IPv4/6, hence devices wishing to support USO must offer checksum
+offload for both IP versions.
+
+Fixes: 10154dbded6d ("udp: Allow GSO transmit from devices with no checksum offload")
+Suggested-by: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://patch.msgid.link/20240808-udp-gso-egress-from-tunnel-v4-1-f5c5b4149ab9@cloudflare.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+---
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9751,6 +9751,15 @@ static void netdev_sync_lower_features(s
+ }
+ }
+
++static bool netdev_has_ip_or_hw_csum(netdev_features_t features)
++{
++ netdev_features_t ip_csum_mask = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
++ bool ip_csum = (features & ip_csum_mask) == ip_csum_mask;
++ bool hw_csum = features & NETIF_F_HW_CSUM;
++
++ return ip_csum || hw_csum;
++}
++
+ static netdev_features_t netdev_fix_features(struct net_device *dev,
+ netdev_features_t features)
+ {
+@@ -9832,15 +9841,9 @@ static netdev_features_t netdev_fix_feat
+ features &= ~NETIF_F_LRO;
+ }
+
+- if (features & NETIF_F_HW_TLS_TX) {
+- bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) ==
+- (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+- bool hw_csum = features & NETIF_F_HW_CSUM;
+-
+- if (!ip_csum && !hw_csum) {
+- netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n");
+- features &= ~NETIF_F_HW_TLS_TX;
+- }
++ if ((features & NETIF_F_HW_TLS_TX) && !netdev_has_ip_or_hw_csum(features)) {
++ netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n");
++ features &= ~NETIF_F_HW_TLS_TX;
+ }
+
+ if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) {
+@@ -9848,6 +9851,11 @@ static netdev_features_t netdev_fix_feat
+ features &= ~NETIF_F_HW_TLS_RX;
+ }
+
++ if ((features & NETIF_F_GSO_UDP_L4) && !netdev_has_ip_or_hw_csum(features)) {
++ netdev_dbg(dev, "Dropping USO feature since no CSUM feature.\n");
++ features &= ~NETIF_F_GSO_UDP_L4;
++ }
++
+ return features;
+ }
+
--- /dev/null
+From: Jakub Sitnicki <jakub@cloudflare.com>
+Date: Thu, 8 Aug 2024 11:56:22 +0200
+Subject: [PATCH] udp: Fall back to software USO if IPv6 extension headers are
+ present
+
+In commit 10154dbded6d ("udp: Allow GSO transmit from devices with no
+checksum offload") we have intentionally allowed UDP GSO packets marked
+CHECKSUM_NONE to pass to the GSO stack, so that they can be segmented and
+checksummed by a software fallback when the egress device lacks these
+features.
+
+What was not taken into consideration is that a CHECKSUM_NONE skb can be
+handed over to the GSO stack even when the egress device advertises the
+tx-udp-segmentation / NETIF_F_GSO_UDP_L4 feature.
+
+This will happen when there are IPv6 extension headers present, which we
+check for in __ip6_append_data(). Syzbot has discovered this scenario,
+producing a warning as below:
+
+ ip6tnl0: caps=(0x00000006401d7869, 0x00000006401d7869)
+ WARNING: CPU: 0 PID: 5112 at net/core/dev.c:3293 skb_warn_bad_offload+0x166/0x1a0 net/core/dev.c:3291
+ Modules linked in:
+ CPU: 0 PID: 5112 Comm: syz-executor391 Not tainted 6.10.0-rc7-syzkaller-01603-g80ab5445da62 #0
+ Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/07/2024
+ RIP: 0010:skb_warn_bad_offload+0x166/0x1a0 net/core/dev.c:3291
+ [...]
+ Call Trace:
+ <TASK>
+ __skb_gso_segment+0x3be/0x4c0 net/core/gso.c:127
+ skb_gso_segment include/net/gso.h:83 [inline]
+ validate_xmit_skb+0x585/0x1120 net/core/dev.c:3661
+ __dev_queue_xmit+0x17a4/0x3e90 net/core/dev.c:4415
+ neigh_output include/net/neighbour.h:542 [inline]
+ ip6_finish_output2+0xffa/0x1680 net/ipv6/ip6_output.c:137
+ ip6_finish_output+0x41e/0x810 net/ipv6/ip6_output.c:222
+ ip6_send_skb+0x112/0x230 net/ipv6/ip6_output.c:1958
+ udp_v6_send_skb+0xbf5/0x1870 net/ipv6/udp.c:1292
+ udpv6_sendmsg+0x23b3/0x3270 net/ipv6/udp.c:1588
+ sock_sendmsg_nosec net/socket.c:730 [inline]
+ __sock_sendmsg+0xef/0x270 net/socket.c:745
+ ____sys_sendmsg+0x525/0x7d0 net/socket.c:2585
+ ___sys_sendmsg net/socket.c:2639 [inline]
+ __sys_sendmmsg+0x3b2/0x740 net/socket.c:2725
+ __do_sys_sendmmsg net/socket.c:2754 [inline]
+ __se_sys_sendmmsg net/socket.c:2751 [inline]
+ __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2751
+ do_syscall_x64 arch/x86/entry/common.c:52 [inline]
+ do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+ [...]
+ </TASK>
+
+We are hitting the bad offload warning because when an egress device is
+capable of handling segmentation offload requested by
+skb_shinfo(skb)->gso_type, the chain of gso_segment callbacks won't produce
+any segment skbs but will return NULL. See the skb_gso_ok() branch in
+{__udp,tcp,sctp}_gso_segment helpers.
+
+To fix it, force a fallback to software USO when processing a packet with
+IPv6 extension headers, since we don't know if these can be checksummed by
+all devices which offer USO.
+
+Fixes: 10154dbded6d ("udp: Allow GSO transmit from devices with no checksum offload")
+Reported-by: syzbot+e15b7e15b8a751a91d9a@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/all/000000000000e1609a061d5330ce@google.com/
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://patch.msgid.link/20240808-udp-gso-egress-from-tunnel-v4-2-f5c5b4149ab9@cloudflare.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+---
+
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -278,6 +278,12 @@ struct sk_buff *__udp_gso_segment(struct
+ if (gso_skb->len <= sizeof(*uh) + mss)
+ return ERR_PTR(-EINVAL);
+
++ /* We don't know if egress device can segment and checksum the packet
++ * when IPv6 extension headers are present. Fall back to software GSO.
++ */
++ if (gso_skb->ip_summed != CHECKSUM_PARTIAL)
++ features &= ~(NETIF_F_GSO_UDP_L4 | NETIF_F_CSUM_MASK);
++
+ if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
-@@ -433,33 +433,6 @@ out:
+@@ -447,33 +447,6 @@ out:
return segs;
}