--- /dev/null
+From: Richard Gobert <richardbgobert@gmail.com>
+Date: Wed, 3 Jan 2024 15:44:21 +0100
+Subject: [PATCH] net: gro: parse ipv6 ext headers without frag0 invalidation
+
+The existing code always pulls the IPv6 header and sets the transport
+offset initially, and then optionally pulls any extension headers in
+ipv6_gso_pull_exthdrs, setting the transport offset again on return from
+that call. Before calling ipv6_gso_pull_exthdrs, skb->data is moved to the
+start of the first extension header, and the frag0 optimization must be
+disabled because that function uses pskb_may_pull/pskb_pull instead of the
+skb_gro_ helpers. The code then advances the GRO offset to the TCP header
+with skb_gro_pull, sets the transport header, and finally returns skb->data
+to its position before this block.
+
+This commit introduces a new helper function - ipv6_gro_pull_exthdrs -
+which is used in ipv6_gro_receive to pull ipv6 ext headers instead of
+ipv6_gso_pull_exthdrs. Thus, there is no modification of skb->data, all
+operations use skb_gro_* helpers, and the frag0 fast path can be taken for
+IPv6 packets with ext headers.
+
+Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/504130f6-b56c-4dcc-882c-97942c59f5b7@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+---
+
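+Illustrative note (not part of the diff below): a minimal, self-contained
+userspace sketch of the extension-header walk that both helpers perform. It
+only demonstrates the generic length rule the kernel expresses as
+ipv6_optlen(), i.e. (hdrlen + 1) * 8 bytes, on a synthetic buffer; the
+struct and variable names below are illustrative, not kernel code.
+
+  #include <stdio.h>
+  #include <stdint.h>
+  #include <stddef.h>
+
+  /* Mirrors struct ipv6_opt_hdr: every generic IPv6 extension header starts
+   * with a next-header byte and a length byte counted in 8-octet units,
+   * excluding the first 8 octets.
+   */
+  struct opt_hdr {
+      uint8_t nexthdr;
+      uint8_t hdrlen;
+  };
+
+  int main(void)
+  {
+      /* Synthetic chain: hop-by-hop (8 bytes) -> dest opts (16 bytes) -> TCP */
+      uint8_t pkt[24] = {
+          60, 0, 0, 0, 0, 0, 0, 0,   /* nexthdr = 60 (dst opts), hdrlen = 0 */
+           6, 1, 0, 0, 0, 0, 0, 0,   /* nexthdr = 6 (TCP), hdrlen = 1       */
+           0, 0, 0, 0, 0, 0, 0, 0,
+      };
+      size_t off = 0;
+      int proto = 0;                 /* IPPROTO_HOPOPTS */
+
+      while (proto == 0 || proto == 43 || proto == 60) {
+          struct opt_hdr *opth = (struct opt_hdr *)(pkt + off);
+          size_t len = (opth->hdrlen + 1) * 8;   /* same rule as ipv6_optlen() */
+
+          printf("ext header %d at offset %zu, %zu bytes\n", proto, off, len);
+          proto = opth->nexthdr;
+          off += len;
+      }
+      printf("upper-layer protocol %d starts at offset %zu\n", proto, off);
+      return 0;
+  }
+
+The kernel helper does the same walk, but bounds every access with
+skb_gro_header() and advances the GRO offset with skb_gro_pull() instead of
+touching skb->data, which is what keeps the frag0 fast path usable.
+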
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -36,6 +36,40 @@
+ INDIRECT_CALL_L4(cb, f2, f1, head, skb); \
+ })
+
++static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
++{
++ const struct net_offload *ops = NULL;
++ struct ipv6_opt_hdr *opth;
++
++ for (;;) {
++ int len;
++
++ ops = rcu_dereference(inet6_offloads[proto]);
++
++ if (unlikely(!ops))
++ break;
++
++ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
++ break;
++
++ opth = skb_gro_header(skb, off + sizeof(*opth), off);
++ if (unlikely(!opth))
++ break;
++
++ len = ipv6_optlen(opth);
++
++ opth = skb_gro_header(skb, off + len, off);
++ if (unlikely(!opth))
++ break;
++ proto = opth->nexthdr;
++
++ off += len;
++ }
++
++ skb_gro_pull(skb, off - skb_network_offset(skb));
++ return proto;
++}
++
+ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
+ {
+ const struct net_offload *ops = NULL;
+@@ -224,28 +258,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *
+ goto out;
+
+ skb_set_network_header(skb, off);
+- skb_gro_pull(skb, sizeof(*iph));
+- skb_set_transport_header(skb, skb_gro_offset(skb));
+
+- flush += ntohs(iph->payload_len) != skb_gro_len(skb);
++ flush += ntohs(iph->payload_len) != skb->len - hlen;
+
+ proto = iph->nexthdr;
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive) {
+- pskb_pull(skb, skb_gro_offset(skb));
+- skb_gro_frag0_invalidate(skb);
+- proto = ipv6_gso_pull_exthdrs(skb, proto);
+- skb_gro_pull(skb, -skb_transport_offset(skb));
+- skb_reset_transport_header(skb);
+- __skb_push(skb, skb_gro_offset(skb));
++ proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
+
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out;
+
+- iph = ipv6_hdr(skb);
++ iph = skb_gro_network_header(skb);
++ } else {
++ skb_gro_pull(skb, sizeof(*iph));
+ }
+
++ skb_set_transport_header(skb, skb_gro_offset(skb));
++
+ NAPI_GRO_CB(skb)->proto = proto;
+
+ flush--;
--- /dev/null
+From: Richard Gobert <richardbgobert@gmail.com>
+Date: Tue, 30 Apr 2024 16:35:55 +0200
+Subject: [PATCH] net: gro: add flush check in udp_gro_receive_segment
+
+The GRO-GSO path is supposed to be transparent, and as such the L3 flush
+checks are relevant to all UDP flows merging in GRO. This patch reuses the
+same logic and code from tcp_gro_receive, terminating the merge if flush is
+non-zero.
+
+Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
+Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+---
+
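+Illustrative note (not part of the diff below): a standalone sketch of the
+flush decision this patch copies from tcp_gro_receive, written as a pure
+function over a struct that mirrors the relevant napi_gro_cb fields. The
+struct, the function name and the demo values are made up for illustration.
+
+  #include <stdio.h>
+  #include <stdbool.h>
+
+  struct gro_state {          /* subset of napi_gro_cb, illustrative only */
+      int flush;              /* flush bits accumulated by lower layers   */
+      int flush_id;           /* recorded IP ID progression of the flow   */
+      int count;              /* segments already aggregated              */
+      bool is_atomic;         /* IP ID expected to stay fixed (DF set)    */
+  };
+
+  /* Non-zero means the held flow p should be completed instead of having
+   * another segment merged into it.
+   */
+  static int must_flush(struct gro_state *p)
+  {
+      int flush = p->flush;
+
+      if (p->flush_id != 1 || p->count != 1 || !p->is_atomic)
+          flush |= p->flush_id;   /* fold unexpected ID progression into flush */
+      else
+          p->is_atomic = false;   /* lone fixed-ID packet: let the ID start incrementing */
+
+      return flush;
+  }
+
+  int main(void)
+  {
+      struct gro_state ok  = { .flush = 0, .flush_id = 1, .count = 1, .is_atomic = true };
+      struct gro_state bad = { .flush = 0, .flush_id = 5, .count = 3, .is_atomic = false };
+
+      printf("in-order segment: flush=%d\n", must_flush(&ok));   /* 0 -> keep merging */
+      printf("ID jump:          flush=%d\n", must_flush(&bad));  /* 5 -> complete flow */
+      return 0;
+  }
+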
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -463,6 +463,7 @@ static struct sk_buff *udp_gro_receive_s
+ struct sk_buff *p;
+ unsigned int ulen;
+ int ret = 0;
++ int flush;
+
+ /* requires non zero csum, for symmetry with GSO */
+ if (!uh->check) {
+@@ -496,13 +497,22 @@ static struct sk_buff *udp_gro_receive_s
+ return p;
+ }
+
++ flush = NAPI_GRO_CB(p)->flush;
++
++ if (NAPI_GRO_CB(p)->flush_id != 1 ||
++ NAPI_GRO_CB(p)->count != 1 ||
++ !NAPI_GRO_CB(p)->is_atomic)
++ flush |= NAPI_GRO_CB(p)->flush_id;
++ else
++ NAPI_GRO_CB(p)->is_atomic = false;
++
+ /* Terminate the flow on len mismatch or if it grow "too much".
+ * Under small packet flood GRO count could elsewhere grow a lot
+ * leading to excessive truesize values.
+ * On len mismatch merge the first packet shorter than gso_size,
+ * otherwise complete the GRO packet.
+ */
+- if (ulen > ntohs(uh2->len)) {
++ if (ulen > ntohs(uh2->len) || flush) {
+ pp = p;
+ } else {
+ if (NAPI_GRO_CB(skb)->is_flist) {
--- /dev/null
+From: Richard Gobert <richardbgobert@gmail.com>
+Date: Wed, 3 Jan 2024 15:44:21 +0100
+Subject: [PATCH] net: gro: parse ipv6 ext headers without frag0 invalidation
+
+The existing code always pulls the IPv6 header and sets the transport
+offset initially, and then optionally pulls any extension headers in
+ipv6_gso_pull_exthdrs, setting the transport offset again on return from
+that call. Before calling ipv6_gso_pull_exthdrs, skb->data is moved to the
+start of the first extension header, and the frag0 optimization must be
+disabled because that function uses pskb_may_pull/pskb_pull instead of the
+skb_gro_ helpers. The code then advances the GRO offset to the TCP header
+with skb_gro_pull, sets the transport header, and finally returns skb->data
+to its position before this block.
+
+This commit introduces a new helper function - ipv6_gro_pull_exthdrs -
+which is used in ipv6_gro_receive to pull ipv6 ext headers instead of
+ipv6_gso_pull_exthdrs. Thus, there is no modification of skb->data, all
+operations use skb_gro_* helpers, and the frag0 fast path can be taken for
+IPv6 packets with ext headers.
+
+Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/504130f6-b56c-4dcc-882c-97942c59f5b7@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+---
+
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -37,6 +37,40 @@
+ INDIRECT_CALL_L4(cb, f2, f1, head, skb); \
+ })
+
++static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
++{
++ const struct net_offload *ops = NULL;
++ struct ipv6_opt_hdr *opth;
++
++ for (;;) {
++ int len;
++
++ ops = rcu_dereference(inet6_offloads[proto]);
++
++ if (unlikely(!ops))
++ break;
++
++ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
++ break;
++
++ opth = skb_gro_header(skb, off + sizeof(*opth), off);
++ if (unlikely(!opth))
++ break;
++
++ len = ipv6_optlen(opth);
++
++ opth = skb_gro_header(skb, off + len, off);
++ if (unlikely(!opth))
++ break;
++ proto = opth->nexthdr;
++
++ off += len;
++ }
++
++ skb_gro_pull(skb, off - skb_network_offset(skb));
++ return proto;
++}
++
+ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
+ {
+ const struct net_offload *ops = NULL;
+@@ -206,28 +240,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *
+ goto out;
+
+ skb_set_network_header(skb, off);
+- skb_gro_pull(skb, sizeof(*iph));
+- skb_set_transport_header(skb, skb_gro_offset(skb));
+
+- flush += ntohs(iph->payload_len) != skb_gro_len(skb);
++ flush += ntohs(iph->payload_len) != skb->len - hlen;
+
+ proto = iph->nexthdr;
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive) {
+- pskb_pull(skb, skb_gro_offset(skb));
+- skb_gro_frag0_invalidate(skb);
+- proto = ipv6_gso_pull_exthdrs(skb, proto);
+- skb_gro_pull(skb, -skb_transport_offset(skb));
+- skb_reset_transport_header(skb);
+- __skb_push(skb, skb_gro_offset(skb));
++ proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
+
+ ops = rcu_dereference(inet6_offloads[proto]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out;
+
+- iph = ipv6_hdr(skb);
++ iph = skb_gro_network_header(skb);
++ } else {
++ skb_gro_pull(skb, sizeof(*iph));
+ }
+
++ skb_set_transport_header(skb, skb_gro_offset(skb));
++
+ NAPI_GRO_CB(skb)->proto = proto;
+
+ flush--;
--- /dev/null
+From: Richard Gobert <richardbgobert@gmail.com>
+Date: Tue, 30 Apr 2024 16:35:54 +0200
+Subject: [PATCH] net: gro: fix udp bad offset in socket lookup by adding
+ {inner_}network_offset to napi_gro_cb
+
+Commits a602456 ("udp: Add GRO functions to UDP socket") and 57c67ff ("udp:
+additional GRO support") introduce incorrect usage of {ip,ipv6}_hdr in the
+complete phase of GRO. These functions always return skb->network_header,
+which, for encapsulated packets at the GRO complete phase, points to the
+innermost L3 header of the packet. That means that calling {ip,ipv6}_hdr on
+skbs which have completed the GRO receive phase (both in gro_list and
+*_gro_complete) while parsing an encapsulated packet's _outer_ L3/L4 may
+return an unexpected value.
+
+This incorrect usage leads to a bug in GRO's UDP socket lookup.
+udp{4,6}_lib_lookup_skb functions use ip_hdr/ipv6_hdr respectively. These
+*_hdr functions return network_header which will point to the innermost L3,
+resulting in the wrong offset being used in __udp{4,6}_lib_lookup with
+encapsulated packets.
+
+To fix the issue, this patch adds a network_offsets union to napi_gro_cb, in
+which both the outer and the inner network offsets are saved, and makes sure
+both are set correctly.
+
+Reproduction example:
+
+Endpoint configuration example (fou + local address bind)
+
+ # ip fou add port 6666 ipproto 4
+ # ip link add name tun1 type ipip remote 2.2.2.1 local 2.2.2.2 encap fou encap-dport 5555 encap-sport 6666 mode ipip
+ # ip link set tun1 up
+ # ip a add 1.1.1.2/24 dev tun1
+
+Netperf TCP_STREAM results on net-next (before and after the patch):
+
+net-next main, GRO enabled:
+ $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5
+ Recv Send Send
+ Socket Socket Message Elapsed
+ Size Size Size Time Throughput
+ bytes bytes bytes secs. 10^6bits/sec
+
+ 131072 16384 16384 5.28 2.37
+
+net-next main, GRO disabled:
+ $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5
+ Recv Send Send
+ Socket Socket Message Elapsed
+ Size Size Size Time Throughput
+ bytes bytes bytes secs. 10^6bits/sec
+
+ 131072 16384 16384 5.01 2745.06
+
+patch applied, GRO enabled:
+ $ netperf -H 1.1.1.2 -t TCP_STREAM -l 5
+ Recv Send Send
+ Socket Socket Message Elapsed
+ Size Size Size Time Throughput
+ bytes bytes bytes secs. 10^6bits/sec
+
+ 131072 16384 16384 5.01 2877.38
+
+Fixes: a6024562ffd7 ("udp: Add GRO functions to UDP socket")
+Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+---
+
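+Illustrative note (not part of the diff below): a standalone sketch of the
+union this patch adds to napi_gro_cb. The two u16 slots can be filled by
+name (network_offset / inner_network_offset) while the socket lookup reads
+them back by index, as the patch does with network_offsets[skb->encapsulation].
+The struct name and the example offsets are made up for illustration.
+
+  #include <stdio.h>
+  #include <stdint.h>
+
+  struct cb {                 /* userspace copy of the new napi_gro_cb member */
+      union {
+          struct {
+              uint16_t network_offset;        /* outer L3 offset */
+              uint16_t inner_network_offset;  /* inner L3 offset */
+          };
+          uint16_t network_offsets[2];
+      };
+  };
+
+  int main(void)
+  {
+      struct cb cb = { 0 };
+
+      /* The receive path fills the slots by name as it parses each L3 header
+       * (example offsets: outer IP at 0, inner IP behind outer IP + UDP).
+       */
+      cb.network_offset = 0;
+      cb.inner_network_offset = 20 + 8;
+
+      /* The lookup side selects one slot by index, the way the patched
+       * udp{4,6}_lib_lookup_skb does via network_offsets[skb->encapsulation].
+       */
+      for (int i = 0; i <= 1; i++)
+          printf("index %d -> L3 offset %d\n", i, cb.network_offsets[i]);
+      return 0;
+  }
+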
+--- a/include/net/gro.h
++++ b/include/net/gro.h
+@@ -86,6 +86,15 @@ struct napi_gro_cb {
+
+ /* used to support CHECKSUM_COMPLETE for tunneling protocols */
+ __wsum csum;
++
++ /* L3 offsets */
++ union {
++ struct {
++ u16 network_offset;
++ u16 inner_network_offset;
++ };
++ u16 network_offsets[2];
++ };
+ };
+
+ #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
+--- a/net/8021q/vlan_core.c
++++ b/net/8021q/vlan_core.c
+@@ -478,6 +478,8 @@ static struct sk_buff *vlan_gro_receive(
+ if (unlikely(!vhdr))
+ goto out;
+
++ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen;
++
+ type = vhdr->h_vlan_encapsulated_proto;
+
+ ptype = gro_find_receive_by_type(type);
+--- a/net/core/gro.c
++++ b/net/core/gro.c
+@@ -373,6 +373,7 @@ static inline void skb_gro_reset_offset(
+ const struct skb_shared_info *pinfo = skb_shinfo(skb);
+ const skb_frag_t *frag0 = &pinfo->frags[0];
+
++ NAPI_GRO_CB(skb)->network_offset = 0;
+ NAPI_GRO_CB(skb)->data_offset = 0;
+ NAPI_GRO_CB(skb)->frag0 = NULL;
+ NAPI_GRO_CB(skb)->frag0_len = 0;
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -1571,6 +1571,7 @@ struct sk_buff *inet_gro_receive(struct
+ /* The above will be needed by the transport layer if there is one
+ * immediately following this IP hdr.
+ */
++ NAPI_GRO_CB(skb)->inner_network_offset = off;
+
+ /* Note : No need to call skb_gro_postpull_rcsum() here,
+ * as we already checked checksum over ipv4 header was 0
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -534,7 +534,8 @@ static inline struct sock *__udp4_lib_lo
+ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
+ __be16 sport, __be16 dport)
+ {
+- const struct iphdr *iph = ip_hdr(skb);
++ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
++ const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
+ struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -718,7 +718,8 @@ EXPORT_SYMBOL(udp_gro_complete);
+
+ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
+ {
+- const struct iphdr *iph = ip_hdr(skb);
++ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
++ const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
+ struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
+
+ /* do fraglist only if there is no outer UDP encap (or we already processed it) */
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -240,6 +240,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *
+ goto out;
+
+ skb_set_network_header(skb, off);
++ NAPI_GRO_CB(skb)->inner_network_offset = off;
+
+ flush += ntohs(iph->payload_len) != skb->len - hlen;
+
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -275,7 +275,8 @@ static struct sock *__udp6_lib_lookup_sk
+ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
+ __be16 sport, __be16 dport)
+ {
+- const struct ipv6hdr *iph = ipv6_hdr(skb);
++ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
++ const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
+ struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -164,7 +164,8 @@ flush:
+
+ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
+ {
+- const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
++ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
++ const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset);
+ struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
+
+ /* do fraglist only if there is no outer UDP encap (or we already processed it) */
--- /dev/null
+From: Richard Gobert <richardbgobert@gmail.com>
+Date: Tue, 30 Apr 2024 16:35:55 +0200
+Subject: [PATCH] net: gro: add flush check in udp_gro_receive_segment
+
+The GRO-GSO path is supposed to be transparent, and as such the L3 flush
+checks are relevant to all UDP flows merging in GRO. This patch reuses the
+same logic and code from tcp_gro_receive, terminating the merge if flush is
+non-zero.
+
+Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
+Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+---
+
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -471,6 +471,7 @@ static struct sk_buff *udp_gro_receive_s
+ struct sk_buff *p;
+ unsigned int ulen;
+ int ret = 0;
++ int flush;
+
+ /* requires non zero csum, for symmetry with GSO */
+ if (!uh->check) {
+@@ -504,13 +505,22 @@ static struct sk_buff *udp_gro_receive_s
+ return p;
+ }
+
++ flush = NAPI_GRO_CB(p)->flush;
++
++ if (NAPI_GRO_CB(p)->flush_id != 1 ||
++ NAPI_GRO_CB(p)->count != 1 ||
++ !NAPI_GRO_CB(p)->is_atomic)
++ flush |= NAPI_GRO_CB(p)->flush_id;
++ else
++ NAPI_GRO_CB(p)->is_atomic = false;
++
+ /* Terminate the flow on len mismatch or if it grow "too much".
+ * Under small packet flood GRO count could elsewhere grow a lot
+ * leading to excessive truesize values.
+ * On len mismatch merge the first packet shorter than gso_size,
+ * otherwise complete the GRO packet.
+ */
+- if (ulen > ntohs(uh2->len)) {
++ if (ulen > ntohs(uh2->len) || flush) {
+ pp = p;
+ } else {
+ if (NAPI_GRO_CB(skb)->is_flist) {
--- a/include/net/gro.h
+++ b/include/net/gro.h
-@@ -430,6 +430,7 @@ static inline __wsum ip6_gro_compute_pse
+@@ -439,6 +439,7 @@ static inline __wsum ip6_gro_compute_pse
}
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);