e42d2558f285a253bbee8a7138ee06d5b8e7e911
[openwrt/staging/nbd.git] /
1 From: Pablo Neira Ayuso <pablo@netfilter.org>
2 Date: Wed, 24 Mar 2021 02:30:40 +0100
3 Subject: [PATCH] netfilter: flowtable: use dev_fill_forward_path() to
4 obtain egress device
5
6 The egress device in the tuple is obtained from the route. Use
7 dev_fill_forward_path() instead to provide the real egress device for
8 this flow whenever it is available.
9
10 The new FLOW_OFFLOAD_XMIT_DIRECT type uses dev_queue_xmit() to transmit
11 ethernet frames. Cache the source and destination hardware addresses to
12 use dev_queue_xmit() to transfer packets.
13
14 The FLOW_OFFLOAD_XMIT_DIRECT replaces FLOW_OFFLOAD_XMIT_NEIGH if
15 dev_fill_forward_path() finds a direct transmit path.
16
17 In case of topology updates, if the peer is moved to a different bridge
18 port, the connection will time out; a reconnect will result in a new entry
19 with the correct path. Snooping fdb updates would allow for cleaning up
20 stale flowtable entries.
21
22 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
23 ---
24
25 --- a/include/net/netfilter/nf_flow_table.h
26 +++ b/include/net/netfilter/nf_flow_table.h
27 @@ -92,6 +92,7 @@ enum flow_offload_tuple_dir {
28 enum flow_offload_xmit_type {
29 FLOW_OFFLOAD_XMIT_NEIGH = 0,
30 FLOW_OFFLOAD_XMIT_XFRM,
31 + FLOW_OFFLOAD_XMIT_DIRECT,
32 };
33
34 struct flow_offload_tuple {
35 @@ -120,8 +121,14 @@ struct flow_offload_tuple {
36 xmit_type:2;
37
38 u16 mtu;
39 -
40 - struct dst_entry *dst_cache;
41 + union {
42 + struct dst_entry *dst_cache;
43 + struct {
44 + u32 ifidx;
45 + u8 h_source[ETH_ALEN];
46 + u8 h_dest[ETH_ALEN];
47 + } out;
48 + };
49 };
50
51 struct flow_offload_tuple_rhash {
52 @@ -168,6 +175,11 @@ struct nf_flow_route {
53 struct {
54 u32 ifindex;
55 } in;
56 + struct {
57 + u32 ifindex;
58 + u8 h_source[ETH_ALEN];
59 + u8 h_dest[ETH_ALEN];
60 + } out;
61 enum flow_offload_xmit_type xmit_type;
62 } tuple[FLOW_OFFLOAD_DIR_MAX];
63 };
64 --- a/net/netfilter/nf_flow_table_core.c
65 +++ b/net/netfilter/nf_flow_table_core.c
66 @@ -81,9 +81,6 @@ static int flow_offload_fill_route(struc
67 struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
68 struct dst_entry *dst = route->tuple[dir].dst;
69
70 - if (!dst_hold_safe(route->tuple[dir].dst))
71 - return -1;
72 -
73 switch (flow_tuple->l3proto) {
74 case NFPROTO_IPV4:
75 flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
76 @@ -94,12 +91,36 @@ static int flow_offload_fill_route(struc
77 }
78
79 flow_tuple->iifidx = route->tuple[dir].in.ifindex;
80 +
81 + switch (route->tuple[dir].xmit_type) {
82 + case FLOW_OFFLOAD_XMIT_DIRECT:
83 + memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
84 + ETH_ALEN);
85 + memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
86 + ETH_ALEN);
87 + flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
88 + break;
89 + case FLOW_OFFLOAD_XMIT_XFRM:
90 + case FLOW_OFFLOAD_XMIT_NEIGH:
91 + if (!dst_hold_safe(route->tuple[dir].dst))
92 + return -1;
93 +
94 + flow_tuple->dst_cache = dst;
95 + break;
96 + }
97 flow_tuple->xmit_type = route->tuple[dir].xmit_type;
98 - flow_tuple->dst_cache = dst;
99
100 return 0;
101 }
102
103 +static void nft_flow_dst_release(struct flow_offload *flow,
104 + enum flow_offload_tuple_dir dir)
105 +{
106 + if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
107 + flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
108 + dst_release(flow->tuplehash[dir].tuple.dst_cache);
109 +}
110 +
111 int flow_offload_route_init(struct flow_offload *flow,
112 const struct nf_flow_route *route)
113 {
114 @@ -118,7 +139,7 @@ int flow_offload_route_init(struct flow_
115 return 0;
116
117 err_route_reply:
118 - dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
119 + nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
120
121 return err;
122 }
123 @@ -169,8 +190,8 @@ static void flow_offload_fixup_ct(struct
124
125 static void flow_offload_route_release(struct flow_offload *flow)
126 {
127 - dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
128 - dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
129 + nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
130 + nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
131 }
132
133 void flow_offload_free(struct flow_offload *flow)
134 --- a/net/netfilter/nf_flow_table_ip.c
135 +++ b/net/netfilter/nf_flow_table_ip.c
136 @@ -207,6 +207,24 @@ static unsigned int nf_flow_xmit_xfrm(st
137 return NF_STOLEN;
138 }
139
140 +static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
141 + const struct flow_offload_tuple_rhash *tuplehash,
142 + unsigned short type)
143 +{
144 + struct net_device *outdev;
145 +
146 + outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
147 + if (!outdev)
148 + return NF_DROP;
149 +
150 + skb->dev = outdev;
151 + dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
152 + tuplehash->tuple.out.h_source, skb->len);
153 + dev_queue_xmit(skb);
154 +
155 + return NF_STOLEN;
156 +}
157 +
158 unsigned int
159 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
160 const struct nf_hook_state *state)
161 @@ -222,6 +240,7 @@ nf_flow_offload_ip_hook(void *priv, stru
162 struct iphdr *iph;
163 __be32 nexthop;
164 u32 hdrsize;
165 + int ret;
166
167 if (skb->protocol != htons(ETH_P_IP))
168 return NF_ACCEPT;
169 @@ -244,9 +263,13 @@ nf_flow_offload_ip_hook(void *priv, stru
170 if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
171 return NF_ACCEPT;
172
173 - if (!dst_check(&rt->dst, 0)) {
174 - flow_offload_teardown(flow);
175 - return NF_ACCEPT;
176 + if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
177 + tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
178 + rt = (struct rtable *)tuplehash->tuple.dst_cache;
179 + if (!dst_check(&rt->dst, 0)) {
180 + flow_offload_teardown(flow);
181 + return NF_ACCEPT;
182 + }
183 }
184
185 if (skb_try_make_writable(skb, thoff + hdrsize))
186 @@ -263,8 +286,6 @@ nf_flow_offload_ip_hook(void *priv, stru
187 if (flow_table->flags & NF_FLOWTABLE_COUNTER)
188 nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
189
190 - rt = (struct rtable *)tuplehash->tuple.dst_cache;
191 -
192 if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
193 memset(skb->cb, 0, sizeof(struct inet_skb_parm));
194 IPCB(skb)->iif = skb->dev->ifindex;
195 @@ -272,13 +293,23 @@ nf_flow_offload_ip_hook(void *priv, stru
196 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
197 }
198
199 - outdev = rt->dst.dev;
200 - skb->dev = outdev;
201 - nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
202 - skb_dst_set_noref(skb, &rt->dst);
203 - neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
204 + switch (tuplehash->tuple.xmit_type) {
205 + case FLOW_OFFLOAD_XMIT_NEIGH:
206 + outdev = rt->dst.dev;
207 + skb->dev = outdev;
208 + nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
209 + skb_dst_set_noref(skb, &rt->dst);
210 + neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
211 + ret = NF_STOLEN;
212 + break;
213 + case FLOW_OFFLOAD_XMIT_DIRECT:
214 + ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
215 + if (ret == NF_DROP)
216 + flow_offload_teardown(flow);
217 + break;
218 + }
219
220 - return NF_STOLEN;
221 + return ret;
222 }
223 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
224
225 @@ -444,6 +475,7 @@ nf_flow_offload_ipv6_hook(void *priv, st
226 struct ipv6hdr *ip6h;
227 struct rt6_info *rt;
228 u32 hdrsize;
229 + int ret;
230
231 if (skb->protocol != htons(ETH_P_IPV6))
232 return NF_ACCEPT;
233 @@ -465,9 +497,13 @@ nf_flow_offload_ipv6_hook(void *priv, st
234 sizeof(*ip6h)))
235 return NF_ACCEPT;
236
237 - if (!dst_check(&rt->dst, 0)) {
238 - flow_offload_teardown(flow);
239 - return NF_ACCEPT;
240 + if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
241 + tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
242 + rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
243 + if (!dst_check(&rt->dst, 0)) {
244 + flow_offload_teardown(flow);
245 + return NF_ACCEPT;
246 + }
247 }
248
249 if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
250 @@ -484,8 +520,6 @@ nf_flow_offload_ipv6_hook(void *priv, st
251 if (flow_table->flags & NF_FLOWTABLE_COUNTER)
252 nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
253
254 - rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
255 -
256 if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
257 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
258 IP6CB(skb)->iif = skb->dev->ifindex;
259 @@ -493,12 +527,22 @@ nf_flow_offload_ipv6_hook(void *priv, st
260 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
261 }
262
263 - outdev = rt->dst.dev;
264 - skb->dev = outdev;
265 - nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
266 - skb_dst_set_noref(skb, &rt->dst);
267 - neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
268 + switch (tuplehash->tuple.xmit_type) {
269 + case FLOW_OFFLOAD_XMIT_NEIGH:
270 + outdev = rt->dst.dev;
271 + skb->dev = outdev;
272 + nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
273 + skb_dst_set_noref(skb, &rt->dst);
274 + neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
275 + ret = NF_STOLEN;
276 + break;
277 + case FLOW_OFFLOAD_XMIT_DIRECT:
278 + ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
279 + if (ret == NF_DROP)
280 + flow_offload_teardown(flow);
281 + break;
282 + }
283
284 - return NF_STOLEN;
285 + return ret;
286 }
287 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
288 --- a/net/netfilter/nft_flow_offload.c
289 +++ b/net/netfilter/nft_flow_offload.c
290 @@ -39,12 +39,11 @@ static void nft_default_forward_path(str
291 static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
292 const struct dst_entry *dst_cache,
293 const struct nf_conn *ct,
294 - enum ip_conntrack_dir dir,
295 + enum ip_conntrack_dir dir, u8 *ha,
296 struct net_device_path_stack *stack)
297 {
298 const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
299 struct net_device *dev = dst_cache->dev;
300 - unsigned char ha[ETH_ALEN];
301 struct neighbour *n;
302 u8 nud_state;
303
304 @@ -66,27 +65,43 @@ static int nft_dev_fill_forward_path(con
305
306 struct nft_forward_info {
307 const struct net_device *indev;
308 + const struct net_device *outdev;
309 + u8 h_source[ETH_ALEN];
310 + u8 h_dest[ETH_ALEN];
311 + enum flow_offload_xmit_type xmit_type;
312 };
313
314 static void nft_dev_path_info(const struct net_device_path_stack *stack,
315 - struct nft_forward_info *info)
316 + struct nft_forward_info *info,
317 + unsigned char *ha)
318 {
319 const struct net_device_path *path;
320 int i;
321
322 + memcpy(info->h_dest, ha, ETH_ALEN);
323 +
324 for (i = 0; i < stack->num_paths; i++) {
325 path = &stack->path[i];
326 switch (path->type) {
327 case DEV_PATH_ETHERNET:
328 info->indev = path->dev;
329 + if (is_zero_ether_addr(info->h_source))
330 + memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
331 break;
332 - case DEV_PATH_VLAN:
333 case DEV_PATH_BRIDGE:
334 + if (is_zero_ether_addr(info->h_source))
335 + memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
336 +
337 + info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
338 + break;
339 + case DEV_PATH_VLAN:
340 default:
341 info->indev = NULL;
342 break;
343 }
344 }
345 + if (!info->outdev)
346 + info->outdev = info->indev;
347 }
348
349 static bool nft_flowtable_find_dev(const struct net_device *dev,
350 @@ -114,14 +129,22 @@ static void nft_dev_forward_path(struct
351 const struct dst_entry *dst = route->tuple[dir].dst;
352 struct net_device_path_stack stack;
353 struct nft_forward_info info = {};
354 + unsigned char ha[ETH_ALEN];
355
356 - if (nft_dev_fill_forward_path(route, dst, ct, dir, &stack) >= 0)
357 - nft_dev_path_info(&stack, &info);
358 + if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
359 + nft_dev_path_info(&stack, &info, ha);
360
361 if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
362 return;
363
364 route->tuple[!dir].in.ifindex = info.indev->ifindex;
365 +
366 + if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
367 + memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
368 + memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
369 + route->tuple[dir].out.ifindex = info.outdev->ifindex;
370 + route->tuple[dir].xmit_type = info.xmit_type;
371 + }
372 }
373
374 static int nft_flow_route(const struct nft_pktinfo *pkt,