1 From d5f19486cee79d04c054427577ac96ed123706db Mon Sep 17 00:00:00 2001
2 From: Vladimir Oltean <vladimir.oltean@nxp.com>
3 Date: Wed, 6 Jan 2021 11:51:35 +0200
4 Subject: [PATCH] net: dsa: listen for SWITCHDEV_FDB_{ADD,DEL}_TO_DEVICE on
5 foreign bridge neighbors
7 Some DSA switches (and not only those) cannot learn source MAC addresses from
8 packets injected from the CPU. They only perform hardware address
9 learning from inbound traffic.
11 This can be problematic when we have a bridge spanning some DSA switch
12 ports and some non-DSA ports (which we'll call "foreign interfaces" from
15 There are 2 classes of problems created by the lack of learning on
17 - excessive flooding, due to the fact that DSA treats those addresses as
19 - the risk of stale routes, which can lead to temporary packet loss
21 To illustrate the second class, consider the following situation, which
22 is common in production equipment (wireless access points, where there
23 is a WLAN interface and an Ethernet switch, and these form a single
27 +------------------------------------------------------------------------+
29 +------------------------------------------------------------------------+
30 +------------+ +------------+ +------------+ +------------+ +------------+
31 | swp0 | | swp1 | | swp2 | | swp3 | | wlan0 |
32 +------------+ +------------+ +------------+ +------------+ +------------+
40 +------------+ +------------+ +------------+ +------------+ +------------+
41 | swp0 | | swp1 | | swp2 | | swp3 | | wlan0 |
42 +------------+ +------------+ +------------+ +------------+ +------------+
43 +------------------------------------------------------------------------+
45 +------------------------------------------------------------------------+
48 - br0 of AP 1 will know that Clients A and B are reachable via wlan0
49 - the hardware fdb of a DSA switch driver today is not kept in sync with
50 the software entries on other bridge ports, so it will not know that
51 clients A and B are reachable via the CPU port UNLESS the hardware
52 switch itself performs SA learning from traffic injected from the CPU.
53 Nonetheless, a substantial number of switches don't.
54 - the hardware fdb of the DSA switch on AP 2 may autonomously learn that
55 Client A and B are reachable through swp0. Therefore, the software br0
56 of AP 2 also may or may not learn this. In the example we're
57 illustrating, some Ethernet traffic has been going on, and br0 from AP
58 2 has indeed learnt that it can reach Client B through swp0.
60 One of the wireless clients, say Client B, disconnects from AP 1 and
61 roams to AP 2. The topology now looks like this:
64 +------------------------------------------------------------------------+
66 +------------------------------------------------------------------------+
67 +------------+ +------------+ +------------+ +------------+ +------------+
68 | swp0 | | swp1 | | swp2 | | swp3 | | wlan0 |
69 +------------+ +------------+ +------------+ +------------+ +------------+
78 +------------+ +------------+ +------------+ +------------+ +------------+
79 | swp0 | | swp1 | | swp2 | | swp3 | | wlan0 |
80 +------------+ +------------+ +------------+ +------------+ +------------+
81 +------------------------------------------------------------------------+
83 +------------------------------------------------------------------------+
86 - br0 of AP 1 still knows that Client A is reachable via wlan0 (no change)
87 - br0 of AP 1 will (possibly) know that Client B has left wlan0. There
88 are cases where it might never find out though. Either way, DSA today
89 does not process that notification in any way.
90 - the hardware FDB of the DSA switch on AP 1 may learn autonomously that
91 Client B can be reached via swp0, if it receives any packet with
92 Client B's source MAC address over Ethernet.
93 - the hardware FDB of the DSA switch on AP 2 still thinks that Client B
94 can be reached via swp0. It does not know that it has roamed to wlan0,
95 because it doesn't perform SA learning from the CPU port.
97 Now Client A contacts Client B.
98 AP 1 routes the packet fine towards swp0 and delivers it on the Ethernet
100 AP 2 sees a frame on swp0 and its fdb says that the destination is swp0.
101 Hairpinning is disabled => drop.
103 This problem comes from the fact that these switches have a 'blind spot'
104 for addresses coming from software bridging. The generic solution is not
105 to assume that hardware learning can be enabled somehow, but to listen
106 to more bridge learning events. It turns out that the bridge driver does
107 learn in software from all inbound frames, in __br_handle_local_finish.
108 A proper SWITCHDEV_FDB_ADD_TO_DEVICE notification is emitted for the
109 addresses serviced by the bridge on 'foreign' interfaces. The software
110 bridge also does the right thing on migration, by notifying that the old
111 entry is deleted, so that does not need to be special-cased in DSA. When
112 it is deleted, we just need to delete our static FDB entry towards the
115 The problem is that DSA currently only cares about SWITCHDEV_FDB_ADD_TO_DEVICE
116 events received on its own interfaces, such as static FDB entries.
118 Luckily we can change that, and DSA can listen to all switchdev FDB
119 add/del events in the system and figure out if those events were emitted
120 by a bridge that spans at least one of DSA's own ports. In case that is
121 true, DSA will also offload that address towards its own CPU port, in
122 the eventuality that there might be bridge clients attached to the DSA
123 switch who want to talk to the station connected to the foreign
126 In terms of implementation, we need to keep the fdb_info->added_by_user
127 check for the case where the switchdev event was targeted directly at a
128 DSA switch port. But we don't need to look at that flag for snooped
129 events. So the check is currently too late; we need to move it earlier.
130 This also simplifies the code a bit, since we avoid uselessly allocating
131 and freeing switchdev_work.
133 We could probably do some improvements in the future. For example,
134 multi-bridge support is rudimentary at the moment. If there are two
135 bridges spanning a DSA switch's ports, and both of them need to service
136 the same MAC address, then what will happen is that the migration of one
137 of those stations will trigger the deletion of the FDB entry from the
138 CPU port while it is still used by the other bridge. That could be improved
139 with reference counting but is left for another time.
141 This behavior needs to be enabled at driver level by setting
142 ds->assisted_learning_on_cpu_port = true. This is because we don't want
143 to inflict a potential performance penalty (accesses through
144 MDIO/I2C/SPI are expensive) on hardware that really doesn't need it
145 because address learning on the CPU port works there.
147 Reported-by: DENG Qingfang <dqfext@gmail.com>
148 Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
149 Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
150 Reviewed-by: Andrew Lunn <andrew@lunn.ch>
151 Signed-off-by: Jakub Kicinski <kuba@kernel.org>
153 include/net/dsa.h | 5 +++++
154 net/dsa/slave.c | 66 +++++++++++++++++++++++++++++++++++++++++++++----------
155 2 files changed, 60 insertions(+), 11 deletions(-)
157 --- a/include/net/dsa.h
158 +++ b/include/net/dsa.h
159 @@ -317,6 +317,11 @@ struct dsa_switch {
161 bool untag_bridge_pvid;
163 + /* Let DSA manage the FDB entries towards the CPU, based on the
164 + * software bridge database.
166 + bool assisted_learning_on_cpu_port;
168 /* In case vlan_filtering_is_global is set, the VLAN awareness state
169 * should be retrieved from here and not from the per-port settings.
171 --- a/net/dsa/slave.c
172 +++ b/net/dsa/slave.c
173 @@ -2146,6 +2146,28 @@ static void dsa_slave_switchdev_event_wo
177 +static int dsa_lower_dev_walk(struct net_device *lower_dev,
178 + struct netdev_nested_priv *priv)
180 + if (dsa_slave_dev_check(lower_dev)) {
181 + priv->data = (void *)netdev_priv(lower_dev);
188 +static struct dsa_slave_priv *dsa_slave_dev_lower_find(struct net_device *dev)
190 + struct netdev_nested_priv priv = {
194 + netdev_walk_all_lower_dev_rcu(dev, dsa_lower_dev_walk, &priv);
196 + return (struct dsa_slave_priv *)priv.data;
199 /* Called under rcu_read_lock() */
200 static int dsa_slave_switchdev_event(struct notifier_block *unused,
201 unsigned long event, void *ptr)
202 @@ -2164,10 +2186,37 @@ static int dsa_slave_switchdev_event(str
203 return notifier_from_errno(err);
204 case SWITCHDEV_FDB_ADD_TO_DEVICE:
205 case SWITCHDEV_FDB_DEL_TO_DEVICE:
206 - if (!dsa_slave_dev_check(dev))
207 - return NOTIFY_DONE;
210 + if (dsa_slave_dev_check(dev)) {
211 + if (!fdb_info->added_by_user)
214 + dp = dsa_slave_to_port(dev);
216 + /* Snoop addresses learnt on foreign interfaces
217 + * bridged with us, for switches that don't
218 + * automatically learn SA from CPU-injected traffic
220 + struct net_device *br_dev;
221 + struct dsa_slave_priv *p;
223 + br_dev = netdev_master_upper_dev_get_rcu(dev);
225 + return NOTIFY_DONE;
227 + if (!netif_is_bridge_master(br_dev))
228 + return NOTIFY_DONE;
230 + p = dsa_slave_dev_lower_find(br_dev);
232 + return NOTIFY_DONE;
234 - dp = dsa_slave_to_port(dev);
235 + dp = p->dp->cpu_dp;
237 + if (!dp->ds->assisted_learning_on_cpu_port)
238 + return NOTIFY_DONE;
241 if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del)
243 @@ -2182,18 +2231,13 @@ static int dsa_slave_switchdev_event(str
244 switchdev_work->port = dp->index;
245 switchdev_work->event = event;
249 - if (!fdb_info->added_by_user) {
250 - kfree(switchdev_work);
254 ether_addr_copy(switchdev_work->addr,
256 switchdev_work->vid = fdb_info->vid;
259 + /* Hold a reference on the slave for dsa_fdb_offload_notify */
260 + if (dsa_is_user_port(dp->ds, dp->index))
262 dsa_schedule_work(&switchdev_work->work);