--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 16 Dec 2020 21:34:03 +0100
+Subject: [PATCH] mac80211: add rx decapsulation offload support
+
+This allows drivers to pass 802.3 frames to mac80211, with some restrictions:
+
+- the skb must be passed with a valid sta
+- fast-rx needs to be active for the sta
+- monitor mode needs to be disabled
+
+mac80211 will tell the driver when it is safe to enable rx decap offload for
+a particular station.
+
+In order to implement support, a driver must:
+
+- call ieee80211_hw_set(hw, SUPPORTS_RX_DECAP_OFFLOAD)
+- implement ops->sta_set_decap_offload
+- mark 802.3 frames with RX_FLAG_8023
+
+If it doesn't want to enable offload for some vif types, it can mask out
+IEEE80211_OFFLOAD_DECAP_ENABLED in vif->offload_flags from within the
+.add_interface or .update_vif_offload driver ops
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -1297,6 +1297,8 @@ ieee80211_tx_info_clear_status(struct ie
+ * the "0-length PSDU" field included there. The value for it is
+ * in &struct ieee80211_rx_status. Note that if this value isn't
+ * known the frame shouldn't be reported.
++ * @RX_FLAG_8023: the frame has an 802.3 header (decap offload performed by
++ * hardware or driver)
+ */
+ enum mac80211_rx_flags {
+ RX_FLAG_MMIC_ERROR = BIT(0),
+@@ -1329,6 +1331,7 @@ enum mac80211_rx_flags {
+ RX_FLAG_RADIOTAP_HE_MU = BIT(27),
+ RX_FLAG_RADIOTAP_LSIG = BIT(28),
+ RX_FLAG_NO_PSDU = BIT(29),
++ RX_FLAG_8023 = BIT(30),
+ };
+
+ /**
+@@ -1650,11 +1653,15 @@ enum ieee80211_vif_flags {
+ * The driver supports sending frames passed as 802.3 frames by mac80211.
+ * It must also support sending 802.11 packets for the same interface.
+ * @IEEE80211_OFFLOAD_ENCAP_4ADDR: support 4-address mode encapsulation offload
++ * @IEEE80211_OFFLOAD_DECAP_ENABLED: rx encapsulation offload is enabled
++ * The driver supports passing received 802.11 frames as 802.3 frames to
++ * mac80211.
+ */
+
+ enum ieee80211_offload_flags {
+ IEEE80211_OFFLOAD_ENCAP_ENABLED = BIT(0),
+ IEEE80211_OFFLOAD_ENCAP_4ADDR = BIT(1),
++ IEEE80211_OFFLOAD_DECAP_ENABLED = BIT(2),
+ };
+
+ /**
+@@ -2390,6 +2397,9 @@ struct ieee80211_txq {
+ * @IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD: Hardware supports tx encapsulation
+ * offload
+ *
++ * @IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD: Hardware supports rx decapsulation
++ * offload
++ *
+ * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
+ */
+ enum ieee80211_hw_flags {
+@@ -2443,6 +2453,7 @@ enum ieee80211_hw_flags {
+ IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID,
+ IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT,
+ IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD,
++ IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD,
+
+ /* keep last, obviously */
+ NUM_IEEE80211_HW_FLAGS
+@@ -4196,6 +4207,9 @@ struct ieee80211_ops {
+ struct ieee80211_vif *vif);
+ void (*sta_set_4addr)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_sta *sta, bool enabled);
++ void (*sta_set_decap_offload)(struct ieee80211_hw *hw,
++ struct ieee80211_vif *vif,
++ struct ieee80211_sta *sta, bool enabled);
+ };
+
+ /**
+--- a/net/mac80211/debugfs.c
++++ b/net/mac80211/debugfs.c
+@@ -409,6 +409,7 @@ static const char *hw_flag_names[] = {
+ FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID),
+ FLAG(AMPDU_KEYBORDER_SUPPORT),
+ FLAG(SUPPORTS_TX_ENCAP_OFFLOAD),
++ FLAG(SUPPORTS_RX_DECAP_OFFLOAD),
+ #undef FLAG
+ };
+
+--- a/net/mac80211/debugfs_sta.c
++++ b/net/mac80211/debugfs_sta.c
+@@ -79,6 +79,7 @@ static const char * const sta_flag_names
+ FLAG(MPSP_RECIPIENT),
+ FLAG(PS_DELIVER),
+ FLAG(USES_ENCRYPTION),
++ FLAG(DECAP_OFFLOAD),
+ #undef FLAG
+ };
+
+--- a/net/mac80211/driver-ops.h
++++ b/net/mac80211/driver-ops.h
+@@ -1413,4 +1413,20 @@ static inline void drv_sta_set_4addr(str
+ trace_drv_return_void(local);
+ }
+
++static inline void drv_sta_set_decap_offload(struct ieee80211_local *local,
++ struct ieee80211_sub_if_data *sdata,
++ struct ieee80211_sta *sta,
++ bool enabled)
++{
++ sdata = get_bss_sdata(sdata);
++ if (!check_sdata_in_driver(sdata))
++ return;
++
++ trace_drv_sta_set_decap_offload(local, sdata, sta, enabled);
++ if (local->ops->sta_set_decap_offload)
++ local->ops->sta_set_decap_offload(&local->hw, &sdata->vif, sta,
++ enabled);
++ trace_drv_return_void(local);
++}
++
+ #endif /* __MAC80211_DRIVER_OPS */
+--- a/net/mac80211/iface.c
++++ b/net/mac80211/iface.c
+@@ -839,7 +839,7 @@ static const struct net_device_ops ieee8
+
+ };
+
+-static bool ieee80211_iftype_supports_encap_offload(enum nl80211_iftype iftype)
++static bool ieee80211_iftype_supports_hdr_offload(enum nl80211_iftype iftype)
+ {
+ switch (iftype) {
+ /* P2P GO and client are mapped to AP/STATION types */
+@@ -859,7 +859,7 @@ static bool ieee80211_set_sdata_offload_
+ flags = sdata->vif.offload_flags;
+
+ if (ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD) &&
+- ieee80211_iftype_supports_encap_offload(sdata->vif.type)) {
++ ieee80211_iftype_supports_hdr_offload(sdata->vif.type)) {
+ flags |= IEEE80211_OFFLOAD_ENCAP_ENABLED;
+
+ if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG) &&
+@@ -872,10 +872,21 @@ static bool ieee80211_set_sdata_offload_
+ flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED;
+ }
+
++ if (ieee80211_hw_check(&local->hw, SUPPORTS_RX_DECAP_OFFLOAD) &&
++ ieee80211_iftype_supports_hdr_offload(sdata->vif.type)) {
++ flags |= IEEE80211_OFFLOAD_DECAP_ENABLED;
++
++ if (local->monitors)
++ flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED;
++ } else {
++ flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED;
++ }
++
+ if (sdata->vif.offload_flags == flags)
+ return false;
+
+ sdata->vif.offload_flags = flags;
++ ieee80211_check_fast_rx_iface(sdata);
+ return true;
+ }
+
+@@ -893,7 +904,7 @@ static void ieee80211_set_vif_encap_ops(
+ }
+
+ if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD) ||
+- !ieee80211_iftype_supports_encap_offload(bss->vif.type))
++ !ieee80211_iftype_supports_hdr_offload(bss->vif.type))
+ return;
+
+ enabled = bss->vif.offload_flags & IEEE80211_OFFLOAD_ENCAP_ENABLED;
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -4114,7 +4114,9 @@ void ieee80211_check_fast_rx(struct sta_
+ .vif_type = sdata->vif.type,
+ .control_port_protocol = sdata->control_port_protocol,
+ }, *old, *new = NULL;
++ bool set_offload = false;
+ bool assign = false;
++ bool offload;
+
+ /* use sparse to check that we don't return without updating */
+ __acquire(check_fast_rx);
+@@ -4227,6 +4229,17 @@ void ieee80211_check_fast_rx(struct sta_
+ if (assign)
+ new = kmemdup(&fastrx, sizeof(fastrx), GFP_KERNEL);
+
++ offload = assign &&
++ (sdata->vif.offload_flags & IEEE80211_OFFLOAD_DECAP_ENABLED);
++
++ if (offload)
++ set_offload = !test_and_set_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
++ else
++ set_offload = test_and_clear_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
++
++ if (set_offload)
++ drv_sta_set_decap_offload(local, sdata, &sta->sta, assign);
++
+ spin_lock_bh(&sta->lock);
+ old = rcu_dereference_protected(sta->fast_rx, true);
+ rcu_assign_pointer(sta->fast_rx, new);
+@@ -4273,6 +4286,108 @@ void ieee80211_check_fast_rx_iface(struc
+ mutex_unlock(&local->sta_mtx);
+ }
+
++static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
++ struct ieee80211_fast_rx *fast_rx,
++ int orig_len)
++{
++ struct ieee80211_sta_rx_stats *stats;
++ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
++ struct sta_info *sta = rx->sta;
++ struct sk_buff *skb = rx->skb;
++ void *sa = skb->data + ETH_ALEN;
++ void *da = skb->data;
++
++ stats = &sta->rx_stats;
++ if (fast_rx->uses_rss)
++ stats = this_cpu_ptr(sta->pcpu_rx_stats);
++
++ /* statistics part of ieee80211_rx_h_sta_process() */
++ if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
++ stats->last_signal = status->signal;
++ if (!fast_rx->uses_rss)
++ ewma_signal_add(&sta->rx_stats_avg.signal,
++ -status->signal);
++ }
++
++ if (status->chains) {
++ int i;
++
++ stats->chains = status->chains;
++ for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
++ int signal = status->chain_signal[i];
++
++ if (!(status->chains & BIT(i)))
++ continue;
++
++ stats->chain_signal_last[i] = signal;
++ if (!fast_rx->uses_rss)
++ ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
++ -signal);
++ }
++ }
++ /* end of statistics */
++
++ stats->last_rx = jiffies;
++ stats->last_rate = sta_stats_encode_rate(status);
++
++ stats->fragments++;
++ stats->packets++;
++
++ skb->dev = fast_rx->dev;
++
++ ieee80211_rx_stats(fast_rx->dev, skb->len);
++
++ /* The seqno index has the same property as needed
++ * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS
++ * for non-QoS-data frames. Here we know it's a data
++ * frame, so count MSDUs.
++ */
++ u64_stats_update_begin(&stats->syncp);
++ stats->msdu[rx->seqno_idx]++;
++ stats->bytes += orig_len;
++ u64_stats_update_end(&stats->syncp);
++
++ if (fast_rx->internal_forward) {
++ struct sk_buff *xmit_skb = NULL;
++ if (is_multicast_ether_addr(da)) {
++ xmit_skb = skb_copy(skb, GFP_ATOMIC);
++ } else if (!ether_addr_equal(da, sa) &&
++ sta_info_get(rx->sdata, da)) {
++ xmit_skb = skb;
++ skb = NULL;
++ }
++
++ if (xmit_skb) {
++ /*
++ * Send to wireless media and increase priority by 256
++ * to keep the received priority instead of
++ * reclassifying the frame (see cfg80211_classify8021d).
++ */
++ xmit_skb->priority += 256;
++ xmit_skb->protocol = htons(ETH_P_802_3);
++ skb_reset_network_header(xmit_skb);
++ skb_reset_mac_header(xmit_skb);
++ dev_queue_xmit(xmit_skb);
++ }
++
++ if (!skb)
++ return;
++ }
++
++ /* deliver to local stack */
++ skb->protocol = eth_type_trans(skb, fast_rx->dev);
++ memset(skb->cb, 0, sizeof(skb->cb));
++ if (rx->list)
++#if LINUX_VERSION_IS_GEQ(4,19,0)
++ list_add_tail(&skb->list, rx->list);
++#else
++ __skb_queue_tail(rx->list, skb);
++#endif
++ else
++ netif_receive_skb(skb);
++
++}
++
+ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
+ struct ieee80211_fast_rx *fast_rx)
+ {
+@@ -4293,9 +4408,6 @@ static bool ieee80211_invoke_fast_rx(str
+ } addrs __aligned(2);
+ struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
+
+- if (fast_rx->uses_rss)
+- stats = this_cpu_ptr(sta->pcpu_rx_stats);
+-
+ /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
+ * to a common data structure; drivers can implement that per queue
+ * but we don't have that information in mac80211
+@@ -4369,32 +4481,6 @@ static bool ieee80211_invoke_fast_rx(str
+ pskb_trim(skb, skb->len - fast_rx->icv_len))
+ goto drop;
+
+- /* statistics part of ieee80211_rx_h_sta_process() */
+- if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
+- stats->last_signal = status->signal;
+- if (!fast_rx->uses_rss)
+- ewma_signal_add(&sta->rx_stats_avg.signal,
+- -status->signal);
+- }
+-
+- if (status->chains) {
+- int i;
+-
+- stats->chains = status->chains;
+- for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
+- int signal = status->chain_signal[i];
+-
+- if (!(status->chains & BIT(i)))
+- continue;
+-
+- stats->chain_signal_last[i] = signal;
+- if (!fast_rx->uses_rss)
+- ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
+- -signal);
+- }
+- }
+- /* end of statistics */
+-
+ if (rx->key && !ieee80211_has_protected(hdr->frame_control))
+ goto drop;
+
+@@ -4406,12 +4492,6 @@ static bool ieee80211_invoke_fast_rx(str
+ return true;
+ }
+
+- stats->last_rx = jiffies;
+- stats->last_rate = sta_stats_encode_rate(status);
+-
+- stats->fragments++;
+- stats->packets++;
+-
+ /* do the header conversion - first grab the addresses */
+ ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
+ ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
+@@ -4420,62 +4500,14 @@ static bool ieee80211_invoke_fast_rx(str
+ /* push the addresses in front */
+ memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs));
+
+- skb->dev = fast_rx->dev;
+-
+- ieee80211_rx_stats(fast_rx->dev, skb->len);
+-
+- /* The seqno index has the same property as needed
+- * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS
+- * for non-QoS-data frames. Here we know it's a data
+- * frame, so count MSDUs.
+- */
+- u64_stats_update_begin(&stats->syncp);
+- stats->msdu[rx->seqno_idx]++;
+- stats->bytes += orig_len;
+- u64_stats_update_end(&stats->syncp);
+-
+- if (fast_rx->internal_forward) {
+- struct sk_buff *xmit_skb = NULL;
+- if (is_multicast_ether_addr(addrs.da)) {
+- xmit_skb = skb_copy(skb, GFP_ATOMIC);
+- } else if (!ether_addr_equal(addrs.da, addrs.sa) &&
+- sta_info_get(rx->sdata, addrs.da)) {
+- xmit_skb = skb;
+- skb = NULL;
+- }
+-
+- if (xmit_skb) {
+- /*
+- * Send to wireless media and increase priority by 256
+- * to keep the received priority instead of
+- * reclassifying the frame (see cfg80211_classify8021d).
+- */
+- xmit_skb->priority += 256;
+- xmit_skb->protocol = htons(ETH_P_802_3);
+- skb_reset_network_header(xmit_skb);
+- skb_reset_mac_header(xmit_skb);
+- dev_queue_xmit(xmit_skb);
+- }
+-
+- if (!skb)
+- return true;
+- }
+-
+- /* deliver to local stack */
+- skb->protocol = eth_type_trans(skb, fast_rx->dev);
+- memset(skb->cb, 0, sizeof(skb->cb));
+- if (rx->list)
+-#if LINUX_VERSION_IS_GEQ(4,19,0)
+- list_add_tail(&skb->list, rx->list);
+-#else
+- __skb_queue_tail(rx->list, skb);
+-#endif
+- else
+- netif_receive_skb(skb);
++ ieee80211_rx_8023(rx, fast_rx, orig_len);
+
+ return true;
+ drop:
+ dev_kfree_skb(skb);
++ if (fast_rx->uses_rss)
++ stats = this_cpu_ptr(sta->pcpu_rx_stats);
++
+ stats->dropped++;
+ return true;
+ }
+@@ -4529,6 +4561,47 @@ static bool ieee80211_prepare_and_rx_han
+ return true;
+ }
+
++static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
++ struct ieee80211_sta *pubsta,
++ struct sk_buff *skb,
++#if LINUX_VERSION_IS_GEQ(4,19,0)
++ struct list_head *list)
++#else
++ struct sk_buff_head *list)
++#endif
++{
++ struct ieee80211_local *local = hw_to_local(hw);
++ struct ieee80211_fast_rx *fast_rx;
++ struct ieee80211_rx_data rx;
++
++ memset(&rx, 0, sizeof(rx));
++ rx.skb = skb;
++ rx.local = local;
++ rx.list = list;
++
++ I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
++
++ /* drop frame if too short for header */
++ if (skb->len < sizeof(struct ethhdr))
++ goto drop;
++
++ if (!pubsta)
++ goto drop;
++
++ rx.sta = container_of(pubsta, struct sta_info, sta);
++ rx.sdata = rx.sta->sdata;
++
++ fast_rx = rcu_dereference(rx.sta->fast_rx);
++ if (!fast_rx)
++ goto drop;
++
++ ieee80211_rx_8023(&rx, fast_rx, skb->len);
++ return;
++
++drop:
++ dev_kfree_skb(skb);
++}
++
+ /*
+ * This is the actual Rx frames handler. as it belongs to Rx path it must
+ * be called with rcu_read_lock protection.
+@@ -4766,15 +4839,20 @@ void ieee80211_rx_list(struct ieee80211_
+ * if it was previously present.
+ * Also, frames with less than 16 bytes are dropped.
+ */
+- skb = ieee80211_rx_monitor(local, skb, rate);
+- if (!skb)
+- return;
++ if (!(status->flag & RX_FLAG_8023)) {
++ skb = ieee80211_rx_monitor(local, skb, rate);
++ if (!skb)
++ return;
++ }
+
+ ieee80211_tpt_led_trig_rx(local,
+ ((struct ieee80211_hdr *)skb->data)->frame_control,
+ skb->len);
+
+- __ieee80211_rx_handle_packet(hw, pubsta, skb, list);
++ if (status->flag & RX_FLAG_8023)
++ __ieee80211_rx_handle_8023(hw, pubsta, skb, list);
++ else
++ __ieee80211_rx_handle_packet(hw, pubsta, skb, list);
+
+ return;
+ drop:
+--- a/net/mac80211/sta_info.h
++++ b/net/mac80211/sta_info.h
+@@ -71,6 +71,7 @@
+ * until pending frames are delivered
+ * @WLAN_STA_USES_ENCRYPTION: This station was configured for encryption,
+ * so drop all packets without a key later.
++ * @WLAN_STA_DECAP_OFFLOAD: This station uses rx decap offload
+ *
+ * @NUM_WLAN_STA_FLAGS: number of defined flags
+ */
+@@ -102,6 +103,7 @@ enum ieee80211_sta_info_flags {
+ WLAN_STA_MPSP_RECIPIENT,
+ WLAN_STA_PS_DELIVER,
+ WLAN_STA_USES_ENCRYPTION,
++ WLAN_STA_DECAP_OFFLOAD,
+
+ NUM_WLAN_STA_FLAGS,
+ };
+--- a/net/mac80211/trace.h
++++ b/net/mac80211/trace.h
+@@ -2740,7 +2740,7 @@ DEFINE_EVENT(local_sdata_addr_evt, drv_u
+ TP_ARGS(local, sdata)
+ );
+
+-TRACE_EVENT(drv_sta_set_4addr,
++DECLARE_EVENT_CLASS(sta_flag_evt,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta, bool enabled),
+@@ -2767,6 +2767,22 @@ TRACE_EVENT(drv_sta_set_4addr,
+ )
+ );
+
++DEFINE_EVENT(sta_flag_evt, drv_sta_set_4addr,
++ TP_PROTO(struct ieee80211_local *local,
++ struct ieee80211_sub_if_data *sdata,
++ struct ieee80211_sta *sta, bool enabled),
++
++ TP_ARGS(local, sdata, sta, enabled)
++);
++
++DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload,
++ TP_PROTO(struct ieee80211_local *local,
++ struct ieee80211_sub_if_data *sdata,
++ struct ieee80211_sta *sta, bool enabled),
++
++ TP_ARGS(local, sdata, sta, enabled)
++);
++
+ #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
+
+ #undef TRACE_INCLUDE_PATH