mac80211: add AQL support for broadcast/multicast packets
authorFelix Fietkau <nbd@nbd.name>
Fri, 9 Feb 2024 19:47:39 +0000 (20:47 +0100)
committerFelix Fietkau <nbd@nbd.name>
Wed, 21 Feb 2024 09:56:22 +0000 (10:56 +0100)
Should improve performance/reliability with lots of mcast packets

Signed-off-by: Felix Fietkau <nbd@nbd.name>
package/kernel/mac80211/patches/subsys/330-mac80211-add-AQL-support-for-broadcast-packets.patch [new file with mode: 0644]

diff --git a/package/kernel/mac80211/patches/subsys/330-mac80211-add-AQL-support-for-broadcast-packets.patch b/package/kernel/mac80211/patches/subsys/330-mac80211-add-AQL-support-for-broadcast-packets.patch
new file mode 100644 (file)
index 0000000..6f64467
--- /dev/null
@@ -0,0 +1,226 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 9 Feb 2024 19:43:40 +0100
+Subject: [PATCH] mac80211: add AQL support for broadcast packets
+
+Excessive broadcast traffic with little competing unicast traffic can easily
+flood hardware queues, leading to throughput issues. Additionally, filling
+the hardware queues with too many packets breaks FQ for broadcast data.
+Fix this by enabling AQL for broadcast packets.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/cfg80211.h
++++ b/include/net/cfg80211.h
+@@ -3324,6 +3324,7 @@ enum wiphy_params_flags {
+ /* The per TXQ device queue limit in airtime */
+ #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L     5000
+ #define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H     12000
++#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC    50000
+ /* The per interface airtime threshold to switch to lower queue limit */
+ #define IEEE80211_AQL_THRESHOLD                       24000
+--- a/net/mac80211/debugfs.c
++++ b/net/mac80211/debugfs.c
+@@ -215,11 +215,13 @@ static ssize_t aql_pending_read(struct f
+                       "VI     %u us\n"
+                       "BE     %u us\n"
+                       "BK     %u us\n"
++                      "BC/MC  %u us\n"
+                       "total  %u us\n",
+                       atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VO]),
+                       atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_VI]),
+                       atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BE]),
+                       atomic_read(&local->aql_ac_pending_airtime[IEEE80211_AC_BK]),
++                      atomic_read(&local->aql_bc_pending_airtime),
+                       atomic_read(&local->aql_total_pending_airtime));
+       return simple_read_from_buffer(user_buf, count, ppos,
+                                      buf, len);
+@@ -245,7 +247,8 @@ static ssize_t aql_txq_limit_read(struct
+                       "VO     %u              %u\n"
+                       "VI     %u              %u\n"
+                       "BE     %u              %u\n"
+-                      "BK     %u              %u\n",
++                      "BK     %u              %u\n"
++                      "BC/MC  %u\n",
+                       local->aql_txq_limit_low[IEEE80211_AC_VO],
+                       local->aql_txq_limit_high[IEEE80211_AC_VO],
+                       local->aql_txq_limit_low[IEEE80211_AC_VI],
+@@ -253,7 +256,8 @@ static ssize_t aql_txq_limit_read(struct
+                       local->aql_txq_limit_low[IEEE80211_AC_BE],
+                       local->aql_txq_limit_high[IEEE80211_AC_BE],
+                       local->aql_txq_limit_low[IEEE80211_AC_BK],
+-                      local->aql_txq_limit_high[IEEE80211_AC_BK]);
++                      local->aql_txq_limit_high[IEEE80211_AC_BK],
++                      local->aql_txq_limit_bc);
+       return simple_read_from_buffer(user_buf, count, ppos,
+                                      buf, len);
+ }
+@@ -279,6 +283,11 @@ static ssize_t aql_txq_limit_write(struc
+       else
+               buf[count] = '\0';
++      if (sscanf(buf, "mcast %u", &q_limit_low) == 1) {
++              local->aql_txq_limit_bc = q_limit_low;
++              return count;
++      }
++
+       if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3)
+               return -EINVAL;
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -1328,10 +1328,12 @@ struct ieee80211_local {
+       spinlock_t handle_wake_tx_queue_lock;
+       u16 airtime_flags;
++      u32 aql_txq_limit_bc;
+       u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
+       u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
+       u32 aql_threshold;
+       atomic_t aql_total_pending_airtime;
++      atomic_t aql_bc_pending_airtime;
+       atomic_t aql_ac_pending_airtime[IEEE80211_NUM_ACS];
+       const struct ieee80211_ops *ops;
+--- a/net/mac80211/main.c
++++ b/net/mac80211/main.c
+@@ -788,6 +788,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_
+       spin_lock_init(&local->rx_path_lock);
+       spin_lock_init(&local->queue_stop_reason_lock);
++      local->aql_txq_limit_bc = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_BC;
+       for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+               INIT_LIST_HEAD(&local->active_txqs[i]);
+               spin_lock_init(&local->active_txq_lock[i]);
+--- a/net/mac80211/sta_info.c
++++ b/net/mac80211/sta_info.c
+@@ -2341,28 +2341,27 @@ void ieee80211_sta_update_pending_airtim
+                                         struct sta_info *sta, u8 ac,
+                                         u16 tx_airtime, bool tx_completed)
+ {
++      atomic_t *counter;
+       int tx_pending;
+       if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
+               return;
+-      if (!tx_completed) {
+-              if (sta)
+-                      atomic_add(tx_airtime,
+-                                 &sta->airtime[ac].aql_tx_pending);
++      if (sta)
++              counter = &sta->airtime[ac].aql_tx_pending;
++      else
++              counter = &local->aql_bc_pending_airtime;
++      if (!tx_completed) {
++              atomic_add(tx_airtime, counter);
+               atomic_add(tx_airtime, &local->aql_total_pending_airtime);
+               atomic_add(tx_airtime, &local->aql_ac_pending_airtime[ac]);
+               return;
+       }
+-      if (sta) {
+-              tx_pending = atomic_sub_return(tx_airtime,
+-                                             &sta->airtime[ac].aql_tx_pending);
+-              if (tx_pending < 0)
+-                      atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending,
+-                                     tx_pending, 0);
+-      }
++      tx_pending = atomic_sub_return(tx_airtime, counter);
++      if (tx_pending < 0)
++              atomic_cmpxchg(counter, tx_pending, 0);
+       atomic_sub(tx_airtime, &local->aql_total_pending_airtime);
+       tx_pending = atomic_sub_return(tx_airtime,
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -3958,9 +3958,8 @@ begin:
+ encap_out:
+       IEEE80211_SKB_CB(skb)->control.vif = vif;
+-      if (tx.sta &&
+-          wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
+-              bool ampdu = txq->ac != IEEE80211_AC_VO;
++      if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
++              bool ampdu = txq->sta && txq->ac != IEEE80211_AC_VO;
+               u32 airtime;
+               airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta,
+@@ -4026,6 +4025,7 @@ struct ieee80211_txq *ieee80211_next_txq
+       struct ieee80211_txq *ret = NULL;
+       struct txq_info *txqi = NULL, *head = NULL;
+       bool found_eligible_txq = false;
++      bool aql_check;
+       spin_lock_bh(&local->active_txq_lock[ac]);
+@@ -4049,26 +4049,26 @@ struct ieee80211_txq *ieee80211_next_txq
+       if (!head)
+               head = txqi;
++      aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
++      if (aql_check)
++              found_eligible_txq = true;
++
+       if (txqi->txq.sta) {
+               struct sta_info *sta = container_of(txqi->txq.sta,
+                                                   struct sta_info, sta);
+-              bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
+-              s32 deficit = ieee80211_sta_deficit(sta, txqi->txq.ac);
+-
+-              if (aql_check)
+-                      found_eligible_txq = true;
+-
+-              if (deficit < 0)
++              if (ieee80211_sta_deficit(sta, txqi->txq.ac) < 0) {
+                       sta->airtime[txqi->txq.ac].deficit +=
+                               sta->airtime_weight << AIRTIME_QUANTUM_SHIFT;
+-
+-              if (deficit < 0 || !aql_check) {
+-                      list_move_tail(&txqi->schedule_order,
+-                                     &local->active_txqs[txqi->txq.ac]);
+-                      goto begin;
++                      aql_check = false;
+               }
+       }
++      if (!aql_check) {
++              list_move_tail(&txqi->schedule_order,
++                                 &local->active_txqs[txqi->txq.ac]);
++              goto begin;
++      }
++
+       if (txqi->schedule_round == local->schedule_round[ac])
+               goto out;
+@@ -4133,7 +4133,8 @@ bool ieee80211_txq_airtime_check(struct
+               return true;
+       if (!txq->sta)
+-              return true;
++              return atomic_read(&local->aql_bc_pending_airtime) <
++                     local->aql_txq_limit_bc;
+       if (unlikely(txq->tid == IEEE80211_NUM_TIDS))
+               return true;
+@@ -4182,15 +4183,15 @@ bool ieee80211_txq_may_transmit(struct i
+       spin_lock_bh(&local->active_txq_lock[ac]);
+-      if (!txqi->txq.sta)
+-              goto out;
+-
+       if (list_empty(&txqi->schedule_order))
+               goto out;
+       if (!ieee80211_txq_schedule_airtime_check(local, ac))
+               goto out;
++      if (!txqi->txq.sta)
++              goto out;
++
+       list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
+                                schedule_order) {
+               if (iter == txqi)