--- /dev/null
+From: John Crispin <john@phrozen.org>
+Date: Tue, 29 Oct 2019 10:13:02 +0100
+Subject: [PATCH] mac80211: move store skb ack code to its own function
+
+This patch moves the code handling SKBTX_WIFI_STATUS inside the TX path
+into an extra function. This allows us to reuse it inside the 802.11 encap
+offloading datapath.
+
+Signed-off-by: John Crispin <john@phrozen.org>
+Link: https://lore.kernel.org/r/20191029091304.7330-2-john@phrozen.org
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+---
+
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -2430,6 +2430,33 @@ static int ieee80211_lookup_ra_sta(struc
+ return 0;
+ }
+
++static int ieee80211_store_ack_skb(struct ieee80211_local *local,
++ struct sk_buff *skb,
++ u32 *info_flags)
++{
++ struct sk_buff *ack_skb = skb_clone_sk(skb);
++ u16 info_id = 0;
++
++ if (ack_skb) {
++ unsigned long flags;
++ int id;
++
++ spin_lock_irqsave(&local->ack_status_lock, flags);
++ id = idr_alloc(&local->ack_status_frames, ack_skb,
++ 1, 0x10000, GFP_ATOMIC);
++ spin_unlock_irqrestore(&local->ack_status_lock, flags);
++
++ if (id >= 0) {
++ info_id = id;
++ *info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
++ } else {
++ kfree_skb(ack_skb);
++ }
++ }
++
++ return info_id;
++}
++
+ /**
+ * ieee80211_build_hdr - build 802.11 header in the given frame
+ * @sdata: virtual interface to build the header for
+@@ -2723,26 +2750,8 @@ static struct sk_buff *ieee80211_build_h
+ }
+
+ if (unlikely(!multicast && skb->sk &&
+- skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) {
+- struct sk_buff *ack_skb = skb_clone_sk(skb);
+-
+- if (ack_skb) {
+- unsigned long flags;
+- int id;
+-
+- spin_lock_irqsave(&local->ack_status_lock, flags);
+- id = idr_alloc(&local->ack_status_frames, ack_skb,
+- 1, 0x10000, GFP_ATOMIC);
+- spin_unlock_irqrestore(&local->ack_status_lock, flags);
+-
+- if (id >= 0) {
+- info_id = id;
+- info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+- } else {
+- kfree_skb(ack_skb);
+- }
+- }
+- }
++ skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
++ info_id = ieee80211_store_ack_skb(local, skb, &info_flags);
+
+ /*
+ * If the skb is shared we need to obtain our own copy.
--- /dev/null
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Wed, 23 Oct 2019 11:59:00 +0200
+Subject: [PATCH] mac80211: Shrink the size of ack_frame_id to make room for
+ tx_time_est
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+To implement airtime queue limiting, we need to keep a running account of
+the estimated airtime of all skbs queued into the device. To do this
+correctly, we need to store the airtime estimate into the skb so we can
+decrease the outstanding balance when the skb is freed. This means that the
+time estimate must be stored somewhere that will survive for the lifetime
+of the skb.
+
+To get this, decrease the size of the ack_frame_id field to 6 bits, and
+lower the size of the ID space accordingly. This leaves 10 bits for use for
+tx_time_est, which is enough to store a maximum of 4096 us, if we shift the
+values so they become units of 4us.
+
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://lore.kernel.org/r/157182474063.150713.16132669599100802716.stgit@toke.dk
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+---
+
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -967,6 +967,7 @@ ieee80211_rate_get_vht_nss(const struct
+ * @band: the band to transmit on (use for checking for races)
+ * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC
+ * @ack_frame_id: internal frame ID for TX status, used internally
++ * @tx_time_est: TX time estimate in units of 4us, used internally
+ * @control: union part for control data
+ * @control.rates: TX rates array to try
+ * @control.rts_cts_rate_idx: rate for RTS or CTS
+@@ -1007,7 +1008,8 @@ struct ieee80211_tx_info {
+
+ u8 hw_queue;
+
+- u16 ack_frame_id;
++ u16 ack_frame_id:6;
++ u16 tx_time_est:10;
+
+ union {
+ struct {
+--- a/net/mac80211/cfg.c
++++ b/net/mac80211/cfg.c
+@@ -3427,7 +3427,7 @@ int ieee80211_attach_ack_skb(struct ieee
+
+ spin_lock_irqsave(&local->ack_status_lock, spin_flags);
+ id = idr_alloc(&local->ack_status_frames, ack_skb,
+- 1, 0x10000, GFP_ATOMIC);
++ 1, 0x40, GFP_ATOMIC);
+ spin_unlock_irqrestore(&local->ack_status_lock, spin_flags);
+
+ if (id < 0) {
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -2443,7 +2443,7 @@ static int ieee80211_store_ack_skb(struc
+
+ spin_lock_irqsave(&local->ack_status_lock, flags);
+ id = idr_alloc(&local->ack_status_frames, ack_skb,
+- 1, 0x10000, GFP_ATOMIC);
++ 1, 0x40, GFP_ATOMIC);
+ spin_unlock_irqrestore(&local->ack_status_lock, flags);
+
+ if (id >= 0) {
--- /dev/null
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Tue, 12 Nov 2019 14:08:35 +0100
+Subject: [PATCH] mac80211: Add new sta_info getter by sta/vif addrs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+In ieee80211_tx_status() we don't have an sdata struct when looking up the
+destination sta. Instead, we just do a lookup by the vif addr that is the
+source of the packet being completed. Factor this out into a new sta_info
+getter helper, since we need to use it for accounting AQL as well.
+
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://lore.kernel.org/r/20191112130835.382062-1-toke@redhat.com
+[remove internal rcu_read_lock(), document instead]
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+---
+
+--- a/net/mac80211/sta_info.c
++++ b/net/mac80211/sta_info.c
+@@ -210,6 +210,20 @@ struct sta_info *sta_info_get_bss(struct
+ return NULL;
+ }
+
++struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local,
++ const u8 *sta_addr, const u8 *vif_addr)
++{
++ struct rhlist_head *tmp;
++ struct sta_info *sta;
++
++ for_each_sta_info(local, sta_addr, sta, tmp) {
++ if (ether_addr_equal(vif_addr, sta->sdata->vif.addr))
++ return sta;
++ }
++
++ return NULL;
++}
++
+ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata,
+ int idx)
+ {
+--- a/net/mac80211/sta_info.h
++++ b/net/mac80211/sta_info.h
+@@ -725,6 +725,10 @@ struct sta_info *sta_info_get(struct iee
+ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
+ const u8 *addr);
+
++/* user must hold sta_mtx or be in RCU critical section */
++struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local,
++ const u8 *sta_addr, const u8 *vif_addr);
++
+ #define for_each_sta_info(local, _addr, _sta, _tmp) \
+ rhl_for_each_entry_rcu(_sta, _tmp, \
+ sta_info_hash_lookup(local, _addr), hash_node)
+--- a/net/mac80211/status.c
++++ b/net/mac80211/status.c
+@@ -1085,19 +1085,13 @@ void ieee80211_tx_status(struct ieee8021
+ .skb = skb,
+ .info = IEEE80211_SKB_CB(skb),
+ };
+- struct rhlist_head *tmp;
+ struct sta_info *sta;
+
+ rcu_read_lock();
+
+- for_each_sta_info(local, hdr->addr1, sta, tmp) {
+- /* skip wrong virtual interface */
+- if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr))
+- continue;
+-
++ sta = sta_info_get_by_addrs(local, hdr->addr1, hdr->addr2);
++ if (sta)
+ status.sta = &sta->sta;
+- break;
+- }
+
+ __ieee80211_tx_status(hw, &status);
+ rcu_read_unlock();
--- /dev/null
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Mon, 18 Nov 2019 22:06:08 -0800
+Subject: [PATCH] mac80211: Import airtime calculation code from mt76
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Felix recently added code to calculate airtime of packets to the mt76
+driver. Import this into mac80211 so we can use it for airtime queue limit
+calculations.
+
+The airtime.c file is copied verbatim from the mt76 driver, and adjusted to
+be usable in mac80211. This involves:
+
+- Switching to mac80211 data structures.
+- Adding support for 160 MHz channels and HE mode.
+- Moving the symbol and duration calculations around a bit to avoid
+ rounding with the higher rates and longer symbol times used for HE rates.
+
+The per-rate TX rate calculation is also split out to its own function so
+it can be used directly for the AQL calculations later.
+
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://lore.kernel.org/r/20191119060610.76681-3-kyan@google.com
+[fix HE_GROUP_IDX() to use 3 * bw, since there are 3 _gi values]
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+---
+ create mode 100644 net/mac80211/airtime.c
+
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -6417,4 +6417,33 @@ void ieee80211_nan_func_match(struct iee
+ struct cfg80211_nan_match_params *match,
+ gfp_t gfp);
+
++/**
++ * ieee80211_calc_rx_airtime - calculate estimated transmission airtime for RX.
++ *
++ * This function calculates the estimated airtime usage of a frame based on the
++ * rate information in the RX status struct and the frame length.
++ *
++ * @hw: pointer as obtained from ieee80211_alloc_hw()
++ * @status: &struct ieee80211_rx_status containing the transmission rate
++ * information.
++ * @len: frame length in bytes
++ */
++u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw,
++ struct ieee80211_rx_status *status,
++ int len);
++
++/**
++ * ieee80211_calc_tx_airtime - calculate estimated transmission airtime for TX.
++ *
++ * This function calculates the estimated airtime usage of a frame based on the
++ * rate information in the TX info struct and the frame length.
++ *
++ * @hw: pointer as obtained from ieee80211_alloc_hw()
++ * @info: &struct ieee80211_tx_info of the frame.
++ * @len: frame length in bytes
++ */
++u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw,
++ struct ieee80211_tx_info *info,
++ int len);
++
+ #endif /* MAC80211_H */
+--- a/net/mac80211/Makefile
++++ b/net/mac80211/Makefile
+@@ -31,7 +31,8 @@ mac80211-y := \
+ chan.o \
+ trace.o mlme.o \
+ tdls.o \
+- ocb.o
++ ocb.o \
++ airtime.o
+
+ mac80211-$(CPTCFG_MAC80211_LEDS) += led.o
+ mac80211-$(CPTCFG_MAC80211_DEBUGFS) += \
+--- /dev/null
++++ b/net/mac80211/airtime.c
+@@ -0,0 +1,597 @@
++// SPDX-License-Identifier: ISC
++/*
++ * Copyright (C) 2019 Felix Fietkau <nbd@nbd.name>
++ */
++
++#include <net/mac80211.h>
++#include "ieee80211_i.h"
++#include "sta_info.h"
++
++#define AVG_PKT_SIZE 1024
++
++/* Number of bits for an average sized packet */
++#define MCS_NBITS (AVG_PKT_SIZE << 3)
++
++/* Number of kilo-symbols (symbols * 1024) for a packet with (bps) bits per
++ * symbol. We use k-symbols to avoid rounding in the _TIME macros below.
++ */
++#define MCS_N_KSYMS(bps) DIV_ROUND_UP(MCS_NBITS << 10, (bps))
++
++/* Transmission time (in 1024 * usec) for a packet containing (ksyms) * 1024
++ * symbols.
++ */
++#define MCS_SYMBOL_TIME(sgi, ksyms) \
++ (sgi ? \
++ ((ksyms) * 4 * 18) / 20 : /* 3.6 us per sym */ \
++ ((ksyms) * 4) /* 4.0 us per sym */ \
++ )
++
++/* Transmit duration for the raw data part of an average sized packet */
++#define MCS_DURATION(streams, sgi, bps) \
++ ((u32)MCS_SYMBOL_TIME(sgi, MCS_N_KSYMS((streams) * (bps))))
++
++#define MCS_DURATION_S(shift, streams, sgi, bps) \
++ ((u16)((MCS_DURATION(streams, sgi, bps) >> shift)))
++
++/* These should match the values in enum nl80211_he_gi */
++#define HE_GI_08 0
++#define HE_GI_16 1
++#define HE_GI_32 2
++
++/* Transmission time (1024 usec) for a packet containing (ksyms) * k-symbols */
++#define HE_SYMBOL_TIME(gi, ksyms) \
++ (gi == HE_GI_08 ? \
++ ((ksyms) * 16 * 17) / 20 : /* 13.6 us per sym */ \
++ (gi == HE_GI_16 ? \
++ ((ksyms) * 16 * 18) / 20 : /* 14.4 us per sym */ \
++ ((ksyms) * 16) /* 16.0 us per sym */ \
++ ))
++
++/* Transmit duration for the raw data part of an average sized packet */
++#define HE_DURATION(streams, gi, bps) \
++ ((u32)HE_SYMBOL_TIME(gi, MCS_N_KSYMS((streams) * (bps))))
++
++#define HE_DURATION_S(shift, streams, gi, bps) \
++ (HE_DURATION(streams, gi, bps) >> shift)
++
++#define BW_20 0
++#define BW_40 1
++#define BW_80 2
++#define BW_160 3
++
++/*
++ * Define group sort order: HT40 -> SGI -> #streams
++ */
++#define IEEE80211_MAX_STREAMS 4
++#define IEEE80211_HT_STREAM_GROUPS 4 /* BW(=2) * SGI(=2) */
++#define IEEE80211_VHT_STREAM_GROUPS 8 /* BW(=4) * SGI(=2) */
++
++#define IEEE80211_HE_MAX_STREAMS 8
++#define IEEE80211_HE_STREAM_GROUPS 12 /* BW(=4) * GI(=3) */
++
++#define IEEE80211_HT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
++ IEEE80211_HT_STREAM_GROUPS)
++#define IEEE80211_VHT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
++ IEEE80211_VHT_STREAM_GROUPS)
++#define IEEE80211_HE_GROUPS_NB (IEEE80211_HE_MAX_STREAMS * \
++ IEEE80211_HE_STREAM_GROUPS)
++#define IEEE80211_GROUPS_NB (IEEE80211_HT_GROUPS_NB + \
++ IEEE80211_VHT_GROUPS_NB + \
++ IEEE80211_HE_GROUPS_NB)
++
++#define IEEE80211_HT_GROUP_0 0
++#define IEEE80211_VHT_GROUP_0 (IEEE80211_HT_GROUP_0 + IEEE80211_HT_GROUPS_NB)
++#define IEEE80211_HE_GROUP_0 (IEEE80211_VHT_GROUP_0 + IEEE80211_VHT_GROUPS_NB)
++
++#define MCS_GROUP_RATES 12
++
++#define HT_GROUP_IDX(_streams, _sgi, _ht40) /* HT slot in airtime_mcs_groups[] */ \
++	(IEEE80211_HT_GROUP_0 +			\
++	 IEEE80211_MAX_STREAMS * 2 * (_ht40) +	\
++	 IEEE80211_MAX_STREAMS * (_sgi) +	\
++	 (_streams) - 1)
++
++#define _MAX(a, b) (((a)>(b))?(a):(b))
++
++#define GROUP_SHIFT(duration) \
++ _MAX(0, 16 - __builtin_clz(duration))
++
++/* MCS rate information for an MCS group */
++#define __MCS_GROUP(_streams, _sgi, _ht40, _s) \
++ [HT_GROUP_IDX(_streams, _sgi, _ht40)] = { \
++ .shift = _s, \
++ .duration = { \
++ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 54 : 26), \
++ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 108 : 52), \
++ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 162 : 78), \
++ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 216 : 104), \
++ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 324 : 156), \
++ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 432 : 208), \
++ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 486 : 234), \
++ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 540 : 260) \
++ } \
++}
++
++#define MCS_GROUP_SHIFT(_streams, _sgi, _ht40) \
++ GROUP_SHIFT(MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26))
++
++#define MCS_GROUP(_streams, _sgi, _ht40) \
++ __MCS_GROUP(_streams, _sgi, _ht40, \
++ MCS_GROUP_SHIFT(_streams, _sgi, _ht40))
++
++#define VHT_GROUP_IDX(_streams, _sgi, _bw) \
++ (IEEE80211_VHT_GROUP_0 + \
++ IEEE80211_MAX_STREAMS * 2 * (_bw) + \
++ IEEE80211_MAX_STREAMS * (_sgi) + \
++ (_streams) - 1)
++
++#define BW2VBPS(_bw, r4, r3, r2, r1) \
++ (_bw == BW_160 ? r4 : _bw == BW_80 ? r3 : _bw == BW_40 ? r2 : r1)
++
++#define __VHT_GROUP(_streams, _sgi, _bw, _s) \
++ [VHT_GROUP_IDX(_streams, _sgi, _bw)] = { \
++ .shift = _s, \
++ .duration = { \
++ MCS_DURATION_S(_s, _streams, _sgi, \
++ BW2VBPS(_bw, 234, 117, 54, 26)), \
++ MCS_DURATION_S(_s, _streams, _sgi, \
++ BW2VBPS(_bw, 468, 234, 108, 52)), \
++ MCS_DURATION_S(_s, _streams, _sgi, \
++ BW2VBPS(_bw, 702, 351, 162, 78)), \
++ MCS_DURATION_S(_s, _streams, _sgi, \
++ BW2VBPS(_bw, 936, 468, 216, 104)), \
++ MCS_DURATION_S(_s, _streams, _sgi, \
++ BW2VBPS(_bw, 1404, 702, 324, 156)), \
++ MCS_DURATION_S(_s, _streams, _sgi, \
++ BW2VBPS(_bw, 1872, 936, 432, 208)), \
++ MCS_DURATION_S(_s, _streams, _sgi, \
++ BW2VBPS(_bw, 2106, 1053, 486, 234)), \
++ MCS_DURATION_S(_s, _streams, _sgi, \
++ BW2VBPS(_bw, 2340, 1170, 540, 260)), \
++ MCS_DURATION_S(_s, _streams, _sgi, \
++ BW2VBPS(_bw, 2808, 1404, 648, 312)), \
++ MCS_DURATION_S(_s, _streams, _sgi, \
++ BW2VBPS(_bw, 3120, 1560, 720, 346)) \
++ } \
++}
++
++#define VHT_GROUP_SHIFT(_streams, _sgi, _bw) /* sized from the group's first rate */ \
++	GROUP_SHIFT(MCS_DURATION(_streams, _sgi,			\
++				 BW2VBPS(_bw, 234, 117, 54, 26)))
++
++#define VHT_GROUP(_streams, _sgi, _bw) \
++ __VHT_GROUP(_streams, _sgi, _bw, \
++ VHT_GROUP_SHIFT(_streams, _sgi, _bw))
++
++
++#define HE_GROUP_IDX(_streams, _gi, _bw) \
++ (IEEE80211_HE_GROUP_0 + \
++ IEEE80211_HE_MAX_STREAMS * 3 * (_bw) + \
++ IEEE80211_HE_MAX_STREAMS * (_gi) + \
++ (_streams) - 1)
++
++#define __HE_GROUP(_streams, _gi, _bw, _s) \
++ [HE_GROUP_IDX(_streams, _gi, _bw)] = { \
++ .shift = _s, \
++ .duration = { \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 979, 489, 230, 115)), \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 1958, 979, 475, 230)), \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 2937, 1468, 705, 345)), \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 3916, 1958, 936, 475)), \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 5875, 2937, 1411, 705)), \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 7833, 3916, 1872, 936)), \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 8827, 4406, 2102, 1051)), \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 9806, 4896, 2347, 1166)), \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 11764, 5875, 2808, 1411)), \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 13060, 6523, 3124, 1555)), \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 14702, 7344, 3513, 1756)), \
++ HE_DURATION_S(_s, _streams, _gi, \
++ BW2VBPS(_bw, 16329, 8164, 3902, 1944)) \
++ } \
++}
++
++#define HE_GROUP_SHIFT(_streams, _gi, _bw) \
++ GROUP_SHIFT(HE_DURATION(_streams, _gi, \
++ BW2VBPS(_bw, 979, 489, 230, 115)))
++
++#define HE_GROUP(_streams, _gi, _bw) \
++ __HE_GROUP(_streams, _gi, _bw, \
++ HE_GROUP_SHIFT(_streams, _gi, _bw))
++struct mcs_group {
++ u8 shift;
++ u16 duration[MCS_GROUP_RATES];
++};
++
++static const struct mcs_group airtime_mcs_groups[] = {
++ MCS_GROUP(1, 0, BW_20),
++ MCS_GROUP(2, 0, BW_20),
++ MCS_GROUP(3, 0, BW_20),
++ MCS_GROUP(4, 0, BW_20),
++
++ MCS_GROUP(1, 1, BW_20),
++ MCS_GROUP(2, 1, BW_20),
++ MCS_GROUP(3, 1, BW_20),
++ MCS_GROUP(4, 1, BW_20),
++
++ MCS_GROUP(1, 0, BW_40),
++ MCS_GROUP(2, 0, BW_40),
++ MCS_GROUP(3, 0, BW_40),
++ MCS_GROUP(4, 0, BW_40),
++
++ MCS_GROUP(1, 1, BW_40),
++ MCS_GROUP(2, 1, BW_40),
++ MCS_GROUP(3, 1, BW_40),
++ MCS_GROUP(4, 1, BW_40),
++
++ VHT_GROUP(1, 0, BW_20),
++ VHT_GROUP(2, 0, BW_20),
++ VHT_GROUP(3, 0, BW_20),
++ VHT_GROUP(4, 0, BW_20),
++
++ VHT_GROUP(1, 1, BW_20),
++ VHT_GROUP(2, 1, BW_20),
++ VHT_GROUP(3, 1, BW_20),
++ VHT_GROUP(4, 1, BW_20),
++
++ VHT_GROUP(1, 0, BW_40),
++ VHT_GROUP(2, 0, BW_40),
++ VHT_GROUP(3, 0, BW_40),
++ VHT_GROUP(4, 0, BW_40),
++
++ VHT_GROUP(1, 1, BW_40),
++ VHT_GROUP(2, 1, BW_40),
++ VHT_GROUP(3, 1, BW_40),
++ VHT_GROUP(4, 1, BW_40),
++
++ VHT_GROUP(1, 0, BW_80),
++ VHT_GROUP(2, 0, BW_80),
++ VHT_GROUP(3, 0, BW_80),
++ VHT_GROUP(4, 0, BW_80),
++
++ VHT_GROUP(1, 1, BW_80),
++ VHT_GROUP(2, 1, BW_80),
++ VHT_GROUP(3, 1, BW_80),
++ VHT_GROUP(4, 1, BW_80),
++
++ VHT_GROUP(1, 0, BW_160),
++ VHT_GROUP(2, 0, BW_160),
++ VHT_GROUP(3, 0, BW_160),
++ VHT_GROUP(4, 0, BW_160),
++
++ VHT_GROUP(1, 1, BW_160),
++ VHT_GROUP(2, 1, BW_160),
++ VHT_GROUP(3, 1, BW_160),
++ VHT_GROUP(4, 1, BW_160),
++
++ HE_GROUP(1, HE_GI_08, BW_20),
++ HE_GROUP(2, HE_GI_08, BW_20),
++ HE_GROUP(3, HE_GI_08, BW_20),
++ HE_GROUP(4, HE_GI_08, BW_20),
++ HE_GROUP(5, HE_GI_08, BW_20),
++ HE_GROUP(6, HE_GI_08, BW_20),
++ HE_GROUP(7, HE_GI_08, BW_20),
++ HE_GROUP(8, HE_GI_08, BW_20),
++
++ HE_GROUP(1, HE_GI_16, BW_20),
++ HE_GROUP(2, HE_GI_16, BW_20),
++ HE_GROUP(3, HE_GI_16, BW_20),
++ HE_GROUP(4, HE_GI_16, BW_20),
++ HE_GROUP(5, HE_GI_16, BW_20),
++ HE_GROUP(6, HE_GI_16, BW_20),
++ HE_GROUP(7, HE_GI_16, BW_20),
++ HE_GROUP(8, HE_GI_16, BW_20),
++
++ HE_GROUP(1, HE_GI_32, BW_20),
++ HE_GROUP(2, HE_GI_32, BW_20),
++ HE_GROUP(3, HE_GI_32, BW_20),
++ HE_GROUP(4, HE_GI_32, BW_20),
++ HE_GROUP(5, HE_GI_32, BW_20),
++ HE_GROUP(6, HE_GI_32, BW_20),
++ HE_GROUP(7, HE_GI_32, BW_20),
++ HE_GROUP(8, HE_GI_32, BW_20),
++
++ HE_GROUP(1, HE_GI_08, BW_40),
++ HE_GROUP(2, HE_GI_08, BW_40),
++ HE_GROUP(3, HE_GI_08, BW_40),
++ HE_GROUP(4, HE_GI_08, BW_40),
++ HE_GROUP(5, HE_GI_08, BW_40),
++ HE_GROUP(6, HE_GI_08, BW_40),
++ HE_GROUP(7, HE_GI_08, BW_40),
++ HE_GROUP(8, HE_GI_08, BW_40),
++
++ HE_GROUP(1, HE_GI_16, BW_40),
++ HE_GROUP(2, HE_GI_16, BW_40),
++ HE_GROUP(3, HE_GI_16, BW_40),
++ HE_GROUP(4, HE_GI_16, BW_40),
++ HE_GROUP(5, HE_GI_16, BW_40),
++ HE_GROUP(6, HE_GI_16, BW_40),
++ HE_GROUP(7, HE_GI_16, BW_40),
++ HE_GROUP(8, HE_GI_16, BW_40),
++
++ HE_GROUP(1, HE_GI_32, BW_40),
++ HE_GROUP(2, HE_GI_32, BW_40),
++ HE_GROUP(3, HE_GI_32, BW_40),
++ HE_GROUP(4, HE_GI_32, BW_40),
++ HE_GROUP(5, HE_GI_32, BW_40),
++ HE_GROUP(6, HE_GI_32, BW_40),
++ HE_GROUP(7, HE_GI_32, BW_40),
++ HE_GROUP(8, HE_GI_32, BW_40),
++
++ HE_GROUP(1, HE_GI_08, BW_80),
++ HE_GROUP(2, HE_GI_08, BW_80),
++ HE_GROUP(3, HE_GI_08, BW_80),
++ HE_GROUP(4, HE_GI_08, BW_80),
++ HE_GROUP(5, HE_GI_08, BW_80),
++ HE_GROUP(6, HE_GI_08, BW_80),
++ HE_GROUP(7, HE_GI_08, BW_80),
++ HE_GROUP(8, HE_GI_08, BW_80),
++
++ HE_GROUP(1, HE_GI_16, BW_80),
++ HE_GROUP(2, HE_GI_16, BW_80),
++ HE_GROUP(3, HE_GI_16, BW_80),
++ HE_GROUP(4, HE_GI_16, BW_80),
++ HE_GROUP(5, HE_GI_16, BW_80),
++ HE_GROUP(6, HE_GI_16, BW_80),
++ HE_GROUP(7, HE_GI_16, BW_80),
++ HE_GROUP(8, HE_GI_16, BW_80),
++
++ HE_GROUP(1, HE_GI_32, BW_80),
++ HE_GROUP(2, HE_GI_32, BW_80),
++ HE_GROUP(3, HE_GI_32, BW_80),
++ HE_GROUP(4, HE_GI_32, BW_80),
++ HE_GROUP(5, HE_GI_32, BW_80),
++ HE_GROUP(6, HE_GI_32, BW_80),
++ HE_GROUP(7, HE_GI_32, BW_80),
++ HE_GROUP(8, HE_GI_32, BW_80),
++
++ HE_GROUP(1, HE_GI_08, BW_160),
++ HE_GROUP(2, HE_GI_08, BW_160),
++ HE_GROUP(3, HE_GI_08, BW_160),
++ HE_GROUP(4, HE_GI_08, BW_160),
++ HE_GROUP(5, HE_GI_08, BW_160),
++ HE_GROUP(6, HE_GI_08, BW_160),
++ HE_GROUP(7, HE_GI_08, BW_160),
++ HE_GROUP(8, HE_GI_08, BW_160),
++
++ HE_GROUP(1, HE_GI_16, BW_160),
++ HE_GROUP(2, HE_GI_16, BW_160),
++ HE_GROUP(3, HE_GI_16, BW_160),
++ HE_GROUP(4, HE_GI_16, BW_160),
++ HE_GROUP(5, HE_GI_16, BW_160),
++ HE_GROUP(6, HE_GI_16, BW_160),
++ HE_GROUP(7, HE_GI_16, BW_160),
++ HE_GROUP(8, HE_GI_16, BW_160),
++
++ HE_GROUP(1, HE_GI_32, BW_160),
++ HE_GROUP(2, HE_GI_32, BW_160),
++ HE_GROUP(3, HE_GI_32, BW_160),
++ HE_GROUP(4, HE_GI_32, BW_160),
++ HE_GROUP(5, HE_GI_32, BW_160),
++ HE_GROUP(6, HE_GI_32, BW_160),
++ HE_GROUP(7, HE_GI_32, BW_160),
++ HE_GROUP(8, HE_GI_32, BW_160),
++};
++
++static u32
++ieee80211_calc_legacy_rate_duration(u16 bitrate, bool short_pre,
++ bool cck, int len)
++{
++ u32 duration;
++
++ if (cck) {
++ duration = 144 + 48; /* preamble + PLCP */
++ if (short_pre)
++ duration >>= 1;
++
++ duration += 10; /* SIFS */
++ } else {
++ duration = 20 + 16; /* preamble + SIFS */
++ }
++
++ len <<= 3; /* bytes -> bits */
++ duration += (len * 10) / bitrate; /* presumably bitrate is in 100 kbit/s units - confirm */
++
++ return duration;
++}
++
++u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw,
++			      struct ieee80211_rx_status *status,
++			      int len)
++{
++	struct ieee80211_supported_band *sband;
++	const struct ieee80211_rate *rate;
++	bool sgi = status->enc_flags & RX_ENC_FLAG_SHORT_GI;
++	bool sp = status->enc_flags & RX_ENC_FLAG_SHORTPRE;
++	int bw, streams;
++	int group, idx;
++	u32 duration;
++	bool cck;
++
++	switch (status->bw) {
++	case RATE_INFO_BW_20:
++		bw = BW_20;
++		break;
++	case RATE_INFO_BW_40:
++		bw = BW_40;
++		break;
++	case RATE_INFO_BW_80:
++		bw = BW_80;
++		break;
++	case RATE_INFO_BW_160:
++		bw = BW_160;
++		break;
++	default:
++		WARN_ON_ONCE(1);
++		return 0;
++	}
++
++	switch (status->encoding) {
++	case RX_ENC_LEGACY:
++		if (WARN_ON_ONCE(status->band > NL80211_BAND_5GHZ))
++			return 0;
++		/* rate_idx must be a valid index into sband->bitrates[] */
++		sband = hw->wiphy->bands[status->band];
++		if (!sband || status->rate_idx >= sband->n_bitrates)
++			return 0;
++
++		rate = &sband->bitrates[status->rate_idx];
++		cck = rate->flags & IEEE80211_RATE_MANDATORY_B;
++
++		return ieee80211_calc_legacy_rate_duration(rate->bitrate, sp,
++							   cck, len);
++
++	case RX_ENC_VHT:
++		streams = status->nss;
++		idx = status->rate_idx;
++		group = VHT_GROUP_IDX(streams, sgi, bw);
++		break;
++	case RX_ENC_HT:
++		streams = ((status->rate_idx >> 3) & 3) + 1;
++		idx = status->rate_idx & 7;
++		group = HT_GROUP_IDX(streams, sgi, bw);
++		break;
++	case RX_ENC_HE:
++		streams = status->nss;
++		idx = status->rate_idx;
++		group = HE_GROUP_IDX(streams, status->he_gi, bw);
++		break;
++	default:
++		WARN_ON_ONCE(1);
++		return 0;
++	}
++
++	if (WARN_ON_ONCE((status->encoding != RX_ENC_HE && streams > 4) ||
++			 (status->encoding == RX_ENC_HE && streams > 8)))
++		return 0;
++	/* scale the stored AVG_PKT_SIZE duration to len bytes, then to usec */
++	duration = airtime_mcs_groups[group].duration[idx];
++	duration <<= airtime_mcs_groups[group].shift;
++	duration *= len;
++	duration /= AVG_PKT_SIZE;
++	duration /= 1024;
++
++	duration += 36 + (streams << 2);
++
++	return duration;
++}
++EXPORT_SYMBOL_GPL(ieee80211_calc_rx_airtime);
++
++static u32 ieee80211_calc_tx_airtime_rate(struct ieee80211_hw *hw,
++					  struct ieee80211_tx_rate *rate,
++					  u8 band, int len)
++{
++	struct ieee80211_rx_status stat = {
++		.band = band,
++	};
++
++	if (rate->idx < 0 || !rate->count)
++		return 0;
++
++	if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH)
++		stat.bw = RATE_INFO_BW_160;
++	else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH)
++		stat.bw = RATE_INFO_BW_80;
++	else if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
++		stat.bw = RATE_INFO_BW_40;
++	else
++		stat.bw = RATE_INFO_BW_20;
++	/* stat is zero-initialised above, so enc_flags starts out empty */
++	if (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)
++		stat.enc_flags |= RX_ENC_FLAG_SHORTPRE;
++	if (rate->flags & IEEE80211_TX_RC_SHORT_GI)
++		stat.enc_flags |= RX_ENC_FLAG_SHORT_GI;
++
++	stat.rate_idx = rate->idx;
++	if (rate->flags & IEEE80211_TX_RC_VHT_MCS) {
++		stat.encoding = RX_ENC_VHT;
++		stat.rate_idx = ieee80211_rate_get_vht_mcs(rate);
++		stat.nss = ieee80211_rate_get_vht_nss(rate);
++	} else if (rate->flags & IEEE80211_TX_RC_MCS) {
++		stat.encoding = RX_ENC_HT;
++	} else {
++		stat.encoding = RX_ENC_LEGACY;
++	}
++	/* reuse the RX estimator on the synthesised status */
++	return ieee80211_calc_rx_airtime(hw, &stat, len);
++}
++
++u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw,
++ struct ieee80211_tx_info *info,
++ int len)
++{
++ u32 duration = 0;
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(info->status.rates); i++) {
++ struct ieee80211_tx_rate *rate = &info->status.rates[i];
++ u32 cur_duration;
++
++ cur_duration = ieee80211_calc_tx_airtime_rate(hw, rate,
++ info->band, len);
++ if (!cur_duration)
++ break;
++
++ duration += cur_duration * rate->count;
++ }
++
++ return duration;
++}
++EXPORT_SYMBOL_GPL(ieee80211_calc_tx_airtime);
++
++u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
++ struct ieee80211_vif *vif,
++ struct ieee80211_sta *pubsta,
++ int len)
++{
++ struct ieee80211_supported_band *sband;
++ struct ieee80211_chanctx_conf *conf;
++ int rateidx, shift = 0;
++ bool cck, short_pream;
++ u32 basic_rates;
++ u8 band = 0;
++ u16 rate;
++
++ len += 38; /* Ethernet header length */
++
++ conf = rcu_dereference(vif->chanctx_conf);
++ if (conf) {
++ band = conf->def.chan->band;
++ shift = ieee80211_chandef_get_shift(&conf->def);
++ }
++
++ if (pubsta) {
++ struct sta_info *sta = container_of(pubsta, struct sta_info,
++ sta);
++
++ return ieee80211_calc_tx_airtime_rate(hw,
++ &sta->tx_stats.last_rate,
++ band, len);
++ }
++
++ if (!conf)
++ return 0;
++
++ /* No station to get latest rate from, so calculate the worst-case
++ * duration using the lowest configured basic rate.
++ */
++ sband = hw->wiphy->bands[band];
++
++ basic_rates = vif->bss_conf.basic_rates;
++ short_pream = vif->bss_conf.use_short_preamble;
++
++ rateidx = basic_rates ? ffs(basic_rates) - 1 : 0;
++ rate = sband->bitrates[rateidx].bitrate << shift;
++ cck = sband->bitrates[rateidx].flags & IEEE80211_RATE_MANDATORY_B;
++
++ return ieee80211_calc_legacy_rate_duration(rate, short_pream, cck, len);
++}
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -2253,6 +2253,10 @@ const char *ieee80211_get_reason_code_st
+
+ extern const struct ethtool_ops ieee80211_ethtool_ops;
+
++u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
++ struct ieee80211_vif *vif,
++ struct ieee80211_sta *pubsta,
++ int len);
+ #ifdef CPTCFG_MAC80211_NOINLINE
+ #define debug_noinline noinline
+ #else
--- /dev/null
+From: Kan Yan <kyan@google.com>
+Date: Mon, 18 Nov 2019 22:06:09 -0800
+Subject: [PATCH] mac80211: Implement Airtime-based Queue Limit (AQL)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+In order for the Fq_CoDel algorithm integrated in mac80211 layer to operate
+effectively to control excessive queueing latency, the CoDel algorithm
+requires an accurate measure of how long packets stay in the queue, AKA
+sojourn time. The sojourn time measured at the mac80211 layer doesn't
+include queueing latency in the lower layer (firmware/hardware) and CoDel
+expects the lower layer to have a short queue. However, most 802.11ac chipsets
+offload tasks such as TX aggregation to firmware or hardware, thus have a deep
+lower layer queue.
+
+Without a mechanism to control the lower layer queue size, packets only
+stay in mac80211 layer transiently before being sent to firmware queue.
+As a result, the sojourn time measured by CoDel in the mac80211 layer is
+almost always lower than the CoDel latency target, hence CoDel does little
+to control the latency, even when the lower layer queue causes excessive
+latency.
+
+The Byte Queue Limits (BQL) mechanism is commonly used to address the
+similar issue with wired network interface. However, this method cannot be
+applied directly to the wireless network interface. "Bytes" is not a
+suitable measure of queue depth in the wireless network, as the data rate
+can vary dramatically from station to station in the same network, from a
+few Mbps to over Gbps.
+
+This patch implements an Airtime-based Queue Limit (AQL) to make CoDel work
+effectively with wireless drivers that utilize firmware/hardware
+offloading. AQL allows each txq to release just enough packets to the lower
+layer to form 1-2 large aggregations to keep hardware fully utilized and
+retains the rest of the frames in mac80211 layer to be controlled by the
+CoDel algorithm.
+
+Signed-off-by: Kan Yan <kyan@google.com>
+[ Toke: Keep API to set pending airtime internal, fix nits in commit msg ]
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://lore.kernel.org/r/20191119060610.76681-4-kyan@google.com
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+---
+
+--- a/include/net/cfg80211.h
++++ b/include/net/cfg80211.h
+@@ -2603,6 +2603,13 @@ enum wiphy_params_flags {
+
+ #define IEEE80211_DEFAULT_AIRTIME_WEIGHT 256
+
++/* The per TXQ device queue limit in airtime */
++#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L 5000
++#define IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H 12000
++
++/* The per interface airtime threshold to switch to lower queue limit */
++#define IEEE80211_AQL_THRESHOLD 24000
++
+ /**
+ * struct cfg80211_pmksa - PMK Security Association
+ *
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -5559,6 +5559,18 @@ void ieee80211_sta_register_airtime(stru
+ u32 tx_airtime, u32 rx_airtime);
+
+ /**
++ * ieee80211_txq_airtime_check - check if a txq can send frame to device
++ *
++ * @hw: pointer obtained from ieee80211_alloc_hw()
++ * @txq: pointer obtained from station or virtual interface
++ *
++ * Return true if the AQL's airtime limit has not been reached and the txq can
++ * continue to send more packets to the device. Otherwise return false.
++ */
++bool
++ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
++
++/**
+ * ieee80211_iter_keys - iterate keys programmed into the device
+ * @hw: pointer obtained from ieee80211_alloc_hw()
+ * @vif: virtual interface to iterate, may be %NULL for all
+--- a/net/mac80211/debugfs.c
++++ b/net/mac80211/debugfs.c
+@@ -148,6 +148,87 @@ static const struct file_operations aqm_
+ .llseek = default_llseek,
+ };
+
++static ssize_t aql_txq_limit_read(struct file *file,
++ char __user *user_buf,
++ size_t count,
++ loff_t *ppos)
++{
++ struct ieee80211_local *local = file->private_data;
++ char buf[400];
++ int len = 0;
++
++ len = scnprintf(buf, sizeof(buf),
++ "AC AQL limit low AQL limit high\n"
++ "VO %u %u\n"
++ "VI %u %u\n"
++ "BE %u %u\n"
++ "BK %u %u\n",
++ local->aql_txq_limit_low[IEEE80211_AC_VO],
++ local->aql_txq_limit_high[IEEE80211_AC_VO],
++ local->aql_txq_limit_low[IEEE80211_AC_VI],
++ local->aql_txq_limit_high[IEEE80211_AC_VI],
++ local->aql_txq_limit_low[IEEE80211_AC_BE],
++ local->aql_txq_limit_high[IEEE80211_AC_BE],
++ local->aql_txq_limit_low[IEEE80211_AC_BK],
++ local->aql_txq_limit_high[IEEE80211_AC_BK]);
++ return simple_read_from_buffer(user_buf, count, ppos,
++ buf, len);
++}
++
++/* Debugfs write: parse "<ac> <low> <high>" and set that AC's AQL limits. */
++static ssize_t aql_txq_limit_write(struct file *file,
++				   const char __user *user_buf,
++				   size_t count, loff_t *ppos)
++{
++	struct ieee80211_local *local = file->private_data;
++	char buf[100];
++	u32 ac, q_limit_low, q_limit_high, q_limit_low_old, q_limit_high_old;
++	struct sta_info *sta;
++
++	if (count >= sizeof(buf))
++		return -EINVAL;
++
++	if (copy_from_user(buf, user_buf, count))
++		return -EFAULT;
++
++	/* buf is not pre-zeroed: terminate exactly at the end of the input */
++	if (count && buf[count - 1] == '\n')
++		buf[count - 1] = '\0';
++	else
++		buf[count] = '\0';
++
++	if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3)
++		return -EINVAL;
++
++	if (ac >= IEEE80211_NUM_ACS)
++		return -EINVAL;
++
++	q_limit_low_old = local->aql_txq_limit_low[ac];
++	q_limit_high_old = local->aql_txq_limit_high[ac];
++
++	local->aql_txq_limit_low[ac] = q_limit_low;
++	local->aql_txq_limit_high[ac] = q_limit_high;
++
++	mutex_lock(&local->sta_mtx);
++	list_for_each_entry(sta, &local->sta_list, list) {
++		/* If a sta has customized queue limits, keep it */
++		if (sta->airtime[ac].aql_limit_low == q_limit_low_old &&
++		    sta->airtime[ac].aql_limit_high == q_limit_high_old) {
++			sta->airtime[ac].aql_limit_low = q_limit_low;
++			sta->airtime[ac].aql_limit_high = q_limit_high;
++		}
++	}
++	mutex_unlock(&local->sta_mtx);
++	return count;
++}
++
++static const struct file_operations aql_txq_limit_ops = {
++ .write = aql_txq_limit_write,
++ .read = aql_txq_limit_read,
++ .open = simple_open,
++ .llseek = default_llseek,
++};
++
+ static ssize_t force_tx_status_read(struct file *file,
+ char __user *user_buf,
+ size_t count,
+@@ -441,6 +522,10 @@ void debugfs_hw_add(struct ieee80211_loc
+ debugfs_create_u16("airtime_flags", 0600,
+ phyd, &local->airtime_flags);
+
++ DEBUGFS_ADD(aql_txq_limit);
++ debugfs_create_u32("aql_threshold", 0600,
++ phyd, &local->aql_threshold);
++
+ statsd = debugfs_create_dir("statistics", phyd);
+
+ /* if the dir failed, don't put all the other things into the root! */
+--- a/net/mac80211/debugfs_sta.c
++++ b/net/mac80211/debugfs_sta.c
+@@ -197,10 +197,12 @@ static ssize_t sta_airtime_read(struct f
+ {
+ struct sta_info *sta = file->private_data;
+ struct ieee80211_local *local = sta->sdata->local;
+- size_t bufsz = 200;
++ size_t bufsz = 400;
+ char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
+ u64 rx_airtime = 0, tx_airtime = 0;
+ s64 deficit[IEEE80211_NUM_ACS];
++ u32 q_depth[IEEE80211_NUM_ACS];
++ u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS];
+ ssize_t rv;
+ int ac;
+
+@@ -212,19 +214,22 @@ static ssize_t sta_airtime_read(struct f
+ rx_airtime += sta->airtime[ac].rx_airtime;
+ tx_airtime += sta->airtime[ac].tx_airtime;
+ deficit[ac] = sta->airtime[ac].deficit;
++ q_limit_l[ac] = sta->airtime[ac].aql_limit_low;
++ q_limit_h[ac] = sta->airtime[ac].aql_limit_high;
+ spin_unlock_bh(&local->active_txq_lock[ac]);
++ q_depth[ac] = atomic_read(&sta->airtime[ac].aql_tx_pending);
+ }
+
+ p += scnprintf(p, bufsz + buf - p,
+ "RX: %llu us\nTX: %llu us\nWeight: %u\n"
+- "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
+- rx_airtime,
+- tx_airtime,
+- sta->airtime_weight,
+- deficit[0],
+- deficit[1],
+- deficit[2],
+- deficit[3]);
++ "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n"
++ "Q depth: VO: %u us VI: %u us BE: %u us BK: %u us\n"
++ "Q limit[low/high]: VO: %u/%u VI: %u/%u BE: %u/%u BK: %u/%u\n",
++ rx_airtime, tx_airtime, sta->airtime_weight,
++ deficit[0], deficit[1], deficit[2], deficit[3],
++ q_depth[0], q_depth[1], q_depth[2], q_depth[3],
++ q_limit_l[0], q_limit_h[0], q_limit_l[1], q_limit_h[1],
++ q_limit_l[2], q_limit_h[2], q_limit_l[3], q_limit_h[3]),
+
+ rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+ kfree(buf);
+@@ -236,7 +241,25 @@ static ssize_t sta_airtime_write(struct
+ {
+ struct sta_info *sta = file->private_data;
+ struct ieee80211_local *local = sta->sdata->local;
+- int ac;
++ u32 ac, q_limit_l, q_limit_h;
++ char _buf[100] = {}, *buf = _buf;
++
++ if (count > sizeof(_buf))
++ return -EINVAL;
++
++ if (copy_from_user(buf, userbuf, count))
++ return -EFAULT;
++
++ buf[sizeof(_buf) - 1] = '\0';
++ if (sscanf(buf, "queue limit %u %u %u", &ac, &q_limit_l, &q_limit_h)
++ != 3)
++ return -EINVAL;
++
++ if (ac >= IEEE80211_NUM_ACS)
++ return -EINVAL;
++
++ sta->airtime[ac].aql_limit_low = q_limit_l;
++ sta->airtime[ac].aql_limit_high = q_limit_h;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ spin_lock_bh(&local->active_txq_lock[ac]);
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -1142,6 +1142,10 @@ struct ieee80211_local {
+ u16 schedule_round[IEEE80211_NUM_ACS];
+
+ u16 airtime_flags;
++ u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
++ u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
++ u32 aql_threshold;
++ atomic_t aql_total_pending_airtime;
+
+ const struct ieee80211_ops *ops;
+
+--- a/net/mac80211/main.c
++++ b/net/mac80211/main.c
+@@ -669,8 +669,16 @@ struct ieee80211_hw *ieee80211_alloc_hw_
+ for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+ INIT_LIST_HEAD(&local->active_txqs[i]);
+ spin_lock_init(&local->active_txq_lock[i]);
++ local->aql_txq_limit_low[i] = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L;
++ local->aql_txq_limit_high[i] =
++ IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H;
+ }
+- local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
++
++ local->airtime_flags = AIRTIME_USE_TX |
++ AIRTIME_USE_RX |
++ AIRTIME_USE_AQL;
++ local->aql_threshold = IEEE80211_AQL_THRESHOLD;
++ atomic_set(&local->aql_total_pending_airtime, 0);
+
+ INIT_LIST_HEAD(&local->chanctx_list);
+ mutex_init(&local->chanctx_mtx);
+--- a/net/mac80211/sta_info.c
++++ b/net/mac80211/sta_info.c
+@@ -411,6 +411,9 @@ struct sta_info *sta_info_alloc(struct i
+ skb_queue_head_init(&sta->ps_tx_buf[i]);
+ skb_queue_head_init(&sta->tx_filtered[i]);
+ sta->airtime[i].deficit = sta->airtime_weight;
++ atomic_set(&sta->airtime[i].aql_tx_pending, 0);
++ sta->airtime[i].aql_limit_low = local->aql_txq_limit_low[i];
++ sta->airtime[i].aql_limit_high = local->aql_txq_limit_high[i];
+ }
+
+ for (i = 0; i < IEEE80211_NUM_TIDS; i++)
+@@ -1908,6 +1911,41 @@ void ieee80211_sta_register_airtime(stru
+ }
+ EXPORT_SYMBOL(ieee80211_sta_register_airtime);
+
++void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
++ struct sta_info *sta, u8 ac,
++ u16 tx_airtime, bool tx_completed)
++{
++ int tx_pending;
++
++ if (!tx_completed) {
++ if (sta)
++ atomic_add(tx_airtime,
++ &sta->airtime[ac].aql_tx_pending);
++
++ atomic_add(tx_airtime, &local->aql_total_pending_airtime);
++ return;
++ }
++
++ if (sta) {
++ tx_pending = atomic_sub_return(tx_airtime,
++ &sta->airtime[ac].aql_tx_pending);
++ if (WARN_ONCE(tx_pending < 0,
++ "STA %pM AC %d txq pending airtime underflow: %u, %u",
++ sta->addr, ac, tx_pending, tx_airtime))
++ atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending,
++ tx_pending, 0);
++ }
++
++ tx_pending = atomic_sub_return(tx_airtime,
++ &local->aql_total_pending_airtime);
++ if (WARN_ONCE(tx_pending < 0,
++ "Device %s AC %d pending airtime underflow: %u, %u",
++ wiphy_name(local->hw.wiphy), ac, tx_pending,
++ tx_airtime))
++ atomic_cmpxchg(&local->aql_total_pending_airtime,
++ tx_pending, 0);
++}
++
+ int sta_info_move_state(struct sta_info *sta,
+ enum ieee80211_sta_state new_state)
+ {
+--- a/net/mac80211/sta_info.h
++++ b/net/mac80211/sta_info.h
+@@ -127,13 +127,21 @@ enum ieee80211_agg_stop_reason {
+ /* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
+ #define AIRTIME_USE_TX BIT(0)
+ #define AIRTIME_USE_RX BIT(1)
++#define AIRTIME_USE_AQL BIT(2)
+
+ struct airtime_info {
+ u64 rx_airtime;
+ u64 tx_airtime;
+ s64 deficit;
++ atomic_t aql_tx_pending; /* Estimated airtime for frames pending */
++ u32 aql_limit_low;
++ u32 aql_limit_high;
+ };
+
++void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
++ struct sta_info *sta, u8 ac,
++ u16 tx_airtime, bool tx_completed);
++
+ struct sta_info;
+
+ /**
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -3667,7 +3667,8 @@ struct ieee80211_txq *ieee80211_next_txq
+ {
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_txq *ret = NULL;
+- struct txq_info *txqi = NULL;
++ struct txq_info *txqi = NULL, *head = NULL;
++ bool found_eligible_txq = false;
+
+ spin_lock_bh(&local->active_txq_lock[ac]);
+
+@@ -3678,13 +3679,30 @@ struct ieee80211_txq *ieee80211_next_txq
+ if (!txqi)
+ goto out;
+
++ if (txqi == head) {
++ if (!found_eligible_txq)
++ goto out;
++ else
++ found_eligible_txq = false;
++ }
++
++ if (!head)
++ head = txqi;
++
+ if (txqi->txq.sta) {
+ struct sta_info *sta = container_of(txqi->txq.sta,
+- struct sta_info, sta);
++ struct sta_info, sta);
++ bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
++ s64 deficit = sta->airtime[txqi->txq.ac].deficit;
+
+- if (sta->airtime[txqi->txq.ac].deficit < 0) {
++ if (aql_check)
++ found_eligible_txq = true;
++
++ if (deficit < 0)
+ sta->airtime[txqi->txq.ac].deficit +=
+ sta->airtime_weight;
++
++ if (deficit < 0 || !aql_check) {
+ list_move_tail(&txqi->schedule_order,
+ &local->active_txqs[txqi->txq.ac]);
+ goto begin;
+@@ -3738,6 +3756,33 @@ void __ieee80211_schedule_txq(struct iee
+ }
+ EXPORT_SYMBOL(__ieee80211_schedule_txq);
+
++bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
++ struct ieee80211_txq *txq)
++{
++ struct sta_info *sta;
++ struct ieee80211_local *local = hw_to_local(hw);
++
++ if (!(local->airtime_flags & AIRTIME_USE_AQL))
++ return true;
++
++ if (!txq->sta)
++ return true;
++
++ sta = container_of(txq->sta, struct sta_info, sta);
++ if (atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
++ sta->airtime[txq->ac].aql_limit_low)
++ return true;
++
++ if (atomic_read(&local->aql_total_pending_airtime) <
++ local->aql_threshold &&
++ atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
++ sta->airtime[txq->ac].aql_limit_high)
++ return true;
++
++ return false;
++}
++EXPORT_SYMBOL(ieee80211_txq_airtime_check);
++
+ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq)
+ {
--- /dev/null
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Mon, 18 Nov 2019 22:06:10 -0800
+Subject: [PATCH] mac80211: Use Airtime-based Queue Limits (AQL) on packet
+ dequeue
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The previous commit added the ability to throttle stations when they queue
+too much airtime in the hardware. This commit enables the functionality by
+calculating the expected airtime usage of each packet that is dequeued from
+the TXQs in mac80211, and accounting that as pending airtime.
+
+The estimated airtime for each skb is stored in the tx_info, so we can
+subtract the same amount from the running total when the skb is freed or
+recycled. The throttling mechanism relies on this accounting to be
+accurate (i.e., that we are not freeing skbs without subtracting any
+airtime they were accounted for), so we put the subtraction into
+ieee80211_report_used_skb(). As an optimisation, we also subtract the
+airtime on regular TX completion, zeroing out the value stored in the
+packet afterwards, to avoid having to do an expensive lookup of the station
+from the packet data on every packet.
+
+This patch does *not* include any mechanism to wake a throttled TXQ again,
+on the assumption that this will happen anyway as a side effect of whatever
+freed the skb (most commonly a TX completion).
+
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://lore.kernel.org/r/20191119060610.76681-5-kyan@google.com
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+---
+
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -1060,6 +1060,22 @@ struct ieee80211_tx_info {
+ };
+ };
+
++static inline u16
++ieee80211_info_set_tx_time_est(struct ieee80211_tx_info *info, u16 tx_time_est)
++{
++ /* We only have 10 bits in tx_time_est, so store airtime
++ * in increments of 4us and clamp the maximum to 2**12-1
++ */
++ info->tx_time_est = min_t(u16, tx_time_est, 4095) >> 2;
++ return info->tx_time_est << 2;
++}
++
++static inline u16
++ieee80211_info_get_tx_time_est(struct ieee80211_tx_info *info)
++{
++ return info->tx_time_est << 2;
++}
++
+ /**
+ * struct ieee80211_tx_status - extended tx staus info for rate control
+ *
+--- a/net/mac80211/status.c
++++ b/net/mac80211/status.c
+@@ -670,12 +670,26 @@ static void ieee80211_report_used_skb(st
+ struct sk_buff *skb, bool dropped)
+ {
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
++ u16 tx_time_est = ieee80211_info_get_tx_time_est(info);
+ struct ieee80211_hdr *hdr = (void *)skb->data;
+ bool acked = info->flags & IEEE80211_TX_STAT_ACK;
+
+ if (dropped)
+ acked = false;
+
++ if (tx_time_est) {
++ struct sta_info *sta;
++
++ rcu_read_lock();
++
++ sta = sta_info_get_by_addrs(local, hdr->addr1, hdr->addr2);
++ ieee80211_sta_update_pending_airtime(local, sta,
++ skb_get_queue_mapping(skb),
++ tx_time_est,
++ true);
++ rcu_read_unlock();
++ }
++
+ if (info->flags & IEEE80211_TX_INTFL_MLME_CONN_TX) {
+ struct ieee80211_sub_if_data *sdata;
+
+@@ -885,6 +899,7 @@ static void __ieee80211_tx_status(struct
+ struct ieee80211_bar *bar;
+ int shift = 0;
+ int tid = IEEE80211_NUM_TIDS;
++ u16 tx_time_est;
+
+ rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count);
+
+@@ -996,6 +1011,17 @@ static void __ieee80211_tx_status(struct
+ ieee80211_sta_register_airtime(&sta->sta, tid,
+ info->status.tx_time, 0);
+
++ if ((tx_time_est = ieee80211_info_get_tx_time_est(info)) > 0) {
++ /* Do this here to avoid the expensive lookup of the sta
++ * in ieee80211_report_used_skb().
++ */
++ ieee80211_sta_update_pending_airtime(local, sta,
++ skb_get_queue_mapping(skb),
++ tx_time_est,
++ true);
++ ieee80211_info_set_tx_time_est(info, 0);
++ }
++
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
+ if (acked) {
+ if (sta->status_stats.lost_packets)
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -3544,6 +3544,9 @@ struct sk_buff *ieee80211_tx_dequeue(str
+
+ WARN_ON_ONCE(softirq_count() == 0);
+
++ if (!ieee80211_txq_airtime_check(hw, txq))
++ return NULL;
++
+ begin:
+ spin_lock_bh(&fq->lock);
+
+@@ -3654,6 +3657,21 @@ begin:
+ }
+
+ IEEE80211_SKB_CB(skb)->control.vif = vif;
++
++ if (local->airtime_flags & AIRTIME_USE_AQL) {
++ u32 airtime;
++
++ airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta,
++ skb->len);
++ if (airtime) {
++ airtime = ieee80211_info_set_tx_time_est(info, airtime);
++ ieee80211_sta_update_pending_airtime(local, tx.sta,
++ txq->ac,
++ airtime,
++ false);
++ }
++ }
++
+ return skb;
+
+ out:
--- /dev/null
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Tue, 26 Nov 2019 15:09:39 +0300
+Subject: [PATCH] mac80211: airtime: Fix an off by one in
+ ieee80211_calc_rx_airtime()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This code was copied from mt76 and inherited an off by one bug from
+there. The > should be >= so that we don't read one element beyond
+the end of the array.
+
+Fixes: db3e1c40cf2f ("mac80211: Import airtime calculation code from mt76")
+Reported-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://lore.kernel.org/r/20191126120910.ftr4t7me3by32aiz@kili.mountain
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+---
+
+--- a/net/mac80211/airtime.c
++++ b/net/mac80211/airtime.c
+@@ -442,7 +442,7 @@ u32 ieee80211_calc_rx_airtime(struct iee
+ return 0;
+
+ sband = hw->wiphy->bands[status->band];
+- if (!sband || status->rate_idx > sband->n_bitrates)
++ if (!sband || status->rate_idx >= sband->n_bitrates)
+ return 0;
+
+ rate = &sband->bitrates[status->rate_idx];
--- /dev/null
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Thu, 12 Dec 2019 12:14:37 +0100
+Subject: [PATCH] mac80211: Turn AQL into an NL80211_EXT_FEATURE
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Instead of just having an airtime flag in debugfs, turn AQL into a proper
+NL80211_EXT_FEATURE, so drivers can turn it on when they are ready, and so
+we also expose the presence of the feature to userspace.
+
+This also has the effect of flipping the default, so drivers have to opt in
+to using AQL instead of getting it by default with TXQs. To keep
+functionality the same as pre-patch, we set this feature for ath10k (which
+is where it is needed the most).
+
+While we're at it, split out the debugfs interface so AQL gets its own
+per-station debugfs file instead of using the 'airtime' file.
+
+[Johannes:]
+This effectively disables AQL for iwlwifi, where it fixes a number of
+issues:
+ * TSO in iwlwifi is causing underflows and associated warnings in AQL
+ * HE (802.11ax) rates aren't reported properly so at HE rates, AQL could
+ never have a valid estimate (it'd use 6 Mbps instead of up to 2400!)
+
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://lore.kernel.org/r/20191212111437.224294-1-toke@redhat.com
+Fixes: 3ace10f5b5ad ("mac80211: Implement Airtime-based Queue Limit (AQL)")
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+---
+
+--- a/drivers/net/wireless/ath/ath10k/mac.c
++++ b/drivers/net/wireless/ath/ath10k/mac.c
+@@ -8868,6 +8868,7 @@ int ath10k_mac_register(struct ath10k *a
+ wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_VHT_IBSS);
+ wiphy_ext_feature_set(ar->hw->wiphy,
+ NL80211_EXT_FEATURE_SET_SCAN_DWELL);
++ wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_AQL);
+
+ if (test_bit(WMI_SERVICE_TX_DATA_ACK_RSSI, ar->wmi.svc_map) ||
+ test_bit(WMI_SERVICE_HTT_MGMT_TX_COMP_VALID_FLAGS, ar->wmi.svc_map))
+--- a/include/uapi/linux/nl80211.h
++++ b/include/uapi/linux/nl80211.h
+@@ -5484,6 +5484,10 @@ enum nl80211_feature_flags {
+ * @NL80211_EXT_FEATURE_SAE_OFFLOAD: Device wants to do SAE authentication in
+ * station mode (SAE password is passed as part of the connect command).
+ *
++ * @NL80211_EXT_FEATURE_AQL: The driver supports the Airtime Queue Limit (AQL)
++ * feature, which prevents bufferbloat by using the expected transmission
++ * time to limit the amount of data buffered in the hardware.
++ *
+ * @NUM_NL80211_EXT_FEATURES: number of extended features.
+ * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
+ */
+@@ -5529,6 +5533,8 @@ enum nl80211_ext_feature_index {
+ NL80211_EXT_FEATURE_EXT_KEY_ID,
+ NL80211_EXT_FEATURE_STA_TX_PWR,
+ NL80211_EXT_FEATURE_SAE_OFFLOAD,
++ NL80211_EXT_FEATURE_VLAN_OFFLOAD,
++ NL80211_EXT_FEATURE_AQL,
+
+ /* add new features before the definition below */
+ NUM_NL80211_EXT_FEATURES,
+--- a/net/mac80211/debugfs_sta.c
++++ b/net/mac80211/debugfs_sta.c
+@@ -201,8 +201,6 @@ static ssize_t sta_airtime_read(struct f
+ char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
+ u64 rx_airtime = 0, tx_airtime = 0;
+ s64 deficit[IEEE80211_NUM_ACS];
+- u32 q_depth[IEEE80211_NUM_ACS];
+- u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS];
+ ssize_t rv;
+ int ac;
+
+@@ -214,6 +212,56 @@ static ssize_t sta_airtime_read(struct f
+ rx_airtime += sta->airtime[ac].rx_airtime;
+ tx_airtime += sta->airtime[ac].tx_airtime;
+ deficit[ac] = sta->airtime[ac].deficit;
++ spin_unlock_bh(&local->active_txq_lock[ac]);
++ }
++
++ p += scnprintf(p, bufsz + buf - p,
++ "RX: %llu us\nTX: %llu us\nWeight: %u\n"
++ "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
++ rx_airtime, tx_airtime, sta->airtime_weight,
++ deficit[0], deficit[1], deficit[2], deficit[3]);
++
++ rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
++ kfree(buf);
++ return rv;
++}
++
++static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
++ size_t count, loff_t *ppos)
++{
++ struct sta_info *sta = file->private_data;
++ struct ieee80211_local *local = sta->sdata->local;
++ int ac;
++
++ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
++ spin_lock_bh(&local->active_txq_lock[ac]);
++ sta->airtime[ac].rx_airtime = 0;
++ sta->airtime[ac].tx_airtime = 0;
++ sta->airtime[ac].deficit = sta->airtime_weight;
++ spin_unlock_bh(&local->active_txq_lock[ac]);
++ }
++
++ return count;
++}
++STA_OPS_RW(airtime);
++
++static ssize_t sta_aql_read(struct file *file, char __user *userbuf,
++ size_t count, loff_t *ppos)
++{
++ struct sta_info *sta = file->private_data;
++ struct ieee80211_local *local = sta->sdata->local;
++ size_t bufsz = 400;
++ char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
++ u32 q_depth[IEEE80211_NUM_ACS];
++ u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS];
++ ssize_t rv;
++ int ac;
++
++ if (!buf)
++ return -ENOMEM;
++
++ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
++ spin_lock_bh(&local->active_txq_lock[ac]);
+ q_limit_l[ac] = sta->airtime[ac].aql_limit_low;
+ q_limit_h[ac] = sta->airtime[ac].aql_limit_high;
+ spin_unlock_bh(&local->active_txq_lock[ac]);
+@@ -221,12 +269,8 @@ static ssize_t sta_airtime_read(struct f
+ }
+
+ p += scnprintf(p, bufsz + buf - p,
+- "RX: %llu us\nTX: %llu us\nWeight: %u\n"
+- "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n"
+ "Q depth: VO: %u us VI: %u us BE: %u us BK: %u us\n"
+ "Q limit[low/high]: VO: %u/%u VI: %u/%u BE: %u/%u BK: %u/%u\n",
+- rx_airtime, tx_airtime, sta->airtime_weight,
+- deficit[0], deficit[1], deficit[2], deficit[3],
+ q_depth[0], q_depth[1], q_depth[2], q_depth[3],
+ q_limit_l[0], q_limit_h[0], q_limit_l[1], q_limit_h[1],
+ q_limit_l[2], q_limit_h[2], q_limit_l[3], q_limit_h[3]),
+@@ -236,11 +280,10 @@ static ssize_t sta_airtime_read(struct f
+ return rv;
+ }
+
+-static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
++static ssize_t sta_aql_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+ {
+ struct sta_info *sta = file->private_data;
+- struct ieee80211_local *local = sta->sdata->local;
+ u32 ac, q_limit_l, q_limit_h;
+ char _buf[100] = {}, *buf = _buf;
+
+@@ -251,7 +294,7 @@ static ssize_t sta_airtime_write(struct
+ return -EFAULT;
+
+ buf[sizeof(_buf) - 1] = '\0';
+- if (sscanf(buf, "queue limit %u %u %u", &ac, &q_limit_l, &q_limit_h)
++ if (sscanf(buf, "limit %u %u %u", &ac, &q_limit_l, &q_limit_h)
+ != 3)
+ return -EINVAL;
+
+@@ -261,17 +304,10 @@ static ssize_t sta_airtime_write(struct
+ sta->airtime[ac].aql_limit_low = q_limit_l;
+ sta->airtime[ac].aql_limit_high = q_limit_h;
+
+- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+- spin_lock_bh(&local->active_txq_lock[ac]);
+- sta->airtime[ac].rx_airtime = 0;
+- sta->airtime[ac].tx_airtime = 0;
+- sta->airtime[ac].deficit = sta->airtime_weight;
+- spin_unlock_bh(&local->active_txq_lock[ac]);
+- }
+-
+ return count;
+ }
+-STA_OPS_RW(airtime);
++STA_OPS_RW(aql);
++
+
+ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+@@ -1001,6 +1037,10 @@ void ieee80211_sta_debugfs_add(struct st
+ NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+ DEBUGFS_ADD(airtime);
+
++ if (wiphy_ext_feature_isset(local->hw.wiphy,
++ NL80211_EXT_FEATURE_AQL))
++ DEBUGFS_ADD(aql);
++
+ if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
+ debugfs_create_x32("driver_buffered_tids", 0400,
+ sta->debugfs_dir,
+--- a/net/mac80211/main.c
++++ b/net/mac80211/main.c
+@@ -674,9 +674,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_
+ IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H;
+ }
+
+- local->airtime_flags = AIRTIME_USE_TX |
+- AIRTIME_USE_RX |
+- AIRTIME_USE_AQL;
++ local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
+ local->aql_threshold = IEEE80211_AQL_THRESHOLD;
+ atomic_set(&local->aql_total_pending_airtime, 0);
+
+--- a/net/mac80211/sta_info.c
++++ b/net/mac80211/sta_info.c
+@@ -1917,6 +1917,9 @@ void ieee80211_sta_update_pending_airtim
+ {
+ int tx_pending;
+
++ if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
++ return;
++
+ if (!tx_completed) {
+ if (sta)
+ atomic_add(tx_airtime,
+--- a/net/mac80211/sta_info.h
++++ b/net/mac80211/sta_info.h
+@@ -127,7 +127,6 @@ enum ieee80211_agg_stop_reason {
+ /* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
+ #define AIRTIME_USE_TX BIT(0)
+ #define AIRTIME_USE_RX BIT(1)
+-#define AIRTIME_USE_AQL BIT(2)
+
+ struct airtime_info {
+ u64 rx_airtime;
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -3658,7 +3658,7 @@ begin:
+
+ IEEE80211_SKB_CB(skb)->control.vif = vif;
+
+- if (local->airtime_flags & AIRTIME_USE_AQL) {
++ if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
+ u32 airtime;
+
+ airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta,
+@@ -3780,7 +3780,7 @@ bool ieee80211_txq_airtime_check(struct
+ struct sta_info *sta;
+ struct ieee80211_local *local = hw_to_local(hw);
+
+- if (!(local->airtime_flags & AIRTIME_USE_AQL))
++ if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
+ return true;
+
+ if (!txq->sta)
--- /dev/null
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Wed, 15 Jan 2020 12:25:50 +0100
+Subject: [PATCH] mac80211: use more bits for ack_frame_id
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+It turns out that this wasn't a good idea, I hit a test failure in
+hwsim due to this. That particular failure was easily worked around,
+but it raised questions: if an AP needs to, for example, send action
+frames to each connected station, the current limit is nowhere near
+enough (especially if those stations are sleeping and the frames are
+queued for a while.)
+
+Shuffle around some bits to make more room for ack_frame_id to allow
+up to 8192 queued up frames, that's enough for queueing 4 frames to
+each connected station, even at the maximum of 2007 stations on a
+single AP.
+
+We take the bits from band (which currently only needs 2, but I leave 3 in
+case we add another band) and from the hw_queue, which can only need
+4 since it has a limit of 16 queues.
+
+Fixes: 6912daed05e1 ("mac80211: Shrink the size of ack_frame_id to make room for tx_time_est")
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
+---
+
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -1004,12 +1004,11 @@ ieee80211_rate_get_vht_nss(const struct
+ struct ieee80211_tx_info {
+ /* common information */
+ u32 flags;
+- u8 band;
+-
+- u8 hw_queue;
+-
+- u16 ack_frame_id:6;
+- u16 tx_time_est:10;
++ u32 band:3,
++ ack_frame_id:13,
++ hw_queue:4,
++ tx_time_est:10;
++ /* 2 free bits */
+
+ union {
+ struct {
+--- a/net/mac80211/cfg.c
++++ b/net/mac80211/cfg.c
+@@ -3427,7 +3427,7 @@ int ieee80211_attach_ack_skb(struct ieee
+
+ spin_lock_irqsave(&local->ack_status_lock, spin_flags);
+ id = idr_alloc(&local->ack_status_frames, ack_skb,
+- 1, 0x40, GFP_ATOMIC);
++ 1, 0x2000, GFP_ATOMIC);
+ spin_unlock_irqrestore(&local->ack_status_lock, spin_flags);
+
+ if (id < 0) {
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -2443,7 +2443,7 @@ static int ieee80211_store_ack_skb(struc
+
+ spin_lock_irqsave(&local->ack_status_lock, flags);
+ id = idr_alloc(&local->ack_status_frames, ack_skb,
+- 1, 0x40, GFP_ATOMIC);
++ 1, 0x2000, GFP_ATOMIC);
+ spin_unlock_irqrestore(&local->ack_status_lock, flags);
+
+ if (id >= 0) {
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
-@@ -3447,6 +3447,7 @@ struct cfg80211_update_owe_info {
+@@ -3454,6 +3454,7 @@ struct cfg80211_update_owe_info {
* (as advertised by the nl80211 feature flag.)
* @get_tx_power: store the current TX power into the dbm variable;
* return 0 if successful
*
* @set_wds_peer: set the WDS peer for a WDS interface
*
-@@ -3759,6 +3760,7 @@ struct cfg80211_ops {
+@@ -3766,6 +3767,7 @@ struct cfg80211_ops {
enum nl80211_tx_power_setting type, int mbm);
int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev,
int *dbm);
const u8 *addr);
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
-@@ -1484,6 +1484,7 @@ enum ieee80211_smps_mode {
+@@ -1501,6 +1501,7 @@ enum ieee80211_smps_mode {
*
* @power_level: requested transmit power (in dBm), backward compatibility
* value only that is set to the minimum of all interfaces
*
* @chandef: the channel definition to tune to
* @radar_enabled: whether radar detection is enabled
-@@ -1504,6 +1505,7 @@ enum ieee80211_smps_mode {
+@@ -1521,6 +1522,7 @@ enum ieee80211_smps_mode {
struct ieee80211_conf {
u32 flags;
int power_level, dynamic_ps_timeout;
CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd)
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
-@@ -1372,6 +1372,7 @@ struct ieee80211_local {
+@@ -1376,6 +1376,7 @@ struct ieee80211_local {
int dynamic_ps_forced_timeout;
int user_power_level; /* in dBm, for all interfaces */