--- /dev/null
+From: Manikanta Pubbisetty <mpubbise@codeaurora.org>
+Date: Wed, 11 Jul 2018 00:12:53 +0530
+Subject: [PATCH] mac80211: add stop/start logic for software TXQs
+
+Sometimes, it is required to stop transmissions momentarily and resume
+them later; stopping the txqs becomes critical in scenarios where packet
+transmission has to cease completely, for example during a hardware
+restart, during off-channel operations, or when initiating CSA (upon
+detecting radar on the DFS channel).
+
+The TX queue stop/start logic in mac80211 works well in stopping the TX
+when drivers make use of netdev queues, i.e., when Qdiscs in the network
+layer take care of traffic scheduling. Since the devices implementing
+wake_tx_queue can run without Qdiscs, packets will be handed to mac80211
+directly without queueing them in the netdev queues.
+
+Also, mac80211 does not invoke any of the
+netif_stop_*/netif_wake_* APIs if wake_tx_queue is implemented.
+Since the queues are not stopped in this case, transmissions can continue
+and this will negatively impact the operation of the wireless device.
+
+For example, during a hardware restart we stop the netdev queues so
+that packets are not sent to the driver. Since ath10k implements
+wake_tx_queue, the TX queues will not be stopped and packets might
+reach the hardware while it is restarting; this can make the hardware
+unresponsive, and the only possible option for recovery is to reboot
+the entire system.
+
+There is another problem: it has been observed that packets were sent
+on the DFS channel for a prolonged duration after radar detection,
+impacting the channel closing time.
+
+We could still invoke the netif stop/wake APIs when wake_tx_queue is
+implemented, but this could lead to packet drops in the network layer.
+Adding stop/start logic for the software TXQs in mac80211 makes more
+sense, and that is what this change does.
+
+Signed-off-by: Manikanta Pubbisetty <mpubbise@codeaurora.org>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+---
+
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -1504,6 +1504,8 @@ enum ieee80211_vif_flags {
+ * @drv_priv: data area for driver use, will always be aligned to
+ * sizeof(void \*).
+ * @txq: the multicast data TX queue (if driver uses the TXQ abstraction)
++ * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped,
++ * protected by fq->lock.
+ */
+ struct ieee80211_vif {
+ enum nl80211_iftype type;
+@@ -1528,6 +1530,8 @@ struct ieee80211_vif {
+
+ unsigned int probe_req_reg;
+
++ bool txqs_stopped[IEEE80211_NUM_ACS];
++
+ /* must be last */
+ u8 drv_priv[0] __aligned(sizeof(void *));
+ };
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -818,6 +818,7 @@ enum txq_info_flags {
+ IEEE80211_TXQ_STOP,
+ IEEE80211_TXQ_AMPDU,
+ IEEE80211_TXQ_NO_AMSDU,
++ IEEE80211_TXQ_STOP_NETIF_TX,
+ };
+
+ /**
+@@ -1226,6 +1227,7 @@ struct ieee80211_local {
+
+ struct sk_buff_head pending[IEEE80211_MAX_QUEUES];
+ struct tasklet_struct tx_pending_tasklet;
++ struct tasklet_struct wake_txqs_tasklet;
+
+ atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES];
+
+@@ -2039,6 +2041,7 @@ void ieee80211_txq_remove_vlan(struct ie
+ struct ieee80211_sub_if_data *sdata);
+ void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats,
+ struct txq_info *txqi);
++void ieee80211_wake_txqs(unsigned long data);
+ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
+ u16 transaction, u16 auth_alg, u16 status,
+ const u8 *extra, size_t extra_len, const u8 *bssid,
+--- a/net/mac80211/main.c
++++ b/net/mac80211/main.c
+@@ -686,6 +686,10 @@ struct ieee80211_hw *ieee80211_alloc_hw_
+ tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending,
+ (unsigned long)local);
+
++ if (ops->wake_tx_queue)
++ tasklet_init(&local->wake_txqs_tasklet, ieee80211_wake_txqs,
++ (unsigned long)local);
++
+ tasklet_init(&local->tasklet,
+ ieee80211_tasklet_handler,
+ (unsigned long) local);
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -3482,13 +3482,19 @@ struct sk_buff *ieee80211_tx_dequeue(str
+ struct ieee80211_tx_info *info;
+ struct ieee80211_tx_data tx;
+ ieee80211_tx_result r;
+- struct ieee80211_vif *vif;
++ struct ieee80211_vif *vif = txq->vif;
+
+ spin_lock_bh(&fq->lock);
+
+- if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
++ if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ||
++ test_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags))
+ goto out;
+
++ if (vif->txqs_stopped[ieee80211_ac_from_tid(txq->tid)]) {
++ set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags);
++ goto out;
++ }
++
+ /* Make sure fragments stay together. */
+ skb = __skb_dequeue(&txqi->frags);
+ if (skb)
+@@ -3583,6 +3589,7 @@ begin:
+ }
+
+ IEEE80211_SKB_CB(skb)->control.vif = vif;
++
+ out:
+ spin_unlock_bh(&fq->lock);
+
+--- a/net/mac80211/util.c
++++ b/net/mac80211/util.c
+@@ -240,6 +240,99 @@ __le16 ieee80211_ctstoself_duration(stru
+ }
+ EXPORT_SYMBOL(ieee80211_ctstoself_duration);
+
++static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
++{
++ struct ieee80211_local *local = sdata->local;
++ struct ieee80211_vif *vif = &sdata->vif;
++ struct fq *fq = &local->fq;
++ struct ps_data *ps = NULL;
++ struct txq_info *txqi;
++ struct sta_info *sta;
++ int i;
++
++ spin_lock_bh(&fq->lock);
++
++ if (sdata->vif.type == NL80211_IFTYPE_AP)
++ ps = &sdata->bss->ps;
++
++ sdata->vif.txqs_stopped[ac] = false;
++
++ list_for_each_entry_rcu(sta, &local->sta_list, list) {
++ if (sdata != sta->sdata)
++ continue;
++
++ for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
++ struct ieee80211_txq *txq = sta->sta.txq[i];
++
++ txqi = to_txq_info(txq);
++
++ if (ac != txq->ac)
++ continue;
++
++ if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX,
++ &txqi->flags))
++ continue;
++
++ spin_unlock_bh(&fq->lock);
++ drv_wake_tx_queue(local, txqi);
++ spin_lock_bh(&fq->lock);
++ }
++ }
++
++ if (!vif->txq)
++ goto out;
++
++ txqi = to_txq_info(vif->txq);
++
++ if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags) ||
++ (ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac)
++ goto out;
++
++ spin_unlock_bh(&fq->lock);
++
++ drv_wake_tx_queue(local, txqi);
++ return;
++out:
++ spin_unlock_bh(&fq->lock);
++}
++
++void ieee80211_wake_txqs(unsigned long data)
++{
++ struct ieee80211_local *local = (struct ieee80211_local *)data;
++ struct ieee80211_sub_if_data *sdata;
++ int n_acs = IEEE80211_NUM_ACS;
++ unsigned long flags;
++ int i;
++
++ rcu_read_lock();
++ spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
++
++ if (local->hw.queues < IEEE80211_NUM_ACS)
++ n_acs = 1;
++
++ for (i = 0; i < local->hw.queues; i++) {
++ if (local->queue_stop_reasons[i])
++ continue;
++
++ spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
++ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
++ int ac;
++
++ for (ac = 0; ac < n_acs; ac++) {
++ int ac_queue = sdata->vif.hw_queue[ac];
++
++ if (ac_queue == i ||
++ sdata->vif.cab_queue == i)
++ __ieee80211_wake_txqs(sdata, ac);
++ }
++ }
++ spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
++ }
++
++ spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
++ rcu_read_unlock();
++}
++
+ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
+ {
+ struct ieee80211_sub_if_data *sdata;
+@@ -308,6 +401,9 @@ static void __ieee80211_wake_queue(struc
+ rcu_read_unlock();
+ } else
+ tasklet_schedule(&local->tx_pending_tasklet);
++
++ if (local->ops->wake_tx_queue)
++ tasklet_schedule(&local->wake_txqs_tasklet);
+ }
+
+ void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
+@@ -351,9 +447,6 @@ static void __ieee80211_stop_queue(struc
+ if (__test_and_set_bit(reason, &local->queue_stop_reasons[queue]))
+ return;
+
+- if (local->ops->wake_tx_queue)
+- return;
+-
+ if (local->hw.queues < IEEE80211_NUM_ACS)
+ n_acs = 1;
+
+@@ -366,8 +459,15 @@ static void __ieee80211_stop_queue(struc
+
+ for (ac = 0; ac < n_acs; ac++) {
+ if (sdata->vif.hw_queue[ac] == queue ||
+- sdata->vif.cab_queue == queue)
+- netif_stop_subqueue(sdata->dev, ac);
++ sdata->vif.cab_queue == queue) {
++ if (!local->ops->wake_tx_queue) {
++ netif_stop_subqueue(sdata->dev, ac);
++ continue;
++ }
++ spin_lock(&local->fq.lock);
++ sdata->vif.txqs_stopped[ac] = true;
++ spin_unlock(&local->fq.lock);
++ }
+ }
+ }
+ rcu_read_unlock();
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
-@@ -2127,6 +2127,9 @@ struct ieee80211_txq {
+@@ -2131,6 +2131,9 @@ struct ieee80211_txq {
* @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't
* support QoS NDP for AP probing - that's most likely a driver bug.
*
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
-@@ -2172,6 +2175,7 @@ enum ieee80211_hw_flags {
+@@ -2176,6 +2179,7 @@ enum ieee80211_hw_flags {
IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP,
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
-@@ -1557,6 +1557,29 @@ ieee80211_vif_get_num_mcast_if(struct ie
+@@ -1559,6 +1559,29 @@ ieee80211_vif_get_num_mcast_if(struct ie
return -1;
}
/* We store the key here so there's no point in using rcu_dereference()
* but that's fine because the code that changes the pointers will call
* this function after doing so. For a single CPU that would be enough,
-@@ -3534,7 +3543,7 @@ begin:
+@@ -3540,7 +3549,7 @@ begin:
if (tx.key &&
(tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
tx.key, skb);
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
-@@ -1288,6 +1288,7 @@ void ieee80211_send_auth(struct ieee8021
+@@ -1388,6 +1388,7 @@ void ieee80211_send_auth(struct ieee8021
u32 tx_flags)
{
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
unsigned int hdrlen;
-@@ -1314,7 +1315,7 @@ void ieee80211_send_auth(struct ieee8021
+@@ -1414,7 +1415,7 @@ void ieee80211_send_auth(struct ieee8021
skb_put_data(skb, extra, extra_len);
if (auth_alg == WLAN_AUTH_SHARED_KEY && transaction == 3) {