From: Felix Fietkau Date: Thu, 21 Jul 2016 14:17:08 +0000 (+0200) Subject: ath10k: add NAPI support X-Git-Tag: v17.01.0-rc1~1976 X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=14eb09d5c0458fb619dc7030c149322ef0b137c3;p=openwrt%2Fopenwrt.git ath10k: add NAPI support Signed-off-by: Felix Fietkau --- diff --git a/package/kernel/mac80211/patches/332-ath10k-implement-NAPI-support.patch b/package/kernel/mac80211/patches/332-ath10k-implement-NAPI-support.patch new file mode 100644 index 0000000000..c6cc145957 --- /dev/null +++ b/package/kernel/mac80211/patches/332-ath10k-implement-NAPI-support.patch @@ -0,0 +1,642 @@ +From: Rajkumar Manoharan +Date: Thu, 21 Jul 2016 11:50:00 +0530 +Subject: [PATCH] ath10k: implement NAPI support + +Add NAPI support for rx and tx completion. NAPI poll is scheduled +from interrupt handler. The design is as below + + - on interrupt + - schedule napi and mask interrupts + - on poll + - process all pipes (no actual Tx/Rx) + - process Rx within budget + - if quota exceeds budget reschedule napi poll by returning budget + - process Tx completions and update budget if necessary + - process Tx fetch indications (pull-push) + - push any other pending Tx (if possible) + - before resched or napi completion replenish htt rx ring buffer + - if work done < budget, complete napi poll and unmask interrupts + +This change also get rid of two tasklets (intr_tq and txrx_compl_task). + +Measured peak throughput with NAPI on IPQ4019 platform in controlled +environment. No noticeable reduction in throughput is seen and also +observed improvements in CPU usage. Approx. 15% CPU usage got reduced +in UDP uplink case. + +DL: AP DUT Tx +UL: AP DUT Rx + +IPQ4019 (avg. cpu usage %) +======== + TOT +NAPI + =========== ============= +TCP DL 644 Mbps (42%) 645 Mbps (36%) +TCP UL 673 Mbps (30%) 675 Mbps (26%) +UDP DL 682 Mbps (49%) 680 Mbps (49%) +UDP UL 720 Mbps (28%) 717 Mbps (11%) + +Signed-off-by: Rajkumar Manoharan +--- + +--- a/drivers/net/wireless/ath/ath10k/ahb.c ++++ b/drivers/net/wireless/ath/ath10k/ahb.c +@@ -462,13 +462,13 @@ static void ath10k_ahb_halt_chip(struct + static irqreturn_t ath10k_ahb_interrupt_handler(int irq, void *arg) + { + struct ath10k *ar = arg; +- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); + + if (!ath10k_pci_irq_pending(ar)) + return IRQ_NONE; + + ath10k_pci_disable_and_clear_legacy_irq(ar); +- tasklet_schedule(&ar_pci->intr_tq); ++ ath10k_pci_irq_msi_fw_mask(ar); ++ napi_schedule(&ar->napi); + + return IRQ_HANDLED; + } +@@ -831,7 +831,7 @@ static int ath10k_ahb_probe(struct platf + goto err_resource_deinit; + } + +- ath10k_pci_init_irq_tasklets(ar); ++ ath10k_pci_init_napi(ar); + + ret = ath10k_ahb_request_irq_legacy(ar); + if (ret) +--- a/drivers/net/wireless/ath/ath10k/core.c ++++ b/drivers/net/wireless/ath/ath10k/core.c +@@ -2226,6 +2226,8 @@ struct ath10k *ath10k_core_create(size_t + INIT_WORK(&ar->register_work, ath10k_core_register_work); + INIT_WORK(&ar->restart_work, ath10k_core_restart); + ++ init_dummy_netdev(&ar->napi_dev); ++ + ret = ath10k_debug_create(ar); + if (ret) + goto err_free_aux_wq; +--- a/drivers/net/wireless/ath/ath10k/core.h ++++ b/drivers/net/wireless/ath/ath10k/core.h +@@ -65,6 +65,10 @@ + #define ATH10K_KEEPALIVE_MAX_IDLE 3895 + #define ATH10K_KEEPALIVE_MAX_UNRESPONSIVE 3900 + ++/* NAPI poll budget */ ++#define ATH10K_NAPI_BUDGET 64 ++#define ATH10K_NAPI_QUOTA_LIMIT 60 ++ + struct ath10k; + + enum ath10k_bus { +@@ -933,6 +937,10 @@ struct ath10k { + struct ath10k_thermal thermal; + struct ath10k_wow wow; + ++ /* NAPI */ ++ struct net_device napi_dev; ++ struct napi_struct napi; ++ + /* must be last */ + u8 drv_priv[0] __aligned(sizeof(void *)); + }; +--- a/drivers/net/wireless/ath/ath10k/htt.h ++++ b/drivers/net/wireless/ath/ath10k/htt.h +@@ -1666,7 +1666,6 @@ struct ath10k_htt { + + /* This is used to group tx/rx completions separately and process them + * in batches to reduce cache stalls */ +- struct tasklet_struct txrx_compl_task; + struct sk_buff_head rx_compl_q; + struct sk_buff_head rx_in_ord_compl_q; + struct sk_buff_head tx_fetch_ind_q; +@@ -1799,5 +1798,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt + struct sk_buff *msdu); + void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, + struct sk_buff *skb); ++int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget); + + #endif +--- a/drivers/net/wireless/ath/ath10k/htt_rx.c ++++ b/drivers/net/wireless/ath/ath10k/htt_rx.c +@@ -34,7 +34,6 @@ + #define HTT_RX_RING_REFILL_RESCHED_MS 5 + + static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb); +-static void ath10k_htt_txrx_compl_task(unsigned long ptr); + + static struct sk_buff * + ath10k_htt_rx_find_skb_paddr(struct ath10k *ar, u32 paddr) +@@ -226,7 +225,6 @@ int ath10k_htt_rx_ring_refill(struct ath + void ath10k_htt_rx_free(struct ath10k_htt *htt) + { + del_timer_sync(&htt->rx_ring.refill_retry_timer); +- tasklet_kill(&htt->txrx_compl_task); + + skb_queue_purge(&htt->rx_compl_q); + skb_queue_purge(&htt->rx_in_ord_compl_q); +@@ -520,9 +518,6 @@ int ath10k_htt_rx_alloc(struct ath10k_ht + skb_queue_head_init(&htt->tx_fetch_ind_q); + atomic_set(&htt->num_mpdus_ready, 0); + +- tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task, +- (unsigned long)htt); +- + ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt rx ring size %d fill_level %d\n", + htt->rx_ring.size, htt->rx_ring.fill_level); + return 0; +@@ -958,7 +953,7 @@ static void ath10k_process_rx(struct ath + trace_ath10k_rx_hdr(ar, skb->data, skb->len); + trace_ath10k_rx_payload(ar, skb->data, skb->len); + +- ieee80211_rx(ar->hw, skb); ++ ieee80211_rx_napi(ar->hw, NULL, skb, &ar->napi); + } + + static int ath10k_htt_rx_nwifi_hdrlen(struct ath10k *ar, +@@ -1527,7 +1522,7 @@ static int ath10k_htt_rx_handle_amsdu(st + struct ath10k *ar = htt->ar; + struct ieee80211_rx_status *rx_status = &htt->rx_status; + struct sk_buff_head amsdu; +- int ret; ++ int ret, num_msdus; + + __skb_queue_head_init(&amsdu); + +@@ -1549,13 +1544,14 @@ static int ath10k_htt_rx_handle_amsdu(st + return ret; + } + ++ num_msdus = skb_queue_len(&amsdu); + ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff); + ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0); + ath10k_htt_rx_h_filter(ar, &amsdu, rx_status); + ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status); + ath10k_htt_rx_h_deliver(ar, &amsdu, rx_status); + +- return 0; ++ return num_msdus; + } + + static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, +@@ -1579,15 +1575,6 @@ static void ath10k_htt_rx_proc_rx_ind(st + mpdu_count += mpdu_ranges[i].mpdu_count; + + atomic_add(mpdu_count, &htt->num_mpdus_ready); +- +- tasklet_schedule(&htt->txrx_compl_task); +-} +- +-static void ath10k_htt_rx_frag_handler(struct ath10k_htt *htt) +-{ +- atomic_inc(&htt->num_mpdus_ready); +- +- tasklet_schedule(&htt->txrx_compl_task); + } + + static void ath10k_htt_rx_tx_compl_ind(struct ath10k *ar, +@@ -1772,14 +1759,15 @@ static void ath10k_htt_rx_h_rx_offload_p + RX_FLAG_MMIC_STRIPPED; + } + +-static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, +- struct sk_buff_head *list) ++static int ath10k_htt_rx_h_rx_offload(struct ath10k *ar, ++ struct sk_buff_head *list) + { + struct ath10k_htt *htt = &ar->htt; + struct ieee80211_rx_status *status = &htt->rx_status; + struct htt_rx_offload_msdu *rx; + struct sk_buff *msdu; + size_t offset; ++ int num_msdu = 0; + + while ((msdu = __skb_dequeue(list))) { + /* Offloaded frames don't have Rx descriptor. Instead they have +@@ -1819,10 +1807,12 @@ static void ath10k_htt_rx_h_rx_offload(s + ath10k_htt_rx_h_rx_offload_prot(status, msdu); + ath10k_htt_rx_h_channel(ar, status, NULL, rx->vdev_id); + ath10k_process_rx(ar, status, msdu); ++ num_msdu++; + } ++ return num_msdu; + } + +-static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) ++static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) + { + struct ath10k_htt *htt = &ar->htt; + struct htt_resp *resp = (void *)skb->data; +@@ -1835,12 +1825,12 @@ static void ath10k_htt_rx_in_ord_ind(str + u8 tid; + bool offload; + bool frag; +- int ret; ++ int ret, num_msdus = 0; + + lockdep_assert_held(&htt->rx_ring.lock); + + if (htt->rx_confused) +- return; ++ return -EIO; + + skb_pull(skb, sizeof(resp->hdr)); + skb_pull(skb, sizeof(resp->rx_in_ord_ind)); +@@ -1859,7 +1849,7 @@ static void ath10k_htt_rx_in_ord_ind(str + + if (skb->len < msdu_count * sizeof(*resp->rx_in_ord_ind.msdu_descs)) { + ath10k_warn(ar, "dropping invalid in order rx indication\n"); +- return; ++ return -EINVAL; + } + + /* The event can deliver more than 1 A-MSDU. Each A-MSDU is later +@@ -1870,14 +1860,14 @@ static void ath10k_htt_rx_in_ord_ind(str + if (ret < 0) { + ath10k_warn(ar, "failed to pop paddr list: %d\n", ret); + htt->rx_confused = true; +- return; ++ return -EIO; + } + + /* Offloaded frames are very different and need to be handled + * separately. + */ + if (offload) +- ath10k_htt_rx_h_rx_offload(ar, &list); ++ num_msdus = ath10k_htt_rx_h_rx_offload(ar, &list); + + while (!skb_queue_empty(&list)) { + __skb_queue_head_init(&amsdu); +@@ -1890,6 +1880,7 @@ static void ath10k_htt_rx_in_ord_ind(str + * better to report something than nothing though. This + * should still give an idea about rx rate to the user. + */ ++ num_msdus += skb_queue_len(&amsdu); + ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); + ath10k_htt_rx_h_filter(ar, &amsdu, status); + ath10k_htt_rx_h_mpdu(ar, &amsdu, status); +@@ -1902,9 +1893,10 @@ static void ath10k_htt_rx_in_ord_ind(str + ath10k_warn(ar, "failed to extract amsdu: %d\n", ret); + htt->rx_confused = true; + __skb_queue_purge(&list); +- return; ++ return -EIO; + } + } ++ return num_msdus; + } + + static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar, +@@ -2267,7 +2259,6 @@ bool ath10k_htt_t2h_msg_handler(struct a + } + case HTT_T2H_MSG_TYPE_TX_COMPL_IND: + ath10k_htt_rx_tx_compl_ind(htt->ar, skb); +- tasklet_schedule(&htt->txrx_compl_task); + break; + case HTT_T2H_MSG_TYPE_SEC_IND: { + struct ath10k *ar = htt->ar; +@@ -2284,7 +2275,7 @@ bool ath10k_htt_t2h_msg_handler(struct a + case HTT_T2H_MSG_TYPE_RX_FRAG_IND: { + ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt event: ", + skb->data, skb->len); +- ath10k_htt_rx_frag_handler(htt); ++ atomic_inc(&htt->num_mpdus_ready); + break; + } + case HTT_T2H_MSG_TYPE_TEST: +@@ -2322,8 +2313,7 @@ bool ath10k_htt_t2h_msg_handler(struct a + break; + } + case HTT_T2H_MSG_TYPE_RX_IN_ORD_PADDR_IND: { +- skb_queue_tail(&htt->rx_in_ord_compl_q, skb); +- tasklet_schedule(&htt->txrx_compl_task); ++ __skb_queue_tail(&htt->rx_in_ord_compl_q, skb); + return false; + } + case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND: +@@ -2349,7 +2339,6 @@ bool ath10k_htt_t2h_msg_handler(struct a + break; + } + skb_queue_tail(&htt->tx_fetch_ind_q, tx_fetch_ind); +- tasklet_schedule(&htt->txrx_compl_task); + break; + } + case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM: +@@ -2378,27 +2367,77 @@ void ath10k_htt_rx_pktlog_completion_han + } + EXPORT_SYMBOL(ath10k_htt_rx_pktlog_completion_handler); + +-static void ath10k_htt_txrx_compl_task(unsigned long ptr) ++int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget) + { +- struct ath10k_htt *htt = (struct ath10k_htt *)ptr; +- struct ath10k *ar = htt->ar; ++ struct ath10k_htt *htt = &ar->htt; + struct htt_tx_done tx_done = {}; +- struct sk_buff_head rx_ind_q; + struct sk_buff_head tx_ind_q; + struct sk_buff *skb; + unsigned long flags; +- int num_mpdus; ++ int quota = 0, done, num_rx_msdus; ++ bool resched_napi = false; + +- __skb_queue_head_init(&rx_ind_q); + __skb_queue_head_init(&tx_ind_q); + +- spin_lock_irqsave(&htt->rx_in_ord_compl_q.lock, flags); +- skb_queue_splice_init(&htt->rx_in_ord_compl_q, &rx_ind_q); +- spin_unlock_irqrestore(&htt->rx_in_ord_compl_q.lock, flags); ++ /* Since in-ord-ind can deliver more than 1 A-MSDU in single event, ++ * process it first to utilize full available quota. ++ */ ++ while (quota < budget) { ++ if (skb_queue_empty(&htt->rx_in_ord_compl_q)) ++ break; + +- spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); +- skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); +- spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); ++ skb = __skb_dequeue(&htt->rx_in_ord_compl_q); ++ if (!skb) { ++ resched_napi = true; ++ goto exit; ++ } ++ ++ spin_lock_bh(&htt->rx_ring.lock); ++ num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb); ++ spin_unlock_bh(&htt->rx_ring.lock); ++ if (num_rx_msdus < 0) { ++ resched_napi = true; ++ goto exit; ++ } ++ ++ dev_kfree_skb_any(skb); ++ if (num_rx_msdus > 0) ++ quota += num_rx_msdus; ++ ++ if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && ++ !skb_queue_empty(&htt->rx_in_ord_compl_q)) { ++ resched_napi = true; ++ goto exit; ++ } ++ } ++ ++ while (quota < budget) { ++ /* no more data to receive */ ++ if (!atomic_read(&htt->num_mpdus_ready)) ++ break; ++ ++ num_rx_msdus = ath10k_htt_rx_handle_amsdu(htt); ++ if (num_rx_msdus < 0) { ++ resched_napi = true; ++ goto exit; ++ } ++ ++ quota += num_rx_msdus; ++ atomic_dec(&htt->num_mpdus_ready); ++ if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && ++ atomic_read(&htt->num_mpdus_ready)) { ++ resched_napi = true; ++ goto exit; ++ } ++ } ++ ++ /* From NAPI documentation: ++ * The napi poll() function may also process TX completions, in which ++ * case if it processes the entire TX ring then it should count that ++ * work as the rest of the budget. ++ */ ++ if ((quota < budget) && !kfifo_is_empty(&htt->txdone_fifo)) ++ quota = budget; + + /* kfifo_get: called only within txrx_tasklet so it's neatly serialized. + * From kfifo_get() documentation: +@@ -2408,27 +2447,22 @@ static void ath10k_htt_txrx_compl_task(u + while (kfifo_get(&htt->txdone_fifo, &tx_done)) + ath10k_txrx_tx_unref(htt, &tx_done); + ++ spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); ++ skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); ++ spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); ++ + while ((skb = __skb_dequeue(&tx_ind_q))) { + ath10k_htt_rx_tx_fetch_ind(ar, skb); + dev_kfree_skb_any(skb); + } + +- num_mpdus = atomic_read(&htt->num_mpdus_ready); +- +- while (num_mpdus) { +- if (ath10k_htt_rx_handle_amsdu(htt)) +- break; +- +- num_mpdus--; +- atomic_dec(&htt->num_mpdus_ready); +- } +- +- while ((skb = __skb_dequeue(&rx_ind_q))) { +- spin_lock_bh(&htt->rx_ring.lock); +- ath10k_htt_rx_in_ord_ind(ar, skb); +- spin_unlock_bh(&htt->rx_ring.lock); +- dev_kfree_skb_any(skb); +- } +- ++exit: + ath10k_htt_rx_msdu_buff_replenish(htt); ++ /* In case of rx failure or more data to read, report budget ++ * to reschedule NAPI poll ++ */ ++ done = resched_napi ? budget : quota; ++ ++ return done; + } ++EXPORT_SYMBOL(ath10k_htt_txrx_compl_task); +--- a/drivers/net/wireless/ath/ath10k/htt_tx.c ++++ b/drivers/net/wireless/ath/ath10k/htt_tx.c +@@ -388,8 +388,6 @@ void ath10k_htt_tx_free(struct ath10k_ht + { + int size; + +- tasklet_kill(&htt->txrx_compl_task); +- + idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar); + idr_destroy(&htt->pending_tx); + +--- a/drivers/net/wireless/ath/ath10k/pci.c ++++ b/drivers/net/wireless/ath/ath10k/pci.c +@@ -1502,12 +1502,10 @@ void ath10k_pci_hif_send_complete_check( + ath10k_ce_per_engine_service(ar, pipe); + } + +-void ath10k_pci_kill_tasklet(struct ath10k *ar) ++static void ath10k_pci_rx_retry_sync(struct ath10k *ar) + { + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); + +- tasklet_kill(&ar_pci->intr_tq); +- + del_timer_sync(&ar_pci->rx_post_retry); + } + +@@ -1566,7 +1564,7 @@ void ath10k_pci_hif_get_default_pipe(str + ul_pipe, dl_pipe); + } + +-static void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar) ++void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar) + { + u32 val; + +@@ -1747,7 +1745,7 @@ void ath10k_pci_ce_deinit(struct ath10k + + void ath10k_pci_flush(struct ath10k *ar) + { +- ath10k_pci_kill_tasklet(ar); ++ ath10k_pci_rx_retry_sync(ar); + ath10k_pci_buffer_cleanup(ar); + } + +@@ -2754,35 +2752,53 @@ static irqreturn_t ath10k_pci_interrupt_ + return IRQ_NONE; + } + +- if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) { +- if (!ath10k_pci_irq_pending(ar)) +- return IRQ_NONE; +- +- ath10k_pci_disable_and_clear_legacy_irq(ar); +- } ++ if ((ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) && ++ !ath10k_pci_irq_pending(ar)) ++ return IRQ_NONE; + +- tasklet_schedule(&ar_pci->intr_tq); ++ ath10k_pci_disable_and_clear_legacy_irq(ar); ++ ath10k_pci_irq_msi_fw_mask(ar); ++ napi_schedule(&ar->napi); + + return IRQ_HANDLED; + } + +-static void ath10k_pci_tasklet(unsigned long data) ++static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget) + { +- struct ath10k *ar = (struct ath10k *)data; +- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); ++ struct ath10k *ar = container_of(ctx, struct ath10k, napi); ++ int done = 0; + + if (ath10k_pci_has_fw_crashed(ar)) { +- ath10k_pci_irq_disable(ar); + ath10k_pci_fw_crashed_clear(ar); + ath10k_pci_fw_crashed_dump(ar); +- return; ++ napi_complete(ctx); ++ return done; + } + + ath10k_ce_per_engine_service_any(ar); + +- /* Re-enable legacy irq that was disabled in the irq handler */ +- if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) ++ done = ath10k_htt_txrx_compl_task(ar, budget); ++ ++ if (done < budget) { ++ napi_complete(ctx); ++ /* In case of MSI, it is possible that interrupts are received ++ * while NAPI poll is inprogress. So pending interrupts that are ++ * received after processing all copy engine pipes by NAPI poll ++ * will not be handled again. This is causing failure to ++ * complete boot sequence in x86 platform. So before enabling ++ * interrupts safer to check for pending interrupts for ++ * immediate servicing. ++ */ ++ if (CE_INTERRUPT_SUMMARY(ar)) { ++ napi_reschedule(&ar->napi); ++ goto out; ++ } + ath10k_pci_enable_legacy_irq(ar); ++ ath10k_pci_irq_msi_fw_unmask(ar); ++ } ++ ++out: ++ return done; + } + + static int ath10k_pci_request_irq_msi(struct ath10k *ar) +@@ -2840,11 +2856,11 @@ static void ath10k_pci_free_irq(struct a + free_irq(ar_pci->pdev->irq, ar); + } + +-void ath10k_pci_init_irq_tasklets(struct ath10k *ar) ++void ath10k_pci_init_napi(struct ath10k *ar) + { +- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); +- +- tasklet_init(&ar_pci->intr_tq, ath10k_pci_tasklet, (unsigned long)ar); ++ netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_pci_napi_poll, ++ ATH10K_NAPI_BUDGET); ++ napi_enable(&ar->napi); + } + + static int ath10k_pci_init_irq(struct ath10k *ar) +@@ -2852,7 +2868,7 @@ static int ath10k_pci_init_irq(struct at + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); + int ret; + +- ath10k_pci_init_irq_tasklets(ar); ++ ath10k_pci_init_napi(ar); + + if (ath10k_pci_irq_mode != ATH10K_PCI_IRQ_AUTO) + ath10k_info(ar, "limiting irq mode to: %d\n", +@@ -3113,7 +3129,8 @@ int ath10k_pci_setup_resource(struct ath + + void ath10k_pci_release_resource(struct ath10k *ar) + { +- ath10k_pci_kill_tasklet(ar); ++ ath10k_pci_rx_retry_sync(ar); ++ netif_napi_del(&ar->napi); + ath10k_pci_ce_deinit(ar); + ath10k_pci_free_pipes(ar); + } +@@ -3274,7 +3291,7 @@ static int ath10k_pci_probe(struct pci_d + + err_free_irq: + ath10k_pci_free_irq(ar); +- ath10k_pci_kill_tasklet(ar); ++ ath10k_pci_rx_retry_sync(ar); + + err_deinit_irq: + ath10k_pci_deinit_irq(ar); +--- a/drivers/net/wireless/ath/ath10k/pci.h ++++ b/drivers/net/wireless/ath/ath10k/pci.h +@@ -177,8 +177,6 @@ struct ath10k_pci { + /* Operating interrupt mode */ + enum ath10k_pci_irq_mode oper_irq_mode; + +- struct tasklet_struct intr_tq; +- + struct ath10k_pci_pipe pipe_info[CE_COUNT_MAX]; + + /* Copy Engine used for Diagnostic Accesses */ +@@ -294,8 +292,7 @@ void ath10k_pci_free_pipes(struct ath10k + void ath10k_pci_free_pipes(struct ath10k *ar); + void ath10k_pci_rx_replenish_retry(unsigned long ptr); + void ath10k_pci_ce_deinit(struct ath10k *ar); +-void ath10k_pci_init_irq_tasklets(struct ath10k *ar); +-void ath10k_pci_kill_tasklet(struct ath10k *ar); ++void ath10k_pci_init_napi(struct ath10k *ar); + int ath10k_pci_init_pipes(struct ath10k *ar); + int ath10k_pci_init_config(struct ath10k *ar); + void ath10k_pci_rx_post(struct ath10k *ar); +@@ -303,6 +300,7 @@ void ath10k_pci_flush(struct ath10k *ar) + void ath10k_pci_enable_legacy_irq(struct ath10k *ar); + bool ath10k_pci_irq_pending(struct ath10k *ar); + void ath10k_pci_disable_and_clear_legacy_irq(struct ath10k *ar); ++void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar); + int ath10k_pci_wait_for_target_init(struct ath10k *ar); + int ath10k_pci_setup_resource(struct ath10k *ar); + void ath10k_pci_release_resource(struct ath10k *ar);