iwlwifi: continue clean up - pcie/tx.c
authorEmmanuel Grumbach <emmanuel.grumbach@intel.com>
Wed, 14 Nov 2012 12:44:18 +0000 (14:44 +0200)
committerJohannes Berg <johannes.berg@intel.com>
Mon, 19 Nov 2012 14:04:20 +0000 (15:04 +0100)
Rename static functions. Function moved from trans.c to
tx.c. A few could be made static, others had to be exported.

Functions that implement the transport API are prefixed by
iwl_trans_pcie_, the others by iwl_pcie_.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
drivers/net/wireless/iwlwifi/pcie/internal.h
drivers/net/wireless/iwlwifi/pcie/trans.c
drivers/net/wireless/iwlwifi/pcie/tx.c

index a71a78237b62f37c0c3c312e386d54ca451f42f5..0d61e91c0a6f7ec66f1f49d0588e3ba12fea8831 100644 (file)
@@ -315,6 +315,10 @@ iwl_trans_pcie_get_trans(struct iwl_trans_pcie *trans_pcie)
                            trans_specific);
 }
 
+/*
+ * Convention: trans API functions: iwl_trans_pcie_XXX
+ *     Other functions: iwl_pcie_XXX
+ */
 struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
                                       const struct pci_device_id *ent,
                                       const struct iwl_cfg *cfg);
@@ -341,25 +345,21 @@ void iwl_pcie_disable_ict(struct iwl_trans *trans);
 /*****************************************************
 * TX / HCMD
 ******************************************************/
+int iwl_pcie_tx_init(struct iwl_trans *trans);
+void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr);
+int iwl_pcie_tx_stop(struct iwl_trans *trans);
+void iwl_pcie_tx_free(struct iwl_trans *trans);
+void iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, int fifo,
+                              int sta_id, int tid, int frame_limit, u16 ssn);
+void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int queue);
+int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
+                     struct iwl_device_cmd *dev_cmd, int txq_id);
 void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans, struct iwl_txq *txq);
-int iwl_pcie_tx_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
-                         dma_addr_t addr, u16 len, u8 reset);
-int iwl_pcie_send_cmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd);
+int iwl_trans_pcie_send_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd);
 void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
                            struct iwl_rx_cmd_buffer *rxb, int handler_status);
-void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans,
-                                     struct iwl_txq *txq, u16 byte_cnt);
-void iwl_pcie_txq_enable(struct iwl_trans *trans, int txq_id, int fifo,
-                        int sta_id, int tid, int frame_limit, u16 ssn);
-void iwl_pcie_txq_disable(struct iwl_trans *trans, int queue);
-void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
-                          enum dma_data_direction dma_dir);
-int iwl_pcie_txq_reclaim(struct iwl_trans *trans, int txq_id, int index,
-                        struct sk_buff_head *skbs);
-void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id);
-int iwl_queue_init(struct iwl_queue *q, int count, int slots_num, u32 id);
-int iwl_queue_space(const struct iwl_queue *q);
-
+void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
+                           struct sk_buff_head *skbs);
 /*****************************************************
 * Error handling
 ******************************************************/
index 8a5b5af968ad3a6412db05ea7da0ad2a217fdaa4..19c11e3b548118d6dea86dfe33aad0e3a012a024 100644 (file)
 #include "iwl-prph.h"
 #include "iwl-agn-hw.h"
 #include "internal.h"
-/* FIXME: need to abstract out TX command (once we know what it looks like) */
-#include "dvm/commands.h"
-
-#define SCD_QUEUECHAIN_SEL_ALL(trans, trans_pcie)      \
-       (((1<<trans->cfg->base_params->num_of_queues) - 1) &\
-       (~(1<<(trans_pcie)->cmd_queue)))
-
-static int iwlagn_alloc_dma_ptr(struct iwl_trans *trans,
-                               struct iwl_dma_ptr *ptr, size_t size)
-{
-       if (WARN_ON(ptr->addr))
-               return -EINVAL;
-
-       ptr->addr = dma_alloc_coherent(trans->dev, size,
-                                      &ptr->dma, GFP_KERNEL);
-       if (!ptr->addr)
-               return -ENOMEM;
-       ptr->size = size;
-       return 0;
-}
-
-static void iwlagn_free_dma_ptr(struct iwl_trans *trans,
-                               struct iwl_dma_ptr *ptr)
-{
-       if (unlikely(!ptr->addr))
-               return;
-
-       dma_free_coherent(trans->dev, ptr->size, ptr->addr, ptr->dma);
-       memset(ptr, 0, sizeof(*ptr));
-}
-
-static void iwl_trans_pcie_queue_stuck_timer(unsigned long data)
-{
-       struct iwl_txq *txq = (void *)data;
-       struct iwl_queue *q = &txq->q;
-       struct iwl_trans_pcie *trans_pcie = txq->trans_pcie;
-       struct iwl_trans *trans = iwl_trans_pcie_get_trans(trans_pcie);
-       u32 scd_sram_addr = trans_pcie->scd_base_addr +
-                               SCD_TX_STTS_QUEUE_OFFSET(txq->q.id);
-       u8 buf[16];
-       int i;
-
-       spin_lock(&txq->lock);
-       /* check if triggered erroneously */
-       if (txq->q.read_ptr == txq->q.write_ptr) {
-               spin_unlock(&txq->lock);
-               return;
-       }
-       spin_unlock(&txq->lock);
-
-       IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->q.id,
-               jiffies_to_msecs(trans_pcie->wd_timeout));
-       IWL_ERR(trans, "Current SW read_ptr %d write_ptr %d\n",
-               txq->q.read_ptr, txq->q.write_ptr);
-
-       iwl_read_targ_mem_bytes(trans, scd_sram_addr, buf, sizeof(buf));
-
-       iwl_print_hex_error(trans, buf, sizeof(buf));
-
-       for (i = 0; i < FH_TCSR_CHNL_NUM; i++)
-               IWL_ERR(trans, "FH TRBs(%d) = 0x%08x\n", i,
-                       iwl_read_direct32(trans, FH_TX_TRB_REG(i)));
-
-       for (i = 0; i < trans->cfg->base_params->num_of_queues; i++) {
-               u32 status = iwl_read_prph(trans, SCD_QUEUE_STATUS_BITS(i));
-               u8 fifo = (status >> SCD_QUEUE_STTS_REG_POS_TXF) & 0x7;
-               bool active = !!(status & BIT(SCD_QUEUE_STTS_REG_POS_ACTIVE));
-               u32 tbl_dw =
-                       iwl_read_targ_mem(trans,
-                                         trans_pcie->scd_base_addr +
-                                         SCD_TRANS_TBL_OFFSET_QUEUE(i));
-
-               if (i & 0x1)
-                       tbl_dw = (tbl_dw & 0xFFFF0000) >> 16;
-               else
-                       tbl_dw = tbl_dw & 0x0000FFFF;
-
-               IWL_ERR(trans,
-                       "Q %d is %sactive and mapped to fifo %d ra_tid 0x%04x [%d,%d]\n",
-                       i, active ? "" : "in", fifo, tbl_dw,
-                       iwl_read_prph(trans,
-                                     SCD_QUEUE_RDPTR(i)) & (txq->q.n_bd - 1),
-                       iwl_read_prph(trans, SCD_QUEUE_WRPTR(i)));
-       }
-
-       for (i = q->read_ptr; i != q->write_ptr;
-            i = iwl_queue_inc_wrap(i, q->n_bd)) {
-               struct iwl_tx_cmd *tx_cmd =
-                       (struct iwl_tx_cmd *)txq->entries[i].cmd->payload;
-               IWL_ERR(trans, "scratch %d = 0x%08x\n", i,
-                       get_unaligned_le32(&tx_cmd->scratch));
-       }
-
-       iwl_op_mode_nic_error(trans->op_mode);
-}
-
-static int iwl_trans_txq_alloc(struct iwl_trans *trans,
-                              struct iwl_txq *txq, int slots_num,
-                              u32 txq_id)
-{
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       size_t tfd_sz = sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX;
-       int i;
-
-       if (WARN_ON(txq->entries || txq->tfds))
-               return -EINVAL;
-
-       setup_timer(&txq->stuck_timer, iwl_trans_pcie_queue_stuck_timer,
-                   (unsigned long)txq);
-       txq->trans_pcie = trans_pcie;
-
-       txq->q.n_window = slots_num;
-
-       txq->entries = kcalloc(slots_num,
-                              sizeof(struct iwl_pcie_txq_entry),
-                              GFP_KERNEL);
-
-       if (!txq->entries)
-               goto error;
-
-       if (txq_id == trans_pcie->cmd_queue)
-               for (i = 0; i < slots_num; i++) {
-                       txq->entries[i].cmd =
-                               kmalloc(sizeof(struct iwl_device_cmd),
-                                       GFP_KERNEL);
-                       if (!txq->entries[i].cmd)
-                               goto error;
-               }
-
-       /* Circular buffer of transmit frame descriptors (TFDs),
-        * shared with device */
-       txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz,
-                                      &txq->q.dma_addr, GFP_KERNEL);
-       if (!txq->tfds) {
-               IWL_ERR(trans, "dma_alloc_coherent(%zd) failed\n", tfd_sz);
-               goto error;
-       }
-       txq->q.id = txq_id;
-
-       return 0;
-error:
-       if (txq->entries && txq_id == trans_pcie->cmd_queue)
-               for (i = 0; i < slots_num; i++)
-                       kfree(txq->entries[i].cmd);
-       kfree(txq->entries);
-       txq->entries = NULL;
-
-       return -ENOMEM;
-
-}
-
-static int iwl_trans_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
-                             int slots_num, u32 txq_id)
-{
-       int ret;
-
-       txq->need_update = 0;
-
-       /* TFD_QUEUE_SIZE_MAX must be power-of-two size, otherwise
-        * iwl_queue_inc_wrap and iwl_queue_dec_wrap are broken. */
-       BUILD_BUG_ON(TFD_QUEUE_SIZE_MAX & (TFD_QUEUE_SIZE_MAX - 1));
-
-       /* Initialize queue's high/low-water marks, and head/tail indexes */
-       ret = iwl_queue_init(&txq->q, TFD_QUEUE_SIZE_MAX, slots_num,
-                       txq_id);
-       if (ret)
-               return ret;
-
-       spin_lock_init(&txq->lock);
-
-       /*
-        * Tell nic where to find circular buffer of Tx Frame Descriptors for
-        * given Tx queue, and enable the DMA channel used for that queue.
-        * Circular buffer (TFD queue in DRAM) physical base address */
-       iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(txq_id),
-                            txq->q.dma_addr >> 8);
-
-       return 0;
-}
-
-/*
- * iwl_pcie_txq_unmap -  Unmap any remaining DMA mappings and free skb's
- */
-void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
-{
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_txq *txq = &trans_pcie->txq[txq_id];
-       struct iwl_queue *q = &txq->q;
-       enum dma_data_direction dma_dir;
-
-       if (!q->n_bd)
-               return;
-
-       /* In the command queue, all the TBs are mapped as BIDI
-        * so unmap them as such.
-        */
-       if (txq_id == trans_pcie->cmd_queue)
-               dma_dir = DMA_BIDIRECTIONAL;
-       else
-               dma_dir = DMA_TO_DEVICE;
-
-       spin_lock_bh(&txq->lock);
-       while (q->write_ptr != q->read_ptr) {
-               iwl_pcie_txq_free_tfd(trans, txq, dma_dir);
-               q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd);
-       }
-       spin_unlock_bh(&txq->lock);
-}
-
-/*
- * iwl_txq_free - Deallocate DMA queue.
- * @txq: Transmit queue to deallocate.
- *
- * Empty queue by removing and destroying all BD's.
- * Free all buffers.
- * 0-fill, but do not free "txq" descriptor structure.
- */
-static void iwl_txq_free(struct iwl_trans *trans, int txq_id)
-{
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_txq *txq = &trans_pcie->txq[txq_id];
-       struct device *dev = trans->dev;
-       int i;
-
-       if (WARN_ON(!txq))
-               return;
-
-       iwl_pcie_txq_unmap(trans, txq_id);
-
-       /* De-alloc array of command/tx buffers */
-       if (txq_id == trans_pcie->cmd_queue)
-               for (i = 0; i < txq->q.n_window; i++) {
-                       kfree(txq->entries[i].cmd);
-                       kfree(txq->entries[i].copy_cmd);
-                       kfree(txq->entries[i].free_buf);
-               }
-
-       /* De-alloc circular buffer of TFDs */
-       if (txq->q.n_bd) {
-               dma_free_coherent(dev, sizeof(struct iwl_tfd) *
-                                 txq->q.n_bd, txq->tfds, txq->q.dma_addr);
-               memset(&txq->q.dma_addr, 0, sizeof(txq->q.dma_addr));
-       }
-
-       kfree(txq->entries);
-       txq->entries = NULL;
-
-       del_timer_sync(&txq->stuck_timer);
-
-       /* 0-fill queue descriptor structure */
-       memset(txq, 0, sizeof(*txq));
-}
-
-/*
- * iwl_trans_tx_free - Free TXQ Context
- *
- * Destroy all TX DMA queues and structures
- */
-static void iwl_trans_pcie_tx_free(struct iwl_trans *trans)
-{
-       int txq_id;
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-
-       /* Tx queues */
-       if (trans_pcie->txq) {
-               for (txq_id = 0;
-                    txq_id < trans->cfg->base_params->num_of_queues; txq_id++)
-                       iwl_txq_free(trans, txq_id);
-       }
-
-       kfree(trans_pcie->txq);
-       trans_pcie->txq = NULL;
-
-       iwlagn_free_dma_ptr(trans, &trans_pcie->kw);
-
-       iwlagn_free_dma_ptr(trans, &trans_pcie->scd_bc_tbls);
-}
-
-/*
- * iwl_trans_tx_alloc - allocate TX context
- * Allocate all Tx DMA structures and initialize them
- */
-static int iwl_trans_tx_alloc(struct iwl_trans *trans)
-{
-       int ret;
-       int txq_id, slots_num;
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-
-       u16 scd_bc_tbls_size = trans->cfg->base_params->num_of_queues *
-                       sizeof(struct iwlagn_scd_bc_tbl);
-
-       /*It is not allowed to alloc twice, so warn when this happens.
-        * We cannot rely on the previous allocation, so free and fail */
-       if (WARN_ON(trans_pcie->txq)) {
-               ret = -EINVAL;
-               goto error;
-       }
-
-       ret = iwlagn_alloc_dma_ptr(trans, &trans_pcie->scd_bc_tbls,
-                                  scd_bc_tbls_size);
-       if (ret) {
-               IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
-               goto error;
-       }
-
-       /* Alloc keep-warm buffer */
-       ret = iwlagn_alloc_dma_ptr(trans, &trans_pcie->kw, IWL_KW_SIZE);
-       if (ret) {
-               IWL_ERR(trans, "Keep Warm allocation failed\n");
-               goto error;
-       }
-
-       trans_pcie->txq = kcalloc(trans->cfg->base_params->num_of_queues,
-                                 sizeof(struct iwl_txq), GFP_KERNEL);
-       if (!trans_pcie->txq) {
-               IWL_ERR(trans, "Not enough memory for txq\n");
-               ret = ENOMEM;
-               goto error;
-       }
-
-       /* Alloc and init all Tx queues, including the command queue (#4/#9) */
-       for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
-            txq_id++) {
-               slots_num = (txq_id == trans_pcie->cmd_queue) ?
-                                       TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS;
-               ret = iwl_trans_txq_alloc(trans, &trans_pcie->txq[txq_id],
-                                         slots_num, txq_id);
-               if (ret) {
-                       IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
-                       goto error;
-               }
-       }
-
-       return 0;
-
-error:
-       iwl_trans_pcie_tx_free(trans);
-
-       return ret;
-}
-static int iwl_tx_init(struct iwl_trans *trans)
-{
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       int ret;
-       int txq_id, slots_num;
-       unsigned long flags;
-       bool alloc = false;
-
-       if (!trans_pcie->txq) {
-               ret = iwl_trans_tx_alloc(trans);
-               if (ret)
-                       goto error;
-               alloc = true;
-       }
-
-       spin_lock_irqsave(&trans_pcie->irq_lock, flags);
-
-       /* Turn off all Tx DMA fifos */
-       iwl_write_prph(trans, SCD_TXFACT, 0);
-
-       /* Tell NIC where to find the "keep warm" buffer */
-       iwl_write_direct32(trans, FH_KW_MEM_ADDR_REG,
-                          trans_pcie->kw.dma >> 4);
-
-       spin_unlock_irqrestore(&trans_pcie->irq_lock, flags);
-
-       /* Alloc and init all Tx queues, including the command queue (#4/#9) */
-       for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
-            txq_id++) {
-               slots_num = (txq_id == trans_pcie->cmd_queue) ?
-                                       TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS;
-               ret = iwl_trans_txq_init(trans, &trans_pcie->txq[txq_id],
-                                        slots_num, txq_id);
-               if (ret) {
-                       IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
-                       goto error;
-               }
-       }
-
-       return 0;
-error:
-       /*Upon error, free only if we allocated something */
-       if (alloc)
-               iwl_trans_pcie_tx_free(trans);
-       return ret;
-}
 
 static void iwl_pcie_set_pwr_vmain(struct iwl_trans *trans)
 {
@@ -659,7 +273,7 @@ static int iwl_pcie_nic_init(struct iwl_trans *trans)
        iwl_pcie_rx_init(trans);
 
        /* Allocate or reset and init all Tx and Command queues */
-       if (iwl_tx_init(trans))
+       if (iwl_pcie_tx_init(trans))
                return -ENOMEM;
 
        if (trans->cfg->base_params->shadow_reg_enable) {
@@ -874,126 +488,10 @@ static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
        return iwl_pcie_load_given_ucode(trans, fw);
 }
 
-/*
- * Activate/Deactivate Tx DMA/FIFO channels according tx fifos mask
- */
-static void iwl_trans_txq_set_sched(struct iwl_trans *trans, u32 mask)
-{
-       struct iwl_trans_pcie __maybe_unused *trans_pcie =
-               IWL_TRANS_GET_PCIE_TRANS(trans);
-
-       iwl_write_prph(trans, SCD_TXFACT, mask);
-}
-
-static void iwl_tx_start(struct iwl_trans *trans, u32 scd_base_addr)
-{
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       u32 a;
-       int chan;
-       u32 reg_val;
-
-       /* make sure all queue are not stopped/used */
-       memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
-       memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
-
-       trans_pcie->scd_base_addr =
-               iwl_read_prph(trans, SCD_SRAM_BASE_ADDR);
-
-       WARN_ON(scd_base_addr != 0 &&
-               scd_base_addr != trans_pcie->scd_base_addr);
-
-       a = trans_pcie->scd_base_addr + SCD_CONTEXT_MEM_LOWER_BOUND;
-       /* reset conext data memory */
-       for (; a < trans_pcie->scd_base_addr + SCD_CONTEXT_MEM_UPPER_BOUND;
-               a += 4)
-               iwl_write_targ_mem(trans, a, 0);
-       /* reset tx status memory */
-       for (; a < trans_pcie->scd_base_addr + SCD_TX_STTS_MEM_UPPER_BOUND;
-               a += 4)
-               iwl_write_targ_mem(trans, a, 0);
-       for (; a < trans_pcie->scd_base_addr +
-              SCD_TRANS_TBL_OFFSET_QUEUE(
-                               trans->cfg->base_params->num_of_queues);
-              a += 4)
-               iwl_write_targ_mem(trans, a, 0);
-
-       iwl_write_prph(trans, SCD_DRAM_BASE_ADDR,
-                      trans_pcie->scd_bc_tbls.dma >> 10);
-
-       /* The chain extension of the SCD doesn't work well. This feature is
-        * enabled by default by the HW, so we need to disable it manually.
-        */
-       iwl_write_prph(trans, SCD_CHAINEXT_EN, 0);
-
-       iwl_trans_ac_txq_enable(trans, trans_pcie->cmd_queue,
-                               trans_pcie->cmd_fifo);
-
-       /* Activate all Tx DMA/FIFO channels */
-       iwl_trans_txq_set_sched(trans, IWL_MASK(0, 7));
-
-       /* Enable DMA channel */
-       for (chan = 0; chan < FH_TCSR_CHNL_NUM ; chan++)
-               iwl_write_direct32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(chan),
-                                  FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE |
-                                  FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE);
-
-       /* Update FH chicken bits */
-       reg_val = iwl_read_direct32(trans, FH_TX_CHICKEN_BITS_REG);
-       iwl_write_direct32(trans, FH_TX_CHICKEN_BITS_REG,
-                          reg_val | FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN);
-
-       /* Enable L1-Active */
-       iwl_clear_bits_prph(trans, APMG_PCIDEV_STT_REG,
-                           APMG_PCIDEV_STT_VAL_L1_ACT_DIS);
-}
-
 static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr)
 {
        iwl_pcie_reset_ict(trans);
-       iwl_tx_start(trans, scd_addr);
-}
-
-/*
- * iwlagn_txq_ctx_stop - Stop all Tx DMA channels
- */
-static int iwl_trans_tx_stop(struct iwl_trans *trans)
-{
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       int ch, txq_id, ret;
-       unsigned long flags;
-
-       /* Turn off all Tx DMA fifos */
-       spin_lock_irqsave(&trans_pcie->irq_lock, flags);
-
-       iwl_trans_txq_set_sched(trans, 0);
-
-       /* Stop each Tx DMA channel, and wait for it to be idle */
-       for (ch = 0; ch < FH_TCSR_CHNL_NUM; ch++) {
-               iwl_write_direct32(trans,
-                                  FH_TCSR_CHNL_TX_CONFIG_REG(ch), 0x0);
-               ret = iwl_poll_direct_bit(trans, FH_TSSR_TX_STATUS_REG,
-                       FH_TSSR_TX_STATUS_REG_MSK_CHNL_IDLE(ch), 1000);
-               if (ret < 0)
-                       IWL_ERR(trans,
-                               "Failing on timeout while stopping DMA channel %d [0x%08x]\n",
-                               ch,
-                               iwl_read_direct32(trans,
-                                                 FH_TSSR_TX_STATUS_REG));
-       }
-       spin_unlock_irqrestore(&trans_pcie->irq_lock, flags);
-
-       if (!trans_pcie->txq) {
-               IWL_WARN(trans,
-                        "Stopping tx queues that aren't allocated...\n");
-               return 0;
-       }
-
-       /* Unmap DMA from host system and free skb's */
-       for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
-            txq_id++)
-               iwl_pcie_txq_unmap(trans, txq_id);
-
-       return 0;
+       iwl_pcie_tx_start(trans, scd_addr);
 }
 
 static void iwl_trans_pcie_stop_device(struct iwl_trans *trans)
@@ -1017,7 +515,7 @@ static void iwl_trans_pcie_stop_device(struct iwl_trans *trans)
         * already dead.
         */
        if (test_bit(STATUS_DEVICE_ENABLED, &trans_pcie->status)) {
-               iwl_trans_tx_stop(trans);
+               iwl_pcie_tx_stop(trans);
                iwl_pcie_rx_stop(trans);
 
                /* Power-down device's busmaster DMA clocks */
@@ -1070,170 +568,6 @@ static void iwl_trans_pcie_wowlan_suspend(struct iwl_trans *trans)
                      CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
 }
 
-static int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
-                            struct iwl_device_cmd *dev_cmd, int txq_id)
-{
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-       struct iwl_tx_cmd *tx_cmd = (struct iwl_tx_cmd *) dev_cmd->payload;
-       struct iwl_cmd_meta *out_meta;
-       struct iwl_txq *txq;
-       struct iwl_queue *q;
-       dma_addr_t phys_addr = 0;
-       dma_addr_t txcmd_phys;
-       dma_addr_t scratch_phys;
-       u16 len, firstlen, secondlen;
-       u8 wait_write_ptr = 0;
-       __le16 fc = hdr->frame_control;
-       u8 hdr_len = ieee80211_hdrlen(fc);
-       u16 __maybe_unused wifi_seq;
-
-       txq = &trans_pcie->txq[txq_id];
-       q = &txq->q;
-
-       if (unlikely(!test_bit(txq_id, trans_pcie->queue_used))) {
-               WARN_ON_ONCE(1);
-               return -EINVAL;
-       }
-
-       spin_lock(&txq->lock);
-
-       /* In AGG mode, the index in the ring must correspond to the WiFi
-        * sequence number. This is a HW requirements to help the SCD to parse
-        * the BA.
-        * Check here that the packets are in the right place on the ring.
-        */
-#ifdef CONFIG_IWLWIFI_DEBUG
-       wifi_seq = SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
-       WARN_ONCE((iwl_read_prph(trans, SCD_AGGR_SEL) & BIT(txq_id)) &&
-                 ((wifi_seq & 0xff) != q->write_ptr),
-                 "Q: %d WiFi Seq %d tfdNum %d",
-                 txq_id, wifi_seq, q->write_ptr);
-#endif
-
-       /* Set up driver data for this TFD */
-       txq->entries[q->write_ptr].skb = skb;
-       txq->entries[q->write_ptr].cmd = dev_cmd;
-
-       dev_cmd->hdr.cmd = REPLY_TX;
-       dev_cmd->hdr.sequence =
-               cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
-                           INDEX_TO_SEQ(q->write_ptr)));
-
-       /* Set up first empty entry in queue's array of Tx/cmd buffers */
-       out_meta = &txq->entries[q->write_ptr].meta;
-
-       /*
-        * Use the first empty entry in this queue's command buffer array
-        * to contain the Tx command and MAC header concatenated together
-        * (payload data will be in another buffer).
-        * Size of this varies, due to varying MAC header length.
-        * If end is not dword aligned, we'll have 2 extra bytes at the end
-        * of the MAC header (device reads on dword boundaries).
-        * We'll tell device about this padding later.
-        */
-       len = sizeof(struct iwl_tx_cmd) +
-               sizeof(struct iwl_cmd_header) + hdr_len;
-       firstlen = (len + 3) & ~3;
-
-       /* Tell NIC about any 2-byte padding after MAC header */
-       if (firstlen != len)
-               tx_cmd->tx_flags |= TX_CMD_FLG_MH_PAD_MSK;
-
-       /* Physical address of this Tx command's header (not MAC header!),
-        * within command buffer array. */
-       txcmd_phys = dma_map_single(trans->dev,
-                                   &dev_cmd->hdr, firstlen,
-                                   DMA_BIDIRECTIONAL);
-       if (unlikely(dma_mapping_error(trans->dev, txcmd_phys)))
-               goto out_err;
-       dma_unmap_addr_set(out_meta, mapping, txcmd_phys);
-       dma_unmap_len_set(out_meta, len, firstlen);
-
-       if (!ieee80211_has_morefrags(fc)) {
-               txq->need_update = 1;
-       } else {
-               wait_write_ptr = 1;
-               txq->need_update = 0;
-       }
-
-       /* Set up TFD's 2nd entry to point directly to remainder of skb,
-        * if any (802.11 null frames have no payload). */
-       secondlen = skb->len - hdr_len;
-       if (secondlen > 0) {
-               phys_addr = dma_map_single(trans->dev, skb->data + hdr_len,
-                                          secondlen, DMA_TO_DEVICE);
-               if (unlikely(dma_mapping_error(trans->dev, phys_addr))) {
-                       dma_unmap_single(trans->dev,
-                                        dma_unmap_addr(out_meta, mapping),
-                                        dma_unmap_len(out_meta, len),
-                                        DMA_BIDIRECTIONAL);
-                       goto out_err;
-               }
-       }
-
-       /* Attach buffers to TFD */
-       iwl_pcie_tx_build_tfd(trans, txq, txcmd_phys, firstlen, 1);
-       if (secondlen > 0)
-               iwl_pcie_tx_build_tfd(trans, txq, phys_addr, secondlen, 0);
-
-       scratch_phys = txcmd_phys + sizeof(struct iwl_cmd_header) +
-                               offsetof(struct iwl_tx_cmd, scratch);
-
-       /* take back ownership of DMA buffer to enable update */
-       dma_sync_single_for_cpu(trans->dev, txcmd_phys, firstlen,
-                               DMA_BIDIRECTIONAL);
-       tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
-       tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
-
-       IWL_DEBUG_TX(trans, "sequence nr = 0X%x\n",
-                    le16_to_cpu(dev_cmd->hdr.sequence));
-       IWL_DEBUG_TX(trans, "tx_flags = 0X%x\n", le32_to_cpu(tx_cmd->tx_flags));
-
-       /* Set up entry for this TFD in Tx byte-count array */
-       iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len));
-
-       dma_sync_single_for_device(trans->dev, txcmd_phys, firstlen,
-                                  DMA_BIDIRECTIONAL);
-
-       trace_iwlwifi_dev_tx(trans->dev, skb,
-                            &txq->tfds[txq->q.write_ptr],
-                            sizeof(struct iwl_tfd),
-                            &dev_cmd->hdr, firstlen,
-                            skb->data + hdr_len, secondlen);
-       trace_iwlwifi_dev_tx_data(trans->dev, skb,
-                                 skb->data + hdr_len, secondlen);
-
-       /* start timer if queue currently empty */
-       if (txq->need_update && q->read_ptr == q->write_ptr &&
-           trans_pcie->wd_timeout)
-               mod_timer(&txq->stuck_timer, jiffies + trans_pcie->wd_timeout);
-
-       /* Tell device the write index *just past* this latest filled TFD */
-       q->write_ptr = iwl_queue_inc_wrap(q->write_ptr, q->n_bd);
-       iwl_pcie_txq_inc_wr_ptr(trans, txq);
-
-       /*
-        * At this point the frame is "transmitted" successfully
-        * and we will get a TX status notification eventually,
-        * regardless of the value of ret. "ret" only indicates
-        * whether or not we should update the write pointer.
-        */
-       if (iwl_queue_space(q) < q->high_mark) {
-               if (wait_write_ptr) {
-                       txq->need_update = 1;
-                       iwl_pcie_txq_inc_wr_ptr(trans, txq);
-               } else {
-                       iwl_stop_queue(trans, txq);
-               }
-       }
-       spin_unlock(&txq->lock);
-       return 0;
- out_err:
-       spin_unlock(&txq->lock);
-       return -1;
-}
-
 static int iwl_trans_pcie_start_hw(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -1319,27 +653,6 @@ static void iwl_trans_pcie_stop_hw(struct iwl_trans *trans,
        }
 }
 
-static void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
-                                  struct sk_buff_head *skbs)
-{
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_txq *txq = &trans_pcie->txq[txq_id];
-       /* n_bd is usually 256 => n_bd - 1 = 0xff */
-       int tfd_num = ssn & (txq->q.n_bd - 1);
-
-       spin_lock(&txq->lock);
-
-       if (txq->q.read_ptr != tfd_num) {
-               IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d -> %d (%d)\n",
-                                  txq_id, txq->q.read_ptr, tfd_num, ssn);
-               iwl_pcie_txq_reclaim(trans, txq_id, tfd_num, skbs);
-               if (iwl_queue_space(&txq->q) > txq->q.low_mark)
-                       iwl_wake_queue(trans, txq);
-       }
-
-       spin_unlock(&txq->lock);
-}
-
 static void iwl_trans_pcie_write8(struct iwl_trans *trans, u32 ofs, u8 val)
 {
        writeb(val, IWL_TRANS_GET_PCIE_TRANS(trans)->hw_base + ofs);
@@ -1386,7 +699,7 @@ void iwl_trans_pcie_free(struct iwl_trans *trans)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
-       iwl_trans_pcie_tx_free(trans);
+       iwl_pcie_tx_free(trans);
        iwl_pcie_rx_free(trans);
 
        if (trans_pcie->irq_requested == true) {
@@ -1892,13 +1205,13 @@ static const struct iwl_trans_ops trans_ops_pcie = {
 
        .wowlan_suspend = iwl_trans_pcie_wowlan_suspend,
 
-       .send_cmd = iwl_pcie_send_cmd,
+       .send_cmd = iwl_trans_pcie_send_hcmd,
 
        .tx = iwl_trans_pcie_tx,
        .reclaim = iwl_trans_pcie_reclaim,
 
-       .txq_disable = iwl_pcie_txq_disable,
-       .txq_enable = iwl_pcie_txq_enable,
+       .txq_disable = iwl_trans_pcie_txq_disable,
+       .txq_enable = iwl_trans_pcie_txq_enable,
 
        .dbgfs_register = iwl_trans_pcie_dbgfs_register,
 
index eac0481a9c71000020a80e76c43f7bd5c5cbf2ea..4c03b8288c58ba04cadb9a295381d1e3628496a4 100644 (file)
 #define IWL_TX_CRC_SIZE 4
 #define IWL_TX_DELIMITER_SIZE 4
 
+/*************** DMA-QUEUE-GENERAL-FUNCTIONS  *****
+ * DMA services
+ *
+ * Theory of operation
+ *
+ * A Tx or Rx queue resides in host DRAM, and is comprised of a circular buffer
+ * of buffer descriptors, each of which points to one or more data buffers for
+ * the device to read from or fill.  Driver and device exchange status of each
+ * queue via "read" and "write" pointers.  Driver keeps minimum of 2 empty
+ * entries in each circular buffer, to protect against confusing empty and full
+ * queue states.
+ *
+ * The device reads or writes the data in the queues via the device's several
+ * DMA/FIFO channels.  Each queue is mapped to a single DMA channel.
+ *
+ * For Tx queue, there are low mark and high mark limits. If, after queuing
+ * the packet for Tx, free space become < low mark, Tx queue stopped. When
+ * reclaiming packets (on 'tx done IRQ), if free space become > high mark,
+ * Tx queue resumed.
+ *
+ ***************************************************/
+static int iwl_queue_space(const struct iwl_queue *q)
+{
+       int s = q->read_ptr - q->write_ptr;
+
+       if (q->read_ptr > q->write_ptr)
+               s -= q->n_bd;
+
+       if (s <= 0)
+               s += q->n_window;
+       /* keep some reserve to not confuse empty and full situations */
+       s -= 2;
+       if (s < 0)
+               s = 0;
+       return s;
+}
+
+/*
+ * iwl_queue_init - Initialize queue's high/low-water and read/write indexes
+ */
+static int iwl_queue_init(struct iwl_queue *q, int count, int slots_num, u32 id)
+{
+       q->n_bd = count;
+       q->n_window = slots_num;
+       q->id = id;
+
+       /* count must be power-of-two size, otherwise iwl_queue_inc_wrap
+        * and iwl_queue_dec_wrap are broken. */
+       if (WARN_ON(!is_power_of_2(count)))
+               return -EINVAL;
+
+       /* slots_num must be power-of-two size, otherwise
+        * get_cmd_index is broken. */
+       if (WARN_ON(!is_power_of_2(slots_num)))
+               return -EINVAL;
+
+       q->low_mark = q->n_window / 4;
+       if (q->low_mark < 4)
+               q->low_mark = 4;
+
+       q->high_mark = q->n_window / 8;
+       if (q->high_mark < 2)
+               q->high_mark = 2;
+
+       q->write_ptr = 0;
+       q->read_ptr = 0;
+
+       return 0;
+}
+
+
+static int iwl_pcie_alloc_dma_ptr(struct iwl_trans *trans,
+                                 struct iwl_dma_ptr *ptr, size_t size)
+{
+       if (WARN_ON(ptr->addr))
+               return -EINVAL;
+
+       ptr->addr = dma_alloc_coherent(trans->dev, size,
+                                      &ptr->dma, GFP_KERNEL);
+       if (!ptr->addr)
+               return -ENOMEM;
+       ptr->size = size;
+       return 0;
+}
+
+static void iwl_pcie_free_dma_ptr(struct iwl_trans *trans,
+                                 struct iwl_dma_ptr *ptr)
+{
+       if (unlikely(!ptr->addr))
+               return;
+
+       dma_free_coherent(trans->dev, ptr->size, ptr->addr, ptr->dma);
+       memset(ptr, 0, sizeof(*ptr));
+}
+
+static void iwl_pcie_txq_stuck_timer(unsigned long data)
+{
+       struct iwl_txq *txq = (void *)data;
+       struct iwl_queue *q = &txq->q;
+       struct iwl_trans_pcie *trans_pcie = txq->trans_pcie;
+       struct iwl_trans *trans = iwl_trans_pcie_get_trans(trans_pcie);
+       u32 scd_sram_addr = trans_pcie->scd_base_addr +
+                               SCD_TX_STTS_QUEUE_OFFSET(txq->q.id);
+       u8 buf[16];
+       int i;
+
+       spin_lock(&txq->lock);
+       /* check if triggered erroneously */
+       if (txq->q.read_ptr == txq->q.write_ptr) {
+               spin_unlock(&txq->lock);
+               return;
+       }
+       spin_unlock(&txq->lock);
+
+       IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->q.id,
+               jiffies_to_msecs(trans_pcie->wd_timeout));
+       IWL_ERR(trans, "Current SW read_ptr %d write_ptr %d\n",
+               txq->q.read_ptr, txq->q.write_ptr);
+
+       iwl_read_targ_mem_bytes(trans, scd_sram_addr, buf, sizeof(buf));
+
+       iwl_print_hex_error(trans, buf, sizeof(buf));
+
+       for (i = 0; i < FH_TCSR_CHNL_NUM; i++)
+               IWL_ERR(trans, "FH TRBs(%d) = 0x%08x\n", i,
+                       iwl_read_direct32(trans, FH_TX_TRB_REG(i)));
+
+       for (i = 0; i < trans->cfg->base_params->num_of_queues; i++) {
+               u32 status = iwl_read_prph(trans, SCD_QUEUE_STATUS_BITS(i));
+               u8 fifo = (status >> SCD_QUEUE_STTS_REG_POS_TXF) & 0x7;
+               bool active = !!(status & BIT(SCD_QUEUE_STTS_REG_POS_ACTIVE));
+               u32 tbl_dw =
+                       iwl_read_targ_mem(trans,
+                                         trans_pcie->scd_base_addr +
+                                         SCD_TRANS_TBL_OFFSET_QUEUE(i));
+
+               if (i & 0x1)
+                       tbl_dw = (tbl_dw & 0xFFFF0000) >> 16;
+               else
+                       tbl_dw = tbl_dw & 0x0000FFFF;
+
+               IWL_ERR(trans,
+                       "Q %d is %sactive and mapped to fifo %d ra_tid 0x%04x [%d,%d]\n",
+                       i, active ? "" : "in", fifo, tbl_dw,
+                       iwl_read_prph(trans,
+                                     SCD_QUEUE_RDPTR(i)) & (txq->q.n_bd - 1),
+                       iwl_read_prph(trans, SCD_QUEUE_WRPTR(i)));
+       }
+
+       for (i = q->read_ptr; i != q->write_ptr;
+            i = iwl_queue_inc_wrap(i, q->n_bd)) {
+               struct iwl_tx_cmd *tx_cmd =
+                       (struct iwl_tx_cmd *)txq->entries[i].cmd->payload;
+               IWL_ERR(trans, "scratch %d = 0x%08x\n", i,
+                       get_unaligned_le32(&tx_cmd->scratch));
+       }
+
+       iwl_op_mode_nic_error(trans->op_mode);
+}
+
 /*
  * iwl_pcie_txq_update_byte_cnt_tbl - Set up entry in Tx byte-count array
  */
-void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans,
-                                     struct iwl_txq *txq, u16 byte_cnt)
+static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans,
+                                            struct iwl_txq *txq, u16 byte_cnt)
 {
        struct iwlagn_scd_bc_tbl *scd_bc_tbl;
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -87,6 +247,32 @@ void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans,
                        tfd_offset[TFD_QUEUE_SIZE_MAX + write_ptr] = bc_ent;
 }
 
+static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans,
+                                           struct iwl_txq *txq)
+{
+       struct iwl_trans_pcie *trans_pcie =
+               IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
+       int txq_id = txq->q.id;
+       int read_ptr = txq->q.read_ptr;
+       u8 sta_id = 0;
+       __le16 bc_ent;
+       struct iwl_tx_cmd *tx_cmd =
+               (void *)txq->entries[txq->q.read_ptr].cmd->payload;
+
+       WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX);
+
+       if (txq_id != trans_pcie->cmd_queue)
+               sta_id = tx_cmd->sta_id;
+
+       bc_ent = cpu_to_le16(1 | (sta_id << 12));
+       scd_bc_tbl[txq_id].tfd_offset[read_ptr] = bc_ent;
+
+       if (read_ptr < TFD_QUEUE_SIZE_BC_DUP)
+               scd_bc_tbl[txq_id].
+                       tfd_offset[TFD_QUEUE_SIZE_MAX + read_ptr] = bc_ent;
+}
+
 /*
  * iwl_pcie_txq_inc_wr_ptr - Send new write index to hardware
  */
@@ -136,7 +322,7 @@ void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans, struct iwl_txq *txq)
        txq->need_update = 0;
 }
 
-static inline dma_addr_t iwl_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx)
+static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx)
 {
        struct iwl_tfd_tb *tb = &tfd->tbs[idx];
 
@@ -148,15 +334,15 @@ static inline dma_addr_t iwl_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx)
        return addr;
 }
 
-static inline u16 iwl_tfd_tb_get_len(struct iwl_tfd *tfd, u8 idx)
+static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_tfd *tfd, u8 idx)
 {
        struct iwl_tfd_tb *tb = &tfd->tbs[idx];
 
        return le16_to_cpu(tb->hi_n_len) >> 4;
 }
 
-static inline void iwl_tfd_set_tb(struct iwl_tfd *tfd, u8 idx,
-                                 dma_addr_t addr, u16 len)
+static inline void iwl_pcie_tfd_set_tb(struct iwl_tfd *tfd, u8 idx,
+                                      dma_addr_t addr, u16 len)
 {
        struct iwl_tfd_tb *tb = &tfd->tbs[idx];
        u16 hi_n_len = len << 4;
@@ -170,19 +356,20 @@ static inline void iwl_tfd_set_tb(struct iwl_tfd *tfd, u8 idx,
        tfd->num_tbs = idx + 1;
 }
 
-static inline u8 iwl_tfd_get_num_tbs(struct iwl_tfd *tfd)
+static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_tfd *tfd)
 {
        return tfd->num_tbs & 0x1f;
 }
 
-static void iwl_unmap_tfd(struct iwl_trans *trans, struct iwl_cmd_meta *meta,
-                         struct iwl_tfd *tfd, enum dma_data_direction dma_dir)
+static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
+                              struct iwl_cmd_meta *meta, struct iwl_tfd *tfd,
+                              enum dma_data_direction dma_dir)
 {
        int i;
        int num_tbs;
 
        /* Sanity check on number of chunks */
-       num_tbs = iwl_tfd_get_num_tbs(tfd);
+       num_tbs = iwl_pcie_tfd_get_num_tbs(tfd);
 
        if (num_tbs >= IWL_NUM_OF_TBS) {
                IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
@@ -199,8 +386,8 @@ static void iwl_unmap_tfd(struct iwl_trans *trans, struct iwl_cmd_meta *meta,
 
        /* Unmap chunks, if any. */
        for (i = 1; i < num_tbs; i++)
-               dma_unmap_single(trans->dev, iwl_tfd_tb_get_addr(tfd, i),
-                               iwl_tfd_tb_get_len(tfd, i), dma_dir);
+               dma_unmap_single(trans->dev, iwl_pcie_tfd_tb_get_addr(tfd, i),
+                                iwl_pcie_tfd_tb_get_len(tfd, i), dma_dir);
 
        tfd->num_tbs = 0;
 }
@@ -214,8 +401,8 @@ static void iwl_unmap_tfd(struct iwl_trans *trans, struct iwl_cmd_meta *meta,
  * Does NOT advance any TFD circular buffer read/write indexes
  * Does NOT free the TFD itself (which is within circular buffer)
  */
-void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
-                          enum dma_data_direction dma_dir)
+static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
+                                 enum dma_data_direction dma_dir)
 {
        struct iwl_tfd *tfd_tmp = txq->tfds;
 
@@ -226,8 +413,8 @@ void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
        lockdep_assert_held(&txq->lock);
 
        /* We have only q->n_window txq->entries, but we use q->n_bd tfds */
-       iwl_unmap_tfd(trans, &txq->entries[idx].meta, &tfd_tmp[rd_ptr],
-                     dma_dir);
+       iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, &tfd_tmp[rd_ptr],
+                          dma_dir);
 
        /* free SKB */
        if (txq->entries) {
@@ -246,8 +433,8 @@ void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
        }
 }
 
-int iwl_pcie_tx_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
-                                dma_addr_t addr, u16 len, u8 reset)
+static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
+                                 dma_addr_t addr, u16 len, u8 reset)
 {
        struct iwl_queue *q;
        struct iwl_tfd *tfd, *tfd_tmp;
@@ -257,127 +444,566 @@ int iwl_pcie_tx_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
        tfd_tmp = txq->tfds;
        tfd = &tfd_tmp[q->write_ptr];
 
-       if (reset)
-               memset(tfd, 0, sizeof(*tfd));
+       if (reset)
+               memset(tfd, 0, sizeof(*tfd));
+
+       num_tbs = iwl_pcie_tfd_get_num_tbs(tfd);
+
+       /* Each TFD can point to a maximum 20 Tx buffers */
+       if (num_tbs >= IWL_NUM_OF_TBS) {
+               IWL_ERR(trans, "Error can not send more than %d chunks\n",
+                       IWL_NUM_OF_TBS);
+               return -EINVAL;
+       }
+
+       if (WARN_ON(addr & ~DMA_BIT_MASK(36)))
+               return -EINVAL;
+
+       if (unlikely(addr & ~IWL_TX_DMA_MASK))
+               IWL_ERR(trans, "Unaligned address = %llx\n",
+                       (unsigned long long)addr);
+
+       iwl_pcie_tfd_set_tb(tfd, num_tbs, addr, len);
+
+       return 0;
+}
+
+static int iwl_pcie_txq_alloc(struct iwl_trans *trans,
+                              struct iwl_txq *txq, int slots_num,
+                              u32 txq_id)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       size_t tfd_sz = sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX;
+       int i;
+
+       if (WARN_ON(txq->entries || txq->tfds))
+               return -EINVAL;
+
+       setup_timer(&txq->stuck_timer, iwl_pcie_txq_stuck_timer,
+                   (unsigned long)txq);
+       txq->trans_pcie = trans_pcie;
+
+       txq->q.n_window = slots_num;
+
+       txq->entries = kcalloc(slots_num,
+                              sizeof(struct iwl_pcie_txq_entry),
+                              GFP_KERNEL);
+
+       if (!txq->entries)
+               goto error;
+
+       if (txq_id == trans_pcie->cmd_queue)
+               for (i = 0; i < slots_num; i++) {
+                       txq->entries[i].cmd =
+                               kmalloc(sizeof(struct iwl_device_cmd),
+                                       GFP_KERNEL);
+                       if (!txq->entries[i].cmd)
+                               goto error;
+               }
+
+       /* Circular buffer of transmit frame descriptors (TFDs),
+        * shared with device */
+       txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz,
+                                      &txq->q.dma_addr, GFP_KERNEL);
+       if (!txq->tfds) {
+               IWL_ERR(trans, "dma_alloc_coherent(%zd) failed\n", tfd_sz);
+               goto error;
+       }
+       txq->q.id = txq_id;
+
+       return 0;
+error:
+       if (txq->entries && txq_id == trans_pcie->cmd_queue)
+               for (i = 0; i < slots_num; i++)
+                       kfree(txq->entries[i].cmd);
+       kfree(txq->entries);
+       txq->entries = NULL;
+
+       return -ENOMEM;
+
+}
+
+static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
+                             int slots_num, u32 txq_id)
+{
+       int ret;
+
+       txq->need_update = 0;
+
+       /* TFD_QUEUE_SIZE_MAX must be power-of-two size, otherwise
+        * iwl_queue_inc_wrap and iwl_queue_dec_wrap are broken. */
+       BUILD_BUG_ON(TFD_QUEUE_SIZE_MAX & (TFD_QUEUE_SIZE_MAX - 1));
+
+       /* Initialize queue's high/low-water marks, and head/tail indexes */
+       ret = iwl_queue_init(&txq->q, TFD_QUEUE_SIZE_MAX, slots_num,
+                       txq_id);
+       if (ret)
+               return ret;
+
+       spin_lock_init(&txq->lock);
+
+       /*
+        * Tell nic where to find circular buffer of Tx Frame Descriptors for
+        * given Tx queue, and enable the DMA channel used for that queue.
+        * Circular buffer (TFD queue in DRAM) physical base address */
+       iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(txq_id),
+                          txq->q.dma_addr >> 8);
+
+       return 0;
+}
+
+/*
+ * iwl_pcie_txq_unmap -  Unmap any remaining DMA mappings and free skb's
+ */
+static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct iwl_txq *txq = &trans_pcie->txq[txq_id];
+       struct iwl_queue *q = &txq->q;
+       enum dma_data_direction dma_dir;
+
+       if (!q->n_bd)
+               return;
+
+       /* In the command queue, all the TBs are mapped as BIDI
+        * so unmap them as such.
+        */
+       if (txq_id == trans_pcie->cmd_queue)
+               dma_dir = DMA_BIDIRECTIONAL;
+       else
+               dma_dir = DMA_TO_DEVICE;
+
+       spin_lock_bh(&txq->lock);
+       while (q->write_ptr != q->read_ptr) {
+               iwl_pcie_txq_free_tfd(trans, txq, dma_dir);
+               q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd);
+       }
+       spin_unlock_bh(&txq->lock);
+}
+
+/*
+ * iwl_pcie_txq_free - Deallocate DMA queue.
+ * @txq: Transmit queue to deallocate.
+ *
+ * Empty queue by removing and destroying all BD's.
+ * Free all buffers.
+ * 0-fill, but do not free "txq" descriptor structure.
+ */
+static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct iwl_txq *txq = &trans_pcie->txq[txq_id];
+       struct device *dev = trans->dev;
+       int i;
+
+       if (WARN_ON(!txq))
+               return;
+
+       iwl_pcie_txq_unmap(trans, txq_id);
+
+       /* De-alloc array of command/tx buffers */
+       if (txq_id == trans_pcie->cmd_queue)
+               for (i = 0; i < txq->q.n_window; i++) {
+                       kfree(txq->entries[i].cmd);
+                       kfree(txq->entries[i].copy_cmd);
+                       kfree(txq->entries[i].free_buf);
+               }
+
+       /* De-alloc circular buffer of TFDs */
+       if (txq->q.n_bd) {
+               dma_free_coherent(dev, sizeof(struct iwl_tfd) *
+                                 txq->q.n_bd, txq->tfds, txq->q.dma_addr);
+               memset(&txq->q.dma_addr, 0, sizeof(txq->q.dma_addr));
+       }
+
+       kfree(txq->entries);
+       txq->entries = NULL;
+
+       del_timer_sync(&txq->stuck_timer);
+
+       /* 0-fill queue descriptor structure */
+       memset(txq, 0, sizeof(*txq));
+}
+
+/*
+ * Activate/Deactivate Tx DMA/FIFO channels according tx fifos mask
+ */
+static void iwl_pcie_txq_set_sched(struct iwl_trans *trans, u32 mask)
+{
+       struct iwl_trans_pcie __maybe_unused *trans_pcie =
+               IWL_TRANS_GET_PCIE_TRANS(trans);
+
+       iwl_write_prph(trans, SCD_TXFACT, mask);
+}
+
+void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       u32 a;
+       int chan;
+       u32 reg_val;
+
+       /* make sure all queue are not stopped/used */
+       memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
+       memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
+
+       trans_pcie->scd_base_addr =
+               iwl_read_prph(trans, SCD_SRAM_BASE_ADDR);
+
+       WARN_ON(scd_base_addr != 0 &&
+               scd_base_addr != trans_pcie->scd_base_addr);
+
+       a = trans_pcie->scd_base_addr + SCD_CONTEXT_MEM_LOWER_BOUND;
+       /* reset conext data memory */
+       for (; a < trans_pcie->scd_base_addr + SCD_CONTEXT_MEM_UPPER_BOUND;
+               a += 4)
+               iwl_write_targ_mem(trans, a, 0);
+       /* reset tx status memory */
+       for (; a < trans_pcie->scd_base_addr + SCD_TX_STTS_MEM_UPPER_BOUND;
+               a += 4)
+               iwl_write_targ_mem(trans, a, 0);
+       for (; a < trans_pcie->scd_base_addr +
+              SCD_TRANS_TBL_OFFSET_QUEUE(
+                               trans->cfg->base_params->num_of_queues);
+              a += 4)
+               iwl_write_targ_mem(trans, a, 0);
+
+       iwl_write_prph(trans, SCD_DRAM_BASE_ADDR,
+                      trans_pcie->scd_bc_tbls.dma >> 10);
+
+       /* The chain extension of the SCD doesn't work well. This feature is
+        * enabled by default by the HW, so we need to disable it manually.
+        */
+       iwl_write_prph(trans, SCD_CHAINEXT_EN, 0);
+
+       iwl_trans_ac_txq_enable(trans, trans_pcie->cmd_queue,
+                               trans_pcie->cmd_fifo);
+
+       /* Activate all Tx DMA/FIFO channels */
+       iwl_pcie_txq_set_sched(trans, IWL_MASK(0, 7));
+
+       /* Enable DMA channel */
+       for (chan = 0; chan < FH_TCSR_CHNL_NUM; chan++)
+               iwl_write_direct32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(chan),
+                                  FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE |
+                                  FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE);
+
+       /* Update FH chicken bits */
+       reg_val = iwl_read_direct32(trans, FH_TX_CHICKEN_BITS_REG);
+       iwl_write_direct32(trans, FH_TX_CHICKEN_BITS_REG,
+                          reg_val | FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN);
+
+       /* Enable L1-Active */
+       iwl_clear_bits_prph(trans, APMG_PCIDEV_STT_REG,
+                           APMG_PCIDEV_STT_VAL_L1_ACT_DIS);
+}
+
+/*
+ * iwl_pcie_tx_stop - Stop all Tx DMA channels
+ */
+int iwl_pcie_tx_stop(struct iwl_trans *trans)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       int ch, txq_id, ret;
+       unsigned long flags;
+
+       /* Turn off all Tx DMA fifos */
+       spin_lock_irqsave(&trans_pcie->irq_lock, flags);
+
+       iwl_pcie_txq_set_sched(trans, 0);
+
+       /* Stop each Tx DMA channel, and wait for it to be idle */
+       for (ch = 0; ch < FH_TCSR_CHNL_NUM; ch++) {
+               iwl_write_direct32(trans,
+                                  FH_TCSR_CHNL_TX_CONFIG_REG(ch), 0x0);
+               ret = iwl_poll_direct_bit(trans, FH_TSSR_TX_STATUS_REG,
+                       FH_TSSR_TX_STATUS_REG_MSK_CHNL_IDLE(ch), 1000);
+               if (ret < 0)
+                       IWL_ERR(trans,
+                               "Failing on timeout while stopping DMA channel %d [0x%08x]\n",
+                               ch,
+                               iwl_read_direct32(trans,
+                                                 FH_TSSR_TX_STATUS_REG));
+       }
+       spin_unlock_irqrestore(&trans_pcie->irq_lock, flags);
+
+       if (!trans_pcie->txq) {
+               IWL_WARN(trans,
+                        "Stopping tx queues that aren't allocated...\n");
+               return 0;
+       }
+
+       /* Unmap DMA from host system and free skb's */
+       for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
+            txq_id++)
+               iwl_pcie_txq_unmap(trans, txq_id);
+
+       return 0;
+}
+
+/*
+ * iwl_trans_tx_free - Free TXQ Context
+ *
+ * Destroy all TX DMA queues and structures
+ */
+void iwl_pcie_tx_free(struct iwl_trans *trans)
+{
+       int txq_id;
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+       /* Tx queues */
+       if (trans_pcie->txq) {
+               for (txq_id = 0;
+                    txq_id < trans->cfg->base_params->num_of_queues; txq_id++)
+                       iwl_pcie_txq_free(trans, txq_id);
+       }
+
+       kfree(trans_pcie->txq);
+       trans_pcie->txq = NULL;
+
+       iwl_pcie_free_dma_ptr(trans, &trans_pcie->kw);
+
+       iwl_pcie_free_dma_ptr(trans, &trans_pcie->scd_bc_tbls);
+}
+
+/*
+ * iwl_pcie_tx_alloc - allocate TX context
+ * Allocate all Tx DMA structures and initialize them
+ */
+static int iwl_pcie_tx_alloc(struct iwl_trans *trans)
+{
+       int ret;
+       int txq_id, slots_num;
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+       u16 scd_bc_tbls_size = trans->cfg->base_params->num_of_queues *
+                       sizeof(struct iwlagn_scd_bc_tbl);
+
+       /*It is not allowed to alloc twice, so warn when this happens.
+        * We cannot rely on the previous allocation, so free and fail */
+       if (WARN_ON(trans_pcie->txq)) {
+               ret = -EINVAL;
+               goto error;
+       }
+
+       ret = iwl_pcie_alloc_dma_ptr(trans, &trans_pcie->scd_bc_tbls,
+                                  scd_bc_tbls_size);
+       if (ret) {
+               IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
+               goto error;
+       }
+
+       /* Alloc keep-warm buffer */
+       ret = iwl_pcie_alloc_dma_ptr(trans, &trans_pcie->kw, IWL_KW_SIZE);
+       if (ret) {
+               IWL_ERR(trans, "Keep Warm allocation failed\n");
+               goto error;
+       }
+
+       trans_pcie->txq = kcalloc(trans->cfg->base_params->num_of_queues,
+                                 sizeof(struct iwl_txq), GFP_KERNEL);
+       if (!trans_pcie->txq) {
+               IWL_ERR(trans, "Not enough memory for txq\n");
+               ret = ENOMEM;
+               goto error;
+       }
+
+       /* Alloc and init all Tx queues, including the command queue (#4/#9) */
+       for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
+            txq_id++) {
+               slots_num = (txq_id == trans_pcie->cmd_queue) ?
+                                       TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS;
+               ret = iwl_pcie_txq_alloc(trans, &trans_pcie->txq[txq_id],
+                                         slots_num, txq_id);
+               if (ret) {
+                       IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
+                       goto error;
+               }
+       }
+
+       return 0;
+
+error:
+       iwl_pcie_tx_free(trans);
+
+       return ret;
+}
+int iwl_pcie_tx_init(struct iwl_trans *trans)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       int ret;
+       int txq_id, slots_num;
+       unsigned long flags;
+       bool alloc = false;
+
+       if (!trans_pcie->txq) {
+               ret = iwl_pcie_tx_alloc(trans);
+               if (ret)
+                       goto error;
+               alloc = true;
+       }
+
+       spin_lock_irqsave(&trans_pcie->irq_lock, flags);
+
+       /* Turn off all Tx DMA fifos */
+       iwl_write_prph(trans, SCD_TXFACT, 0);
+
+       /* Tell NIC where to find the "keep warm" buffer */
+       iwl_write_direct32(trans, FH_KW_MEM_ADDR_REG,
+                          trans_pcie->kw.dma >> 4);
+
+       spin_unlock_irqrestore(&trans_pcie->irq_lock, flags);
+
+       /* Alloc and init all Tx queues, including the command queue (#4/#9) */
+       for (txq_id = 0; txq_id < trans->cfg->base_params->num_of_queues;
+            txq_id++) {
+               slots_num = (txq_id == trans_pcie->cmd_queue) ?
+                                       TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS;
+               ret = iwl_pcie_txq_init(trans, &trans_pcie->txq[txq_id],
+                                        slots_num, txq_id);
+               if (ret) {
+                       IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
+                       goto error;
+               }
+       }
+
+       return 0;
+error:
+       /*Upon error, free only if we allocated something */
+       if (alloc)
+               iwl_pcie_tx_free(trans);
+       return ret;
+}
+
+static inline void iwl_pcie_txq_progress(struct iwl_trans_pcie *trans_pcie,
+                                          struct iwl_txq *txq)
+{
+       if (!trans_pcie->wd_timeout)
+               return;
+
+       /*
+        * if empty delete timer, otherwise move timer forward
+        * since we're making progress on this queue
+        */
+       if (txq->q.read_ptr == txq->q.write_ptr)
+               del_timer(&txq->stuck_timer);
+       else
+               mod_timer(&txq->stuck_timer, jiffies + trans_pcie->wd_timeout);
+}
+
+/* Frees buffers until index _not_ inclusive */
+static int iwl_pcie_txq_reclaim(struct iwl_trans *trans, int txq_id, int index,
+                               struct sk_buff_head *skbs)
+{
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct iwl_txq *txq = &trans_pcie->txq[txq_id];
+       struct iwl_queue *q = &txq->q;
+       int last_to_free;
+       int freed = 0;
+
+       /* This function is not meant to release cmd queue*/
+       if (WARN_ON(txq_id == trans_pcie->cmd_queue))
+               return 0;
 
-       num_tbs = iwl_tfd_get_num_tbs(tfd);
+       lockdep_assert_held(&txq->lock);
 
-       /* Each TFD can point to a maximum 20 Tx buffers */
-       if (num_tbs >= IWL_NUM_OF_TBS) {
-               IWL_ERR(trans, "Error can not send more than %d chunks\n",
-                       IWL_NUM_OF_TBS);
-               return -EINVAL;
+       /*Since we free until index _not_ inclusive, the one before index is
+        * the last we will free. This one must be used */
+       last_to_free = iwl_queue_dec_wrap(index, q->n_bd);
+
+       if ((index >= q->n_bd) ||
+           (iwl_queue_used(q, last_to_free) == 0)) {
+               IWL_ERR(trans,
+                       "%s: Read index for DMA queue txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n",
+                       __func__, txq_id, last_to_free, q->n_bd,
+                       q->write_ptr, q->read_ptr);
+               return 0;
        }
 
-       if (WARN_ON(addr & ~DMA_BIT_MASK(36)))
-               return -EINVAL;
+       if (WARN_ON(!skb_queue_empty(skbs)))
+               return 0;
 
-       if (unlikely(addr & ~IWL_TX_DMA_MASK))
-               IWL_ERR(trans, "Unaligned address = %llx\n",
-                       (unsigned long long)addr);
+       for (;
+            q->read_ptr != index;
+            q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
 
-       iwl_tfd_set_tb(tfd, num_tbs, addr, len);
+               if (WARN_ON_ONCE(txq->entries[txq->q.read_ptr].skb == NULL))
+                       continue;
 
-       return 0;
-}
+               __skb_queue_tail(skbs, txq->entries[txq->q.read_ptr].skb);
 
-/*************** DMA-QUEUE-GENERAL-FUNCTIONS  *****
- * DMA services
- *
- * Theory of operation
- *
- * A Tx or Rx queue resides in host DRAM, and is comprised of a circular buffer
- * of buffer descriptors, each of which points to one or more data buffers for
- * the device to read from or fill.  Driver and device exchange status of each
- * queue via "read" and "write" pointers.  Driver keeps minimum of 2 empty
- * entries in each circular buffer, to protect against confusing empty and full
- * queue states.
- *
- * The device reads or writes the data in the queues via the device's several
- * DMA/FIFO channels.  Each queue is mapped to a single DMA channel.
- *
- * For Tx queue, there are low mark and high mark limits. If, after queuing
- * the packet for Tx, free space become < low mark, Tx queue stopped. When
- * reclaiming packets (on 'tx done IRQ), if free space become > high mark,
- * Tx queue resumed.
- *
- ***************************************************/
+               txq->entries[txq->q.read_ptr].skb = NULL;
 
-int iwl_queue_space(const struct iwl_queue *q)
-{
-       int s = q->read_ptr - q->write_ptr;
+               iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq);
 
-       if (q->read_ptr > q->write_ptr)
-               s -= q->n_bd;
+               iwl_pcie_txq_free_tfd(trans, txq, DMA_TO_DEVICE);
+               freed++;
+       }
 
-       if (s <= 0)
-               s += q->n_window;
-       /* keep some reserve to not confuse empty and full situations */
-       s -= 2;
-       if (s < 0)
-               s = 0;
-       return s;
+       iwl_pcie_txq_progress(trans_pcie, txq);
+
+       return freed;
 }
 
-/*
- * iwl_queue_init - Initialize queue's high/low-water and read/write indexes
- */
-int iwl_queue_init(struct iwl_queue *q, int count, int slots_num, u32 id)
+void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
+                           struct sk_buff_head *skbs)
 {
-       q->n_bd = count;
-       q->n_window = slots_num;
-       q->id = id;
-
-       /* count must be power-of-two size, otherwise iwl_queue_inc_wrap
-        * and iwl_queue_dec_wrap are broken. */
-       if (WARN_ON(!is_power_of_2(count)))
-               return -EINVAL;
-
-       /* slots_num must be power-of-two size, otherwise
-        * get_cmd_index is broken. */
-       if (WARN_ON(!is_power_of_2(slots_num)))
-               return -EINVAL;
-
-       q->low_mark = q->n_window / 4;
-       if (q->low_mark < 4)
-               q->low_mark = 4;
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct iwl_txq *txq = &trans_pcie->txq[txq_id];
+       /* n_bd is usually 256 => n_bd - 1 = 0xff */
+       int tfd_num = ssn & (txq->q.n_bd - 1);
 
-       q->high_mark = q->n_window / 8;
-       if (q->high_mark < 2)
-               q->high_mark = 2;
+       spin_lock(&txq->lock);
 
-       q->write_ptr = q->read_ptr = 0;
+       if (txq->q.read_ptr != tfd_num) {
+               IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d -> %d (%d)\n",
+                                  txq_id, txq->q.read_ptr, tfd_num, ssn);
+               iwl_pcie_txq_reclaim(trans, txq_id, tfd_num, skbs);
+               if (iwl_queue_space(&txq->q) > txq->q.low_mark)
+                       iwl_wake_queue(trans, txq);
+       }
 
-       return 0;
+       spin_unlock(&txq->lock);
 }
 
-static void iwlagn_txq_inval_byte_cnt_tbl(struct iwl_trans *trans,
-                                         struct iwl_txq *txq)
+/*
+ * iwl_pcie_cmdq_reclaim - Reclaim TX command queue entries already Tx'd
+ *
+ * When FW advances 'R' index, all entries between old and new 'R' index
+ * need to be reclaimed. As result, some free space forms.  If there is
+ * enough free space (> low mark), wake the stack that feeds us.
+ */
+static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx)
 {
-       struct iwl_trans_pcie *trans_pcie =
-               IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
-       int txq_id = txq->q.id;
-       int read_ptr = txq->q.read_ptr;
-       u8 sta_id = 0;
-       __le16 bc_ent;
-       struct iwl_tx_cmd *tx_cmd =
-               (void *)txq->entries[txq->q.read_ptr].cmd->payload;
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       struct iwl_txq *txq = &trans_pcie->txq[txq_id];
+       struct iwl_queue *q = &txq->q;
+       int nfreed = 0;
 
-       WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX);
+       lockdep_assert_held(&txq->lock);
 
-       if (txq_id != trans_pcie->cmd_queue)
-               sta_id = tx_cmd->sta_id;
+       if ((idx >= q->n_bd) || (iwl_queue_used(q, idx) == 0)) {
+               IWL_ERR(trans,
+                       "%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n",
+                       __func__, txq_id, idx, q->n_bd,
+                       q->write_ptr, q->read_ptr);
+               return;
+       }
 
-       bc_ent = cpu_to_le16(1 | (sta_id << 12));
-       scd_bc_tbl[txq_id].tfd_offset[read_ptr] = bc_ent;
+       for (idx = iwl_queue_inc_wrap(idx, q->n_bd); q->read_ptr != idx;
+            q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
 
-       if (read_ptr < TFD_QUEUE_SIZE_BC_DUP)
-               scd_bc_tbl[txq_id].
-                       tfd_offset[TFD_QUEUE_SIZE_MAX + read_ptr] = bc_ent;
+               if (nfreed++ > 0) {
+                       IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n",
+                               idx, q->write_ptr, q->read_ptr);
+                       iwl_op_mode_nic_error(trans->op_mode);
+               }
+       }
+
+       iwl_pcie_txq_progress(trans_pcie, txq);
 }
 
-static int iwl_txq_set_ratid_map(struct iwl_trans *trans, u16 ra_tid,
+static int iwl_pcie_txq_set_ratid_map(struct iwl_trans *trans, u16 ra_tid,
                                 u16 txq_id)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -402,7 +1028,8 @@ static int iwl_txq_set_ratid_map(struct iwl_trans *trans, u16 ra_tid,
        return 0;
 }
 
-static inline void iwl_txq_set_inactive(struct iwl_trans *trans, u16 txq_id)
+static inline void iwl_pcie_txq_set_inactive(struct iwl_trans *trans,
+                                            u16 txq_id)
 {
        /* Simply stop the queue, but don't change any configuration;
         * the SCD_ACT_EN bit is the write-enable mask for the ACTIVE bit. */
@@ -412,8 +1039,8 @@ static inline void iwl_txq_set_inactive(struct iwl_trans *trans, u16 txq_id)
                (1 << SCD_QUEUE_STTS_REG_POS_SCD_ACT_EN));
 }
 
-void iwl_pcie_txq_enable(struct iwl_trans *trans, int txq_id, int fifo,
-                        int sta_id, int tid, int frame_limit, u16 ssn)
+void iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, int fifo,
+                              int sta_id, int tid, int frame_limit, u16 ssn)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
@@ -421,7 +1048,7 @@ void iwl_pcie_txq_enable(struct iwl_trans *trans, int txq_id, int fifo,
                WARN_ONCE(1, "queue %d already used - expect issues", txq_id);
 
        /* Stop this Tx queue before configuring it */
-       iwl_txq_set_inactive(trans, txq_id);
+       iwl_pcie_txq_set_inactive(trans, txq_id);
 
        /* Set this queue as a chain-building queue unless it is CMD queue */
        if (txq_id != trans_pcie->cmd_queue)
@@ -432,7 +1059,7 @@ void iwl_pcie_txq_enable(struct iwl_trans *trans, int txq_id, int fifo,
                u16 ra_tid = BUILD_RAxTID(sta_id, tid);
 
                /* Map receiver-address / traffic-ID to this queue */
-               iwl_txq_set_ratid_map(trans, ra_tid, txq_id);
+               iwl_pcie_txq_set_ratid_map(trans, ra_tid, txq_id);
 
                /* enable aggregations for the queue */
                iwl_set_bits_prph(trans, SCD_AGGR_SEL, BIT(txq_id));
@@ -474,7 +1101,7 @@ void iwl_pcie_txq_enable(struct iwl_trans *trans, int txq_id, int fifo,
                            txq_id, fifo, ssn & 0xff);
 }
 
-void iwl_pcie_txq_disable(struct iwl_trans *trans, int txq_id)
+void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        u32 stts_addr = trans_pcie->scd_base_addr +
@@ -486,7 +1113,7 @@ void iwl_pcie_txq_disable(struct iwl_trans *trans, int txq_id)
                return;
        }
 
-       iwl_txq_set_inactive(trans, txq_id);
+       iwl_pcie_txq_set_inactive(trans, txq_id);
 
        _iwl_write_targ_mem_dwords(trans, stts_addr,
                                   zero_val, ARRAY_SIZE(zero_val));
@@ -499,7 +1126,7 @@ void iwl_pcie_txq_disable(struct iwl_trans *trans, int txq_id)
 /*************** HOST COMMAND QUEUE FUNCTIONS   *****/
 
 /*
- * iwl_enqueue_hcmd - enqueue a uCode command
+ * iwl_pcie_enqueue_hcmd - enqueue a uCode command
  * @priv: device private data point
  * @cmd: a point to the ucode command structure
  *
@@ -507,7 +1134,8 @@ void iwl_pcie_txq_disable(struct iwl_trans *trans, int txq_id)
  * failed. On success, it turns the index (> 0) of command in the
  * command queue.
  */
-static int iwl_enqueue_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
+static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
+                                struct iwl_host_cmd *cmd)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue];
@@ -650,7 +1278,7 @@ static int iwl_enqueue_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
        dma_unmap_addr_set(out_meta, mapping, phys_addr);
        dma_unmap_len_set(out_meta, len, copy_size);
 
-       iwl_pcie_tx_build_tfd(trans, txq, phys_addr, copy_size, 1);
+       iwl_pcie_txq_build_tfd(trans, txq, phys_addr, copy_size, 1);
 
        for (i = 0; i < IWL_MAX_CMD_TFDS; i++) {
                const void *data = cmd->data[i];
@@ -665,14 +1293,14 @@ static int iwl_enqueue_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
                phys_addr = dma_map_single(trans->dev, (void *)data,
                                           cmd->len[i], DMA_BIDIRECTIONAL);
                if (dma_mapping_error(trans->dev, phys_addr)) {
-                       iwl_unmap_tfd(trans, out_meta,
-                                     &txq->tfds[q->write_ptr],
-                                     DMA_BIDIRECTIONAL);
+                       iwl_pcie_tfd_unmap(trans, out_meta,
+                                          &txq->tfds[q->write_ptr],
+                                          DMA_BIDIRECTIONAL);
                        idx = -ENOMEM;
                        goto out;
                }
 
-               iwl_pcie_tx_build_tfd(trans, txq, phys_addr, cmd->len[i], 0);
+               iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmd->len[i], 0);
        }
 
        out_meta->flags = cmd->flags;
@@ -701,61 +1329,6 @@ static int iwl_enqueue_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
        return idx;
 }
 
-static inline void iwl_queue_progress(struct iwl_trans_pcie *trans_pcie,
-                                     struct iwl_txq *txq)
-{
-       if (!trans_pcie->wd_timeout)
-               return;
-
-       /*
-        * if empty delete timer, otherwise move timer forward
-        * since we're making progress on this queue
-        */
-       if (txq->q.read_ptr == txq->q.write_ptr)
-               del_timer(&txq->stuck_timer);
-       else
-               mod_timer(&txq->stuck_timer, jiffies + trans_pcie->wd_timeout);
-}
-
-/*
- * iwl_hcmd_queue_reclaim - Reclaim TX command queue entries already Tx'd
- *
- * When FW advances 'R' index, all entries between old and new 'R' index
- * need to be reclaimed. As result, some free space forms.  If there is
- * enough free space (> low mark), wake the stack that feeds us.
- */
-static void iwl_hcmd_queue_reclaim(struct iwl_trans *trans, int txq_id,
-                                  int idx)
-{
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_txq *txq = &trans_pcie->txq[txq_id];
-       struct iwl_queue *q = &txq->q;
-       int nfreed = 0;
-
-       lockdep_assert_held(&txq->lock);
-
-       if ((idx >= q->n_bd) || (iwl_queue_used(q, idx) == 0)) {
-               IWL_ERR(trans,
-                       "%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n",
-                       __func__, txq_id, idx, q->n_bd,
-                       q->write_ptr, q->read_ptr);
-               return;
-       }
-
-       for (idx = iwl_queue_inc_wrap(idx, q->n_bd); q->read_ptr != idx;
-            q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
-
-               if (nfreed++ > 0) {
-                       IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n",
-                               idx, q->write_ptr, q->read_ptr);
-                       iwl_op_mode_nic_error(trans->op_mode);
-               }
-
-       }
-
-       iwl_queue_progress(trans_pcie, txq);
-}
-
 /*
  * iwl_pcie_hcmd_complete - Pull unused buffers off the queue and reclaim them
  * @rxb: Rx buffer to reclaim
@@ -797,7 +1370,7 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
        cmd = txq->entries[cmd_index].cmd;
        meta = &txq->entries[cmd_index].meta;
 
-       iwl_unmap_tfd(trans, meta, &txq->tfds[index], DMA_BIDIRECTIONAL);
+       iwl_pcie_tfd_unmap(trans, meta, &txq->tfds[index], DMA_BIDIRECTIONAL);
 
        /* Input error checking is done when commands are added to queue. */
        if (meta->flags & CMD_WANT_SKB) {
@@ -809,7 +1382,7 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
                meta->source->handler_status = handler_status;
        }
 
-       iwl_hcmd_queue_reclaim(trans, txq_id, index);
+       iwl_pcie_cmdq_reclaim(trans, txq_id, index);
 
        if (!(meta->flags & CMD_ASYNC)) {
                if (!test_bit(STATUS_HCMD_ACTIVE, &trans_pcie->status)) {
@@ -830,7 +1403,8 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
 
 #define HOST_COMPLETE_TIMEOUT (2 * HZ)
 
-static int iwl_send_cmd_async(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
+static int iwl_pcie_send_hcmd_async(struct iwl_trans *trans,
+                                   struct iwl_host_cmd *cmd)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        int ret;
@@ -840,7 +1414,7 @@ static int iwl_send_cmd_async(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
                return -EINVAL;
 
 
-       ret = iwl_enqueue_hcmd(trans, cmd);
+       ret = iwl_pcie_enqueue_hcmd(trans, cmd);
        if (ret < 0) {
                IWL_ERR(trans,
                        "Error sending %s: enqueue_hcmd failed: %d\n",
@@ -850,7 +1424,8 @@ static int iwl_send_cmd_async(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
        return 0;
 }
 
-static int iwl_send_cmd_sync(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
+static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans,
+                                  struct iwl_host_cmd *cmd)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        int cmd_idx;
@@ -869,7 +1444,7 @@ static int iwl_send_cmd_sync(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
        IWL_DEBUG_INFO(trans, "Setting HCMD_ACTIVE for command %s\n",
                       get_cmd_string(trans_pcie, cmd->id));
 
-       cmd_idx = iwl_enqueue_hcmd(trans, cmd);
+       cmd_idx = iwl_pcie_enqueue_hcmd(trans, cmd);
        if (cmd_idx < 0) {
                ret = cmd_idx;
                clear_bit(STATUS_HCMD_ACTIVE, &trans_pcie->status);
@@ -949,7 +1524,7 @@ cancel:
        return ret;
 }
 
-int iwl_pcie_send_cmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
+int iwl_trans_pcie_send_hcmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
@@ -960,62 +1535,172 @@ int iwl_pcie_send_cmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
                return -ERFKILL;
 
        if (cmd->flags & CMD_ASYNC)
-               return iwl_send_cmd_async(trans, cmd);
+               return iwl_pcie_send_hcmd_async(trans, cmd);
 
        /* We still can fail on RFKILL that can be asserted while we wait */
-       return iwl_send_cmd_sync(trans, cmd);
+       return iwl_pcie_send_hcmd_sync(trans, cmd);
 }
 
-/* Frees buffers until index _not_ inclusive */
-int iwl_pcie_txq_reclaim(struct iwl_trans *trans, int txq_id, int index,
-                        struct sk_buff_head *skbs)
+int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
+                     struct iwl_device_cmd *dev_cmd, int txq_id)
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-       struct iwl_txq *txq = &trans_pcie->txq[txq_id];
-       struct iwl_queue *q = &txq->q;
-       int last_to_free;
-       int freed = 0;
+       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+       struct iwl_tx_cmd *tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload;
+       struct iwl_cmd_meta *out_meta;
+       struct iwl_txq *txq;
+       struct iwl_queue *q;
+       dma_addr_t phys_addr = 0;
+       dma_addr_t txcmd_phys;
+       dma_addr_t scratch_phys;
+       u16 len, firstlen, secondlen;
+       u8 wait_write_ptr = 0;
+       __le16 fc = hdr->frame_control;
+       u8 hdr_len = ieee80211_hdrlen(fc);
+       u16 __maybe_unused wifi_seq;
+
+       txq = &trans_pcie->txq[txq_id];
+       q = &txq->q;
 
-       /* This function is not meant to release cmd queue*/
-       if (WARN_ON(txq_id == trans_pcie->cmd_queue))
-               return 0;
+       if (unlikely(!test_bit(txq_id, trans_pcie->queue_used))) {
+               WARN_ON_ONCE(1);
+               return -EINVAL;
+       }
 
-       lockdep_assert_held(&txq->lock);
+       spin_lock(&txq->lock);
 
-       /*Since we free until index _not_ inclusive, the one before index is
-        * the last we will free. This one must be used */
-       last_to_free = iwl_queue_dec_wrap(index, q->n_bd);
+       /* In AGG mode, the index in the ring must correspond to the WiFi
+        * sequence number. This is a HW requirements to help the SCD to parse
+        * the BA.
+        * Check here that the packets are in the right place on the ring.
+        */
+#ifdef CONFIG_IWLWIFI_DEBUG
+       wifi_seq = SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
+       WARN_ONCE((iwl_read_prph(trans, SCD_AGGR_SEL) & BIT(txq_id)) &&
+                 ((wifi_seq & 0xff) != q->write_ptr),
+                 "Q: %d WiFi Seq %d tfdNum %d",
+                 txq_id, wifi_seq, q->write_ptr);
+#endif
+
+       /* Set up driver data for this TFD */
+       txq->entries[q->write_ptr].skb = skb;
+       txq->entries[q->write_ptr].cmd = dev_cmd;
+
+       dev_cmd->hdr.cmd = REPLY_TX;
+       dev_cmd->hdr.sequence =
+               cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
+                           INDEX_TO_SEQ(q->write_ptr)));
+
+       /* Set up first empty entry in queue's array of Tx/cmd buffers */
+       out_meta = &txq->entries[q->write_ptr].meta;
 
-       if ((index >= q->n_bd) ||
-          (iwl_queue_used(q, last_to_free) == 0)) {
-               IWL_ERR(trans,
-                       "%s: Read index for DMA queue txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n",
-                       __func__, txq_id, last_to_free, q->n_bd,
-                       q->write_ptr, q->read_ptr);
-               return 0;
+       /*
+        * Use the first empty entry in this queue's command buffer array
+        * to contain the Tx command and MAC header concatenated together
+        * (payload data will be in another buffer).
+        * Size of this varies, due to varying MAC header length.
+        * If end is not dword aligned, we'll have 2 extra bytes at the end
+        * of the MAC header (device reads on dword boundaries).
+        * We'll tell device about this padding later.
+        */
+       len = sizeof(struct iwl_tx_cmd) +
+               sizeof(struct iwl_cmd_header) + hdr_len;
+       firstlen = (len + 3) & ~3;
+
+       /* Tell NIC about any 2-byte padding after MAC header */
+       if (firstlen != len)
+               tx_cmd->tx_flags |= TX_CMD_FLG_MH_PAD_MSK;
+
+       /* Physical address of this Tx command's header (not MAC header!),
+        * within command buffer array. */
+       txcmd_phys = dma_map_single(trans->dev,
+                                   &dev_cmd->hdr, firstlen,
+                                   DMA_BIDIRECTIONAL);
+       if (unlikely(dma_mapping_error(trans->dev, txcmd_phys)))
+               goto out_err;
+       dma_unmap_addr_set(out_meta, mapping, txcmd_phys);
+       dma_unmap_len_set(out_meta, len, firstlen);
+
+       if (!ieee80211_has_morefrags(fc)) {
+               txq->need_update = 1;
+       } else {
+               wait_write_ptr = 1;
+               txq->need_update = 0;
        }
 
-       if (WARN_ON(!skb_queue_empty(skbs)))
-               return 0;
+       /* Set up TFD's 2nd entry to point directly to remainder of skb,
+        * if any (802.11 null frames have no payload). */
+       secondlen = skb->len - hdr_len;
+       if (secondlen > 0) {
+               phys_addr = dma_map_single(trans->dev, skb->data + hdr_len,
+                                          secondlen, DMA_TO_DEVICE);
+               if (unlikely(dma_mapping_error(trans->dev, phys_addr))) {
+                       dma_unmap_single(trans->dev,
+                                        dma_unmap_addr(out_meta, mapping),
+                                        dma_unmap_len(out_meta, len),
+                                        DMA_BIDIRECTIONAL);
+                       goto out_err;
+               }
+       }
 
-       for (;
-            q->read_ptr != index;
-            q->read_ptr = iwl_queue_inc_wrap(q->read_ptr, q->n_bd)) {
+       /* Attach buffers to TFD */
+       iwl_pcie_txq_build_tfd(trans, txq, txcmd_phys, firstlen, 1);
+       if (secondlen > 0)
+               iwl_pcie_txq_build_tfd(trans, txq, phys_addr, secondlen, 0);
 
-               if (WARN_ON_ONCE(txq->entries[txq->q.read_ptr].skb == NULL))
-                       continue;
+       scratch_phys = txcmd_phys + sizeof(struct iwl_cmd_header) +
+                               offsetof(struct iwl_tx_cmd, scratch);
 
-               __skb_queue_tail(skbs, txq->entries[txq->q.read_ptr].skb);
+       /* take back ownership of DMA buffer to enable update */
+       dma_sync_single_for_cpu(trans->dev, txcmd_phys, firstlen,
+                               DMA_BIDIRECTIONAL);
+       tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
+       tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
 
-               txq->entries[txq->q.read_ptr].skb = NULL;
+       IWL_DEBUG_TX(trans, "sequence nr = 0X%x\n",
+                    le16_to_cpu(dev_cmd->hdr.sequence));
+       IWL_DEBUG_TX(trans, "tx_flags = 0X%x\n", le32_to_cpu(tx_cmd->tx_flags));
 
-               iwlagn_txq_inval_byte_cnt_tbl(trans, txq);
+       /* Set up entry for this TFD in Tx byte-count array */
+       iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len));
 
-               iwl_pcie_txq_free_tfd(trans, txq, DMA_TO_DEVICE);
-               freed++;
-       }
+       dma_sync_single_for_device(trans->dev, txcmd_phys, firstlen,
+                                  DMA_BIDIRECTIONAL);
 
-       iwl_queue_progress(trans_pcie, txq);
+       trace_iwlwifi_dev_tx(trans->dev, skb,
+                            &txq->tfds[txq->q.write_ptr],
+                            sizeof(struct iwl_tfd),
+                            &dev_cmd->hdr, firstlen,
+                            skb->data + hdr_len, secondlen);
+       trace_iwlwifi_dev_tx_data(trans->dev, skb,
+                                 skb->data + hdr_len, secondlen);
 
-       return freed;
+       /* start timer if queue currently empty */
+       if (txq->need_update && q->read_ptr == q->write_ptr &&
+           trans_pcie->wd_timeout)
+               mod_timer(&txq->stuck_timer, jiffies + trans_pcie->wd_timeout);
+
+       /* Tell device the write index *just past* this latest filled TFD */
+       q->write_ptr = iwl_queue_inc_wrap(q->write_ptr, q->n_bd);
+       iwl_pcie_txq_inc_wr_ptr(trans, txq);
+
+       /*
+        * At this point the frame is "transmitted" successfully
+        * and we will get a TX status notification eventually,
+        * regardless of the value of ret. "ret" only indicates
+        * whether or not we should update the write pointer.
+        */
+       if (iwl_queue_space(q) < q->high_mark) {
+               if (wait_write_ptr) {
+                       txq->need_update = 1;
+                       iwl_pcie_txq_inc_wr_ptr(trans, txq);
+               } else {
+                       iwl_stop_queue(trans, txq);
+               }
+       }
+       spin_unlock(&txq->lock);
+       return 0;
+out_err:
+       spin_unlock(&txq->lock);
+       return -1;
 }