ar71xx: split packets into multiple descriptors on ar716x
authorFelix Fietkau <nbd@openwrt.org>
Fri, 29 Aug 2014 19:42:08 +0000 (19:42 +0000)
committerFelix Fietkau <nbd@openwrt.org>
Fri, 29 Aug 2014 19:42:08 +0000 (19:42 +0000)
This improves performance when doing concurrent rx/tx on a single
ethernet MAC, e.g. when routing between VLANs.

Fixes #13072

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
SVN-Revision: 42328

target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_ethtool.c
target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c

index f6d85b908425f9591281b4e48961f307c6791b3f..9fb10fd8981e649af7c0b8543369b6a63dc91afc 100644 (file)
 
 #define AG71XX_TX_MTU_LEN      1540
 
-#define AG71XX_TX_RING_SIZE_DEFAULT    32
+#define AG71XX_TX_RING_SPLIT           256
+#define AG71XX_TX_RING_DS_PER_PKT      DIV_ROUND_UP(AG71XX_TX_MTU_LEN, \
+                                                    AG71XX_TX_RING_SPLIT)
+#define AG71XX_TX_RING_SIZE_DEFAULT    48
 #define AG71XX_RX_RING_SIZE_DEFAULT    128
 
-#define AG71XX_TX_RING_SIZE_MAX                32
+#define AG71XX_TX_RING_SIZE_MAX                48
 #define AG71XX_RX_RING_SIZE_MAX                128
 
 #ifdef CONFIG_AG71XX_DEBUG
@@ -99,7 +102,8 @@ struct ag71xx_ring {
        struct ag71xx_buf       *buf;
        u8                      *descs_cpu;
        dma_addr_t              descs_dma;
-       unsigned int            desc_size;
+       u16                     desc_split;
+       u16                     desc_size;
        unsigned int            curr;
        unsigned int            dirty;
        unsigned int            size;
index 498fbed1ff1d234b95eb32c7d5efcf6521f70318..cad9f2ff069e760f7b3ad67d95896c44721830ac 100644 (file)
@@ -75,6 +75,9 @@ static void ag71xx_ethtool_get_ringparam(struct net_device *dev,
        er->rx_pending = ag->rx_ring.size;
        er->rx_mini_pending = 0;
        er->rx_jumbo_pending = 0;
+
+       if (ag->tx_ring.desc_split)
+               er->tx_pending /= AG71XX_TX_RING_DS_PER_PKT;
 }
 
 static int ag71xx_ethtool_set_ringparam(struct net_device *dev,
@@ -103,6 +106,9 @@ static int ag71xx_ethtool_set_ringparam(struct net_device *dev,
                        return err;
        }
 
+       if (ag->tx_ring.desc_split)
+               tx_size *= AG71XX_TX_RING_DS_PER_PKT;
+
        ag->tx_ring.size = tx_size;
        ag->rx_ring.size = rx_size;
 
index d010373155a008a0b6c020bec92606620bf9a284..b0f39159c133520a52e17c1d56f84c2ef92656cf 100644 (file)
@@ -536,6 +536,7 @@ void ag71xx_link_adjust(struct ag71xx *ag)
        u32 cfg2;
        u32 ifctl;
        u32 fifo5;
+       u32 fifo3;
 
        if (!ag->link) {
                ag71xx_hw_stop(ag);
@@ -576,11 +577,18 @@ void ag71xx_link_adjust(struct ag71xx *ag)
        }
 
        if (pdata->is_ar91xx)
-               ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, 0x00780fff);
+               fifo3 = 0x00780fff;
        else if (pdata->is_ar724x)
-               ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, pdata->fifo_cfg3);
+               fifo3 = pdata->fifo_cfg3;
        else
-               ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, 0x008001ff);
+               fifo3 = 0x008001ff;
+
+       if (ag->tx_ring.desc_split) {
+               fifo3 &= 0xffff;
+               fifo3 |= ((2048 - ag->tx_ring.desc_split) / 4) << 16;
+       }
+
+       ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, fifo3);
 
        if (pdata->set_speed)
                pdata->set_speed(ag->speed);
@@ -675,6 +683,49 @@ static int ag71xx_stop(struct net_device *dev)
        return 0;
 }
 
+static int ag71xx_fill_dma_desc(struct ag71xx_ring *ring, u32 addr, int len)
+{
+       int i;
+       struct ag71xx_desc *desc;
+       int ndesc = 0;
+       int split = ring->desc_split;
+
+       if (!split)
+               split = len;
+
+       while (len > 0) {
+               unsigned int cur_len = len;
+
+               i = (ring->curr + ndesc) % ring->size;
+               desc = ring->buf[i].desc;
+
+               if (!ag71xx_desc_empty(desc))
+                       return -1;
+
+               if (cur_len > split) {
+                       cur_len = split;
+                       if (len < split + 4)
+                               cur_len -= 4;
+               }
+
+               desc->data = addr;
+               addr += cur_len;
+               len -= cur_len;
+
+               if (len > 0)
+                       cur_len |= DESC_MORE;
+
+               /* prevent early tx attempt of this descriptor */
+               if (!ndesc)
+                       cur_len |= DESC_EMPTY;
+
+               desc->ctrl = cur_len;
+               ndesc++;
+       }
+
+       return ndesc;
+}
+
 static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
                                          struct net_device *dev)
 {
@@ -682,18 +733,12 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
        struct ag71xx_ring *ring = &ag->tx_ring;
        struct ag71xx_desc *desc;
        dma_addr_t dma_addr;
-       int i;
-
-       i = ring->curr % ring->size;
-       desc = ring->buf[i].desc;
-
-       if (!ag71xx_desc_empty(desc))
-               goto err_drop;
+       int i, n, ring_min;
 
        if (ag71xx_has_ar8216(ag))
                ag71xx_add_ar8216_header(ag, skb);
 
-       if (skb->len <= 0) {
+       if (skb->len <= 4) {
                DBG("%s: packet len is too small\n", ag->dev->name);
                goto err_drop;
        }
@@ -701,21 +746,33 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
        dma_addr = dma_map_single(&dev->dev, skb->data, skb->len,
                                  DMA_TO_DEVICE);
 
-       netdev_sent_queue(dev, skb->len);
+       i = ring->curr % ring->size;
+       desc = ring->buf[i].desc;
+
+       /* setup descriptor fields */
+       n = ag71xx_fill_dma_desc(ring, (u32) dma_addr, skb->len & ag->desc_pktlen_mask);
+       if (n < 0)
+               goto err_drop_unmap;
+
+       i = (ring->curr + n - 1) % ring->size;
        ring->buf[i].len = skb->len;
        ring->buf[i].skb = skb;
        ring->buf[i].timestamp = jiffies;
 
-       /* setup descriptor fields */
-       desc->data = (u32) dma_addr;
-       desc->ctrl = skb->len & ag->desc_pktlen_mask;
+       netdev_sent_queue(dev, skb->len);
+
+       desc->ctrl &= ~DESC_EMPTY;
+       ring->curr += n;
 
        /* flush descriptor */
        wmb();
 
-       ring->curr++;
-       if (ring->curr == (ring->dirty + ring->size)) {
-               DBG("%s: tx queue full\n", ag->dev->name);
+       ring_min = 2;
+       if (ring->desc_split)
+           ring_min *= AG71XX_TX_RING_DS_PER_PKT;
+
+       if (ring->curr - ring->dirty >= ring->size - ring_min) {
+               DBG("%s: tx queue full\n", dev->name);
                netif_stop_queue(dev);
        }
 
@@ -726,6 +783,9 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
 
        return NETDEV_TX_OK;
 
+err_drop_unmap:
+       dma_unmap_single(&dev->dev, dma_addr, skb->len, DMA_TO_DEVICE);
+
 err_drop:
        dev->stats.tx_dropped++;
 
@@ -843,7 +903,6 @@ static int ag71xx_tx_packets(struct ag71xx *ag)
                unsigned int i = ring->dirty % ring->size;
                struct ag71xx_desc *desc = ring->buf[i].desc;
                struct sk_buff *skb = ring->buf[i].skb;
-               int len = ring->buf[i].len;
 
                if (!ag71xx_desc_empty(desc)) {
                        if (pdata->is_ar7240 &&
@@ -854,19 +913,22 @@ static int ag71xx_tx_packets(struct ag71xx *ag)
 
                ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS);
 
-               bytes_compl += len;
-               ag->dev->stats.tx_bytes += len;
-               ag->dev->stats.tx_packets++;
+               if (skb) {
+                       dev_kfree_skb_any(skb);
+                       ring->buf[i].skb = NULL;
 
-               dev_kfree_skb_any(skb);
-               ring->buf[i].skb = NULL;
+                       bytes_compl += ring->buf[i].len;
+                       sent++;
+               }
 
                ring->dirty++;
-               sent++;
        }
 
        DBG("%s: %d packets sent out\n", ag->dev->name, sent);
 
+       ag->dev->stats.tx_bytes += bytes_compl;
+       ag->dev->stats.tx_packets += sent;
+
        if (!sent)
                return 0;
 
@@ -1195,6 +1257,11 @@ static int ag71xx_probe(struct platform_device *pdev)
        ag->max_frame_len = pdata->max_frame_len;
        ag->desc_pktlen_mask = pdata->desc_pktlen_mask;
 
+       if (!pdata->is_ar724x && !pdata->is_ar91xx) {
+               ag->tx_ring.desc_split = AG71XX_TX_RING_SPLIT;
+               ag->tx_ring.size *= AG71XX_TX_RING_DS_PER_PKT;
+       }
+
        ag->stop_desc = dma_alloc_coherent(NULL,
                sizeof(struct ag71xx_desc), &ag->stop_desc_dma, GFP_KERNEL);