net: mvneta: Improve the buffer allocation method for SWBM
author Yelena Krivosheev <yelena@marvell.com>
Wed, 18 Jul 2018 16:10:57 +0000 (18:10 +0200)
committer David S. Miller <davem@davemloft.net>
Sun, 29 Jul 2018 05:12:55 +0000 (22:12 -0700)
On a system with a small amount of memory (around 256MB), the state "cannot
allocate memory to refill with new buffer" is reached pretty quickly.

This patch changes the buffer allocation method to better handle this use
case and avoid memory allocation issues (a sketch of the new receive flow
follows the tags below).

Signed-off-by: Yelena Krivosheev <yelena@marvell.com>
[gregory: extract from a larger patch]
Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
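
To make the flow concrete before diving into the diff: the reworked SWBM
receive path lets one frame span several descriptors. The FIRST descriptor
triggers allocation of a small skb into which up to max(rx_copybreak,
rx_header_size) bytes of header are copied; the rest of that page and every
following page are attached as fragments, and refill of emptied descriptors
is deferred and batched. Below is a standalone, simplified sketch of that
walk; all types and names are illustrative stand-ins, not the driver's
structures.

#include <stdio.h>

#define DESC_FIRST 0x1
#define DESC_LAST  0x2
#define HDR_COPY   128  /* plays the role of max(rx_copybreak, rx_header_size) */

struct desc { unsigned int flags; int size; };
struct pkt  { int hdr_len; int frag_bytes; int building; };

static void rx_one_desc(struct pkt *p, const struct desc *d)
{
        if (d->flags & DESC_FIRST) {
                /* FIRST: copy up to HDR_COPY bytes into a fresh small skb */
                p->hdr_len = d->size < HDR_COPY ? d->size : HDR_COPY;
                /* whatever did not fit stays in the page, added as a frag */
                p->frag_bytes = d->size - p->hdr_len;
                p->building = 1;
        } else if (p->building) {
                /* MIDDLE/LAST: the whole page becomes another frag */
                p->frag_bytes += d->size;
        }
        if ((d->flags & DESC_LAST) && p->building) {
                printf("frame complete: %d header bytes, %d frag bytes\n",
                       p->hdr_len, p->frag_bytes);
                p->building = 0;
        }
}

int main(void)
{
        /* a jumbo frame split across three page-sized buffers */
        struct desc frame[] = {
                { DESC_FIRST, 4096 }, { 0, 4096 }, { DESC_LAST, 808 },
        };
        struct pkt p = { 0 };
        unsigned int i;

        for (i = 0; i < sizeof(frame) / sizeof(frame[0]); i++)
                rx_one_desc(&p, &frame[i]);
        return 0;
}

Building the skb across loop iterations is why rxq->skb and rxq->left_size
move into struct mvneta_rx_queue in the second hunk of the patch.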
drivers/net/ethernet/marvell/mvneta.c

index 079b515c54c186194ae248a0c20f3a03c44e3b47..55c2a56c5dae52178af62a0ab7d97f6ed054a3cf 100644 (file)
@@ -483,7 +483,10 @@ struct mvneta_port {
 #define MVNETA_RXD_ERR_RESOURCE                (BIT(17) | BIT(18))
 #define MVNETA_RXD_ERR_CODE_MASK       (BIT(17) | BIT(18))
 #define MVNETA_RXD_L3_IP4              BIT(25)
-#define MVNETA_RXD_FIRST_LAST_DESC     (BIT(26) | BIT(27))
+#define MVNETA_RXD_LAST_DESC           BIT(26)
+#define MVNETA_RXD_FIRST_DESC          BIT(27)
+#define MVNETA_RXD_FIRST_LAST_DESC     (MVNETA_RXD_FIRST_DESC | \
+                                        MVNETA_RXD_LAST_DESC)
 #define MVNETA_RXD_L4_CSUM_OK          BIT(30)
 
 #if defined(__LITTLE_ENDIAN)
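
Splitting the combined first/last field into its two component bits lets the
RX loop distinguish its position within a multi-buffer frame. A minimal
classifier over those two bits (bit values as in the definitions above; the
helper name is made up for illustration):

#include <stdio.h>

#define RXD_LAST_DESC  (1u << 26)
#define RXD_FIRST_DESC (1u << 27)

static const char *desc_kind(unsigned int status)
{
        switch (status & (RXD_FIRST_DESC | RXD_LAST_DESC)) {
        case RXD_FIRST_DESC | RXD_LAST_DESC:
                return "single-descriptor frame";
        case RXD_FIRST_DESC:
                return "first of several";
        case RXD_LAST_DESC:
                return "last of several";
        default:
                return "middle";
        }
}

int main(void)
{
        printf("%s\n", desc_kind(RXD_FIRST_DESC | RXD_LAST_DESC));
        printf("%s\n", desc_kind(RXD_FIRST_DESC));
        return 0;
}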
@@ -611,6 +614,14 @@ struct mvneta_rx_queue {
        /* Index of the next RX DMA descriptor to process */
        int next_desc_to_proc;
 
+       /* Index of first RX DMA descriptor to refill */
+       int first_to_refill;
+       u32 refill_num;
+
+       /* pointer to incomplete skb buffer */
+       struct sk_buff *skb;
+       int left_size;
+
        /* error counters */
        u32 skb_alloc_err;
        u32 refill_err;
@@ -626,6 +637,7 @@ static int txq_number = 8;
 static int rxq_def;
 
 static int rx_copybreak __read_mostly = 256;
+static int rx_header_size __read_mostly = 128;
 
 /* HW BM needs each port to be identified by a unique ID */
 static int global_port_id;
@@ -1689,13 +1701,6 @@ static void mvneta_rx_error(struct mvneta_port *pp,
 {
        u32 status = rx_desc->status;
 
-       if (!mvneta_rxq_desc_is_first_last(status)) {
-               netdev_err(pp->dev,
-                          "bad rx status %08x (buffer oversize), size=%d\n",
-                          status, rx_desc->data_size);
-               return;
-       }
-
        switch (status & MVNETA_RXD_ERR_CODE_MASK) {
        case MVNETA_RXD_ERR_CRC:
                netdev_err(pp->dev, "bad rx status %08x (crc error), size=%d\n",
@@ -1882,6 +1887,8 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
        for (i = 0; i < rxq->size; i++) {
                struct mvneta_rx_desc *rx_desc = rxq->descs + i;
                void *data = rxq->buf_virt_addr[i];
+               if (!data || !(rx_desc->buf_phys_addr))
+                       continue;
 
                dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
                                 MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
@@ -1889,117 +1896,183 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
        }
 }
 
+static inline
+int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
+{
+       struct mvneta_rx_desc *rx_desc;
+       int curr_desc = rxq->first_to_refill;
+       int i;
+
+       for (i = 0; (i < rxq->refill_num) && (i < 64); i++) {
+               rx_desc = rxq->descs + curr_desc;
+               if (!(rx_desc->buf_phys_addr)) {
+                       if (mvneta_rx_refill(pp, rx_desc, rxq, GFP_ATOMIC)) {
+                               pr_err("Can't refill queue %d. Done %d from %d\n",
+                                      rxq->id, i, rxq->refill_num);
+                               rxq->refill_err++;
+                               break;
+                       }
+               }
+               curr_desc = MVNETA_QUEUE_NEXT_DESC(rxq, curr_desc);
+       }
+       rxq->refill_num -= i;
+       rxq->first_to_refill = curr_desc;
+
+       return i;
+}
+
 /* Main rx processing when using software buffer management */
 static int mvneta_rx_swbm(struct napi_struct *napi,
-                         struct mvneta_port *pp, int rx_todo,
+                         struct mvneta_port *pp, int budget,
                          struct mvneta_rx_queue *rxq)
 {
        struct net_device *dev = pp->dev;
-       int rx_done;
+       int rx_todo, rx_proc;
+       int refill = 0;
        u32 rcvd_pkts = 0;
        u32 rcvd_bytes = 0;
 
        /* Get number of received packets */
-       rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq);
-
-       if (rx_todo > rx_done)
-               rx_todo = rx_done;
-
-       rx_done = 0;
+       rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
+       rx_proc = 0;
 
        /* Fairness NAPI loop */
-       while (rx_done < rx_todo) {
+       while ((rcvd_pkts < budget) && (rx_proc < rx_todo)) {
                struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
-               struct sk_buff *skb;
                unsigned char *data;
                struct page *page;
                dma_addr_t phys_addr;
-               u32 rx_status, frag_size;
-               int rx_bytes, err, index;
+               u32 rx_status, index;
+               int rx_bytes, skb_size, copy_size;
+               int frag_num, frag_size, frag_offset;
 
-               rx_done++;
-               rx_status = rx_desc->status;
-               rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
                index = rx_desc - rxq->descs;
                page = (struct page *)rxq->buf_virt_addr[index];
                data = page_address(page);
                /* Prefetch header */
                prefetch(data);
-               phys_addr = rx_desc->buf_phys_addr - pp->rx_offset_correction;
 
-               if (!mvneta_rxq_desc_is_first_last(rx_status) ||
-                   (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
-                       mvneta_rx_error(pp, rx_desc);
-err_drop_frame:
-                       dev->stats.rx_errors++;
-                       /* leave the descriptor untouched */
-                       continue;
-               }
+               phys_addr = rx_desc->buf_phys_addr;
+               rx_status = rx_desc->status;
+               rx_proc++;
+               rxq->refill_num++;
+
+               if (rx_status & MVNETA_RXD_FIRST_DESC) {
+                       /* Check errors only for FIRST descriptor */
+                       if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
+                               mvneta_rx_error(pp, rx_desc);
+                               dev->stats.rx_errors++;
+                               /* leave the descriptor untouched */
+                               continue;
+                       }
+                       rx_bytes = rx_desc->data_size -
+                                  (ETH_FCS_LEN + MVNETA_MH_SIZE);
 
-               if (rx_bytes <= rx_copybreak) {
-               /* better copy a small frame and not unmap the DMA region */
-                       skb = netdev_alloc_skb_ip_align(dev, rx_bytes);
-                       if (unlikely(!skb)) {
+                       /* Allocate small skb for each new packet */
+                       skb_size = max(rx_copybreak, rx_header_size);
+                       rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size);
+                       if (unlikely(!rxq->skb)) {
                                netdev_err(dev,
                                           "Can't allocate skb on queue %d\n",
                                           rxq->id);
+                               dev->stats.rx_dropped++;
                                rxq->skb_alloc_err++;
-                               goto err_drop_frame;
+                               continue;
                        }
+                       copy_size = min(skb_size, rx_bytes);
+
+                       /* Copy data from buffer to SKB, skip Marvell header */
+                       memcpy(rxq->skb->data, data + MVNETA_MH_SIZE,
+                              copy_size);
+                       skb_put(rxq->skb, copy_size);
+                       rxq->left_size = rx_bytes - copy_size;
+
+                       mvneta_rx_csum(pp, rx_status, rxq->skb);
+                       if (rxq->left_size == 0) {
+                               int size = copy_size + MVNETA_MH_SIZE;
+
+                               dma_sync_single_range_for_cpu(dev->dev.parent,
+                                                             phys_addr, 0,
+                                                             size,
+                                                             DMA_FROM_DEVICE);
+
+                               /* leave the descriptor and buffer untouched */
+                       } else {
+                               /* refill descriptor with new buffer later */
+                               rx_desc->buf_phys_addr = 0;
+
+                               frag_num = 0;
+                               frag_offset = copy_size + MVNETA_MH_SIZE;
+                               frag_size = min(rxq->left_size,
+                                               (int)(PAGE_SIZE - frag_offset));
+                               skb_add_rx_frag(rxq->skb, frag_num, page,
+                                               frag_offset, frag_size,
+                                               PAGE_SIZE);
+                               dma_unmap_single(dev->dev.parent, phys_addr,
+                                                PAGE_SIZE, DMA_FROM_DEVICE);
+                               rxq->left_size -= frag_size;
+                       }
+               } else {
+                       /* Middle or Last descriptor */
+                       if (unlikely(!rxq->skb)) {
+                               pr_debug("no skb for rx_status 0x%x\n",
+                                        rx_status);
+                               continue;
+                       }
+                       if (!rxq->left_size) {
+                               /* last descriptor has only FCS */
+                               /* and can be discarded */
+                               dma_sync_single_range_for_cpu(dev->dev.parent,
+                                                             phys_addr, 0,
+                                                             ETH_FCS_LEN,
+                                                             DMA_FROM_DEVICE);
+                               /* leave the descriptor and buffer untouched */
+                       } else {
+                               /* refill descriptor with new buffer later */
+                               rx_desc->buf_phys_addr = 0;
+
+                               frag_num = skb_shinfo(rxq->skb)->nr_frags;
+                               frag_offset = 0;
+                               frag_size = min(rxq->left_size,
+                                               (int)(PAGE_SIZE - frag_offset));
+                               skb_add_rx_frag(rxq->skb, frag_num, page,
+                                               frag_offset, frag_size,
+                                               PAGE_SIZE);
+
+                               dma_unmap_single(dev->dev.parent, phys_addr,
+                                                PAGE_SIZE,
+                                                DMA_FROM_DEVICE);
+
+                               rxq->left_size -= frag_size;
+                       }
+               } /* Middle or Last descriptor */
 
-                       dma_sync_single_range_for_cpu(dev->dev.parent,
-                                                     phys_addr,
-                                                     MVNETA_MH_SIZE + NET_SKB_PAD,
-                                                     rx_bytes,
-                                                     DMA_FROM_DEVICE);
-                       skb_put_data(skb, data + MVNETA_MH_SIZE + NET_SKB_PAD,
-                                    rx_bytes);
-
-                       skb->protocol = eth_type_trans(skb, dev);
-                       mvneta_rx_csum(pp, rx_status, skb);
-                       napi_gro_receive(napi, skb);
-
-                       rcvd_pkts++;
-                       rcvd_bytes += rx_bytes;
-
-                       /* leave the descriptor and buffer untouched */
+               if (!(rx_status & MVNETA_RXD_LAST_DESC))
+                       /* no last descriptor this time */
                        continue;
-               }
 
-               /* Refill processing */
-               err = mvneta_rx_refill(pp, rx_desc, rxq, GFP_KERNEL);
-               if (err) {
-                       netdev_err(dev, "Linux processing - Can't refill\n");
-                       rxq->refill_err++;
-                       goto err_drop_frame;
+               if (rxq->left_size) {
+                       pr_err("get last desc, but left_size (%d) != 0\n",
+                              rxq->left_size);
+                       dev_kfree_skb_any(rxq->skb);
+                       rxq->left_size = 0;
+                       rxq->skb = NULL;
+                       continue;
                }
-
-               frag_size = pp->frag_size;
-
-               skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size);
-
-               /* After refill old buffer has to be unmapped regardless
-                * the skb is successfully built or not.
-                */
-               dma_unmap_single(dev->dev.parent, phys_addr,
-                                MVNETA_RX_BUF_SIZE(pp->pkt_size),
-                                DMA_FROM_DEVICE);
-
-               if (!skb)
-                       goto err_drop_frame;
-
                rcvd_pkts++;
-               rcvd_bytes += rx_bytes;
+               rcvd_bytes += rxq->skb->len;
 
                /* Linux processing */
-               skb_reserve(skb, MVNETA_MH_SIZE + NET_SKB_PAD);
-               skb_put(skb, rx_bytes);
-
-               skb->protocol = eth_type_trans(skb, dev);
+               rxq->skb->protocol = eth_type_trans(rxq->skb, dev);
 
-               mvneta_rx_csum(pp, rx_status, skb);
+               if (dev->features & NETIF_F_GRO)
+                       napi_gro_receive(napi, rxq->skb);
+               else
+                       netif_receive_skb(rxq->skb);
 
-               napi_gro_receive(napi, skb);
+               /* clear the incomplete skb pointer in the queue */
+               rxq->skb = NULL;
+               rxq->left_size = 0;
        }
 
        if (rcvd_pkts) {
@@ -2011,10 +2084,13 @@ err_drop_frame:
                u64_stats_update_end(&stats->syncp);
        }
 
+       /* return some buffers to hardware queue, one at a time is too slow */
+       refill = mvneta_rx_refill_queue(pp, rxq);
+
        /* Update rxq management counters */
-       mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
+       mvneta_rxq_desc_num_update(pp, rxq, rx_proc, refill);
 
-       return rx_done;
+       return rcvd_pkts;
 }
 
 /* Main rx processing when using hardware buffer management */
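
Descriptors whose page has been handed to the stack get buf_phys_addr
cleared and are refilled in one batch, capped at 64 per poll, by
mvneta_rx_refill_queue() above. A toy user-space model of that ring walk,
with a fake allocator standing in for mvneta_rx_refill() and made-up names
throughout:

#include <stdio.h>

#define RING_SIZE    128
#define REFILL_BATCH 64

static unsigned long ring[RING_SIZE];   /* 0 means "needs a new buffer" */
static int first_to_refill, refill_num;

static unsigned long fake_alloc(void)
{
        static unsigned long addr = 0x1000;

        return addr += 0x1000;          /* pretend allocation never fails */
}

static int refill_queue(void)
{
        int i, curr = first_to_refill;

        for (i = 0; i < refill_num && i < REFILL_BATCH; i++) {
                if (!ring[curr]) {
                        unsigned long buf = fake_alloc();

                        if (!buf)
                                break;  /* leftovers retried on next poll */
                        ring[curr] = buf;
                }
                curr = (curr + 1) % RING_SIZE; /* MVNETA_QUEUE_NEXT_DESC role */
        }
        refill_num -= i;
        first_to_refill = curr;
        return i;
}

int main(void)
{
        int i;

        for (i = 0; i < RING_SIZE; i++)
                ring[i] = fake_alloc(); /* queue starts fully populated */
        for (i = 0; i < 10; i++)
                ring[i] = 0;            /* ten pages just left for the stack */
        refill_num = 10;

        printf("refilled %d descriptors\n", refill_queue());
        return 0;
}

The cap keeps a single poll from stalling on allocation; whatever is left
over is picked up on the next NAPI cycle through the first_to_refill and
refill_num state preserved in the queue.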
@@ -2823,21 +2899,23 @@ static void mvneta_rxq_hw_init(struct mvneta_port *pp,
        mvreg_write(pp, MVNETA_RXQ_BASE_ADDR_REG(rxq->id), rxq->descs_phys);
        mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size);
 
-       /* Set Offset */
-       mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD - pp->rx_offset_correction);
-
        /* Set coalescing pkts and time */
        mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal);
        mvneta_rx_time_coal_set(pp, rxq, rxq->time_coal);
 
        if (!pp->bm_priv) {
-               /* Fill RXQ with buffers from RX pool */
-               mvneta_rxq_buf_size_set(pp, rxq,
-                                       MVNETA_RX_BUF_SIZE(pp->pkt_size));
+               /* Set Offset */
+               mvneta_rxq_offset_set(pp, rxq, 0);
+               mvneta_rxq_buf_size_set(pp, rxq, pp->frag_size);
                mvneta_rxq_bm_disable(pp, rxq);
                mvneta_rxq_fill(pp, rxq, rxq->size);
        } else {
+               /* Set Offset */
+               mvneta_rxq_offset_set(pp, rxq,
+                                     NET_SKB_PAD - pp->rx_offset_correction);
+
                mvneta_rxq_bm_enable(pp, rxq);
+               /* Fill RXQ with buffers from RX pool */
                mvneta_rxq_long_pool_set(pp, rxq);
                mvneta_rxq_short_pool_set(pp, rxq);
                mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size);
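
On the SWBM path the hardware now writes from offset 0 of a bare page and
the driver copies the header out itself, so the NET_SKB_PAD headroom
correction is applied only when the hardware buffer manager is in use. A
small sketch of that max(0, ...) clamp with illustrative values (the real
NET_SKB_PAD depends on the build; 64 mirrors the
MVNETA_RX_PKT_OFFSET_CORRECTION cap used in the probe() hunk further down):

#include <stdio.h>

/* illustrative values only: the real NET_SKB_PAD is configuration
 * dependent; 64 stands in for MVNETA_RX_PKT_OFFSET_CORRECTION
 */
#define EXAMPLE_NET_SKB_PAD   64
#define OFFSET_CORRECTION_CAP 64

int main(void)
{
        int corr = EXAMPLE_NET_SKB_PAD - OFFSET_CORRECTION_CAP;

        if (corr < 0)
                corr = 0;       /* max(0, NET_SKB_PAD - cap) */
        printf("rx_offset_correction = %d\n", corr);
        printf("hw rx offset         = %d\n", EXAMPLE_NET_SKB_PAD - corr);
        return 0;
}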
@@ -2866,6 +2944,9 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
 {
        mvneta_rxq_drop_pkts(pp, rxq);
 
+       if (rxq->skb)
+               dev_kfree_skb_any(rxq->skb);
+
        if (rxq->descs)
                dma_free_coherent(pp->dev->dev.parent,
                                  rxq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -2876,6 +2957,10 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
        rxq->last_desc         = 0;
        rxq->next_desc_to_proc = 0;
        rxq->descs_phys        = 0;
+       rxq->first_to_refill   = 0;
+       rxq->refill_num        = 0;
+       rxq->skb               = NULL;
+       rxq->left_size         = 0;
 }
 
 static int mvneta_txq_sw_init(struct mvneta_port *pp,
@@ -4366,14 +4451,6 @@ static int mvneta_probe(struct platform_device *pdev)
        pp->dn = dn;
 
        pp->rxq_def = rxq_def;
-
-       /* Set RX packet offset correction for platforms whose
-        * NET_SKB_PAD exceeds 64B. It should be 64B for 64-bit
-        * platforms and 0B for 32-bit ones.
-        */
-       pp->rx_offset_correction =
-               max(0, NET_SKB_PAD - MVNETA_RX_PKT_OFFSET_CORRECTION);
-
        pp->indir[0] = rxq_def;
 
        /* Get special SoC configurations */
@@ -4461,6 +4538,7 @@ static int mvneta_probe(struct platform_device *pdev)
        SET_NETDEV_DEV(dev, &pdev->dev);
 
        pp->id = global_port_id++;
+       pp->rx_offset_correction = 0; /* not relevant for SW BM */
 
        /* Obtain access to BM resources if enabled and already initialized */
        bm_node = of_parse_phandle(dn, "buffer-manager", 0);
@@ -4475,6 +4553,13 @@ static int mvneta_probe(struct platform_device *pdev)
                                pp->bm_priv = NULL;
                        }
                }
+               /* Set RX packet offset correction for platforms whose
+                * NET_SKB_PAD exceeds 64B. It should be 64B for 64-bit
+                * platforms and 0B for 32-bit ones.
+                */
+               pp->rx_offset_correction = max(0,
+                                              NET_SKB_PAD -
+                                              MVNETA_RX_PKT_OFFSET_CORRECTION);
        }
        of_node_put(bm_node);