From: Felix Fietkau Date: Wed, 6 Jun 2018 07:56:13 +0000 (+0200) Subject: ramips: improve ethernet driver performance with GRO/TSO X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=9a4253b81f3b3fff833ef92737ef73ad4c455ade;p=openwrt%2Fstaging%2Fthess.git ramips: improve ethernet driver performance with GRO/TSO GRO stores packets as fraglist. If they are routed back to the ethernet device, they need to be re-segmented if the driver does not support sending fraglists. Add the missing support for that, along with a missing feature flag that allows full routed GRO->TSO offload. Considerably reduces CPU utilization for routing Signed-off-by: Felix Fietkau --- Review note: in fe_txd_unmap() the reset after unmapping now targets dma_len0/dma_len1, the fields the unmap guards test, instead of dma_addr0/dma_addr1. diff --git a/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/mtk_eth_soc.c index d298fa9a97..e68ca204ed 100644 --- a/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -307,24 +307,20 @@ no_rx_mem: static void fe_txd_unmap(struct device *dev, struct fe_tx_buf *tx_buf) { - if (tx_buf->flags & FE_TX_FLAGS_SINGLE0) { - dma_unmap_single(dev, - dma_unmap_addr(tx_buf, dma_addr0), - dma_unmap_len(tx_buf, dma_len0), - DMA_TO_DEVICE); - } else if (tx_buf->flags & FE_TX_FLAGS_PAGE0) { + if (dma_unmap_len(tx_buf, dma_len0)) dma_unmap_page(dev, dma_unmap_addr(tx_buf, dma_addr0), dma_unmap_len(tx_buf, dma_len0), DMA_TO_DEVICE); - } - if (tx_buf->flags & FE_TX_FLAGS_PAGE1) + + if (dma_unmap_len(tx_buf, dma_len1)) dma_unmap_page(dev, dma_unmap_addr(tx_buf, dma_addr1), dma_unmap_len(tx_buf, dma_len1), DMA_TO_DEVICE); - tx_buf->flags = 0; + dma_unmap_len_set(tx_buf, dma_len0, 0); + dma_unmap_len_set(tx_buf, dma_len1, 0); if (tx_buf->skb && (tx_buf->skb != (struct sk_buff *)DMA_DUMMY_DESC)) dev_kfree_skb_any(tx_buf->skb); tx_buf->skb = NULL; @@ -559,6 +555,54 @@ static inline u32 fe_empty_txd(struct fe_tx_ring *ring) 
(ring->tx_ring_size - 1))); } +static int fe_tx_dma_map_page(struct device *dev, struct fe_tx_buf *tx_buf, + struct fe_tx_dma *txd, int idx, + struct page *page, size_t offset, size_t size) +{ + dma_addr_t mapped_addr; + + mapped_addr = dma_map_page(dev, page, offset, size, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, mapped_addr))) + return -EIO; + + if (idx & 1) { + txd->txd3 = mapped_addr; + txd->txd2 |= TX_DMA_PLEN1(size); + dma_unmap_addr_set(tx_buf, dma_addr1, mapped_addr); + dma_unmap_len_set(tx_buf, dma_len1, size); + } else { + tx_buf->skb = (struct sk_buff *)DMA_DUMMY_DESC; + txd->txd1 = mapped_addr; + txd->txd2 = TX_DMA_PLEN0(size); + dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); + dma_unmap_len_set(tx_buf, dma_len0, size); + } + return 0; +} + +static int fe_tx_dma_map_skb(struct device *dev, struct fe_tx_buf *tx_buf, + struct fe_tx_dma *txd, int idx, + struct sk_buff *skb) +{ + struct page *page = virt_to_page(skb->data); + size_t offset = offset_in_page(skb->data); + size_t size = skb_headlen(skb); + + return fe_tx_dma_map_page(dev, tx_buf, txd, idx, page, offset, size); +} + +static inline struct sk_buff * +fe_next_frag(struct sk_buff *head, struct sk_buff *skb) +{ + if (skb != head) + return skb->next; + + if (skb_has_frag_list(skb)) + return skb_shinfo(skb)->frag_list; + + return NULL; +} + static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev, int tx_num, struct fe_tx_ring *ring) { @@ -566,7 +610,7 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev, struct skb_frag_struct *frag; struct fe_tx_dma txd, *ptxd; struct fe_tx_buf *tx_buf; - dma_addr_t mapped_addr; + struct sk_buff *head = skb; unsigned int nr_frags; u32 def_txd4; int i, j, k, frag_size, frag_map_size, offset; @@ -574,7 +618,6 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev, tx_buf = &ring->tx_buf[ring->tx_next_idx]; memset(tx_buf, 0, sizeof(*tx_buf)); memset(&txd, 0, sizeof(txd)); - nr_frags = 
skb_shinfo(skb)->nr_frags; /* init tx descriptor */ if (priv->soc->tx_dma) @@ -613,82 +656,68 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev, } } - mapped_addr = dma_map_single(&dev->dev, skb->data, - skb_headlen(skb), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) - goto err_out; - txd.txd1 = mapped_addr; - txd.txd2 = TX_DMA_PLEN0(skb_headlen(skb)); + k = 0; + j = ring->tx_next_idx; - tx_buf->flags |= FE_TX_FLAGS_SINGLE0; - dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); - dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb)); +next_frag: + if (skb_headlen(skb)) { + if (fe_tx_dma_map_skb(&dev->dev, tx_buf, &txd, k++, skb)) + goto err_dma; + } /* TX SG offload */ - j = ring->tx_next_idx; - k = 0; + nr_frags = skb_shinfo(skb)->nr_frags; for (i = 0; i < nr_frags; i++) { - offset = 0; + struct page *page; + frag = &skb_shinfo(skb)->frags[i]; frag_size = skb_frag_size(frag); + offset = frag->page_offset; + page = skb_frag_page(frag); while (frag_size > 0) { frag_map_size = min(frag_size, TX_DMA_BUF_LEN); - mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset, - frag_map_size, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) - goto err_dma; - - if (k & 0x1) { - j = NEXT_TX_DESP_IDX(j); - txd.txd1 = mapped_addr; - txd.txd2 = TX_DMA_PLEN0(frag_map_size); + if (!(k & 0x1)) { + fe_set_txd(&txd, &ring->tx_dma[j]); + memset(&txd, 0, sizeof(txd)); txd.txd4 = def_txd4; - + j = NEXT_TX_DESP_IDX(j); tx_buf = &ring->tx_buf[j]; - memset(tx_buf, 0, sizeof(*tx_buf)); - - tx_buf->flags |= FE_TX_FLAGS_PAGE0; - dma_unmap_addr_set(tx_buf, dma_addr0, - mapped_addr); - dma_unmap_len_set(tx_buf, dma_len0, - frag_map_size); - } else { - txd.txd3 = mapped_addr; - txd.txd2 |= TX_DMA_PLEN1(frag_map_size); - - tx_buf->skb = (struct sk_buff *)DMA_DUMMY_DESC; - tx_buf->flags |= FE_TX_FLAGS_PAGE1; - dma_unmap_addr_set(tx_buf, dma_addr1, - mapped_addr); - dma_unmap_len_set(tx_buf, dma_len1, - frag_map_size); - - 
if (!((i == (nr_frags - 1)) && - (frag_map_size == frag_size))) { - fe_set_txd(&txd, &ring->tx_dma[j]); - memset(&txd, 0, sizeof(txd)); - } } + + if (fe_tx_dma_map_page(&dev->dev, tx_buf, &txd, k++, + page, offset, frag_map_size)) + goto err_dma; + frag_size -= frag_map_size; offset += frag_map_size; - k++; } } + skb = fe_next_frag(head, skb); + if (skb) { + if (!(k & 0x1)) { + fe_set_txd(&txd, &ring->tx_dma[j]); + memset(&txd, 0, sizeof(txd)); + txd.txd4 = def_txd4; + j = NEXT_TX_DESP_IDX(j); + tx_buf = &ring->tx_buf[j]; + } + goto next_frag; + } + /* set last segment */ if (k & 0x1) - txd.txd2 |= TX_DMA_LS1; - else txd.txd2 |= TX_DMA_LS0; + else + txd.txd2 |= TX_DMA_LS1; fe_set_txd(&txd, &ring->tx_dma[j]); /* store skb to cleanup */ - tx_buf->skb = skb; + tx_buf->skb = head; - netdev_sent_queue(dev, skb->len); - skb_tx_timestamp(skb); + netdev_sent_queue(dev, head->len); + skb_tx_timestamp(head); ring->tx_next_idx = NEXT_TX_DESP_IDX(j); /* make sure that all changes to the dma ring are flushed before we @@ -702,7 +731,7 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev, netif_wake_queue(dev); } - if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !skb->xmit_more) + if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !head->xmit_more) fe_reg_w32(ring->tx_next_idx, FE_REG_TX_CTX_IDX0); return 0; @@ -762,10 +791,12 @@ static inline int fe_skb_padto(struct sk_buff *skb, struct fe_priv *priv) static inline int fe_cal_txd_req(struct sk_buff *skb) { - int i, nfrags; + struct sk_buff *head = skb; + int i, nfrags = 0; struct skb_frag_struct *frag; - nfrags = 1; +next_frag: + nfrags++; if (skb_is_gso(skb)) { for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { frag = &skb_shinfo(skb)->frags[i]; @@ -775,6 +806,10 @@ static inline int fe_cal_txd_req(struct sk_buff *skb) nfrags += skb_shinfo(skb)->nr_frags; } + skb = fe_next_frag(head, skb); + if (skb) + goto next_frag; + return DIV_ROUND_UP(nfrags, 2); } diff --git 
a/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/mtk_eth_soc.h index dfaa5fd9ea..517d8ba4dc 100644 --- a/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -435,19 +435,12 @@ struct fe_hw_stats { #undef _FE }; -enum fe_tx_flags { - FE_TX_FLAGS_SINGLE0 = 0x01, - FE_TX_FLAGS_PAGE0 = 0x02, - FE_TX_FLAGS_PAGE1 = 0x04, -}; - struct fe_tx_buf { struct sk_buff *skb; - u32 flags; DEFINE_DMA_UNMAP_ADDR(dma_addr0); - DEFINE_DMA_UNMAP_LEN(dma_len0); DEFINE_DMA_UNMAP_ADDR(dma_addr1); - DEFINE_DMA_UNMAP_LEN(dma_len1); + u16 dma_len0; + u16 dma_len1; }; struct fe_tx_ring { diff --git a/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/soc_mt7621.c b/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/soc_mt7621.c index 00dd45e01f..96d3909a48 100644 --- a/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/soc_mt7621.c +++ b/target/linux/ramips/files-4.14/drivers/net/ethernet/mediatek/soc_mt7621.c @@ -143,7 +143,8 @@ static void mt7621_init_data(struct fe_soc_data *data, netdev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_TSO | - NETIF_F_TSO6 | NETIF_F_IPV6_CSUM; + NETIF_F_TSO6 | NETIF_F_IPV6_CSUM | NETIF_F_FRAGLIST | + NETIF_F_TSO_MANGLEID; } static void mt7621_set_mac(struct fe_priv *priv, unsigned char *mac)