net/mlx5e: RX, Make sure packet header does not cross page boundary
author    Tariq Toukan <tariqt@mellanox.com>
          Tue, 25 Sep 2018 09:05:22 +0000 (12:05 +0300)
committer Saeed Mahameed <saeedm@mellanox.com>
          Fri, 25 Jan 2019 20:16:13 +0000 (12:16 -0800)
In the non-linear SKB memory scheme of Striding RQ, a packet header
could cross a page boundary. This requires special care in the fast
path that costs lines of code, additional runtime instructions, and
branches.

It can happen when the header (up to 256B) does not fit in
a single stride. Avoid this by working with a stride size that fits
the maximum possible header. The stride size is increased from 64B
to 256B.
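
A minimal user-space sketch (not driver code) of the invariant this
relies on: a packet starts at a stride boundary within the page, so
when the stride size is at least MLX5E_RX_MAX_HEAD and divides
PAGE_SIZE, a header of up to MLX5E_RX_MAX_HEAD bytes can never cross
a page:

    #include <assert.h>

    #define PAGE_SIZE         4096
    #define MLX5E_RX_MAX_HEAD 256

    int main(void)
    {
        unsigned int stride_sz = 256; /* new default; was 64 */
        unsigned int offset;

        /* Packets start at stride boundaries within the page. */
        for (offset = 0; offset < PAGE_SIZE; offset += stride_sz)
            /* The max-size header ends inside the same stride, and
             * stride_sz divides PAGE_SIZE, so it ends inside the
             * same page too. With stride_sz = 64, offset 3904
             * would violate this (3904 + 256 > 4096).
             */
            assert(offset + MLX5E_RX_MAX_HEAD <= PAGE_SIZE);
        return 0;
    }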

Performance:
Tested packet rate for UDP streams, single ring, on ConnectX-5.

Configuration:
Set Striding RQ and LRO ON (to enable the non-linear SKB scheme).
GRO OFF, early drop by TC rule.

64B: 4x worse memory utilization, no page-crossing headers
- No degradation (5,887,305 pps).
- The worse memory utilization is compensated for by the removed
  branch tests.

192B: 1.33x worse memory utilization, avoids page-crossing headers
- Before: 5,727,252. After: 5,777,037. ~1% gain.

256B: Same memory utilization, no page-crossing headers
- Before: 5,691,885. After: 5,748,007. ~1% gain.
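
The utilization ratios above follow from stride consumption: a packet
occupies a whole number of strides, so small packets waste more of a
256B stride than of a 64B one. A back-of-the-envelope check, assuming
the listed sizes are the per-packet buffer footprints:

    #include <stdio.h>

    /* Bytes of RX buffer consumed by one packet: stride count,
     * rounded up, times stride size.
     */
    static unsigned int consumed(unsigned int pkt, unsigned int stride)
    {
        return ((pkt + stride - 1) / stride) * stride;
    }

    int main(void)
    {
        unsigned int sizes[] = { 64, 192, 256 };
        int i;

        for (i = 0; i < 3; i++)
            printf("%uB: %.2fx\n", sizes[i],
                   (double)consumed(sizes[i], 256) /
                           consumed(sizes[i], 64));
        return 0; /* prints 4.00x, 1.33x, 1.00x */
    }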

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

index 8fa8fdd30b8509f73a27fe4d31b094dfceda5e5d..6def2c972bf0b179dc4d04de709d8ed43d21e59c 100644
@@ -76,15 +76,14 @@ struct page_pool;
 #define MLX5_SKB_FRAG_SZ(len)  (SKB_DATA_ALIGN(len) +  \
                                 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
+#define MLX5E_RX_MAX_HEAD (256)
+
 #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \
        (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */
 #define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \
        max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
-#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)       MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
-#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
-#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
-       (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
-       MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
+#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \
+       MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD))
 
 #define MLX5_MPWRQ_LOG_WQE_SZ                  18
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -119,8 +118,6 @@ struct page_pool;
 
 #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW            0x2
 
-#define MLX5E_RX_MAX_HEAD (256)
-
 #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 (64 * 1024)
 #define MLX5E_DEFAULT_LRO_TIMEOUT                       32
 #define MLX5E_LRO_TIMEOUT_ARR_SIZE                      4
index 8cfd2ec7c0a209afe424eca2a7be3301cf79223f..1bf547a2b905c0a814b1bfce7b7be7364c873ec6 100644
@@ -171,8 +171,7 @@ static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
        if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
                return order_base_2(mlx5e_rx_get_linear_frag_sz(params));
 
-       return MLX5E_MPWQE_STRIDE_SZ(mdev,
-               MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
+       return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
 }
 
 static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
index f86e4804e83e2a0f1ce3ab57129db61cfe52ff2f..c02532254411238040cbe1d115432787fe30d19f 100644
@@ -369,7 +369,7 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
 static inline void
 mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
                      struct mlx5e_dma_info *dma_info,
-                     int offset_from, int offset_to, u32 headlen)
+                     int offset_from, u32 headlen)
 {
        const void *from = page_address(dma_info->page) + offset_from;
        /* Aligning len to sizeof(long) optimizes memcpy performance */
@@ -377,24 +377,7 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
 
        dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len,
                                DMA_FROM_DEVICE);
-       skb_copy_to_linear_data_offset(skb, offset_to, from, len);
-}
-
-static inline void
-mlx5e_copy_skb_header_mpwqe(struct device *pdev,
-                           struct sk_buff *skb,
-                           struct mlx5e_dma_info *dma_info,
-                           u32 offset, u32 headlen)
-{
-       u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);
-
-       mlx5e_copy_skb_header(pdev, skb, dma_info, offset, 0, headlen_pg);
-
-       if (unlikely(offset + headlen > PAGE_SIZE)) {
-               dma_info++;
-               mlx5e_copy_skb_header(pdev, skb, dma_info, 0, headlen_pg,
-                                     headlen - headlen_pg);
-       }
+       skb_copy_to_linear_data(skb, from, len);
 }
 
 static void
@@ -973,8 +956,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
        }
 
        /* copy header */
-       mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset,
-                             0, headlen);
+       mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, headlen);
        /* skb linear part was allocated with headlen and aligned to long */
        skb->tail += headlen;
        skb->len  += headlen;
@@ -1096,8 +1078,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
                di++;
        }
        /* copy header */
-       mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, head_di,
-                                   head_offset, headlen);
+       mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, headlen);
        /* skb linear part was allocated with headlen and aligned to long */
        skb->tail += headlen;
        skb->len  += headlen;
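
For illustration only, a simplified user-space rendering of the
fast-path change (plain memcpy stand-ins, no DMA sync; the names
mirror the driver but the bodies are not the real implementation).
The old MPWQE helper needed a length split and a conditional second
copy for headers that crossed a page; the new one is always a single
copy:

    #include <string.h>

    #define PAGE_SIZE 4096

    /* Before: the header may cross into the next page, so copy in
     * up to two chunks.
     */
    static void copy_header_old(char *dst, char **pages,
                                unsigned int offset, unsigned int headlen)
    {
        unsigned int headlen_pg = headlen;

        if (headlen_pg > PAGE_SIZE - offset)
            headlen_pg = PAGE_SIZE - offset;
        memcpy(dst, pages[0] + offset, headlen_pg);
        if (offset + headlen > PAGE_SIZE) /* branch removed by patch */
            memcpy(dst + headlen_pg, pages[1], headlen - headlen_pg);
    }

    /* After: stride size >= max header size, so the header always
     * fits in one page and a single copy suffices.
     */
    static void copy_header_new(char *dst, char *page,
                                unsigned int offset, unsigned int headlen)
    {
        memcpy(dst, page + offset, headlen);
    }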