net/mlx5: WQ, fixes for fragmented WQ buffers API
authorTariq Toukan <tariqt@mellanox.com>
Tue, 21 Aug 2018 11:41:41 +0000 (14:41 +0300)
committerSaeed Mahameed <saeedm@mellanox.com>
Thu, 11 Oct 2018 01:26:16 +0000 (18:26 -0700)
mlx5e netdevice used to calculate fragment edges by a call to
mlx5_wq_cyc_get_frag_size(). This calculation did not give the correct
indication for queues smaller than a PAGE_SIZE, (broken by default on
PowerPC, where PAGE_SIZE == 64KB).  Here it is replaced by the correct new
calls/API.

Since (TX/RX) Work Queues buffers are fragmented, here we introduce
changes to the API in core driver, so that it gets a stride index and
returns the index of last stride on same fragment, and an additional
wrapping function that returns the number of physically contiguous
strides that can be written contiguously to the work queue.

This obsoletes the following API functions, and their buggy
usage in EN driver:
* mlx5_wq_cyc_get_frag_size()
* mlx5_wq_cyc_ctr2fragix()

The new API improves modularity and hides the details of such
calculation for mlx5e netdevice and mlx5_ib rdma drivers.

New calculation is also more efficient, and improves performance
as follows:

Packet rate test: pktgen, UDP / IPv4, 64byte, single ring, 8K ring size.

Before: 16,477,619 pps
After:  17,085,793 pps

3.7% improvement

Fixes: 3a2f70331226 ("net/mlx5: Use order-0 allocations for all WQ types")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
drivers/net/ethernet/mellanox/mlx5/core/wq.c
drivers/net/ethernet/mellanox/mlx5/core/wq.h
include/linux/mlx5/driver.h

index 15d8ae28c040c17e50d37928adac65ead8311893..00172dee5339c42eebb6222cacd3ac9945073dd0 100644 (file)
@@ -432,10 +432,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
 
 static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
                                              struct mlx5_wq_cyc *wq,
-                                             u16 pi, u16 frag_pi)
+                                             u16 pi, u16 nnops)
 {
        struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
-       u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
        edge_wi = wi + nnops;
 
@@ -454,15 +453,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
        struct mlx5_wq_cyc *wq = &sq->wq;
        struct mlx5e_umr_wqe *umr_wqe;
        u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
-       u16 pi, frag_pi;
+       u16 pi, contig_wqebbs_room;
        int err;
        int i;
 
        pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-       frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-
-       if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
-               mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
+       contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+       if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
+               mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
                pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
        }
 
index ae73ea992845683358e3d4097ad5ce58a6abde6e..6dacaeba2fbff85e5091a1151f7ee731e70cf0cd 100644 (file)
@@ -290,10 +290,9 @@ dma_unmap_wqe_err:
 
 static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
                                           struct mlx5_wq_cyc *wq,
-                                          u16 pi, u16 frag_pi)
+                                          u16 pi, u16 nnops)
 {
        struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-       u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
        edge_wi = wi + nnops;
 
@@ -348,8 +347,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        struct mlx5e_tx_wqe_info *wi;
 
        struct mlx5e_sq_stats *stats = sq->stats;
+       u16 headlen, ihs, contig_wqebbs_room;
        u16 ds_cnt, ds_cnt_inl = 0;
-       u16 headlen, ihs, frag_pi;
        u8 num_wqebbs, opcode;
        u32 num_bytes;
        int num_dma;
@@ -386,9 +385,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        }
 
        num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-       frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-       if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
-               mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
+       contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+       if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+               mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
                mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
        }
 
@@ -636,7 +635,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        struct mlx5e_tx_wqe_info *wi;
 
        struct mlx5e_sq_stats *stats = sq->stats;
-       u16 headlen, ihs, pi, frag_pi;
+       u16 headlen, ihs, pi, contig_wqebbs_room;
        u16 ds_cnt, ds_cnt_inl = 0;
        u8 num_wqebbs, opcode;
        u32 num_bytes;
@@ -672,13 +671,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        }
 
        num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-       frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-       if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
+       pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+       contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+       if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+               mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
                pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-               mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
        }
 
-       mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
+       mlx5i_sq_fetch_wqe(sq, &wqe, pi);
 
        /* fill wqe */
        wi       = &sq->db.wqe_info[pi];
index 08eac92fc26cff8e8d1818d7b3f7dd76f356ca0c..0982c579ec740f0b8ecd467c2666bc678037b083 100644 (file)
@@ -109,12 +109,11 @@ struct mlx5i_tx_wqe {
 
 static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
                                      struct mlx5i_tx_wqe **wqe,
-                                     u16 *pi)
+                                     u16 pi)
 {
        struct mlx5_wq_cyc *wq = &sq->wq;
 
-       *pi  = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-       *wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+       *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
        memset(*wqe, 0, sizeof(**wqe));
 }
 
index 68e7f8df2a6d310989a2b6ee9b2e6c488c3b17e0..ddca327e89505db22a029a53315ab21905f57d12 100644 (file)
@@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
        return (u32)wq->fbc.sz_m1 + 1;
 }
 
-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
-{
-       return wq->fbc.frag_sz_m1 + 1;
-}
-
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
 {
        return wq->fbc.sz_m1 + 1;
index 3a1a170bb2d7f3244e7761a6acf6c1fb4a3534c3..b1293d153a587e7cd1c99820d1293d6c396136b0 100644 (file)
@@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                       void *wqc, struct mlx5_wq_cyc *wq,
                       struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
 
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                      void *qpc, struct mlx5_wq_qp *wq,
@@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
        return ctr & wq->fbc.sz_m1;
 }
 
-static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr)
-{
-       return ctr & wq->fbc.frag_sz_m1;
-}
-
 static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
 {
        return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
@@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
        return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
 }
 
+static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
+{
+       return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
+}
+
 static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
 {
        int equal   = (cc1 == cc2);
index 66d94b4557cf789906455683e4e69e40750f3b2c..88a041b73abfcc6a6b3aca7dd0fad0e3897f2075 100644 (file)
@@ -1032,6 +1032,14 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
                ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
 }
 
+static inline u32
+mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
+{
+       u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1;
+
+       return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
+}
+
 int mlx5_cmd_init(struct mlx5_core_dev *dev);
 void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);