From: Jesse Brandeburg Date: Tue, 16 Apr 2019 17:24:34 +0000 (-0700) Subject: ice: Reorganize tx_buf and ring structs X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=65124bbf980c6256e3385e7d002a5db102d30d3a;p=openwrt%2Fstaging%2Fblogic.git ice: Reorganize tx_buf and ring structs Use more efficient structure ordering by using the pahole tool and a lot of code inspection to get hot cache lines to have packed data (no holes if possible) and adjacent warm data. ice_ring prior to this change: /* size: 192, cachelines: 3, members: 23 */ /* sum members: 158, holes: 4, sum holes: 12 */ /* padding: 22 */ ice_ring after this change: /* size: 192, cachelines: 3, members: 25 */ /* sum members: 162, holes: 1, sum holes: 1 */ /* padding: 29 */ ice_tx_buf prior to this change: /* size: 48, cachelines: 1, members: 7 */ /* sum members: 38, holes: 2, sum holes: 6 */ /* padding: 4 */ /* last cacheline: 48 bytes */ ice_tx_buf after this change: /* size: 40, cachelines: 1, members: 7 */ /* sum members: 38, holes: 1, sum holes: 2 */ /* last cacheline: 40 bytes */ Signed-off-by: Jesse Brandeburg Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 66e05032ee56..4cff52ffefe0 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -58,19 +58,19 @@ struct ice_tx_buf { unsigned int bytecount; unsigned short gso_segs; u32 tx_flags; - DEFINE_DMA_UNMAP_ADDR(dma); DEFINE_DMA_UNMAP_LEN(len); + DEFINE_DMA_UNMAP_ADDR(dma); }; struct ice_tx_offload_params { - u8 header_len; + u64 cd_qw1; + struct ice_ring *tx_ring; u32 td_cmd; u32 td_offset; u32 td_l2tag1; - u16 cd_l2tag2; u32 cd_tunnel_params; - u64 cd_qw1; - struct ice_ring *tx_ring; + u16 cd_l2tag2; + u8 header_len; }; struct ice_rx_buf { @@ -150,6 +150,7 @@ enum ice_rx_dtype { /* descriptor ring, associated with a VSI */ struct ice_ring { + /* CL1 - 1st cacheline starts here */ struct ice_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ struct device *dev; /* Used for DMA mapping */ @@ -161,11 +162,11 @@ struct ice_ring { struct ice_tx_buf *tx_buf; struct ice_rx_buf *rx_buf; }; + /* CL2 - 2nd cacheline starts here */ u16 q_index; /* Queue number of ring */ - u32 txq_teid; /* Added Tx queue TEID */ -#ifdef CONFIG_DCB - u8 dcb_tc; /* Traffic class of ring */ -#endif /* CONFIG_DCB */ + u16 q_handle; /* Queue handle per TC */ + + u8 ring_active; /* is ring online or not */ u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -173,8 +174,7 @@ struct ice_ring { /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; - - u8 ring_active; /* is ring online or not */ + u16 next_to_alloc; /* stats structs */ struct ice_q_stats stats; @@ -184,10 +184,17 @@ struct ice_ring { struct ice_rxq_stats rx_stats; }; - unsigned int size; /* length of descriptor ring in bytes */ - dma_addr_t dma; /* physical address of ring */ struct rcu_head rcu; /* to avoid race on free */ - u16 next_to_alloc; + /* CLX - the below items are only accessed infrequently and should be + * in their own cache line if possible + */ + dma_addr_t dma; /* physical address of ring */ + unsigned int size; /* length of descriptor ring in bytes */ + u32 txq_teid; /* Added Tx queue TEID */ + u16 rx_buf_len; +#ifdef CONFIG_DCB + u8 dcb_tc; /* Traffic class of ring */ +#endif /* CONFIG_DCB */ } ____cacheline_internodealigned_in_smp; struct ice_ring_container {