Switch to native NAPI polling, as this reduces overhead and complexity.
Normal path is faster, since one cmpxchg() is not anymore requested,
and busy polling with the NAPI polling has same performance.
Tested:
lpk50:~# cat /proc/sys/net/core/busy_read
70
lpk50:~# nstat >/dev/null;./netperf -H lpk55 -t TCP_RR;nstat
MIGRATED TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to lpk55.prod.google.com () port 0 AF_INET : first burst 0
Local /Remote
Socket Size Request Resp. Elapsed Trans.
Send Recv Size Size Time Rate
bytes Bytes bytes bytes secs. per sec
16384 87380 1 1 10.00 40095.07
16384 87380
IpInReceives 401062 0.0
IpInDelivers 401062 0.0
IpOutRequests 401079 0.0
TcpActiveOpens 7 0.0
TcpPassiveOpens 3 0.0
TcpAttemptFails 3 0.0
TcpEstabResets 5 0.0
TcpInSegs 401036 0.0
TcpOutSegs 401052 0.0
TcpOutRsts 38 0.0
UdpInDatagrams 26 0.0
UdpOutDatagrams 27 0.0
Ip6OutNoRoutes 1 0.0
TcpExtDelayedACKs 1 0.0
TcpExtTCPPrequeued 98 0.0
TcpExtTCPDirectCopyFromPrequeue 98 0.0
TcpExtTCPHPHits 4 0.0
TcpExtTCPHPHitsToUser 98 0.0
TcpExtTCPPureAcks 5 0.0
TcpExtTCPHPAcks 101 0.0
TcpExtTCPAbortOnData 6 0.0
TcpExtBusyPollRxPackets 400832 0.0
TcpExtTCPOrigDataSent 400983 0.0
IpExtInOctets
21273867 0.0
IpExtOutOctets
21261254 0.0
IpExtInNoECTPkts 401064 0.0
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
struct napi_struct napi;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- unsigned long busy_poll_state;
-#endif
-
union host_hc_status_block status_blk;
/* chip independent shortcuts into sb structure */
__le16 *sb_index_values;
#define bnx2x_fp_stats(bp, fp) (&((bp)->fp_stats[(fp)->index]))
#define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats))
-#ifdef CONFIG_NET_RX_BUSY_POLL
-
-enum bnx2x_fp_state {
- BNX2X_STATE_FP_NAPI = BIT(0), /* NAPI handler owns the queue */
-
- BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */
- BNX2X_STATE_FP_NAPI_REQ = BIT(1),
-
- BNX2X_STATE_FP_POLL_BIT = 2,
- BNX2X_STATE_FP_POLL = BIT(2), /* busy_poll owns the queue */
-
- BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */
-};
-
-static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
-{
- WRITE_ONCE(fp->busy_poll_state, 0);
-}
-
-/* called from the device poll routine to get ownership of a FP */
-static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
-{
- unsigned long prev, old = READ_ONCE(fp->busy_poll_state);
-
- while (1) {
- switch (old) {
- case BNX2X_STATE_FP_POLL:
- /* make sure bnx2x_fp_lock_poll() wont starve us */
- set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT,
- &fp->busy_poll_state);
- /* fallthrough */
- case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ:
- return false;
- default:
- break;
- }
- prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI);
- if (unlikely(prev != old)) {
- old = prev;
- continue;
- }
- return true;
- }
-}
-
-static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
-{
- smp_wmb();
- fp->busy_poll_state = 0;
-}
-
-/* called from bnx2x_low_latency_poll() */
-static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
-{
- return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0;
-}
-
-static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
-{
- smp_mb__before_atomic();
- clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state);
-}
-
-/* true if a socket is polling */
-static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
-{
- return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL;
-}
-
-/* false if fp is currently owned */
-static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp)
-{
- set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state);
- return !bnx2x_fp_ll_polling(fp);
-
-}
-#else
-static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp)
-{
-}
-
-static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp)
-{
- return true;
-}
-
-static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp)
-{
-}
-
-static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp)
-{
- return false;
-}
-
-static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp)
-{
-}
-
-static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp)
-{
- return false;
-}
-static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp)
-{
- return true;
-}
-#endif /* CONFIG_NET_RX_BUSY_POLL */
-
/* Use 2500 as a mini-jumbo MTU for FCoE */
#define BNX2X_FCOE_MINI_JUMBO_MTU 2500
skb_mark_napi_id(skb, &fp->napi);
- if (bnx2x_fp_ll_polling(fp))
- netif_receive_skb(skb);
- else
- napi_gro_receive(&fp->napi, skb);
+ napi_gro_receive(&fp->napi, skb);
next_rx:
rx_buf->data = NULL;
int i;
for_each_rx_queue_cnic(bp, i) {
- bnx2x_fp_busy_poll_init(&bp->fp[i]);
napi_enable(&bnx2x_fp(bp, i, napi));
}
}
int i;
for_each_eth_queue(bp, i) {
- bnx2x_fp_busy_poll_init(&bp->fp[i]);
napi_enable(&bnx2x_fp(bp, i, napi));
}
}
for_each_rx_queue_cnic(bp, i) {
napi_disable(&bnx2x_fp(bp, i, napi));
- while (!bnx2x_fp_ll_disable(&bp->fp[i]))
- usleep_range(1000, 2000);
}
}
for_each_eth_queue(bp, i) {
napi_disable(&bnx2x_fp(bp, i, napi));
- while (!bnx2x_fp_ll_disable(&bp->fp[i]))
- usleep_range(1000, 2000);
}
}
return 0;
}
#endif
- if (!bnx2x_fp_lock_napi(fp))
- return budget;
-
for_each_cos_in_tx_queue(fp, cos)
if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos]))
bnx2x_tx_int(bp, fp->txdata_ptr[cos]);
work_done += bnx2x_rx_int(fp, budget - work_done);
/* must not complete if we consumed full budget */
- if (work_done >= budget) {
- bnx2x_fp_unlock_napi(fp);
+ if (work_done >= budget)
break;
- }
}
- bnx2x_fp_unlock_napi(fp);
-
/* Fall out from the NAPI loop if needed */
if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
return work_done;
}
-#ifdef CONFIG_NET_RX_BUSY_POLL
-/* must be called with local_bh_disable()d */
-int bnx2x_low_latency_recv(struct napi_struct *napi)
-{
- struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath,
- napi);
- struct bnx2x *bp = fp->bp;
- int found = 0;
-
- if ((bp->state == BNX2X_STATE_CLOSED) ||
- (bp->state == BNX2X_STATE_ERROR) ||
- (bp->dev->features & (NETIF_F_LRO | NETIF_F_GRO)))
- return LL_FLUSH_FAILED;
-
- if (!bnx2x_fp_lock_poll(fp))
- return LL_FLUSH_BUSY;
-
- if (bnx2x_has_rx_work(fp))
- found = bnx2x_rx_int(fp, 4);
-
- bnx2x_fp_unlock_poll(fp);
-
- return found;
-}
-#endif
-
/* we split the first BD into headers and data BDs
* to ease the pain of our fellow microcode engineers
* we use one mapping for both BDs
*/
int bnx2x_enable_msi(struct bnx2x *bp);
-/**
- * bnx2x_low_latency_recv - LL callback
- *
- * @napi: napi structure
- */
-int bnx2x_low_latency_recv(struct napi_struct *napi);
-
/**
* bnx2x_alloc_mem_bp - allocate memories outsize main driver structure
*
.ndo_fcoe_get_wwn = bnx2x_fcoe_get_wwn,
#endif
-#ifdef CONFIG_NET_RX_BUSY_POLL
- .ndo_busy_poll = bnx2x_low_latency_recv,
-#endif
.ndo_get_phys_port_id = bnx2x_get_phys_port_id,
.ndo_set_vf_link_state = bnx2x_set_vf_link_state,
.ndo_features_check = bnx2x_features_check,