--- /dev/null
+From 7f3eb2174512fe6c9c0f062e96eccb0d3cc6d5cd Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Wed, 18 Oct 2023 14:35:47 +0200
+Subject: [PATCH] net: introduce napi_is_scheduled helper
+
+We currently have napi_if_scheduled_mark_missed that can be used to
+check if napi is scheduled but that does more thing than simply checking
+it and return a bool. Some driver already implement custom function to
+check if napi is scheduled.
+
+Drop these custom function and introduce napi_is_scheduled that simply
+check if napi is scheduled atomically.
+
+Update any driver and code that implement a similar check and instead
+use this new helper.
+
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+---
+ drivers/net/ethernet/chelsio/cxgb3/sge.c | 8 --------
+ drivers/net/wireless/realtek/rtw89/core.c | 2 +-
+ include/linux/netdevice.h | 23 +++++++++++++++++++++++
+ net/core/dev.c | 2 +-
+ 4 files changed, 25 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
++++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
+@@ -2507,14 +2507,6 @@ static int napi_rx_handler(struct napi_s
+ return work_done;
+ }
+
+-/*
+- * Returns true if the device is already scheduled for polling.
+- */
+-static inline int napi_is_scheduled(struct napi_struct *napi)
+-{
+- return test_bit(NAPI_STATE_SCHED, &napi->state);
+-}
+-
+ /**
+ * process_pure_responses - process pure responses from a response queue
+ * @adap: the adapter
+--- a/drivers/net/wireless/realtek/rtw89/core.c
++++ b/drivers/net/wireless/realtek/rtw89/core.c
+@@ -1479,7 +1479,7 @@ static void rtw89_core_rx_to_mac80211(st
+ struct napi_struct *napi = &rtwdev->napi;
+
+ /* In low power mode, napi isn't scheduled. Receive it to netif. */
+- if (unlikely(!test_bit(NAPI_STATE_SCHED, &napi->state)))
++ if (unlikely(!napi_is_scheduled(napi)))
+ napi = NULL;
+
+ rtw89_core_hw_to_sband_rate(rx_status);
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -468,6 +468,29 @@ static inline bool napi_prefer_busy_poll
+ return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
+ }
+
++/**
++ * napi_is_scheduled - test if NAPI is scheduled
++ * @n: NAPI context
++ *
++ * This check is "best-effort". With no locking implemented,
++ * a NAPI can be scheduled or terminate right after this check
++ * and produce not precise results.
++ *
++ * NAPI_STATE_SCHED is an internal state, napi_is_scheduled
++ * should not be used normally and napi_schedule should be
++ * used instead.
++ *
++ * Use only if the driver really needs to check if a NAPI
++ * is scheduled for example in the context of delayed timer
++ * that can be skipped if a NAPI is already scheduled.
++ *
++ * Return True if NAPI is scheduled, False otherwise.
++ */
++static inline bool napi_is_scheduled(struct napi_struct *n)
++{
++ return test_bit(NAPI_STATE_SCHED, &n->state);
++}
++
+ bool napi_schedule_prep(struct napi_struct *n);
+
+ /**
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -6533,7 +6533,7 @@ static int __napi_poll(struct napi_struc
+ * accidentally calling ->poll() when NAPI is not scheduled.
+ */
+ work = 0;
+- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
++ if (napi_is_scheduled(n)) {
+ work = n->poll(n, weight);
+ trace_napi_poll(n, work, weight);
+ }
--- /dev/null
+From 2d1a42cf7f77cda54dbbee18d00b1200e7bc22aa Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Wed, 18 Oct 2023 14:35:48 +0200
+Subject: [PATCH 1/3] net: stmmac: improve TX timer arm logic
+
+There is currently a problem with the TX timer getting armed multiple
+unnecessary times causing big performance regression on some device that
+suffer from heavy handling of hrtimer rearm.
+
+The use of the TX timer is an old implementation that predates the napi
+implementation and the interrupt enable/disable handling.
+
+Due to stmmac being a very old code, the TX timer was never evaluated
+again with this new implementation and was kept there causing
+performance regression. The performance regression started to appear
+with kernel version 4.19 with 8fce33317023 ("net: stmmac: Rework coalesce
+timer and fix multi-queue races") where the timer was reduced to 1ms
+causing it to be armed 40 times more than before.
+
+Decreasing the timer made the problem more present and caused the
+regression in the other of 600-700mbps on some device (regression where
+this was notice is ipq806x).
+
+The problem is in the fact that handling the hrtimer on some target is
+expensive and recent kernel made the timer armed much more times.
+A solution that was proposed was reverting the hrtimer change and use
+mod_timer but such solution would still hide the real problem in the
+current implementation.
+
+To fix the regression, apply some additional logic and skip arming the
+timer when not needed.
+
+Arm the timer ONLY if a napi is not already scheduled. Running the timer
+is redundant since the same function (stmmac_tx_clean) will run in the
+napi TX poll. Also try to cancel any timer if a napi is scheduled to
+prevent redundant run of TX call.
+
+With the following new logic the original performance are restored while
+keeping using the hrtimer.
+
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+---
+ .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 +++++++++++++++---
+ 1 file changed, 15 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -2975,13 +2975,25 @@ static void stmmac_tx_timer_arm(struct s
+ {
+ struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
+ u32 tx_coal_timer = priv->tx_coal_timer[queue];
++ struct stmmac_channel *ch;
++ struct napi_struct *napi;
+
+ if (!tx_coal_timer)
+ return;
+
+- hrtimer_start(&tx_q->txtimer,
+- STMMAC_COAL_TIMER(tx_coal_timer),
+- HRTIMER_MODE_REL);
++ ch = &priv->channel[tx_q->queue_index];
++ napi = tx_q->xsk_pool ? &ch->rxtx_napi : &ch->tx_napi;
++
++ /* Arm timer only if napi is not already scheduled.
++ * Try to cancel any timer if napi is scheduled, timer will be armed
++ * again in the next scheduled napi.
++ */
++ if (unlikely(!napi_is_scheduled(napi)))
++ hrtimer_start(&tx_q->txtimer,
++ STMMAC_COAL_TIMER(tx_coal_timer),
++ HRTIMER_MODE_REL);
++ else
++ hrtimer_try_to_cancel(&tx_q->txtimer);
+ }
+
+ /**
--- /dev/null
+From a594166387fe08e6f5a32130c400249a35b298f9 Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Wed, 18 Oct 2023 14:35:49 +0200
+Subject: [PATCH 2/3] net: stmmac: move TX timer arm after DMA enable
+
+Move TX timer arm call after DMA interrupt is enabled again.
+
+The TX timer arm function changed logic and now is skipped if a napi is
+already scheduled. By moving the TX timer arm call after DMA is enabled,
+we permit to correctly skip if a DMA interrupt has been fired and a napi
+has been scheduled again.
+
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+---
+ .../net/ethernet/stmicro/stmmac/stmmac_main.c | 22 +++++++++++++++----
+ 1 file changed, 18 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -2528,9 +2528,13 @@ static void stmmac_bump_dma_threshold(st
+ * @priv: driver private structure
+ * @budget: napi budget limiting this functions packet handling
+ * @queue: TX queue index
++ * @pending_packets: signal to arm the TX coal timer
+ * Description: it reclaims the transmit resources after transmission completes.
++ * If some packets still needs to be handled, due to TX coalesce, set
++ * pending_packets to true to make NAPI arm the TX coal timer.
+ */
+-static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
++static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
++ bool *pending_packets)
+ {
+ struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
+ unsigned int bytes_compl = 0, pkts_compl = 0;
+@@ -2693,7 +2697,7 @@ static int stmmac_tx_clean(struct stmmac
+
+ /* We still have pending packets, let's call for a new scheduling */
+ if (tx_q->dirty_tx != tx_q->cur_tx)
+- stmmac_tx_timer_arm(priv, queue);
++ *pending_packets = true;
+
+ __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
+
+@@ -5485,12 +5489,13 @@ static int stmmac_napi_poll_tx(struct na
+ struct stmmac_channel *ch =
+ container_of(napi, struct stmmac_channel, tx_napi);
+ struct stmmac_priv *priv = ch->priv_data;
++ bool pending_packets = false;
+ u32 chan = ch->index;
+ int work_done;
+
+ priv->xstats.napi_poll++;
+
+- work_done = stmmac_tx_clean(priv, budget, chan);
++ work_done = stmmac_tx_clean(priv, budget, chan, &pending_packets);
+ work_done = min(work_done, budget);
+
+ if (work_done < budget && napi_complete_done(napi, work_done)) {
+@@ -5501,6 +5506,10 @@ static int stmmac_napi_poll_tx(struct na
+ spin_unlock_irqrestore(&ch->lock, flags);
+ }
+
++ /* TX still have packet to handle, check if we need to arm tx timer */
++ if (pending_packets)
++ stmmac_tx_timer_arm(priv, chan);
++
+ return work_done;
+ }
+
+@@ -5509,12 +5518,13 @@ static int stmmac_napi_poll_rxtx(struct
+ struct stmmac_channel *ch =
+ container_of(napi, struct stmmac_channel, rxtx_napi);
+ struct stmmac_priv *priv = ch->priv_data;
++ bool tx_pending_packets = false;
+ int rx_done, tx_done, rxtx_done;
+ u32 chan = ch->index;
+
+ priv->xstats.napi_poll++;
+
+- tx_done = stmmac_tx_clean(priv, budget, chan);
++ tx_done = stmmac_tx_clean(priv, budget, chan, &tx_pending_packets);
+ tx_done = min(tx_done, budget);
+
+ rx_done = stmmac_rx_zc(priv, budget, chan);
+@@ -5539,6 +5549,10 @@ static int stmmac_napi_poll_rxtx(struct
+ spin_unlock_irqrestore(&ch->lock, flags);
+ }
+
++ /* TX still have packet to handle, check if we need to arm tx timer */
++ if (tx_pending_packets)
++ stmmac_tx_timer_arm(priv, chan);
++
+ return min(rxtx_done, budget - 1);
+ }
+
--- /dev/null
+From 039550960a2235cfe2dfaa773df9f98f8da31a0c Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Wed, 18 Oct 2023 14:35:50 +0200
+Subject: [PATCH 3/3] net: stmmac: increase TX coalesce timer to 5ms
+
+Commit 8fce33317023 ("net: stmmac: Rework coalesce timer and fix
+multi-queue races") decreased the TX coalesce timer from 40ms to 1ms.
+
+This caused some performance regression on some target (regression was
+reported at least on ipq806x) in the order of 600mbps dropping from
+gigabit handling to only 200mbps.
+
+The problem was identified in the TX timer getting armed too much time.
+While this was fixed and improved in another commit, performance can be
+improved even further by increasing the timer delay a bit moving from
+1ms to 5ms.
+
+The value is a good balance between battery saving by prevending too
+much interrupt to be generated and permitting good performance for
+internet oriented devices.
+
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+---
+ drivers/net/ethernet/stmicro/stmmac/common.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/common.h
++++ b/drivers/net/ethernet/stmicro/stmmac/common.h
+@@ -286,7 +286,7 @@ struct stmmac_safety_stats {
+ #define MIN_DMA_RIWT 0x10
+ #define DEF_DMA_RIWT 0xa0
+ /* Tx coalesce parameters */
+-#define STMMAC_COAL_TX_TIMER 1000
++#define STMMAC_COAL_TX_TIMER 5000
+ #define STMMAC_MAX_COAL_TX_TICK 100000
+ #define STMMAC_TX_MAX_FRAMES 256
+ #define STMMAC_TX_FRAMES 25
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
-@@ -1668,7 +1668,6 @@ struct net_device_ops {
+@@ -1691,7 +1691,6 @@ struct net_device_ops {
* @IFF_FAILOVER: device is a failover master device
* @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
* @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
* @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
* skb_headlen(skb) == 0 (data starts from frag0)
* @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN
-@@ -1704,7 +1703,7 @@ enum netdev_priv_flags {
+@@ -1727,7 +1726,7 @@ enum netdev_priv_flags {
IFF_FAILOVER = 1<<27,
IFF_FAILOVER_SLAVE = 1<<28,
IFF_L3MDEV_RX_HANDLER = 1<<29,
IFF_TX_SKB_NO_LINEAR = BIT_ULL(31),
IFF_CHANGE_PROTO_DOWN = BIT_ULL(32),
};
-@@ -1739,7 +1738,6 @@ enum netdev_priv_flags {
+@@ -1762,7 +1761,6 @@ enum netdev_priv_flags {
#define IFF_FAILOVER IFF_FAILOVER
#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
-@@ -1707,6 +1707,7 @@ enum netdev_priv_flags {
+@@ -1730,6 +1730,7 @@ enum netdev_priv_flags {
/* was IFF_LIVE_RENAME_OK */
IFF_TX_SKB_NO_LINEAR = BIT_ULL(31),
IFF_CHANGE_PROTO_DOWN = BIT_ULL(32),
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
-@@ -1740,6 +1741,7 @@ enum netdev_priv_flags {
+@@ -1763,6 +1764,7 @@ enum netdev_priv_flags {
#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR
/* Specifies the type of the struct net_device::ml_priv pointer */
enum netdev_ml_priv_type {
-@@ -2108,6 +2110,11 @@ struct net_device {
+@@ -2131,6 +2133,11 @@ struct net_device {
const struct tlsdev_ops *tlsdev_ops;
#endif
const struct header_ops *header_ops;
unsigned char operstate;
-@@ -2183,6 +2190,10 @@ struct net_device {
+@@ -2206,6 +2213,10 @@ struct net_device {
struct mctp_dev __rcu *mctp_ptr;
#endif
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
-@@ -2134,6 +2134,8 @@ struct net_device {
+@@ -2157,6 +2157,8 @@ struct net_device {
struct netdev_hw_addr_list mc;
struct netdev_hw_addr_list dev_addrs;
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
-@@ -2169,7 +2169,7 @@ struct net_device {
+@@ -2192,7 +2192,7 @@ struct net_device {
#if IS_ENABLED(CONFIG_AX25)
void *ax25_ptr;
#endif
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
-@@ -520,6 +520,7 @@ static inline bool napi_complete(struct
+@@ -543,6 +543,7 @@ static inline bool napi_complete(struct
}
int dev_set_threaded(struct net_device *dev, bool threaded);
/**
* napi_disable - prevent NAPI from scheduling
-@@ -3129,6 +3130,7 @@ struct softnet_data {
+@@ -3152,6 +3153,7 @@ struct softnet_data {
unsigned int processed;
unsigned int time_squeeze;
unsigned int received_rps;
+++ /dev/null
-From b5532bdc6d09e6e789417f0c7a0b665b57b0e7be Mon Sep 17 00:00:00 2001
-From: Christian Marangi <ansuelsmth@gmail.com>
-Date: Mon, 18 Sep 2023 14:21:56 +0200
-Subject: [PATCH 1/4] net: introduce napi_is_scheduled helper
-
-We currently have napi_if_scheduled_mark_missed that can be used to
-check if napi is scheduled but that does more thing than simply checking
-it and return a bool. Some driver already implement custom function to
-check if napi is scheduled.
-
-Drop these custom function and introduce napi_is_scheduled that simply
-check if napi is scheduled atomically.
-
-Update any driver and code that implement a similar check and instead
-use this new helper.
-
-Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
----
- drivers/net/ethernet/chelsio/cxgb3/sge.c | 8 --------
- drivers/net/wireless/realtek/rtw89/core.c | 2 +-
- include/linux/netdevice.h | 5 +++++
- net/core/dev.c | 2 +-
- 4 files changed, 7 insertions(+), 10 deletions(-)
-
---- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
-+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
-@@ -2507,14 +2507,6 @@ static int napi_rx_handler(struct napi_s
- return work_done;
- }
-
--/*
-- * Returns true if the device is already scheduled for polling.
-- */
--static inline int napi_is_scheduled(struct napi_struct *napi)
--{
-- return test_bit(NAPI_STATE_SCHED, &napi->state);
--}
--
- /**
- * process_pure_responses - process pure responses from a response queue
- * @adap: the adapter
---- a/drivers/net/wireless/realtek/rtw89/core.c
-+++ b/drivers/net/wireless/realtek/rtw89/core.c
-@@ -1479,7 +1479,7 @@ static void rtw89_core_rx_to_mac80211(st
- struct napi_struct *napi = &rtwdev->napi;
-
- /* In low power mode, napi isn't scheduled. Receive it to netif. */
-- if (unlikely(!test_bit(NAPI_STATE_SCHED, &napi->state)))
-+ if (unlikely(!napi_is_scheduled(napi)))
- napi = NULL;
-
- rtw89_core_hw_to_sband_rate(rx_status);
---- a/include/linux/netdevice.h
-+++ b/include/linux/netdevice.h
-@@ -468,6 +468,11 @@ static inline bool napi_prefer_busy_poll
- return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
- }
-
-+static inline bool napi_is_scheduled(struct napi_struct *n)
-+{
-+ return test_bit(NAPI_STATE_SCHED, &n->state);
-+}
-+
- bool napi_schedule_prep(struct napi_struct *n);
-
- /**
---- a/net/core/dev.c
-+++ b/net/core/dev.c
-@@ -6594,7 +6594,7 @@ static int __napi_poll(struct napi_struc
- * accidentally calling ->poll() when NAPI is not scheduled.
- */
- work = 0;
-- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
-+ if (napi_is_scheduled(n)) {
- work = n->poll(n, weight);
- trace_napi_poll(n, work, weight);
- }
+++ /dev/null
-From fb04db35447d1e8ff557c8e57139164cecab7de5 Mon Sep 17 00:00:00 2001
-From: Christian Marangi <ansuelsmth@gmail.com>
-Date: Wed, 27 Sep 2023 15:38:31 +0200
-Subject: [PATCH 2/4] net: stmmac: move TX timer arm after DMA enable
-
-Move TX timer arm call after DMA interrupt is enabled again.
-
-The TX timer arm function changed logic and now is skipped if a napi is
-already scheduled. By moving the TX timer arm call after DMA is enabled,
-we permit to correctly skip if a DMA interrupt has been fired and a napi
-has been scheduled again.
-
-Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
----
- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 19 +++++++++++++++----
- 1 file changed, 15 insertions(+), 4 deletions(-)
-
---- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
-+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
-@@ -2530,7 +2530,8 @@ static void stmmac_bump_dma_threshold(st
- * @queue: TX queue index
- * Description: it reclaims the transmit resources after transmission completes.
- */
--static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
-+static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
-+ bool *pending_packets)
- {
- struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
- unsigned int bytes_compl = 0, pkts_compl = 0;
-@@ -2693,7 +2694,7 @@ static int stmmac_tx_clean(struct stmmac
-
- /* We still have pending packets, let's call for a new scheduling */
- if (tx_q->dirty_tx != tx_q->cur_tx)
-- stmmac_tx_timer_arm(priv, queue);
-+ *pending_packets = true;
-
- __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
-
-@@ -5473,12 +5474,13 @@ static int stmmac_napi_poll_tx(struct na
- struct stmmac_channel *ch =
- container_of(napi, struct stmmac_channel, tx_napi);
- struct stmmac_priv *priv = ch->priv_data;
-+ bool pending_packets = false;
- u32 chan = ch->index;
- int work_done;
-
- priv->xstats.napi_poll++;
-
-- work_done = stmmac_tx_clean(priv, budget, chan);
-+ work_done = stmmac_tx_clean(priv, budget, chan, &pending_packets);
- work_done = min(work_done, budget);
-
- if (work_done < budget && napi_complete_done(napi, work_done)) {
-@@ -5489,6 +5491,10 @@ static int stmmac_napi_poll_tx(struct na
- spin_unlock_irqrestore(&ch->lock, flags);
- }
-
-+ /* TX still have packet to handle, check if we need to arm tx timer */
-+ if (pending_packets)
-+ stmmac_tx_timer_arm(priv, chan);
-+
- return work_done;
- }
-
-@@ -5498,11 +5504,12 @@ static int stmmac_napi_poll_rxtx(struct
- container_of(napi, struct stmmac_channel, rxtx_napi);
- struct stmmac_priv *priv = ch->priv_data;
- int rx_done, tx_done, rxtx_done;
-+ bool tx_pending_packets = false;
- u32 chan = ch->index;
-
- priv->xstats.napi_poll++;
-
-- tx_done = stmmac_tx_clean(priv, budget, chan);
-+ tx_done = stmmac_tx_clean(priv, budget, chan, &tx_pending_packets);
- tx_done = min(tx_done, budget);
-
- rx_done = stmmac_rx_zc(priv, budget, chan);
-@@ -5527,6 +5534,10 @@ static int stmmac_napi_poll_rxtx(struct
- spin_unlock_irqrestore(&ch->lock, flags);
- }
-
-+ /* TX still have packet to handle, check if we need to arm tx timer */
-+ if (tx_pending_packets)
-+ stmmac_tx_timer_arm(priv, chan);
-+
- return min(rxtx_done, budget - 1);
- }
-
+++ /dev/null
-From cd40cd8b1ca4a6f531c6c3fd78b306e5014f9c04 Mon Sep 17 00:00:00 2001
-From: Christian Marangi <ansuelsmth@gmail.com>
-Date: Mon, 18 Sep 2023 14:39:01 +0200
-Subject: [PATCH 3/4] net: stmmac: improve TX timer arm logic
-
-There is currently a problem with the TX timer getting armed multiple
-unnecessary times causing big performance regression on some device that
-suffer from heavy handling of hrtimer rearm.
-
-The use of the TX timer is an old implementation that predates the napi
-implementation and the interrupt enable/disable handling.
-
-Due to stmmac being a very old code, the TX timer was never evaluated
-again with this new implementation and was kept there causing
-performance regression. The performance regression started to appear
-with kernel version 4.19 with 8fce33317023 ("net: stmmac: Rework coalesce
-timer and fix multi-queue races") where the timer was reduced to 1ms
-causing it to be armed 40 times more than before.
-
-Decreasing the timer made the problem more present and caused the
-regression in the other of 600-700mbps on some device (regression where
-this was notice is ipq806x).
-
-The problem is in the fact that handling the hrtimer on some target is
-expensive and recent kernel made the timer armed much more times.
-A solution that was proposed was reverting the hrtimer change and use
-mod_timer but such solution would still hide the real problem in the
-current implementation.
-
-To fix the regression, apply some additional logic and skip arming the
-timer when not needed.
-
-Arm the timer ONLY if a napi is not already scheduled. Running the timer
-is redundant since the same function (stmmac_tx_clean) will run in the
-napi TX poll. Also try to cancel any timer if a napi is scheduled to
-prevent redundant run of TX call.
-
-With the following new logic the original performance are restored while
-keeping using the hrtimer.
-
-Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
----
- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 +++++++++++++++---
- 1 file changed, 15 insertions(+), 3 deletions(-)
-
---- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
-+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
-@@ -2976,13 +2976,25 @@ static void stmmac_tx_timer_arm(struct s
- {
- struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
- u32 tx_coal_timer = priv->tx_coal_timer[queue];
-+ struct stmmac_channel *ch;
-+ struct napi_struct *napi;
-
- if (!tx_coal_timer)
- return;
-
-- hrtimer_start(&tx_q->txtimer,
-- STMMAC_COAL_TIMER(tx_coal_timer),
-- HRTIMER_MODE_REL);
-+ ch = &priv->channel[tx_q->queue_index];
-+ napi = tx_q->xsk_pool ? &ch->rxtx_napi : &ch->tx_napi;
-+
-+ /* Arm timer only if napi is not already scheduled.
-+ * Try to cancel any timer if napi is scheduled, timer will be armed
-+ * again in the next scheduled napi.
-+ */
-+ if (unlikely(!napi_is_scheduled(napi)))
-+ hrtimer_start(&tx_q->txtimer,
-+ STMMAC_COAL_TIMER(tx_coal_timer),
-+ HRTIMER_MODE_REL);
-+ else
-+ hrtimer_try_to_cancel(&tx_q->txtimer);
- }
-
- /**
+++ /dev/null
-From 95281ab33fbaa1e974bceb20cfb0f5c92934f2b3 Mon Sep 17 00:00:00 2001
-From: Christian Marangi <ansuelsmth@gmail.com>
-Date: Mon, 18 Sep 2023 15:11:13 +0200
-Subject: [PATCH 4/4] net: stmmac: increase TX coalesce timer to 5ms
-
-Commit 8fce33317023 ("net: stmmac: Rework coalesce timer and fix
-multi-queue races") decreased the TX coalesce timer from 40ms to 1ms.
-
-This caused some performance regression on some target (regression was
-reported at least on ipq806x) in the order of 600mbps dropping from
-gigabit handling to only 200mbps.
-
-The problem was identified in the TX timer getting armed too much time.
-While this was fixed and improved in another commit, performance can be
-improved even further by increasing the timer delay a bit moving from
-1ms to 5ms.
-
-The value is a good balance between battery saving by prevending too
-much interrupt to be generated and permitting good performance for
-internet oriented devices.
-
-Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
----
- drivers/net/ethernet/stmicro/stmmac/common.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
---- a/drivers/net/ethernet/stmicro/stmmac/common.h
-+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
-@@ -286,7 +286,7 @@ struct stmmac_safety_stats {
- #define MIN_DMA_RIWT 0x10
- #define DEF_DMA_RIWT 0xa0
- /* Tx coalesce parameters */
--#define STMMAC_COAL_TX_TIMER 1000
-+#define STMMAC_COAL_TX_TIMER 5000
- #define STMMAC_MAX_COAL_TX_TICK 100000
- #define STMMAC_TX_MAX_FRAMES 256
- #define STMMAC_TX_FRAMES 25