airoha: an7581: backport patches to support ETS and HTB sched
author Christian Marangi <ansuelsmth@gmail.com>
Fri, 10 Jan 2025 13:22:31 +0000 (14:22 +0100)
committer Christian Marangi <ansuelsmth@gmail.com>
Fri, 10 Jan 2025 13:22:31 +0000 (14:22 +0100)
Backport patches to support the ETS and HTB schedulers for the airoha
ethernet driver.
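
As a usage sketch (illustrative only: the interface name, rate and
weights below are assumptions, not taken from these patches), the two
offloaded schedulers are meant to be combined along these lines:

  tc qdisc replace dev eth0 root handle 10: htb offload
  tc class add dev eth0 parent 10: classid 10:1 htb rate 100mbit ceil 100mbit
  tc qdisc replace dev eth0 parent 10:1 handle 1: ets bands 8 \
     quanta 1514 3028 4542 6056 7570 9084 10598 12112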

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
target/linux/airoha/patches-6.6/038-01-v6.14-net-airoha-Enable-Tx-drop-capability-for-each-Tx-DMA.patch [new file with mode: 0644]
target/linux/airoha/patches-6.6/038-02-v6.14-net-airoha-Introduce-ndo_select_queue-callback.patch [new file with mode: 0644]
target/linux/airoha/patches-6.6/038-03-v6.14-net-airoha-Add-sched-ETS-offload-support.patch [new file with mode: 0644]
target/linux/airoha/patches-6.6/038-04-v6.14-net-airoha-Add-sched-HTB-offload-support.patch [new file with mode: 0644]

diff --git a/target/linux/airoha/patches-6.6/038-01-v6.14-net-airoha-Enable-Tx-drop-capability-for-each-Tx-DMA.patch b/target/linux/airoha/patches-6.6/038-01-v6.14-net-airoha-Enable-Tx-drop-capability-for-each-Tx-DMA.patch
new file mode 100644 (file)
index 0000000..c8681aa
--- /dev/null
@@ -0,0 +1,27 @@
+From 5f795590380476f1c9b7ed0ac945c9b0269dc23a Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Fri, 3 Jan 2025 13:17:02 +0100
+Subject: [PATCH 1/4] net: airoha: Enable Tx drop capability for each Tx DMA
+ ring
+
+This is a preliminary patch in order to enable hw Qdisc offloading.
+
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+---
+ drivers/net/ethernet/mediatek/airoha_eth.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/mediatek/airoha_eth.c
++++ b/drivers/net/ethernet/mediatek/airoha_eth.c
+@@ -1790,6 +1790,10 @@ static int airoha_qdma_init_tx_queue(str
+               WRITE_ONCE(q->desc[i].ctrl, cpu_to_le32(val));
+       }
+
++      /* xmit ring drop default setting */
++      airoha_qdma_set(qdma, REG_TX_RING_BLOCKING(qid),
++                      TX_RING_IRQ_BLOCKING_TX_DROP_EN_MASK);
++
+       airoha_qdma_wr(qdma, REG_TX_RING_BASE(qid), dma_addr);
+       airoha_qdma_rmw(qdma, REG_TX_CPU_IDX(qid), TX_RING_CPU_IDX_MASK,
+                       FIELD_PREP(TX_RING_CPU_IDX_MASK, q->head));
diff --git a/target/linux/airoha/patches-6.6/038-02-v6.14-net-airoha-Introduce-ndo_select_queue-callback.patch b/target/linux/airoha/patches-6.6/038-02-v6.14-net-airoha-Introduce-ndo_select_queue-callback.patch
new file mode 100644 (file)
index 0000000..75743bd
--- /dev/null
@@ -0,0 +1,97 @@
+From 2b288b81560b94958cd68bbe54673e55a1730c95 Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Fri, 3 Jan 2025 13:17:03 +0100
+Subject: [PATCH 2/4] net: airoha: Introduce ndo_select_queue callback
+
+The Airoha EN7581 SoC supports 32 Tx DMA rings used to feed packets to
+QoS channels. Each channel supports 8 QoS queues where the user can
+apply QoS scheduling policies. In a similar way, the user can configure
+hw rate shaping for each QoS channel.
+Introduce the ndo_select_queue callback in order to select the tx queue
+based on the QoS channel and QoS queue. In particular, for dsa devices
+select the QoS channel according to the dsa user port index; rely on
+the port id otherwise. Select the QoS queue based on the skb priority.
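+
+As an illustrative example (not part of this patch; the interface and
+filter below are assumptions), egress traffic can be steered to a given
+QoS queue by adjusting the skb priority, e.g. with act_skbedit:
+
+  tc qdisc add dev eth0 clsact
+  tc filter add dev eth0 egress protocol ip flower ip_proto udp \
+     action skbedit priority 6
+
+With the mapping above, priority 6 selects QoS queue (6 - 1) % 8 = 5 of
+the QoS channel assigned to the port.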
+
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+---
+ drivers/net/ethernet/mediatek/airoha_eth.c | 30 ++++++++++++++++++++--
+ 1 file changed, 28 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mediatek/airoha_eth.c
++++ b/drivers/net/ethernet/mediatek/airoha_eth.c
+@@ -23,6 +23,8 @@
+ #define AIROHA_MAX_NUM_XSI_RSTS               5
+ #define AIROHA_MAX_MTU                        2000
+ #define AIROHA_MAX_PACKET_SIZE                2048
++#define AIROHA_NUM_QOS_CHANNELS               4
++#define AIROHA_NUM_QOS_QUEUES         8
+ #define AIROHA_NUM_TX_RING            32
+ #define AIROHA_NUM_RX_RING            32
+ #define AIROHA_FE_MC_MAX_VLAN_TABLE   64
+@@ -2422,21 +2424,44 @@ static void airoha_dev_get_stats64(struc
+       } while (u64_stats_fetch_retry(&port->stats.syncp, start));
+ }
+
++static u16 airoha_dev_select_queue(struct net_device *dev, struct sk_buff *skb,
++                                 struct net_device *sb_dev)
++{
++      struct airoha_gdm_port *port = netdev_priv(dev);
++      int queue, channel;
++
++      /* For dsa device select QoS channel according to the dsa user port
++       * index, rely on port id otherwise. Select QoS queue based on the
++       * skb priority.
++       */
++      channel = netdev_uses_dsa(dev) ? skb_get_queue_mapping(skb) : port->id;
++      channel = channel % AIROHA_NUM_QOS_CHANNELS;
++      queue = (skb->priority - 1) % AIROHA_NUM_QOS_QUEUES; /* QoS queue */
++      queue = channel * AIROHA_NUM_QOS_QUEUES + queue;
++
++      return queue < dev->num_tx_queues ? queue : 0;
++}
++
+ static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
+                                  struct net_device *dev)
+ {
+       struct skb_shared_info *sinfo = skb_shinfo(skb);
+       struct airoha_gdm_port *port = netdev_priv(dev);
+-      u32 msg0 = 0, msg1, len = skb_headlen(skb);
+-      int i, qid = skb_get_queue_mapping(skb);
++      u32 msg0, msg1, len = skb_headlen(skb);
+       struct airoha_qdma *qdma = port->qdma;
+       u32 nr_frags = 1 + sinfo->nr_frags;
+       struct netdev_queue *txq;
+       struct airoha_queue *q;
+       void *data = skb->data;
++      int i, qid;
+       u16 index;
+       u8 fport;
+
++      qid = skb_get_queue_mapping(skb) % ARRAY_SIZE(qdma->q_tx);
++      msg0 = FIELD_PREP(QDMA_ETH_TXMSG_CHAN_MASK,
++                        qid / AIROHA_NUM_QOS_QUEUES) |
++             FIELD_PREP(QDMA_ETH_TXMSG_QUEUE_MASK,
++                        qid % AIROHA_NUM_QOS_QUEUES);
+       if (skb->ip_summed == CHECKSUM_PARTIAL)
+               msg0 |= FIELD_PREP(QDMA_ETH_TXMSG_TCO_MASK, 1) |
+                       FIELD_PREP(QDMA_ETH_TXMSG_UCO_MASK, 1) |
+@@ -2610,6 +2635,7 @@ static const struct net_device_ops airoh
+       .ndo_init               = airoha_dev_init,
+       .ndo_open               = airoha_dev_open,
+       .ndo_stop               = airoha_dev_stop,
++      .ndo_select_queue       = airoha_dev_select_queue,
+       .ndo_start_xmit         = airoha_dev_xmit,
+       .ndo_get_stats64        = airoha_dev_get_stats64,
+       .ndo_set_mac_address    = airoha_dev_set_macaddr,
diff --git a/target/linux/airoha/patches-6.6/038-03-v6.14-net-airoha-Add-sched-ETS-offload-support.patch b/target/linux/airoha/patches-6.6/038-03-v6.14-net-airoha-Add-sched-ETS-offload-support.patch
new file mode 100644 (file)
index 0000000..ad5e0e5
--- /dev/null
@@ -0,0 +1,302 @@
+From 20bf7d07c956e5c7a22d3076c599cbb7a6054917 Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Fri, 3 Jan 2025 13:17:04 +0100
+Subject: [PATCH 3/4] net: airoha: Add sched ETS offload support
+
+Introduce support for the ETS Qdisc offload available on the Airoha
+EN7581 ethernet controller. In order to be effective, the ETS Qdisc must
+be configured as a leaf of an HTB Qdisc (HTB Qdisc offload will be added
+in the following patch). The ETS Qdisc available on the EN7581 ethernet
+controller supports at most 8 concurrent bands (QoS queues). We can
+enable an ETS Qdisc for each available QoS channel.
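+
+A minimal configuration sketch (the values are assumptions, not mandated
+by this patch), attaching an ETS Qdisc with 2 strict bands and 6 WRR
+bands to an offloaded HTB leaf:
+
+  tc qdisc replace dev eth0 parent 10:1 handle 1: ets bands 8 strict 2 \
+     quanta 1514 3028 4542 6056 7570 9084
+
+Note that 7 strict bands plus a single WRR band is rejected since the hw
+does not support this configuration.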
+
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+---
+ drivers/net/ethernet/mediatek/airoha_eth.c | 196 ++++++++++++++++++++-
+ 1 file changed, 195 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mediatek/airoha_eth.c
++++ b/drivers/net/ethernet/mediatek/airoha_eth.c
+@@ -15,6 +15,7 @@
+ #include <linux/u64_stats_sync.h>
+ #include <net/dsa.h>
+ #include <net/page_pool/helpers.h>
++#include <net/pkt_cls.h>
+ #include <uapi/linux/ppp_defs.h>
+
+ #define AIROHA_MAX_NUM_GDM_PORTS      1
+@@ -543,9 +544,24 @@
+ #define INGRESS_SLOW_TICK_RATIO_MASK  GENMASK(29, 16)
+ #define INGRESS_FAST_TICK_MASK                GENMASK(15, 0)
+
++#define REG_QUEUE_CLOSE_CFG(_n)               (0x00a0 + ((_n) & 0xfc))
++#define TXQ_DISABLE_CHAN_QUEUE_MASK(_n, _m)   BIT((_m) + (((_n) & 0x3) << 3))
++
+ #define REG_TXQ_DIS_CFG_BASE(_n)      ((_n) ? 0x20a0 : 0x00a0)
+ #define REG_TXQ_DIS_CFG(_n, _m)               (REG_TXQ_DIS_CFG_BASE((_n)) + (_m) << 2)
+
++#define REG_CNTR_CFG(_n)              (0x0400 + ((_n) << 3))
++#define CNTR_EN_MASK                  BIT(31)
++#define CNTR_ALL_CHAN_EN_MASK         BIT(30)
++#define CNTR_ALL_QUEUE_EN_MASK                BIT(29)
++#define CNTR_ALL_DSCP_RING_EN_MASK    BIT(28)
++#define CNTR_SRC_MASK                 GENMASK(27, 24)
++#define CNTR_DSCP_RING_MASK           GENMASK(20, 16)
++#define CNTR_CHAN_MASK                        GENMASK(7, 3)
++#define CNTR_QUEUE_MASK                       GENMASK(2, 0)
++
++#define REG_CNTR_VAL(_n)              (0x0404 + ((_n) << 3))
++
+ #define REG_LMGR_INIT_CFG             0x1000
+ #define LMGR_INIT_START                       BIT(31)
+ #define LMGR_SRAM_MODE_MASK           BIT(30)
+@@ -571,9 +587,19 @@
+ #define TWRR_WEIGHT_SCALE_MASK                BIT(31)
+ #define TWRR_WEIGHT_BASE_MASK         BIT(3)
+
++#define REG_TXWRR_WEIGHT_CFG          0x1024
++#define TWRR_RW_CMD_MASK              BIT(31)
++#define TWRR_RW_CMD_DONE              BIT(30)
++#define TWRR_CHAN_IDX_MASK            GENMASK(23, 19)
++#define TWRR_QUEUE_IDX_MASK           GENMASK(18, 16)
++#define TWRR_VALUE_MASK                       GENMASK(15, 0)
++
+ #define REG_PSE_BUF_USAGE_CFG         0x1028
+ #define PSE_BUF_ESTIMATE_EN_MASK      BIT(29)
+
++#define REG_CHAN_QOS_MODE(_n)         (0x1040 + ((_n) << 2))
++#define CHAN_QOS_MODE_MASK(_n)                GENMASK(2 + ((_n) << 2), (_n) << 2)
++
+ #define REG_GLB_TRTCM_CFG             0x1080
+ #define GLB_TRTCM_EN_MASK             BIT(31)
+ #define GLB_TRTCM_MODE_MASK           BIT(30)
+@@ -722,6 +748,17 @@ enum {
+       FE_PSE_PORT_DROP = 0xf,
+ };
+
++enum tx_sched_mode {
++      TC_SCH_WRR8,
++      TC_SCH_SP,
++      TC_SCH_WRR7,
++      TC_SCH_WRR6,
++      TC_SCH_WRR5,
++      TC_SCH_WRR4,
++      TC_SCH_WRR3,
++      TC_SCH_WRR2,
++};
++
+ struct airoha_queue_entry {
+       union {
+               void *buf;
+@@ -812,6 +849,10 @@ struct airoha_gdm_port {
+       int id;
+
+       struct airoha_hw_stats stats;
++
++      /* qos stats counters */
++      u64 cpu_tx_packets;
++      u64 fwd_tx_packets;
+ };
+
+ struct airoha_eth {
+@@ -1962,6 +2003,27 @@ static void airoha_qdma_init_qos(struct
+                       FIELD_PREP(SLA_SLOW_TICK_RATIO_MASK, 40));
+ }
+
++static void airoha_qdma_init_qos_stats(struct airoha_qdma *qdma)
++{
++      int i;
++
++      for (i = 0; i < AIROHA_NUM_QOS_CHANNELS; i++) {
++              /* Tx-cpu transferred count */
++              airoha_qdma_wr(qdma, REG_CNTR_VAL(i << 1), 0);
++              airoha_qdma_wr(qdma, REG_CNTR_CFG(i << 1),
++                             CNTR_EN_MASK | CNTR_ALL_QUEUE_EN_MASK |
++                             CNTR_ALL_DSCP_RING_EN_MASK |
++                             FIELD_PREP(CNTR_CHAN_MASK, i));
++              /* Tx-fwd transferred count */
++              airoha_qdma_wr(qdma, REG_CNTR_VAL((i << 1) + 1), 0);
++              airoha_qdma_wr(qdma, REG_CNTR_CFG((i << 1) + 1),
++                             CNTR_EN_MASK | CNTR_ALL_QUEUE_EN_MASK |
++                             CNTR_ALL_DSCP_RING_EN_MASK |
++                             FIELD_PREP(CNTR_SRC_MASK, 1) |
++                             FIELD_PREP(CNTR_CHAN_MASK, i));
++      }
++}
++
+ static int airoha_qdma_hw_init(struct airoha_qdma *qdma)
+ {
+       int i;
+@@ -2012,6 +2074,7 @@ static int airoha_qdma_hw_init(struct ai
+
+       airoha_qdma_set(qdma, REG_TXQ_CNGST_CFG,
+                       TXQ_CNGST_DROP_EN | TXQ_CNGST_DEI_DROP_EN);
++      airoha_qdma_init_qos_stats(qdma);
+
+       return 0;
+ }
+@@ -2631,6 +2694,135 @@ airoha_ethtool_get_rmon_stats(struct net
+       } while (u64_stats_fetch_retry(&port->stats.syncp, start));
+ }
+
++static int airoha_qdma_set_chan_tx_sched(struct airoha_gdm_port *port,
++                                       int channel, enum tx_sched_mode mode,
++                                       const u16 *weights, u8 n_weights)
++{
++      int i;
++
++      for (i = 0; i < AIROHA_NUM_TX_RING; i++)
++              airoha_qdma_clear(port->qdma, REG_QUEUE_CLOSE_CFG(channel),
++                                TXQ_DISABLE_CHAN_QUEUE_MASK(channel, i));
++
++      for (i = 0; i < n_weights; i++) {
++              u32 status;
++              int err;
++
++              airoha_qdma_wr(port->qdma, REG_TXWRR_WEIGHT_CFG,
++                             TWRR_RW_CMD_MASK |
++                             FIELD_PREP(TWRR_CHAN_IDX_MASK, channel) |
++                             FIELD_PREP(TWRR_QUEUE_IDX_MASK, i) |
++                             FIELD_PREP(TWRR_VALUE_MASK, weights[i]));
++              err = read_poll_timeout(airoha_qdma_rr, status,
++                                      status & TWRR_RW_CMD_DONE,
++                                      USEC_PER_MSEC, 10 * USEC_PER_MSEC,
++                                      true, port->qdma,
++                                      REG_TXWRR_WEIGHT_CFG);
++              if (err)
++                      return err;
++      }
++
++      airoha_qdma_rmw(port->qdma, REG_CHAN_QOS_MODE(channel >> 3),
++                      CHAN_QOS_MODE_MASK(channel),
++                      mode << __ffs(CHAN_QOS_MODE_MASK(channel)));
++
++      return 0;
++}
++
++static int airoha_qdma_set_tx_prio_sched(struct airoha_gdm_port *port,
++                                       int channel)
++{
++      static const u16 w[AIROHA_NUM_QOS_QUEUES] = {};
++
++      return airoha_qdma_set_chan_tx_sched(port, channel, TC_SCH_SP, w,
++                                           ARRAY_SIZE(w));
++}
++
++static int airoha_qdma_set_tx_ets_sched(struct airoha_gdm_port *port,
++                                      int channel,
++                                      struct tc_ets_qopt_offload *opt)
++{
++      struct tc_ets_qopt_offload_replace_params *p = &opt->replace_params;
++      enum tx_sched_mode mode = TC_SCH_SP;
++      u16 w[AIROHA_NUM_QOS_QUEUES] = {};
++      int i, nstrict = 0;
++
++      if (p->bands > AIROHA_NUM_QOS_QUEUES)
++              return -EINVAL;
++
++      for (i = 0; i < p->bands; i++) {
++              if (!p->quanta[i])
++                      nstrict++;
++      }
++
++      /* this configuration is not supported by the hw */
++      if (nstrict == AIROHA_NUM_QOS_QUEUES - 1)
++              return -EINVAL;
++
++      for (i = 0; i < p->bands - nstrict; i++)
++              w[i] = p->weights[nstrict + i];
++
++      if (!nstrict)
++              mode = TC_SCH_WRR8;
++      else if (nstrict < AIROHA_NUM_QOS_QUEUES - 1)
++              mode = nstrict + 1;
++
++      return airoha_qdma_set_chan_tx_sched(port, channel, mode, w,
++                                           ARRAY_SIZE(w));
++}
++
++static int airoha_qdma_get_tx_ets_stats(struct airoha_gdm_port *port,
++                                      int channel,
++                                      struct tc_ets_qopt_offload *opt)
++{
++      u64 cpu_tx_packets = airoha_qdma_rr(port->qdma,
++                                          REG_CNTR_VAL(channel << 1));
++      u64 fwd_tx_packets = airoha_qdma_rr(port->qdma,
++                                          REG_CNTR_VAL((channel << 1) + 1));
++      u64 tx_packets = (cpu_tx_packets - port->cpu_tx_packets) +
++                       (fwd_tx_packets - port->fwd_tx_packets);
++      _bstats_update(opt->stats.bstats, 0, tx_packets);
++
++      port->cpu_tx_packets = cpu_tx_packets;
++      port->fwd_tx_packets = fwd_tx_packets;
++
++      return 0;
++}
++
++static int airoha_tc_setup_qdisc_ets(struct airoha_gdm_port *port,
++                                   struct tc_ets_qopt_offload *opt)
++{
++      int channel = TC_H_MAJ(opt->handle) >> 16;
++
++      if (opt->parent == TC_H_ROOT)
++              return -EINVAL;
++
++      switch (opt->command) {
++      case TC_ETS_REPLACE:
++              return airoha_qdma_set_tx_ets_sched(port, channel, opt);
++      case TC_ETS_DESTROY:
++              /* PRIO is default qdisc scheduler */
++              return airoha_qdma_set_tx_prio_sched(port, channel);
++      case TC_ETS_STATS:
++              return airoha_qdma_get_tx_ets_stats(port, channel, opt);
++      default:
++              return -EOPNOTSUPP;
++      }
++}
++
++static int airoha_dev_tc_setup(struct net_device *dev, enum tc_setup_type type,
++                             void *type_data)
++{
++      struct airoha_gdm_port *port = netdev_priv(dev);
++
++      switch (type) {
++      case TC_SETUP_QDISC_ETS:
++              return airoha_tc_setup_qdisc_ets(port, type_data);
++      default:
++              return -EOPNOTSUPP;
++      }
++}
++
+ static const struct net_device_ops airoha_netdev_ops = {
+       .ndo_init               = airoha_dev_init,
+       .ndo_open               = airoha_dev_open,
+@@ -2639,6 +2831,7 @@ static const struct net_device_ops airoh
+       .ndo_start_xmit         = airoha_dev_xmit,
+       .ndo_get_stats64        = airoha_dev_get_stats64,
+       .ndo_set_mac_address    = airoha_dev_set_macaddr,
++      .ndo_setup_tc           = airoha_dev_tc_setup,
+ };
+
+ static const struct ethtool_ops airoha_ethtool_ops = {
+@@ -2688,7 +2881,8 @@ static int airoha_alloc_gdm_port(struct
+       dev->watchdog_timeo = 5 * HZ;
+       dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
+                          NETIF_F_TSO6 | NETIF_F_IPV6_CSUM |
+-                         NETIF_F_SG | NETIF_F_TSO;
++                         NETIF_F_SG | NETIF_F_TSO |
++                         NETIF_F_HW_TC;
+       dev->features |= dev->hw_features;
+       dev->dev.of_node = np;
+       dev->irq = qdma->irq;
diff --git a/target/linux/airoha/patches-6.6/038-04-v6.14-net-airoha-Add-sched-HTB-offload-support.patch b/target/linux/airoha/patches-6.6/038-04-v6.14-net-airoha-Add-sched-HTB-offload-support.patch
new file mode 100644 (file)
index 0000000..1239b17
--- /dev/null
@@ -0,0 +1,381 @@
+From ef1ca9271313b4ea7b03de69576aacef1e78f381 Mon Sep 17 00:00:00 2001
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Fri, 3 Jan 2025 13:17:05 +0100
+Subject: [PATCH 4/4] net: airoha: Add sched HTB offload support
+
+Introduce support for the HTB Qdisc offload available in the Airoha
+EN7581 ethernet controller. EN7581 can offload only one level of HTB
+leaves. Each HTB leaf represents a QoS channel supported by the EN7581
+SoC. The typical use-case is creating an HTB leaf per QoS channel to
+rate limit the egress traffic and attaching an ETS Qdisc to each HTB
+leaf in order to enforce traffic prioritization.
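+
+For instance (a usage sketch; the interface name and rate below are
+assumptions), an offloaded HTB root plus a rate-limited QoS channel can
+be created with:
+
+  tc qdisc replace dev eth0 root handle 10: htb offload
+  tc class add dev eth0 parent 10: classid 10:1 htb rate 100mbit ceil 100mbit
+
+An ETS Qdisc can then be attached below classid 10:1 as described in the
+previous patch.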
+
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+---
+ drivers/net/ethernet/mediatek/airoha_eth.c | 288 ++++++++++++++++++++-
+ 1 file changed, 287 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mediatek/airoha_eth.c
++++ b/drivers/net/ethernet/mediatek/airoha_eth.c
+@@ -28,6 +28,8 @@
+ #define AIROHA_NUM_QOS_QUEUES         8
+ #define AIROHA_NUM_TX_RING            32
+ #define AIROHA_NUM_RX_RING            32
++#define AIROHA_NUM_NETDEV_TX_RINGS    (AIROHA_NUM_TX_RING + \
++                                       AIROHA_NUM_QOS_CHANNELS)
+ #define AIROHA_FE_MC_MAX_VLAN_TABLE   64
+ #define AIROHA_FE_MC_MAX_VLAN_PORT    16
+ #define AIROHA_NUM_TX_IRQ             2
+@@ -43,6 +45,9 @@
+ #define PSE_RSV_PAGES                 128
+ #define PSE_QUEUE_RSV_PAGES           64
+
++#define QDMA_METER_IDX(_n)            ((_n) & 0xff)
++#define QDMA_METER_GROUP(_n)          (((_n) >> 8) & 0x3)
++
+ /* FE */
+ #define PSE_BASE                      0x0100
+ #define CSR_IFC_BASE                  0x0200
+@@ -583,6 +588,17 @@
+ #define EGRESS_SLOW_TICK_RATIO_MASK   GENMASK(29, 16)
+ #define EGRESS_FAST_TICK_MASK         GENMASK(15, 0)
+
++#define TRTCM_PARAM_RW_MASK           BIT(31)
++#define TRTCM_PARAM_RW_DONE_MASK      BIT(30)
++#define TRTCM_PARAM_TYPE_MASK         GENMASK(29, 28)
++#define TRTCM_METER_GROUP_MASK                GENMASK(27, 26)
++#define TRTCM_PARAM_INDEX_MASK                GENMASK(23, 17)
++#define TRTCM_PARAM_RATE_TYPE_MASK    BIT(16)
++
++#define REG_TRTCM_CFG_PARAM(_n)               ((_n) + 0x4)
++#define REG_TRTCM_DATA_LOW(_n)                ((_n) + 0x8)
++#define REG_TRTCM_DATA_HIGH(_n)               ((_n) + 0xc)
++
+ #define REG_TXWRR_MODE_CFG            0x1020
+ #define TWRR_WEIGHT_SCALE_MASK                BIT(31)
+ #define TWRR_WEIGHT_BASE_MASK         BIT(3)
+@@ -759,6 +775,29 @@ enum tx_sched_mode {
+       TC_SCH_WRR2,
+ };
+
++enum trtcm_param_type {
++      TRTCM_MISC_MODE, /* meter_en, pps_mode, tick_sel */
++      TRTCM_TOKEN_RATE_MODE,
++      TRTCM_BUCKETSIZE_SHIFT_MODE,
++      TRTCM_BUCKET_COUNTER_MODE,
++};
++
++enum trtcm_mode_type {
++      TRTCM_COMMIT_MODE,
++      TRTCM_PEAK_MODE,
++};
++
++enum trtcm_param {
++      TRTCM_TICK_SEL = BIT(0),
++      TRTCM_PKT_MODE = BIT(1),
++      TRTCM_METER_MODE = BIT(2),
++};
++
++#define MIN_TOKEN_SIZE                                4096
++#define MAX_TOKEN_SIZE_OFFSET                 17
++#define TRTCM_TOKEN_RATE_MASK                 GENMASK(23, 6)
++#define TRTCM_TOKEN_RATE_FRACTION_MASK                GENMASK(5, 0)
++
+ struct airoha_queue_entry {
+       union {
+               void *buf;
+@@ -850,6 +889,8 @@ struct airoha_gdm_port {
+
+       struct airoha_hw_stats stats;
+
++      DECLARE_BITMAP(qos_sq_bmap, AIROHA_NUM_QOS_CHANNELS);
++
+       /* qos stats counters */
+       u64 cpu_tx_packets;
+       u64 fwd_tx_packets;
+@@ -2810,6 +2851,243 @@ static int airoha_tc_setup_qdisc_ets(str
+       }
+ }
+
++static int airoha_qdma_get_trtcm_param(struct airoha_qdma *qdma, int channel,
++                                     u32 addr, enum trtcm_param_type param,
++                                     enum trtcm_mode_type mode,
++                                     u32 *val_low, u32 *val_high)
++{
++      u32 idx = QDMA_METER_IDX(channel), group = QDMA_METER_GROUP(channel);
++      u32 val, config = FIELD_PREP(TRTCM_PARAM_TYPE_MASK, param) |
++                        FIELD_PREP(TRTCM_METER_GROUP_MASK, group) |
++                        FIELD_PREP(TRTCM_PARAM_INDEX_MASK, idx) |
++                        FIELD_PREP(TRTCM_PARAM_RATE_TYPE_MASK, mode);
++
++      airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config);
++      if (read_poll_timeout(airoha_qdma_rr, val,
++                            val & TRTCM_PARAM_RW_DONE_MASK,
++                            USEC_PER_MSEC, 10 * USEC_PER_MSEC, true,
++                            qdma, REG_TRTCM_CFG_PARAM(addr)))
++              return -ETIMEDOUT;
++
++      *val_low = airoha_qdma_rr(qdma, REG_TRTCM_DATA_LOW(addr));
++      if (val_high)
++              *val_high = airoha_qdma_rr(qdma, REG_TRTCM_DATA_HIGH(addr));
++
++      return 0;
++}
++
++static int airoha_qdma_set_trtcm_param(struct airoha_qdma *qdma, int channel,
++                                     u32 addr, enum trtcm_param_type param,
++                                     enum trtcm_mode_type mode, u32 val)
++{
++      u32 idx = QDMA_METER_IDX(channel), group = QDMA_METER_GROUP(channel);
++      u32 config = TRTCM_PARAM_RW_MASK |
++                   FIELD_PREP(TRTCM_PARAM_TYPE_MASK, param) |
++                   FIELD_PREP(TRTCM_METER_GROUP_MASK, group) |
++                   FIELD_PREP(TRTCM_PARAM_INDEX_MASK, idx) |
++                   FIELD_PREP(TRTCM_PARAM_RATE_TYPE_MASK, mode);
++
++      airoha_qdma_wr(qdma, REG_TRTCM_DATA_LOW(addr), val);
++      airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config);
++
++      return read_poll_timeout(airoha_qdma_rr, val,
++                               val & TRTCM_PARAM_RW_DONE_MASK,
++                               USEC_PER_MSEC, 10 * USEC_PER_MSEC, true,
++                               qdma, REG_TRTCM_CFG_PARAM(addr));
++}
++
++static int airoha_qdma_set_trtcm_config(struct airoha_qdma *qdma, int channel,
++                                      u32 addr, enum trtcm_mode_type mode,
++                                      bool enable, u32 enable_mask)
++{
++      u32 val;
++
++      if (airoha_qdma_get_trtcm_param(qdma, channel, addr, TRTCM_MISC_MODE,
++                                      mode, &val, NULL))
++              return -EINVAL;
++
++      val = enable ? val | enable_mask : val & ~enable_mask;
++
++      return airoha_qdma_set_trtcm_param(qdma, channel, addr, TRTCM_MISC_MODE,
++                                         mode, val);
++}
++
++static int airoha_qdma_set_trtcm_token_bucket(struct airoha_qdma *qdma,
++                                            int channel, u32 addr,
++                                            enum trtcm_mode_type mode,
++                                            u32 rate_val, u32 bucket_size)
++{
++      u32 val, config, tick, unit, rate, rate_frac;
++      int err;
++
++      if (airoha_qdma_get_trtcm_param(qdma, channel, addr, TRTCM_MISC_MODE,
++                                      mode, &config, NULL))
++              return -EINVAL;
++
++      val = airoha_qdma_rr(qdma, addr);
++      tick = FIELD_GET(INGRESS_FAST_TICK_MASK, val);
++      if (config & TRTCM_TICK_SEL)
++              tick *= FIELD_GET(INGRESS_SLOW_TICK_RATIO_MASK, val);
++      if (!tick)
++              return -EINVAL;
++
++      unit = (config & TRTCM_PKT_MODE) ? 1000000 / tick : 8000 / tick;
++      if (!unit)
++              return -EINVAL;
++
++      rate = rate_val / unit;
++      rate_frac = rate_val % unit;
++      rate_frac = FIELD_PREP(TRTCM_TOKEN_RATE_MASK, rate_frac) / unit;
++      rate = FIELD_PREP(TRTCM_TOKEN_RATE_MASK, rate) |
++             FIELD_PREP(TRTCM_TOKEN_RATE_FRACTION_MASK, rate_frac);
++
++      err = airoha_qdma_set_trtcm_param(qdma, channel, addr,
++                                        TRTCM_TOKEN_RATE_MODE, mode, rate);
++      if (err)
++              return err;
++
++      val = max_t(u32, bucket_size, MIN_TOKEN_SIZE);
++      val = min_t(u32, __fls(val), MAX_TOKEN_SIZE_OFFSET);
++
++      return airoha_qdma_set_trtcm_param(qdma, channel, addr,
++                                         TRTCM_BUCKETSIZE_SHIFT_MODE,
++                                         mode, val);
++}
++
++static int airoha_qdma_set_tx_rate_limit(struct airoha_gdm_port *port,
++                                       int channel, u32 rate,
++                                       u32 bucket_size)
++{
++      int i, err;
++
++      for (i = 0; i <= TRTCM_PEAK_MODE; i++) {
++              err = airoha_qdma_set_trtcm_config(port->qdma, channel,
++                                                 REG_EGRESS_TRTCM_CFG, i,
++                                                 !!rate, TRTCM_METER_MODE);
++              if (err)
++                      return err;
++
++              err = airoha_qdma_set_trtcm_token_bucket(port->qdma, channel,
++                                                       REG_EGRESS_TRTCM_CFG,
++                                                       i, rate, bucket_size);
++              if (err)
++                      return err;
++      }
++
++      return 0;
++}
++
++static int airoha_tc_htb_alloc_leaf_queue(struct airoha_gdm_port *port,
++                                        struct tc_htb_qopt_offload *opt)
++{
++      u32 channel = TC_H_MIN(opt->classid) % AIROHA_NUM_QOS_CHANNELS;
++      u32 rate = div_u64(opt->rate, 1000) << 3; /* kbps */
++      struct net_device *dev = port->dev;
++      int num_tx_queues = dev->real_num_tx_queues;
++      int err;
++
++      if (opt->parent_classid != TC_HTB_CLASSID_ROOT) {
++              NL_SET_ERR_MSG_MOD(opt->extack, "invalid parent classid");
++              return -EINVAL;
++      }
++
++      err = airoha_qdma_set_tx_rate_limit(port, channel, rate, opt->quantum);
++      if (err) {
++              NL_SET_ERR_MSG_MOD(opt->extack,
++                                 "failed configuring htb offload");
++              return err;
++      }
++
++      if (opt->command == TC_HTB_NODE_MODIFY)
++              return 0;
++
++      err = netif_set_real_num_tx_queues(dev, num_tx_queues + 1);
++      if (err) {
++              airoha_qdma_set_tx_rate_limit(port, channel, 0, opt->quantum);
++              NL_SET_ERR_MSG_MOD(opt->extack,
++                                 "failed setting real_num_tx_queues");
++              return err;
++      }
++
++      set_bit(channel, port->qos_sq_bmap);
++      opt->qid = AIROHA_NUM_TX_RING + channel;
++
++      return 0;
++}
++
++static void airoha_tc_remove_htb_queue(struct airoha_gdm_port *port, int queue)
++{
++      struct net_device *dev = port->dev;
++
++      netif_set_real_num_tx_queues(dev, dev->real_num_tx_queues - 1);
++      airoha_qdma_set_tx_rate_limit(port, queue + 1, 0, 0);
++      clear_bit(queue, port->qos_sq_bmap);
++}
++
++static int airoha_tc_htb_delete_leaf_queue(struct airoha_gdm_port *port,
++                                         struct tc_htb_qopt_offload *opt)
++{
++      u32 channel = TC_H_MIN(opt->classid) % AIROHA_NUM_QOS_CHANNELS;
++
++      if (!test_bit(channel, port->qos_sq_bmap)) {
++              NL_SET_ERR_MSG_MOD(opt->extack, "invalid queue id");
++              return -EINVAL;
++      }
++
++      airoha_tc_remove_htb_queue(port, channel);
++
++      return 0;
++}
++
++static int airoha_tc_htb_destroy(struct airoha_gdm_port *port)
++{
++      int q;
++
++      for_each_set_bit(q, port->qos_sq_bmap, AIROHA_NUM_QOS_CHANNELS)
++              airoha_tc_remove_htb_queue(port, q);
++
++      return 0;
++}
++
++static int airoha_tc_get_htb_get_leaf_queue(struct airoha_gdm_port *port,
++                                          struct tc_htb_qopt_offload *opt)
++{
++      u32 channel = TC_H_MIN(opt->classid) % AIROHA_NUM_QOS_CHANNELS;
++
++      if (!test_bit(channel, port->qos_sq_bmap)) {
++              NL_SET_ERR_MSG_MOD(opt->extack, "invalid queue id");
++              return -EINVAL;
++      }
++
++      opt->qid = channel;
++
++      return 0;
++}
++
++static int airoha_tc_setup_qdisc_htb(struct airoha_gdm_port *port,
++                                   struct tc_htb_qopt_offload *opt)
++{
++      switch (opt->command) {
++      case TC_HTB_CREATE:
++              break;
++      case TC_HTB_DESTROY:
++              return airoha_tc_htb_destroy(port);
++      case TC_HTB_NODE_MODIFY:
++      case TC_HTB_LEAF_ALLOC_QUEUE:
++              return airoha_tc_htb_alloc_leaf_queue(port, opt);
++      case TC_HTB_LEAF_DEL:
++      case TC_HTB_LEAF_DEL_LAST:
++      case TC_HTB_LEAF_DEL_LAST_FORCE:
++              return airoha_tc_htb_delete_leaf_queue(port, opt);
++      case TC_HTB_LEAF_QUERY_QUEUE:
++              return airoha_tc_get_htb_get_leaf_queue(port, opt);
++      default:
++              return -EOPNOTSUPP;
++      }
++
++      return 0;
++}
++
+ static int airoha_dev_tc_setup(struct net_device *dev, enum tc_setup_type type,
+                              void *type_data)
+ {
+@@ -2818,6 +3096,8 @@ static int airoha_dev_tc_setup(struct ne
+       switch (type) {
+       case TC_SETUP_QDISC_ETS:
+               return airoha_tc_setup_qdisc_ets(port, type_data);
++      case TC_SETUP_QDISC_HTB:
++              return airoha_tc_setup_qdisc_htb(port, type_data);
+       default:
+               return -EOPNOTSUPP;
+       }
+@@ -2868,7 +3148,8 @@ static int airoha_alloc_gdm_port(struct
+       }
+
+       dev = devm_alloc_etherdev_mqs(eth->dev, sizeof(*port),
+-                                    AIROHA_NUM_TX_RING, AIROHA_NUM_RX_RING);
++                                    AIROHA_NUM_NETDEV_TX_RINGS,
++                                    AIROHA_NUM_RX_RING);
+       if (!dev) {
+               dev_err(eth->dev, "alloc_etherdev failed\n");
+               return -ENOMEM;
+@@ -2888,6 +3169,11 @@ static int airoha_alloc_gdm_port(struct
+       dev->irq = qdma->irq;
+       SET_NETDEV_DEV(dev, eth->dev);
+
++      /* reserve hw queues for HTB offloading */
++      err = netif_set_real_num_tx_queues(dev, AIROHA_NUM_TX_RING);
++      if (err)
++              return err;
++
+       err = of_get_ethdev_address(np, dev);
+       if (err) {
+               if (err == -EPROBE_DEFER)