--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 27 Oct 2022 19:50:31 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: account for vlan in rx
+ header length
+
+The network stack assumes that devices can handle an extra VLAN tag without
+increasing the MTU
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -29,7 +29,7 @@
+ #define MTK_TX_DMA_BUF_LEN_V2 0xffff
+ #define MTK_DMA_SIZE 512
+ #define MTK_MAC_COUNT 2
+-#define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN)
++#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + ETH_FCS_LEN)
+ #define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
+ #define MTK_DMA_DUMMY_DESC 0xffffffff
+ #define MTK_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | \
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 27 Oct 2022 19:53:57 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: increase tx ring side for
+ QDMA devices
+
+In order to use the hardware traffic shaper feature, a larger tx ring is
+needed, especially for the scratch ring, which the hardware shaper uses to
+reorder packets.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -900,7 +900,7 @@ static int mtk_init_fq_dma(struct mtk_et
+ {
+ const struct mtk_soc_data *soc = eth->soc;
+ dma_addr_t phy_ring_tail;
+- int cnt = MTK_DMA_SIZE;
++ int cnt = MTK_QDMA_RING_SIZE;
+ dma_addr_t dma_addr;
+ int i;
+
+@@ -2154,19 +2154,25 @@ static int mtk_tx_alloc(struct mtk_eth *
+ struct mtk_tx_ring *ring = &eth->tx_ring;
+ int i, sz = soc->txrx.txd_size;
+ struct mtk_tx_dma_v2 *txd;
++ int ring_size;
+
+- ring->buf = kcalloc(MTK_DMA_SIZE, sizeof(*ring->buf),
++ if (MTK_HAS_CAPS(soc->caps, MTK_QDMA))
++ ring_size = MTK_QDMA_RING_SIZE;
++ else
++ ring_size = MTK_DMA_SIZE;
++
++ ring->buf = kcalloc(ring_size, sizeof(*ring->buf),
+ GFP_KERNEL);
+ if (!ring->buf)
+ goto no_tx_mem;
+
+- ring->dma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz,
++ ring->dma = dma_alloc_coherent(eth->dma_dev, ring_size * sz,
+ &ring->phys, GFP_KERNEL);
+ if (!ring->dma)
+ goto no_tx_mem;
+
+- for (i = 0; i < MTK_DMA_SIZE; i++) {
+- int next = (i + 1) % MTK_DMA_SIZE;
++ for (i = 0; i < ring_size; i++) {
++ int next = (i + 1) % ring_size;
+ u32 next_ptr = ring->phys + next * sz;
+
+ txd = ring->dma + i * sz;
+@@ -2186,22 +2192,22 @@ static int mtk_tx_alloc(struct mtk_eth *
+ * descriptors in ring->dma_pdma.
+ */
+ if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA)) {
+- ring->dma_pdma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz,
++ ring->dma_pdma = dma_alloc_coherent(eth->dma_dev, ring_size * sz,
+ &ring->phys_pdma, GFP_KERNEL);
+ if (!ring->dma_pdma)
+ goto no_tx_mem;
+
+- for (i = 0; i < MTK_DMA_SIZE; i++) {
++ for (i = 0; i < ring_size; i++) {
+ ring->dma_pdma[i].txd2 = TX_DMA_DESP2_DEF;
+ ring->dma_pdma[i].txd4 = 0;
+ }
+ }
+
+- ring->dma_size = MTK_DMA_SIZE;
+- atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
++ ring->dma_size = ring_size;
++ atomic_set(&ring->free_count, ring_size - 2);
+ ring->next_free = ring->dma;
+ ring->last_free = (void *)txd;
+- ring->last_free_ptr = (u32)(ring->phys + ((MTK_DMA_SIZE - 1) * sz));
++ ring->last_free_ptr = (u32)(ring->phys + ((ring_size - 1) * sz));
+ ring->thresh = MAX_SKB_FRAGS;
+
+ /* make sure that all changes to the dma ring are flushed before we
+@@ -2213,14 +2219,14 @@ static int mtk_tx_alloc(struct mtk_eth *
+ mtk_w32(eth, ring->phys, soc->reg_map->qdma.ctx_ptr);
+ mtk_w32(eth, ring->phys, soc->reg_map->qdma.dtx_ptr);
+ mtk_w32(eth,
+- ring->phys + ((MTK_DMA_SIZE - 1) * sz),
++ ring->phys + ((ring_size - 1) * sz),
+ soc->reg_map->qdma.crx_ptr);
+ mtk_w32(eth, ring->last_free_ptr, soc->reg_map->qdma.drx_ptr);
+ mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
+ soc->reg_map->qdma.qtx_cfg);
+ } else {
+ mtk_w32(eth, ring->phys_pdma, MT7628_TX_BASE_PTR0);
+- mtk_w32(eth, MTK_DMA_SIZE, MT7628_TX_MAX_CNT0);
++ mtk_w32(eth, ring_size, MT7628_TX_MAX_CNT0);
+ mtk_w32(eth, 0, MT7628_TX_CTX_IDX0);
+ mtk_w32(eth, MT7628_PST_DTX_IDX0, soc->reg_map->pdma.rst_idx);
+ }
+@@ -2238,7 +2244,7 @@ static void mtk_tx_clean(struct mtk_eth
+ int i;
+
+ if (ring->buf) {
+- for (i = 0; i < MTK_DMA_SIZE; i++)
++ for (i = 0; i < ring->dma_size; i++)
+ mtk_tx_unmap(eth, &ring->buf[i], false);
+ kfree(ring->buf);
+ ring->buf = NULL;
+@@ -2246,14 +2252,14 @@ static void mtk_tx_clean(struct mtk_eth
+
+ if (ring->dma) {
+ dma_free_coherent(eth->dma_dev,
+- MTK_DMA_SIZE * soc->txrx.txd_size,
++ ring->dma_size * soc->txrx.txd_size,
+ ring->dma, ring->phys);
+ ring->dma = NULL;
+ }
+
+ if (ring->dma_pdma) {
+ dma_free_coherent(eth->dma_dev,
+- MTK_DMA_SIZE * soc->txrx.txd_size,
++ ring->dma_size * soc->txrx.txd_size,
+ ring->dma_pdma, ring->phys_pdma);
+ ring->dma_pdma = NULL;
+ }
+@@ -2773,7 +2779,7 @@ static void mtk_dma_free(struct mtk_eth
+ netdev_reset_queue(eth->netdev[i]);
+ if (eth->scratch_ring) {
+ dma_free_coherent(eth->dma_dev,
+- MTK_DMA_SIZE * soc->txrx.txd_size,
++ MTK_QDMA_RING_SIZE * soc->txrx.txd_size,
+ eth->scratch_ring, eth->phy_scratch_ring);
+ eth->scratch_ring = NULL;
+ eth->phy_scratch_ring = 0;
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -27,6 +27,7 @@
+ #define MTK_MAX_RX_LENGTH_2K 2048
+ #define MTK_TX_DMA_BUF_LEN 0x3fff
+ #define MTK_TX_DMA_BUF_LEN_V2 0xffff
++#define MTK_QDMA_RING_SIZE 2048
+ #define MTK_DMA_SIZE 512
+ #define MTK_MAC_COUNT 2
+ #define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + ETH_FCS_LEN)
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 4 Nov 2022 19:49:08 +0100
+Subject: [PATCH] net: ethernet: mtk_eth_soc: avoid port_mg assignment on
+ MT7622 and newer
+
+On newer chips, this field is unused and contains some bits related to queue
+assignment. Initialize it to 0 in those cases.
+Fix offload_version on MT7621 and MT7623, which still need the previous value.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -4427,7 +4427,7 @@ static const struct mtk_soc_data mt7621_
+ .hw_features = MTK_HW_FEATURES,
+ .required_clks = MT7621_CLKS_BITMAP,
+ .required_pctl = false,
+- .offload_version = 2,
++ .offload_version = 1,
+ .hash_offset = 2,
+ .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
+ .txrx = {
+@@ -4466,7 +4466,7 @@ static const struct mtk_soc_data mt7623_
+ .hw_features = MTK_HW_FEATURES,
+ .required_clks = MT7623_CLKS_BITMAP,
+ .required_pctl = true,
+- .offload_version = 2,
++ .offload_version = 1,
+ .hash_offset = 2,
+ .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
+ .txrx = {
+--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
+@@ -175,6 +175,8 @@ int mtk_foe_entry_prepare(struct mtk_eth
+ val = FIELD_PREP(MTK_FOE_IB2_DEST_PORT_V2, pse_port) |
+ FIELD_PREP(MTK_FOE_IB2_PORT_AG_V2, 0xf);
+ } else {
++ int port_mg = eth->soc->offload_version > 1 ? 0 : 0x3f;
++
+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
+@@ -182,7 +184,7 @@ int mtk_foe_entry_prepare(struct mtk_eth
+ entry->ib1 = val;
+
+ val = FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port) |
+- FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
++ FIELD_PREP(MTK_FOE_IB2_PORT_MG, port_mg) |
+ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f);
+ }
+
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 27 Oct 2022 20:17:27 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: implement multi-queue
+ support for per-port queues
+
+When sending traffic to multiple ports with different link speeds, queued
+packets to one port can drown out tx to other ports.
+In order to better handle transmission to multiple ports, use the hardware
+shaper feature to implement weighted fair queueing between ports.
+Weight and maximum rate are automatically adjusted based on the link speed
+of the port.
+The first 3 queues are unrestricted and reserved for non-DSA direct tx on
+GMAC ports. The following queues are automatically assigned by the MTK DSA
+tag driver based on the target port number.
+The PPE offload code configures the queues for offloaded traffic in the same
+way.
+This feature is only supported on devices supporting QDMA. All queues still
+share the same DMA ring and descriptor pool.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -55,6 +55,7 @@ static const struct mtk_reg_map mtk_reg_
+ },
+ .qdma = {
+ .qtx_cfg = 0x1800,
++ .qtx_sch = 0x1804,
+ .rx_ptr = 0x1900,
+ .rx_cnt_cfg = 0x1904,
+ .qcrx_ptr = 0x1908,
+@@ -62,6 +63,7 @@ static const struct mtk_reg_map mtk_reg_
+ .rst_idx = 0x1a08,
+ .delay_irq = 0x1a0c,
+ .fc_th = 0x1a10,
++ .tx_sch_rate = 0x1a14,
+ .int_grp = 0x1a20,
+ .hred = 0x1a44,
+ .ctx_ptr = 0x1b00,
+@@ -117,6 +119,7 @@ static const struct mtk_reg_map mt7986_r
+ },
+ .qdma = {
+ .qtx_cfg = 0x4400,
++ .qtx_sch = 0x4404,
+ .rx_ptr = 0x4500,
+ .rx_cnt_cfg = 0x4504,
+ .qcrx_ptr = 0x4508,
+@@ -134,6 +137,7 @@ static const struct mtk_reg_map mt7986_r
+ .fq_tail = 0x4724,
+ .fq_count = 0x4728,
+ .fq_blen = 0x472c,
++ .tx_sch_rate = 0x4798,
+ },
+ .gdm1_cnt = 0x1c00,
+ .gdma_to_ppe0 = 0x3333,
+@@ -576,6 +580,75 @@ static void mtk_mac_link_down(struct phy
+ mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
+ }
+
++static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx,
++ int speed)
++{
++ const struct mtk_soc_data *soc = eth->soc;
++ u32 ofs, val;
++
++ if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA))
++ return;
++
++ val = MTK_QTX_SCH_MIN_RATE_EN |
++ /* minimum: 10 Mbps */
++ FIELD_PREP(MTK_QTX_SCH_MIN_RATE_MAN, 1) |
++ FIELD_PREP(MTK_QTX_SCH_MIN_RATE_EXP, 4) |
++ MTK_QTX_SCH_LEAKY_BUCKET_SIZE;
++ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
++ val |= MTK_QTX_SCH_LEAKY_BUCKET_EN;
++
++ if (IS_ENABLED(CONFIG_SOC_MT7621)) {
++ switch (speed) {
++ case SPEED_10:
++ val |= MTK_QTX_SCH_MAX_RATE_EN |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 103) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 2) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1);
++ break;
++ case SPEED_100:
++ val |= MTK_QTX_SCH_MAX_RATE_EN |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 103) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 3) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1);
++ break;
++ case SPEED_1000:
++ val |= MTK_QTX_SCH_MAX_RATE_EN |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 105) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 4) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 10);
++ break;
++ default:
++ break;
++ }
++ } else {
++ switch (speed) {
++ case SPEED_10:
++ val |= MTK_QTX_SCH_MAX_RATE_EN |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 4) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1);
++ break;
++ case SPEED_100:
++ val |= MTK_QTX_SCH_MAX_RATE_EN |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1);
++ break;
++ case SPEED_1000:
++ val |= MTK_QTX_SCH_MAX_RATE_EN |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 10) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) |
++ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 10);
++ break;
++ default:
++ break;
++ }
++ }
++
++ ofs = MTK_QTX_OFFSET * idx;
++ mtk_w32(eth, val, soc->reg_map->qdma.qtx_sch + ofs);
++}
++
+ static void mtk_mac_link_up(struct phylink_config *config,
+ struct phy_device *phy,
+ unsigned int mode, phy_interface_t interface,
+@@ -601,6 +674,8 @@ static void mtk_mac_link_up(struct phyli
+ break;
+ }
+
++ mtk_set_queue_speed(mac->hw, mac->id, speed);
++
+ /* Configure duplex */
+ if (duplex == DUPLEX_FULL)
+ mcr |= MAC_MCR_FORCE_DPX;
+@@ -1059,7 +1134,8 @@ static void mtk_tx_set_dma_desc_v1(struc
+
+ WRITE_ONCE(desc->txd1, info->addr);
+
+- data = TX_DMA_SWC | TX_DMA_PLEN0(info->size);
++ data = TX_DMA_SWC | TX_DMA_PLEN0(info->size) |
++ FIELD_PREP(TX_DMA_PQID, info->qid);
+ if (info->last)
+ data |= TX_DMA_LS0;
+ WRITE_ONCE(desc->txd3, data);
+@@ -1093,9 +1169,6 @@ static void mtk_tx_set_dma_desc_v2(struc
+ data |= TX_DMA_LS0;
+ WRITE_ONCE(desc->txd3, data);
+
+- if (!info->qid && mac->id)
+- info->qid = MTK_QDMA_GMAC2_QID;
+-
+ data = (mac->id + 1) << TX_DMA_FPORT_SHIFT_V2; /* forward port */
+ data |= TX_DMA_SWC_V2 | QID_BITS_V2(info->qid);
+ WRITE_ONCE(desc->txd4, data);
+@@ -1139,11 +1212,12 @@ static int mtk_tx_map(struct sk_buff *sk
+ .gso = gso,
+ .csum = skb->ip_summed == CHECKSUM_PARTIAL,
+ .vlan = skb_vlan_tag_present(skb),
+- .qid = skb->mark & MTK_QDMA_TX_MASK,
++ .qid = skb_get_queue_mapping(skb),
+ .vlan_tci = skb_vlan_tag_get(skb),
+ .first = true,
+ .last = !skb_is_nonlinear(skb),
+ };
++ struct netdev_queue *txq;
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ const struct mtk_soc_data *soc = eth->soc;
+@@ -1151,8 +1225,10 @@ static int mtk_tx_map(struct sk_buff *sk
+ struct mtk_tx_dma *itxd_pdma, *txd_pdma;
+ struct mtk_tx_buf *itx_buf, *tx_buf;
+ int i, n_desc = 1;
++ int queue = skb_get_queue_mapping(skb);
+ int k = 0;
+
++ txq = netdev_get_tx_queue(dev, queue);
+ itxd = ring->next_free;
+ itxd_pdma = qdma_to_pdma(ring, itxd);
+ if (itxd == ring->last_free)
+@@ -1201,7 +1277,7 @@ static int mtk_tx_map(struct sk_buff *sk
+ memset(&txd_info, 0, sizeof(struct mtk_tx_dma_desc_info));
+ txd_info.size = min_t(unsigned int, frag_size,
+ soc->txrx.dma_max_len);
+- txd_info.qid = skb->mark & MTK_QDMA_TX_MASK;
++ txd_info.qid = queue;
+ txd_info.last = i == skb_shinfo(skb)->nr_frags - 1 &&
+ !(frag_size - txd_info.size);
+ txd_info.addr = skb_frag_dma_map(eth->dma_dev, frag,
+@@ -1240,7 +1316,7 @@ static int mtk_tx_map(struct sk_buff *sk
+ txd_pdma->txd2 |= TX_DMA_LS1;
+ }
+
+- netdev_sent_queue(dev, skb->len);
++ netdev_tx_sent_queue(txq, skb->len);
+ skb_tx_timestamp(skb);
+
+ ring->next_free = mtk_qdma_phys_to_virt(ring, txd->txd2);
+@@ -1252,8 +1328,7 @@ static int mtk_tx_map(struct sk_buff *sk
+ wmb();
+
+ if (MTK_HAS_CAPS(soc->caps, MTK_QDMA)) {
+- if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) ||
+- !netdev_xmit_more())
++ if (netif_xmit_stopped(txq) || !netdev_xmit_more())
+ mtk_w32(eth, txd->txd2, soc->reg_map->qdma.ctx_ptr);
+ } else {
+ int next_idx;
+@@ -1322,7 +1397,7 @@ static void mtk_wake_queue(struct mtk_et
+ for (i = 0; i < MTK_MAC_COUNT; i++) {
+ if (!eth->netdev[i])
+ continue;
+- netif_wake_queue(eth->netdev[i]);
++ netif_tx_wake_all_queues(eth->netdev[i]);
+ }
+ }
+
+@@ -1346,7 +1421,7 @@ static netdev_tx_t mtk_start_xmit(struct
+
+ tx_num = mtk_cal_txd_req(eth, skb);
+ if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
+- netif_stop_queue(dev);
++ netif_tx_stop_all_queues(dev);
+ netif_err(eth, tx_queued, dev,
+ "Tx Ring full when queue awake!\n");
+ spin_unlock(&eth->page_lock);
+@@ -1372,7 +1447,7 @@ static netdev_tx_t mtk_start_xmit(struct
+ goto drop;
+
+ if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
+- netif_stop_queue(dev);
++ netif_tx_stop_all_queues(dev);
+
+ spin_unlock(&eth->page_lock);
+
+@@ -1539,10 +1614,12 @@ static int mtk_xdp_submit_frame(struct m
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+ const struct mtk_soc_data *soc = eth->soc;
+ struct mtk_tx_ring *ring = &eth->tx_ring;
++ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_tx_dma_desc_info txd_info = {
+ .size = xdpf->len,
+ .first = true,
+ .last = !xdp_frame_has_frags(xdpf),
++ .qid = mac->id,
+ };
+ int err, index = 0, n_desc = 1, nr_frags;
+ struct mtk_tx_dma *htxd, *txd, *txd_pdma;
+@@ -1593,6 +1670,7 @@ static int mtk_xdp_submit_frame(struct m
+ memset(&txd_info, 0, sizeof(struct mtk_tx_dma_desc_info));
+ txd_info.size = skb_frag_size(&sinfo->frags[index]);
+ txd_info.last = index + 1 == nr_frags;
++ txd_info.qid = mac->id;
+ data = skb_frag_address(&sinfo->frags[index]);
+
+ index++;
+@@ -1944,8 +2022,46 @@ rx_done:
+ return done;
+ }
+
++struct mtk_poll_state {
++ struct netdev_queue *txq;
++ unsigned int total;
++ unsigned int done;
++ unsigned int bytes;
++};
++
++static void
++mtk_poll_tx_done(struct mtk_eth *eth, struct mtk_poll_state *state, u8 mac,
++ struct sk_buff *skb)
++{
++ struct netdev_queue *txq;
++ struct net_device *dev;
++ unsigned int bytes = skb->len;
++
++ state->total++;
++ eth->tx_packets++;
++ eth->tx_bytes += bytes;
++
++ dev = eth->netdev[mac];
++ if (!dev)
++ return;
++
++ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
++ if (state->txq == txq) {
++ state->done++;
++ state->bytes += bytes;
++ return;
++ }
++
++ if (state->txq)
++ netdev_tx_completed_queue(state->txq, state->done, state->bytes);
++
++ state->txq = txq;
++ state->done = 1;
++ state->bytes = bytes;
++}
++
+ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget,
+- unsigned int *done, unsigned int *bytes)
++ struct mtk_poll_state *state)
+ {
+ const struct mtk_reg_map *reg_map = eth->soc->reg_map;
+ struct mtk_tx_ring *ring = &eth->tx_ring;
+@@ -1975,12 +2091,9 @@ static int mtk_poll_tx_qdma(struct mtk_e
+ break;
+
+ if (tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) {
+- if (tx_buf->type == MTK_TYPE_SKB) {
+- struct sk_buff *skb = tx_buf->data;
++ if (tx_buf->type == MTK_TYPE_SKB)
++ mtk_poll_tx_done(eth, state, mac, tx_buf->data);
+
+- bytes[mac] += skb->len;
+- done[mac]++;
+- }
+ budget--;
+ }
+ mtk_tx_unmap(eth, tx_buf, true);
+@@ -1998,7 +2111,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
+ }
+
+ static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget,
+- unsigned int *done, unsigned int *bytes)
++ struct mtk_poll_state *state)
+ {
+ struct mtk_tx_ring *ring = &eth->tx_ring;
+ struct mtk_tx_buf *tx_buf;
+@@ -2014,12 +2127,8 @@ static int mtk_poll_tx_pdma(struct mtk_e
+ break;
+
+ if (tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) {
+- if (tx_buf->type == MTK_TYPE_SKB) {
+- struct sk_buff *skb = tx_buf->data;
+-
+- bytes[0] += skb->len;
+- done[0]++;
+- }
++ if (tx_buf->type == MTK_TYPE_SKB)
++ mtk_poll_tx_done(eth, state, 0, tx_buf->data);
+ budget--;
+ }
+ mtk_tx_unmap(eth, tx_buf, true);
+@@ -2040,26 +2149,15 @@ static int mtk_poll_tx(struct mtk_eth *e
+ {
+ struct mtk_tx_ring *ring = &eth->tx_ring;
+ struct dim_sample dim_sample = {};
+- unsigned int done[MTK_MAX_DEVS];
+- unsigned int bytes[MTK_MAX_DEVS];
+- int total = 0, i;
+-
+- memset(done, 0, sizeof(done));
+- memset(bytes, 0, sizeof(bytes));
++ struct mtk_poll_state state = {};
+
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+- budget = mtk_poll_tx_qdma(eth, budget, done, bytes);
++ budget = mtk_poll_tx_qdma(eth, budget, &state);
+ else
+- budget = mtk_poll_tx_pdma(eth, budget, done, bytes);
++ budget = mtk_poll_tx_pdma(eth, budget, &state);
+
+- for (i = 0; i < MTK_MAC_COUNT; i++) {
+- if (!eth->netdev[i] || !done[i])
+- continue;
+- netdev_completed_queue(eth->netdev[i], done[i], bytes[i]);
+- total += done[i];
+- eth->tx_packets += done[i];
+- eth->tx_bytes += bytes[i];
+- }
++ if (state.txq)
++ netdev_tx_completed_queue(state.txq, state.done, state.bytes);
+
+ dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes,
+ &dim_sample);
+@@ -2069,7 +2167,7 @@ static int mtk_poll_tx(struct mtk_eth *e
+ (atomic_read(&ring->free_count) > ring->thresh))
+ mtk_wake_queue(eth);
+
+- return total;
++ return state.total;
+ }
+
+ static void mtk_handle_status_irq(struct mtk_eth *eth)
+@@ -2155,6 +2253,7 @@ static int mtk_tx_alloc(struct mtk_eth *
+ int i, sz = soc->txrx.txd_size;
+ struct mtk_tx_dma_v2 *txd;
+ int ring_size;
++ u32 ofs, val;
+
+ if (MTK_HAS_CAPS(soc->caps, MTK_QDMA))
+ ring_size = MTK_QDMA_RING_SIZE;
+@@ -2222,8 +2321,25 @@ static int mtk_tx_alloc(struct mtk_eth *
+ ring->phys + ((ring_size - 1) * sz),
+ soc->reg_map->qdma.crx_ptr);
+ mtk_w32(eth, ring->last_free_ptr, soc->reg_map->qdma.drx_ptr);
+- mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
+- soc->reg_map->qdma.qtx_cfg);
++
++ for (i = 0, ofs = 0; i < MTK_QDMA_NUM_QUEUES; i++) {
++ val = (QDMA_RES_THRES << 8) | QDMA_RES_THRES;
++ mtk_w32(eth, val, soc->reg_map->qdma.qtx_cfg + ofs);
++
++ val = MTK_QTX_SCH_MIN_RATE_EN |
++ /* minimum: 10 Mbps */
++ FIELD_PREP(MTK_QTX_SCH_MIN_RATE_MAN, 1) |
++ FIELD_PREP(MTK_QTX_SCH_MIN_RATE_EXP, 4) |
++ MTK_QTX_SCH_LEAKY_BUCKET_SIZE;
++ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
++ val |= MTK_QTX_SCH_LEAKY_BUCKET_EN;
++ mtk_w32(eth, val, soc->reg_map->qdma.qtx_sch + ofs);
++ ofs += MTK_QTX_OFFSET;
++ }
++ val = MTK_QDMA_TX_SCH_MAX_WFQ | (MTK_QDMA_TX_SCH_MAX_WFQ << 16);
++ mtk_w32(eth, val, soc->reg_map->qdma.tx_sch_rate);
++ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
++ mtk_w32(eth, val, soc->reg_map->qdma.tx_sch_rate + 4);
+ } else {
+ mtk_w32(eth, ring->phys_pdma, MT7628_TX_BASE_PTR0);
+ mtk_w32(eth, ring_size, MT7628_TX_MAX_CNT0);
+@@ -2903,7 +3019,7 @@ static int mtk_start_dma(struct mtk_eth
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ val |= MTK_MUTLI_CNT | MTK_RESV_BUF |
+ MTK_WCOMP_EN | MTK_DMAD_WR_WDONE |
+- MTK_CHK_DDONE_EN;
++ MTK_CHK_DDONE_EN | MTK_LEAKY_BUCKET_EN;
+ else
+ val |= MTK_RX_BT_32DWORDS;
+ mtk_w32(eth, val, reg_map->qdma.glo_cfg);
+@@ -2949,6 +3065,45 @@ static void mtk_gdm_config(struct mtk_et
+ mtk_w32(eth, 0, MTK_RST_GL);
+ }
+
++static int mtk_device_event(struct notifier_block *n, unsigned long event, void *ptr)
++{
++ struct mtk_mac *mac = container_of(n, struct mtk_mac, device_notifier);
++ struct mtk_eth *eth = mac->hw;
++ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++ struct ethtool_link_ksettings s;
++ struct net_device *ldev;
++ struct list_head *iter;
++ struct dsa_port *dp;
++
++ if (event != NETDEV_CHANGE)
++ return NOTIFY_DONE;
++
++ netdev_for_each_lower_dev(dev, ldev, iter) {
++ if (netdev_priv(ldev) == mac)
++ goto found;
++ }
++
++ return NOTIFY_DONE;
++
++found:
++ if (!dsa_slave_dev_check(dev))
++ return NOTIFY_DONE;
++
++ if (__ethtool_get_link_ksettings(dev, &s))
++ return NOTIFY_DONE;
++
++ if (s.base.speed == 0 || s.base.speed == ((__u32)-1))
++ return NOTIFY_DONE;
++
++ dp = dsa_port_from_netdev(dev);
++ if (dp->index >= MTK_QDMA_NUM_QUEUES)
++ return NOTIFY_DONE;
++
++ mtk_set_queue_speed(eth, dp->index + 3, s.base.speed);
++
++ return NOTIFY_DONE;
++}
++
+ static int mtk_open(struct net_device *dev)
+ {
+ struct mtk_mac *mac = netdev_priv(dev);
+@@ -2993,7 +3148,8 @@ static int mtk_open(struct net_device *d
+ refcount_inc(&eth->dma_refcnt);
+
+ phylink_start(mac->phylink);
+- netif_start_queue(dev);
++ netif_tx_start_all_queues(dev);
++
+ return 0;
+ }
+
+@@ -3716,8 +3872,12 @@ static int mtk_unreg_dev(struct mtk_eth
+ int i;
+
+ for (i = 0; i < MTK_MAC_COUNT; i++) {
++ struct mtk_mac *mac;
+ if (!eth->netdev[i])
+ continue;
++ mac = netdev_priv(eth->netdev[i]);
++ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
++ unregister_netdevice_notifier(&mac->device_notifier);
+ unregister_netdev(eth->netdev[i]);
+ }
+
+@@ -3934,6 +4094,23 @@ static int mtk_set_rxnfc(struct net_devi
+ return ret;
+ }
+
++static u16 mtk_select_queue(struct net_device *dev, struct sk_buff *skb,
++ struct net_device *sb_dev)
++{
++ struct mtk_mac *mac = netdev_priv(dev);
++ unsigned int queue = 0;
++
++ if (netdev_uses_dsa(dev))
++ queue = skb_get_queue_mapping(skb) + 3;
++ else
++ queue = mac->id;
++
++ if (queue >= dev->num_tx_queues)
++ queue = 0;
++
++ return queue;
++}
++
+ static const struct ethtool_ops mtk_ethtool_ops = {
+ .get_link_ksettings = mtk_get_link_ksettings,
+ .set_link_ksettings = mtk_set_link_ksettings,
+@@ -3969,6 +4146,7 @@ static const struct net_device_ops mtk_n
+ .ndo_setup_tc = mtk_eth_setup_tc,
+ .ndo_bpf = mtk_xdp,
+ .ndo_xdp_xmit = mtk_xdp_xmit,
++ .ndo_select_queue = mtk_select_queue,
+ };
+
+ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
+@@ -3978,6 +4156,7 @@ static int mtk_add_mac(struct mtk_eth *e
+ struct phylink *phylink;
+ struct mtk_mac *mac;
+ int id, err;
++ int txqs = 1;
+
+ if (!_id) {
+ dev_err(eth->dev, "missing mac id\n");
+@@ -3995,7 +4174,10 @@ static int mtk_add_mac(struct mtk_eth *e
+ return -EINVAL;
+ }
+
+- eth->netdev[id] = alloc_etherdev(sizeof(*mac));
++ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
++ txqs = MTK_QDMA_NUM_QUEUES;
++
++ eth->netdev[id] = alloc_etherdev_mqs(sizeof(*mac), txqs, 1);
+ if (!eth->netdev[id]) {
+ dev_err(eth->dev, "alloc_etherdev failed\n");
+ return -ENOMEM;
+@@ -4092,6 +4274,11 @@ static int mtk_add_mac(struct mtk_eth *e
+ else
+ eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN;
+
++ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
++ mac->device_notifier.notifier_call = mtk_device_event;
++ register_netdevice_notifier(&mac->device_notifier);
++ }
++
+ return 0;
+
+ free_netdev:
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -22,6 +22,7 @@
+ #include <linux/bpf_trace.h>
+ #include "mtk_ppe.h"
+
++#define MTK_QDMA_NUM_QUEUES 16
+ #define MTK_QDMA_PAGE_SIZE 2048
+ #define MTK_MAX_RX_LENGTH 1536
+ #define MTK_MAX_RX_LENGTH_2K 2048
+@@ -215,8 +216,26 @@
+ #define MTK_RING_MAX_AGG_CNT_H ((MTK_HW_LRO_MAX_AGG_CNT >> 6) & 0x3)
+
+ /* QDMA TX Queue Configuration Registers */
++#define MTK_QTX_OFFSET 0x10
+ #define QDMA_RES_THRES 4
+
++/* QDMA Tx Queue Scheduler Configuration Registers */
++#define MTK_QTX_SCH_TX_SEL BIT(31)
++#define MTK_QTX_SCH_TX_SEL_V2 GENMASK(31, 30)
++
++#define MTK_QTX_SCH_LEAKY_BUCKET_EN BIT(30)
++#define MTK_QTX_SCH_LEAKY_BUCKET_SIZE GENMASK(29, 28)
++#define MTK_QTX_SCH_MIN_RATE_EN BIT(27)
++#define MTK_QTX_SCH_MIN_RATE_MAN GENMASK(26, 20)
++#define MTK_QTX_SCH_MIN_RATE_EXP GENMASK(19, 16)
++#define MTK_QTX_SCH_MAX_RATE_WEIGHT GENMASK(15, 12)
++#define MTK_QTX_SCH_MAX_RATE_EN BIT(11)
++#define MTK_QTX_SCH_MAX_RATE_MAN GENMASK(10, 4)
++#define MTK_QTX_SCH_MAX_RATE_EXP GENMASK(3, 0)
++
++/* QDMA TX Scheduler Rate Control Register */
++#define MTK_QDMA_TX_SCH_MAX_WFQ BIT(15)
++
+ /* QDMA Global Configuration Register */
+ #define MTK_RX_2B_OFFSET BIT(31)
+ #define MTK_RX_BT_32DWORDS (3 << 11)
+@@ -235,6 +254,7 @@
+ #define MTK_WCOMP_EN BIT(24)
+ #define MTK_RESV_BUF (0x40 << 16)
+ #define MTK_MUTLI_CNT (0x4 << 12)
++#define MTK_LEAKY_BUCKET_EN BIT(11)
+
+ /* QDMA Flow Control Register */
+ #define FC_THRES_DROP_MODE BIT(20)
+@@ -265,8 +285,6 @@
+ #define MTK_STAT_OFFSET 0x40
+
+ /* QDMA TX NUM */
+-#define MTK_QDMA_TX_NUM 16
+-#define MTK_QDMA_TX_MASK (MTK_QDMA_TX_NUM - 1)
+ #define QID_BITS_V2(x) (((x) & 0x3f) << 16)
+ #define MTK_QDMA_GMAC2_QID 8
+
+@@ -296,6 +314,7 @@
+ #define TX_DMA_PLEN0(x) (((x) & eth->soc->txrx.dma_max_len) << eth->soc->txrx.dma_len_offset)
+ #define TX_DMA_PLEN1(x) ((x) & eth->soc->txrx.dma_max_len)
+ #define TX_DMA_SWC BIT(14)
++#define TX_DMA_PQID GENMASK(3, 0)
+
+ /* PDMA on MT7628 */
+ #define TX_DMA_DONE BIT(31)
+@@ -952,6 +971,7 @@ struct mtk_reg_map {
+ } pdma;
+ struct {
+ u32 qtx_cfg; /* tx queue configuration */
++ u32 qtx_sch; /* tx queue scheduler configuration */
+ u32 rx_ptr; /* rx base pointer */
+ u32 rx_cnt_cfg; /* rx max count configuration */
+ u32 qcrx_ptr; /* rx cpu pointer */
+@@ -969,6 +989,7 @@ struct mtk_reg_map {
+ u32 fq_tail; /* fq tail pointer */
+ u32 fq_count; /* fq free page count */
+ u32 fq_blen; /* fq free page buffer length */
++ u32 tx_sch_rate; /* tx scheduler rate control registers */
+ } qdma;
+ u32 gdm1_cnt;
+ u32 gdma_to_ppe0;
+@@ -1172,6 +1193,7 @@ struct mtk_mac {
+ __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT];
+ int hwlro_ip_cnt;
+ unsigned int syscfg0;
++ struct notifier_block device_notifier;
+ };
+
+ /* the struct describing the SoC. these are declared in the soc_xyz.c files */
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 28 Oct 2022 18:16:03 +0200
+Subject: [PATCH] net: dsa: tag_mtk: assign per-port queues
+
+Keeps traffic sent to the switch within link speed limits
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/dsa/tag_mtk.c
++++ b/net/dsa/tag_mtk.c
+@@ -25,6 +25,8 @@ static struct sk_buff *mtk_tag_xmit(stru
+ u8 xmit_tpid;
+ u8 *mtk_tag;
+
++ skb_set_queue_mapping(skb, dp->index);
++
+ /* Build the special tag after the MAC Source Address. If VLAN header
+ * is present, it's required that VLAN header and special tag is
+ * being combined. Only in this way we can allow the switch can parse
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 3 Nov 2022 17:49:44 +0100
+Subject: [PATCH] net: ethernet: mediatek: ppe: assign per-port queues
+ for offloaded traffic
+
+Keeps traffic sent to the switch within link speed limits
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
+@@ -405,6 +405,24 @@ static inline bool mtk_foe_entry_usable(
+ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
+ }
+
++int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry,
++ unsigned int queue)
++{
++ u32 *ib2 = mtk_foe_entry_ib2(eth, entry);
++
++ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
++ *ib2 &= ~MTK_FOE_IB2_QID_V2;
++ *ib2 |= FIELD_PREP(MTK_FOE_IB2_QID_V2, queue);
++ *ib2 |= MTK_FOE_IB2_PSE_QOS_V2;
++ } else {
++ *ib2 &= ~MTK_FOE_IB2_QID;
++ *ib2 |= FIELD_PREP(MTK_FOE_IB2_QID, queue);
++ *ib2 |= MTK_FOE_IB2_PSE_QOS;
++ }
++
++ return 0;
++}
++
+ static bool
+ mtk_flow_entry_match(struct mtk_eth *eth, struct mtk_flow_entry *entry,
+ struct mtk_foe_entry *data)
+--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
+@@ -68,7 +68,9 @@ enum {
+ #define MTK_FOE_IB2_DSCP GENMASK(31, 24)
+
+ /* CONFIG_MEDIATEK_NETSYS_V2 */
++#define MTK_FOE_IB2_QID_V2 GENMASK(6, 0)
+ #define MTK_FOE_IB2_PORT_MG_V2 BIT(7)
++#define MTK_FOE_IB2_PSE_QOS_V2 BIT(8)
+ #define MTK_FOE_IB2_DEST_PORT_V2 GENMASK(12, 9)
+ #define MTK_FOE_IB2_MULTICAST_V2 BIT(13)
+ #define MTK_FOE_IB2_WDMA_WINFO_V2 BIT(19)
+@@ -351,6 +353,8 @@ int mtk_foe_entry_set_pppoe(struct mtk_e
+ int sid);
+ int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry,
+ int wdma_idx, int txq, int bss, int wcid);
++int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry,
++ unsigned int queue);
+ int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
+ void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
+ int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
+--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+@@ -188,7 +188,7 @@ mtk_flow_set_output_device(struct mtk_et
+ int *wed_index)
+ {
+ struct mtk_wdma_info info = {};
+- int pse_port, dsa_port;
++ int pse_port, dsa_port, queue;
+
+ if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) {
+ mtk_foe_entry_set_wdma(eth, foe, info.wdma_idx, info.queue,
+@@ -212,8 +212,6 @@ mtk_flow_set_output_device(struct mtk_et
+ }
+
+ dsa_port = mtk_flow_get_dsa_port(&dev);
+- if (dsa_port >= 0)
+- mtk_foe_entry_set_dsa(eth, foe, dsa_port);
+
+ if (dev == eth->netdev[0])
+ pse_port = 1;
+@@ -222,6 +220,14 @@ mtk_flow_set_output_device(struct mtk_et
+ else
+ return -EOPNOTSUPP;
+
++ if (dsa_port >= 0) {
++ mtk_foe_entry_set_dsa(eth, foe, dsa_port);
++ queue = 3 + dsa_port;
++ } else {
++ queue = pse_port - 1;
++ }
++ mtk_foe_entry_set_queue(eth, foe, queue);
++
+ out:
+ mtk_foe_entry_set_pse_port(eth, foe, pse_port);
+
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 27 Oct 2022 23:39:52 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: compile out netsys v2 code
+ on mt7621
+
+Avoid some branches in the hot path on low-end devices with limited CPU power,
+and reduce code size
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -916,7 +916,13 @@ enum mkt_eth_capabilities {
+ #define MTK_MUX_GMAC12_TO_GEPHY_SGMII \
+ (MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII | MTK_MUX)
+
+-#define MTK_HAS_CAPS(caps, _x) (((caps) & (_x)) == (_x))
++#ifdef CONFIG_SOC_MT7621
++#define MTK_CAP_MASK MTK_NETSYS_V2
++#else
++#define MTK_CAP_MASK 0
++#endif
++
++#define MTK_HAS_CAPS(caps, _x) (((caps) & (_x) & ~(MTK_CAP_MASK)) == (_x))
+
+ #define MT7621_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_TRGMII | \
+ MTK_GMAC2_RGMII | MTK_SHARED_INT | \
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Tue, 8 Nov 2022 15:03:15 +0100
+Subject: [PATCH] net: dsa: add support for DSA rx offloading via
+ metadata dst
+
+If a metadata dst is present with the type METADATA_HW_PORT_MUX on a dsa cpu
+port netdev, assume that it carries the port number and that there is no DSA
+tag present in the skb data.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -940,12 +940,14 @@ bool __skb_flow_dissect(const struct net
+ #if IS_ENABLED(CONFIG_NET_DSA)
+ if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) &&
+ proto == htons(ETH_P_XDSA))) {
++ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+ const struct dsa_device_ops *ops;
+ int offset = 0;
+
+ ops = skb->dev->dsa_ptr->tag_ops;
+ /* Only DSA header taggers break flow dissection */
+- if (ops->needed_headroom) {
++ if (ops->needed_headroom &&
++ (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)) {
+ if (ops->flow_dissect)
+ ops->flow_dissect(skb, &proto, &offset);
+ else
+--- a/net/dsa/dsa.c
++++ b/net/dsa/dsa.c
+@@ -20,6 +20,7 @@
+ #include <linux/phy_fixed.h>
+ #include <linux/ptp_classify.h>
+ #include <linux/etherdevice.h>
++#include <net/dst_metadata.h>
+
+ #include "dsa_priv.h"
+
+@@ -225,6 +226,7 @@ static bool dsa_skb_defer_rx_timestamp(s
+ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *unused)
+ {
++ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct sk_buff *nskb = NULL;
+ struct dsa_slave_priv *p;
+@@ -238,7 +240,22 @@ static int dsa_switch_rcv(struct sk_buff
+ if (!skb)
+ return 0;
+
+- nskb = cpu_dp->rcv(skb, dev);
++ if (md_dst && md_dst->type == METADATA_HW_PORT_MUX) {
++ unsigned int port = md_dst->u.port_info.port_id;
++
++ skb_dst_drop(skb);
++ if (!skb_has_extensions(skb))
++ skb->slow_gro = 0;
++
++ skb->dev = dsa_master_find_slave(dev, 0, port);
++ if (likely(skb->dev)) {
++ dsa_default_offload_fwd_mark(skb);
++ nskb = skb;
++ }
++ } else {
++ nskb = cpu_dp->rcv(skb, dev);
++ }
++
+ if (!nskb) {
+ kfree_skb(skb);
+ return 0;
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 28 Oct 2022 11:01:12 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: fix VLAN rx hardware
+ acceleration
+
+- enable VLAN untagging for PDMA rx
+- make it possible to disable the feature via ethtool
+- pass VLAN tag to the DSA driver
+- untag special tag on PDMA only if no non-DSA devices are in use
+- disable special tag untagging on 7986 for now, since it's not working yet
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -23,6 +23,7 @@
+ #include <linux/jhash.h>
+ #include <linux/bitfield.h>
+ #include <net/dsa.h>
++#include <net/dst_metadata.h>
+
+ #include "mtk_eth_soc.h"
+ #include "mtk_wed.h"
+@@ -1973,16 +1974,22 @@ static int mtk_poll_rx(struct napi_struc
+ htons(RX_DMA_VPID(trxd.rxd4)),
+ RX_DMA_VID(trxd.rxd4));
+ } else if (trxd.rxd2 & RX_DMA_VTAG) {
+- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
++ __vlan_hwaccel_put_tag(skb, htons(RX_DMA_VPID(trxd.rxd3)),
+ RX_DMA_VID(trxd.rxd3));
+ }
++ }
++
++ /* When using VLAN untagging in combination with DSA, the
++ * hardware treats the MTK special tag as a VLAN and untags it.
++ */
++ if (skb_vlan_tag_present(skb) && netdev_uses_dsa(netdev)) {
++ unsigned int port = ntohs(skb->vlan_proto) & GENMASK(2, 0);
+
+- /* If the device is attached to a dsa switch, the special
+- * tag inserted in VLAN field by hw switch can * be offloaded
+- * by RX HW VLAN offload. Clear vlan info.
+- */
+- if (netdev_uses_dsa(netdev))
+- __vlan_hwaccel_clear_tag(skb);
++ if (port < ARRAY_SIZE(eth->dsa_meta) &&
++ eth->dsa_meta[port])
++ skb_dst_set_noref(skb, &eth->dsa_meta[port]->dst);
++
++ __vlan_hwaccel_clear_tag(skb);
+ }
+
+ skb_record_rx_queue(skb, 0);
+@@ -2799,15 +2806,30 @@ static netdev_features_t mtk_fix_feature
+
+ static int mtk_set_features(struct net_device *dev, netdev_features_t features)
+ {
+- int err = 0;
++ struct mtk_mac *mac = netdev_priv(dev);
++ struct mtk_eth *eth = mac->hw;
++ netdev_features_t diff = dev->features ^ features;
++ int i;
++
++ if ((diff & NETIF_F_LRO) && !(features & NETIF_F_LRO))
++ mtk_hwlro_netdev_disable(dev);
+
+- if (!((dev->features ^ features) & NETIF_F_LRO))
++ /* Set RX VLAN offloading */
++ if (!(diff & NETIF_F_HW_VLAN_CTAG_RX))
+ return 0;
+
+- if (!(features & NETIF_F_LRO))
+- mtk_hwlro_netdev_disable(dev);
++ mtk_w32(eth, !!(features & NETIF_F_HW_VLAN_CTAG_RX),
++ MTK_CDMP_EG_CTRL);
+
+- return err;
++ /* sync features with other MAC */
++ for (i = 0; i < MTK_MAC_COUNT; i++) {
++ if (!eth->netdev[i] || eth->netdev[i] == dev)
++ continue;
++ eth->netdev[i]->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
++ eth->netdev[i]->features |= features & NETIF_F_HW_VLAN_CTAG_RX;
++ }
++
++ return 0;
+ }
+
+ /* wait for DMA to finish whatever it is doing before we start using it again */
+@@ -3104,11 +3126,45 @@ found:
+ return NOTIFY_DONE;
+ }
+
++static bool mtk_uses_dsa(struct net_device *dev)
++{
++#if IS_ENABLED(CONFIG_NET_DSA)
++ return netdev_uses_dsa(dev) &&
++ dev->dsa_ptr->tag_ops->proto == DSA_TAG_PROTO_MTK;
++#else
++ return false;
++#endif
++}
++
+ static int mtk_open(struct net_device *dev)
+ {
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+- int err;
++ int i, err;
++
++ if (mtk_uses_dsa(dev) && !eth->prog) {
++ for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) {
++ struct metadata_dst *md_dst = eth->dsa_meta[i];
++
++ if (md_dst)
++ continue;
++
++ md_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
++ GFP_KERNEL);
++ if (!md_dst)
++ return -ENOMEM;
++
++ md_dst->u.port_info.port_id = i;
++ eth->dsa_meta[i] = md_dst;
++ }
++ } else {
++ /* Hardware special tag parsing needs to be disabled if at least
++ * one MAC does not use DSA.
++ */
++ u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
++ val &= ~MTK_CDMP_STAG_EN;
++ mtk_w32(eth, val, MTK_CDMP_IG_CTRL);
++ }
+
+ err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
+ if (err) {
+@@ -3631,6 +3687,10 @@ static int mtk_hw_init(struct mtk_eth *e
+ */
+ val = mtk_r32(eth, MTK_CDMQ_IG_CTRL);
+ mtk_w32(eth, val | MTK_CDMQ_STAG_EN, MTK_CDMQ_IG_CTRL);
++ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
++ val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
++ mtk_w32(eth, val | MTK_CDMP_STAG_EN, MTK_CDMP_IG_CTRL);
++ }
+
+ /* Enable RX VLan Offloading */
+ mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
+@@ -3864,6 +3924,12 @@ static int mtk_free_dev(struct mtk_eth *
+ free_netdev(eth->netdev[i]);
+ }
+
++ for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) {
++ if (!eth->dsa_meta[i])
++ break;
++ metadata_dst_free(eth->dsa_meta[i]);
++ }
++
+ return 0;
+ }
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -22,6 +22,9 @@
+ #include <linux/bpf_trace.h>
+ #include "mtk_ppe.h"
+
++#define MTK_MAX_DSA_PORTS 7
++#define MTK_DSA_PORT_MASK GENMASK(2, 0)
++
+ #define MTK_QDMA_NUM_QUEUES 16
+ #define MTK_QDMA_PAGE_SIZE 2048
+ #define MTK_MAX_RX_LENGTH 1536
+@@ -105,6 +108,9 @@
+ #define MTK_CDMQ_IG_CTRL 0x1400
+ #define MTK_CDMQ_STAG_EN BIT(0)
+
++/* CDMQ Exgress Control Register */
++#define MTK_CDMQ_EG_CTRL 0x1404
++
+ /* CDMP Ingress Control Register */
+ #define MTK_CDMP_IG_CTRL 0x400
+ #define MTK_CDMP_STAG_EN BIT(0)
+@@ -1165,6 +1171,8 @@ struct mtk_eth {
+
+ int ip_align;
+
++ struct metadata_dst *dsa_meta[MTK_MAX_DSA_PORTS];
++
+ struct mtk_ppe *ppe[2];
+ struct rhashtable flow_table;
+
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 3 Nov 2022 17:46:25 +0100
+Subject: [PATCH] net: ethernet: mtk_eth_soc: drop packets to WDMA if the
+ ring is full
+
+Improves handling of DMA ring overflow.
+Also clarifies the existing WDMA drop comment.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -3711,9 +3711,12 @@ static int mtk_hw_init(struct mtk_eth *e
+ mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP);
+
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+- /* PSE should not drop port8 and port9 packets */
++ /* PSE should not drop port8 and port9 packets from WDMA Tx */
+ mtk_w32(eth, 0x00000300, PSE_DROP_CFG);
+
++ /* PSE should drop packets to port 8/9 on WDMA Rx ring full */
++ mtk_w32(eth, 0x00000300, PSE_PPE0_DROP);
++
+ /* PSE Free Queue Flow Control */
+ mtk_w32(eth, 0x01fa01f4, PSE_FQFC_CFG2);
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -140,6 +140,7 @@
+ #define PSE_FQFC_CFG1 0x100
+ #define PSE_FQFC_CFG2 0x104
+ #define PSE_DROP_CFG 0x108
++#define PSE_PPE0_DROP 0x110
+
+ /* PSE Input Queue Reservation Register*/
+ #define PSE_IQ_REV(x) (0x140 + (((x) - 1) << 2))
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 17 Nov 2022 11:58:21 +0100
+Subject: [PATCH] net: ethernet: mtk_eth_soc: fix flow_offload related refcount
+ bug
+
+Since we call flow_block_cb_decref on FLOW_BLOCK_UNBIND, we need to call
+flow_block_cb_incref unconditionally, even for a newly allocated cb.
+Fixes a use-after-free bug
+
+Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support")
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+@@ -554,6 +554,7 @@ mtk_eth_setup_tc_block(struct net_device
+ struct mtk_eth *eth = mac->hw;
+ static LIST_HEAD(block_cb_list);
+ struct flow_block_cb *block_cb;
++ bool register_block = false;
+ flow_setup_cb_t *cb;
+
+ if (!eth->soc->offload_version)
+@@ -568,16 +569,20 @@ mtk_eth_setup_tc_block(struct net_device
+ switch (f->command) {
+ case FLOW_BLOCK_BIND:
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
+- if (block_cb) {
+- flow_block_cb_incref(block_cb);
+- return 0;
++ if (!block_cb) {
++ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
++ if (IS_ERR(block_cb))
++ return PTR_ERR(block_cb);
++
++ register_block = true;
+ }
+- block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
+- if (IS_ERR(block_cb))
+- return PTR_ERR(block_cb);
+
+- flow_block_cb_add(block_cb, f);
+- list_add_tail(&block_cb->driver_list, &block_cb_list);
++ flow_block_cb_incref(block_cb);
++
++ if (register_block) {
++ flow_block_cb_add(block_cb, f);
++ list_add_tail(&block_cb->driver_list, &block_cb_list);
++ }
+ return 0;
+ case FLOW_BLOCK_UNBIND:
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
-@@ -601,8 +601,7 @@ mtk_foe_entry_commit_subflow(struct mtk_
+@@ -621,8 +621,7 @@ mtk_foe_entry_commit_subflow(struct mtk_
u32 ib1_mask = mtk_get_ib1_pkt_type_mask(ppe->eth) | MTK_FOE_IB1_UDP;
int type;
--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
-@@ -277,7 +277,6 @@ struct mtk_flow_entry {
+@@ -279,7 +279,6 @@ struct mtk_flow_entry {
struct {
struct mtk_flow_entry *base_flow;
struct hlist_node list;
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -1027,11 +1027,13 @@ struct mtk_soc_data {
+@@ -1062,11 +1062,13 @@ struct mtk_soc_data {
* @regmap: The register map pointing at the range used to setup
* SGMII modes
* @ana_rgc3: The offset refers to register ANA_RGC3 related to regmap
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -496,7 +496,7 @@
+@@ -523,7 +523,7 @@
#define SGMII_SPEED_10 FIELD_PREP(SGMII_SPEED_MASK, 0)
#define SGMII_SPEED_100 FIELD_PREP(SGMII_SPEED_MASK, 1)
#define SGMII_SPEED_1000 FIELD_PREP(SGMII_SPEED_MASK, 2)
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-@@ -2823,8 +2823,8 @@ static irqreturn_t mtk_handle_irq_rx(int
+@@ -2967,8 +2967,8 @@ static irqreturn_t mtk_handle_irq_rx(int
eth->rx_events++;
 if (likely(napi_schedule_prep(&eth->rx_napi))) {
}
return IRQ_HANDLED;
-@@ -2836,8 +2836,8 @@ static irqreturn_t mtk_handle_irq_tx(int
+@@ -2980,8 +2980,8 @@ static irqreturn_t mtk_handle_irq_tx(int
eth->tx_events++;
 if (likely(napi_schedule_prep(&eth->tx_napi))) {
}
return IRQ_HANDLED;
-@@ -4350,6 +4350,8 @@ static int mtk_probe(struct platform_dev
+@@ -4612,6 +4612,8 @@ static int mtk_probe(struct platform_dev
* for NAPI to work
*/
 init_dummy_netdev(&eth->dummy_dev);
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
-@@ -25,6 +25,13 @@ static struct sk_buff *mtk_tag_xmit(stru
- u8 xmit_tpid;
- u8 *mtk_tag;
+@@ -27,6 +27,13 @@ static struct sk_buff *mtk_tag_xmit(stru
+
+ skb_set_queue_mapping(skb, dp->index);
+ /* The Ethernet switch we are interfaced with needs packets to be at
+ * least 64 bytes (including FCS) otherwise their padding might be
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-@@ -4320,7 +4320,9 @@ static int mtk_probe(struct platform_dev
+@@ -4582,7 +4582,9 @@ static int mtk_probe(struct platform_dev
u32 ppe_addr = eth->soc->reg_map->ppe_base + i * 0x400;
eth->ppe[i] = mtk_ppe_init(eth, eth->base + ppe_addr,
if (!eth->ppe[i]) {
err = -ENOMEM;
goto err_free_dev;
-@@ -4445,6 +4447,7 @@ static const struct mtk_soc_data mt7622_
+@@ -4707,6 +4709,7 @@ static const struct mtk_soc_data mt7622_
.required_pctl = false,
.offload_version = 2,
.hash_offset = 2,
.foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
-@@ -4482,6 +4485,7 @@ static const struct mtk_soc_data mt7629_
+@@ -4744,6 +4747,7 @@ static const struct mtk_soc_data mt7629_
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7629_CLKS_BITMAP,
.required_pctl = false,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
.rxd_size = sizeof(struct mtk_rx_dma),
-@@ -4502,6 +4506,7 @@ static const struct mtk_soc_data mt7986_
+@@ -4764,6 +4768,7 @@ static const struct mtk_soc_data mt7986_
.offload_version = 2,
.hash_offset = 4,
.foe_entry_size = sizeof(struct mtk_foe_entry),
.rxd_size = sizeof(struct mtk_rx_dma_v2),
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -1007,6 +1007,7 @@ struct mtk_soc_data {
+@@ -1042,6 +1042,7 @@ struct mtk_soc_data {
u8 hash_offset;
u16 foe_entry_size;
netdev_features_t hw_features;
static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
{
ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
-@@ -444,6 +484,13 @@ __mtk_foe_entry_clear(struct mtk_ppe *pp
+@@ -464,6 +504,13 @@ __mtk_foe_entry_clear(struct mtk_ppe *pp
hwe->ib1 &= ~MTK_FOE_IB1_STATE;
hwe->ib1 |= FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_INVALID);
dma_wmb();
}
entry->hash = 0xffff;
-@@ -551,6 +598,9 @@ __mtk_foe_entry_commit(struct mtk_ppe *p
+@@ -571,6 +618,9 @@ __mtk_foe_entry_commit(struct mtk_ppe *p
wmb();
hwe->ib1 = entry->ib1;
dma_wmb();
mtk_ppe_cache_clear(ppe);
-@@ -742,14 +792,42 @@ int mtk_ppe_prepare_reset(struct mtk_ppe
+@@ -762,14 +812,42 @@ int mtk_ppe_prepare_reset(struct mtk_ppe
return mtk_ppe_wait_busy(ppe);
}
ppe = devm_kzalloc(dev, sizeof(*ppe), GFP_KERNEL);
if (!ppe)
-@@ -764,6 +842,7 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_
+@@ -784,6 +862,7 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_
ppe->eth = eth;
ppe->dev = dev;
ppe->version = version;
foe = dmam_alloc_coherent(ppe->dev,
MTK_PPE_ENTRIES * soc->foe_entry_size,
-@@ -779,6 +858,25 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_
+@@ -799,6 +878,25 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_
if (!ppe->foe_flow)
return NULL;
mtk_ppe_debugfs_init(ppe, index);
return ppe;
-@@ -893,6 +991,16 @@ void mtk_ppe_start(struct mtk_ppe *ppe)
+@@ -913,6 +1011,16 @@ void mtk_ppe_start(struct mtk_ppe *ppe)
ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT1, 0xcb777);
ppe_w32(ppe, MTK_PPE_SBW_CTRL, 0x7f);
}
#define MTK_FOE_IB2_WDMA_DEVIDX BIT(16)
#define MTK_FOE_IB2_WDMA_WINFO BIT(17)
-@@ -283,16 +284,34 @@ struct mtk_flow_entry {
+@@ -285,16 +286,34 @@ struct mtk_flow_entry {
unsigned long cookie;
};
u16 foe_check_time[MTK_PPE_ENTRIES];
struct hlist_head *foe_flow;
-@@ -302,7 +321,7 @@ struct mtk_ppe {
+@@ -304,7 +323,7 @@ struct mtk_ppe {
};
struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base,
void mtk_ppe_start(struct mtk_ppe *ppe);
int mtk_ppe_stop(struct mtk_ppe *ppe);
int mtk_ppe_prepare_reset(struct mtk_ppe *ppe);
-@@ -354,5 +373,7 @@ int mtk_foe_entry_commit(struct mtk_ppe
+@@ -358,5 +377,7 @@ int mtk_foe_entry_commit(struct mtk_ppe
void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
int mtk_ppe_debugfs_init(struct mtk_ppe *ppe, int index);
return 0;
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
-@@ -491,6 +491,7 @@ static int
+@@ -497,6 +497,7 @@ static int
mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
{
struct mtk_flow_entry *entry;
u32 idle;
 entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
-@@ -501,6 +502,12 @@ mtk_flow_offload_stats(struct mtk_eth *e
+@@ -507,6 +508,12 @@ mtk_flow_offload_stats(struct mtk_eth *e
idle = mtk_foe_entry_idle_time(eth->ppe[entry->ppe_index], entry);
f->stats.lastused = jiffies - idle * HZ;
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 20 Nov 2022 23:01:00 +0100
+Subject: [PATCH] net: ethernet: mtk_eth_soc: drop generic vlan rx offload,
+ only use DSA untagging
+
+Through testing I found out that hardware vlan rx offload support seems to
+have some hardware issues. At least when using multiple MACs and when receiving
+tagged packets on the secondary MAC, the hardware can sometimes start to emit
+wrong tags on the first MAC as well.
+
+In order to avoid such issues, drop the feature configuration and use the
+offload feature only for DSA hardware untagging on MT7621/MT7622 devices which
+only use one MAC.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1967,29 +1967,16 @@ static int mtk_poll_rx(struct napi_struc
+ if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
+ mtk_ppe_check_skb(eth->ppe[0], skb, hash);
+
+- if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+- if (trxd.rxd3 & RX_DMA_VTAG_V2)
+- __vlan_hwaccel_put_tag(skb,
+- htons(RX_DMA_VPID(trxd.rxd4)),
+- RX_DMA_VID(trxd.rxd4));
+- } else if (trxd.rxd2 & RX_DMA_VTAG) {
+- __vlan_hwaccel_put_tag(skb, htons(RX_DMA_VPID(trxd.rxd3)),
+- RX_DMA_VID(trxd.rxd3));
+- }
+- }
+-
+ /* When using VLAN untagging in combination with DSA, the
+ * hardware treats the MTK special tag as a VLAN and untags it.
+ */
+- if (skb_vlan_tag_present(skb) && netdev_uses_dsa(netdev)) {
+- unsigned int port = ntohs(skb->vlan_proto) & GENMASK(2, 0);
++ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) &&
++ (trxd.rxd2 & RX_DMA_VTAG) && netdev_uses_dsa(netdev)) {
++ unsigned int port = RX_DMA_VPID(trxd.rxd3) & GENMASK(2, 0);
+
+ if (port < ARRAY_SIZE(eth->dsa_meta) &&
+ eth->dsa_meta[port])
+ skb_dst_set_noref(skb, &eth->dsa_meta[port]->dst);
+-
+- __vlan_hwaccel_clear_tag(skb);
+ }
+
+ skb_record_rx_queue(skb, 0);
+@@ -2806,29 +2793,11 @@ static netdev_features_t mtk_fix_feature
+
+ static int mtk_set_features(struct net_device *dev, netdev_features_t features)
+ {
+- struct mtk_mac *mac = netdev_priv(dev);
+- struct mtk_eth *eth = mac->hw;
+ netdev_features_t diff = dev->features ^ features;
+- int i;
+
+ if ((diff & NETIF_F_LRO) && !(features & NETIF_F_LRO))
+ mtk_hwlro_netdev_disable(dev);
+
+- /* Set RX VLAN offloading */
+- if (!(diff & NETIF_F_HW_VLAN_CTAG_RX))
+- return 0;
+-
+- mtk_w32(eth, !!(features & NETIF_F_HW_VLAN_CTAG_RX),
+- MTK_CDMP_EG_CTRL);
+-
+- /* sync features with other MAC */
+- for (i = 0; i < MTK_MAC_COUNT; i++) {
+- if (!eth->netdev[i] || eth->netdev[i] == dev)
+- continue;
+- eth->netdev[i]->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+- eth->netdev[i]->features |= features & NETIF_F_HW_VLAN_CTAG_RX;
+- }
+-
+ return 0;
+ }
+
+@@ -3142,30 +3111,6 @@ static int mtk_open(struct net_device *d
+ struct mtk_eth *eth = mac->hw;
+ int i, err;
+
+- if (mtk_uses_dsa(dev) && !eth->prog) {
+- for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) {
+- struct metadata_dst *md_dst = eth->dsa_meta[i];
+-
+- if (md_dst)
+- continue;
+-
+- md_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
+- GFP_KERNEL);
+- if (!md_dst)
+- return -ENOMEM;
+-
+- md_dst->u.port_info.port_id = i;
+- eth->dsa_meta[i] = md_dst;
+- }
+- } else {
+- /* Hardware special tag parsing needs to be disabled if at least
+- * one MAC does not use DSA.
+- */
+- u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
+- val &= ~MTK_CDMP_STAG_EN;
+- mtk_w32(eth, val, MTK_CDMP_IG_CTRL);
+- }
+-
+ err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
+ if (err) {
+ netdev_err(dev, "%s: could not attach PHY: %d\n", __func__,
+@@ -3206,6 +3151,35 @@ static int mtk_open(struct net_device *d
+ phylink_start(mac->phylink);
+ netif_tx_start_all_queues(dev);
+
++ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
++ return 0;
++
++ if (mtk_uses_dsa(dev) && !eth->prog) {
++ for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) {
++ struct metadata_dst *md_dst = eth->dsa_meta[i];
++
++ if (md_dst)
++ continue;
++
++ md_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
++ GFP_KERNEL);
++ if (!md_dst)
++ return -ENOMEM;
++
++ md_dst->u.port_info.port_id = i;
++ eth->dsa_meta[i] = md_dst;
++ }
++ } else {
++ /* Hardware special tag parsing needs to be disabled if at least
++ * one MAC does not use DSA.
++ */
++ u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
++ val &= ~MTK_CDMP_STAG_EN;
++ mtk_w32(eth, val, MTK_CDMP_IG_CTRL);
++
++ mtk_w32(eth, 0, MTK_CDMP_EG_CTRL);
++ }
++
+ return 0;
+ }
+
+@@ -3690,10 +3664,9 @@ static int mtk_hw_init(struct mtk_eth *e
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
+ mtk_w32(eth, val | MTK_CDMP_STAG_EN, MTK_CDMP_IG_CTRL);
+- }
+
+- /* Enable RX VLan Offloading */
+- mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
++ mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
++ }
+
+ /* set interrupt delays based on current Net DIM sample */
+ mtk_dim_rx(ð->rx_dim.work);
+@@ -4331,7 +4304,7 @@ static int mtk_add_mac(struct mtk_eth *e
+ eth->netdev[id]->hw_features |= NETIF_F_LRO;
+
+ eth->netdev[id]->vlan_features = eth->soc->hw_features &
+- ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
++ ~NETIF_F_HW_VLAN_CTAG_TX;
+ eth->netdev[id]->features |= eth->soc->hw_features;
+ eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops;
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -48,7 +48,6 @@
+ #define MTK_HW_FEATURES (NETIF_F_IP_CSUM | \
+ NETIF_F_RXCSUM | \
+ NETIF_F_HW_VLAN_CTAG_TX | \
+- NETIF_F_HW_VLAN_CTAG_RX | \
+ NETIF_F_SG | NETIF_F_TSO | \
+ NETIF_F_TSO6 | \
+ NETIF_F_IPV6_CSUM |\
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Thu, 27 Oct 2022 19:50:31 +0200
-Subject: [PATCH] net: ethernet: mtk_eth_soc: account for vlan in rx
- header length
-
-The network stack assumes that devices can handle an extra VLAN tag without
-increasing the MTU
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -29,7 +29,7 @@
- #define MTK_TX_DMA_BUF_LEN_V2 0xffff
- #define MTK_DMA_SIZE 512
- #define MTK_MAC_COUNT 2
--#define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN)
-+#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + ETH_FCS_LEN)
- #define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
- #define MTK_DMA_DUMMY_DESC 0xffffffff
- #define MTK_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | \
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 3 Nov 2022 12:38:49 +0100
+Subject: [PATCH] net: ethernet: mtk_eth_soc: work around issue with sending
+ small fragments
+
+When lots of frames are sent with a number of very small fragments, an
+internal FIFO can overflow, causing the DMA engine to lock up and
+transmit attempts to time out.
+
+Fix this on MT7986 by increasing the reserved FIFO space.
+Fix this on older chips by detecting the presence of small fragments and
+using skb_gso_segment + skb_linearize to deal with them.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1402,12 +1402,28 @@ static void mtk_wake_queue(struct mtk_et
+ }
+ }
+
++static bool mtk_skb_has_small_frag(struct sk_buff *skb)
++{
++ int min_size = 16;
++ int i;
++
++ if (skb_headlen(skb) < min_size)
++ return true;
++
++ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
++ if (skb_frag_size(&skb_shinfo(skb)->frags[i]) < min_size)
++ return true;
++
++ return false;
++}
++
+ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ struct mtk_tx_ring *ring = &eth->tx_ring;
+ struct net_device_stats *stats = &dev->stats;
++ struct sk_buff *segs, *next;
+ bool gso = false;
+ int tx_num;
+
+@@ -1429,6 +1445,18 @@ static netdev_tx_t mtk_start_xmit(struct
+ return NETDEV_TX_BUSY;
+ }
+
++ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) &&
++ skb_is_gso(skb) && mtk_skb_has_small_frag(skb)) {
++ segs = skb_gso_segment(skb, dev->features & ~NETIF_F_ALL_TSO);
++ if (IS_ERR(segs))
++ goto drop;
++
++ if (segs) {
++ consume_skb(skb);
++ skb = segs;
++ }
++ }
++
+ /* TSO: fill MSS info in tcp checksum field */
+ if (skb_is_gso(skb)) {
+ if (skb_cow_head(skb, 0)) {
+@@ -1444,8 +1472,14 @@ static netdev_tx_t mtk_start_xmit(struct
+ }
+ }
+
+- if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0)
+- goto drop;
++ skb_list_walk_safe(skb, skb, next) {
++ if ((!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) &&
++ mtk_skb_has_small_frag(skb) && skb_linearize(skb)) ||
++ mtk_tx_map(skb, dev, tx_num, ring, gso) < 0) {
++ stats->tx_dropped++;
++ dev_kfree_skb_any(skb);
++ }
++ }
+
+ if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
+ netif_tx_stop_all_queues(dev);
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -258,7 +258,7 @@
+ #define MTK_CHK_DDONE_EN BIT(28)
+ #define MTK_DMAD_WR_WDONE BIT(26)
+ #define MTK_WCOMP_EN BIT(24)
+-#define MTK_RESV_BUF (0x40 << 16)
++#define MTK_RESV_BUF (0x80 << 16)
+ #define MTK_MUTLI_CNT (0x4 << 12)
+ #define MTK_LEAKY_BUCKET_EN BIT(11)
+
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Thu, 27 Oct 2022 19:53:57 +0200
-Subject: [PATCH] net: ethernet: mtk_eth_soc: increase tx ring side for
- QDMA devices
-
-In order to use the hardware traffic shaper feature, a larger tx ring is
-needed, especially for the scratch ring, which the hardware shaper uses to
-reorder packets.
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-@@ -900,7 +900,7 @@ static int mtk_init_fq_dma(struct mtk_et
- {
- const struct mtk_soc_data *soc = eth->soc;
- dma_addr_t phy_ring_tail;
-- int cnt = MTK_DMA_SIZE;
-+ int cnt = MTK_QDMA_RING_SIZE;
- dma_addr_t dma_addr;
- int i;
-
-@@ -2154,19 +2154,25 @@ static int mtk_tx_alloc(struct mtk_eth *
- struct mtk_tx_ring *ring = &eth->tx_ring;
- int i, sz = soc->txrx.txd_size;
- struct mtk_tx_dma_v2 *txd;
-+ int ring_size;
-
-- ring->buf = kcalloc(MTK_DMA_SIZE, sizeof(*ring->buf),
-+ if (MTK_HAS_CAPS(soc->caps, MTK_QDMA))
-+ ring_size = MTK_QDMA_RING_SIZE;
-+ else
-+ ring_size = MTK_DMA_SIZE;
-+
-+ ring->buf = kcalloc(ring_size, sizeof(*ring->buf),
- GFP_KERNEL);
- if (!ring->buf)
- goto no_tx_mem;
-
-- ring->dma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz,
-+ ring->dma = dma_alloc_coherent(eth->dma_dev, ring_size * sz,
- &ring->phys, GFP_KERNEL);
- if (!ring->dma)
- goto no_tx_mem;
-
-- for (i = 0; i < MTK_DMA_SIZE; i++) {
-- int next = (i + 1) % MTK_DMA_SIZE;
-+ for (i = 0; i < ring_size; i++) {
-+ int next = (i + 1) % ring_size;
- u32 next_ptr = ring->phys + next * sz;
-
- txd = ring->dma + i * sz;
-@@ -2186,22 +2192,22 @@ static int mtk_tx_alloc(struct mtk_eth *
- * descriptors in ring->dma_pdma.
- */
- if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA)) {
-- ring->dma_pdma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz,
-+ ring->dma_pdma = dma_alloc_coherent(eth->dma_dev, ring_size * sz,
- &ring->phys_pdma, GFP_KERNEL);
- if (!ring->dma_pdma)
- goto no_tx_mem;
-
-- for (i = 0; i < MTK_DMA_SIZE; i++) {
-+ for (i = 0; i < ring_size; i++) {
- ring->dma_pdma[i].txd2 = TX_DMA_DESP2_DEF;
- ring->dma_pdma[i].txd4 = 0;
- }
- }
-
-- ring->dma_size = MTK_DMA_SIZE;
-- atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
-+ ring->dma_size = ring_size;
-+ atomic_set(&ring->free_count, ring_size - 2);
- ring->next_free = ring->dma;
- ring->last_free = (void *)txd;
-- ring->last_free_ptr = (u32)(ring->phys + ((MTK_DMA_SIZE - 1) * sz));
-+ ring->last_free_ptr = (u32)(ring->phys + ((ring_size - 1) * sz));
- ring->thresh = MAX_SKB_FRAGS;
-
- /* make sure that all changes to the dma ring are flushed before we
-@@ -2213,14 +2219,14 @@ static int mtk_tx_alloc(struct mtk_eth *
- mtk_w32(eth, ring->phys, soc->reg_map->qdma.ctx_ptr);
- mtk_w32(eth, ring->phys, soc->reg_map->qdma.dtx_ptr);
- mtk_w32(eth,
-- ring->phys + ((MTK_DMA_SIZE - 1) * sz),
-+ ring->phys + ((ring_size - 1) * sz),
- soc->reg_map->qdma.crx_ptr);
- mtk_w32(eth, ring->last_free_ptr, soc->reg_map->qdma.drx_ptr);
- mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
- soc->reg_map->qdma.qtx_cfg);
- } else {
- mtk_w32(eth, ring->phys_pdma, MT7628_TX_BASE_PTR0);
-- mtk_w32(eth, MTK_DMA_SIZE, MT7628_TX_MAX_CNT0);
-+ mtk_w32(eth, ring_size, MT7628_TX_MAX_CNT0);
- mtk_w32(eth, 0, MT7628_TX_CTX_IDX0);
- mtk_w32(eth, MT7628_PST_DTX_IDX0, soc->reg_map->pdma.rst_idx);
- }
-@@ -2238,7 +2244,7 @@ static void mtk_tx_clean(struct mtk_eth
- int i;
-
- if (ring->buf) {
-- for (i = 0; i < MTK_DMA_SIZE; i++)
-+ for (i = 0; i < ring->dma_size; i++)
- mtk_tx_unmap(eth, &ring->buf[i], false);
- kfree(ring->buf);
- ring->buf = NULL;
-@@ -2246,14 +2252,14 @@ static void mtk_tx_clean(struct mtk_eth
-
- if (ring->dma) {
- dma_free_coherent(eth->dma_dev,
-- MTK_DMA_SIZE * soc->txrx.txd_size,
-+ ring->dma_size * soc->txrx.txd_size,
- ring->dma, ring->phys);
- ring->dma = NULL;
- }
-
- if (ring->dma_pdma) {
- dma_free_coherent(eth->dma_dev,
-- MTK_DMA_SIZE * soc->txrx.txd_size,
-+ ring->dma_size * soc->txrx.txd_size,
- ring->dma_pdma, ring->phys_pdma);
- ring->dma_pdma = NULL;
- }
-@@ -2773,7 +2779,7 @@ static void mtk_dma_free(struct mtk_eth
- netdev_reset_queue(eth->netdev[i]);
- if (eth->scratch_ring) {
- dma_free_coherent(eth->dma_dev,
-- MTK_DMA_SIZE * soc->txrx.txd_size,
-+ MTK_QDMA_RING_SIZE * soc->txrx.txd_size,
- eth->scratch_ring, eth->phy_scratch_ring);
- eth->scratch_ring = NULL;
- eth->phy_scratch_ring = 0;
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -27,6 +27,7 @@
- #define MTK_MAX_RX_LENGTH_2K 2048
- #define MTK_TX_DMA_BUF_LEN 0x3fff
- #define MTK_TX_DMA_BUF_LEN_V2 0xffff
-+#define MTK_QDMA_RING_SIZE 2048
- #define MTK_DMA_SIZE 512
- #define MTK_MAC_COUNT 2
- #define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + ETH_FCS_LEN)
--- /dev/null
+From: Felix Fietkau <nbd@nbd.name>
+Date: Fri, 28 Oct 2022 12:54:48 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: set NETIF_F_ALL_TSO
+
+Significantly improves performance by avoiding unnecessary segmentation
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -48,8 +48,7 @@
+ #define MTK_HW_FEATURES (NETIF_F_IP_CSUM | \
+ NETIF_F_RXCSUM | \
+ NETIF_F_HW_VLAN_CTAG_TX | \
+- NETIF_F_SG | NETIF_F_TSO | \
+- NETIF_F_TSO6 | \
++ NETIF_F_SG | NETIF_F_ALL_TSO | \
+ NETIF_F_IPV6_CSUM |\
+ NETIF_F_HW_TC)
+ #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Fri, 4 Nov 2022 19:49:08 +0100
-Subject: [PATCH] net: ethernet: mtk_eth_soc: avoid port_mg assignment on
- MT7622 and newer
-
-On newer chips, this field is unused and contains some bits related to queue
-assignment. Initialize it to 0 in those cases.
-Fix offload_version on MT7621 and MT7623, which still need the previous value.
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-@@ -4431,7 +4431,7 @@ static const struct mtk_soc_data mt7621_
- .hw_features = MTK_HW_FEATURES,
- .required_clks = MT7621_CLKS_BITMAP,
- .required_pctl = false,
-- .offload_version = 2,
-+ .offload_version = 1,
- .hash_offset = 2,
- .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
- .txrx = {
-@@ -4471,7 +4471,7 @@ static const struct mtk_soc_data mt7623_
- .hw_features = MTK_HW_FEATURES,
- .required_clks = MT7623_CLKS_BITMAP,
- .required_pctl = true,
-- .offload_version = 2,
-+ .offload_version = 1,
- .hash_offset = 2,
- .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
- .txrx = {
---- a/drivers/net/ethernet/mediatek/mtk_ppe.c
-+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
-@@ -215,6 +215,8 @@ int mtk_foe_entry_prepare(struct mtk_eth
- val = FIELD_PREP(MTK_FOE_IB2_DEST_PORT_V2, pse_port) |
- FIELD_PREP(MTK_FOE_IB2_PORT_AG_V2, 0xf);
- } else {
-+ int port_mg = eth->soc->offload_version > 1 ? 0 : 0x3f;
-+
- val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
- FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
- FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
-@@ -222,7 +224,7 @@ int mtk_foe_entry_prepare(struct mtk_eth
- entry->ib1 = val;
-
- val = FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port) |
-- FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
-+ FIELD_PREP(MTK_FOE_IB2_PORT_MG, port_mg) |
- FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f);
- }
-
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Thu, 27 Oct 2022 20:17:27 +0200
-Subject: [PATCH] net: ethernet: mtk_eth_soc: implement multi-queue
- support for per-port queues
-
-When sending traffic to multiple ports with different link speeds, queued
-packets to one port can drown out tx to other ports.
-In order to better handle transmission to multiple ports, use the hardware
-shaper feature to implement weighted fair queueing between ports.
-Weight and maximum rate are automatically adjusted based on the link speed
-of the port.
-The first 3 queues are unrestricted and reserved for non-DSA direct tx on
-GMAC ports. The following queues are automatically assigned by the MTK DSA
-tag driver based on the target port number.
-The PPE offload code configures the queues for offloaded traffic in the same
-way.
-This feature is only supported on devices supporting QDMA. All queues still
-share the same DMA ring and descriptor pool.
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-@@ -55,6 +55,7 @@ static const struct mtk_reg_map mtk_reg_
- },
- .qdma = {
- .qtx_cfg = 0x1800,
-+ .qtx_sch = 0x1804,
- .rx_ptr = 0x1900,
- .rx_cnt_cfg = 0x1904,
- .qcrx_ptr = 0x1908,
-@@ -62,6 +63,7 @@ static const struct mtk_reg_map mtk_reg_
- .rst_idx = 0x1a08,
- .delay_irq = 0x1a0c,
- .fc_th = 0x1a10,
-+ .tx_sch_rate = 0x1a14,
- .int_grp = 0x1a20,
- .hred = 0x1a44,
- .ctx_ptr = 0x1b00,
-@@ -117,6 +119,7 @@ static const struct mtk_reg_map mt7986_r
- },
- .qdma = {
- .qtx_cfg = 0x4400,
-+ .qtx_sch = 0x4404,
- .rx_ptr = 0x4500,
- .rx_cnt_cfg = 0x4504,
- .qcrx_ptr = 0x4508,
-@@ -134,6 +137,7 @@ static const struct mtk_reg_map mt7986_r
- .fq_tail = 0x4724,
- .fq_count = 0x4728,
- .fq_blen = 0x472c,
-+ .tx_sch_rate = 0x4798,
- },
- .gdm1_cnt = 0x1c00,
- .gdma_to_ppe0 = 0x3333,
-@@ -576,6 +580,75 @@ static void mtk_mac_link_down(struct phy
- mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
- }
-
-+static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx,
-+ int speed)
-+{
-+ const struct mtk_soc_data *soc = eth->soc;
-+ u32 ofs, val;
-+
-+ if (!MTK_HAS_CAPS(soc->caps, MTK_QDMA))
-+ return;
-+
-+ val = MTK_QTX_SCH_MIN_RATE_EN |
-+ /* minimum: 10 Mbps */
-+ FIELD_PREP(MTK_QTX_SCH_MIN_RATE_MAN, 1) |
-+ FIELD_PREP(MTK_QTX_SCH_MIN_RATE_EXP, 4) |
-+ MTK_QTX_SCH_LEAKY_BUCKET_SIZE;
-+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
-+ val |= MTK_QTX_SCH_LEAKY_BUCKET_EN;
-+
-+ if (IS_ENABLED(CONFIG_SOC_MT7621)) {
-+ switch (speed) {
-+ case SPEED_10:
-+ val |= MTK_QTX_SCH_MAX_RATE_EN |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 103) |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 2) |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1);
-+ break;
-+ case SPEED_100:
-+ val |= MTK_QTX_SCH_MAX_RATE_EN |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 103) |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 3);
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1);
-+ break;
-+ case SPEED_1000:
-+ val |= MTK_QTX_SCH_MAX_RATE_EN |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 105) |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 4) |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 10);
-+ break;
-+ default:
-+ break;
-+ }
-+ } else {
-+ switch (speed) {
-+ case SPEED_10:
-+ val |= MTK_QTX_SCH_MAX_RATE_EN |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 4) |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1);
-+ break;
-+ case SPEED_100:
-+ val |= MTK_QTX_SCH_MAX_RATE_EN |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5);
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1);
-+ break;
-+ case SPEED_1000:
-+ val |= MTK_QTX_SCH_MAX_RATE_EN |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 10) |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) |
-+ FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 10);
-+ break;
-+ default:
-+ break;
-+ }
-+ }
-+
-+ ofs = MTK_QTX_OFFSET * idx;
-+ mtk_w32(eth, val, soc->reg_map->qdma.qtx_sch + ofs);
-+}
-+
- static void mtk_mac_link_up(struct phylink_config *config,
- struct phy_device *phy,
- unsigned int mode, phy_interface_t interface,
-@@ -601,6 +674,8 @@ static void mtk_mac_link_up(struct phyli
- break;
- }
-
-+ mtk_set_queue_speed(mac->hw, mac->id, speed);
-+
- /* Configure duplex */
- if (duplex == DUPLEX_FULL)
- mcr |= MAC_MCR_FORCE_DPX;
-@@ -1059,7 +1134,8 @@ static void mtk_tx_set_dma_desc_v1(struc
-
- WRITE_ONCE(desc->txd1, info->addr);
-
-- data = TX_DMA_SWC | TX_DMA_PLEN0(info->size);
-+ data = TX_DMA_SWC | TX_DMA_PLEN0(info->size) |
-+ FIELD_PREP(TX_DMA_PQID, info->qid);
- if (info->last)
- data |= TX_DMA_LS0;
- WRITE_ONCE(desc->txd3, data);
-@@ -1093,9 +1169,6 @@ static void mtk_tx_set_dma_desc_v2(struc
- data |= TX_DMA_LS0;
- WRITE_ONCE(desc->txd3, data);
-
-- if (!info->qid && mac->id)
-- info->qid = MTK_QDMA_GMAC2_QID;
--
- data = (mac->id + 1) << TX_DMA_FPORT_SHIFT_V2; /* forward port */
- data |= TX_DMA_SWC_V2 | QID_BITS_V2(info->qid);
- WRITE_ONCE(desc->txd4, data);
-@@ -1139,11 +1212,12 @@ static int mtk_tx_map(struct sk_buff *sk
- .gso = gso,
- .csum = skb->ip_summed == CHECKSUM_PARTIAL,
- .vlan = skb_vlan_tag_present(skb),
-- .qid = skb->mark & MTK_QDMA_TX_MASK,
-+ .qid = skb_get_queue_mapping(skb),
- .vlan_tci = skb_vlan_tag_get(skb),
- .first = true,
- .last = !skb_is_nonlinear(skb),
- };
-+ struct netdev_queue *txq;
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- const struct mtk_soc_data *soc = eth->soc;
-@@ -1151,8 +1225,10 @@ static int mtk_tx_map(struct sk_buff *sk
- struct mtk_tx_dma *itxd_pdma, *txd_pdma;
- struct mtk_tx_buf *itx_buf, *tx_buf;
- int i, n_desc = 1;
-+ int queue = skb_get_queue_mapping(skb);
- int k = 0;
-
-+ txq = netdev_get_tx_queue(dev, queue);
- itxd = ring->next_free;
- itxd_pdma = qdma_to_pdma(ring, itxd);
- if (itxd == ring->last_free)
-@@ -1201,7 +1277,7 @@ static int mtk_tx_map(struct sk_buff *sk
- memset(&txd_info, 0, sizeof(struct mtk_tx_dma_desc_info));
- txd_info.size = min_t(unsigned int, frag_size,
- soc->txrx.dma_max_len);
-- txd_info.qid = skb->mark & MTK_QDMA_TX_MASK;
-+ txd_info.qid = queue;
- txd_info.last = i == skb_shinfo(skb)->nr_frags - 1 &&
- !(frag_size - txd_info.size);
- txd_info.addr = skb_frag_dma_map(eth->dma_dev, frag,
-@@ -1240,7 +1316,7 @@ static int mtk_tx_map(struct sk_buff *sk
- txd_pdma->txd2 |= TX_DMA_LS1;
- }
-
-- netdev_sent_queue(dev, skb->len);
-+ netdev_tx_sent_queue(txq, skb->len);
- skb_tx_timestamp(skb);
-
- ring->next_free = mtk_qdma_phys_to_virt(ring, txd->txd2);
-@@ -1252,8 +1328,7 @@ static int mtk_tx_map(struct sk_buff *sk
- wmb();
-
- if (MTK_HAS_CAPS(soc->caps, MTK_QDMA)) {
-- if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) ||
-- !netdev_xmit_more())
-+ if (netif_xmit_stopped(txq) || !netdev_xmit_more())
- mtk_w32(eth, txd->txd2, soc->reg_map->qdma.ctx_ptr);
- } else {
- int next_idx;
-@@ -1322,7 +1397,7 @@ static void mtk_wake_queue(struct mtk_et
- for (i = 0; i < MTK_MAC_COUNT; i++) {
- if (!eth->netdev[i])
- continue;
-- netif_wake_queue(eth->netdev[i]);
-+ netif_tx_wake_all_queues(eth->netdev[i]);
- }
- }
-
-@@ -1346,7 +1421,7 @@ static netdev_tx_t mtk_start_xmit(struct
-
- tx_num = mtk_cal_txd_req(eth, skb);
- if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
-- netif_stop_queue(dev);
-+ netif_tx_stop_all_queues(dev);
- netif_err(eth, tx_queued, dev,
- "Tx Ring full when queue awake!\n");
- spin_unlock(&eth->page_lock);
-@@ -1372,7 +1447,7 @@ static netdev_tx_t mtk_start_xmit(struct
- goto drop;
-
- if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
-- netif_stop_queue(dev);
-+ netif_tx_stop_all_queues(dev);
-
- spin_unlock(&eth->page_lock);
-
-@@ -1539,10 +1614,12 @@ static int mtk_xdp_submit_frame(struct m
- struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
- const struct mtk_soc_data *soc = eth->soc;
- struct mtk_tx_ring *ring = &eth->tx_ring;
-+ struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_tx_dma_desc_info txd_info = {
- .size = xdpf->len,
- .first = true,
- .last = !xdp_frame_has_frags(xdpf),
-+ .qid = mac->id,
- };
- int err, index = 0, n_desc = 1, nr_frags;
- struct mtk_tx_dma *htxd, *txd, *txd_pdma;
-@@ -1593,6 +1670,7 @@ static int mtk_xdp_submit_frame(struct m
- memset(&txd_info, 0, sizeof(struct mtk_tx_dma_desc_info));
- txd_info.size = skb_frag_size(&sinfo->frags[index]);
- txd_info.last = index + 1 == nr_frags;
-+ txd_info.qid = mac->id;
- data = skb_frag_address(&sinfo->frags[index]);
-
- index++;
-@@ -1944,8 +2022,46 @@ rx_done:
- return done;
- }
-
-+struct mtk_poll_state {
-+ struct netdev_queue *txq;
-+ unsigned int total;
-+ unsigned int done;
-+ unsigned int bytes;
-+};
-+
-+static void
-+mtk_poll_tx_done(struct mtk_eth *eth, struct mtk_poll_state *state, u8 mac,
-+ struct sk_buff *skb)
-+{
-+ struct netdev_queue *txq;
-+ struct net_device *dev;
-+ unsigned int bytes = skb->len;
-+
-+ state->total++;
-+ eth->tx_packets++;
-+ eth->tx_bytes += bytes;
-+
-+ dev = eth->netdev[mac];
-+ if (!dev)
-+ return;
-+
-+ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-+ if (state->txq == txq) {
-+ state->done++;
-+ state->bytes += bytes;
-+ return;
-+ }
-+
-+ if (state->txq)
-+ netdev_tx_completed_queue(state->txq, state->done, state->bytes);
-+
-+ state->txq = txq;
-+ state->done = 1;
-+ state->bytes = bytes;
-+}
-+
- static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget,
-- unsigned int *done, unsigned int *bytes)
-+ struct mtk_poll_state *state)
- {
- const struct mtk_reg_map *reg_map = eth->soc->reg_map;
- struct mtk_tx_ring *ring = &eth->tx_ring;
-@@ -1975,12 +2091,9 @@ static int mtk_poll_tx_qdma(struct mtk_e
- break;
-
- if (tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) {
-- if (tx_buf->type == MTK_TYPE_SKB) {
-- struct sk_buff *skb = tx_buf->data;
-+ if (tx_buf->type == MTK_TYPE_SKB)
-+ mtk_poll_tx_done(eth, state, mac, tx_buf->data);
-
-- bytes[mac] += skb->len;
-- done[mac]++;
-- }
- budget--;
- }
- mtk_tx_unmap(eth, tx_buf, true);
-@@ -1998,7 +2111,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
- }
-
- static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget,
-- unsigned int *done, unsigned int *bytes)
-+ struct mtk_poll_state *state)
- {
- struct mtk_tx_ring *ring = &eth->tx_ring;
- struct mtk_tx_buf *tx_buf;
-@@ -2014,12 +2127,8 @@ static int mtk_poll_tx_pdma(struct mtk_e
- break;
-
- if (tx_buf->data != (void *)MTK_DMA_DUMMY_DESC) {
-- if (tx_buf->type == MTK_TYPE_SKB) {
-- struct sk_buff *skb = tx_buf->data;
--
-- bytes[0] += skb->len;
-- done[0]++;
-- }
-+ if (tx_buf->type == MTK_TYPE_SKB)
-+ mtk_poll_tx_done(eth, state, 0, tx_buf->data);
- budget--;
- }
- mtk_tx_unmap(eth, tx_buf, true);
-@@ -2040,26 +2149,15 @@ static int mtk_poll_tx(struct mtk_eth *e
- {
- struct mtk_tx_ring *ring = &eth->tx_ring;
- struct dim_sample dim_sample = {};
-- unsigned int done[MTK_MAX_DEVS];
-- unsigned int bytes[MTK_MAX_DEVS];
-- int total = 0, i;
--
-- memset(done, 0, sizeof(done));
-- memset(bytes, 0, sizeof(bytes));
-+ struct mtk_poll_state state = {};
-
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
-- budget = mtk_poll_tx_qdma(eth, budget, done, bytes);
-+ budget = mtk_poll_tx_qdma(eth, budget, &state);
- else
-- budget = mtk_poll_tx_pdma(eth, budget, done, bytes);
-+ budget = mtk_poll_tx_pdma(eth, budget, &state);
-
-- for (i = 0; i < MTK_MAC_COUNT; i++) {
-- if (!eth->netdev[i] || !done[i])
-- continue;
-- netdev_completed_queue(eth->netdev[i], done[i], bytes[i]);
-- total += done[i];
-- eth->tx_packets += done[i];
-- eth->tx_bytes += bytes[i];
-- }
-+ if (state.txq)
-+ netdev_tx_completed_queue(state.txq, state.done, state.bytes);
-
- dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes,
- &dim_sample);
-@@ -2069,7 +2167,7 @@ static int mtk_poll_tx(struct mtk_eth *e
- (atomic_read(&ring->free_count) > ring->thresh))
- mtk_wake_queue(eth);
-
-- return total;
-+ return state.total;
- }
-
- static void mtk_handle_status_irq(struct mtk_eth *eth)
-@@ -2155,6 +2253,7 @@ static int mtk_tx_alloc(struct mtk_eth *
- int i, sz = soc->txrx.txd_size;
- struct mtk_tx_dma_v2 *txd;
- int ring_size;
-+ u32 ofs, val;
-
- if (MTK_HAS_CAPS(soc->caps, MTK_QDMA))
- ring_size = MTK_QDMA_RING_SIZE;
-@@ -2222,8 +2321,25 @@ static int mtk_tx_alloc(struct mtk_eth *
- ring->phys + ((ring_size - 1) * sz),
- soc->reg_map->qdma.crx_ptr);
- mtk_w32(eth, ring->last_free_ptr, soc->reg_map->qdma.drx_ptr);
-- mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
-- soc->reg_map->qdma.qtx_cfg);
-+
-+ for (i = 0, ofs = 0; i < MTK_QDMA_NUM_QUEUES; i++) {
-+ val = (QDMA_RES_THRES << 8) | QDMA_RES_THRES;
-+ mtk_w32(eth, val, soc->reg_map->qdma.qtx_cfg + ofs);
-+
-+ val = MTK_QTX_SCH_MIN_RATE_EN |
-+ /* minimum: 10 Mbps */
-+ FIELD_PREP(MTK_QTX_SCH_MIN_RATE_MAN, 1) |
-+ FIELD_PREP(MTK_QTX_SCH_MIN_RATE_EXP, 4) |
-+ MTK_QTX_SCH_LEAKY_BUCKET_SIZE;
-+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
-+ val |= MTK_QTX_SCH_LEAKY_BUCKET_EN;
-+ mtk_w32(eth, val, soc->reg_map->qdma.qtx_sch + ofs);
-+ ofs += MTK_QTX_OFFSET;
-+ }
-+ val = MTK_QDMA_TX_SCH_MAX_WFQ | (MTK_QDMA_TX_SCH_MAX_WFQ << 16);
-+ mtk_w32(eth, val, soc->reg_map->qdma.tx_sch_rate);
-+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
-+ mtk_w32(eth, val, soc->reg_map->qdma.tx_sch_rate + 4);
- } else {
- mtk_w32(eth, ring->phys_pdma, MT7628_TX_BASE_PTR0);
- mtk_w32(eth, ring_size, MT7628_TX_MAX_CNT0);
-@@ -2903,7 +3019,7 @@ static int mtk_start_dma(struct mtk_eth
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
- val |= MTK_MUTLI_CNT | MTK_RESV_BUF |
- MTK_WCOMP_EN | MTK_DMAD_WR_WDONE |
-- MTK_CHK_DDONE_EN;
-+ MTK_CHK_DDONE_EN | MTK_LEAKY_BUCKET_EN;
- else
- val |= MTK_RX_BT_32DWORDS;
- mtk_w32(eth, val, reg_map->qdma.glo_cfg);
-@@ -2949,6 +3065,45 @@ static void mtk_gdm_config(struct mtk_et
- mtk_w32(eth, 0, MTK_RST_GL);
- }
-
-+static int mtk_device_event(struct notifier_block *n, unsigned long event, void *ptr)
-+{
-+ struct mtk_mac *mac = container_of(n, struct mtk_mac, device_notifier);
-+ struct mtk_eth *eth = mac->hw;
-+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-+ struct ethtool_link_ksettings s;
-+ struct net_device *ldev;
-+ struct list_head *iter;
-+ struct dsa_port *dp;
-+
-+ if (event != NETDEV_CHANGE)
-+ return NOTIFY_DONE;
-+
-+ netdev_for_each_lower_dev(dev, ldev, iter) {
-+ if (netdev_priv(ldev) == mac)
-+ goto found;
-+ }
-+
-+ return NOTIFY_DONE;
-+
-+found:
-+ if (!dsa_slave_dev_check(dev))
-+ return NOTIFY_DONE;
-+
-+ if (__ethtool_get_link_ksettings(dev, &s))
-+ return NOTIFY_DONE;
-+
-+ if (s.base.speed == 0 || s.base.speed == ((__u32)-1))
-+ return NOTIFY_DONE;
-+
-+ dp = dsa_port_from_netdev(dev);
-+ if (dp->index >= MTK_QDMA_NUM_QUEUES)
-+ return NOTIFY_DONE;
-+
-+ mtk_set_queue_speed(eth, dp->index + 3, s.base.speed);
-+
-+ return NOTIFY_DONE;
-+}
-+
- static int mtk_open(struct net_device *dev)
- {
- struct mtk_mac *mac = netdev_priv(dev);
-@@ -2993,7 +3148,8 @@ static int mtk_open(struct net_device *d
- refcount_inc(&eth->dma_refcnt);
-
- phylink_start(mac->phylink);
-- netif_start_queue(dev);
-+ netif_tx_start_all_queues(dev);
-+
- return 0;
- }
-
-@@ -3716,8 +3872,12 @@ static int mtk_unreg_dev(struct mtk_eth
- int i;
-
- for (i = 0; i < MTK_MAC_COUNT; i++) {
-+ struct mtk_mac *mac;
- if (!eth->netdev[i])
- continue;
-+ mac = netdev_priv(eth->netdev[i]);
-+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
-+ unregister_netdevice_notifier(&mac->device_notifier);
- unregister_netdev(eth->netdev[i]);
- }
-
-@@ -3934,6 +4094,23 @@ static int mtk_set_rxnfc(struct net_devi
- return ret;
- }
-
-+static u16 mtk_select_queue(struct net_device *dev, struct sk_buff *skb,
-+ struct net_device *sb_dev)
-+{
-+ struct mtk_mac *mac = netdev_priv(dev);
-+ unsigned int queue = 0;
-+
-+ if (netdev_uses_dsa(dev))
-+ queue = skb_get_queue_mapping(skb) + 3;
-+ else
-+ queue = mac->id;
-+
-+ if (queue >= dev->num_tx_queues)
-+ queue = 0;
-+
-+ return queue;
-+}
-+
- static const struct ethtool_ops mtk_ethtool_ops = {
- .get_link_ksettings = mtk_get_link_ksettings,
- .set_link_ksettings = mtk_set_link_ksettings,
-@@ -3969,6 +4146,7 @@ static const struct net_device_ops mtk_n
- .ndo_setup_tc = mtk_eth_setup_tc,
- .ndo_bpf = mtk_xdp,
- .ndo_xdp_xmit = mtk_xdp_xmit,
-+ .ndo_select_queue = mtk_select_queue,
- };
-
- static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
-@@ -3978,6 +4156,7 @@ static int mtk_add_mac(struct mtk_eth *e
- struct phylink *phylink;
- struct mtk_mac *mac;
- int id, err;
-+ int txqs = 1;
-
- if (!_id) {
- dev_err(eth->dev, "missing mac id\n");
-@@ -3995,7 +4174,10 @@ static int mtk_add_mac(struct mtk_eth *e
- return -EINVAL;
- }
-
-- eth->netdev[id] = alloc_etherdev(sizeof(*mac));
-+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
-+ txqs = MTK_QDMA_NUM_QUEUES;
-+
-+ eth->netdev[id] = alloc_etherdev_mqs(sizeof(*mac), txqs, 1);
- if (!eth->netdev[id]) {
- dev_err(eth->dev, "alloc_etherdev failed\n");
- return -ENOMEM;
-@@ -4092,6 +4274,11 @@ static int mtk_add_mac(struct mtk_eth *e
- else
- eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN;
-
-+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
-+ mac->device_notifier.notifier_call = mtk_device_event;
-+ register_netdevice_notifier(&mac->device_notifier);
-+ }
-+
- return 0;
-
- free_netdev:
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -22,6 +22,7 @@
- #include <linux/bpf_trace.h>
- #include "mtk_ppe.h"
-
-+#define MTK_QDMA_NUM_QUEUES 16
- #define MTK_QDMA_PAGE_SIZE 2048
- #define MTK_MAX_RX_LENGTH 1536
- #define MTK_MAX_RX_LENGTH_2K 2048
-@@ -215,8 +216,26 @@
- #define MTK_RING_MAX_AGG_CNT_H ((MTK_HW_LRO_MAX_AGG_CNT >> 6) & 0x3)
-
- /* QDMA TX Queue Configuration Registers */
-+#define MTK_QTX_OFFSET 0x10
- #define QDMA_RES_THRES 4
-
-+/* QDMA Tx Queue Scheduler Configuration Registers */
-+#define MTK_QTX_SCH_TX_SEL BIT(31)
-+#define MTK_QTX_SCH_TX_SEL_V2 GENMASK(31, 30)
-+
-+#define MTK_QTX_SCH_LEAKY_BUCKET_EN BIT(30)
-+#define MTK_QTX_SCH_LEAKY_BUCKET_SIZE GENMASK(29, 28)
-+#define MTK_QTX_SCH_MIN_RATE_EN BIT(27)
-+#define MTK_QTX_SCH_MIN_RATE_MAN GENMASK(26, 20)
-+#define MTK_QTX_SCH_MIN_RATE_EXP GENMASK(19, 16)
-+#define MTK_QTX_SCH_MAX_RATE_WEIGHT GENMASK(15, 12)
-+#define MTK_QTX_SCH_MAX_RATE_EN BIT(11)
-+#define MTK_QTX_SCH_MAX_RATE_MAN GENMASK(10, 4)
-+#define MTK_QTX_SCH_MAX_RATE_EXP GENMASK(3, 0)
-+
-+/* QDMA TX Scheduler Rate Control Register */
-+#define MTK_QDMA_TX_SCH_MAX_WFQ BIT(15)
-+
- /* QDMA Global Configuration Register */
- #define MTK_RX_2B_OFFSET BIT(31)
- #define MTK_RX_BT_32DWORDS (3 << 11)
-@@ -235,6 +254,7 @@
- #define MTK_WCOMP_EN BIT(24)
- #define MTK_RESV_BUF (0x40 << 16)
- #define MTK_MUTLI_CNT (0x4 << 12)
-+#define MTK_LEAKY_BUCKET_EN BIT(11)
-
- /* QDMA Flow Control Register */
- #define FC_THRES_DROP_MODE BIT(20)
-@@ -265,8 +285,6 @@
- #define MTK_STAT_OFFSET 0x40
-
- /* QDMA TX NUM */
--#define MTK_QDMA_TX_NUM 16
--#define MTK_QDMA_TX_MASK (MTK_QDMA_TX_NUM - 1)
- #define QID_BITS_V2(x) (((x) & 0x3f) << 16)
- #define MTK_QDMA_GMAC2_QID 8
-
-@@ -296,6 +314,7 @@
- #define TX_DMA_PLEN0(x) (((x) & eth->soc->txrx.dma_max_len) << eth->soc->txrx.dma_len_offset)
- #define TX_DMA_PLEN1(x) ((x) & eth->soc->txrx.dma_max_len)
- #define TX_DMA_SWC BIT(14)
-+#define TX_DMA_PQID GENMASK(3, 0)
-
- /* PDMA on MT7628 */
- #define TX_DMA_DONE BIT(31)
-@@ -952,6 +971,7 @@ struct mtk_reg_map {
- } pdma;
- struct {
- u32 qtx_cfg; /* tx queue configuration */
-+ u32 qtx_sch; /* tx queue scheduler configuration */
- u32 rx_ptr; /* rx base pointer */
- u32 rx_cnt_cfg; /* rx max count configuration */
- u32 qcrx_ptr; /* rx cpu pointer */
-@@ -969,6 +989,7 @@ struct mtk_reg_map {
- u32 fq_tail; /* fq tail pointer */
- u32 fq_count; /* fq free page count */
- u32 fq_blen; /* fq free page buffer length */
-+ u32 tx_sch_rate; /* tx scheduler rate control registers */
- } qdma;
- u32 gdm1_cnt;
- u32 gdma_to_ppe0;
-@@ -1175,6 +1196,7 @@ struct mtk_mac {
- __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT];
- int hwlro_ip_cnt;
- unsigned int syscfg0;
-+ struct notifier_block device_notifier;
- };
-
- /* the struct describing the SoC. these are declared in the soc_xyz.c files */
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Fri, 28 Oct 2022 18:16:03 +0200
-Subject: [PATCH] net: dsa: tag_mtk: assign per-port queues
-
-Keeps traffic sent to the switch within link speed limits
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/net/dsa/tag_mtk.c
-+++ b/net/dsa/tag_mtk.c
-@@ -32,6 +32,8 @@ static struct sk_buff *mtk_tag_xmit(stru
- */
- eth_skb_pad(skb);
-
-+ skb_set_queue_mapping(skb, dp->index);
-+
- /* Build the special tag after the MAC Source Address. If VLAN header
- * is present, it's required that VLAN header and special tag is
- * being combined. Only in this way we can allow the switch can parse
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Thu, 3 Nov 2022 17:49:44 +0100
-Subject: [PATCH] net: ethernet: mediatek: ppe: assign per-port queues
- for offloaded traffic
-
-Keeps traffic sent to the switch within link speed limits
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_ppe.c
-+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
-@@ -445,6 +445,24 @@ static inline bool mtk_foe_entry_usable(
- FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
- }
-
-+int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry,
-+ unsigned int queue)
-+{
-+ u32 *ib2 = mtk_foe_entry_ib2(eth, entry);
-+
-+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
-+ *ib2 &= ~MTK_FOE_IB2_QID_V2;
-+ *ib2 |= FIELD_PREP(MTK_FOE_IB2_QID_V2, queue);
-+ *ib2 |= MTK_FOE_IB2_PSE_QOS_V2;
-+ } else {
-+ *ib2 &= ~MTK_FOE_IB2_QID;
-+ *ib2 |= FIELD_PREP(MTK_FOE_IB2_QID, queue);
-+ *ib2 |= MTK_FOE_IB2_PSE_QOS;
-+ }
-+
-+ return 0;
-+}
-+
- static bool
- mtk_flow_entry_match(struct mtk_eth *eth, struct mtk_flow_entry *entry,
- struct mtk_foe_entry *data)
---- a/drivers/net/ethernet/mediatek/mtk_ppe.h
-+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
-@@ -69,7 +69,9 @@ enum {
- #define MTK_FOE_IB2_DSCP GENMASK(31, 24)
-
- /* CONFIG_MEDIATEK_NETSYS_V2 */
-+#define MTK_FOE_IB2_QID_V2 GENMASK(6, 0)
- #define MTK_FOE_IB2_PORT_MG_V2 BIT(7)
-+#define MTK_FOE_IB2_PSE_QOS_V2 BIT(8)
- #define MTK_FOE_IB2_DEST_PORT_V2 GENMASK(12, 9)
- #define MTK_FOE_IB2_MULTICAST_V2 BIT(13)
- #define MTK_FOE_IB2_WDMA_WINFO_V2 BIT(19)
-@@ -369,6 +371,8 @@ int mtk_foe_entry_set_pppoe(struct mtk_e
- int sid);
- int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry,
- int wdma_idx, int txq, int bss, int wcid);
-+int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry,
-+ unsigned int queue);
- int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
- void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
- int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
---- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
-+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
-@@ -188,7 +188,7 @@ mtk_flow_set_output_device(struct mtk_et
- int *wed_index)
- {
- struct mtk_wdma_info info = {};
-- int pse_port, dsa_port;
-+ int pse_port, dsa_port, queue;
-
- if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) {
- mtk_foe_entry_set_wdma(eth, foe, info.wdma_idx, info.queue,
-@@ -212,8 +212,6 @@ mtk_flow_set_output_device(struct mtk_et
- }
-
- dsa_port = mtk_flow_get_dsa_port(&dev);
-- if (dsa_port >= 0)
-- mtk_foe_entry_set_dsa(eth, foe, dsa_port);
-
- if (dev == eth->netdev[0])
- pse_port = 1;
-@@ -222,6 +220,14 @@ mtk_flow_set_output_device(struct mtk_et
- else
- return -EOPNOTSUPP;
-
-+ if (dsa_port >= 0) {
-+ mtk_foe_entry_set_dsa(eth, foe, dsa_port);
-+ queue = 3 + dsa_port;
-+ } else {
-+ queue = pse_port - 1;
-+ }
-+ mtk_foe_entry_set_queue(eth, foe, queue);
-+
- out:
- mtk_foe_entry_set_pse_port(eth, foe, pse_port);
-
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Thu, 27 Oct 2022 23:39:52 +0200
-Subject: [PATCH] net: ethernet: mtk_eth_soc: compile out netsys v2 code
- on mt7621
-
-Avoid some branches in the hot path on low-end devices with limited CPU power,
-and reduce code size
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -916,7 +916,13 @@ enum mkt_eth_capabilities {
- #define MTK_MUX_GMAC12_TO_GEPHY_SGMII \
- (MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII | MTK_MUX)
-
--#define MTK_HAS_CAPS(caps, _x) (((caps) & (_x)) == (_x))
-+#ifdef CONFIG_SOC_MT7621
-+#define MTK_CAP_MASK MTK_NETSYS_V2
-+#else
-+#define MTK_CAP_MASK 0
-+#endif
-+
-+#define MTK_HAS_CAPS(caps, _x) (((caps) & (_x) & ~(MTK_CAP_MASK)) == (_x))
-
- #define MT7621_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_TRGMII | \
- MTK_GMAC2_RGMII | MTK_SHARED_INT | \
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Tue, 8 Nov 2022 15:03:15 +0100
-Subject: [PATCH] net: dsa: add support for DSA rx offloading via
- metadata dst
-
-If a metadata dst is present with the type METADATA_HW_PORT_MUX on a dsa cpu
-port netdev, assume that it carries the port number and that there is no DSA
-tag present in the skb data.
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/net/core/flow_dissector.c
-+++ b/net/core/flow_dissector.c
-@@ -940,12 +940,14 @@ bool __skb_flow_dissect(const struct net
- #if IS_ENABLED(CONFIG_NET_DSA)
- if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) &&
- proto == htons(ETH_P_XDSA))) {
-+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
- const struct dsa_device_ops *ops;
- int offset = 0;
-
- ops = skb->dev->dsa_ptr->tag_ops;
- /* Only DSA header taggers break flow dissection */
-- if (ops->needed_headroom) {
-+ if (ops->needed_headroom &&
-+ (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)) {
- if (ops->flow_dissect)
- ops->flow_dissect(skb, &proto, &offset);
- else
---- a/net/dsa/dsa.c
-+++ b/net/dsa/dsa.c
-@@ -20,6 +20,7 @@
- #include <linux/phy_fixed.h>
- #include <linux/ptp_classify.h>
- #include <linux/etherdevice.h>
-+#include <net/dst_metadata.h>
-
- #include "dsa_priv.h"
-
-@@ -225,6 +226,7 @@ static bool dsa_skb_defer_rx_timestamp(s
- static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *unused)
- {
-+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
- struct dsa_port *cpu_dp = dev->dsa_ptr;
- struct sk_buff *nskb = NULL;
- struct dsa_slave_priv *p;
-@@ -238,7 +240,22 @@ static int dsa_switch_rcv(struct sk_buff
- if (!skb)
- return 0;
-
-- nskb = cpu_dp->rcv(skb, dev);
-+ if (md_dst && md_dst->type == METADATA_HW_PORT_MUX) {
-+ unsigned int port = md_dst->u.port_info.port_id;
-+
-+ skb_dst_drop(skb);
-+ if (!skb_has_extensions(skb))
-+ skb->slow_gro = 0;
-+
-+ skb->dev = dsa_master_find_slave(dev, 0, port);
-+ if (likely(skb->dev)) {
-+ dsa_default_offload_fwd_mark(skb);
-+ nskb = skb;
-+ }
-+ } else {
-+ nskb = cpu_dp->rcv(skb, dev);
-+ }
-+
- if (!nskb) {
- kfree_skb(skb);
- return 0;
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Fri, 28 Oct 2022 11:01:12 +0200
-Subject: [PATCH] net: ethernet: mtk_eth_soc: fix VLAN rx hardware
- acceleration
-
-- enable VLAN untagging for PDMA rx
-- make it possible to disable the feature via ethtool
-- pass VLAN tag to the DSA driver
-- untag special tag on PDMA only if no non-DSA devices are in use
-- disable special tag untagging on 7986 for now, since it's not working yet
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-@@ -23,6 +23,7 @@
- #include <linux/jhash.h>
- #include <linux/bitfield.h>
- #include <net/dsa.h>
-+#include <net/dst_metadata.h>
-
- #include "mtk_eth_soc.h"
- #include "mtk_wed.h"
-@@ -1973,16 +1974,22 @@ static int mtk_poll_rx(struct napi_struc
- htons(RX_DMA_VPID(trxd.rxd4)),
- RX_DMA_VID(trxd.rxd4));
- } else if (trxd.rxd2 & RX_DMA_VTAG) {
-- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
-+ __vlan_hwaccel_put_tag(skb, htons(RX_DMA_VPID(trxd.rxd3)),
- RX_DMA_VID(trxd.rxd3));
- }
-+ }
-+
-+ /* When using VLAN untagging in combination with DSA, the
-+ * hardware treats the MTK special tag as a VLAN and untags it.
-+ */
-+ if (skb_vlan_tag_present(skb) && netdev_uses_dsa(netdev)) {
-+ unsigned int port = ntohs(skb->vlan_proto) & GENMASK(2, 0);
-
-- /* If the device is attached to a dsa switch, the special
-- * tag inserted in VLAN field by hw switch can * be offloaded
-- * by RX HW VLAN offload. Clear vlan info.
-- */
-- if (netdev_uses_dsa(netdev))
-- __vlan_hwaccel_clear_tag(skb);
-+ if (port < ARRAY_SIZE(eth->dsa_meta) &&
-+ eth->dsa_meta[port])
-+ skb_dst_set_noref(skb, &eth->dsa_meta[port]->dst);
-+
-+ __vlan_hwaccel_clear_tag(skb);
- }
-
- skb_record_rx_queue(skb, 0);
-@@ -2799,15 +2806,30 @@ static netdev_features_t mtk_fix_feature
-
- static int mtk_set_features(struct net_device *dev, netdev_features_t features)
- {
-- int err = 0;
-+ struct mtk_mac *mac = netdev_priv(dev);
-+ struct mtk_eth *eth = mac->hw;
-+ netdev_features_t diff = dev->features ^ features;
-+ int i;
-+
-+ if ((diff & NETIF_F_LRO) && !(features & NETIF_F_LRO))
-+ mtk_hwlro_netdev_disable(dev);
-
-- if (!((dev->features ^ features) & NETIF_F_LRO))
-+ /* Set RX VLAN offloading */
-+ if (!(diff & NETIF_F_HW_VLAN_CTAG_RX))
- return 0;
-
-- if (!(features & NETIF_F_LRO))
-- mtk_hwlro_netdev_disable(dev);
-+ mtk_w32(eth, !!(features & NETIF_F_HW_VLAN_CTAG_RX),
-+ MTK_CDMP_EG_CTRL);
-
-- return err;
-+ /* sync features with other MAC */
-+ for (i = 0; i < MTK_MAC_COUNT; i++) {
-+ if (!eth->netdev[i] || eth->netdev[i] == dev)
-+ continue;
-+ eth->netdev[i]->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
-+ eth->netdev[i]->features |= features & NETIF_F_HW_VLAN_CTAG_RX;
-+ }
-+
-+ return 0;
- }
-
- /* wait for DMA to finish whatever it is doing before we start using it again */
-@@ -3104,11 +3126,45 @@ found:
- return NOTIFY_DONE;
- }
-
-+static bool mtk_uses_dsa(struct net_device *dev)
-+{
-+#if IS_ENABLED(CONFIG_NET_DSA)
-+ return netdev_uses_dsa(dev) &&
-+ dev->dsa_ptr->tag_ops->proto == DSA_TAG_PROTO_MTK;
-+#else
-+ return false;
-+#endif
-+}
-+
- static int mtk_open(struct net_device *dev)
- {
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
-- int err;
-+ int i, err;
-+
-+ if (mtk_uses_dsa(dev) && !eth->prog) {
-+ for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) {
-+ struct metadata_dst *md_dst = eth->dsa_meta[i];
-+
-+ if (md_dst)
-+ continue;
-+
-+ md_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
-+ GFP_KERNEL);
-+ if (!md_dst)
-+ return -ENOMEM;
-+
-+ md_dst->u.port_info.port_id = i;
-+ eth->dsa_meta[i] = md_dst;
-+ }
-+ } else {
-+ /* Hardware special tag parsing needs to be disabled if at least
-+ * one MAC does not use DSA.
-+ */
-+ u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
-+ val &= ~MTK_CDMP_STAG_EN;
-+ mtk_w32(eth, val, MTK_CDMP_IG_CTRL);
-+ }
-
- err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
- if (err) {
-@@ -3631,6 +3687,10 @@ static int mtk_hw_init(struct mtk_eth *e
- */
- val = mtk_r32(eth, MTK_CDMQ_IG_CTRL);
- mtk_w32(eth, val | MTK_CDMQ_STAG_EN, MTK_CDMQ_IG_CTRL);
-+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
-+ val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
-+ mtk_w32(eth, val | MTK_CDMP_STAG_EN, MTK_CDMP_IG_CTRL);
-+ }
-
- /* Enable RX VLan Offloading */
- mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
-@@ -3864,6 +3924,12 @@ static int mtk_free_dev(struct mtk_eth *
- free_netdev(eth->netdev[i]);
- }
-
-+ for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) {
-+ if (!eth->dsa_meta[i])
-+ break;
-+ metadata_dst_free(eth->dsa_meta[i]);
-+ }
-+
- return 0;
- }
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -22,6 +22,9 @@
- #include <linux/bpf_trace.h>
- #include "mtk_ppe.h"
-
-+#define MTK_MAX_DSA_PORTS 7
-+#define MTK_DSA_PORT_MASK GENMASK(2, 0)
-+
- #define MTK_QDMA_NUM_QUEUES 16
- #define MTK_QDMA_PAGE_SIZE 2048
- #define MTK_MAX_RX_LENGTH 1536
-@@ -105,6 +108,9 @@
- #define MTK_CDMQ_IG_CTRL 0x1400
- #define MTK_CDMQ_STAG_EN BIT(0)
-
-+/* CDMQ Exgress Control Register */
-+#define MTK_CDMQ_EG_CTRL 0x1404
-+
- /* CDMP Ingress Control Register */
- #define MTK_CDMP_IG_CTRL 0x400
- #define MTK_CDMP_STAG_EN BIT(0)
-@@ -1168,6 +1174,8 @@ struct mtk_eth {
-
- int ip_align;
-
-+ struct metadata_dst *dsa_meta[MTK_MAX_DSA_PORTS];
-+
- struct mtk_ppe *ppe[2];
- struct rhashtable flow_table;
-
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Thu, 3 Nov 2022 12:38:49 +0100
-Subject: [PATCH] net: ethernet: mtk_eth_soc: work around issue with sending
- small fragments
-
-When lots of frames are sent with a number of very small fragments, an
-internal FIFO can overflow, causing the DMA engine to lock up lock up and
-transmit attempts time out.
-
-Fix this on MT7986 by increasing the reserved FIFO space.
-Fix this on older chips by detecting the presence of small fragments and use
-skb_gso_segment + skb_linearize to deal with them.
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-@@ -1402,12 +1402,28 @@ static void mtk_wake_queue(struct mtk_et
- }
- }
-
-+static bool mtk_skb_has_small_frag(struct sk_buff *skb)
-+{
-+ int min_size = 16;
-+ int i;
-+
-+ if (skb_headlen(skb) < min_size)
-+ return true;
-+
-+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-+ if (skb_frag_size(&skb_shinfo(skb)->frags[i]) < min_size)
-+ return true;
-+
-+ return false;
-+}
-+
- static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
- {
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- struct mtk_tx_ring *ring = &eth->tx_ring;
- struct net_device_stats *stats = &dev->stats;
-+ struct sk_buff *segs, *next;
- bool gso = false;
- int tx_num;
-
-@@ -1429,6 +1445,18 @@ static netdev_tx_t mtk_start_xmit(struct
- return NETDEV_TX_BUSY;
- }
-
-+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) &&
-+ skb_is_gso(skb) && mtk_skb_has_small_frag(skb)) {
-+ segs = skb_gso_segment(skb, dev->features & ~NETIF_F_ALL_TSO);
-+ if (IS_ERR(segs))
-+ goto drop;
-+
-+ if (segs) {
-+ consume_skb(skb);
-+ skb = segs;
-+ }
-+ }
-+
- /* TSO: fill MSS info in tcp checksum field */
- if (skb_is_gso(skb)) {
- if (skb_cow_head(skb, 0)) {
-@@ -1444,8 +1472,14 @@ static netdev_tx_t mtk_start_xmit(struct
- }
- }
-
-- if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0)
-- goto drop;
-+ skb_list_walk_safe(skb, skb, next) {
-+ if ((!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) &&
-+ mtk_skb_has_small_frag(skb) && skb_linearize(skb)) ||
-+ mtk_tx_map(skb, dev, tx_num, ring, gso) < 0) {
-+ stats->tx_dropped++;
-+ dev_kfree_skb_any(skb);
-+ }
-+ }
-
- if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
- netif_tx_stop_all_queues(dev);
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -258,7 +258,7 @@
- #define MTK_CHK_DDONE_EN BIT(28)
- #define MTK_DMAD_WR_WDONE BIT(26)
- #define MTK_WCOMP_EN BIT(24)
--#define MTK_RESV_BUF (0x40 << 16)
-+#define MTK_RESV_BUF (0x80 << 16)
- #define MTK_MUTLI_CNT (0x4 << 12)
- #define MTK_LEAKY_BUCKET_EN BIT(11)
-
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Fri, 28 Oct 2022 12:54:48 +0200
-Subject: [PATCH] net: ethernet: mtk_eth_soc: set NETIF_F_ALL_TSO
-
-Significantly improves performance by avoiding unnecessary segmentation
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -49,8 +49,7 @@
- NETIF_F_RXCSUM | \
- NETIF_F_HW_VLAN_CTAG_TX | \
- NETIF_F_HW_VLAN_CTAG_RX | \
-- NETIF_F_SG | NETIF_F_TSO | \
-- NETIF_F_TSO6 | \
-+ NETIF_F_SG | NETIF_F_ALL_TSO | \
- NETIF_F_IPV6_CSUM |\
- NETIF_F_HW_TC)
- #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Thu, 3 Nov 2022 17:46:25 +0100
-Subject: [PATCH] net: ethernet: mtk_eth_soc: drop packets to WDMA if the
- ring is full
-
-Improves handling of DMA ring overflow.
-Clarify other WDMA drop related comment.
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-@@ -3745,9 +3745,12 @@ static int mtk_hw_init(struct mtk_eth *e
- mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP);
-
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
-- /* PSE should not drop port8 and port9 packets */
-+ /* PSE should not drop port8 and port9 packets from WDMA Tx */
- mtk_w32(eth, 0x00000300, PSE_DROP_CFG);
-
-+ /* PSE should drop packets to port 8/9 on WDMA Rx ring full */
-+ mtk_w32(eth, 0x00000300, PSE_PPE0_DROP);
-+
- /* PSE Free Queue Flow Control */
- mtk_w32(eth, 0x01fa01f4, PSE_FQFC_CFG2);
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -139,6 +139,7 @@
- #define PSE_FQFC_CFG1 0x100
- #define PSE_FQFC_CFG2 0x104
- #define PSE_DROP_CFG 0x108
-+#define PSE_PPE0_DROP 0x110
-
- /* PSE Input Queue Reservation Register*/
- #define PSE_IQ_REV(x) (0x140 + (((x) - 1) << 2))
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Thu, 17 Nov 2022 11:58:21 +0100
-Subject: [PATCH] net: ethernet: mtk_eth_soc: fix flow_offload related refcount
- bug
-
-Since we call flow_block_cb_decref on FLOW_BLOCK_UNBIND, we need to call
-flow_block_cb_incref unconditionally, even for a newly allocated cb.
-Fixes a use-after-free bug
-
-Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support")
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
-+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
-@@ -561,6 +561,7 @@ mtk_eth_setup_tc_block(struct net_device
- struct mtk_eth *eth = mac->hw;
- static LIST_HEAD(block_cb_list);
- struct flow_block_cb *block_cb;
-+ bool register_block = false;
- flow_setup_cb_t *cb;
-
- if (!eth->soc->offload_version)
-@@ -575,16 +576,20 @@ mtk_eth_setup_tc_block(struct net_device
- switch (f->command) {
- case FLOW_BLOCK_BIND:
- block_cb = flow_block_cb_lookup(f->block, cb, dev);
-- if (block_cb) {
-- flow_block_cb_incref(block_cb);
-- return 0;
-+ if (!block_cb) {
-+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
-+ if (IS_ERR(block_cb))
-+ return PTR_ERR(block_cb);
-+
-+ register_block = true;
- }
-- block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
-- if (IS_ERR(block_cb))
-- return PTR_ERR(block_cb);
-
-- flow_block_cb_add(block_cb, f);
-- list_add_tail(&block_cb->driver_list, &block_cb_list);
-+ flow_block_cb_incref(block_cb);
-+
-+ if (register_block) {
-+ flow_block_cb_add(block_cb, f);
-+ list_add_tail(&block_cb->driver_list, &block_cb_list);
-+ }
- return 0;
- case FLOW_BLOCK_UNBIND:
- block_cb = flow_block_cb_lookup(f->block, cb, dev);
+++ /dev/null
-From: Felix Fietkau <nbd@nbd.name>
-Date: Sun, 20 Nov 2022 23:01:00 +0100
-Subject: [PATCH] net: ethernet: mtk_eth_soc: drop generic vlan rx offload,
- only use DSA untagging
-
-Through testing I found out that hardware vlan rx offload support seems to
-have some hardware issues. At least when using multiple MACs and when receiving
-tagged packets on the secondary MAC, the hardware can sometimes start to emit
-wrong tags on the first MAC as well.
-
-In order to avoid such issues, drop the feature configuration and use the
-offload feature only for DSA hardware untagging on MT7621/MT7622 devices which
-only use one MAC.
-
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
-@@ -2001,29 +2001,16 @@ static int mtk_poll_rx(struct napi_struc
- if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
- mtk_ppe_check_skb(eth->ppe[0], skb, hash);
-
-- if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
-- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
-- if (trxd.rxd3 & RX_DMA_VTAG_V2)
-- __vlan_hwaccel_put_tag(skb,
-- htons(RX_DMA_VPID(trxd.rxd4)),
-- RX_DMA_VID(trxd.rxd4));
-- } else if (trxd.rxd2 & RX_DMA_VTAG) {
-- __vlan_hwaccel_put_tag(skb, htons(RX_DMA_VPID(trxd.rxd3)),
-- RX_DMA_VID(trxd.rxd3));
-- }
-- }
--
- /* When using VLAN untagging in combination with DSA, the
- * hardware treats the MTK special tag as a VLAN and untags it.
- */
-- if (skb_vlan_tag_present(skb) && netdev_uses_dsa(netdev)) {
-- unsigned int port = ntohs(skb->vlan_proto) & GENMASK(2, 0);
-+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) &&
-+ (trxd.rxd2 & RX_DMA_VTAG) && netdev_uses_dsa(netdev)) {
-+ unsigned int port = RX_DMA_VPID(trxd.rxd3) & GENMASK(2, 0);
-
- if (port < ARRAY_SIZE(eth->dsa_meta) &&
- eth->dsa_meta[port])
- skb_dst_set_noref(skb, &eth->dsa_meta[port]->dst);
--
-- __vlan_hwaccel_clear_tag(skb);
- }
-
- skb_record_rx_queue(skb, 0);
-@@ -2840,29 +2827,11 @@ static netdev_features_t mtk_fix_feature
-
- static int mtk_set_features(struct net_device *dev, netdev_features_t features)
- {
-- struct mtk_mac *mac = netdev_priv(dev);
-- struct mtk_eth *eth = mac->hw;
- netdev_features_t diff = dev->features ^ features;
-- int i;
-
- if ((diff & NETIF_F_LRO) && !(features & NETIF_F_LRO))
- mtk_hwlro_netdev_disable(dev);
-
-- /* Set RX VLAN offloading */
-- if (!(diff & NETIF_F_HW_VLAN_CTAG_RX))
-- return 0;
--
-- mtk_w32(eth, !!(features & NETIF_F_HW_VLAN_CTAG_RX),
-- MTK_CDMP_EG_CTRL);
--
-- /* sync features with other MAC */
-- for (i = 0; i < MTK_MAC_COUNT; i++) {
-- if (!eth->netdev[i] || eth->netdev[i] == dev)
-- continue;
-- eth->netdev[i]->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
-- eth->netdev[i]->features |= features & NETIF_F_HW_VLAN_CTAG_RX;
-- }
--
- return 0;
- }
-
-@@ -3176,30 +3145,6 @@ static int mtk_open(struct net_device *d
- struct mtk_eth *eth = mac->hw;
- int i, err;
-
-- if (mtk_uses_dsa(dev) && !eth->prog) {
-- for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) {
-- struct metadata_dst *md_dst = eth->dsa_meta[i];
--
-- if (md_dst)
-- continue;
--
-- md_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
-- GFP_KERNEL);
-- if (!md_dst)
-- return -ENOMEM;
--
-- md_dst->u.port_info.port_id = i;
-- eth->dsa_meta[i] = md_dst;
-- }
-- } else {
-- /* Hardware special tag parsing needs to be disabled if at least
-- * one MAC does not use DSA.
-- */
-- u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
-- val &= ~MTK_CDMP_STAG_EN;
-- mtk_w32(eth, val, MTK_CDMP_IG_CTRL);
-- }
--
- err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
- if (err) {
- netdev_err(dev, "%s: could not attach PHY: %d\n", __func__,
-@@ -3240,6 +3185,35 @@ static int mtk_open(struct net_device *d
- phylink_start(mac->phylink);
- netif_tx_start_all_queues(dev);
-
-+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
-+ return 0;
-+
-+ if (mtk_uses_dsa(dev) && !eth->prog) {
-+ for (i = 0; i < ARRAY_SIZE(eth->dsa_meta); i++) {
-+ struct metadata_dst *md_dst = eth->dsa_meta[i];
-+
-+ if (md_dst)
-+ continue;
-+
-+ md_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
-+ GFP_KERNEL);
-+ if (!md_dst)
-+ return -ENOMEM;
-+
-+ md_dst->u.port_info.port_id = i;
-+ eth->dsa_meta[i] = md_dst;
-+ }
-+ } else {
-+ /* Hardware special tag parsing needs to be disabled if at least
-+ * one MAC does not use DSA.
-+ */
-+ u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
-+ val &= ~MTK_CDMP_STAG_EN;
-+ mtk_w32(eth, val, MTK_CDMP_IG_CTRL);
-+
-+ mtk_w32(eth, 0, MTK_CDMP_EG_CTRL);
-+ }
-+
- return 0;
- }
-
-@@ -3724,10 +3698,9 @@ static int mtk_hw_init(struct mtk_eth *e
- if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
- val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
- mtk_w32(eth, val | MTK_CDMP_STAG_EN, MTK_CDMP_IG_CTRL);
-- }
-
-- /* Enable RX VLan Offloading */
-- mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
-+ mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
-+ }
-
- /* set interrupt delays based on current Net DIM sample */
- mtk_dim_rx(ð->rx_dim.work);
-@@ -4365,7 +4338,7 @@ static int mtk_add_mac(struct mtk_eth *e
- eth->netdev[id]->hw_features |= NETIF_F_LRO;
-
- eth->netdev[id]->vlan_features = eth->soc->hw_features &
-- ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
-+ ~NETIF_F_HW_VLAN_CTAG_TX;
- eth->netdev[id]->features |= eth->soc->hw_features;
- eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops;
-
---- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
-@@ -48,7 +48,6 @@
- #define MTK_HW_FEATURES (NETIF_F_IP_CSUM | \
- NETIF_F_RXCSUM | \
- NETIF_F_HW_VLAN_CTAG_TX | \
-- NETIF_F_HW_VLAN_CTAG_RX | \
- NETIF_F_SG | NETIF_F_ALL_TSO | \
- NETIF_F_IPV6_CSUM |\
- NETIF_F_HW_TC)