--- /dev/null
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Sun, 12 Apr 2015 09:58:56 +0200
+Subject: [PATCH] bgmac: simplify tx ring index handling
+
+Keep incrementing ring->start and ring->end instead of wrapping them to
+the actual ring slot index (derived by modulo where needed). This
+simplifies the calculation of the number of free slots.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -142,11 +142,10 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ {
+ struct device *dma_dev = bgmac->core->dma_dev;
+ struct net_device *net_dev = bgmac->net_dev;
+- struct bgmac_slot_info *slot = &ring->slots[ring->end];
+- int free_slots;
++ int index = ring->end % BGMAC_TX_RING_SLOTS;
++ struct bgmac_slot_info *slot = &ring->slots[index];
+ int nr_frags;
+ u32 flags;
+- int index = ring->end;
+ int i;
+
+ if (skb->len > BGMAC_DESC_CTL1_LEN) {
+@@ -158,13 +157,7 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ skb_checksum_help(skb);
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+-
+- if (ring->start <= ring->end)
+- free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
+- else
+- free_slots = ring->start - ring->end;
+-
+- if (free_slots <= nr_frags + 1) {
++ if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) {
+ bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+ netif_stop_queue(net_dev);
+ return NETDEV_TX_BUSY;
+@@ -200,7 +193,7 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ }
+
+ slot->skb = skb;
+-
++ ring->end += nr_frags + 1;
+ netdev_sent_queue(net_dev, skb->len);
+
+ wmb();
+@@ -208,13 +201,12 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ /* Increase ring->end to point empty slot. We tell hardware the first
+ * slot it should *not* read.
+ */
+- ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
+ bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
+ ring->index_base +
+- ring->end * sizeof(struct bgmac_dma_desc));
++ (ring->end % BGMAC_TX_RING_SLOTS) *
++ sizeof(struct bgmac_dma_desc));
+
+- free_slots -= nr_frags + 1;
+- if (free_slots < 8)
++ if (ring->end - ring->start >= BGMAC_TX_RING_SLOTS - 8)
+ netif_stop_queue(net_dev);
+
+ return NETDEV_TX_OK;
+@@ -256,17 +248,17 @@ static void bgmac_dma_tx_free(struct bgm
+ empty_slot &= BGMAC_DMA_TX_STATDPTR;
+ empty_slot /= sizeof(struct bgmac_dma_desc);
+
+- while (ring->start != empty_slot) {
+- struct bgmac_slot_info *slot = &ring->slots[ring->start];
+- u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
+- int len = ctl1 & BGMAC_DESC_CTL1_LEN;
++ while (ring->start != ring->end) {
++ int slot_idx = ring->start % BGMAC_TX_RING_SLOTS;
++ struct bgmac_slot_info *slot = &ring->slots[slot_idx];
++ u32 ctl1;
++ int len;
+
+- if (!slot->dma_addr) {
+- bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+- ring->start, ring->end);
+- goto next;
+- }
++ if (slot_idx == empty_slot)
++ break;
+
++ ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1);
++ len = ctl1 & BGMAC_DESC_CTL1_LEN;
+ if (ctl1 & BGMAC_DESC_CTL0_SOF)
+ /* Unmap no longer used buffer */
+ dma_unmap_single(dma_dev, slot->dma_addr, len,
+@@ -284,10 +276,8 @@ static void bgmac_dma_tx_free(struct bgm
+ slot->skb = NULL;
+ }
+
+-next:
+ slot->dma_addr = 0;
+- if (++ring->start >= BGMAC_TX_RING_SLOTS)
+- ring->start = 0;
++ ring->start++;
+ freed = true;
+ }
+
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -414,10 +414,10 @@ enum bgmac_dma_ring_type {
+ * empty.
+ */
+ struct bgmac_dma_ring {
+- u16 num_slots;
+- u16 start;
+- u16 end;
++ u32 start;
++ u32 end;
+
++ u16 num_slots;
+ u16 mmio_base;
+ struct bgmac_dma_desc *cpu_base;
+ dma_addr_t dma_base;
--- /dev/null
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Sun, 12 Apr 2015 10:08:04 +0200
+Subject: [PATCH] bgmac: leave interrupts disabled as long as there is work
+ to do
+
+Always poll rx and tx during NAPI poll instead of relying on the status
+of the first interrupt. This prevents bgmac_poll from leaving unfinished
+work around until the next IRQ.
+In my tests this makes bridging/routing throughput under heavy load more
+stable and ensures that no new IRQs arrive as long as bgmac_poll uses up
+the entire budget.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -1105,8 +1105,6 @@ static void bgmac_chip_reset(struct bgma
+ bgmac_phy_init(bgmac);
+
+ netdev_reset_queue(bgmac->net_dev);
+-
+- bgmac->int_status = 0;
+ }
+
+ static void bgmac_chip_intrs_on(struct bgmac *bgmac)
+@@ -1221,14 +1219,13 @@ static irqreturn_t bgmac_interrupt(int i
+ if (!int_status)
+ return IRQ_NONE;
+
+- /* Ack */
+- bgmac_write(bgmac, BGMAC_INT_STATUS, int_status);
++ int_status &= ~(BGMAC_IS_TX0 | BGMAC_IS_RX);
++ if (int_status)
++ bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", int_status);
+
+ /* Disable new interrupts until handling existing ones */
+ bgmac_chip_intrs_off(bgmac);
+
+- bgmac->int_status = int_status;
+-
+ napi_schedule(&bgmac->napi);
+
+ return IRQ_HANDLED;
+@@ -1237,25 +1234,17 @@ static irqreturn_t bgmac_interrupt(int i
+ static int bgmac_poll(struct napi_struct *napi, int weight)
+ {
+ struct bgmac *bgmac = container_of(napi, struct bgmac, napi);
+- struct bgmac_dma_ring *ring;
+ int handled = 0;
+
+- if (bgmac->int_status & BGMAC_IS_TX0) {
+- ring = &bgmac->tx_ring[0];
+- bgmac_dma_tx_free(bgmac, ring);
+- bgmac->int_status &= ~BGMAC_IS_TX0;
+- }
++ /* Ack */
++ bgmac_write(bgmac, BGMAC_INT_STATUS, ~0);
+
+- if (bgmac->int_status & BGMAC_IS_RX) {
+- ring = &bgmac->rx_ring[0];
+- handled += bgmac_dma_rx_read(bgmac, ring, weight);
+- bgmac->int_status &= ~BGMAC_IS_RX;
+- }
++ bgmac_dma_tx_free(bgmac, &bgmac->tx_ring[0]);
++ handled += bgmac_dma_rx_read(bgmac, &bgmac->rx_ring[0], weight);
+
+- if (bgmac->int_status) {
+- bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", bgmac->int_status);
+- bgmac->int_status = 0;
+- }
++ /* poll again if more events arrived in the mean time */
++ if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX))
++ return handled;
+
+ if (handled < weight) {
+ napi_complete(napi);
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -452,7 +452,6 @@ struct bgmac {
+
+ /* Int */
+ u32 int_mask;
+- u32 int_status;
+
+ /* Current MAC state */
+ int mac_speed;
--- /dev/null
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Sun, 12 Apr 2015 10:13:28 +0200
+Subject: [PATCH] bgmac: set received skb headroom to NET_SKB_PAD
+
+A packet buffer offset of 30 bytes is inefficient, because the first 2
+bytes end up in a different cacheline.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -342,13 +342,13 @@ static int bgmac_dma_rx_skb_for_slot(str
+ return -ENOMEM;
+
+ /* Poison - if everything goes fine, hardware will overwrite it */
+- rx = buf;
++ rx = buf + BGMAC_RX_BUF_OFFSET;
+ rx->len = cpu_to_le16(0xdead);
+ rx->flags = cpu_to_le16(0xbeef);
+
+ /* Map skb for the DMA */
+- dma_addr = dma_map_single(dma_dev, buf, BGMAC_RX_BUF_SIZE,
+- DMA_FROM_DEVICE);
++ dma_addr = dma_map_single(dma_dev, buf + BGMAC_RX_BUF_OFFSET,
++ BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dma_dev, dma_addr)) {
+ bgmac_err(bgmac, "DMA mapping error\n");
+ put_page(virt_to_head_page(buf));
+@@ -399,7 +399,7 @@ static int bgmac_dma_rx_read(struct bgma
+ while (ring->start != ring->end) {
+ struct device *dma_dev = bgmac->core->dma_dev;
+ struct bgmac_slot_info *slot = &ring->slots[ring->start];
+- struct bgmac_rx_header *rx = slot->buf;
++ struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
+ struct sk_buff *skb;
+ void *buf = slot->buf;
+ u16 len, flags;
+@@ -450,8 +450,10 @@ static int bgmac_dma_rx_read(struct bgma
+ BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+
+ skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE);
+- skb_put(skb, BGMAC_RX_FRAME_OFFSET + len);
+- skb_pull(skb, BGMAC_RX_FRAME_OFFSET);
++ skb_put(skb, BGMAC_RX_FRAME_OFFSET +
++ BGMAC_RX_BUF_OFFSET + len);
++ skb_pull(skb, BGMAC_RX_FRAME_OFFSET +
++ BGMAC_RX_BUF_OFFSET);
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, bgmac->net_dev);
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -360,9 +360,11 @@
+
+ #define BGMAC_RX_HEADER_LEN 28 /* Last 24 bytes are unused. Well... */
+ #define BGMAC_RX_FRAME_OFFSET 30 /* There are 2 unused bytes between header and real data */
++#define BGMAC_RX_BUF_OFFSET (NET_SKB_PAD + NET_IP_ALIGN - \
++ BGMAC_RX_FRAME_OFFSET)
+ #define BGMAC_RX_MAX_FRAME_SIZE 1536 /* Copied from b44/tg3 */
+ #define BGMAC_RX_BUF_SIZE (BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE)
+-#define BGMAC_RX_ALLOC_SIZE (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE) + \
++#define BGMAC_RX_ALLOC_SIZE (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE + BGMAC_RX_BUF_OFFSET) + \
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
+ #define BGMAC_BFL_ENETROBO 0x0010 /* has ephy roboswitch spi */
--- /dev/null
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Sun, 12 Apr 2015 11:59:47 +0200
+Subject: [PATCH] bgmac: fix DMA rx corruption
+
+The driver needs to inform the hardware about the first invalid (not yet
+filled) rx slot, by writing its DMA descriptor pointer offset to the
+BGMAC_DMA_RX_INDEX register.
+
+This register was set to a value exceeding the rx ring size, effectively
+allowing the hardware constant access to the full ring, regardless of
+which slots are initialized.
+
+Fix this by updating the register in bgmac_dma_rx_setup_desc.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -380,6 +380,12 @@ static void bgmac_dma_rx_setup_desc(stru
+ dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr));
+ dma_desc->ctl0 = cpu_to_le32(ctl0);
+ dma_desc->ctl1 = cpu_to_le32(ctl1);
++
++ desc_idx = (desc_idx + 1) % BGMAC_RX_RING_SLOTS;
++
++ bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
++ ring->index_base +
++ desc_idx * sizeof(struct bgmac_dma_desc));
+ }
+
+ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+@@ -394,9 +400,7 @@ static int bgmac_dma_rx_read(struct bgma
+ end_slot &= BGMAC_DMA_RX_STATDPTR;
+ end_slot /= sizeof(struct bgmac_dma_desc);
+
+- ring->end = end_slot;
+-
+- while (ring->start != ring->end) {
++ while (ring->start != end_slot) {
+ struct device *dma_dev = bgmac->core->dma_dev;
+ struct bgmac_slot_info *slot = &ring->slots[ring->start];
+ struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
+@@ -693,10 +697,6 @@ static void bgmac_dma_init(struct bgmac
+ for (j = 0; j < ring->num_slots; j++)
+ bgmac_dma_rx_setup_desc(bgmac, ring, j);
+
+- bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
+- ring->index_base +
+- ring->num_slots * sizeof(struct bgmac_dma_desc));
+-
+ ring->start = 0;
+ ring->end = 0;
+ }