From: Felix Fietkau Date: Sat, 21 May 2016 20:16:56 +0000 (+0200) Subject: mvebu: backport upstream ethernet driver improvements and enable buffer manager support X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=df98acc6a1252456608958fc17f7da3ae4cfb46c;p=openwrt%2Fstaging%2Fkaloz.git mvebu: backport upstream ethernet driver improvements and enable buffer manager support Signed-off-by: Felix Fietkau --- diff --git a/target/linux/mvebu/config-4.4 b/target/linux/mvebu/config-4.4 index 163af67347..f5b2346e44 100644 --- a/target/linux/mvebu/config-4.4 +++ b/target/linux/mvebu/config-4.4 @@ -209,6 +209,7 @@ CONFIG_HAVE_UID16=y CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y CONFIG_HIGHMEM=y # CONFIG_HIGHPTE is not set +CONFIG_HWBM=y CONFIG_HWMON=y CONFIG_HZ_FIXED=0 CONFIG_HZ_PERIODIC=y @@ -274,6 +275,7 @@ CONFIG_MVEBU_DEVBUS=y CONFIG_MVEBU_MBUS=y CONFIG_MVMDIO=y CONFIG_MVNETA=y +CONFIG_MVNETA_BM=y CONFIG_MVSW61XX_PHY=y CONFIG_MV_XOR=y CONFIG_NEED_DMA_MAP_STATE=y diff --git a/target/linux/mvebu/patches-4.4/021-mtd-pxa3xx_nand-Increase-the-initial-chunk-size.patch b/target/linux/mvebu/patches-4.4/021-mtd-pxa3xx_nand-Increase-the-initial-chunk-size.patch index f928f46cc6..0b0e047c02 100644 --- a/target/linux/mvebu/patches-4.4/021-mtd-pxa3xx_nand-Increase-the-initial-chunk-size.patch +++ b/target/linux/mvebu/patches-4.4/021-mtd-pxa3xx_nand-Increase-the-initial-chunk-size.patch @@ -31,7 +31,7 @@ Signed-off-by: Brian Norris --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c -@@ -1636,7 +1636,7 @@ static int pxa3xx_nand_scan(struct mtd_i +@@ -1637,7 +1637,7 @@ static int pxa3xx_nand_scan(struct mtd_i goto KEEP_CONFIG; /* Set a default chunk size */ diff --git a/target/linux/mvebu/patches-4.4/022-mtd-pxa3xx_nand-Fix-initial-controller-configuration.patch b/target/linux/mvebu/patches-4.4/022-mtd-pxa3xx_nand-Fix-initial-controller-configuration.patch index 260e7dc1bc..7d07fb9423 100644 --- a/target/linux/mvebu/patches-4.4/022-mtd-pxa3xx_nand-Fix-initial-controller-configuration.patch +++ b/target/linux/mvebu/patches-4.4/022-mtd-pxa3xx_nand-Fix-initial-controller-configuration.patch @@ -19,7 +19,7 @@ Signed-off-by: Brian Norris --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c -@@ -1419,34 +1419,43 @@ static int pxa3xx_nand_waitfunc(struct m +@@ -1420,34 +1420,43 @@ static int pxa3xx_nand_waitfunc(struct m return NAND_STATUS_READY; } @@ -71,7 +71,7 @@ Signed-off-by: Brian Norris info->ndtr0cs0 = nand_readl(info, NDTR0CS0); info->ndtr1cs0 = nand_readl(info, NDTR1CS0); return 0; -@@ -1635,10 +1644,7 @@ static int pxa3xx_nand_scan(struct mtd_i +@@ -1636,10 +1645,7 @@ static int pxa3xx_nand_scan(struct mtd_i if (pdata->keep_config && !pxa3xx_nand_detect_config(info)) goto KEEP_CONFIG; @@ -83,7 +83,7 @@ Signed-off-by: Brian Norris if (ret) return ret; -@@ -1651,7 +1657,6 @@ static int pxa3xx_nand_scan(struct mtd_i +@@ -1652,7 +1658,6 @@ static int pxa3xx_nand_scan(struct mtd_i } KEEP_CONFIG: @@ -91,7 +91,7 @@ Signed-off-by: Brian Norris if (info->reg_ndcr & NDCR_DWIDTH_M) chip->options |= NAND_BUSWIDTH_16; -@@ -1736,6 +1741,10 @@ KEEP_CONFIG: +@@ -1737,6 +1742,10 @@ KEEP_CONFIG: host->row_addr_cycles = 3; else host->row_addr_cycles = 2; diff --git a/target/linux/mvebu/patches-4.4/023-bus-mvebu-mbus-provide-api-for-obtaining-IO-and-DRAM.patch b/target/linux/mvebu/patches-4.4/023-bus-mvebu-mbus-provide-api-for-obtaining-IO-and-DRAM.patch new file mode 100644 index 0000000000..58687f36ad --- /dev/null +++ 
b/target/linux/mvebu/patches-4.4/023-bus-mvebu-mbus-provide-api-for-obtaining-IO-and-DRAM.patch @@ -0,0 +1,94 @@ +From: Marcin Wojtas +Date: Mon, 14 Mar 2016 09:39:02 +0100 +Subject: [PATCH] bus: mvebu-mbus: provide api for obtaining IO and DRAM window + information + +This commit enables finding appropriate mbus window and obtaining its +target id and attribute for given physical address in two separate +routines, both for IO and DRAM windows. This functionality +is needed for Armada XP/38x Network Controller's Buffer Manager and +PnC configuration. + +[gregory.clement@free-electrons.com: Fix size test for +mvebu_mbus_get_dram_win_info] + +Signed-off-by: Marcin Wojtas +[DRAM window information reference in LKv3.10] +Signed-off-by: Evan Wang +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. Miller +--- + +--- a/drivers/bus/mvebu-mbus.c ++++ b/drivers/bus/mvebu-mbus.c +@@ -948,6 +948,58 @@ void mvebu_mbus_get_pcie_io_aperture(str + *res = mbus_state.pcie_io_aperture; + } + ++int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr) ++{ ++ const struct mbus_dram_target_info *dram; ++ int i; ++ ++ /* Get dram info */ ++ dram = mv_mbus_dram_info(); ++ if (!dram) { ++ pr_err("missing DRAM information\n"); ++ return -ENODEV; ++ } ++ ++ /* Try to find matching DRAM window for phyaddr */ ++ for (i = 0; i < dram->num_cs; i++) { ++ const struct mbus_dram_window *cs = dram->cs + i; ++ ++ if (cs->base <= phyaddr && ++ phyaddr <= (cs->base + cs->size - 1)) { ++ *target = dram->mbus_dram_target_id; ++ *attr = cs->mbus_attr; ++ return 0; ++ } ++ } ++ ++ pr_err("invalid dram address 0x%x\n", phyaddr); ++ return -EINVAL; ++} ++EXPORT_SYMBOL_GPL(mvebu_mbus_get_dram_win_info); ++ ++int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target, ++ u8 *attr) ++{ ++ int win; ++ ++ for (win = 0; win < mbus_state.soc->num_wins; win++) { ++ u64 wbase; ++ int enabled; ++ ++ mvebu_mbus_read_window(&mbus_state, win, &enabled, &wbase, ++ size, target, attr, NULL); ++ ++ if (!enabled) ++ continue; ++ ++ if (wbase <= phyaddr && phyaddr <= wbase + *size) ++ return win; ++ } ++ ++ return -EINVAL; ++} ++EXPORT_SYMBOL_GPL(mvebu_mbus_get_io_win_info); ++ + static __init int mvebu_mbus_debugfs_init(void) + { + struct mvebu_mbus_state *s = &mbus_state; +--- a/include/linux/mbus.h ++++ b/include/linux/mbus.h +@@ -69,6 +69,9 @@ static inline const struct mbus_dram_tar + int mvebu_mbus_save_cpu_target(u32 *store_addr); + void mvebu_mbus_get_pcie_mem_aperture(struct resource *res); + void mvebu_mbus_get_pcie_io_aperture(struct resource *res); ++int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr); ++int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target, ++ u8 *attr); + int mvebu_mbus_add_window_remap_by_id(unsigned int target, + unsigned int attribute, + phys_addr_t base, size_t size, diff --git a/target/linux/mvebu/patches-4.4/030-mvneta-consolidate-autoneg-enabling.patch b/target/linux/mvebu/patches-4.4/030-mvneta-consolidate-autoneg-enabling.patch new file mode 100644 index 0000000000..1d98c7decc --- /dev/null +++ b/target/linux/mvebu/patches-4.4/030-mvneta-consolidate-autoneg-enabling.patch @@ -0,0 +1,55 @@ +From: Stas Sergeev +Date: Wed, 2 Dec 2015 20:33:56 +0300 +Subject: [PATCH] mvneta: consolidate autoneg enabling + +This moves autoneg-related bit manipulations to the single place. + +CC: Thomas Petazzoni +CC: netdev@vger.kernel.org +CC: linux-kernel@vger.kernel.org + +Signed-off-by: Stas Sergeev +Signed-off-by: David S. 
Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -1067,15 +1067,28 @@ static void mvneta_defaults_set(struct m + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN; + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); ++ + val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); + val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); ++ ++ val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); ++ val |= MVNETA_GMAC2_INBAND_AN_ENABLE; ++ mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + } else { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN); + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); ++ ++ val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); ++ val &= ~MVNETA_GMAC_1MS_CLOCK_ENABLE; ++ mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); ++ ++ val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); ++ val &= ~MVNETA_GMAC2_INBAND_AN_ENABLE; ++ mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + } + + mvneta_set_ucast_table(pp, -1); +@@ -3230,9 +3243,6 @@ static int mvneta_port_power_up(struct m + return -EINVAL; + } + +- if (pp->use_inband_status) +- ctrl |= MVNETA_GMAC2_INBAND_AN_ENABLE; +- + /* Cancel Port Reset */ + ctrl &= ~MVNETA_GMAC2_PORT_RESET; + mvreg_write(pp, MVNETA_GMAC_CTRL_2, ctrl); diff --git a/target/linux/mvebu/patches-4.4/031-mvneta-implement-ethtool-autonegotiation-control.patch b/target/linux/mvebu/patches-4.4/031-mvneta-implement-ethtool-autonegotiation-control.patch new file mode 100644 index 0000000000..626b511c45 --- /dev/null +++ b/target/linux/mvebu/patches-4.4/031-mvneta-implement-ethtool-autonegotiation-control.patch @@ -0,0 +1,165 @@ +From: Stas Sergeev +Date: Wed, 2 Dec 2015 20:35:11 +0300 +Subject: [PATCH] mvneta: implement ethtool autonegotiation control + +This patch allows to do +ethtool -s eth0 autoneg off +ethtool -s eth0 autoneg on +to disable or enable autonegotiation at run-time. +Without that functionality, the only way to control the autonegotiation +is to modify the device tree. + +This is needed if you plan to use the same kernel with +different ethernet switches, the ones that support the in-band +status and the ones that not. + +CC: Thomas Petazzoni +CC: netdev@vger.kernel.org +CC: linux-kernel@vger.kernel.org + +Signed-off-by: Stas Sergeev +Signed-off-by: David S. 
Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -371,7 +371,7 @@ struct mvneta_port { + unsigned int duplex; + unsigned int speed; + unsigned int tx_csum_limit; +- int use_inband_status:1; ++ unsigned int use_inband_status:1; + + u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)]; + }; +@@ -973,6 +973,44 @@ static void mvneta_set_other_mcast_table + mvreg_write(pp, MVNETA_DA_FILT_OTH_MCAST + offset, val); + } + ++static void mvneta_set_autoneg(struct mvneta_port *pp, int enable) ++{ ++ u32 val; ++ ++ if (enable) { ++ val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); ++ val &= ~(MVNETA_GMAC_FORCE_LINK_PASS | ++ MVNETA_GMAC_FORCE_LINK_DOWN | ++ MVNETA_GMAC_AN_FLOW_CTRL_EN); ++ val |= MVNETA_GMAC_INBAND_AN_ENABLE | ++ MVNETA_GMAC_AN_SPEED_EN | ++ MVNETA_GMAC_AN_DUPLEX_EN; ++ mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); ++ ++ val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); ++ val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; ++ mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); ++ ++ val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); ++ val |= MVNETA_GMAC2_INBAND_AN_ENABLE; ++ mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); ++ } else { ++ val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); ++ val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | ++ MVNETA_GMAC_AN_SPEED_EN | ++ MVNETA_GMAC_AN_DUPLEX_EN); ++ mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); ++ ++ val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); ++ val &= ~MVNETA_GMAC_1MS_CLOCK_ENABLE; ++ mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); ++ ++ val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); ++ val &= ~MVNETA_GMAC2_INBAND_AN_ENABLE; ++ mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); ++ } ++} ++ + /* This method sets defaults to the NETA port: + * Clears interrupt Cause and Mask registers. + * Clears all MAC tables. 
+@@ -1058,39 +1096,7 @@ static void mvneta_defaults_set(struct m + val &= ~MVNETA_PHY_POLLING_ENABLE; + mvreg_write(pp, MVNETA_UNIT_CONTROL, val); + +- if (pp->use_inband_status) { +- val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); +- val &= ~(MVNETA_GMAC_FORCE_LINK_PASS | +- MVNETA_GMAC_FORCE_LINK_DOWN | +- MVNETA_GMAC_AN_FLOW_CTRL_EN); +- val |= MVNETA_GMAC_INBAND_AN_ENABLE | +- MVNETA_GMAC_AN_SPEED_EN | +- MVNETA_GMAC_AN_DUPLEX_EN; +- mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); +- +- val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); +- val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; +- mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); +- +- val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); +- val |= MVNETA_GMAC2_INBAND_AN_ENABLE; +- mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); +- } else { +- val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); +- val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | +- MVNETA_GMAC_AN_SPEED_EN | +- MVNETA_GMAC_AN_DUPLEX_EN); +- mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); +- +- val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); +- val &= ~MVNETA_GMAC_1MS_CLOCK_ENABLE; +- mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); +- +- val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); +- val &= ~MVNETA_GMAC2_INBAND_AN_ENABLE; +- mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); +- } +- ++ mvneta_set_autoneg(pp, pp->use_inband_status); + mvneta_set_ucast_table(pp, -1); + mvneta_set_special_mcast_table(pp, -1); + mvneta_set_other_mcast_table(pp, -1); +@@ -2956,10 +2962,43 @@ int mvneta_ethtool_get_settings(struct n + int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) + { + struct mvneta_port *pp = netdev_priv(dev); ++ struct phy_device *phydev = pp->phy_dev; + +- if (!pp->phy_dev) ++ if (!phydev) + return -ENODEV; + ++ if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) { ++ u32 val; ++ ++ mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE); ++ ++ if (cmd->autoneg == AUTONEG_DISABLE) { ++ val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); ++ val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED | ++ MVNETA_GMAC_CONFIG_GMII_SPEED | ++ MVNETA_GMAC_CONFIG_FULL_DUPLEX); ++ ++ if (phydev->duplex) ++ val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX; ++ ++ if (phydev->speed == SPEED_1000) ++ val |= MVNETA_GMAC_CONFIG_GMII_SPEED; ++ else if (phydev->speed == SPEED_100) ++ val |= MVNETA_GMAC_CONFIG_MII_SPEED; ++ ++ mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); ++ } ++ ++ pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE); ++ netdev_info(pp->dev, "autoneg status set to %i\n", ++ pp->use_inband_status); ++ ++ if (netif_running(dev)) { ++ mvneta_port_down(pp); ++ mvneta_port_up(pp); ++ } ++ } ++ + return phy_ethtool_sset(pp->phy_dev, cmd); + } + diff --git a/target/linux/mvebu/patches-4.4/032-net-mvneta-Make-the-default-queue-related-for-each-p.patch b/target/linux/mvebu/patches-4.4/032-net-mvneta-Make-the-default-queue-related-for-each-p.patch new file mode 100644 index 0000000000..54c93998f6 --- /dev/null +++ b/target/linux/mvebu/patches-4.4/032-net-mvneta-Make-the-default-queue-related-for-each-p.patch @@ -0,0 +1,131 @@ +From: Gregory CLEMENT +Date: Wed, 9 Dec 2015 18:23:48 +0100 +Subject: [PATCH] net: mvneta: Make the default queue related for each port + +Instead of using the same default queue for all the port. Move it in the +port struct. It will allow have a different default queue for each port. + +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. 
Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -356,6 +356,7 @@ struct mvneta_port { + struct mvneta_tx_queue *txqs; + struct net_device *dev; + struct notifier_block cpu_notifier; ++ int rxq_def; + + /* Core clock */ + struct clk *clk; +@@ -819,7 +820,7 @@ static void mvneta_port_up(struct mvneta + mvreg_write(pp, MVNETA_TXQ_CMD, q_map); + + /* Enable all initialized RXQs. */ +- mvreg_write(pp, MVNETA_RXQ_CMD, BIT(rxq_def)); ++ mvreg_write(pp, MVNETA_RXQ_CMD, BIT(pp->rxq_def)); + } + + /* Stop the Ethernet port activity */ +@@ -1067,7 +1068,7 @@ static void mvneta_defaults_set(struct m + mvreg_write(pp, MVNETA_ACC_MODE, val); + + /* Update val of portCfg register accordingly with all RxQueue types */ +- val = MVNETA_PORT_CONFIG_DEFL_VALUE(rxq_def); ++ val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def); + mvreg_write(pp, MVNETA_PORT_CONFIG, val); + + val = 0; +@@ -2101,19 +2102,19 @@ static void mvneta_set_rx_mode(struct ne + if (dev->flags & IFF_PROMISC) { + /* Accept all: Multicast + Unicast */ + mvneta_rx_unicast_promisc_set(pp, 1); +- mvneta_set_ucast_table(pp, rxq_def); +- mvneta_set_special_mcast_table(pp, rxq_def); +- mvneta_set_other_mcast_table(pp, rxq_def); ++ mvneta_set_ucast_table(pp, pp->rxq_def); ++ mvneta_set_special_mcast_table(pp, pp->rxq_def); ++ mvneta_set_other_mcast_table(pp, pp->rxq_def); + } else { + /* Accept single Unicast */ + mvneta_rx_unicast_promisc_set(pp, 0); + mvneta_set_ucast_table(pp, -1); +- mvneta_mac_addr_set(pp, dev->dev_addr, rxq_def); ++ mvneta_mac_addr_set(pp, dev->dev_addr, pp->rxq_def); + + if (dev->flags & IFF_ALLMULTI) { + /* Accept all multicast */ +- mvneta_set_special_mcast_table(pp, rxq_def); +- mvneta_set_other_mcast_table(pp, rxq_def); ++ mvneta_set_special_mcast_table(pp, pp->rxq_def); ++ mvneta_set_other_mcast_table(pp, pp->rxq_def); + } else { + /* Accept only initialized multicast */ + mvneta_set_special_mcast_table(pp, -1); +@@ -2122,7 +2123,7 @@ static void mvneta_set_rx_mode(struct ne + if (!netdev_mc_empty(dev)) { + netdev_for_each_mc_addr(ha, dev) { + mvneta_mcast_addr_set(pp, ha->addr, +- rxq_def); ++ pp->rxq_def); + } + } + } +@@ -2205,7 +2206,7 @@ static int mvneta_poll(struct napi_struc + * RX packets + */ + cause_rx_tx |= port->cause_rx_tx; +- rx_done = mvneta_rx(pp, budget, &pp->rxqs[rxq_def]); ++ rx_done = mvneta_rx(pp, budget, &pp->rxqs[pp->rxq_def]); + budget -= rx_done; + + if (budget > 0) { +@@ -2418,17 +2419,17 @@ static void mvneta_cleanup_txqs(struct m + /* Cleanup all Rx queues */ + static void mvneta_cleanup_rxqs(struct mvneta_port *pp) + { +- mvneta_rxq_deinit(pp, &pp->rxqs[rxq_def]); ++ mvneta_rxq_deinit(pp, &pp->rxqs[pp->rxq_def]); + } + + + /* Init all Rx queues */ + static int mvneta_setup_rxqs(struct mvneta_port *pp) + { +- int err = mvneta_rxq_init(pp, &pp->rxqs[rxq_def]); ++ int err = mvneta_rxq_init(pp, &pp->rxqs[pp->rxq_def]); + if (err) { + netdev_err(pp->dev, "%s: can't create rxq=%d\n", +- __func__, rxq_def); ++ __func__, pp->rxq_def); + mvneta_cleanup_rxqs(pp); + return err; + } +@@ -2634,7 +2635,7 @@ static int mvneta_set_mac_addr(struct ne + mvneta_mac_addr_set(pp, dev->dev_addr, -1); + + /* Set new addr in hw */ +- mvneta_mac_addr_set(pp, sockaddr->sa_data, rxq_def); ++ mvneta_mac_addr_set(pp, sockaddr->sa_data, pp->rxq_def); + + eth_commit_mac_addr_change(dev, addr); + return 0; +@@ -2753,7 +2754,7 @@ static void mvneta_percpu_elect(struct m + { + int online_cpu_idx, cpu, i = 0; + +- online_cpu_idx = rxq_def % num_online_cpus(); ++ 
online_cpu_idx = pp->rxq_def % num_online_cpus(); + + for_each_online_cpu(cpu) { + if (i == online_cpu_idx) +@@ -3363,6 +3364,8 @@ static int mvneta_probe(struct platform_ + strcmp(managed, "in-band-status") == 0); + pp->cpu_notifier.notifier_call = mvneta_percpu_notifier; + ++ pp->rxq_def = rxq_def; ++ + pp->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(pp->clk)) { + err = PTR_ERR(pp->clk); diff --git a/target/linux/mvebu/patches-4.4/033-net-mvneta-Associate-RX-queues-with-each-CPU.patch b/target/linux/mvebu/patches-4.4/033-net-mvneta-Associate-RX-queues-with-each-CPU.patch new file mode 100644 index 0000000000..636c6cf364 --- /dev/null +++ b/target/linux/mvebu/patches-4.4/033-net-mvneta-Associate-RX-queues-with-each-CPU.patch @@ -0,0 +1,278 @@ +From: Gregory CLEMENT +Date: Wed, 9 Dec 2015 18:23:49 +0100 +Subject: [PATCH] net: mvneta: Associate RX queues with each CPU + +We enable the percpu interrupt for all the CPU and we just associate a +CPU to a few queue at the neta level. The mapping between the CPUs and +the queues is static. The queues are associated to the CPU module the +number of CPUs. However currently we only use on RX queue for a given +Ethernet port. + +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -110,9 +110,16 @@ + #define MVNETA_CPU_MAP(cpu) (0x2540 + ((cpu) << 2)) + #define MVNETA_CPU_RXQ_ACCESS_ALL_MASK 0x000000ff + #define MVNETA_CPU_TXQ_ACCESS_ALL_MASK 0x0000ff00 ++#define MVNETA_CPU_RXQ_ACCESS(rxq) BIT(rxq) + #define MVNETA_RXQ_TIME_COAL_REG(q) (0x2580 + ((q) << 2)) + +-/* Exception Interrupt Port/Queue Cause register */ ++/* Exception Interrupt Port/Queue Cause register ++ * ++ * Their behavior depend of the mapping done using the PCPX2Q ++ * registers. For a given CPU if the bit associated to a queue is not ++ * set, then for the register a read from this CPU will always return ++ * 0 and a write won't do anything ++ */ + + #define MVNETA_INTR_NEW_CAUSE 0x25a0 + #define MVNETA_INTR_NEW_MASK 0x25a4 +@@ -820,7 +827,13 @@ static void mvneta_port_up(struct mvneta + mvreg_write(pp, MVNETA_TXQ_CMD, q_map); + + /* Enable all initialized RXQs. */ +- mvreg_write(pp, MVNETA_RXQ_CMD, BIT(pp->rxq_def)); ++ for (queue = 0; queue < rxq_number; queue++) { ++ struct mvneta_rx_queue *rxq = &pp->rxqs[queue]; ++ ++ if (rxq->descs != NULL) ++ q_map |= (1 << queue); ++ } ++ mvreg_write(pp, MVNETA_RXQ_CMD, q_map); + } + + /* Stop the Ethernet port activity */ +@@ -1026,6 +1039,7 @@ static void mvneta_defaults_set(struct m + int cpu; + int queue; + u32 val; ++ int max_cpu = num_present_cpus(); + + /* Clear all Cause registers */ + mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0); +@@ -1041,13 +1055,23 @@ static void mvneta_defaults_set(struct m + /* Enable MBUS Retry bit16 */ + mvreg_write(pp, MVNETA_MBUS_RETRY, 0x20); + +- /* Set CPU queue access map - all CPUs have access to all RX +- * queues and to all TX queues ++ /* Set CPU queue access map. CPUs are assigned to the RX ++ * queues modulo their number and all the TX queues are ++ * assigned to the CPU associated to the default RX queue. 
+ */ +- for_each_present_cpu(cpu) +- mvreg_write(pp, MVNETA_CPU_MAP(cpu), +- (MVNETA_CPU_RXQ_ACCESS_ALL_MASK | +- MVNETA_CPU_TXQ_ACCESS_ALL_MASK)); ++ for_each_present_cpu(cpu) { ++ int rxq_map = 0, txq_map = 0; ++ int rxq; ++ ++ for (rxq = 0; rxq < rxq_number; rxq++) ++ if ((rxq % max_cpu) == cpu) ++ rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); ++ ++ if (cpu == rxq_def) ++ txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK; ++ ++ mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map); ++ } + + /* Reset RX and TX DMAs */ + mvreg_write(pp, MVNETA_PORT_RX_RESET, MVNETA_PORT_RX_DMA_RESET); +@@ -2174,6 +2198,7 @@ static int mvneta_poll(struct napi_struc + { + int rx_done = 0; + u32 cause_rx_tx; ++ int rx_queue; + struct mvneta_port *pp = netdev_priv(napi->dev); + struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports); + +@@ -2205,8 +2230,15 @@ static int mvneta_poll(struct napi_struc + /* For the case where the last mvneta_poll did not process all + * RX packets + */ ++ rx_queue = fls(((cause_rx_tx >> 8) & 0xff)); ++ + cause_rx_tx |= port->cause_rx_tx; +- rx_done = mvneta_rx(pp, budget, &pp->rxqs[pp->rxq_def]); ++ ++ if (rx_queue) { ++ rx_queue = rx_queue - 1; ++ rx_done = mvneta_rx(pp, budget, &pp->rxqs[rx_queue]); ++ } ++ + budget -= rx_done; + + if (budget > 0) { +@@ -2419,19 +2451,27 @@ static void mvneta_cleanup_txqs(struct m + /* Cleanup all Rx queues */ + static void mvneta_cleanup_rxqs(struct mvneta_port *pp) + { +- mvneta_rxq_deinit(pp, &pp->rxqs[pp->rxq_def]); ++ int queue; ++ ++ for (queue = 0; queue < txq_number; queue++) ++ mvneta_rxq_deinit(pp, &pp->rxqs[queue]); + } + + + /* Init all Rx queues */ + static int mvneta_setup_rxqs(struct mvneta_port *pp) + { +- int err = mvneta_rxq_init(pp, &pp->rxqs[pp->rxq_def]); +- if (err) { +- netdev_err(pp->dev, "%s: can't create rxq=%d\n", +- __func__, pp->rxq_def); +- mvneta_cleanup_rxqs(pp); +- return err; ++ int queue; ++ ++ for (queue = 0; queue < rxq_number; queue++) { ++ int err = mvneta_rxq_init(pp, &pp->rxqs[queue]); ++ ++ if (err) { ++ netdev_err(pp->dev, "%s: can't create rxq=%d\n", ++ __func__, queue); ++ mvneta_cleanup_rxqs(pp); ++ return err; ++ } + } + + return 0; +@@ -2455,6 +2495,19 @@ static int mvneta_setup_txqs(struct mvne + return 0; + } + ++static void mvneta_percpu_unmask_interrupt(void *arg) ++{ ++ struct mvneta_port *pp = arg; ++ ++ /* All the queue are unmasked, but actually only the ones ++ * maped to this CPU will be unmasked ++ */ ++ mvreg_write(pp, MVNETA_INTR_NEW_MASK, ++ MVNETA_RX_INTR_MASK_ALL | ++ MVNETA_TX_INTR_MASK_ALL | ++ MVNETA_MISCINTR_INTR_MASK); ++} ++ + static void mvneta_start_dev(struct mvneta_port *pp) + { + unsigned int cpu; +@@ -2472,11 +2525,10 @@ static void mvneta_start_dev(struct mvne + napi_enable(&port->napi); + } + +- /* Unmask interrupts */ +- mvreg_write(pp, MVNETA_INTR_NEW_MASK, +- MVNETA_RX_INTR_MASK(rxq_number) | +- MVNETA_TX_INTR_MASK(txq_number) | +- MVNETA_MISCINTR_INTR_MASK); ++ /* Unmask interrupts. 
It has to be done from each CPU */ ++ for_each_online_cpu(cpu) ++ smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt, ++ pp, true); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, + MVNETA_CAUSE_PHY_STATUS_CHANGE | + MVNETA_CAUSE_LINK_CHANGE | +@@ -2752,22 +2804,35 @@ static void mvneta_percpu_disable(void * + + static void mvneta_percpu_elect(struct mvneta_port *pp) + { +- int online_cpu_idx, cpu, i = 0; ++ int online_cpu_idx, max_cpu, cpu, i = 0; + + online_cpu_idx = pp->rxq_def % num_online_cpus(); ++ max_cpu = num_present_cpus(); + + for_each_online_cpu(cpu) { +- if (i == online_cpu_idx) +- /* Enable per-CPU interrupt on the one CPU we +- * just elected ++ int rxq_map = 0, txq_map = 0; ++ int rxq; ++ ++ for (rxq = 0; rxq < rxq_number; rxq++) ++ if ((rxq % max_cpu) == cpu) ++ rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); ++ ++ if (i == online_cpu_idx) { ++ /* Map the default receive queue and transmit ++ * queue to the elected CPU + */ +- smp_call_function_single(cpu, mvneta_percpu_enable, +- pp, true); +- else +- /* Disable per-CPU interrupt on all the other CPU */ +- smp_call_function_single(cpu, mvneta_percpu_disable, +- pp, true); ++ rxq_map |= MVNETA_CPU_RXQ_ACCESS(pp->rxq_def); ++ txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK; ++ } ++ mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map); ++ ++ /* Update the interrupt mask on each CPU according the ++ * new mapping ++ */ ++ smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt, ++ pp, true); + i++; ++ + } + }; + +@@ -2802,12 +2867,22 @@ static int mvneta_percpu_notifier(struct + mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); + napi_enable(&port->napi); + ++ ++ /* Enable per-CPU interrupts on the CPU that is ++ * brought up. ++ */ ++ smp_call_function_single(cpu, mvneta_percpu_enable, ++ pp, true); ++ + /* Enable per-CPU interrupt on the one CPU we care + * about. + */ + mvneta_percpu_elect(pp); + +- /* Unmask all ethernet port interrupts */ ++ /* Unmask all ethernet port interrupts, as this ++ * notifier is called for each CPU then the CPU to ++ * Queue mapping is applied ++ */ + mvreg_write(pp, MVNETA_INTR_NEW_MASK, + MVNETA_RX_INTR_MASK(rxq_number) | + MVNETA_TX_INTR_MASK(txq_number) | +@@ -2858,7 +2933,7 @@ static int mvneta_percpu_notifier(struct + static int mvneta_open(struct net_device *dev) + { + struct mvneta_port *pp = netdev_priv(dev); +- int ret; ++ int ret, cpu; + + pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu); + pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) + +@@ -2888,8 +2963,13 @@ static int mvneta_open(struct net_device + */ + mvneta_percpu_disable(pp); + +- /* Elect a CPU to handle our RX queue interrupt */ +- mvneta_percpu_elect(pp); ++ /* Enable per-CPU interrupt on all the CPU to handle our RX ++ * queue interrupts ++ */ ++ for_each_online_cpu(cpu) ++ smp_call_function_single(cpu, mvneta_percpu_enable, ++ pp, true); ++ + + /* Register a CPU notifier to handle the case where our CPU + * might be taken offline. diff --git a/target/linux/mvebu/patches-4.4/034-net-mvneta-Add-naive-RSS-support.patch b/target/linux/mvebu/patches-4.4/034-net-mvneta-Add-naive-RSS-support.patch new file mode 100644 index 0000000000..629c44c8a1 --- /dev/null +++ b/target/linux/mvebu/patches-4.4/034-net-mvneta-Add-naive-RSS-support.patch @@ -0,0 +1,191 @@ +From: Gregory CLEMENT +Date: Wed, 9 Dec 2015 18:23:50 +0100 +Subject: [PATCH] net: mvneta: Add naive RSS support + +This patch adds the support for the RSS related ethtool +function. 
Currently it only uses one entry in the indirection table which +allows associating an mvneta interface to a given CPU. + +Signed-off-by: Gregory CLEMENT +Tested-by: Marcin Wojtas +Signed-off-by: David S. Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -261,6 +261,11 @@ + + #define MVNETA_TX_MTU_MAX 0x3ffff + ++/* The RSS lookup table actually has 256 entries but we do not use ++ * them yet ++ */ ++#define MVNETA_RSS_LU_TABLE_SIZE 1 ++ + /* TSO header size */ + #define TSO_HEADER_SIZE 128 + +@@ -382,6 +387,8 @@ struct mvneta_port { + unsigned int use_inband_status:1; + + u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)]; ++ ++ u32 indir[MVNETA_RSS_LU_TABLE_SIZE]; + }; + + /* The mvneta_tx_desc and mvneta_rx_desc structures describe the +@@ -1067,7 +1074,7 @@ static void mvneta_defaults_set(struct m + if ((rxq % max_cpu) == cpu) + rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); + +- if (cpu == rxq_def) ++ if (cpu == pp->rxq_def) + txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK; + + mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map); +@@ -2508,6 +2515,18 @@ static void mvneta_percpu_unmask_interru + MVNETA_MISCINTR_INTR_MASK); + } + ++static void mvneta_percpu_mask_interrupt(void *arg) ++{ ++ struct mvneta_port *pp = arg; ++ ++ /* All the queue are masked, but actually only the ones ++ * maped to this CPU will be masked ++ */ ++ mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); ++ mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); ++ mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); ++} ++ + static void mvneta_start_dev(struct mvneta_port *pp) + { + unsigned int cpu; +@@ -3231,6 +3250,106 @@ static int mvneta_ethtool_get_sset_count + return -EOPNOTSUPP; + } + ++static u32 mvneta_ethtool_get_rxfh_indir_size(struct net_device *dev) ++{ ++ return MVNETA_RSS_LU_TABLE_SIZE; ++} ++ ++static int mvneta_ethtool_get_rxnfc(struct net_device *dev, ++ struct ethtool_rxnfc *info, ++ u32 *rules __always_unused) ++{ ++ switch (info->cmd) { ++ case ETHTOOL_GRXRINGS: ++ info->data = rxq_number; ++ return 0; ++ case ETHTOOL_GRXFH: ++ return -EOPNOTSUPP; ++ default: ++ return -EOPNOTSUPP; ++ } ++} ++ ++static int mvneta_config_rss(struct mvneta_port *pp) ++{ ++ int cpu; ++ u32 val; ++ ++ netif_tx_stop_all_queues(pp->dev); ++ ++ for_each_online_cpu(cpu) ++ smp_call_function_single(cpu, mvneta_percpu_mask_interrupt, ++ pp, true); ++ ++ /* We have to synchronise on the napi of each CPU */ ++ for_each_online_cpu(cpu) { ++ struct mvneta_pcpu_port *pcpu_port = ++ per_cpu_ptr(pp->ports, cpu); ++ ++ napi_synchronize(&pcpu_port->napi); ++ napi_disable(&pcpu_port->napi); ++ } ++ ++ pp->rxq_def = pp->indir[0]; ++ ++ /* Update unicast mapping */ ++ mvneta_set_rx_mode(pp->dev); ++ ++ /* Update val of portCfg register accordingly with all RxQueue types */ ++ val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def); ++ mvreg_write(pp, MVNETA_PORT_CONFIG, val); ++ ++ /* Update the elected CPU matching the new rxq_def */ ++ mvneta_percpu_elect(pp); ++ ++ /* We have to synchronise on the napi of each CPU */ ++ for_each_online_cpu(cpu) { ++ struct mvneta_pcpu_port *pcpu_port = ++ per_cpu_ptr(pp->ports, cpu); ++ ++ napi_enable(&pcpu_port->napi); ++ } ++ ++ netif_tx_start_all_queues(pp->dev); ++ ++ return 0; ++} ++ ++static int mvneta_ethtool_set_rxfh(struct net_device *dev, const u32 *indir, ++ const u8 *key, const u8 hfunc) ++{ ++ struct mvneta_port *pp = netdev_priv(dev); ++ /* We require at least one supported parameter to be changed ++ * and no change in any of the unsupported parameters ++ */ ++ if (key || 
++ (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) ++ return -EOPNOTSUPP; ++ ++ if (!indir) ++ return 0; ++ ++ memcpy(pp->indir, indir, MVNETA_RSS_LU_TABLE_SIZE); ++ ++ return mvneta_config_rss(pp); ++} ++ ++static int mvneta_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, ++ u8 *hfunc) ++{ ++ struct mvneta_port *pp = netdev_priv(dev); ++ ++ if (hfunc) ++ *hfunc = ETH_RSS_HASH_TOP; ++ ++ if (!indir) ++ return 0; ++ ++ memcpy(indir, pp->indir, MVNETA_RSS_LU_TABLE_SIZE); ++ ++ return 0; ++} ++ + static const struct net_device_ops mvneta_netdev_ops = { + .ndo_open = mvneta_open, + .ndo_stop = mvneta_stop, +@@ -3255,6 +3374,10 @@ const struct ethtool_ops mvneta_eth_tool + .get_strings = mvneta_ethtool_get_strings, + .get_ethtool_stats = mvneta_ethtool_get_stats, + .get_sset_count = mvneta_ethtool_get_sset_count, ++ .get_rxfh_indir_size = mvneta_ethtool_get_rxfh_indir_size, ++ .get_rxnfc = mvneta_ethtool_get_rxnfc, ++ .get_rxfh = mvneta_ethtool_get_rxfh, ++ .set_rxfh = mvneta_ethtool_set_rxfh, + }; + + /* Initialize hw */ +@@ -3446,6 +3569,8 @@ static int mvneta_probe(struct platform_ + + pp->rxq_def = rxq_def; + ++ pp->indir[0] = rxq_def; ++ + pp->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(pp->clk)) { + err = PTR_ERR(pp->clk); diff --git a/target/linux/mvebu/patches-4.4/035-net-mvneta-Configure-XPS-support.patch b/target/linux/mvebu/patches-4.4/035-net-mvneta-Configure-XPS-support.patch new file mode 100644 index 0000000000..5b35b6442b --- /dev/null +++ b/target/linux/mvebu/patches-4.4/035-net-mvneta-Configure-XPS-support.patch @@ -0,0 +1,124 @@ +From: Gregory CLEMENT +Date: Wed, 9 Dec 2015 18:23:51 +0100 +Subject: [PATCH] net: mvneta: Configure XPS support + +With this patch each CPU is associated with its own set of TX queues. + +It also setup the XPS with an initial configuration which set the +affinity matching the hardware configuration. + +Suggested-by: Arnd Bergmann +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -111,6 +111,7 @@ + #define MVNETA_CPU_RXQ_ACCESS_ALL_MASK 0x000000ff + #define MVNETA_CPU_TXQ_ACCESS_ALL_MASK 0x0000ff00 + #define MVNETA_CPU_RXQ_ACCESS(rxq) BIT(rxq) ++#define MVNETA_CPU_TXQ_ACCESS(txq) BIT(txq + 8) + #define MVNETA_RXQ_TIME_COAL_REG(q) (0x2580 + ((q) << 2)) + + /* Exception Interrupt Port/Queue Cause register +@@ -514,6 +515,9 @@ struct mvneta_tx_queue { + + /* DMA address of TSO headers */ + dma_addr_t tso_hdrs_phys; ++ ++ /* Affinity mask for CPUs*/ ++ cpumask_t affinity_mask; + }; + + struct mvneta_rx_queue { +@@ -1062,20 +1066,30 @@ static void mvneta_defaults_set(struct m + /* Enable MBUS Retry bit16 */ + mvreg_write(pp, MVNETA_MBUS_RETRY, 0x20); + +- /* Set CPU queue access map. CPUs are assigned to the RX +- * queues modulo their number and all the TX queues are +- * assigned to the CPU associated to the default RX queue. ++ /* Set CPU queue access map. CPUs are assigned to the RX and ++ * TX queues modulo their number. If there is only one TX ++ * queue then it is assigned to the CPU associated to the ++ * default RX queue. 
+ */ + for_each_present_cpu(cpu) { + int rxq_map = 0, txq_map = 0; +- int rxq; ++ int rxq, txq; + + for (rxq = 0; rxq < rxq_number; rxq++) + if ((rxq % max_cpu) == cpu) + rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); + +- if (cpu == pp->rxq_def) +- txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK; ++ for (txq = 0; txq < txq_number; txq++) ++ if ((txq % max_cpu) == cpu) ++ txq_map |= MVNETA_CPU_TXQ_ACCESS(txq); ++ ++ /* With only one TX queue we configure a special case ++ * which will allow to get all the irq on a single ++ * CPU ++ */ ++ if (txq_number == 1) ++ txq_map = (cpu == pp->rxq_def) ? ++ MVNETA_CPU_TXQ_ACCESS(1) : 0; + + mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map); + } +@@ -2362,6 +2376,8 @@ static void mvneta_rxq_deinit(struct mvn + static int mvneta_txq_init(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) + { ++ int cpu; ++ + txq->size = pp->tx_ring_size; + + /* A queue must always have room for at least one skb. +@@ -2414,6 +2430,14 @@ static int mvneta_txq_init(struct mvneta + } + mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal); + ++ /* Setup XPS mapping */ ++ if (txq_number > 1) ++ cpu = txq->id % num_present_cpus(); ++ else ++ cpu = pp->rxq_def % num_present_cpus(); ++ cpumask_set_cpu(cpu, &txq->affinity_mask); ++ netif_set_xps_queue(pp->dev, &txq->affinity_mask, txq->id); ++ + return 0; + } + +@@ -2836,13 +2860,23 @@ static void mvneta_percpu_elect(struct m + if ((rxq % max_cpu) == cpu) + rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); + +- if (i == online_cpu_idx) { +- /* Map the default receive queue and transmit +- * queue to the elected CPU ++ if (i == online_cpu_idx) ++ /* Map the default receive queue queue to the ++ * elected CPU + */ + rxq_map |= MVNETA_CPU_RXQ_ACCESS(pp->rxq_def); +- txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK; +- } ++ ++ /* We update the TX queue map only if we have one ++ * queue. In this case we associate the TX queue to ++ * the CPU bound to the default RX queue ++ */ ++ if (txq_number == 1) ++ txq_map = (i == online_cpu_idx) ? ++ MVNETA_CPU_TXQ_ACCESS(1) : 0; ++ else ++ txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) & ++ MVNETA_CPU_TXQ_ACCESS_ALL_MASK; ++ + mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map); + + /* Update the interrupt mask on each CPU according the diff --git a/target/linux/mvebu/patches-4.4/036-net-mvneta-fix-trivial-cut-off-issue-in-mvneta_ethto.patch b/target/linux/mvebu/patches-4.4/036-net-mvneta-fix-trivial-cut-off-issue-in-mvneta_ethto.patch new file mode 100644 index 0000000000..2bd7a88032 --- /dev/null +++ b/target/linux/mvebu/patches-4.4/036-net-mvneta-fix-trivial-cut-off-issue-in-mvneta_ethto.patch @@ -0,0 +1,46 @@ +From: Jisheng Zhang +Date: Wed, 20 Jan 2016 16:36:25 +0800 +Subject: [PATCH] net: mvneta: fix trivial cut-off issue in + mvneta_ethtool_update_stats + +When s->type is T_REG_64, the high 32bits are lost in val. This patch +fixes this trivial issue. + +Signed-off-by: Jisheng Zhang +Fixes: 9b0cdefa4cd5 ("net: mvneta: add ethtool statistics") +Signed-off-by: David S. 
Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -3242,26 +3242,25 @@ static void mvneta_ethtool_update_stats( + const struct mvneta_statistic *s; + void __iomem *base = pp->base; + u32 high, low, val; ++ u64 val64; + int i; + + for (i = 0, s = mvneta_statistics; + s < mvneta_statistics + ARRAY_SIZE(mvneta_statistics); + s++, i++) { +- val = 0; +- + switch (s->type) { + case T_REG_32: + val = readl_relaxed(base + s->offset); ++ pp->ethtool_stats[i] += val; + break; + case T_REG_64: + /* Docs say to read low 32-bit then high */ + low = readl_relaxed(base + s->offset); + high = readl_relaxed(base + s->offset + 4); +- val = (u64)high << 32 | low; ++ val64 = (u64)high << 32 | low; ++ pp->ethtool_stats[i] += val64; + break; + } +- +- pp->ethtool_stats[i] += val; + } + } + diff --git a/target/linux/mvebu/patches-4.4/037-net-mvneta-Fix-for_each_present_cpu-usage.patch b/target/linux/mvebu/patches-4.4/037-net-mvneta-Fix-for_each_present_cpu-usage.patch new file mode 100644 index 0000000000..9ded5b71fe --- /dev/null +++ b/target/linux/mvebu/patches-4.4/037-net-mvneta-Fix-for_each_present_cpu-usage.patch @@ -0,0 +1,55 @@ +From: Gregory CLEMENT +Date: Thu, 4 Feb 2016 22:09:23 +0100 +Subject: [PATCH] net: mvneta: Fix for_each_present_cpu usage + +This patch convert the for_each_present in on_each_cpu, instead of +applying on the present cpus it will be applied only on the online cpus. +This fix a bug reported on +http://thread.gmane.org/gmane.linux.ports.arm.kernel/468173. + +Using the macro on_each_cpu (instead of a for_each_* loop) also ensures +that all the calls will be done all at once. + +Fixes: f86428854480 ("net: mvneta: Statically assign queues to CPUs") +Reported-by: Stefan Roese +Suggested-by: Jisheng Zhang +Suggested-by: Russell King +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. 
Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -2562,7 +2562,7 @@ static void mvneta_start_dev(struct mvne + mvneta_port_enable(pp); + + /* Enable polling on the port */ +- for_each_present_cpu(cpu) { ++ for_each_online_cpu(cpu) { + struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); + + napi_enable(&port->napi); +@@ -2587,7 +2587,7 @@ static void mvneta_stop_dev(struct mvnet + + phy_stop(pp->phy_dev); + +- for_each_present_cpu(cpu) { ++ for_each_online_cpu(cpu) { + struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); + + napi_disable(&port->napi); +@@ -3055,13 +3055,11 @@ err_cleanup_rxqs: + static int mvneta_stop(struct net_device *dev) + { + struct mvneta_port *pp = netdev_priv(dev); +- int cpu; + + mvneta_stop_dev(pp); + mvneta_mdio_remove(pp); + unregister_cpu_notifier(&pp->cpu_notifier); +- for_each_present_cpu(cpu) +- smp_call_function_single(cpu, mvneta_percpu_disable, pp, true); ++ on_each_cpu(mvneta_percpu_disable, pp, true); + free_percpu_irq(dev->irq, pp->ports); + mvneta_cleanup_rxqs(pp); + mvneta_cleanup_txqs(pp); diff --git a/target/linux/mvebu/patches-4.4/038-net-mvneta-Fix-the-CPU-choice-in-mvneta_percpu_elect.patch b/target/linux/mvebu/patches-4.4/038-net-mvneta-Fix-the-CPU-choice-in-mvneta_percpu_elect.patch new file mode 100644 index 0000000000..4fc9ccc76a --- /dev/null +++ b/target/linux/mvebu/patches-4.4/038-net-mvneta-Fix-the-CPU-choice-in-mvneta_percpu_elect.patch @@ -0,0 +1,57 @@ +From: Gregory CLEMENT +Date: Thu, 4 Feb 2016 22:09:24 +0100 +Subject: [PATCH] net: mvneta: Fix the CPU choice in mvneta_percpu_elect + +When passing to the management of multiple RX queue, the +mvneta_percpu_elect function was broken. The use of the modulo can lead +to elect the wrong cpu. For example with rxq_def=2, if the CPU 2 goes +offline and then online, we ended with the third RX queue activated in +the same time on CPU 0 and CPU2, which lead to a kernel crash. + +With this fix, we don't try to get "the closer" CPU if the default CPU is +gone, now we just use CPU 0 which always be there. Thanks to this, the +code becomes more readable, easier to maintain and more predicable. + +Cc: stable@vger.kernel.org +Fixes: 2dcf75e2793c ("net: mvneta: Associate RX queues with each CPU") +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -2847,9 +2847,14 @@ static void mvneta_percpu_disable(void * + + static void mvneta_percpu_elect(struct mvneta_port *pp) + { +- int online_cpu_idx, max_cpu, cpu, i = 0; ++ int elected_cpu = 0, max_cpu, cpu, i = 0; ++ ++ /* Use the cpu associated to the rxq when it is online, in all ++ * the other cases, use the cpu 0 which can't be offline. ++ */ ++ if (cpu_online(pp->rxq_def)) ++ elected_cpu = pp->rxq_def; + +- online_cpu_idx = pp->rxq_def % num_online_cpus(); + max_cpu = num_present_cpus(); + + for_each_online_cpu(cpu) { +@@ -2860,7 +2865,7 @@ static void mvneta_percpu_elect(struct m + if ((rxq % max_cpu) == cpu) + rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); + +- if (i == online_cpu_idx) ++ if (cpu == elected_cpu) + /* Map the default receive queue queue to the + * elected CPU + */ +@@ -2871,7 +2876,7 @@ static void mvneta_percpu_elect(struct m + * the CPU bound to the default RX queue + */ + if (txq_number == 1) +- txq_map = (i == online_cpu_idx) ? ++ txq_map = (cpu == elected_cpu) ? 
+ MVNETA_CPU_TXQ_ACCESS(1) : 0; + else + txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) & diff --git a/target/linux/mvebu/patches-4.4/039-net-mvneta-Use-on_each_cpu-when-possible.patch b/target/linux/mvebu/patches-4.4/039-net-mvneta-Use-on_each_cpu-when-possible.patch new file mode 100644 index 0000000000..76257a2a71 --- /dev/null +++ b/target/linux/mvebu/patches-4.4/039-net-mvneta-Use-on_each_cpu-when-possible.patch @@ -0,0 +1,68 @@ +From: Gregory CLEMENT +Date: Thu, 4 Feb 2016 22:09:25 +0100 +Subject: [PATCH] net: mvneta: Use on_each_cpu when possible + +Instead of using a for_each_* loop in which we just call the +smp_call_function_single macro, it is more simple to directly use the +on_each_cpu macro. Moreover, this macro ensures that the calls will be +done all at once. + +Suggested-by: Russell King +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -2553,7 +2553,7 @@ static void mvneta_percpu_mask_interrupt + + static void mvneta_start_dev(struct mvneta_port *pp) + { +- unsigned int cpu; ++ int cpu; + + mvneta_max_rx_size_set(pp, pp->pkt_size); + mvneta_txq_max_tx_size_set(pp, pp->pkt_size); +@@ -2569,9 +2569,8 @@ static void mvneta_start_dev(struct mvne + } + + /* Unmask interrupts. It has to be done from each CPU */ +- for_each_online_cpu(cpu) +- smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt, +- pp, true); ++ on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true); ++ + mvreg_write(pp, MVNETA_INTR_MISC_MASK, + MVNETA_CAUSE_PHY_STATUS_CHANGE | + MVNETA_CAUSE_LINK_CHANGE | +@@ -2991,7 +2990,7 @@ static int mvneta_percpu_notifier(struct + static int mvneta_open(struct net_device *dev) + { + struct mvneta_port *pp = netdev_priv(dev); +- int ret, cpu; ++ int ret; + + pp->pkt_size = MVNETA_RX_PKT_SIZE(pp->dev->mtu); + pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) + +@@ -3024,9 +3023,7 @@ static int mvneta_open(struct net_device + /* Enable per-CPU interrupt on all the CPU to handle our RX + * queue interrupts + */ +- for_each_online_cpu(cpu) +- smp_call_function_single(cpu, mvneta_percpu_enable, +- pp, true); ++ on_each_cpu(mvneta_percpu_enable, pp, true); + + + /* Register a CPU notifier to handle the case where our CPU +@@ -3313,9 +3310,7 @@ static int mvneta_config_rss(struct mvn + + netif_tx_stop_all_queues(pp->dev); + +- for_each_online_cpu(cpu) +- smp_call_function_single(cpu, mvneta_percpu_mask_interrupt, +- pp, true); ++ on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); + + /* We have to synchronise on the napi of each CPU */ + for_each_online_cpu(cpu) { diff --git a/target/linux/mvebu/patches-4.4/040-net-mvneta-Modify-the-queue-related-fields-from-each.patch b/target/linux/mvebu/patches-4.4/040-net-mvneta-Modify-the-queue-related-fields-from-each.patch new file mode 100644 index 0000000000..b025777df9 --- /dev/null +++ b/target/linux/mvebu/patches-4.4/040-net-mvneta-Modify-the-queue-related-fields-from-each.patch @@ -0,0 +1,179 @@ +From: Gregory CLEMENT +Date: Thu, 4 Feb 2016 22:09:27 +0100 +Subject: [PATCH] net: mvneta: Modify the queue related fields from each cpu + +In the MVNETA_INTR_* registers, the queues related fields are per cpu, +according to the datasheet (comment in [] are added by me): +"In a multi-CPU system, bits of RX[or TX] queues for which the access by +the reading[or writing] CPU is disabled are read as 0, and cannot be +cleared[or written]." 
+ +That means that each time we want to manipulate these bits we had to do +it on each cpu and not only on the current cpu. + +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -1036,6 +1036,43 @@ static void mvneta_set_autoneg(struct mv + } + } + ++static void mvneta_percpu_unmask_interrupt(void *arg) ++{ ++ struct mvneta_port *pp = arg; ++ ++ /* All the queue are unmasked, but actually only the ones ++ * mapped to this CPU will be unmasked ++ */ ++ mvreg_write(pp, MVNETA_INTR_NEW_MASK, ++ MVNETA_RX_INTR_MASK_ALL | ++ MVNETA_TX_INTR_MASK_ALL | ++ MVNETA_MISCINTR_INTR_MASK); ++} ++ ++static void mvneta_percpu_mask_interrupt(void *arg) ++{ ++ struct mvneta_port *pp = arg; ++ ++ /* All the queue are masked, but actually only the ones ++ * mapped to this CPU will be masked ++ */ ++ mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); ++ mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); ++ mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); ++} ++ ++static void mvneta_percpu_clear_intr_cause(void *arg) ++{ ++ struct mvneta_port *pp = arg; ++ ++ /* All the queue are cleared, but actually only the ones ++ * mapped to this CPU will be cleared ++ */ ++ mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0); ++ mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0); ++ mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0); ++} ++ + /* This method sets defaults to the NETA port: + * Clears interrupt Cause and Mask registers. + * Clears all MAC tables. +@@ -1053,14 +1090,10 @@ static void mvneta_defaults_set(struct m + int max_cpu = num_present_cpus(); + + /* Clear all Cause registers */ +- mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0); +- mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0); +- mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0); ++ on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true); + + /* Mask all interrupts */ +- mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); +- mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); +- mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); ++ on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); + mvreg_write(pp, MVNETA_INTR_ENABLE, 0); + + /* Enable MBUS Retry bit16 */ +@@ -2526,31 +2559,6 @@ static int mvneta_setup_txqs(struct mvne + return 0; + } + +-static void mvneta_percpu_unmask_interrupt(void *arg) +-{ +- struct mvneta_port *pp = arg; +- +- /* All the queue are unmasked, but actually only the ones +- * maped to this CPU will be unmasked +- */ +- mvreg_write(pp, MVNETA_INTR_NEW_MASK, +- MVNETA_RX_INTR_MASK_ALL | +- MVNETA_TX_INTR_MASK_ALL | +- MVNETA_MISCINTR_INTR_MASK); +-} +- +-static void mvneta_percpu_mask_interrupt(void *arg) +-{ +- struct mvneta_port *pp = arg; +- +- /* All the queue are masked, but actually only the ones +- * maped to this CPU will be masked +- */ +- mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); +- mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); +- mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); +-} +- + static void mvneta_start_dev(struct mvneta_port *pp) + { + int cpu; +@@ -2601,13 +2609,10 @@ static void mvneta_stop_dev(struct mvnet + mvneta_port_disable(pp); + + /* Clear all ethernet port interrupts */ +- mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0); +- mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0); ++ on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true); + + /* Mask all ethernet port interrupts */ +- mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); +- mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); +- mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); ++ on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); + + mvneta_tx_reset(pp); + mvneta_rx_reset(pp); 
+@@ -2919,9 +2924,7 @@ static int mvneta_percpu_notifier(struct + } + + /* Mask all ethernet port interrupts */ +- mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); +- mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); +- mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); ++ on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); + napi_enable(&port->napi); + + +@@ -2936,14 +2939,8 @@ static int mvneta_percpu_notifier(struct + */ + mvneta_percpu_elect(pp); + +- /* Unmask all ethernet port interrupts, as this +- * notifier is called for each CPU then the CPU to +- * Queue mapping is applied +- */ +- mvreg_write(pp, MVNETA_INTR_NEW_MASK, +- MVNETA_RX_INTR_MASK(rxq_number) | +- MVNETA_TX_INTR_MASK(txq_number) | +- MVNETA_MISCINTR_INTR_MASK); ++ /* Unmask all ethernet port interrupts */ ++ on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, + MVNETA_CAUSE_PHY_STATUS_CHANGE | + MVNETA_CAUSE_LINK_CHANGE | +@@ -2954,9 +2951,7 @@ static int mvneta_percpu_notifier(struct + case CPU_DOWN_PREPARE_FROZEN: + netif_tx_stop_all_queues(pp->dev); + /* Mask all ethernet port interrupts */ +- mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0); +- mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0); +- mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0); ++ on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); + + napi_synchronize(&port->napi); + napi_disable(&port->napi); +@@ -2972,10 +2967,7 @@ static int mvneta_percpu_notifier(struct + /* Check if a new CPU must be elected now this on is down */ + mvneta_percpu_elect(pp); + /* Unmask all ethernet port interrupts */ +- mvreg_write(pp, MVNETA_INTR_NEW_MASK, +- MVNETA_RX_INTR_MASK(rxq_number) | +- MVNETA_TX_INTR_MASK(txq_number) | +- MVNETA_MISCINTR_INTR_MASK); ++ on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, + MVNETA_CAUSE_PHY_STATUS_CHANGE | + MVNETA_CAUSE_LINK_CHANGE | diff --git a/target/linux/mvebu/patches-4.4/041-net-mvneta-The-mvneta_percpu_elect-function-should-b.patch b/target/linux/mvebu/patches-4.4/041-net-mvneta-The-mvneta_percpu_elect-function-should-b.patch new file mode 100644 index 0000000000..a77bef44e4 --- /dev/null +++ b/target/linux/mvebu/patches-4.4/041-net-mvneta-The-mvneta_percpu_elect-function-should-b.patch @@ -0,0 +1,68 @@ +From: Gregory CLEMENT +Date: Thu, 4 Feb 2016 22:09:28 +0100 +Subject: [PATCH] net: mvneta: The mvneta_percpu_elect function should be + atomic + +Electing a CPU must be done in an atomic way: it should be done after or +before the removal/insertion of a CPU and this function is not reentrant. + +During the loop of mvneta_percpu_elect we associates the queues to the +CPUs, if there is a topology change during this loop, then the mapping +between the CPUs and the queues could be wrong. During this loop the +interrupt mask is also updating for each CPUs, It should not be changed +in the same time by other part of the driver. + +This patch adds spinlock to create the needed critical sections. + +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -370,6 +370,10 @@ struct mvneta_port { + struct net_device *dev; + struct notifier_block cpu_notifier; + int rxq_def; ++ /* Protect the access to the percpu interrupt registers, ++ * ensuring that the configuration remains coherent. 
++ */ ++ spinlock_t lock; + + /* Core clock */ + struct clk *clk; +@@ -2853,6 +2857,12 @@ static void mvneta_percpu_elect(struct m + { + int elected_cpu = 0, max_cpu, cpu, i = 0; + ++ /* Electing a CPU must be done in an atomic way: it should be ++ * done after or before the removal/insertion of a CPU and ++ * this function is not reentrant. ++ */ ++ spin_lock(&pp->lock); ++ + /* Use the cpu associated to the rxq when it is online, in all + * the other cases, use the cpu 0 which can't be offline. + */ +@@ -2896,6 +2906,7 @@ static void mvneta_percpu_elect(struct m + i++; + + } ++ spin_unlock(&pp->lock); + }; + + static int mvneta_percpu_notifier(struct notifier_block *nfb, +@@ -2950,8 +2961,13 @@ static int mvneta_percpu_notifier(struct + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + netif_tx_stop_all_queues(pp->dev); ++ /* Thanks to this lock we are sure that any pending ++ * cpu election is done ++ */ ++ spin_lock(&pp->lock); + /* Mask all ethernet port interrupts */ + on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); ++ spin_unlock(&pp->lock); + + napi_synchronize(&port->napi); + napi_disable(&port->napi); diff --git a/target/linux/mvebu/patches-4.4/042-net-mvneta-Fix-race-condition-during-stopping.patch b/target/linux/mvebu/patches-4.4/042-net-mvneta-Fix-race-condition-during-stopping.patch new file mode 100644 index 0000000000..9936ebf320 --- /dev/null +++ b/target/linux/mvebu/patches-4.4/042-net-mvneta-Fix-race-condition-during-stopping.patch @@ -0,0 +1,128 @@ +From: Gregory CLEMENT +Date: Thu, 4 Feb 2016 22:09:29 +0100 +Subject: [PATCH] net: mvneta: Fix race condition during stopping + +When stopping the port, the CPU notifier are still there whereas the +mvneta_stop_dev function calls mvneta_percpu_disable() on each CPUs. +It was possible to have a new CPU coming at this point which could be +racy. + +This patch adds a flag preventing executing the code notifier for a new +CPU when the port is stopping. It also uses the spinlock introduces +previously. To avoid the deadlock, the lock has been moved outside the +mvneta_percpu_elect function. + +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -374,6 +374,7 @@ struct mvneta_port { + * ensuring that the configuration remains coherent. + */ + spinlock_t lock; ++ bool is_stopped; + + /* Core clock */ + struct clk *clk; +@@ -2853,16 +2854,14 @@ static void mvneta_percpu_disable(void * + disable_percpu_irq(pp->dev->irq); + } + ++/* Electing a CPU must be done in an atomic way: it should be done ++ * after or before the removal/insertion of a CPU and this function is ++ * not reentrant. ++ */ + static void mvneta_percpu_elect(struct mvneta_port *pp) + { + int elected_cpu = 0, max_cpu, cpu, i = 0; + +- /* Electing a CPU must be done in an atomic way: it should be +- * done after or before the removal/insertion of a CPU and +- * this function is not reentrant. +- */ +- spin_lock(&pp->lock); +- + /* Use the cpu associated to the rxq when it is online, in all + * the other cases, use the cpu 0 which can't be offline. 
+ */ +@@ -2906,7 +2905,6 @@ static void mvneta_percpu_elect(struct m + i++; + + } +- spin_unlock(&pp->lock); + }; + + static int mvneta_percpu_notifier(struct notifier_block *nfb, +@@ -2920,6 +2918,14 @@ static int mvneta_percpu_notifier(struct + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: ++ spin_lock(&pp->lock); ++ /* Configuring the driver for a new CPU while the ++ * driver is stopping is racy, so just avoid it. ++ */ ++ if (pp->is_stopped) { ++ spin_unlock(&pp->lock); ++ break; ++ } + netif_tx_stop_all_queues(pp->dev); + + /* We have to synchronise on tha napi of each CPU +@@ -2957,6 +2963,7 @@ static int mvneta_percpu_notifier(struct + MVNETA_CAUSE_LINK_CHANGE | + MVNETA_CAUSE_PSC_SYNC_CHANGE); + netif_tx_start_all_queues(pp->dev); ++ spin_unlock(&pp->lock); + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: +@@ -2981,7 +2988,9 @@ static int mvneta_percpu_notifier(struct + case CPU_DEAD: + case CPU_DEAD_FROZEN: + /* Check if a new CPU must be elected now this on is down */ ++ spin_lock(&pp->lock); + mvneta_percpu_elect(pp); ++ spin_unlock(&pp->lock); + /* Unmask all ethernet port interrupts */ + on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true); + mvreg_write(pp, MVNETA_INTR_MISC_MASK, +@@ -3033,7 +3042,7 @@ static int mvneta_open(struct net_device + */ + on_each_cpu(mvneta_percpu_enable, pp, true); + +- ++ pp->is_stopped = false; + /* Register a CPU notifier to handle the case where our CPU + * might be taken offline. + */ +@@ -3066,9 +3075,18 @@ static int mvneta_stop(struct net_device + { + struct mvneta_port *pp = netdev_priv(dev); + ++ /* Inform that we are stopping so we don't want to setup the ++ * driver for new CPUs in the notifiers ++ */ ++ spin_lock(&pp->lock); ++ pp->is_stopped = true; + mvneta_stop_dev(pp); + mvneta_mdio_remove(pp); + unregister_cpu_notifier(&pp->cpu_notifier); ++ /* Now that the notifier are unregistered, we can release le ++ * lock ++ */ ++ spin_unlock(&pp->lock); + on_each_cpu(mvneta_percpu_disable, pp, true); + free_percpu_irq(dev->irq, pp->ports); + mvneta_cleanup_rxqs(pp); +@@ -3339,7 +3357,9 @@ static int mvneta_config_rss(struct mvn + mvreg_write(pp, MVNETA_PORT_CONFIG, val); + + /* Update the elected CPU matching the new rxq_def */ ++ spin_lock(&pp->lock); + mvneta_percpu_elect(pp); ++ spin_unlock(&pp->lock); + + /* We have to synchronise on the napi of each CPU */ + for_each_online_cpu(cpu) { diff --git a/target/linux/mvebu/patches-4.4/043-net-mvneta-sort-the-headers-in-alphabetic-order.patch b/target/linux/mvebu/patches-4.4/043-net-mvneta-sort-the-headers-in-alphabetic-order.patch new file mode 100644 index 0000000000..502c2584d9 --- /dev/null +++ b/target/linux/mvebu/patches-4.4/043-net-mvneta-sort-the-headers-in-alphabetic-order.patch @@ -0,0 +1,56 @@ +From: Jisheng Zhang +Date: Wed, 20 Jan 2016 19:27:22 +0800 +Subject: [PATCH] net: mvneta: sort the headers in alphabetic order + +Sorting the headers in alphabetic order will help to reduce the conflict +when adding new headers in the future. + +Signed-off-by: Jisheng Zhang +Acked-by: Thomas Petazzoni +Signed-off-by: David S. Miller +--- + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -11,28 +11,28 @@ + * warranty of any kind, whether express or implied. 
+ */ + +-#include +-#include ++#include ++#include + #include +-#include +-#include ++#include + #include +-#include +-#include + #include +-#include +-#include +-#include + #include +-#include ++#include ++#include ++#include ++#include + #include ++#include + #include + #include + #include +-#include + #include +-#include +-#include ++#include ++#include ++#include ++#include ++#include + + /* Registers */ + #define MVNETA_RXQ_CONFIG_REG(q) (0x1400 + ((q) << 2)) diff --git a/target/linux/mvebu/patches-4.4/044-net-add-a-hardware-buffer-management-helper-API.patch b/target/linux/mvebu/patches-4.4/044-net-add-a-hardware-buffer-management-helper-API.patch new file mode 100644 index 0000000000..d4bc6a0088 --- /dev/null +++ b/target/linux/mvebu/patches-4.4/044-net-add-a-hardware-buffer-management-helper-API.patch @@ -0,0 +1,159 @@ +From: Gregory CLEMENT +Date: Mon, 14 Mar 2016 09:39:04 +0100 +Subject: [PATCH] net: add a hardware buffer management helper API + +This basic implementation allows to share code between driver using +hardware buffer management. As the code is hardware agnostic, there is +few helpers, most of the optimization brought by the an HW BM has to be +done at driver level. + +Tested-by: Sebastian Careba +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. Miller +--- + create mode 100644 include/net/hwbm.h + create mode 100644 net/core/hwbm.c + +--- /dev/null ++++ b/include/net/hwbm.h +@@ -0,0 +1,28 @@ ++#ifndef _HWBM_H ++#define _HWBM_H ++ ++struct hwbm_pool { ++ /* Capacity of the pool */ ++ int size; ++ /* Size of the buffers managed */ ++ int frag_size; ++ /* Number of buffers currently used by this pool */ ++ int buf_num; ++ /* constructor called during alocation */ ++ int (*construct)(struct hwbm_pool *bm_pool, void *buf); ++ /* protect acces to the buffer counter*/ ++ spinlock_t lock; ++ /* private data */ ++ void *priv; ++}; ++#ifdef CONFIG_HWBM ++void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf); ++int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp); ++int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp); ++#else ++void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {} ++int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) { return 0; } ++int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp) ++{ return 0; } ++#endif /* CONFIG_HWBM */ ++#endif /* _HWBM_H */ +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -259,6 +259,9 @@ config XPS + depends on SMP + default y + ++config HWBM ++ bool ++ + config CGROUP_NET_PRIO + bool "Network priority cgroup" + depends on CGROUPS +--- a/net/core/Makefile ++++ b/net/core/Makefile +@@ -14,6 +14,7 @@ obj-y += dev.o ethtool.o dev_addr_ + obj-$(CONFIG_SOCK_DIAG) += sock_diag.o + obj-$(CONFIG_XFRM) += flow.o + obj-y += net-sysfs.o ++obj-$(CONFIG_HWBM) += hwbm.o + obj-$(CONFIG_PROC_FS) += net-procfs.o + obj-$(CONFIG_NET_PKTGEN) += pktgen.o + obj-$(CONFIG_NETPOLL) += netpoll.o +--- /dev/null ++++ b/net/core/hwbm.c +@@ -0,0 +1,87 @@ ++/* Support for hardware buffer manager. ++ * ++ * Copyright (C) 2016 Marvell ++ * ++ * Gregory CLEMENT ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ */ ++#include ++#include ++#include ++#include ++ ++void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) ++{ ++ if (likely(bm_pool->frag_size <= PAGE_SIZE)) ++ skb_free_frag(buf); ++ else ++ kfree(buf); ++} ++EXPORT_SYMBOL_GPL(hwbm_buf_free); ++ ++/* Refill processing for HW buffer management */ ++int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) ++{ ++ int frag_size = bm_pool->frag_size; ++ void *buf; ++ ++ if (likely(frag_size <= PAGE_SIZE)) ++ buf = netdev_alloc_frag(frag_size); ++ else ++ buf = kmalloc(frag_size, gfp); ++ ++ if (!buf) ++ return -ENOMEM; ++ ++ if (bm_pool->construct) ++ if (bm_pool->construct(bm_pool, buf)) { ++ hwbm_buf_free(bm_pool, buf); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(hwbm_pool_refill); ++ ++int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp) ++{ ++ int err, i; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&bm_pool->lock, flags); ++ if (bm_pool->buf_num == bm_pool->size) { ++ pr_warn("pool already filled\n"); ++ return bm_pool->buf_num; ++ } ++ ++ if (buf_num + bm_pool->buf_num > bm_pool->size) { ++ pr_warn("cannot allocate %d buffers for pool\n", ++ buf_num); ++ return 0; ++ } ++ ++ if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) { ++ pr_warn("Adding %d buffers to the %d current buffers will overflow\n", ++ buf_num, bm_pool->buf_num); ++ return 0; ++ } ++ ++ for (i = 0; i < buf_num; i++) { ++ err = hwbm_pool_refill(bm_pool, gfp); ++ if (err < 0) ++ break; ++ } ++ ++ /* Update BM driver with number of buffers added to pool */ ++ bm_pool->buf_num += i; ++ ++ pr_debug("hwpm pool: %d of %d buffers added\n", i, buf_num); ++ spin_unlock_irqrestore(&bm_pool->lock, flags); ++ ++ return i; ++} ++EXPORT_SYMBOL_GPL(hwbm_pool_add); diff --git a/target/linux/mvebu/patches-4.4/045-net-mvneta-bm-add-support-for-hardware-buffer-manage.patch b/target/linux/mvebu/patches-4.4/045-net-mvneta-bm-add-support-for-hardware-buffer-manage.patch new file mode 100644 index 0000000000..d343b3983f --- /dev/null +++ b/target/linux/mvebu/patches-4.4/045-net-mvneta-bm-add-support-for-hardware-buffer-manage.patch @@ -0,0 +1,1684 @@ +From: Marcin Wojtas +Date: Mon, 14 Mar 2016 09:39:03 +0100 +Subject: [PATCH] net: mvneta: bm: add support for hardware buffer management + +Buffer manager (BM) is a dedicated hardware unit that can be used by all +ethernet ports of Armada XP and 38x SoC's. It allows to offload CPU on RX +path by sparing DRAM access on refilling buffer pool, hardware-based +filling of descriptor ring data and better memory utilization due to HW +arbitration for using 'short' pools for small packets. + +Tests performed with A388 SoC working as a network bridge between two +packet generators showed increase of maximum processed 64B packets by +~20k (~555k packets with BM enabled vs ~535 packets without BM). Also +when pushing 1500B-packets with a line rate achieved, CPU load decreased +from around 25% without BM to 20% with BM. + +BM comprise up to 4 buffer pointers' (BP) rings kept in DRAM, which +are called external BP pools - BPPE. Allocating and releasing buffer +pointers (BP) to/from BPPE is performed indirectly by write/read access +to a dedicated internal SRAM, where internal BP pools (BPPI) are placed. +BM hardware controls status of BPPE automatically, as well as assigning +proper buffers to RX descriptors. For more details please refer to +Functional Specification of Armada XP or 38x SoC. 
+ +In order to enable support for a separate hardware block, common for all +ports, a new driver has to be implemented ('mvneta_bm'). It provides +initialization sequence of address space, clocks, registers, SRAM, +empty pools' structures and also obtaining optional configuration +from DT (please refer to device tree binding documentation). mvneta_bm +exposes also a necessary API to mvneta driver, as well as a dedicated +structure with BM information (bm_priv), whose presence is used as a +flag notifying of BM usage by port. It has to be ensured that mvneta_bm +probe is executed prior to the ones in ports' driver. In case BM is not +used or its probe fails, mvneta falls back to use software buffer +management. + +A sequence executed in mvneta_probe function is modified in order to have +an access to needed resources before possible port's BM initialization is +done. According to port-pools mapping provided by DT appropriate registers +are configured and the buffer pools are filled. RX path is modified +accordingly. Becaues the hardware allows a wide variety of configuration +options, following assumptions are made: +* using BM mechanisms can be selectively disabled/enabled basing + on DT configuration among the ports +* 'long' pool's single buffer size is tied to port's MTU +* using 'long' pool by port is obligatory and it cannot be shared +* using 'short' pool for smaller packets is optional +* one 'short' pool can be shared among all ports + +This commit enables hardware buffer management operation cooperating with +existing mvneta driver. New device tree binding documentation is added and +the one of mvneta is updated accordingly. + +[gregory.clement@free-electrons.com: removed the suspend/resume part] + +Signed-off-by: Marcin Wojtas +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. Miller +--- + create mode 100644 Documentation/devicetree/bindings/net/marvell-neta-bm.txt + create mode 100644 drivers/net/ethernet/marvell/mvneta_bm.c + create mode 100644 drivers/net/ethernet/marvell/mvneta_bm.h + +--- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt ++++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt +@@ -13,15 +13,30 @@ Optional properties: + Value is presented in bytes. If not used, by default 1600B is set for + "marvell,armada-370-neta" and 9800B for others. + ++Optional properties (valid only for Armada XP/38x): ++ ++- buffer-manager: a phandle to a buffer manager node. Please refer to ++ Documentation/devicetree/bindings/net/marvell-neta-bm.txt ++- bm,pool-long: ID of a pool, that will accept all packets of a size ++ higher than 'short' pool's threshold (if set) and up to MTU value. ++ Obligatory, when the port is supposed to use hardware ++ buffer management. ++- bm,pool-short: ID of a pool, that will be used for accepting ++ packets of a size lower than given threshold. If not set, the port ++ will use a single 'long' pool for all packets, as defined above. 
++ + Example: + +-ethernet@d0070000 { ++ethernet@70000 { + compatible = "marvell,armada-370-neta"; +- reg = <0xd0070000 0x2500>; ++ reg = <0x70000 0x2500>; + interrupts = <8>; + clocks = <&gate_clk 4>; + tx-csum-limit = <9800> + status = "okay"; + phy = <&phy0>; + phy-mode = "rgmii-id"; ++ buffer-manager = <&bm>; ++ bm,pool-long = <0>; ++ bm,pool-short = <1>; + }; +--- /dev/null ++++ b/Documentation/devicetree/bindings/net/marvell-neta-bm.txt +@@ -0,0 +1,49 @@ ++* Marvell Armada 380/XP Buffer Manager driver (BM) ++ ++Required properties: ++ ++- compatible: should be "marvell,armada-380-neta-bm". ++- reg: address and length of the register set for the device. ++- clocks: a pointer to the reference clock for this device. ++- internal-mem: a phandle to BM internal SRAM definition. ++ ++Optional properties (port): ++ ++- pool<0 : 3>,capacity: size of external buffer pointers' ring maintained ++ in DRAM. Can be set for each pool (id 0 : 3) separately. The value has ++ to be chosen between 128 and 16352 and it also has to be aligned to 32. ++ Otherwise the driver would adjust a given number or choose default if ++ not set. ++- pool<0 : 3>,pkt-size: maximum size of a packet accepted by a given buffer ++ pointers' pool (id 0 : 3). It will be taken into consideration only when pool ++ type is 'short'. For 'long' ones it would be overridden by port's MTU. ++ If not set a driver will choose a default value. ++ ++In order to see how to hook the BM to a given ethernet port, please ++refer to Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt. ++ ++Example: ++ ++- main node: ++ ++bm: bm@c8000 { ++ compatible = "marvell,armada-380-neta-bm"; ++ reg = <0xc8000 0xac>; ++ clocks = <&gateclk 13>; ++ internal-mem = <&bm_bppi>; ++ status = "okay"; ++ pool2,capacity = <4096>; ++ pool1,pkt-size = <512>; ++}; ++ ++- internal SRAM node: ++ ++bm_bppi: bm-bppi { ++ compatible = "mmio-sram"; ++ reg = ; ++ ranges = <0 MBUS_ID(0x0c, 0x04) 0 0x100000>; ++ #address-cells = <1>; ++ #size-cells = <1>; ++ clocks = <&gateclk 13>; ++ status = "okay"; ++}; +--- a/drivers/net/ethernet/marvell/Kconfig ++++ b/drivers/net/ethernet/marvell/Kconfig +@@ -40,6 +40,19 @@ config MVMDIO + + This driver is used by the MV643XX_ETH and MVNETA drivers. + ++config MVNETA_BM ++ tristate "Marvell Armada 38x/XP network interface BM support" ++ depends on MVNETA ++ ---help--- ++ This driver supports auxiliary block of the network ++ interface units in the Marvell ARMADA XP and ARMADA 38x SoC ++ family, which is called buffer manager. ++ ++ This driver, when enabled, strictly cooperates with mvneta ++ driver and is common for all network ports of the devices, ++ even for Armada 370 SoC, which doesn't support hardware ++ buffer management. 
++ + config MVNETA + tristate "Marvell Armada 370/38x/XP network interface support" + depends on PLAT_ORION +--- a/drivers/net/ethernet/marvell/Makefile ++++ b/drivers/net/ethernet/marvell/Makefile +@@ -4,6 +4,7 @@ + + obj-$(CONFIG_MVMDIO) += mvmdio.o + obj-$(CONFIG_MV643XX_ETH) += mv643xx_eth.o ++obj-$(CONFIG_MVNETA_BM) += mvneta_bm.o + obj-$(CONFIG_MVNETA) += mvneta.o + obj-$(CONFIG_MVPP2) += mvpp2.o + obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include "mvneta_bm.h" + #include + #include + #include +@@ -37,6 +38,10 @@ + /* Registers */ + #define MVNETA_RXQ_CONFIG_REG(q) (0x1400 + ((q) << 2)) + #define MVNETA_RXQ_HW_BUF_ALLOC BIT(0) ++#define MVNETA_RXQ_SHORT_POOL_ID_SHIFT 4 ++#define MVNETA_RXQ_SHORT_POOL_ID_MASK 0x30 ++#define MVNETA_RXQ_LONG_POOL_ID_SHIFT 6 ++#define MVNETA_RXQ_LONG_POOL_ID_MASK 0xc0 + #define MVNETA_RXQ_PKT_OFFSET_ALL_MASK (0xf << 8) + #define MVNETA_RXQ_PKT_OFFSET_MASK(offs) ((offs) << 8) + #define MVNETA_RXQ_THRESHOLD_REG(q) (0x14c0 + ((q) << 2)) +@@ -50,6 +55,9 @@ + #define MVNETA_RXQ_STATUS_UPDATE_REG(q) (0x1500 + ((q) << 2)) + #define MVNETA_RXQ_ADD_NON_OCCUPIED_SHIFT 16 + #define MVNETA_RXQ_ADD_NON_OCCUPIED_MAX 255 ++#define MVNETA_PORT_POOL_BUFFER_SZ_REG(pool) (0x1700 + ((pool) << 2)) ++#define MVNETA_PORT_POOL_BUFFER_SZ_SHIFT 3 ++#define MVNETA_PORT_POOL_BUFFER_SZ_MASK 0xfff8 + #define MVNETA_PORT_RX_RESET 0x1cc0 + #define MVNETA_PORT_RX_DMA_RESET BIT(0) + #define MVNETA_PHY_ADDR 0x2000 +@@ -107,6 +115,7 @@ + #define MVNETA_GMAC_CLOCK_DIVIDER 0x24f4 + #define MVNETA_GMAC_1MS_CLOCK_ENABLE BIT(31) + #define MVNETA_ACC_MODE 0x2500 ++#define MVNETA_BM_ADDRESS 0x2504 + #define MVNETA_CPU_MAP(cpu) (0x2540 + ((cpu) << 2)) + #define MVNETA_CPU_RXQ_ACCESS_ALL_MASK 0x000000ff + #define MVNETA_CPU_TXQ_ACCESS_ALL_MASK 0x0000ff00 +@@ -253,7 +262,10 @@ + #define MVNETA_CPU_D_CACHE_LINE_SIZE 32 + #define MVNETA_TX_CSUM_DEF_SIZE 1600 + #define MVNETA_TX_CSUM_MAX_SIZE 9800 +-#define MVNETA_ACC_MODE_EXT 1 ++#define MVNETA_ACC_MODE_EXT1 1 ++#define MVNETA_ACC_MODE_EXT2 2 ++ ++#define MVNETA_MAX_DECODE_WIN 6 + + /* Timeout constants */ + #define MVNETA_TX_DISABLE_TIMEOUT_MSEC 1000 +@@ -293,7 +305,8 @@ + ((addr >= txq->tso_hdrs_phys) && \ + (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE)) + +-#define MVNETA_RX_BUF_SIZE(pkt_size) ((pkt_size) + NET_SKB_PAD) ++#define MVNETA_RX_GET_BM_POOL_ID(rxd) \ ++ (((rxd)->status & MVNETA_RXD_BM_POOL_MASK) >> MVNETA_RXD_BM_POOL_SHIFT) + + struct mvneta_statistic { + unsigned short offset; +@@ -359,6 +372,7 @@ struct mvneta_pcpu_port { + }; + + struct mvneta_port { ++ u8 id; + struct mvneta_pcpu_port __percpu *ports; + struct mvneta_pcpu_stats __percpu *stats; + +@@ -392,6 +406,11 @@ struct mvneta_port { + unsigned int tx_csum_limit; + unsigned int use_inband_status:1; + ++ struct mvneta_bm *bm_priv; ++ struct mvneta_bm_pool *pool_long; ++ struct mvneta_bm_pool *pool_short; ++ int bm_win_id; ++ + u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)]; + + u32 indir[MVNETA_RSS_LU_TABLE_SIZE]; +@@ -417,6 +436,8 @@ struct mvneta_port { + #define MVNETA_TX_L4_CSUM_NOT BIT(31) + + #define MVNETA_RXD_ERR_CRC 0x0 ++#define MVNETA_RXD_BM_POOL_SHIFT 13 ++#define MVNETA_RXD_BM_POOL_MASK (BIT(13) | BIT(14)) + #define MVNETA_RXD_ERR_SUMMARY BIT(16) + #define MVNETA_RXD_ERR_OVERRUN BIT(17) + #define MVNETA_RXD_ERR_LEN BIT(18) +@@ -561,6 +582,9 @@ static int rxq_def; + + static int rx_copybreak __read_mostly = 256; 
+ ++/* HW BM need that each port be identify by a unique ID */ ++static int global_port_id; ++ + #define MVNETA_DRIVER_NAME "mvneta" + #define MVNETA_DRIVER_VERSION "1.0" + +@@ -827,6 +851,214 @@ static void mvneta_rxq_bm_disable(struct + mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); + } + ++/* Enable buffer management (BM) */ ++static void mvneta_rxq_bm_enable(struct mvneta_port *pp, ++ struct mvneta_rx_queue *rxq) ++{ ++ u32 val; ++ ++ val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id)); ++ val |= MVNETA_RXQ_HW_BUF_ALLOC; ++ mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); ++} ++ ++/* Notify HW about port's assignment of pool for bigger packets */ ++static void mvneta_rxq_long_pool_set(struct mvneta_port *pp, ++ struct mvneta_rx_queue *rxq) ++{ ++ u32 val; ++ ++ val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id)); ++ val &= ~MVNETA_RXQ_LONG_POOL_ID_MASK; ++ val |= (pp->pool_long->id << MVNETA_RXQ_LONG_POOL_ID_SHIFT); ++ ++ mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); ++} ++ ++/* Notify HW about port's assignment of pool for smaller packets */ ++static void mvneta_rxq_short_pool_set(struct mvneta_port *pp, ++ struct mvneta_rx_queue *rxq) ++{ ++ u32 val; ++ ++ val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id)); ++ val &= ~MVNETA_RXQ_SHORT_POOL_ID_MASK; ++ val |= (pp->pool_short->id << MVNETA_RXQ_SHORT_POOL_ID_SHIFT); ++ ++ mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); ++} ++ ++/* Set port's receive buffer size for assigned BM pool */ ++static inline void mvneta_bm_pool_bufsize_set(struct mvneta_port *pp, ++ int buf_size, ++ u8 pool_id) ++{ ++ u32 val; ++ ++ if (!IS_ALIGNED(buf_size, 8)) { ++ dev_warn(pp->dev->dev.parent, ++ "illegal buf_size value %d, round to %d\n", ++ buf_size, ALIGN(buf_size, 8)); ++ buf_size = ALIGN(buf_size, 8); ++ } ++ ++ val = mvreg_read(pp, MVNETA_PORT_POOL_BUFFER_SZ_REG(pool_id)); ++ val |= buf_size & MVNETA_PORT_POOL_BUFFER_SZ_MASK; ++ mvreg_write(pp, MVNETA_PORT_POOL_BUFFER_SZ_REG(pool_id), val); ++} ++ ++/* Configure MBUS window in order to enable access BM internal SRAM */ ++static int mvneta_mbus_io_win_set(struct mvneta_port *pp, u32 base, u32 wsize, ++ u8 target, u8 attr) ++{ ++ u32 win_enable, win_protect; ++ int i; ++ ++ win_enable = mvreg_read(pp, MVNETA_BASE_ADDR_ENABLE); ++ ++ if (pp->bm_win_id < 0) { ++ /* Find first not occupied window */ ++ for (i = 0; i < MVNETA_MAX_DECODE_WIN; i++) { ++ if (win_enable & (1 << i)) { ++ pp->bm_win_id = i; ++ break; ++ } ++ } ++ if (i == MVNETA_MAX_DECODE_WIN) ++ return -ENOMEM; ++ } else { ++ i = pp->bm_win_id; ++ } ++ ++ mvreg_write(pp, MVNETA_WIN_BASE(i), 0); ++ mvreg_write(pp, MVNETA_WIN_SIZE(i), 0); ++ ++ if (i < 4) ++ mvreg_write(pp, MVNETA_WIN_REMAP(i), 0); ++ ++ mvreg_write(pp, MVNETA_WIN_BASE(i), (base & 0xffff0000) | ++ (attr << 8) | target); ++ ++ mvreg_write(pp, MVNETA_WIN_SIZE(i), (wsize - 1) & 0xffff0000); ++ ++ win_protect = mvreg_read(pp, MVNETA_ACCESS_PROTECT_ENABLE); ++ win_protect |= 3 << (2 * i); ++ mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect); ++ ++ win_enable &= ~(1 << i); ++ mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable); ++ ++ return 0; ++} ++ ++/* Assign and initialize pools for port. In case of fail ++ * buffer manager will remain disabled for current port. 
++ */ ++static int mvneta_bm_port_init(struct platform_device *pdev, ++ struct mvneta_port *pp) ++{ ++ struct device_node *dn = pdev->dev.of_node; ++ u32 long_pool_id, short_pool_id, wsize; ++ u8 target, attr; ++ int err; ++ ++ /* Get BM window information */ ++ err = mvebu_mbus_get_io_win_info(pp->bm_priv->bppi_phys_addr, &wsize, ++ &target, &attr); ++ if (err < 0) ++ return err; ++ ++ pp->bm_win_id = -1; ++ ++ /* Open NETA -> BM window */ ++ err = mvneta_mbus_io_win_set(pp, pp->bm_priv->bppi_phys_addr, wsize, ++ target, attr); ++ if (err < 0) { ++ netdev_info(pp->dev, "fail to configure mbus window to BM\n"); ++ return err; ++ } ++ ++ if (of_property_read_u32(dn, "bm,pool-long", &long_pool_id)) { ++ netdev_info(pp->dev, "missing long pool id\n"); ++ return -EINVAL; ++ } ++ ++ /* Create port's long pool depending on mtu */ ++ pp->pool_long = mvneta_bm_pool_use(pp->bm_priv, long_pool_id, ++ MVNETA_BM_LONG, pp->id, ++ MVNETA_RX_PKT_SIZE(pp->dev->mtu)); ++ if (!pp->pool_long) { ++ netdev_info(pp->dev, "fail to obtain long pool for port\n"); ++ return -ENOMEM; ++ } ++ ++ pp->pool_long->port_map |= 1 << pp->id; ++ ++ mvneta_bm_pool_bufsize_set(pp, pp->pool_long->buf_size, ++ pp->pool_long->id); ++ ++ /* If short pool id is not defined, assume using single pool */ ++ if (of_property_read_u32(dn, "bm,pool-short", &short_pool_id)) ++ short_pool_id = long_pool_id; ++ ++ /* Create port's short pool */ ++ pp->pool_short = mvneta_bm_pool_use(pp->bm_priv, short_pool_id, ++ MVNETA_BM_SHORT, pp->id, ++ MVNETA_BM_SHORT_PKT_SIZE); ++ if (!pp->pool_short) { ++ netdev_info(pp->dev, "fail to obtain short pool for port\n"); ++ mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id); ++ return -ENOMEM; ++ } ++ ++ if (short_pool_id != long_pool_id) { ++ pp->pool_short->port_map |= 1 << pp->id; ++ mvneta_bm_pool_bufsize_set(pp, pp->pool_short->buf_size, ++ pp->pool_short->id); ++ } ++ ++ return 0; ++} ++ ++/* Update settings of a pool for bigger packets */ ++static void mvneta_bm_update_mtu(struct mvneta_port *pp, int mtu) ++{ ++ struct mvneta_bm_pool *bm_pool = pp->pool_long; ++ int num; ++ ++ /* Release all buffers from long pool */ ++ mvneta_bm_bufs_free(pp->bm_priv, bm_pool, 1 << pp->id); ++ if (bm_pool->buf_num) { ++ WARN(1, "cannot free all buffers in pool %d\n", ++ bm_pool->id); ++ goto bm_mtu_err; ++ } ++ ++ bm_pool->pkt_size = MVNETA_RX_PKT_SIZE(mtu); ++ bm_pool->buf_size = MVNETA_RX_BUF_SIZE(bm_pool->pkt_size); ++ bm_pool->frag_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + ++ SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(bm_pool->pkt_size)); ++ ++ /* Fill entire long pool */ ++ num = mvneta_bm_bufs_add(pp->bm_priv, bm_pool, bm_pool->size); ++ if (num != bm_pool->size) { ++ WARN(1, "pool %d: %d of %d allocated\n", ++ bm_pool->id, num, bm_pool->size); ++ goto bm_mtu_err; ++ } ++ mvneta_bm_pool_bufsize_set(pp, bm_pool->buf_size, bm_pool->id); ++ ++ return; ++ ++bm_mtu_err: ++ mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id); ++ mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, 1 << pp->id); ++ ++ pp->bm_priv = NULL; ++ mvreg_write(pp, MVNETA_ACC_MODE, MVNETA_ACC_MODE_EXT1); ++ netdev_info(pp->dev, "fail to update MTU, fall back to software BM\n"); ++} ++ + /* Start the Ethernet port RX and TX activity */ + static void mvneta_port_up(struct mvneta_port *pp) + { +@@ -1147,9 +1379,17 @@ static void mvneta_defaults_set(struct m + mvreg_write(pp, MVNETA_PORT_RX_RESET, 0); + + /* Set Port Acceleration Mode */ +- val = MVNETA_ACC_MODE_EXT; ++ if (pp->bm_priv) ++ /* HW buffer management + 
legacy parser */ ++ val = MVNETA_ACC_MODE_EXT2; ++ else ++ /* SW buffer management + legacy parser */ ++ val = MVNETA_ACC_MODE_EXT1; + mvreg_write(pp, MVNETA_ACC_MODE, val); + ++ if (pp->bm_priv) ++ mvreg_write(pp, MVNETA_BM_ADDRESS, pp->bm_priv->bppi_phys_addr); ++ + /* Update val of portCfg register accordingly with all RxQueue types */ + val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def); + mvreg_write(pp, MVNETA_PORT_CONFIG, val); +@@ -1516,23 +1756,25 @@ static void mvneta_txq_done(struct mvnet + } + } + +-static void *mvneta_frag_alloc(const struct mvneta_port *pp) ++void *mvneta_frag_alloc(unsigned int frag_size) + { +- if (likely(pp->frag_size <= PAGE_SIZE)) +- return netdev_alloc_frag(pp->frag_size); ++ if (likely(frag_size <= PAGE_SIZE)) ++ return netdev_alloc_frag(frag_size); + else +- return kmalloc(pp->frag_size, GFP_ATOMIC); ++ return kmalloc(frag_size, GFP_ATOMIC); + } ++EXPORT_SYMBOL_GPL(mvneta_frag_alloc); + +-static void mvneta_frag_free(const struct mvneta_port *pp, void *data) ++void mvneta_frag_free(unsigned int frag_size, void *data) + { +- if (likely(pp->frag_size <= PAGE_SIZE)) ++ if (likely(frag_size <= PAGE_SIZE)) + skb_free_frag(data); + else + kfree(data); + } ++EXPORT_SYMBOL_GPL(mvneta_frag_free); + +-/* Refill processing */ ++/* Refill processing for SW buffer management */ + static int mvneta_rx_refill(struct mvneta_port *pp, + struct mvneta_rx_desc *rx_desc) + +@@ -1540,7 +1782,7 @@ static int mvneta_rx_refill(struct mvnet + dma_addr_t phys_addr; + void *data; + +- data = mvneta_frag_alloc(pp); ++ data = mvneta_frag_alloc(pp->frag_size); + if (!data) + return -ENOMEM; + +@@ -1548,7 +1790,7 @@ static int mvneta_rx_refill(struct mvnet + MVNETA_RX_BUF_SIZE(pp->pkt_size), + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(pp->dev->dev.parent, phys_addr))) { +- mvneta_frag_free(pp, data); ++ mvneta_frag_free(pp->frag_size, data); + return -ENOMEM; + } + +@@ -1594,22 +1836,156 @@ static void mvneta_rxq_drop_pkts(struct + int rx_done, i; + + rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq); ++ if (rx_done) ++ mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done); ++ ++ if (pp->bm_priv) { ++ for (i = 0; i < rx_done; i++) { ++ struct mvneta_rx_desc *rx_desc = ++ mvneta_rxq_next_desc_get(rxq); ++ u8 pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc); ++ struct mvneta_bm_pool *bm_pool; ++ ++ bm_pool = &pp->bm_priv->bm_pools[pool_id]; ++ /* Return dropped buffer to the pool */ ++ mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool, ++ rx_desc->buf_phys_addr); ++ } ++ return; ++ } ++ + for (i = 0; i < rxq->size; i++) { + struct mvneta_rx_desc *rx_desc = rxq->descs + i; + void *data = (void *)rx_desc->buf_cookie; + + dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr, + MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); +- mvneta_frag_free(pp, data); ++ mvneta_frag_free(pp->frag_size, data); + } ++} + +- if (rx_done) +- mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done); ++/* Main rx processing when using software buffer management */ ++static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo, ++ struct mvneta_rx_queue *rxq) ++{ ++ struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports); ++ struct net_device *dev = pp->dev; ++ int rx_done; ++ u32 rcvd_pkts = 0; ++ u32 rcvd_bytes = 0; ++ ++ /* Get number of received packets */ ++ rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq); ++ ++ if (rx_todo > rx_done) ++ rx_todo = rx_done; ++ ++ rx_done = 0; ++ ++ /* Fairness NAPI loop */ ++ while (rx_done < rx_todo) { ++ struct mvneta_rx_desc *rx_desc = 
mvneta_rxq_next_desc_get(rxq); ++ struct sk_buff *skb; ++ unsigned char *data; ++ dma_addr_t phys_addr; ++ u32 rx_status, frag_size; ++ int rx_bytes, err; ++ ++ rx_done++; ++ rx_status = rx_desc->status; ++ rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE); ++ data = (unsigned char *)rx_desc->buf_cookie; ++ phys_addr = rx_desc->buf_phys_addr; ++ ++ if (!mvneta_rxq_desc_is_first_last(rx_status) || ++ (rx_status & MVNETA_RXD_ERR_SUMMARY)) { ++err_drop_frame: ++ dev->stats.rx_errors++; ++ mvneta_rx_error(pp, rx_desc); ++ /* leave the descriptor untouched */ ++ continue; ++ } ++ ++ if (rx_bytes <= rx_copybreak) { ++ /* better copy a small frame and not unmap the DMA region */ ++ skb = netdev_alloc_skb_ip_align(dev, rx_bytes); ++ if (unlikely(!skb)) ++ goto err_drop_frame; ++ ++ dma_sync_single_range_for_cpu(dev->dev.parent, ++ rx_desc->buf_phys_addr, ++ MVNETA_MH_SIZE + NET_SKB_PAD, ++ rx_bytes, ++ DMA_FROM_DEVICE); ++ memcpy(skb_put(skb, rx_bytes), ++ data + MVNETA_MH_SIZE + NET_SKB_PAD, ++ rx_bytes); ++ ++ skb->protocol = eth_type_trans(skb, dev); ++ mvneta_rx_csum(pp, rx_status, skb); ++ napi_gro_receive(&port->napi, skb); ++ ++ rcvd_pkts++; ++ rcvd_bytes += rx_bytes; ++ ++ /* leave the descriptor and buffer untouched */ ++ continue; ++ } ++ ++ /* Refill processing */ ++ err = mvneta_rx_refill(pp, rx_desc); ++ if (err) { ++ netdev_err(dev, "Linux processing - Can't refill\n"); ++ rxq->missed++; ++ goto err_drop_frame; ++ } ++ ++ frag_size = pp->frag_size; ++ ++ skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size); ++ ++ /* After refill old buffer has to be unmapped regardless ++ * the skb is successfully built or not. ++ */ ++ dma_unmap_single(dev->dev.parent, phys_addr, ++ MVNETA_RX_BUF_SIZE(pp->pkt_size), ++ DMA_FROM_DEVICE); ++ ++ if (!skb) ++ goto err_drop_frame; ++ ++ rcvd_pkts++; ++ rcvd_bytes += rx_bytes; ++ ++ /* Linux processing */ ++ skb_reserve(skb, MVNETA_MH_SIZE + NET_SKB_PAD); ++ skb_put(skb, rx_bytes); ++ ++ skb->protocol = eth_type_trans(skb, dev); ++ ++ mvneta_rx_csum(pp, rx_status, skb); ++ ++ napi_gro_receive(&port->napi, skb); ++ } ++ ++ if (rcvd_pkts) { ++ struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); ++ ++ u64_stats_update_begin(&stats->syncp); ++ stats->rx_packets += rcvd_pkts; ++ stats->rx_bytes += rcvd_bytes; ++ u64_stats_update_end(&stats->syncp); ++ } ++ ++ /* Update rxq management counters */ ++ mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done); ++ ++ return rx_done; + } + +-/* Main rx processing */ +-static int mvneta_rx(struct mvneta_port *pp, int rx_todo, +- struct mvneta_rx_queue *rxq) ++/* Main rx processing when using hardware buffer management */ ++static int mvneta_rx_hwbm(struct mvneta_port *pp, int rx_todo, ++ struct mvneta_rx_queue *rxq) + { + struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports); + struct net_device *dev = pp->dev; +@@ -1628,21 +2004,29 @@ static int mvneta_rx(struct mvneta_port + /* Fairness NAPI loop */ + while (rx_done < rx_todo) { + struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq); ++ struct mvneta_bm_pool *bm_pool = NULL; + struct sk_buff *skb; + unsigned char *data; + dma_addr_t phys_addr; +- u32 rx_status; ++ u32 rx_status, frag_size; + int rx_bytes, err; ++ u8 pool_id; + + rx_done++; + rx_status = rx_desc->status; + rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE); + data = (unsigned char *)rx_desc->buf_cookie; + phys_addr = rx_desc->buf_phys_addr; ++ pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc); ++ bm_pool = &pp->bm_priv->bm_pools[pool_id]; + + if 
(!mvneta_rxq_desc_is_first_last(rx_status) || + (rx_status & MVNETA_RXD_ERR_SUMMARY)) { +- err_drop_frame: ++err_drop_frame_ret_pool: ++ /* Return the buffer to the pool */ ++ mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool, ++ rx_desc->buf_phys_addr); ++err_drop_frame: + dev->stats.rx_errors++; + mvneta_rx_error(pp, rx_desc); + /* leave the descriptor untouched */ +@@ -1653,7 +2037,7 @@ static int mvneta_rx(struct mvneta_port + /* better copy a small frame and not unmap the DMA region */ + skb = netdev_alloc_skb_ip_align(dev, rx_bytes); + if (unlikely(!skb)) +- goto err_drop_frame; ++ goto err_drop_frame_ret_pool; + + dma_sync_single_range_for_cpu(dev->dev.parent, + rx_desc->buf_phys_addr, +@@ -1671,26 +2055,31 @@ static int mvneta_rx(struct mvneta_port + rcvd_pkts++; + rcvd_bytes += rx_bytes; + ++ /* Return the buffer to the pool */ ++ mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool, ++ rx_desc->buf_phys_addr); ++ + /* leave the descriptor and buffer untouched */ + continue; + } + + /* Refill processing */ +- err = mvneta_rx_refill(pp, rx_desc); ++ err = mvneta_bm_pool_refill(pp->bm_priv, bm_pool); + if (err) { + netdev_err(dev, "Linux processing - Can't refill\n"); + rxq->missed++; +- goto err_drop_frame; ++ goto err_drop_frame_ret_pool; + } + +- skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size); ++ frag_size = bm_pool->frag_size; ++ ++ skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size); + + /* After refill old buffer has to be unmapped regardless + * the skb is successfully built or not. + */ +- dma_unmap_single(dev->dev.parent, phys_addr, +- MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); +- ++ dma_unmap_single(&pp->bm_priv->pdev->dev, phys_addr, ++ bm_pool->buf_size, DMA_FROM_DEVICE); + if (!skb) + goto err_drop_frame; + +@@ -2295,7 +2684,10 @@ static int mvneta_poll(struct napi_struc + + if (rx_queue) { + rx_queue = rx_queue - 1; +- rx_done = mvneta_rx(pp, budget, &pp->rxqs[rx_queue]); ++ if (pp->bm_priv) ++ rx_done = mvneta_rx_hwbm(pp, budget, &pp->rxqs[rx_queue]); ++ else ++ rx_done = mvneta_rx_swbm(pp, budget, &pp->rxqs[rx_queue]); + } + + budget -= rx_done; +@@ -2384,9 +2776,17 @@ static int mvneta_rxq_init(struct mvneta + mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal); + mvneta_rx_time_coal_set(pp, rxq, rxq->time_coal); + +- /* Fill RXQ with buffers from RX pool */ +- mvneta_rxq_buf_size_set(pp, rxq, MVNETA_RX_BUF_SIZE(pp->pkt_size)); +- mvneta_rxq_bm_disable(pp, rxq); ++ if (!pp->bm_priv) { ++ /* Fill RXQ with buffers from RX pool */ ++ mvneta_rxq_buf_size_set(pp, rxq, ++ MVNETA_RX_BUF_SIZE(pp->pkt_size)); ++ mvneta_rxq_bm_disable(pp, rxq); ++ } else { ++ mvneta_rxq_bm_enable(pp, rxq); ++ mvneta_rxq_long_pool_set(pp, rxq); ++ mvneta_rxq_short_pool_set(pp, rxq); ++ } ++ + mvneta_rxq_fill(pp, rxq, rxq->size); + + return 0; +@@ -2659,6 +3059,9 @@ static int mvneta_change_mtu(struct net_ + dev->mtu = mtu; + + if (!netif_running(dev)) { ++ if (pp->bm_priv) ++ mvneta_bm_update_mtu(pp, mtu); ++ + netdev_update_features(dev); + return 0; + } +@@ -2671,6 +3074,9 @@ static int mvneta_change_mtu(struct net_ + mvneta_cleanup_txqs(pp); + mvneta_cleanup_rxqs(pp); + ++ if (pp->bm_priv) ++ mvneta_bm_update_mtu(pp, mtu); ++ + pp->pkt_size = MVNETA_RX_PKT_SIZE(dev->mtu); + pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); +@@ -3563,6 +3969,7 @@ static int mvneta_probe(struct platform_ + struct resource *res; + struct device_node *dn = pdev->dev.of_node; + struct device_node *phy_node; ++ struct 
device_node *bm_node; + struct mvneta_port *pp; + struct net_device *dev; + const char *dt_mac_addr; +@@ -3690,26 +4097,39 @@ static int mvneta_probe(struct platform_ + + pp->tx_csum_limit = tx_csum_limit; + ++ dram_target_info = mv_mbus_dram_info(); ++ if (dram_target_info) ++ mvneta_conf_mbus_windows(pp, dram_target_info); ++ + pp->tx_ring_size = MVNETA_MAX_TXD; + pp->rx_ring_size = MVNETA_MAX_RXD; + + pp->dev = dev; + SET_NETDEV_DEV(dev, &pdev->dev); + ++ pp->id = global_port_id++; ++ ++ /* Obtain access to BM resources if enabled and already initialized */ ++ bm_node = of_parse_phandle(dn, "buffer-manager", 0); ++ if (bm_node && bm_node->data) { ++ pp->bm_priv = bm_node->data; ++ err = mvneta_bm_port_init(pdev, pp); ++ if (err < 0) { ++ dev_info(&pdev->dev, "use SW buffer management\n"); ++ pp->bm_priv = NULL; ++ } ++ } ++ + err = mvneta_init(&pdev->dev, pp); + if (err < 0) +- goto err_free_stats; ++ goto err_netdev; + + err = mvneta_port_power_up(pp, phy_mode); + if (err < 0) { + dev_err(&pdev->dev, "can't power up port\n"); +- goto err_free_stats; ++ goto err_netdev; + } + +- dram_target_info = mv_mbus_dram_info(); +- if (dram_target_info) +- mvneta_conf_mbus_windows(pp, dram_target_info); +- + for_each_present_cpu(cpu) { + struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); + +@@ -3744,6 +4164,13 @@ static int mvneta_probe(struct platform_ + + return 0; + ++err_netdev: ++ unregister_netdev(dev); ++ if (pp->bm_priv) { ++ mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id); ++ mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, ++ 1 << pp->id); ++ } + err_free_stats: + free_percpu(pp->stats); + err_free_ports: +@@ -3773,6 +4200,12 @@ static int mvneta_remove(struct platform + of_node_put(pp->phy_node); + free_netdev(dev); + ++ if (pp->bm_priv) { ++ mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id); ++ mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, ++ 1 << pp->id); ++ } ++ + return 0; + } + +--- /dev/null ++++ b/drivers/net/ethernet/marvell/mvneta_bm.c +@@ -0,0 +1,546 @@ ++/* ++ * Driver for Marvell NETA network controller Buffer Manager. ++ * ++ * Copyright (C) 2015 Marvell ++ * ++ * Marcin Wojtas ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "mvneta_bm.h" ++ ++#define MVNETA_BM_DRIVER_NAME "mvneta_bm" ++#define MVNETA_BM_DRIVER_VERSION "1.0" ++ ++static void mvneta_bm_write(struct mvneta_bm *priv, u32 offset, u32 data) ++{ ++ writel(data, priv->reg_base + offset); ++} ++ ++static u32 mvneta_bm_read(struct mvneta_bm *priv, u32 offset) ++{ ++ return readl(priv->reg_base + offset); ++} ++ ++static void mvneta_bm_pool_enable(struct mvneta_bm *priv, int pool_id) ++{ ++ u32 val; ++ ++ val = mvneta_bm_read(priv, MVNETA_BM_POOL_BASE_REG(pool_id)); ++ val |= MVNETA_BM_POOL_ENABLE_MASK; ++ mvneta_bm_write(priv, MVNETA_BM_POOL_BASE_REG(pool_id), val); ++ ++ /* Clear BM cause register */ ++ mvneta_bm_write(priv, MVNETA_BM_INTR_CAUSE_REG, 0); ++} ++ ++static void mvneta_bm_pool_disable(struct mvneta_bm *priv, int pool_id) ++{ ++ u32 val; ++ ++ val = mvneta_bm_read(priv, MVNETA_BM_POOL_BASE_REG(pool_id)); ++ val &= ~MVNETA_BM_POOL_ENABLE_MASK; ++ mvneta_bm_write(priv, MVNETA_BM_POOL_BASE_REG(pool_id), val); ++} ++ ++static inline void mvneta_bm_config_set(struct mvneta_bm *priv, u32 mask) ++{ ++ u32 val; ++ ++ val = mvneta_bm_read(priv, MVNETA_BM_CONFIG_REG); ++ val |= mask; ++ mvneta_bm_write(priv, MVNETA_BM_CONFIG_REG, val); ++} ++ ++static inline void mvneta_bm_config_clear(struct mvneta_bm *priv, u32 mask) ++{ ++ u32 val; ++ ++ val = mvneta_bm_read(priv, MVNETA_BM_CONFIG_REG); ++ val &= ~mask; ++ mvneta_bm_write(priv, MVNETA_BM_CONFIG_REG, val); ++} ++ ++static void mvneta_bm_pool_target_set(struct mvneta_bm *priv, int pool_id, ++ u8 target_id, u8 attr) ++{ ++ u32 val; ++ ++ val = mvneta_bm_read(priv, MVNETA_BM_XBAR_POOL_REG(pool_id)); ++ val &= ~MVNETA_BM_TARGET_ID_MASK(pool_id); ++ val &= ~MVNETA_BM_XBAR_ATTR_MASK(pool_id); ++ val |= MVNETA_BM_TARGET_ID_VAL(pool_id, target_id); ++ val |= MVNETA_BM_XBAR_ATTR_VAL(pool_id, attr); ++ ++ mvneta_bm_write(priv, MVNETA_BM_XBAR_POOL_REG(pool_id), val); ++} ++ ++/* Allocate skb for BM pool */ ++void *mvneta_buf_alloc(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, ++ dma_addr_t *buf_phys_addr) ++{ ++ void *buf; ++ dma_addr_t phys_addr; ++ ++ buf = mvneta_frag_alloc(bm_pool->frag_size); ++ if (!buf) ++ return NULL; ++ ++ /* In order to update buf_cookie field of RX descriptor properly, ++ * BM hardware expects buf virtual address to be placed in the ++ * first four bytes of mapped buffer. 
++ */ ++ *(u32 *)buf = (u32)buf; ++ phys_addr = dma_map_single(&priv->pdev->dev, buf, bm_pool->buf_size, ++ DMA_FROM_DEVICE); ++ if (unlikely(dma_mapping_error(&priv->pdev->dev, phys_addr))) { ++ mvneta_frag_free(bm_pool->frag_size, buf); ++ return NULL; ++ } ++ *buf_phys_addr = phys_addr; ++ ++ return buf; ++} ++ ++/* Refill processing for HW buffer management */ ++int mvneta_bm_pool_refill(struct mvneta_bm *priv, ++ struct mvneta_bm_pool *bm_pool) ++{ ++ dma_addr_t buf_phys_addr; ++ void *buf; ++ ++ buf = mvneta_buf_alloc(priv, bm_pool, &buf_phys_addr); ++ if (!buf) ++ return -ENOMEM; ++ ++ mvneta_bm_pool_put_bp(priv, bm_pool, buf_phys_addr); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(mvneta_bm_pool_refill); ++ ++/* Allocate buffers for the pool */ ++int mvneta_bm_bufs_add(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, ++ int buf_num) ++{ ++ int err, i; ++ ++ if (bm_pool->buf_num == bm_pool->size) { ++ dev_dbg(&priv->pdev->dev, "pool %d already filled\n", ++ bm_pool->id); ++ return bm_pool->buf_num; ++ } ++ ++ if (buf_num < 0 || ++ (buf_num + bm_pool->buf_num > bm_pool->size)) { ++ dev_err(&priv->pdev->dev, ++ "cannot allocate %d buffers for pool %d\n", ++ buf_num, bm_pool->id); ++ return 0; ++ } ++ ++ for (i = 0; i < buf_num; i++) { ++ err = mvneta_bm_pool_refill(priv, bm_pool); ++ if (err < 0) ++ break; ++ } ++ ++ /* Update BM driver with number of buffers added to pool */ ++ bm_pool->buf_num += i; ++ ++ dev_dbg(&priv->pdev->dev, ++ "%s pool %d: pkt_size=%4d, buf_size=%4d, frag_size=%4d\n", ++ bm_pool->type == MVNETA_BM_SHORT ? "short" : "long", ++ bm_pool->id, bm_pool->pkt_size, bm_pool->buf_size, ++ bm_pool->frag_size); ++ ++ dev_dbg(&priv->pdev->dev, ++ "%s pool %d: %d of %d buffers added\n", ++ bm_pool->type == MVNETA_BM_SHORT ? "short" : "long", ++ bm_pool->id, i, buf_num); ++ ++ return i; ++} ++EXPORT_SYMBOL_GPL(mvneta_bm_bufs_add); ++ ++/* Create pool */ ++static int mvneta_bm_pool_create(struct mvneta_bm *priv, ++ struct mvneta_bm_pool *bm_pool) ++{ ++ struct platform_device *pdev = priv->pdev; ++ u8 target_id, attr; ++ int size_bytes, err; ++ ++ size_bytes = sizeof(u32) * bm_pool->size; ++ bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, size_bytes, ++ &bm_pool->phys_addr, ++ GFP_KERNEL); ++ if (!bm_pool->virt_addr) ++ return -ENOMEM; ++ ++ if (!IS_ALIGNED((u32)bm_pool->virt_addr, MVNETA_BM_POOL_PTR_ALIGN)) { ++ dma_free_coherent(&pdev->dev, size_bytes, bm_pool->virt_addr, ++ bm_pool->phys_addr); ++ dev_err(&pdev->dev, "BM pool %d is not %d bytes aligned\n", ++ bm_pool->id, MVNETA_BM_POOL_PTR_ALIGN); ++ return -ENOMEM; ++ } ++ ++ err = mvebu_mbus_get_dram_win_info(bm_pool->phys_addr, &target_id, ++ &attr); ++ if (err < 0) { ++ dma_free_coherent(&pdev->dev, size_bytes, bm_pool->virt_addr, ++ bm_pool->phys_addr); ++ return err; ++ } ++ ++ /* Set pool address */ ++ mvneta_bm_write(priv, MVNETA_BM_POOL_BASE_REG(bm_pool->id), ++ bm_pool->phys_addr); ++ ++ mvneta_bm_pool_target_set(priv, bm_pool->id, target_id, attr); ++ mvneta_bm_pool_enable(priv, bm_pool->id); ++ ++ return 0; ++} ++ ++/* Notify the driver that BM pool is being used as specific type and return the ++ * pool pointer on success ++ */ ++struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id, ++ enum mvneta_bm_type type, u8 port_id, ++ int pkt_size) ++{ ++ struct mvneta_bm_pool *new_pool = &priv->bm_pools[pool_id]; ++ int num, err; ++ ++ if (new_pool->type == MVNETA_BM_LONG && ++ new_pool->port_map != 1 << port_id) { ++ dev_err(&priv->pdev->dev, ++ "long pool cannot be shared by the 
ports\n"); ++ return NULL; ++ } ++ ++ if (new_pool->type == MVNETA_BM_SHORT && new_pool->type != type) { ++ dev_err(&priv->pdev->dev, ++ "mixing pools' types between the ports is forbidden\n"); ++ return NULL; ++ } ++ ++ if (new_pool->pkt_size == 0 || type != MVNETA_BM_SHORT) ++ new_pool->pkt_size = pkt_size; ++ ++ /* Allocate buffers in case BM pool hasn't been used yet */ ++ if (new_pool->type == MVNETA_BM_FREE) { ++ new_pool->type = type; ++ new_pool->buf_size = MVNETA_RX_BUF_SIZE(new_pool->pkt_size); ++ new_pool->frag_size = ++ SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(new_pool->pkt_size)) + ++ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); ++ ++ /* Create new pool */ ++ err = mvneta_bm_pool_create(priv, new_pool); ++ if (err) { ++ dev_err(&priv->pdev->dev, "fail to create pool %d\n", ++ new_pool->id); ++ return NULL; ++ } ++ ++ /* Allocate buffers for this pool */ ++ num = mvneta_bm_bufs_add(priv, new_pool, new_pool->size); ++ if (num != new_pool->size) { ++ WARN(1, "pool %d: %d of %d allocated\n", ++ new_pool->id, num, new_pool->size); ++ return NULL; ++ } ++ } ++ ++ return new_pool; ++} ++EXPORT_SYMBOL_GPL(mvneta_bm_pool_use); ++ ++/* Free all buffers from the pool */ ++void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, ++ u8 port_map) ++{ ++ int i; ++ ++ bm_pool->port_map &= ~port_map; ++ if (bm_pool->port_map) ++ return; ++ ++ mvneta_bm_config_set(priv, MVNETA_BM_EMPTY_LIMIT_MASK); ++ ++ for (i = 0; i < bm_pool->buf_num; i++) { ++ dma_addr_t buf_phys_addr; ++ u32 *vaddr; ++ ++ /* Get buffer physical address (indirect access) */ ++ buf_phys_addr = mvneta_bm_pool_get_bp(priv, bm_pool); ++ ++ /* Work-around to the problems when destroying the pool, ++ * when it occurs that a read access to BPPI returns 0. ++ */ ++ if (buf_phys_addr == 0) ++ continue; ++ ++ vaddr = phys_to_virt(buf_phys_addr); ++ if (!vaddr) ++ break; ++ ++ dma_unmap_single(&priv->pdev->dev, buf_phys_addr, ++ bm_pool->buf_size, DMA_FROM_DEVICE); ++ mvneta_frag_free(bm_pool->frag_size, vaddr); ++ } ++ ++ mvneta_bm_config_clear(priv, MVNETA_BM_EMPTY_LIMIT_MASK); ++ ++ /* Update BM driver with number of buffers removed from pool */ ++ bm_pool->buf_num -= i; ++} ++EXPORT_SYMBOL_GPL(mvneta_bm_bufs_free); ++ ++/* Cleanup pool */ ++void mvneta_bm_pool_destroy(struct mvneta_bm *priv, ++ struct mvneta_bm_pool *bm_pool, u8 port_map) ++{ ++ bm_pool->port_map &= ~port_map; ++ if (bm_pool->port_map) ++ return; ++ ++ bm_pool->type = MVNETA_BM_FREE; ++ ++ mvneta_bm_bufs_free(priv, bm_pool, port_map); ++ if (bm_pool->buf_num) ++ WARN(1, "cannot free all buffers in pool %d\n", bm_pool->id); ++ ++ if (bm_pool->virt_addr) { ++ dma_free_coherent(&priv->pdev->dev, sizeof(u32) * bm_pool->size, ++ bm_pool->virt_addr, bm_pool->phys_addr); ++ bm_pool->virt_addr = NULL; ++ } ++ ++ mvneta_bm_pool_disable(priv, bm_pool->id); ++} ++EXPORT_SYMBOL_GPL(mvneta_bm_pool_destroy); ++ ++static void mvneta_bm_pools_init(struct mvneta_bm *priv) ++{ ++ struct device_node *dn = priv->pdev->dev.of_node; ++ struct mvneta_bm_pool *bm_pool; ++ char prop[15]; ++ u32 size; ++ int i; ++ ++ /* Activate BM unit */ ++ mvneta_bm_write(priv, MVNETA_BM_COMMAND_REG, MVNETA_BM_START_MASK); ++ ++ /* Create all pools with maximum size */ ++ for (i = 0; i < MVNETA_BM_POOLS_NUM; i++) { ++ bm_pool = &priv->bm_pools[i]; ++ bm_pool->id = i; ++ bm_pool->type = MVNETA_BM_FREE; ++ ++ /* Reset read pointer */ ++ mvneta_bm_write(priv, MVNETA_BM_POOL_READ_PTR_REG(i), 0); ++ ++ /* Reset write pointer */ ++ mvneta_bm_write(priv, MVNETA_BM_POOL_WRITE_PTR_REG(i), 0); 
++ ++ /* Configure pool size according to DT or use default value */ ++ sprintf(prop, "pool%d,capacity", i); ++ if (of_property_read_u32(dn, prop, &size)) { ++ size = MVNETA_BM_POOL_CAP_DEF; ++ } else if (size > MVNETA_BM_POOL_CAP_MAX) { ++ dev_warn(&priv->pdev->dev, ++ "Illegal pool %d capacity %d, set to %d\n", ++ i, size, MVNETA_BM_POOL_CAP_MAX); ++ size = MVNETA_BM_POOL_CAP_MAX; ++ } else if (size < MVNETA_BM_POOL_CAP_MIN) { ++ dev_warn(&priv->pdev->dev, ++ "Illegal pool %d capacity %d, set to %d\n", ++ i, size, MVNETA_BM_POOL_CAP_MIN); ++ size = MVNETA_BM_POOL_CAP_MIN; ++ } else if (!IS_ALIGNED(size, MVNETA_BM_POOL_CAP_ALIGN)) { ++ dev_warn(&priv->pdev->dev, ++ "Illegal pool %d capacity %d, round to %d\n", ++ i, size, ALIGN(size, ++ MVNETA_BM_POOL_CAP_ALIGN)); ++ size = ALIGN(size, MVNETA_BM_POOL_CAP_ALIGN); ++ } ++ bm_pool->size = size; ++ ++ mvneta_bm_write(priv, MVNETA_BM_POOL_SIZE_REG(i), ++ bm_pool->size); ++ ++ /* Obtain custom pkt_size from DT */ ++ sprintf(prop, "pool%d,pkt-size", i); ++ if (of_property_read_u32(dn, prop, &bm_pool->pkt_size)) ++ bm_pool->pkt_size = 0; ++ } ++} ++ ++static void mvneta_bm_default_set(struct mvneta_bm *priv) ++{ ++ u32 val; ++ ++ /* Mask BM all interrupts */ ++ mvneta_bm_write(priv, MVNETA_BM_INTR_MASK_REG, 0); ++ ++ /* Clear BM cause register */ ++ mvneta_bm_write(priv, MVNETA_BM_INTR_CAUSE_REG, 0); ++ ++ /* Set BM configuration register */ ++ val = mvneta_bm_read(priv, MVNETA_BM_CONFIG_REG); ++ ++ /* Reduce MaxInBurstSize from 32 BPs to 16 BPs */ ++ val &= ~MVNETA_BM_MAX_IN_BURST_SIZE_MASK; ++ val |= MVNETA_BM_MAX_IN_BURST_SIZE_16BP; ++ mvneta_bm_write(priv, MVNETA_BM_CONFIG_REG, val); ++} ++ ++static int mvneta_bm_init(struct mvneta_bm *priv) ++{ ++ mvneta_bm_default_set(priv); ++ ++ /* Allocate and initialize BM pools structures */ ++ priv->bm_pools = devm_kcalloc(&priv->pdev->dev, MVNETA_BM_POOLS_NUM, ++ sizeof(struct mvneta_bm_pool), ++ GFP_KERNEL); ++ if (!priv->bm_pools) ++ return -ENOMEM; ++ ++ mvneta_bm_pools_init(priv); ++ ++ return 0; ++} ++ ++static int mvneta_bm_get_sram(struct device_node *dn, ++ struct mvneta_bm *priv) ++{ ++ priv->bppi_pool = of_gen_pool_get(dn, "internal-mem", 0); ++ if (!priv->bppi_pool) ++ return -ENOMEM; ++ ++ priv->bppi_virt_addr = gen_pool_dma_alloc(priv->bppi_pool, ++ MVNETA_BM_BPPI_SIZE, ++ &priv->bppi_phys_addr); ++ if (!priv->bppi_virt_addr) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void mvneta_bm_put_sram(struct mvneta_bm *priv) ++{ ++ gen_pool_free(priv->bppi_pool, priv->bppi_phys_addr, ++ MVNETA_BM_BPPI_SIZE); ++} ++ ++static int mvneta_bm_probe(struct platform_device *pdev) ++{ ++ struct device_node *dn = pdev->dev.of_node; ++ struct mvneta_bm *priv; ++ struct resource *res; ++ int err; ++ ++ priv = devm_kzalloc(&pdev->dev, sizeof(struct mvneta_bm), GFP_KERNEL); ++ if (!priv) ++ return -ENOMEM; ++ ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ priv->reg_base = devm_ioremap_resource(&pdev->dev, res); ++ if (IS_ERR(priv->reg_base)) ++ return PTR_ERR(priv->reg_base); ++ ++ priv->clk = devm_clk_get(&pdev->dev, NULL); ++ if (IS_ERR(priv->clk)) ++ return PTR_ERR(priv->clk); ++ err = clk_prepare_enable(priv->clk); ++ if (err < 0) ++ return err; ++ ++ err = mvneta_bm_get_sram(dn, priv); ++ if (err < 0) { ++ dev_err(&pdev->dev, "failed to allocate internal memory\n"); ++ goto err_clk; ++ } ++ ++ priv->pdev = pdev; ++ ++ /* Initialize buffer manager internals */ ++ err = mvneta_bm_init(priv); ++ if (err < 0) { ++ dev_err(&pdev->dev, "failed to initialize controller\n"); ++ goto err_sram; 
++ } ++ ++ dn->data = priv; ++ platform_set_drvdata(pdev, priv); ++ ++ dev_info(&pdev->dev, "Buffer Manager for network controller enabled\n"); ++ ++ return 0; ++ ++err_sram: ++ mvneta_bm_put_sram(priv); ++err_clk: ++ clk_disable_unprepare(priv->clk); ++ return err; ++} ++ ++static int mvneta_bm_remove(struct platform_device *pdev) ++{ ++ struct mvneta_bm *priv = platform_get_drvdata(pdev); ++ u8 all_ports_map = 0xff; ++ int i = 0; ++ ++ for (i = 0; i < MVNETA_BM_POOLS_NUM; i++) { ++ struct mvneta_bm_pool *bm_pool = &priv->bm_pools[i]; ++ ++ mvneta_bm_pool_destroy(priv, bm_pool, all_ports_map); ++ } ++ ++ mvneta_bm_put_sram(priv); ++ ++ /* Dectivate BM unit */ ++ mvneta_bm_write(priv, MVNETA_BM_COMMAND_REG, MVNETA_BM_STOP_MASK); ++ ++ clk_disable_unprepare(priv->clk); ++ ++ return 0; ++} ++ ++static const struct of_device_id mvneta_bm_match[] = { ++ { .compatible = "marvell,armada-380-neta-bm" }, ++ { } ++}; ++MODULE_DEVICE_TABLE(of, mvneta_bm_match); ++ ++static struct platform_driver mvneta_bm_driver = { ++ .probe = mvneta_bm_probe, ++ .remove = mvneta_bm_remove, ++ .driver = { ++ .name = MVNETA_BM_DRIVER_NAME, ++ .of_match_table = mvneta_bm_match, ++ }, ++}; ++ ++module_platform_driver(mvneta_bm_driver); ++ ++MODULE_DESCRIPTION("Marvell NETA Buffer Manager Driver - www.marvell.com"); ++MODULE_AUTHOR("Marcin Wojtas "); ++MODULE_LICENSE("GPL v2"); +--- /dev/null ++++ b/drivers/net/ethernet/marvell/mvneta_bm.h +@@ -0,0 +1,189 @@ ++/* ++ * Driver for Marvell NETA network controller Buffer Manager. ++ * ++ * Copyright (C) 2015 Marvell ++ * ++ * Marcin Wojtas ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * License version 2. This program is licensed "as is" without any ++ * warranty of any kind, whether express or implied. ++ */ ++ ++#ifndef _MVNETA_BM_H_ ++#define _MVNETA_BM_H_ ++ ++/* BM Configuration Register */ ++#define MVNETA_BM_CONFIG_REG 0x0 ++#define MVNETA_BM_STATUS_MASK 0x30 ++#define MVNETA_BM_ACTIVE_MASK BIT(4) ++#define MVNETA_BM_MAX_IN_BURST_SIZE_MASK 0x60000 ++#define MVNETA_BM_MAX_IN_BURST_SIZE_16BP BIT(18) ++#define MVNETA_BM_EMPTY_LIMIT_MASK BIT(19) ++ ++/* BM Activation Register */ ++#define MVNETA_BM_COMMAND_REG 0x4 ++#define MVNETA_BM_START_MASK BIT(0) ++#define MVNETA_BM_STOP_MASK BIT(1) ++#define MVNETA_BM_PAUSE_MASK BIT(2) ++ ++/* BM Xbar interface Register */ ++#define MVNETA_BM_XBAR_01_REG 0x8 ++#define MVNETA_BM_XBAR_23_REG 0xc ++#define MVNETA_BM_XBAR_POOL_REG(pool) \ ++ (((pool) < 2) ? MVNETA_BM_XBAR_01_REG : MVNETA_BM_XBAR_23_REG) ++#define MVNETA_BM_TARGET_ID_OFFS(pool) (((pool) & 1) ? 16 : 0) ++#define MVNETA_BM_TARGET_ID_MASK(pool) \ ++ (0xf << MVNETA_BM_TARGET_ID_OFFS(pool)) ++#define MVNETA_BM_TARGET_ID_VAL(pool, id) \ ++ ((id) << MVNETA_BM_TARGET_ID_OFFS(pool)) ++#define MVNETA_BM_XBAR_ATTR_OFFS(pool) (((pool) & 1) ? 
20 : 4) ++#define MVNETA_BM_XBAR_ATTR_MASK(pool) \ ++ (0xff << MVNETA_BM_XBAR_ATTR_OFFS(pool)) ++#define MVNETA_BM_XBAR_ATTR_VAL(pool, attr) \ ++ ((attr) << MVNETA_BM_XBAR_ATTR_OFFS(pool)) ++ ++/* Address of External Buffer Pointers Pool Register */ ++#define MVNETA_BM_POOL_BASE_REG(pool) (0x10 + ((pool) << 4)) ++#define MVNETA_BM_POOL_ENABLE_MASK BIT(0) ++ ++/* External Buffer Pointers Pool RD pointer Register */ ++#define MVNETA_BM_POOL_READ_PTR_REG(pool) (0x14 + ((pool) << 4)) ++#define MVNETA_BM_POOL_SET_READ_PTR_MASK 0xfffc ++#define MVNETA_BM_POOL_GET_READ_PTR_OFFS 16 ++#define MVNETA_BM_POOL_GET_READ_PTR_MASK 0xfffc0000 ++ ++/* External Buffer Pointers Pool WR pointer */ ++#define MVNETA_BM_POOL_WRITE_PTR_REG(pool) (0x18 + ((pool) << 4)) ++#define MVNETA_BM_POOL_SET_WRITE_PTR_OFFS 0 ++#define MVNETA_BM_POOL_SET_WRITE_PTR_MASK 0xfffc ++#define MVNETA_BM_POOL_GET_WRITE_PTR_OFFS 16 ++#define MVNETA_BM_POOL_GET_WRITE_PTR_MASK 0xfffc0000 ++ ++/* External Buffer Pointers Pool Size Register */ ++#define MVNETA_BM_POOL_SIZE_REG(pool) (0x1c + ((pool) << 4)) ++#define MVNETA_BM_POOL_SIZE_MASK 0x3fff ++ ++/* BM Interrupt Cause Register */ ++#define MVNETA_BM_INTR_CAUSE_REG (0x50) ++ ++/* BM interrupt Mask Register */ ++#define MVNETA_BM_INTR_MASK_REG (0x54) ++ ++/* Other definitions */ ++#define MVNETA_BM_SHORT_PKT_SIZE 256 ++#define MVNETA_BM_POOLS_NUM 4 ++#define MVNETA_BM_POOL_CAP_MIN 128 ++#define MVNETA_BM_POOL_CAP_DEF 2048 ++#define MVNETA_BM_POOL_CAP_MAX \ ++ (16 * 1024 - MVNETA_BM_POOL_CAP_ALIGN) ++#define MVNETA_BM_POOL_CAP_ALIGN 32 ++#define MVNETA_BM_POOL_PTR_ALIGN 32 ++ ++#define MVNETA_BM_POOL_ACCESS_OFFS 8 ++ ++#define MVNETA_BM_BPPI_SIZE 0x100000 ++ ++#define MVNETA_RX_BUF_SIZE(pkt_size) ((pkt_size) + NET_SKB_PAD) ++ ++enum mvneta_bm_type { ++ MVNETA_BM_FREE, ++ MVNETA_BM_LONG, ++ MVNETA_BM_SHORT ++}; ++ ++struct mvneta_bm { ++ void __iomem *reg_base; ++ struct clk *clk; ++ struct platform_device *pdev; ++ ++ struct gen_pool *bppi_pool; ++ /* BPPI virtual base address */ ++ void __iomem *bppi_virt_addr; ++ /* BPPI physical base address */ ++ dma_addr_t bppi_phys_addr; ++ ++ /* BM pools */ ++ struct mvneta_bm_pool *bm_pools; ++}; ++ ++struct mvneta_bm_pool { ++ /* Pool number in the range 0-3 */ ++ u8 id; ++ enum mvneta_bm_type type; ++ ++ /* Buffer Pointers Pool External (BPPE) size in number of bytes */ ++ int size; ++ /* Number of buffers used by this pool */ ++ int buf_num; ++ /* Pool buffer size */ ++ int buf_size; ++ /* Packet size */ ++ int pkt_size; ++ /* Single frag size */ ++ u32 frag_size; ++ ++ /* BPPE virtual base address */ ++ u32 *virt_addr; ++ /* BPPE physical base address */ ++ dma_addr_t phys_addr; ++ ++ /* Ports using BM pool */ ++ u8 port_map; ++ ++ struct mvneta_bm *priv; ++}; ++ ++/* Declarations and definitions */ ++void *mvneta_frag_alloc(unsigned int frag_size); ++void mvneta_frag_free(unsigned int frag_size, void *data); ++ ++#if defined(CONFIG_MVNETA_BM) || defined(CONFIG_MVNETA_BM_MODULE) ++void mvneta_bm_pool_destroy(struct mvneta_bm *priv, ++ struct mvneta_bm_pool *bm_pool, u8 port_map); ++void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, ++ u8 port_map); ++int mvneta_bm_bufs_add(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, ++ int buf_num); ++int mvneta_bm_pool_refill(struct mvneta_bm *priv, ++ struct mvneta_bm_pool *bm_pool); ++struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id, ++ enum mvneta_bm_type type, u8 port_id, ++ int pkt_size); ++ ++static inline void 
mvneta_bm_pool_put_bp(struct mvneta_bm *priv, ++ struct mvneta_bm_pool *bm_pool, ++ dma_addr_t buf_phys_addr) ++{ ++ writel_relaxed(buf_phys_addr, priv->bppi_virt_addr + ++ (bm_pool->id << MVNETA_BM_POOL_ACCESS_OFFS)); ++} ++ ++static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv, ++ struct mvneta_bm_pool *bm_pool) ++{ ++ return readl_relaxed(priv->bppi_virt_addr + ++ (bm_pool->id << MVNETA_BM_POOL_ACCESS_OFFS)); ++} ++#else ++void mvneta_bm_pool_destroy(struct mvneta_bm *priv, ++ struct mvneta_bm_pool *bm_pool, u8 port_map) {} ++void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, ++ u8 port_map) {} ++int mvneta_bm_bufs_add(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, ++ int buf_num) { return 0; } ++int mvneta_bm_pool_refill(struct mvneta_bm *priv, ++ struct mvneta_bm_pool *bm_pool) {return 0; } ++struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id, ++ enum mvneta_bm_type type, u8 port_id, ++ int pkt_size) { return NULL; } ++ ++static inline void mvneta_bm_pool_put_bp(struct mvneta_bm *priv, ++ struct mvneta_bm_pool *bm_pool, ++ dma_addr_t buf_phys_addr) {} ++ ++static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv, ++ struct mvneta_bm_pool *bm_pool) ++{ return 0; } ++#endif /* CONFIG_MVNETA_BM */ ++#endif diff --git a/target/linux/mvebu/patches-4.4/046-net-mvneta-Use-the-new-hwbm-framework.patch b/target/linux/mvebu/patches-4.4/046-net-mvneta-Use-the-new-hwbm-framework.patch new file mode 100644 index 0000000000..210504c39e --- /dev/null +++ b/target/linux/mvebu/patches-4.4/046-net-mvneta-Use-the-new-hwbm-framework.patch @@ -0,0 +1,359 @@ +From: Gregory CLEMENT +Date: Mon, 14 Mar 2016 09:39:05 +0100 +Subject: [PATCH] net: mvneta: Use the new hwbm framework + +Now that the hardware buffer management framework had been introduced, +let's use it. + +Tested-by: Sebastian Careba +Signed-off-by: Gregory CLEMENT +Signed-off-by: David S. 
+---
+
+--- a/drivers/net/ethernet/marvell/Kconfig
++++ b/drivers/net/ethernet/marvell/Kconfig
+@@ -43,6 +43,7 @@ config MVMDIO
+ config MVNETA_BM
+ tristate "Marvell Armada 38x/XP network interface BM support"
+ depends on MVNETA
++ select HWBM
+ ---help---
+ This driver supports auxiliary block of the network
+ interface units in the Marvell ARMADA XP and ARMADA 38x SoC
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -30,6 +30,7 @@
+ #include
+ #include
+ #include
++#include
+ #include "mvneta_bm.h"
+ #include
+ #include
+@@ -1024,11 +1025,12 @@ static int mvneta_bm_port_init(struct pl
+ static void mvneta_bm_update_mtu(struct mvneta_port *pp, int mtu)
+ {
+ struct mvneta_bm_pool *bm_pool = pp->pool_long;
++ struct hwbm_pool *hwbm_pool = &bm_pool->hwbm_pool;
+ int num;
+
+ /* Release all buffers from long pool */
+ mvneta_bm_bufs_free(pp->bm_priv, bm_pool, 1 << pp->id);
+- if (bm_pool->buf_num) {
++ if (hwbm_pool->buf_num) {
+ WARN(1, "cannot free all buffers in pool %d\n",
+ bm_pool->id);
+ goto bm_mtu_err;
+@@ -1036,14 +1038,14 @@ static void mvneta_bm_update_mtu(struct
+
+ bm_pool->pkt_size = MVNETA_RX_PKT_SIZE(mtu);
+ bm_pool->buf_size = MVNETA_RX_BUF_SIZE(bm_pool->pkt_size);
+- bm_pool->frag_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+- SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(bm_pool->pkt_size));
++ hwbm_pool->frag_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
++ SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(bm_pool->pkt_size));
+
+ /* Fill entire long pool */
+- num = mvneta_bm_bufs_add(pp->bm_priv, bm_pool, bm_pool->size);
+- if (num != bm_pool->size) {
++ num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC);
++ if (num != hwbm_pool->size) {
+ WARN(1, "pool %d: %d of %d allocated\n",
+- bm_pool->id, num, bm_pool->size);
++ bm_pool->id, num, hwbm_pool->size);
+ goto bm_mtu_err;
+ }
+ mvneta_bm_pool_bufsize_set(pp, bm_pool->buf_size, bm_pool->id);
+@@ -2064,14 +2066,14 @@ err_drop_frame:
+ }
+
+ /* Refill processing */
+- err = mvneta_bm_pool_refill(pp->bm_priv, bm_pool);
++ err = hwbm_pool_refill(&bm_pool->hwbm_pool, GFP_ATOMIC);
+ if (err) {
+ netdev_err(dev, "Linux processing - Can't refill\n");
+ rxq->missed++;
+ goto err_drop_frame_ret_pool;
+ }
+
+- frag_size = bm_pool->frag_size;
++ frag_size = bm_pool->hwbm_pool.frag_size;
+
+ skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size);
+
+--- a/drivers/net/ethernet/marvell/mvneta_bm.c
++++ b/drivers/net/ethernet/marvell/mvneta_bm.c
+@@ -10,16 +10,17 @@
+ * warranty of any kind, whether express or implied.
+ */
+
+-#include
++#include
+ #include
+-#include
+-#include
+-#include
++#include
++#include
+ #include
+ #include
+-#include
+ #include
+-#include
++#include
++#include
++#include
+ #include "mvneta_bm.h"
+
+ #define MVNETA_BM_DRIVER_NAME "mvneta_bm"
+@@ -88,17 +89,13 @@ static void mvneta_bm_pool_target_set(st
+ mvneta_bm_write(priv, MVNETA_BM_XBAR_POOL_REG(pool_id), val);
+ }
+
+-/* Allocate skb for BM pool */
+-void *mvneta_buf_alloc(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
+- dma_addr_t *buf_phys_addr)
++int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf)
+ {
+- void *buf;
++ struct mvneta_bm_pool *bm_pool =
++ (struct mvneta_bm_pool *)hwbm_pool->priv;
++ struct mvneta_bm *priv = bm_pool->priv;
+ dma_addr_t phys_addr;
+
+- buf = mvneta_frag_alloc(bm_pool->frag_size);
+- if (!buf)
+- return NULL;
+-
+ /* In order to update buf_cookie field of RX descriptor properly,
+ * BM hardware expects buf virtual address to be placed in the
+ * first four bytes of mapped buffer.
+@@ -106,75 +103,13 @@ void *mvneta_buf_alloc(struct mvneta_bm
+ *(u32 *)buf = (u32)buf;
+ phys_addr = dma_map_single(&priv->pdev->dev, buf, bm_pool->buf_size,
+ DMA_FROM_DEVICE);
+- if (unlikely(dma_mapping_error(&priv->pdev->dev, phys_addr))) {
+- mvneta_frag_free(bm_pool->frag_size, buf);
+- return NULL;
+- }
+- *buf_phys_addr = phys_addr;
+-
+- return buf;
+-}
+-
+-/* Refill processing for HW buffer management */
+-int mvneta_bm_pool_refill(struct mvneta_bm *priv,
+- struct mvneta_bm_pool *bm_pool)
+-{
+- dma_addr_t buf_phys_addr;
+- void *buf;
+-
+- buf = mvneta_buf_alloc(priv, bm_pool, &buf_phys_addr);
+- if (!buf)
++ if (unlikely(dma_mapping_error(&priv->pdev->dev, phys_addr)))
+ return -ENOMEM;
+
+- mvneta_bm_pool_put_bp(priv, bm_pool, buf_phys_addr);
+-
++ mvneta_bm_pool_put_bp(priv, bm_pool, phys_addr);
+ return 0;
+ }
+-EXPORT_SYMBOL_GPL(mvneta_bm_pool_refill);
+-
+-/* Allocate buffers for the pool */
+-int mvneta_bm_bufs_add(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
+- int buf_num)
+-{
+- int err, i;
+-
+- if (bm_pool->buf_num == bm_pool->size) {
+- dev_dbg(&priv->pdev->dev, "pool %d already filled\n",
+- bm_pool->id);
+- return bm_pool->buf_num;
+- }
+-
+- if (buf_num < 0 ||
+- (buf_num + bm_pool->buf_num > bm_pool->size)) {
+- dev_err(&priv->pdev->dev,
+- "cannot allocate %d buffers for pool %d\n",
+- buf_num, bm_pool->id);
+- return 0;
+- }
+-
+- for (i = 0; i < buf_num; i++) {
+- err = mvneta_bm_pool_refill(priv, bm_pool);
+- if (err < 0)
+- break;
+- }
+-
+- /* Update BM driver with number of buffers added to pool */
+- bm_pool->buf_num += i;
+-
+- dev_dbg(&priv->pdev->dev,
+- "%s pool %d: pkt_size=%4d, buf_size=%4d, frag_size=%4d\n",
+- bm_pool->type == MVNETA_BM_SHORT ? "short" : "long",
+- bm_pool->id, bm_pool->pkt_size, bm_pool->buf_size,
+- bm_pool->frag_size);
+-
+- dev_dbg(&priv->pdev->dev,
+- "%s pool %d: %d of %d buffers added\n",
+- bm_pool->type == MVNETA_BM_SHORT ? "short" : "long",
"short" : "long", +- bm_pool->id, i, buf_num); +- +- return i; +-} +-EXPORT_SYMBOL_GPL(mvneta_bm_bufs_add); ++EXPORT_SYMBOL_GPL(mvneta_bm_construct); + + /* Create pool */ + static int mvneta_bm_pool_create(struct mvneta_bm *priv, +@@ -183,8 +118,7 @@ static int mvneta_bm_pool_create(struct + struct platform_device *pdev = priv->pdev; + u8 target_id, attr; + int size_bytes, err; +- +- size_bytes = sizeof(u32) * bm_pool->size; ++ size_bytes = sizeof(u32) * bm_pool->hwbm_pool.size; + bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, size_bytes, + &bm_pool->phys_addr, + GFP_KERNEL); +@@ -245,11 +179,16 @@ struct mvneta_bm_pool *mvneta_bm_pool_us + + /* Allocate buffers in case BM pool hasn't been used yet */ + if (new_pool->type == MVNETA_BM_FREE) { ++ struct hwbm_pool *hwbm_pool = &new_pool->hwbm_pool; ++ ++ new_pool->priv = priv; + new_pool->type = type; + new_pool->buf_size = MVNETA_RX_BUF_SIZE(new_pool->pkt_size); +- new_pool->frag_size = ++ hwbm_pool->frag_size = + SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(new_pool->pkt_size)) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); ++ hwbm_pool->construct = mvneta_bm_construct; ++ hwbm_pool->priv = new_pool; + + /* Create new pool */ + err = mvneta_bm_pool_create(priv, new_pool); +@@ -260,10 +199,10 @@ struct mvneta_bm_pool *mvneta_bm_pool_us + } + + /* Allocate buffers for this pool */ +- num = mvneta_bm_bufs_add(priv, new_pool, new_pool->size); +- if (num != new_pool->size) { ++ num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC); ++ if (num != hwbm_pool->size) { + WARN(1, "pool %d: %d of %d allocated\n", +- new_pool->id, num, new_pool->size); ++ new_pool->id, num, hwbm_pool->size); + return NULL; + } + } +@@ -284,7 +223,7 @@ void mvneta_bm_bufs_free(struct mvneta_b + + mvneta_bm_config_set(priv, MVNETA_BM_EMPTY_LIMIT_MASK); + +- for (i = 0; i < bm_pool->buf_num; i++) { ++ for (i = 0; i < bm_pool->hwbm_pool.buf_num; i++) { + dma_addr_t buf_phys_addr; + u32 *vaddr; + +@@ -303,13 +242,13 @@ void mvneta_bm_bufs_free(struct mvneta_b + + dma_unmap_single(&priv->pdev->dev, buf_phys_addr, + bm_pool->buf_size, DMA_FROM_DEVICE); +- mvneta_frag_free(bm_pool->frag_size, vaddr); ++ hwbm_buf_free(&bm_pool->hwbm_pool, vaddr); + } + + mvneta_bm_config_clear(priv, MVNETA_BM_EMPTY_LIMIT_MASK); + + /* Update BM driver with number of buffers removed from pool */ +- bm_pool->buf_num -= i; ++ bm_pool->hwbm_pool.buf_num -= i; + } + EXPORT_SYMBOL_GPL(mvneta_bm_bufs_free); + +@@ -317,6 +256,7 @@ EXPORT_SYMBOL_GPL(mvneta_bm_bufs_free); + void mvneta_bm_pool_destroy(struct mvneta_bm *priv, + struct mvneta_bm_pool *bm_pool, u8 port_map) + { ++ struct hwbm_pool *hwbm_pool = &bm_pool->hwbm_pool; + bm_pool->port_map &= ~port_map; + if (bm_pool->port_map) + return; +@@ -324,11 +264,12 @@ void mvneta_bm_pool_destroy(struct mvnet + bm_pool->type = MVNETA_BM_FREE; + + mvneta_bm_bufs_free(priv, bm_pool, port_map); +- if (bm_pool->buf_num) ++ if (hwbm_pool->buf_num) + WARN(1, "cannot free all buffers in pool %d\n", bm_pool->id); + + if (bm_pool->virt_addr) { +- dma_free_coherent(&priv->pdev->dev, sizeof(u32) * bm_pool->size, ++ dma_free_coherent(&priv->pdev->dev, ++ sizeof(u32) * hwbm_pool->size, + bm_pool->virt_addr, bm_pool->phys_addr); + bm_pool->virt_addr = NULL; + } +@@ -381,10 +322,10 @@ static void mvneta_bm_pools_init(struct + MVNETA_BM_POOL_CAP_ALIGN)); + size = ALIGN(size, MVNETA_BM_POOL_CAP_ALIGN); + } +- bm_pool->size = size; ++ bm_pool->hwbm_pool.size = size; + + mvneta_bm_write(priv, MVNETA_BM_POOL_SIZE_REG(i), +- bm_pool->size); ++ bm_pool->hwbm_pool.size); 
+
+ /* Obtain custom pkt_size from DT */
+ sprintf(prop, "pool%d,pkt-size", i);
+--- a/drivers/net/ethernet/marvell/mvneta_bm.h
++++ b/drivers/net/ethernet/marvell/mvneta_bm.h
+@@ -108,20 +108,15 @@ struct mvneta_bm {
+ };
+
+ struct mvneta_bm_pool {
++ struct hwbm_pool hwbm_pool;
+ /* Pool number in the range 0-3 */
+ u8 id;
+ enum mvneta_bm_type type;
+
+- /* Buffer Pointers Pool External (BPPE) size in number of bytes */
+- int size;
+- /* Number of buffers used by this pool */
+- int buf_num;
+- /* Pool buffer size */
+- int buf_size;
+ /* Packet size */
+ int pkt_size;
+- /* Single frag size */
+- u32 frag_size;
++ /* Size of the buffer accessed through DMA */
++ u32 buf_size;
+
+ /* BPPE virtual base address */
+ u32 *virt_addr;
+@@ -143,8 +138,7 @@ void mvneta_bm_pool_destroy(struct mvnet
+ struct mvneta_bm_pool *bm_pool, u8 port_map);
+ void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
+ u8 port_map);
+-int mvneta_bm_bufs_add(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
+- int buf_num);
++int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf);
+ int mvneta_bm_pool_refill(struct mvneta_bm *priv,
+ struct mvneta_bm_pool *bm_pool);
+ struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id,
+@@ -170,8 +164,7 @@ void mvneta_bm_pool_destroy(struct mvnet
+ struct mvneta_bm_pool *bm_pool, u8 port_map) {}
+ void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
+ u8 port_map) {}
+-int mvneta_bm_bufs_add(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
+- int buf_num) { return 0; }
++int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf) { return 0; }
+ int mvneta_bm_pool_refill(struct mvneta_bm *priv,
+ struct mvneta_bm_pool *bm_pool) {return 0; }
+ struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id,
diff --git a/target/linux/mvebu/patches-4.4/047-net-mvneta-Fix-spinlock-usage.patch b/target/linux/mvebu/patches-4.4/047-net-mvneta-Fix-spinlock-usage.patch
new file mode 100644
index 0000000000..a8e78df2e9
--- /dev/null
+++ b/target/linux/mvebu/patches-4.4/047-net-mvneta-Fix-spinlock-usage.patch
@@ -0,0 +1,52 @@
+From: Gregory CLEMENT
+Date: Sat, 12 Mar 2016 18:44:17 +0100
+Subject: [PATCH] net: mvneta: Fix spinlock usage
+
+In the previous patch, the spinlock was not initialized. While it didn't
+cause any trouble yet, it could be a problem to use it uninitialized.
+
+The most annoying part was the critical section protected by the spinlock
+in mvneta_stop(). Some of the functions could sleep, as pointed out when
+CONFIG_DEBUG_ATOMIC_SLEEP is activated. Actually, in mvneta_stop() we only
+need to protect the is_stopped flag; indeed the code of the notifier
+for CPU online is protected by the same spinlock, so when we get the
+lock, the notifier work is done.
+
+Reported-by: Patrick Uiterwijk
+Signed-off-by: Gregory CLEMENT
+Signed-off-by: David S. Miller
+---
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -3484,17 +3484,17 @@ static int mvneta_stop(struct net_device
+ struct mvneta_port *pp = netdev_priv(dev);
+
+ /* Inform that we are stopping so we don't want to setup the
+- * driver for new CPUs in the notifiers
++ * driver for new CPUs in the notifiers. The code of the
++ * notifier for CPU online is protected by the same spinlock,
++ * so when we get the lock, the notifier work is done.
+ */
+ spin_lock(&pp->lock);
+ pp->is_stopped = true;
++ spin_unlock(&pp->lock);
++
+ mvneta_stop_dev(pp);
+ mvneta_mdio_remove(pp);
+ unregister_cpu_notifier(&pp->cpu_notifier);
+- /* Now that the notifier are unregistered, we can release le
+- * lock
+- */
+- spin_unlock(&pp->lock);
+ on_each_cpu(mvneta_percpu_disable, pp, true);
+ free_percpu_irq(dev->irq, pp->ports);
+ mvneta_cleanup_rxqs(pp);
+@@ -4027,6 +4027,7 @@ static int mvneta_probe(struct platform_
+ dev->ethtool_ops = &mvneta_eth_tool_ops;
+
+ pp = netdev_priv(dev);
++ spin_lock_init(&pp->lock);
+ pp->phy_node = phy_node;
+ pp->phy_interface = phy_mode;
+
diff --git a/target/linux/mvebu/patches-4.4/048-net-mvneta-fix-error-messages-in-mvneta_port_down-fu.patch b/target/linux/mvebu/patches-4.4/048-net-mvneta-fix-error-messages-in-mvneta_port_down-fu.patch
new file mode 100644
index 0000000000..fd1f1ae203
--- /dev/null
+++ b/target/linux/mvebu/patches-4.4/048-net-mvneta-fix-error-messages-in-mvneta_port_down-fu.patch
@@ -0,0 +1,33 @@
+From: Dmitri Epshtein
+Date: Sat, 12 Mar 2016 18:44:19 +0100
+Subject: [PATCH] net: mvneta: fix error messages in mvneta_port_down function
+
+This commit corrects error printing when shutting down the port.
+
+[gregory.clement@free-electrons.com: split initial commit in two
+individual changes]
+Signed-off-by: Dmitri Epshtein
+Signed-off-by: Gregory CLEMENT
+Signed-off-by: David S. Miller
+---
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1105,7 +1105,7 @@ static void mvneta_port_down(struct mvne
+ do {
+ if (count++ >= MVNETA_RX_DISABLE_TIMEOUT_MSEC) {
+ netdev_warn(pp->dev,
+- "TIMEOUT for RX stopped ! rx_queue_cmd: 0x08%x\n",
++ "TIMEOUT for RX stopped ! rx_queue_cmd: 0x%08x\n",
+ val);
+ break;
+ }
+@@ -1144,7 +1144,7 @@ static void mvneta_port_down(struct mvne
+ do {
+ if (count++ >= MVNETA_TX_FIFO_EMPTY_TIMEOUT) {
+ netdev_warn(pp->dev,
+- "TX FIFO empty timeout status=0x08%x\n",
++ "TX FIFO empty timeout status=0x%08x\n",
+ val);
+ break;
+ }
diff --git a/target/linux/mvebu/patches-4.4/049-net-mvneta-replace-MVNETA_CPU_D_CACHE_LINE_SIZE-with.patch b/target/linux/mvebu/patches-4.4/049-net-mvneta-replace-MVNETA_CPU_D_CACHE_LINE_SIZE-with.patch
new file mode 100644
index 0000000000..c12d98a4e2
--- /dev/null
+++ b/target/linux/mvebu/patches-4.4/049-net-mvneta-replace-MVNETA_CPU_D_CACHE_LINE_SIZE-with.patch
@@ -0,0 +1,56 @@
+From: Jisheng Zhang
+Date: Wed, 30 Mar 2016 19:55:21 +0800
+Subject: [PATCH] net: mvneta: replace MVNETA_CPU_D_CACHE_LINE_SIZE with
+ L1_CACHE_BYTES
+
+The mvneta is also used in some Marvell Berlin family SoCs, which may
+have a 64-byte cacheline size. Replace the MVNETA_CPU_D_CACHE_LINE_SIZE
+usage with L1_CACHE_BYTES.
+
+And since dma_alloc_coherent() is always cacheline size aligned,
+remove the align checks.
+
+Signed-off-by: Jisheng Zhang
+Signed-off-by: David S. Miller
+---
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -260,7 +260,6 @@
+
+ #define MVNETA_VLAN_TAG_LEN 4
+
+-#define MVNETA_CPU_D_CACHE_LINE_SIZE 32
+ #define MVNETA_TX_CSUM_DEF_SIZE 1600
+ #define MVNETA_TX_CSUM_MAX_SIZE 9800
+ #define MVNETA_ACC_MODE_EXT1 1
+@@ -300,7 +299,7 @@
+ #define MVNETA_RX_PKT_SIZE(mtu) \
+ ALIGN((mtu) + MVNETA_MH_SIZE + MVNETA_VLAN_TAG_LEN + \
+ ETH_HLEN + ETH_FCS_LEN, \
+- MVNETA_CPU_D_CACHE_LINE_SIZE)
++ L1_CACHE_BYTES)
+
+ #define IS_TSO_HEADER(txq, addr) \
+ ((addr >= txq->tso_hdrs_phys) && \
+@@ -2762,9 +2761,6 @@ static int mvneta_rxq_init(struct mvneta
+ if (rxq->descs == NULL)
+ return -ENOMEM;
+
+- BUG_ON(rxq->descs !=
+- PTR_ALIGN(rxq->descs, MVNETA_CPU_D_CACHE_LINE_SIZE));
+-
+ rxq->last_desc = rxq->size - 1;
+
+ /* Set Rx descriptors queue starting address */
+@@ -2835,10 +2831,6 @@ static int mvneta_txq_init(struct mvneta
+ if (txq->descs == NULL)
+ return -ENOMEM;
+
+- /* Make sure descriptor address is cache line size aligned */
+- BUG_ON(txq->descs !=
+- PTR_ALIGN(txq->descs, MVNETA_CPU_D_CACHE_LINE_SIZE));
+-
+ txq->last_desc = txq->size - 1;
+
+ /* Set maximum bandwidth for enabled TXQs */
diff --git a/target/linux/mvebu/patches-4.4/050-net-mvneta-fix-changing-MTU-when-using-per-cpu-proce.patch b/target/linux/mvebu/patches-4.4/050-net-mvneta-fix-changing-MTU-when-using-per-cpu-proce.patch
new file mode 100644
index 0000000000..07862008a7
--- /dev/null
+++ b/target/linux/mvebu/patches-4.4/050-net-mvneta-fix-changing-MTU-when-using-per-cpu-proce.patch
@@ -0,0 +1,75 @@
+From: Marcin Wojtas
+Date: Fri, 1 Apr 2016 15:21:18 +0200
+Subject: [PATCH] net: mvneta: fix changing MTU when using per-cpu processing
+
+After enabling per-cpu processing it appeared that under heavy load
+changing the MTU can result in blocking all of the port's interrupts and
+transmitting data is not possible after the change.
+
+This commit fixes the above issue by disabling percpu interrupts for the
+time when TXQs and RXQs are reconfigured.
+
+Signed-off-by: Marcin Wojtas
+Signed-off-by: David S. Miller
+---
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -3040,6 +3040,20 @@ static int mvneta_check_mtu_valid(struct
+ return mtu;
+ }
+
++static void mvneta_percpu_enable(void *arg)
++{
++ struct mvneta_port *pp = arg;
++
++ enable_percpu_irq(pp->dev->irq, IRQ_TYPE_NONE);
++}
++
++static void mvneta_percpu_disable(void *arg)
++{
++ struct mvneta_port *pp = arg;
++
++ disable_percpu_irq(pp->dev->irq);
++}
++
+ /* Change the device mtu */
+ static int mvneta_change_mtu(struct net_device *dev, int mtu)
+ {
+@@ -3064,6 +3078,7 @@ static int mvneta_change_mtu(struct net_
+ * reallocation of the queues
+ */
+ mvneta_stop_dev(pp);
++ on_each_cpu(mvneta_percpu_disable, pp, true);
+
+ mvneta_cleanup_txqs(pp);
+ mvneta_cleanup_rxqs(pp);
+@@ -3087,6 +3102,7 @@ static int mvneta_change_mtu(struct net_
+ return ret;
+ }
+
++ on_each_cpu(mvneta_percpu_enable, pp, true);
+ mvneta_start_dev(pp);
+ mvneta_port_up(pp);
+
+@@ -3240,20 +3256,6 @@ static void mvneta_mdio_remove(struct mv
+ pp->phy_dev = NULL;
+ }
+
+-static void mvneta_percpu_enable(void *arg)
+-{
+- struct mvneta_port *pp = arg;
+-
+- enable_percpu_irq(pp->dev->irq, IRQ_TYPE_NONE);
+-}
+-
+-static void mvneta_percpu_disable(void *arg)
+-{
+- struct mvneta_port *pp = arg;
+-
+- disable_percpu_irq(pp->dev->irq);
+-}
+-
+ /* Electing a CPU must be done in an atomic way: it should be done
+ * after or before the removal/insertion of a CPU and this function is
+ * not reentrant.
diff --git a/target/linux/mvebu/patches-4.4/051-ARM-dts-armada-38x-add-buffer-manager-nodes.patch b/target/linux/mvebu/patches-4.4/051-ARM-dts-armada-38x-add-buffer-manager-nodes.patch
new file mode 100644
index 0000000000..b56de94e3d
--- /dev/null
+++ b/target/linux/mvebu/patches-4.4/051-ARM-dts-armada-38x-add-buffer-manager-nodes.patch
@@ -0,0 +1,53 @@
+From: Marcin Wojtas
+Date: Mon, 14 Mar 2016 09:38:57 +0100
+Subject: [PATCH] ARM: dts: armada-38x: add buffer manager nodes
+
+Armada 38x network controller supports hardware buffer management (BM).
+Since it is now enabled in mvneta driver, appropriate nodes can be added
+to armada-38x.dtsi - for the actual common BM unit (bm@c8000) and its
+internal SRAM (bm-bppi), which is used for indirect access to buffer
+pointer ring residing in DRAM.
+
+Pools - ports mapping, bm-bppi entry in 'soc' node's ranges and optional
+parameters are supposed to be set in board files.
+
+Signed-off-by: Marcin Wojtas
+Signed-off-by: Gregory CLEMENT
+Signed-off-by: David S. Miller
+---
+
+--- a/arch/arm/boot/dts/armada-38x.dtsi
++++ b/arch/arm/boot/dts/armada-38x.dtsi
+@@ -540,6 +540,14 @@
+ status = "disabled";
+ };
+
++ bm: bm@c8000 {
++ compatible = "marvell,armada-380-neta-bm";
++ reg = <0xc8000 0xac>;
++ clocks = <&gateclk 13>;
++ internal-mem = <&bm_bppi>;
++ status = "disabled";
++ };
++
+ sata@e0000 {
+ compatible = "marvell,armada-380-ahci";
+ reg = <0xe0000 0x2000>;
+@@ -618,6 +626,17 @@
+ #size-cells = <1>;
+ ranges = <0 MBUS_ID(0x09, 0x15) 0 0x800>;
+ };
++
++ bm_bppi: bm-bppi {
++ compatible = "mmio-sram";
++ reg = ;
++ ranges = <0 MBUS_ID(0x0c, 0x04) 0 0x100000>;
++ #address-cells = <1>;
++ #size-cells = <1>;
++ clocks = <&gateclk 13>;
++ no-memory-wc;
++ status = "disabled";
++ };
+ };
+
+ clocks {
diff --git a/target/linux/mvebu/patches-4.4/052-ARM-dts-armada-xp-add-buffer-manager-nodes.patch b/target/linux/mvebu/patches-4.4/052-ARM-dts-armada-xp-add-buffer-manager-nodes.patch
new file mode 100644
index 0000000000..65fafd7101
--- /dev/null
+++ b/target/linux/mvebu/patches-4.4/052-ARM-dts-armada-xp-add-buffer-manager-nodes.patch
@@ -0,0 +1,53 @@
+From: Marcin Wojtas
+Date: Mon, 14 Mar 2016 09:38:59 +0100
+Subject: [PATCH] ARM: dts: armada-xp: add buffer manager nodes
+
+Armada XP network controller supports hardware buffer management (BM).
+Since it is now enabled in mvneta driver, appropriate nodes can be added
+to armada-xp.dtsi - for the actual common BM unit (bm@c0000) and its
+internal SRAM (bm-bppi), which is used for indirect access to buffer
+pointer ring residing in DRAM.
+
+Pools - ports mapping, bm-bppi entry in 'soc' node's ranges and optional
+parameters are supposed to be set in board files.
+
+Signed-off-by: Marcin Wojtas
+Signed-off-by: Gregory CLEMENT
+Signed-off-by: David S. Miller
+---
+
+--- a/arch/arm/boot/dts/armada-xp.dtsi
++++ b/arch/arm/boot/dts/armada-xp.dtsi
+@@ -253,6 +253,14 @@
+ marvell,crypto-sram-size = <0x800>;
+ };
+
++ bm: bm@c0000 {
++ compatible = "marvell,armada-380-neta-bm";
++ reg = <0xc0000 0xac>;
++ clocks = <&gateclk 13>;
++ internal-mem = <&bm_bppi>;
++ status = "disabled";
++ };
++
+ xor@f0900 {
+ compatible = "marvell,orion-xor";
+ reg = <0xF0900 0x100
+@@ -291,6 +299,17 @@
+ #size-cells = <1>;
+ ranges = <0 MBUS_ID(0x09, 0x05) 0 0x800>;
+ };
++
++ bm_bppi: bm-bppi {
++ compatible = "mmio-sram";
++ reg = ;
++ ranges = <0 MBUS_ID(0x0c, 0x04) 0 0x100000>;
++ #address-cells = <1>;
++ #size-cells = <1>;
++ clocks = <&gateclk 13>;
++ no-memory-wc;
++ status = "disabled";
++ };
+ };
+
+ clocks {
diff --git a/target/linux/mvebu/patches-4.4/106-enable-bm-on-linksys-devices.patch b/target/linux/mvebu/patches-4.4/106-enable-bm-on-linksys-devices.patch
new file mode 100644
index 0000000000..e80bc39a69
--- /dev/null
+++ b/target/linux/mvebu/patches-4.4/106-enable-bm-on-linksys-devices.patch
@@ -0,0 +1,107 @@
+--- a/arch/arm/boot/dts/armada-385-linksys.dtsi
++++ b/arch/arm/boot/dts/armada-385-linksys.dtsi
+@@ -59,7 +59,8 @@
+ ranges = ;
++ MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000
++ MBUS_ID(0x0c, 0x04) 0 0xf1200000 0x100000>;
+
+ internal-regs {
+
+@@ -93,6 +94,9 @@
+ ethernet@70000 {
+ status = "okay";
+ phy-mode = "rgmii-id";
++ buffer-manager = <&bm>;
++ bm,pool-long = <0>;
++ bm,pool-short = <3>;
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+@@ -102,6 +106,9 @@
+ ethernet@34000 {
+ status = "okay";
+ phy-mode = "sgmii";
++ buffer-manager = <&bm>;
++ bm,pool-long = <2>;
++ bm,pool-short = <3>;
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+@@ -112,6 +119,10 @@
+ status = "okay";
+ };
+
++ bm@c8000 {
++ status = "okay";
status = "okay"; ++ }; ++ + sata@a8000 { + status = "okay"; + }; +@@ -198,6 +209,10 @@ + }; + }; + ++ bm-bppi { ++ status = "okay"; ++ }; ++ + pcie-controller { + status = "okay"; + +--- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts ++++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts +@@ -71,7 +71,8 @@ + ranges = ; ++ MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000 ++ MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>; + + pcie-controller { + status = "okay"; +@@ -205,6 +206,9 @@ + pinctrl-names = "default"; + status = "okay"; + phy-mode = "rgmii-id"; ++ buffer-manager = <&bm>; ++ bm,pool-long = <0>; ++ bm,pool-short = <3>; + fixed-link { + speed = <1000>; + full-duplex; +@@ -216,12 +220,19 @@ + pinctrl-names = "default"; + status = "okay"; + phy-mode = "rgmii-id"; ++ buffer-manager = <&bm>; ++ bm,pool-long = <1>; ++ bm,pool-short = <3>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + ++ bm@c0000 { ++ status = "okay"; ++ }; ++ + /* USB part of the eSATA/USB 2.0 port */ + usb@50000 { + status = "okay"; +@@ -379,6 +390,10 @@ + }; + }; + }; ++ ++ bm-bppi { ++ status = "okay"; ++ }; + }; + + gpio_keys { diff --git a/target/linux/mvebu/patches-4.4/206-mvebu_wrt1900ac_use_pwm-fan_rather_than_gpio-fan.patch b/target/linux/mvebu/patches-4.4/206-mvebu_wrt1900ac_use_pwm-fan_rather_than_gpio-fan.patch index 740fdcf8cf..bff58e9b75 100644 --- a/target/linux/mvebu/patches-4.4/206-mvebu_wrt1900ac_use_pwm-fan_rather_than_gpio-fan.patch +++ b/target/linux/mvebu/patches-4.4/206-mvebu_wrt1900ac_use_pwm-fan_rather_than_gpio-fan.patch @@ -9,7 +9,7 @@ Signed-off-by: Andrew Lunn --- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts +++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts -@@ -414,13 +414,11 @@ +@@ -428,13 +428,11 @@ }; };