#include <linux/etherdevice.h>
#include <linux/of_net.h>
#include <linux/pci.h>
+#include <linux/bpf.h>
/* Local includes */
#include "i40e.h"
}
}
+/**
+ * i40e_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: the vsi
+ *
+ * Returns I40E_RXBUFFER_2048 when PAGE_SIZE is at least 8192 or when
+ * I40E_FLAG_LEGACY_RX is set in vsi->back->flags, and I40E_RXBUFFER_3072
+ * otherwise.
+ **/
+static int i40e_max_xdp_frame_size(struct i40e_vsi *vsi)
+{
+	bool small_buffers = PAGE_SIZE >= 8192 ||
+			     (vsi->back->flags & I40E_FLAG_LEGACY_RX);
+
+	return small_buffers ? I40E_RXBUFFER_2048 : I40E_RXBUFFER_3072;
+}
+
/**
* i40e_change_mtu - NDO callback to change the Maximum Transfer Unit
* @netdev: network interface device structure
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
+ if (i40e_enabled_xdp_vsi(vsi)) {
+ int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+
+ if (frame_size > i40e_max_xdp_frame_size(vsi))
+ return -EINVAL;
+ }
+
netdev_info(netdev, "changing MTU from %d to %d\n",
netdev->mtu, new_mtu);
netdev->mtu = new_mtu;
return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}
+/**
+ * i40e_xdp_setup - add/remove an XDP program
+ * @vsi: VSI to change
+ * @prog: XDP program to attach, or NULL to detach the current one
+ *
+ * Swaps @prog into vsi->xdp_prog and then copies the VSI's program pointer
+ * into every Rx ring of the VSI. When the call flips XDP between enabled
+ * and disabled, the swap is bracketed by i40e_prep_for_reset() and
+ * i40e_reset_and_rebuild(); replacing one program with another skips the
+ * reset. The previously installed program, if any, is released with
+ * bpf_prog_put() after the rings have been updated.
+ *
+ * Returns 0 on success (including the no-op case of detaching when nothing
+ * is attached), or -EINVAL if a frame at the current MTU does not fit in
+ * vsi->rx_buf_len.
+ **/
+static int i40e_xdp_setup(struct i40e_vsi *vsi,
+ struct bpf_prog *prog)
+{
+ int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+ struct i40e_pf *pf = vsi->back;
+ struct bpf_prog *old_prog;
+ bool need_reset;
+ int i;
+
+ /* Don't allow frames that span over multiple buffers: frame_size is
+  * the current MTU plus Ethernet header, FCS and one VLAN tag. This
+  * check runs for detach requests (prog == NULL) as well.
+  * NOTE(review): i40e_change_mtu() bounds the same frame size with
+  * i40e_max_xdp_frame_size() rather than vsi->rx_buf_len -- confirm
+  * the two limits agree and that rx_buf_len is already populated
+  * whenever this can be called (e.g. with the interface down).
+  */
+ if (frame_size > vsi->rx_buf_len)
+ return -EINVAL;
+
+ if (!i40e_enabled_xdp_vsi(vsi) && !prog) /* nothing attached, nothing to remove */
+ return 0;
+
+ /* When turning XDP on->off/off->on we reset and rebuild the rings.
+  * Swapping one program for another leaves need_reset false.
+  */
+ need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
+
+ if (need_reset)
+ i40e_prep_for_reset(pf, true);
+
+ old_prog = xchg(&vsi->xdp_prog, prog); /* keep the old pointer so it can be put below */
+
+ if (need_reset)
+ i40e_reset_and_rebuild(pf, true, true);
+
+ for (i = 0; i < vsi->num_queue_pairs; i++) /* publish the VSI's program to each Rx ring */
+ WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ return 0;
+}
+
+/**
+ * i40e_xdp - implements ndo_xdp for i40e
+ * @dev: netdevice
+ * @xdp: XDP command
+ *
+ * Only the main VSI accepts XDP commands. XDP_SETUP_PROG is forwarded to
+ * i40e_xdp_setup(); XDP_QUERY_PROG reports in xdp->prog_attached whether
+ * XDP is currently enabled on the VSI.
+ *
+ * Returns the result of i40e_xdp_setup() for XDP_SETUP_PROG, 0 for
+ * XDP_QUERY_PROG, and -EINVAL for a non-main VSI or any other command.
+ **/
+static int i40e_xdp(struct net_device *dev,
+		    struct netdev_xdp *xdp)
+{
+	struct i40e_netdev_priv *priv = netdev_priv(dev);
+	struct i40e_vsi *vsi = priv->vsi;
+
+	if (vsi->type != I40E_VSI_MAIN)
+		return -EINVAL;
+
+	if (xdp->command == XDP_SETUP_PROG)
+		return i40e_xdp_setup(vsi, xdp->prog);
+
+	if (xdp->command == XDP_QUERY_PROG) {
+		xdp->prog_attached = i40e_enabled_xdp_vsi(vsi);
+		return 0;
+	}
+
+	/* any other command is unsupported */
+	return -EINVAL;
+}
+
static const struct net_device_ops i40e_netdev_ops = {
.ndo_open = i40e_open,
.ndo_stop = i40e_close,
.ndo_features_check = i40e_features_check,
.ndo_bridge_getlink = i40e_ndo_bridge_getlink,
.ndo_bridge_setlink = i40e_ndo_bridge_setlink,
+ .ndo_xdp = i40e_xdp,
};
/**
#include <linux/prefetch.h>
#include <net/busy_poll.h>
+#include <linux/bpf_trace.h>
#include "i40e.h"
#include "i40e_trace.h"
#include "i40e_prototype.h"
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
i40e_clean_rx_ring(rx_ring);
+ rx_ring->xdp_prog = NULL;
kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
+ rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
+
return 0;
err:
kfree(rx_ring->rx_bi);
* i40e_cleanup_headers - Correct empty headers
* @rx_ring: rx descriptor ring packet is being transacted on
* @skb: pointer to current skb being fixed
+ * @rx_desc: pointer to the EOP Rx descriptor
*
* Also address the case where we are pulling data in on pages only
* and as such no data is present in the skb header.
*
* Returns true if an error was encountered and skb was freed.
**/
-static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
+static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
+ union i40e_rx_desc *rx_desc)
+
{
+ /* XDP packets use error pointer so abort at this point */
+ if (IS_ERR(skb))
+ return true;
+
+ /* ERR_MASK will only have valid bits if EOP set, and
+ * what we are doing here is actually checking
+ * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
+ * the error field
+ */
+ if (unlikely(i40e_test_staterr(rx_desc,
+ BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
+ dev_kfree_skb_any(skb);
+ return true;
+ }
+
/* if eth_skb_pad returns an error the skb was freed */
if (eth_skb_pad(skb))
return true;
* i40e_construct_skb - Allocate skb and populate it
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: rx buffer to pull data from
- * @size: size of buffer to add to skb
+ * @xdp: xdp_buff pointing to the data
*
* This function allocates an skb. It then populates it with the page
* data from the current receive descriptor, taking care to set up the
*/
static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
- unsigned int size)
+ struct xdp_buff *xdp)
{
- void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+ unsigned int size = xdp->data_end - xdp->data;
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
+ prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
+ prefetch(xdp->data + L1_CACHE_BYTES);
#endif
/* allocate a skb to store the frags */
/* Determine available headroom for copy */
headlen = size;
if (headlen > I40E_RX_HDR_SIZE)
- headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE);
+ headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
- memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+ memcpy(__skb_put(skb, headlen), xdp->data,
+ ALIGN(headlen, sizeof(long)));
/* update all of the pointers */
size -= headlen;
* i40e_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buffer: Rx buffer to pull data from
- * @size: size of buffer to add to skb
+ * @xdp: xdp_buff pointing to the data
*
* This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead.
*/
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
- unsigned int size)
+ struct xdp_buff *xdp)
{
- void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+ unsigned int size = xdp->data_end - xdp->data;
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
struct sk_buff *skb;
/* prefetch first cache line of first page */
- prefetch(va);
+ prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
- prefetch(va + L1_CACHE_BYTES);
+ prefetch(xdp->data + L1_CACHE_BYTES);
#endif
/* build an skb around the page buffer */
- skb = build_skb(va - I40E_SKB_PAD, truesize);
+ skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb))
return NULL;
return true;
}
+#define I40E_XDP_PASS 0
+#define I40E_XDP_CONSUMED 1
+
+/**
+ * i40e_run_xdp - run an XDP program
+ * @rx_ring: Rx ring being processed
+ * @xdp: XDP buffer containing the frame
+ *
+ * Runs the program attached to @rx_ring, if any, on @xdp under
+ * rcu_read_lock() and folds the verdict into one of the I40E_XDP_* codes
+ * defined above: XDP_PASS, or no program attached, yields I40E_XDP_PASS;
+ * every other verdict yields I40E_XDP_CONSUMED. XDP_TX, XDP_ABORTED and
+ * unknown action values additionally raise the xdp_exception tracepoint,
+ * and unknown values are first reported via bpf_warn_invalid_xdp_action().
+ *
+ * Returns ERR_PTR(-result): ERR_PTR(0) for I40E_XDP_PASS and ERR_PTR(-1)
+ * for I40E_XDP_CONSUMED. No skb is allocated here; the returned pointer
+ * only carries the verdict back to the caller.
+ **/
+static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
+ struct xdp_buff *xdp)
+{
+ int result = I40E_XDP_PASS;
+ struct bpf_prog *xdp_prog;
+ u32 act;
+
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog); /* written with WRITE_ONCE() in i40e_xdp_setup() */
+
+ if (!xdp_prog) /* no program: result stays I40E_XDP_PASS */
+ goto xdp_out;
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act); /* fallthrough -- unknown actions are traced and dropped */
+ case XDP_TX: /* not transmitted here; shares the exception path below */
+ case XDP_ABORTED:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ /* fallthrough -- handle aborts by dropping packet */
+ case XDP_DROP:
+ result = I40E_XDP_CONSUMED;
+ break;
+ }
+xdp_out:
+ rcu_read_unlock();
+ return ERR_PTR(-result);
+}
+
/**
* i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: rx descriptor ring to transact packets on
while (likely(total_rx_packets < budget)) {
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
+ struct xdp_buff xdp;
unsigned int size;
u16 vlan_tag;
u8 rx_ptype;
rx_buffer = i40e_get_rx_buffer(rx_ring, size);
/* retrieve a buffer from the ring */
- if (skb)
+ if (!skb) {
+ xdp.data = page_address(rx_buffer->page) +
+ rx_buffer->page_offset;
+ xdp.data_hard_start = xdp.data -
+ i40e_rx_offset(rx_ring);
+ xdp.data_end = xdp.data + size;
+
+ skb = i40e_run_xdp(rx_ring, &xdp);
+ }
+
+ if (IS_ERR(skb)) {
+ total_rx_bytes += size;
+ total_rx_packets++;
+ rx_buffer->pagecnt_bias++;
+ } else if (skb) {
i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
- else if (ring_uses_build_skb(rx_ring))
- skb = i40e_build_skb(rx_ring, rx_buffer, size);
- else
- skb = i40e_construct_skb(rx_ring, rx_buffer, size);
+ } else if (ring_uses_build_skb(rx_ring)) {
+ skb = i40e_build_skb(rx_ring, rx_buffer, &xdp);
+ } else {
+ skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp);
+ }
/* exit if we failed to retrieve a buffer */
if (!skb) {
if (i40e_is_non_eop(rx_ring, rx_desc, skb))
continue;
- /* ERR_MASK will only have valid bits if EOP set, and
- * what we are doing here is actually checking
- * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
- * the error field
- */
- if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
- dev_kfree_skb_any(skb);
- skb = NULL;
- continue;
- }
-
- if (i40e_cleanup_headers(rx_ring, skb)) {
+ if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
skb = NULL;
continue;
}