* that this is in fact a non-EOP buffer.
**/
static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring,
- union ixgbe_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+ union ixgbe_adv_rx_desc *rx_desc)
{
u32 ntc = rx_ring->next_to_clean + 1;
return true;
}
-static bool ixgbevf_alloc_mapped_skb(struct ixgbevf_ring *rx_ring,
- struct ixgbevf_rx_buffer *bi)
+static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *bi)
{
- struct sk_buff *skb = bi->skb;
+ struct page *page = bi->page;
dma_addr_t dma = bi->dma;
- if (unlikely(skb))
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page))
return true;
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- rx_ring->rx_buf_len);
- if (unlikely(!skb)) {
- rx_ring->rx_stats.alloc_rx_buff_failed++;
+ /* alloc new page for storage */
+ page = dev_alloc_page();
+ if (unlikely(!page)) {
+ rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
}
- dma = dma_map_single(rx_ring->dev, skb->data,
- rx_ring->rx_buf_len, DMA_FROM_DEVICE);
+ /* map page for use */
+ dma = dma_map_page(rx_ring->dev, page, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
- dev_kfree_skb_any(skb);
+ __free_page(page);
rx_ring->rx_stats.alloc_rx_buff_failed++;
return false;
}
- bi->skb = skb;
bi->dma = dma;
+ bi->page = page;
+ bi->page_offset = 0;
return true;
}
i -= rx_ring->count;
do {
- if (!ixgbevf_alloc_mapped_skb(rx_ring, bi))
+ if (!ixgbevf_alloc_mapped_page(rx_ring, bi))
break;
/* Refresh the desc even if pkt_addr didn't change
* because each write-back erases this info.
*/
- rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
rx_desc++;
bi++;
/* record the next descriptor to use */
rx_ring->next_to_use = i;
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = i;
+
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
}
}
+/* ixgbevf_pull_tail - ixgbevf specific version of skb_pull_tail
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an ixgbevf specific version of __pskb_pull_tail. The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void ixgbevf_pull_tail(struct ixgbevf_ring *rx_ring,
+ struct sk_buff *skb)
+{
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+ unsigned char *va;
+ unsigned int pull_len;
+
+ /* it is valid to use page_address instead of kmap since we are
+ * working with pages allocated out of the lomem pool per
+ * alloc_page(GFP_ATOMIC)
+ */
+ va = skb_frag_address(frag);
+
+ /* we need the header to contain the greater of either ETH_HLEN or
+ * 60 bytes if the skb->len is less than 60 for skb_pad.
+ */
+ pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE);
+
+ /* align pull length to size of long to optimize memcpy performance */
+ skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+ /* update all of the pointers */
+ skb_frag_size_sub(frag, pull_len);
+ frag->page_offset += pull_len;
+ skb->data_len -= pull_len;
+ skb->tail += pull_len;
+}
+
+/* ixgbevf_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Check for corrupted packet headers caused by senders on the local L2
+ * embedded NIC switch not setting up their Tx Descriptors right. These
+ * should be very rare.
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ */
+static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring,
+ union ixgbe_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ /* verify that the packet does not have any known errors */
+ if (unlikely(ixgbevf_test_staterr(rx_desc,
+ IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) {
+ struct net_device *netdev = rx_ring->netdev;
+
+ if (!(netdev->features & NETIF_F_RXALL)) {
+ dev_kfree_skb_any(skb);
+ return true;
+ }
+ }
+
+ /* place header in linear portion of buffer */
+ if (skb_is_nonlinear(skb))
+ ixgbevf_pull_tail(rx_ring, skb);
+
+ /* if skb_pad returns an error the skb was freed */
+ if (unlikely(skb->len < 60)) {
+ int pad_len = 60 - skb->len;
+
+ if (skb_pad(skb, pad_len))
+ return true;
+ __skb_put(skb, pad_len);
+ }
+
+ return false;
+}
+
+/* ixgbevf_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ */
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *old_buff)
+{
+ struct ixgbevf_rx_buffer *new_buff;
+ u16 nta = rx_ring->next_to_alloc;
+
+ new_buff = &rx_ring->rx_buffer_info[nta];
+
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ /* transfer page from old buffer to new buffer */
+ new_buff->page = old_buff->page;
+ new_buff->dma = old_buff->dma;
+ new_buff->page_offset = old_buff->page_offset;
+
+ /* sync the buffer for use by the device */
+ dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma,
+ new_buff->page_offset,
+ IXGBEVF_RX_BUFSZ,
+ DMA_FROM_DEVICE);
+}
+
+static inline bool ixgbevf_page_is_reserved(struct page *page)
+{
+ return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc;
+}
+
+/* ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ */
+static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
+ struct ixgbevf_rx_buffer *rx_buffer,
+ union ixgbe_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct page *page = rx_buffer->page;
+ unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = IXGBEVF_RX_BUFSZ;
+#else
+ unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+#endif
+
+ if ((size <= IXGBEVF_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) {
+ unsigned char *va = page_address(page) + rx_buffer->page_offset;
+
+ memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+ /* page is not reserved, we can reuse buffer as is */
+ if (likely(!ixgbevf_page_is_reserved(page)))
+ return true;
+
+ /* this page cannot be reused so discard it */
+ put_page(page);
+ return false;
+ }
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ rx_buffer->page_offset, size, truesize);
+
+ /* avoid re-using remote pages */
+ if (unlikely(ixgbevf_page_is_reserved(page)))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely(page_count(page) != 1))
+ return false;
+
+ /* flip page offset to other buffer */
+ rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
+
+#else
+ /* move offset up to the next cache line */
+ rx_buffer->page_offset += truesize;
+
+ if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+ return false;
+
+#endif
+ /* Even if we own the page, we are not allowed to use atomic_set()
+ * This would break get_page_unless_zero() users.
+ */
+ atomic_inc(&page->_count);
+
+ return true;
+}
+
+static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
+ union ixgbe_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct ixgbevf_rx_buffer *rx_buffer;
+ struct page *page;
+
+ rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ page = rx_buffer->page;
+ prefetchw(page);
+
+ if (likely(!skb)) {
+ void *page_addr = page_address(page) +
+ rx_buffer->page_offset;
+
+ /* prefetch first cache line of first page */
+ prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+ prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+ /* allocate a skb to store the frags */
+ skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+ IXGBEVF_RX_HDR_SIZE);
+ if (unlikely(!skb)) {
+ rx_ring->rx_stats.alloc_rx_buff_failed++;
+ return NULL;
+ }
+
+ /* we will be copying header into skb->data in
+ * pskb_may_pull so it is in our interest to prefetch
+ * it now to avoid a possible cache miss
+ */
+ prefetchw(skb->data);
+ }
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ IXGBEVF_RX_BUFSZ,
+ DMA_FROM_DEVICE);
+
+ /* pull page into skb */
+ if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+ /* hand second half of page back to the ring */
+ ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+ } else {
+ /* we are not reusing the buffer so unmap it */
+ dma_unmap_page(rx_ring->dev, rx_buffer->dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ }
+
+ /* clear contents of buffer_info */
+ rx_buffer->dma = 0;
+ rx_buffer->page = NULL;
+
+ return skb;
+}
+
static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
u32 qmask)
{
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = ixgbevf_desc_unused(rx_ring);
+ struct sk_buff *skb = rx_ring->skb;
do {
union ixgbe_adv_rx_desc *rx_desc;
- struct ixgbevf_rx_buffer *rx_buffer;
- struct sk_buff *skb;
- u16 ntc;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
cleaned_count = 0;
}
- ntc = rx_ring->next_to_clean;
- rx_desc = IXGBEVF_RX_DESC(rx_ring, ntc);
- rx_buffer = &rx_ring->rx_buffer_info[ntc];
+ rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_DD))
break;
*/
rmb();
- skb = rx_buffer->skb;
- prefetch(skb->data);
-
- /* pull the header of the skb in */
- __skb_put(skb, le16_to_cpu(rx_desc->wb.upper.length));
+ /* retrieve a buffer from the ring */
+ skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
- dma_unmap_single(rx_ring->dev, rx_buffer->dma,
- rx_ring->rx_buf_len,
- DMA_FROM_DEVICE);
-
- /* clear skb reference in buffer info structure */
- rx_buffer->skb = NULL;
- rx_buffer->dma = 0;
+ /* exit if we failed to retrieve a buffer */
+ if (!skb)
+ break;
cleaned_count++;
- /* place incomplete frames back on ring for completion */
- if (ixgbevf_is_non_eop(rx_ring, rx_desc, skb))
- continue;
-
- /* we should not be chaining buffers, if we did drop the skb */
- if (IXGBE_CB(skb)->prev) {
- do {
- struct sk_buff *this = skb;
- skb = IXGBE_CB(skb)->prev;
- dev_kfree_skb(this);
- } while (skb);
+ /* fetch next buffer in frame if non-eop */
+ if (ixgbevf_is_non_eop(rx_ring, rx_desc))
continue;
- }
- /* ERR_MASK will only have valid bits if EOP set */
- if (unlikely(ixgbevf_test_staterr(rx_desc,
- IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) {
- dev_kfree_skb_irq(skb);
+ /* verify the packet layout is correct */
+ if (ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) {
+ skb = NULL;
continue;
}
ixgbevf_rx_skb(q_vector, skb);
+ /* reset skb pointer */
+ skb = NULL;
+
/* update budget accounting */
budget--;
} while (likely(budget));
+ /* place incomplete frames back on ring for completion */
+ rx_ring->skb = skb;
+
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->stats.packets += total_rx_packets;
rx_ring->stats.bytes += total_rx_bytes;
q_vector->rx.total_packets += total_rx_packets;
q_vector->rx.total_bytes += total_rx_bytes;
- if (cleaned_count)
- ixgbevf_alloc_rx_buffers(rx_ring, cleaned_count);
-
return total_rx_packets;
}
static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
{
- struct ixgbevf_ring *rx_ring;
struct ixgbe_hw *hw = &adapter->hw;
u32 srrctl;
- rx_ring = adapter->rx_ring[index];
-
srrctl = IXGBE_SRRCTL_DROP_EN;
+ srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
+ srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
- srrctl |= ALIGN(rx_ring->rx_buf_len, 1024) >>
- IXGBE_SRRCTL_BSIZEPKT_SHIFT;
-
IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
}
IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
}
-static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter)
-{
- struct ixgbe_hw *hw = &adapter->hw;
- struct net_device *netdev = adapter->netdev;
- int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
- int i;
- u16 rx_buf_len;
-
- /* notify the PF of our intent to use this size of frame */
- ixgbevf_rlpml_set_vf(hw, max_frame);
-
- /* PF will allow an extra 4 bytes past for vlan tagged frames */
- max_frame += VLAN_HLEN;
-
- /*
- * Allocate buffer sizes that fit well into 32K and
- * take into account max frame size of 9.5K
- */
- if ((hw->mac.type == ixgbe_mac_X540_vf) &&
- (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE))
- rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE;
- else if (max_frame <= IXGBEVF_RXBUFFER_2K)
- rx_buf_len = IXGBEVF_RXBUFFER_2K;
- else if (max_frame <= IXGBEVF_RXBUFFER_4K)
- rx_buf_len = IXGBEVF_RXBUFFER_4K;
- else if (max_frame <= IXGBEVF_RXBUFFER_8K)
- rx_buf_len = IXGBEVF_RXBUFFER_8K;
- else
- rx_buf_len = IXGBEVF_RXBUFFER_10K;
-
- for (i = 0; i < adapter->num_rx_queues; i++)
- adapter->rx_ring[i]->rx_buf_len = rx_buf_len;
-}
-
#define IXGBEVF_MAX_RX_DESC_POLL 10
static void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter,
struct ixgbevf_ring *ring)
/* reset ntu and ntc to place SW in sync with hardwdare */
ring->next_to_clean = 0;
ring->next_to_use = 0;
+ ring->next_to_alloc = 0;
ixgbevf_configure_srrctl(adapter, reg_idx);
- /* prevent DMA from exceeding buffer space available */
- rxdctl &= ~IXGBE_RXDCTL_RLPMLMASK;
- rxdctl |= ring->rx_buf_len | IXGBE_RXDCTL_RLPML_EN;
+ /* allow any size packet since we can handle overflow */
+ rxdctl &= ~IXGBE_RXDCTL_RLPML_EN;
+
rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
{
int i;
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct net_device *netdev = adapter->netdev;
ixgbevf_setup_psrtype(adapter);
- /* set_rx_buffer_len must be called before ring initialization */
- ixgbevf_set_rx_buffer_len(adapter);
+ /* notify the PF of our intent to use this size of frame */
+ ixgbevf_rlpml_set_vf(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN);
/* Setup the HW Rx Head and Tail Descriptor Pointers and
* the Base and Length of the Rx Descriptor Ring */
**/
static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
{
+ struct device *dev = rx_ring->dev;
unsigned long size;
unsigned int i;
+ /* Free Rx ring sk_buff */
+ if (rx_ring->skb) {
+ dev_kfree_skb(rx_ring->skb);
+ rx_ring->skb = NULL;
+ }
+
+ /* ring already cleared, nothing to do */
if (!rx_ring->rx_buffer_info)
return;
- /* Free all the Rx ring sk_buffs */
+ /* Free all the Rx ring pages */
for (i = 0; i < rx_ring->count; i++) {
- struct ixgbevf_rx_buffer *rx_buffer_info;
+ struct ixgbevf_rx_buffer *rx_buffer;
- rx_buffer_info = &rx_ring->rx_buffer_info[i];
- if (rx_buffer_info->dma) {
- dma_unmap_single(rx_ring->dev, rx_buffer_info->dma,
- rx_ring->rx_buf_len,
- DMA_FROM_DEVICE);
- rx_buffer_info->dma = 0;
- }
- if (rx_buffer_info->skb) {
- struct sk_buff *skb = rx_buffer_info->skb;
- rx_buffer_info->skb = NULL;
- do {
- struct sk_buff *this = skb;
- skb = IXGBE_CB(skb)->prev;
- dev_kfree_skb(this);
- } while (skb);
- }
+ rx_buffer = &rx_ring->rx_buffer_info[i];
+ if (rx_buffer->dma)
+ dma_unmap_page(dev, rx_buffer->dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rx_buffer->dma = 0;
+ if (rx_buffer->page)
+ __free_page(rx_buffer->page);
+ rx_buffer->page = NULL;
}
size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count;
static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
{
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_hw *hw = &adapter->hw;
int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
int max_possible_frame = MAXIMUM_ETHERNET_VLAN_SIZE;
if ((new_mtu < 68) || (max_frame > max_possible_frame))
return -EINVAL;
- hw_dbg(&adapter->hw, "changing MTU from %d to %d\n",
+ hw_dbg(hw, "changing MTU from %d to %d\n",
netdev->mtu, new_mtu);
/* must set new MTU before calling down or up */
netdev->mtu = new_mtu;
- if (netif_running(netdev))
- ixgbevf_reinit_locked(adapter);
+ /* notify the PF of our intent to use this size of frame */
+ ixgbevf_rlpml_set_vf(hw, max_frame);
return 0;
}