ixgbe: update {P}FC thresholds to account for X540 and loopback
authorJohn Fastabend <john.r.fastabend@intel.com>
Tue, 23 Aug 2011 03:14:22 +0000 (03:14 +0000)
committerJeff Kirsher <jeffrey.t.kirsher@intel.com>
Thu, 29 Sep 2011 06:09:59 +0000 (23:09 -0700)
Revise high and low threshold marks wrt flow control to account
for the X540 devices and latency introduced by the loopback
switch.

Without this it was in theory possible to drop frames on a
supposedly lossless link with X540 or SR-IOV enabled.

Previously we used a magic number in a define to calculate the
threshold values. This made it difficult to sort out exactly
which latencies were or were not being accounted for. Here
I was overly explicit and tried to use #define names that would
be recognizable after reading the IEEE 802.1Qbb specification.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h

index b816a624a6ceab95dd88090f8ef5519099c22574..fa079bbab89a45b58cd784c8481ddbe79d8c58e9 100644 (file)
@@ -358,7 +358,6 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw, s32 packetbuf_num)
        u32 fctrl_reg;
        u32 rmcs_reg;
        u32 reg;
-       u32 rx_pba_size;
        u32 link_speed = 0;
        bool link_up;
 
@@ -461,16 +460,13 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw, s32 packetbuf_num)
 
        /* Set up and enable Rx high/low water mark thresholds, enable XON. */
        if (hw->fc.current_mode & ixgbe_fc_tx_pause) {
-               rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(packetbuf_num));
-               rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT;
-
-               reg = (rx_pba_size - hw->fc.low_water) << 6;
+               reg = hw->fc.low_water << 6;
                if (hw->fc.send_xon)
                        reg |= IXGBE_FCRTL_XONE;
 
                IXGBE_WRITE_REG(hw, IXGBE_FCRTL(packetbuf_num), reg);
 
-               reg = (rx_pba_size - hw->fc.high_water) << 6;
+               reg = hw->fc.high_water[packetbuf_num] << 6;
                reg |= IXGBE_FCRTH_FCEN;
 
                IXGBE_WRITE_REG(hw, IXGBE_FCRTH(packetbuf_num), reg);
index 84ed9ef7288d1cee8b7163ff905ae80cb0c37def..59cd54cfdc1f2015b720b07d3688572935b56ac6 100644 (file)
@@ -1932,7 +1932,6 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw, s32 packetbuf_num)
        s32 ret_val = 0;
        u32 mflcn_reg, fccfg_reg;
        u32 reg;
-       u32 rx_pba_size;
        u32 fcrtl, fcrth;
 
 #ifdef CONFIG_DCB
@@ -2012,11 +2011,8 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw, s32 packetbuf_num)
        IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg);
        IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg);
 
-       rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(packetbuf_num));
-       rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT;
-
-       fcrth = (rx_pba_size - hw->fc.high_water) << 10;
-       fcrtl = (rx_pba_size - hw->fc.low_water) << 10;
+       fcrth = hw->fc.high_water[packetbuf_num] << 10;
+       fcrtl = hw->fc.low_water << 10;
 
        if (hw->fc.current_mode & ixgbe_fc_tx_pause) {
                fcrth |= IXGBE_FCRTH_FCEN;
@@ -2293,7 +2289,9 @@ static s32 ixgbe_setup_fc(struct ixgbe_hw *hw, s32 packetbuf_num)
         * Validate the water mark configuration.  Zero water marks are invalid
         * because it causes the controller to just blast out fc packets.
         */
-       if (!hw->fc.low_water || !hw->fc.high_water || !hw->fc.pause_time) {
+       if (!hw->fc.low_water ||
+           !hw->fc.high_water[packetbuf_num] ||
+           !hw->fc.pause_time) {
                hw_dbg(hw, "Invalid water mark configuration\n");
                ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS;
                goto out;
index 0a68aa7f5d18c0d3c6cfdf7d160e73c8edbe0933..df095a9bbe2bd30cd2002bc03b4b22641c970bd5 100644 (file)
@@ -36,7 +36,6 @@
 
 #define IXGBE_MAX_PACKET_BUFFERS 8
 #define MAX_USER_PRIORITY        8
-#define MAX_TRAFFIC_CLASS        8
 #define MAX_BW_GROUP             8
 #define BW_PERCENT               100
 
index 2288c3cac0106927ef566e97b11d9fdfc143bec2..fcd0e479721f793a975fe4ae3b2d374ac733f330 100644 (file)
@@ -191,7 +191,7 @@ s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw,
  */
 s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en)
 {
-       u32 reg, rx_pba_size;
+       u32 reg;
        u8  i;
 
        if (pfc_en) {
@@ -222,9 +222,8 @@ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en)
         */
        for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
                int enabled = pfc_en & (1 << i);
-               rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
-               rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT;
-               reg = (rx_pba_size - hw->fc.low_water) << 10;
+
+               reg = hw->fc.low_water << 10;
 
                if (enabled == pfc_enabled_tx ||
                    enabled == pfc_enabled_full)
@@ -232,7 +231,7 @@ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en)
 
                IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), reg);
 
-               reg = (rx_pba_size - hw->fc.high_water) << 10;
+               reg = hw->fc.high_water[i] << 10;
                if (enabled == pfc_enabled_tx ||
                    enabled == pfc_enabled_full)
                        reg |= IXGBE_FCRTH_FCEN;
index d64fb872978ebb14cf36ab9c5e7dd839a5e5453b..02f6724bf48e003cc7b4c172f5fdd63190ea6d84 100644 (file)
@@ -210,21 +210,19 @@ s32 ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw,
  */
 s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en)
 {
-       u32 i, reg, rx_pba_size;
+       u32 i, reg;
 
        /* Configure PFC Tx thresholds per TC */
        for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
                int enabled = pfc_en & (1 << i);
-               rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
-               rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT;
 
-               reg = (rx_pba_size - hw->fc.low_water) << 10;
+               reg = hw->fc.low_water << 10;
 
                if (enabled)
                        reg |= IXGBE_FCRTL_XONE;
                IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), reg);
 
-               reg = (rx_pba_size - hw->fc.high_water) << 10;
+               reg = hw->fc.high_water[i] << 10;
                if (enabled)
                        reg |= IXGBE_FCRTH_FCEN;
                IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), reg);
index 3594b09f4993a121d33cc110e89e32968f4b9108..ba703d30f3a956b75f0936690a18efcf7fa7faad 100644 (file)
@@ -3351,9 +3351,128 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter)
                IXGBE_WRITE_REG(hw, IXGBE_RQTC, reg);
        }
 }
+#endif
+
+/* Additional bittime to account for IXGBE framing */
+#define IXGBE_ETH_FRAMING 20
+
+/*
+ * ixgbe_hpbthresh - calculate high water mark for flow control
+ * @adapter: board private structure to calculate for
+ * @pb: packet buffer to calculate
+ * Returns RXPBSIZE(pb) >> 10 minus the delay headroom in KB, else tc + 1
+ */
+static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb)
+{
+       struct ixgbe_hw *hw = &adapter->hw;
+       struct net_device *dev = adapter->netdev;
+       int link, tc, kb, marker;
+       u32 dv_id, rx_pba;
+
+       /* Calculate max LAN frame size */
+       tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + IXGBE_ETH_FRAMING;
+
+#ifdef IXGBE_FCOE
+       /* FCoE traffic class uses FCOE jumbo frames */
+       if (dev->features & NETIF_F_FCOE_MTU) {
+               int fcoe_pb = 0;
 
+#ifdef CONFIG_IXGBE_DCB
+               fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up);
+
+#endif
+               if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE)
+                       tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
+       }
 #endif
 
+       /* Calculate delay value for device */
+       switch (hw->mac.type) {
+       case ixgbe_mac_X540:
+               dv_id = IXGBE_DV_X540(link, tc);
+               break;
+       default:
+               dv_id = IXGBE_DV(link, tc);
+               break;
+       }
+
+       /* Loopback switch introduces additional latency */
+       if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+               dv_id += IXGBE_B2BT(tc);
+
+       /* Delay value is calculated in bit times convert to KB */
+       kb = IXGBE_BT2KB(dv_id);
+       rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10;
+
+       marker = rx_pba - kb;
+
+       /* The packet buffer may be too small to provide the required
+        * headroom. In that case warn the user and do the best we can.
+        * NOTE(review): tc is in bytes but marker is in KB - confirm.
+        */
+       if (marker < 0) {
+               e_warn(drv, "Packet Buffer(%i) can not provide enough "
+                           "headroom to support flow control. "
+                           "Decrease MTU or number of traffic classes\n", pb);
+               marker = tc + 1;
+       }
+
+       return marker;
+}
+
+/*
+ * ixgbe_lpbthresh - calculate low water mark for flow control
+ *
+ * @adapter: board private structure to calculate for
+ * Returns the device delay value converted from bit times to KB
+ */
+static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter)
+{
+       struct ixgbe_hw *hw = &adapter->hw;
+       struct net_device *dev = adapter->netdev;
+       int tc;
+       u32 dv_id;
+
+       /* Calculate max LAN frame size */
+       tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+       /* Calculate delay value for device */
+       switch (hw->mac.type) {
+       case ixgbe_mac_X540:
+               dv_id = IXGBE_LOW_DV_X540(tc);
+               break;
+       default:
+               dv_id = IXGBE_LOW_DV(tc);
+               break;
+       }
+
+       /* Delay value is calculated in bit times convert to KB */
+       return IXGBE_BT2KB(dv_id);
+}
+
+/*
+ * ixgbe_pbthresh_setup - set the shared low water mark and the high water
+ * mark of packet buffers 0..num_tc-1 (entries >= num_tc are left untouched)
+ */
+static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter)
+{
+       struct ixgbe_hw *hw = &adapter->hw;
+       int num_tc = netdev_get_num_tc(adapter->netdev);
+       int i;
+
+       if (!num_tc)
+               num_tc = 1;
+
+       hw->fc.low_water = ixgbe_lpbthresh(adapter);
+
+       for (i = 0; i < num_tc; i++) {
+               hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i);
+
+               /* Low water must not exceed high water; 0 fails ixgbe_setup_fc */
+               if (hw->fc.low_water > hw->fc.high_water[i])
+                       hw->fc.low_water = 0;
+       }
+}
+
 static void ixgbe_configure_pb(struct ixgbe_adapter *adapter)
 {
        struct ixgbe_hw *hw = &adapter->hw;
@@ -3367,6 +3486,7 @@ static void ixgbe_configure_pb(struct ixgbe_adapter *adapter)
                hdrm = 0;
 
        hw->mac.ops.set_rxpba(hw, tc, hdrm, PBA_STRATEGY_EQUAL);
+       ixgbe_pbthresh_setup(adapter);
 }
 
 static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
@@ -4769,13 +4889,11 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 {
        struct ixgbe_hw *hw = &adapter->hw;
        struct pci_dev *pdev = adapter->pdev;
-       struct net_device *dev = adapter->netdev;
        unsigned int rss;
 #ifdef CONFIG_IXGBE_DCB
        int j;
        struct tc_configuration *tc;
 #endif
-       int max_frame = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
 
        /* PCI config space info */
 
@@ -4851,8 +4969,7 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter)
 #ifdef CONFIG_DCB
        adapter->last_lfc_mode = hw->fc.current_mode;
 #endif
-       hw->fc.high_water = FC_HIGH_WATER(max_frame);
-       hw->fc.low_water = FC_LOW_WATER(max_frame);
+       ixgbe_pbthresh_setup(adapter);
        hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE;
        hw->fc.send_xon = true;
        hw->fc.disable_fc_autoneg = false;
@@ -5119,9 +5236,6 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
        /* must set new MTU before calling down or up */
        netdev->mtu = new_mtu;
 
-       hw->fc.high_water = FC_HIGH_WATER(max_frame);
-       hw->fc.low_water = FC_LOW_WATER(max_frame);
-
        if (netif_running(netdev))
                ixgbe_reinit_locked(adapter);
 
index 9a03341e5261a078834b32e951d31755f3173e8f..16dd461d4af3c86eb851ed186930fc069ed8385a 100644 (file)
 #define IXGBE_WUPL_LENGTH_MASK 0xFFFF
 
 /* DCB registers */
+#define MAX_TRAFFIC_CLASS        8
 #define IXGBE_RMCS      0x03D00
 #define IXGBE_DPMCS     0x07F40
 #define IXGBE_PDPMCS    0x0CD00
@@ -2323,13 +2324,60 @@ typedef u32 ixgbe_physical_layer;
 #define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000
 #define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000
 
-/* Flow Control Macros */
-#define PAUSE_RTT      8
-#define PAUSE_MTU(MTU) ((MTU + 1024 - 1) / 1024)
+/* Flow Control Data Sheet defined values
+ * Calculation and defines taken from 802.1Qbb Annex O
+ */
+
+/* BitTimes (BT) conversion: BT2KB rounds up to whole KB, B2BT is bytes * 8 */
+#define IXGBE_BT2KB(BT) (((BT) + (8 * 1024 - 1)) / (8 * 1024))
+#define IXGBE_B2BT(BT) ((BT) * 8)
+
+/* Delay to respond to PFC (summed into the bit-time delay values) */
+#define IXGBE_PFC_D    672
+
+/* Cable delay; only the copper value is used by the delay macros here */
+#define IXGBE_CABLE_DC 5556 /* Delay Copper */
+#define IXGBE_CABLE_DO 5000 /* Delay Optical */
+
+/* Interface delay X540 - NOTE(review): MAC/XAUI labels look swapped */
+#define IXGBE_PHY_DC   25600   /* Delay 10G BASET */
+#define IXGBE_MAC_DC   8192    /* Delay Copper XAUI interface */
+#define IXGBE_XAUI_DC  (2 * 2048) /* Delay Copper Phy */
+
+#define IXGBE_ID_X540  (IXGBE_MAC_DC + IXGBE_XAUI_DC + IXGBE_PHY_DC)
+
+/* Interface delay 82598, 82599 (each term is half its X540 counterpart) */
+#define IXGBE_PHY_D    12800
+#define IXGBE_MAC_D    4096
+#define IXGBE_XAUI_D   (2 * 1024)
+
+#define IXGBE_ID       (IXGBE_MAC_D + IXGBE_XAUI_D + IXGBE_PHY_D)
+
+/* Delay incurred from higher layer */
+#define IXGBE_HD       6144
+
+/* PCI Bus delay, used only by the low threshold macros */
+#define IXGBE_PCI_DELAY        10000
+
+/* Scale by 36/25, multiplying first: a bare (36 / 25) truncates to 1 */
+#define IXGBE_FILL_RATE(BT) ((36 * (BT)) / 25)
+/* Calculate X540 delay value in bit times */
+#define IXGBE_DV_X540(LINK, TC) IXGBE_FILL_RATE( \
+                                IXGBE_B2BT(LINK) + IXGBE_PFC_D + \
+                                (2 * IXGBE_CABLE_DC) + \
+                                (2 * IXGBE_ID_X540) + \
+                                IXGBE_HD + IXGBE_B2BT(TC))
+
+/* Calculate 82599, 82598 delay value in bit times */
+#define IXGBE_DV(LINK, TC) IXGBE_FILL_RATE( \
+                           IXGBE_B2BT(LINK) + IXGBE_PFC_D + \
+                           (2 * IXGBE_CABLE_DC) + (2 * IXGBE_ID) + \
+                           IXGBE_HD + IXGBE_B2BT(TC))
 
-#define FC_HIGH_WATER(MTU) ((((PAUSE_RTT + PAUSE_MTU(MTU)) * 144) + 99) / 100 +\
-                               PAUSE_MTU(MTU))
-#define FC_LOW_WATER(MTU)  (2 * (2 * PAUSE_MTU(MTU) + PAUSE_RTT))
+/* Calculate low threshold delay values in bit times */
+#define IXGBE_LOW_DV_X540(TC) (2 * IXGBE_B2BT(TC) + \
+                              IXGBE_FILL_RATE(IXGBE_PCI_DELAY))
+#define IXGBE_LOW_DV(TC)      (2 * IXGBE_LOW_DV_X540(TC))
 
 /* Software ATR hash keys */
 #define IXGBE_ATR_BUCKET_HASH_KEY    0x3DAD14E2
@@ -2548,7 +2596,7 @@ struct ixgbe_bus_info {
 
 /* Flow control parameters */
 struct ixgbe_fc_info {
-       u32 high_water; /* Flow Control High-water */
+       u32 high_water[MAX_TRAFFIC_CLASS]; /* Flow Control High-water */
        u32 low_water; /* Flow Control Low-water */
        u16 pause_time; /* Flow Control Pause timer */
        bool send_xon; /* Flow control send XON */