net/mlx5e: Fix GRE key by controlling port tunnel entropy calculation
author: Eli Britstein <elibr@mellanox.com>
Mon, 14 Jan 2019 08:07:44 +0000 (10:07 +0200)
committer: Saeed Mahameed <saeedm@mellanox.com>
Fri, 22 Feb 2019 21:38:23 +0000 (13:38 -0800)
Flow entropy is calculated on the inner packet headers and used for
flow distribution in processing, routing etc. For GRE-type
encapsulations the entropy value is placed in the eight LSB of the key
field in the GRE header as defined in NVGRE RFC 7637. For UDP based
encapsulations the entropy value is placed in the source port of the
UDP header.
The hardware may support entropy calculation specifically for GRE and
for all tunneling protocols. With commit df2ef3bff193 ("net/mlx5e: Add
GRE protocol offloading") GRE is offloaded, but the hardware is
configured by default to calculate flow entropy, so packets transmitted
on the wire have a wrong key. To support UDP based tunnels (e.g. VXLAN),
GRE (i.e. no flow entropy) and NVGRE (i.e. with flow entropy), the
hardware behaviour must be controlled by the driver.

Ensure port entropy calculation is enabled for offloaded VXLAN tunnels
and disable port entropy calculation in the presence of offloaded GRE
tunnels by monitoring the presence of entropy enabling tunnels (i.e.
VXLAN) and entropy disabling tunnels (i.e. GRE).

Fixes: df2ef3bff193 ("net/mlx5e: Add GRE protocol offloading")
Signed-off-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/port.c
include/linux/mlx5/port.h

index 82d636baaa4eb71fce2d621972afac26d78dd601..17f1a8b28c0a734b221494b547c68f5cf4a9ce46 100644 (file)
@@ -30,7 +30,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 mlx5_core-$(CONFIG_MLX5_EN_ARFS)     += en_arfs.o
 mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o
 
 #
 # Core extra
index 287d48e5b073944db1e7ea0636d893c76114685c..4d033e01f6ab3ec4d4f4477de288b3f18a197ac7 100644 (file)
@@ -44,6 +44,7 @@
 #include "en_tc.h"
 #include "en/tc_tun.h"
 #include "fs_core.h"
+#include "lib/port_tun.h"
 
 #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
         max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
@@ -1044,14 +1045,23 @@ static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv,
 int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
                                 struct mlx5e_encap_entry *e)
 {
+       struct mlx5e_rep_priv *rpriv = priv->ppriv;
+       struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+       struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
        struct mlx5e_neigh_hash_entry *nhe;
        int err;
 
+       err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
+       if (err)
+               return err;
        nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
        if (!nhe) {
                err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
-               if (err)
+               if (err) {
+                       mlx5_tun_entropy_refcount_dec(tun_entropy,
+                                                     e->reformat_type);
                        return err;
+               }
        }
        list_add(&e->encap_list, &nhe->encap_list);
        return 0;
@@ -1060,6 +1070,9 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
 void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
                                  struct mlx5e_encap_entry *e)
 {
+       struct mlx5e_rep_priv *rpriv = priv->ppriv;
+       struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+       struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
        struct mlx5e_neigh_hash_entry *nhe;
 
        list_del(&e->encap_list);
@@ -1067,6 +1080,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
 
        if (list_empty(&nhe->encap_list))
                mlx5e_rep_neigh_entry_destroy(priv, nhe);
+       mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
 }
 
 static int mlx5e_vf_rep_open(struct net_device *dev)
@@ -1564,6 +1578,8 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
                if (err)
                        goto destroy_tises;
 
+               mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
+
                /* init indirect block notifications */
                INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
                uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
index 36eafc877e6bf576e8dbb831bcfd27dd8f73d0aa..1aa3e110bb970d604bcc7ced63a7df78b32f54a6 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/rhashtable.h>
 #include "eswitch.h"
 #include "en.h"
+#include "lib/port_tun.h"
 
 #ifdef CONFIG_MLX5_ESWITCH
 struct mlx5e_neigh_update_table {
@@ -71,6 +72,8 @@ struct mlx5_rep_uplink_priv {
         */
        struct list_head            tc_indr_block_priv_list;
        struct notifier_block       netdevice_nb;
+
+       struct mlx5_tun_entropy tun_entropy;
 };
 
 struct mlx5e_rep_priv {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
new file mode 100644 (file)
index 0000000..40f4a19
--- /dev/null
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/port.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+#include "lib/port_tun.h"
+
+/* Snapshot of the tunnel entropy bits of the PCMR register: for each of the
+ * three controls (force, port-wide calc, GRE-specific calc) whether the
+ * capability bit is set and what the current value is.
+ */
+struct mlx5_port_tun_entropy_flags {
+       bool force_supported;
+       bool force_enabled;
+       bool calc_supported;
+       bool calc_enabled;
+       bool gre_calc_supported;
+       bool gre_calc_enabled;
+};
+
+/* Read the tunnel entropy capability and state bits from the PCMR register.
+ *
+ * @mdev: device to query
+ * @entropy_flags: always fully written on return
+ *
+ * If the device lacks the ports_check capability, or the register query
+ * fails, the fallback values are reported: no control is supported, force is
+ * off, and both entropy calculations are considered enabled.
+ */
+static void mlx5_query_port_tun_entropy(struct mlx5_core_dev *mdev,
+                                       struct mlx5_port_tun_entropy_flags *entropy_flags)
+{
+       u32 pcmr[MLX5_ST_SZ_DW(pcmr_reg)];
+
+       /* Default values for FW which do not support MLX5_REG_PCMR */
+       *entropy_flags = (struct mlx5_port_tun_entropy_flags) {
+               .calc_enabled = true,
+               .gre_calc_enabled = true,
+       };
+
+       if (!MLX5_CAP_GEN(mdev, ports_check) ||
+           mlx5_query_ports_check(mdev, pcmr, sizeof(pcmr)))
+               return;
+
+       /* capability bits */
+       entropy_flags->force_supported =
+               MLX5_GET(pcmr_reg, pcmr, entropy_force_cap) != 0;
+       entropy_flags->calc_supported =
+               MLX5_GET(pcmr_reg, pcmr, entropy_calc_cap) != 0;
+       entropy_flags->gre_calc_supported =
+               MLX5_GET(pcmr_reg, pcmr, entropy_gre_calc_cap) != 0;
+
+       /* current settings */
+       entropy_flags->force_enabled =
+               MLX5_GET(pcmr_reg, pcmr, entropy_force) != 0;
+       entropy_flags->calc_enabled =
+               MLX5_GET(pcmr_reg, pcmr, entropy_calc) != 0;
+       entropy_flags->gre_calc_enabled =
+               MLX5_GET(pcmr_reg, pcmr, entropy_gre_calc) != 0;
+}
+
+/* Update the port-wide entropy calculation setting in the PCMR register.
+ *
+ * The current register content is queried first, then local_port is set to
+ * 1, entropy_force and entropy_calc are overwritten with @force and @enable,
+ * and the result is written back.
+ *
+ * Returns 0 on success, or the error of the failing query/set access.
+ */
+static int mlx5_set_port_tun_entropy_calc(struct mlx5_core_dev *mdev, u8 enable,
+                                         u8 force)
+{
+       u32 pcmr[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+       int ret;
+
+       ret = mlx5_query_ports_check(mdev, pcmr, sizeof(pcmr));
+       if (!ret) {
+               MLX5_SET(pcmr_reg, pcmr, local_port, 1);
+               MLX5_SET(pcmr_reg, pcmr, entropy_force, force);
+               MLX5_SET(pcmr_reg, pcmr, entropy_calc, enable);
+               ret = mlx5_set_ports_check(mdev, pcmr, sizeof(pcmr));
+       }
+
+       return ret;
+}
+
+/* Update the GRE-specific entropy calculation setting in the PCMR register.
+ *
+ * The current register content is queried first, then local_port is set to
+ * 1, entropy_force and entropy_gre_calc are overwritten with @force and
+ * @enable, and the result is written back.
+ *
+ * Returns 0 on success, or the error of the failing query/set access.
+ */
+static int mlx5_set_port_gre_tun_entropy_calc(struct mlx5_core_dev *mdev,
+                                             u8 enable, u8 force)
+{
+       u32 pcmr[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+       int ret;
+
+       ret = mlx5_query_ports_check(mdev, pcmr, sizeof(pcmr));
+       if (!ret) {
+               MLX5_SET(pcmr_reg, pcmr, local_port, 1);
+               MLX5_SET(pcmr_reg, pcmr, entropy_force, force);
+               MLX5_SET(pcmr_reg, pcmr, entropy_gre_calc, enable);
+               ret = mlx5_set_ports_check(mdev, pcmr, sizeof(pcmr));
+       }
+
+       return ret;
+}
+
+/* Initialize the tunnel entropy tracking state.
+ *
+ * @tun_entropy: state to initialize (lock, counters and cached enable bit)
+ * @mdev: device whose port entropy settings are tracked
+ *
+ * Both entry counters start at zero. The cached 'enabled' value is taken
+ * from the current FW setting when port-wide entropy calculation is
+ * controllable, and is assumed to be on otherwise.
+ */
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+                               struct mlx5_core_dev *mdev)
+{
+       struct mlx5_port_tun_entropy_flags entropy_flags;
+
+       tun_entropy->mdev = mdev;
+       mutex_init(&tun_entropy->lock);
+       mlx5_query_port_tun_entropy(mdev, &entropy_flags);
+       tun_entropy->num_enabling_entries = 0;
+       tun_entropy->num_disabling_entries = 0;
+       /* Without port-wide control the calculation cannot be turned off by
+        * the driver, so treat it as enabled.
+        */
+       tun_entropy->enabled = entropy_flags.calc_supported ?
+                              entropy_flags.calc_enabled : true;
+}
+
+/* Switch tunnel entropy calculation on or off for the given tunnel type.
+ *
+ * @tun_entropy: tracking state; 'enabled' is updated on the port-wide path
+ * @reformat_type: tunnel type the request originates from
+ * @enable: requested entropy calculation state
+ *
+ * The entropy flags are re-read from the device on every call. The
+ * GRE-specific control is used when it is supported and @reformat_type is
+ * NVGRE; otherwise the port-wide control is used when it is supported. If
+ * neither applies, nothing is written and 0 is returned.
+ *
+ * Returns -EOPNOTSUPP when force is currently enabled and the current setting
+ * already equals @enable, or (port-wide path only) when there are enabling
+ * entries. Errors from the register update helpers are returned as is.
+ *
+ * Both callers in this file invoke it with tun_entropy->lock held.
+ */
+static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy,
+                           int reformat_type, bool enable)
+{
+       struct mlx5_port_tun_entropy_flags entropy_flags;
+       int err;
+
+       mlx5_query_port_tun_entropy(tun_entropy->mdev, &entropy_flags);
+       /* Tunnel entropy calculation may be controlled either on port basis
+        * for all tunneling protocols or specifically for GRE protocol.
+        * Prioritize GRE protocol control (if capable) over global port
+        * configuration.
+        */
+       if (entropy_flags.gre_calc_supported &&
+           reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
+               /* Other applications may change the global FW entropy
+                * calculations settings. Check that the current entropy value
+                * is the negative of the updated value.
+                */
+               if (entropy_flags.force_enabled &&
+                   enable == entropy_flags.gre_calc_enabled) {
+                       mlx5_core_warn(tun_entropy->mdev,
+                                      "Unexpected GRE entropy calc setting - expected %d",
+                                      !entropy_flags.gre_calc_enabled);
+                       return -EOPNOTSUPP;
+               }
+               err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, enable,
+                                                        entropy_flags.force_supported);
+               if (err)
+                       return err;
+               /* if we turn on the entropy we don't need to force it anymore */
+               if (entropy_flags.force_supported && enable) {
+                       err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, 1, 0);
+                       if (err)
+                               return err;
+               }
+       } else if (entropy_flags.calc_supported) {
+               /* Other applications may change the global FW entropy
+                * calculations settings. Check that the current entropy value
+                * is the negative of the updated value.
+                */
+               if (entropy_flags.force_enabled &&
+                   enable == entropy_flags.calc_enabled) {
+                       mlx5_core_warn(tun_entropy->mdev,
+                                      "Unexpected entropy calc setting - expected %d",
+                                      !entropy_flags.calc_enabled);
+                       return -EOPNOTSUPP;
+               }
+               /* GRE requires disabling entropy calculation. if there are
+                * enabling entries (i.e VXLAN) we cannot turn it off for them,
+                * thus fail.
+                */
+               if (tun_entropy->num_enabling_entries)
+                       return -EOPNOTSUPP;
+               err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, enable,
+                                                    entropy_flags.force_supported);
+               if (err)
+                       return err;
+               /* cache the new port-wide state; it gates VXLAN refcount_inc */
+               tun_entropy->enabled = enable;
+               /* if we turn on the entropy we don't need to force it anymore */
+               if (entropy_flags.force_supported && enable) {
+                       err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, 1, 0);
+                       if (err)
+                               return err;
+               }
+       }
+
+       return 0;
+}
+
+/* Account for a new encap entry of the given tunnel type.
+ *
+ * VXLAN entries are accepted only while port entropy calculation is marked
+ * enabled. For the first NVGRE entry mlx5_set_entropy() is asked to turn
+ * entropy calculation off; further NVGRE entries only bump the counter.
+ *
+ * Returns 0 when the entry was counted, -EOPNOTSUPP for any other tunnel
+ * type or for VXLAN while entropy is disabled, or the mlx5_set_entropy()
+ * error when handling the first NVGRE entry failed.
+ */
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+                                 int reformat_type)
+{
+       /* unknown (non VXLAN/GRE) tunnel types keep this error */
+       int ret = -EOPNOTSUPP;
+
+       mutex_lock(&tun_entropy->lock);
+       switch (reformat_type) {
+       case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
+               /* VXLAN is fine as long as entropy is calculated for all
+                * tunneling types; otherwise the default error stays.
+                */
+               if (tun_entropy->enabled) {
+                       tun_entropy->num_enabling_entries++;
+                       ret = 0;
+               }
+               break;
+       case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
+               /* only the first GRE rule has to turn the entropy off, for
+                * the following ones it was already disabled successfully.
+                */
+               ret = tun_entropy->num_disabling_entries ?
+                     0 : mlx5_set_entropy(tun_entropy, reformat_type, false);
+               if (!ret)
+                       tun_entropy->num_disabling_entries++;
+               break;
+       default:
+               break;
+       }
+       mutex_unlock(&tun_entropy->lock);
+
+       return ret;
+}
+
+/* Drop the accounting of an encap entry of the given tunnel type.
+ *
+ * When the last NVGRE entry goes away, mlx5_set_entropy() is asked to turn
+ * entropy calculation back on; its return value is not checked. Other
+ * tunnel types besides VXLAN and NVGRE are ignored.
+ */
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+                                  int reformat_type)
+{
+       mutex_lock(&tun_entropy->lock);
+       switch (reformat_type) {
+       case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
+               tun_entropy->num_enabling_entries--;
+               break;
+       case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
+               tun_entropy->num_disabling_entries--;
+               if (!tun_entropy->num_disabling_entries)
+                       mlx5_set_entropy(tun_entropy, reformat_type, true);
+               break;
+       default:
+               break;
+       }
+       mutex_unlock(&tun_entropy->lock);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h
new file mode 100644 (file)
index 0000000..54c42a8
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_PORT_TUN_H__
+#define __MLX5_PORT_TUN_H__
+
+#include <linux/mlx5/driver.h>
+
+/* Tracks offloaded tunnel encap entries that depend on, or conflict with,
+ * port tunnel entropy calculation.
+ */
+struct mlx5_tun_entropy {
+       struct mlx5_core_dev *mdev;     /* device whose PCMR register is used */
+       u32 num_enabling_entries;       /* counted VXLAN entries */
+       u32 num_disabling_entries;      /* counted NVGRE entries */
+       u8  enabled;                    /* cached port-wide entropy calc state */
+       struct mutex lock;      /* lock the entropy fields */
+};
+
+/* Initialize tun_entropy for mdev: lock, zeroed counters, cached state. */
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+                               struct mlx5_core_dev *mdev);
+/* Count a new encap entry of reformat_type; returns 0 or a negative errno
+ * when the tunnel type cannot be accepted with the current entropy state.
+ */
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+                                 int reformat_type);
+/* Undo a successful mlx5_tun_entropy_refcount_inc() for reformat_type. */
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+                                  int reformat_type);
+
+#endif /* __MLX5_PORT_TUN_H__ */
index 55b30d21a73a410700432ae3780c70f90de15463..21b7f05b16a5f6053a88c1cdb9067c0f9e26ea10 100644 (file)
@@ -764,8 +764,7 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode)
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_wol);
 
-static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out,
-                                 int outlen)
+int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen)
 {
        u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
 
@@ -774,7 +773,7 @@ static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out,
                                    outlen, MLX5_REG_PCMR, 0, 0);
 }
 
-static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
+int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
 {
        u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
 
index 814fa194663bb01463c74a436565819d34481cdc..64e78394fc9c3e4169fa8e8fecc28f678c73932b 100644 (file)
@@ -182,6 +182,8 @@ int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev,
 int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode);
 int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode);
 
+int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen);
+int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen);
 int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable);
 void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
                         bool *enabled);