From: Parav Pandit Date: Wed, 18 Dec 2019 08:51:19 +0000 (-0600) Subject: net/mlx5: E-switch, Protect eswitch mode changes X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=8e0aa4bc959c98c14ed0aaee522d77ca52690189;p=openwrt%2Fstaging%2Fblogic.git net/mlx5: E-switch, Protect eswitch mode changes Currently eswitch mode change is occurring from 2 different execution contexts as below. 1. sriov sysfs enable/disable 2. devlink eswitch set commands Both of them need to access eswitch related data structures in synchronized manner. Without any synchronization below race condition exist. SR-IOV enable/disable with devlink eswitch mode change: cpu-0 cpu-1 ----- ----- mlx5_device_disable_sriov() mlx5_devlink_eswitch_mode_set() mlx5_eswitch_disable() esw_offloads_stop() esw_offloads_disable() mlx5_eswitch_disable() esw_offloads_disable() Hence, they are synchronized using a new mode_lock. eswitch's state_lock is not used as it can lead to a deadlock scenario below and state_lock is only for vport and fdb exclusive access. ip link set vf netlink rcv_msg() - Lock A rtnl_lock vfinfo() esw->state_lock() - Lock B devlink eswitch_set devlink_mutex esw->state_lock() - Lock B attach_netdev() register_netdev() rtnl_lock - Lock A Alternatives considered: 1. Acquiring rtnl lock before taking esw->state_lock to follow similar locking sequence as ip link flow during eswitch mode set. rtnl lock is not good idea for two reasons. (a) Holding rtnl lock for several hundred device commands is not good idea. (b) It leads to below and more similar deadlocks. devlink eswitch_set devlink_mutex rtnl_lock - Lock A esw->state_lock() - Lock B eswitch_disable() reload() ib_register_device() ib_cache_setup_one() rtnl_lock() 2. Exporting devlink lock may lead to undesired use of it in vendor driver(s) in future. 3. Unloading representors outside of the mode_lock requires serialization with other process trying to enable the eswitch. 4. Differing the representors life cycle to a different workqueue requires synchronization with func_change_handler workqueue. Reviewed-by: Roi Dayan Reviewed-by: Bodong Wang Reviewed-by: Mark Bloch Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a22f9c77e4c3..7f618a443bfd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2092,7 +2092,7 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) } /** - * mlx5_eswitch_enable - Enable eswitch + * mlx5_eswitch_enable_locked - Enable eswitch * @esw: Pointer to eswitch * @mode: Eswitch mode to enable * @num_vfs: Enable eswitch for given number of VFs. This is optional. @@ -2104,16 +2104,17 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) * eswitch. Caller should pass < 0 when num_vfs should be * completely ignored. This is typically the case when eswitch * is enabled without sriov regardless of PF/ECPF system. - * mlx5_eswitch_enable() Enables eswitch in either legacy or offloads mode. - * If num_vfs >=0 is provided, it setup VF related eswitch vports. It returns - * 0 on success or error code on failure. + * mlx5_eswitch_enable_locked() Enables eswitch in either legacy or offloads + * mode. If num_vfs >=0 is provided, it setup VF related eswitch vports. + * It returns 0 on success or error code on failure. */ -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs) +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) { int err; - if (!ESW_ALLOWED(esw) || - !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + lockdep_assert_held(&esw->mode_lock); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; } @@ -2164,11 +2165,34 @@ abort: return err; } -void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) +/** + * mlx5_eswitch_enable - Enable eswitch + * @esw: Pointer to eswitch + * @num_vfs: Enable eswitch swich for given number of VFs. + * Caller must pass num_vfs > 0 when enabling eswitch for + * vf vports. + * mlx5_eswitch_enable() returns 0 on success or error code on failure. + */ +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) +{ + int ret; + + if (!ESW_ALLOWED(esw)) + return 0; + + mutex_lock(&esw->mode_lock); + ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); + mutex_unlock(&esw->mode_lock); + return ret; +} + +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) { int old_mode; - if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) + lockdep_assert_held_write(&esw->mode_lock); + + if (esw->mode == MLX5_ESWITCH_NONE) return; esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", @@ -2197,6 +2221,16 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) mlx5_eswitch_clear_vf_vports_info(esw); } +void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) +{ + if (!ESW_ALLOWED(esw)) + return; + + mutex_lock(&esw->mode_lock); + mlx5_eswitch_disable_locked(esw, clear_vf); + mutex_unlock(&esw->mode_lock); +} + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw; @@ -2248,6 +2282,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr.hlist); atomic64_set(&esw->offloads.num_flows, 0); mutex_init(&esw->state_lock); + mutex_init(&esw->mode_lock); mlx5_esw_for_all_vports(esw, i, vport) { vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); @@ -2282,6 +2317,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); esw_offloads_cleanup_reps(esw); + mutex_destroy(&esw->mode_lock); mutex_destroy(&esw->state_lock); mutex_destroy(&esw->offloads.mod_hdr.lock); mutex_destroy(&esw->offloads.encap_tbl_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 752d399bdffb..39f42f985fbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -258,6 +258,11 @@ struct mlx5_eswitch { */ struct mutex state_lock; + /* Protects eswitch mode change that occurs via one or more + * user commands, i.e. sriov state change, devlink commands. + */ + struct mutex mode_lock; + struct { bool enabled; u32 root_tsar_id; @@ -298,7 +303,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); #define MLX5_ESWITCH_IGNORE_NUM_VFS (-1) -int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs); +int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs); +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs); +void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf); void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, u8 mac[ETH_ALEN]); @@ -674,7 +681,7 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode, int num_vfs) { return 0; } +static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 84a38f0739d0..612bc7d1cdcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1592,14 +1592,14 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return -EINVAL; } - mlx5_eswitch_disable(esw, false); - err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS, - esw->dev->priv.sriov.num_vfs); + mlx5_eswitch_disable_locked(esw, false); + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS, + esw->dev->priv.sriov.num_vfs); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to offloads"); - err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY, - MLX5_ESWITCH_IGNORE_NUM_VFS); + err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to legacy"); @@ -2397,13 +2397,13 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable(esw, false); - err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY, - MLX5_ESWITCH_IGNORE_NUM_VFS); + mlx5_eswitch_disable_locked(esw, false); + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); - err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS, - MLX5_ESWITCH_IGNORE_NUM_VFS); + err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS, + MLX5_ESWITCH_IGNORE_NUM_VFS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to offloads"); @@ -2525,6 +2525,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; u16 cur_mlx5_mode, mlx5_mode = 0; int err; @@ -2532,40 +2533,50 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (err) return err; - err = eswitch_devlink_esw_mode_check(dev->priv.eswitch); - if (err) - return err; - - cur_mlx5_mode = dev->priv.eswitch->mode; - if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; + mutex_lock(&esw->mode_lock); + err = eswitch_devlink_esw_mode_check(esw); + if (err) + goto unlock; + + cur_mlx5_mode = esw->mode; + if (cur_mlx5_mode == mlx5_mode) - return 0; + goto unlock; if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) - return esw_offloads_start(dev->priv.eswitch, extack); + err = esw_offloads_start(esw, extack); else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) - return esw_offloads_stop(dev->priv.eswitch, extack); + err = esw_offloads_stop(esw, extack); else - return -EINVAL; + err = -EINVAL; + +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; int err; err = mlx5_eswitch_check(dev); if (err) return err; + mutex_lock(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(dev->priv.eswitch); if (err) - return err; + goto unlock; - return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); + err = esw_mode_to_devlink(esw->mode, mode); +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, @@ -2580,18 +2591,20 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (err) return err; + mutex_lock(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) - return err; + goto out; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) - return 0; + goto out; /* fall through */ case MLX5_CAP_INLINE_MODE_L2: NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: break; } @@ -2599,7 +2612,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set inline mode when flows are configured"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; } err = esw_inline_mode_from_devlink(mode, &mlx5_mode); @@ -2616,6 +2630,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, } esw->offloads.inline_mode = mlx5_mode; + mutex_unlock(&esw->mode_lock); return 0; revert_inline_mode: @@ -2625,6 +2640,7 @@ revert_inline_mode: vport, esw->offloads.inline_mode); out: + mutex_unlock(&esw->mode_lock); return err; } @@ -2638,11 +2654,15 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) if (err) return err; + mutex_lock(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) - return err; + goto unlock; - return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); + err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); +unlock: + mutex_unlock(&esw->mode_lock); + return err; } int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, @@ -2657,30 +2677,36 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, if (err) return err; + mutex_lock(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) - return err; + goto unlock; if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || - !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) - return -EOPNOTSUPP; + !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) { + err = -EOPNOTSUPP; + goto unlock; + } - if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) - return -EOPNOTSUPP; + if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) { + err = -EOPNOTSUPP; + goto unlock; + } if (esw->mode == MLX5_ESWITCH_LEGACY) { esw->offloads.encap = encap; - return 0; + goto unlock; } if (esw->offloads.encap == encap) - return 0; + goto unlock; if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set encapsulation when flows are configured"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto unlock; } esw_destroy_offloads_fdb_tables(esw); @@ -2696,6 +2722,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, (void)esw_create_offloads_fdb_tables(esw, esw->nvports); } +unlock: + mutex_unlock(&esw->mode_lock); return err; } @@ -2710,11 +2738,14 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, if (err) return err; + mutex_lock(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) - return err; + goto unlock; *encap = esw->offloads.encap; +unlock: + mutex_unlock(&esw->mode_lock); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 10a64b91d04c..3094d20297a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -77,8 +77,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) if (!MLX5_ESWITCH_MANAGER(dev)) goto enable_vfs_hca; - err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY, - num_vfs); + err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err);