From 491c37e49b48e7a18dc79e29f3ac13e0c3d0fb54 Mon Sep 17 00:00:00 2001 From: Rabie Loulou Date: Wed, 6 Jun 2018 15:49:27 +0300 Subject: [PATCH] net/mlx5e: In case of LAG, one switch parent id is used for all representors When the uplink representors are put into LAG, set all the representors (VFs and uplinks) of the same NIC to return the same switchdev id. Currently, the route lookup code on the encapsulation offload path assumes that the same switchdev id for the source and dest devices means that the dest is also an mlx5 HW netdev. This doesn't hold anymore when we align the switchdev id of the uplinks to be the same, which in turn causes the bond/team to return that id to the caller. As such, enhance the relevant check to take into account the uplink LAG case. Signed-off-by: Rabie Loulou Signed-off-by: Aviv Heller Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 41 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 20 +++++++-- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index c1515f013501..d5d161ab0dbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -14,7 +14,8 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, u8 *out_ttl) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *uplink_rpriv; + struct net_device *uplink_dev, *uplink_upper; + bool dst_is_lag_dev; struct rtable *rt; struct neighbour *n = NULL; @@ -28,10 +29,20 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, #else return -EOPNOTSUPP; #endif - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) - *out_dev = uplink_rpriv->netdev; + + 
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + dst_is_lag_dev = (uplink_upper && + netif_is_lag_master(uplink_upper) && + rt->dst.dev == uplink_upper && + mlx5_lag_is_active(priv->mdev)); + + /* if the egress device isn't on the same HW e-switch or + * it's a LAG device, use the uplink + */ + if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev) || + dst_is_lag_dev) + *out_dev = uplink_dev; else *out_dev = rt->dst.dev; @@ -65,8 +76,9 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, struct dst_entry *dst; #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) - struct mlx5e_rep_priv *uplink_rpriv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_dev, *uplink_upper; + bool dst_is_lag_dev; int ret; ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, @@ -77,10 +89,19 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, if (!(*out_ttl)) *out_ttl = ip6_dst_hoplimit(dst); - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) - *out_dev = uplink_rpriv->netdev; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + dst_is_lag_dev = (uplink_upper && + netif_is_lag_master(uplink_upper) && + dst->dev == uplink_upper && + mlx5_lag_is_active(priv->mdev)); + + /* if the egress device isn't on the same HW e-switch or + * it's a LAG device, use the uplink + */ + if (!switchdev_port_same_parent_id(priv->netdev, dst->dev) || + dst_is_lag_dev) + *out_dev = uplink_dev; else *out_dev = dst->dev; #else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index e5230049612f..17f24127a3ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -297,17 +297,31 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_upper = NULL; + struct mlx5e_priv *uplink_priv = NULL; + struct net_device *uplink_dev; if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + if (uplink_dev) { + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + uplink_priv = netdev_priv(uplink_dev); + } + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: attr->u.ppid.id_len = ETH_ALEN; - ether_addr_copy(attr->u.ppid.id, rep->hw_id); + if (uplink_upper && mlx5_lag_is_active(uplink_priv->mdev)) { + ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr); + } else { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + ether_addr_copy(attr->u.ppid.id, rep->hw_id); + } break; default: return -EOPNOTSUPP; -- 2.30.2