net/mlx5e: Infrastructure for duplicated offloading of TC flows
author: Roi Dayan <roid@mellanox.com>
Sun, 11 Nov 2018 20:24:03 +0000 (22:24 +0200)
committer: Saeed Mahameed <saeedm@mellanox.com>
Fri, 14 Dec 2018 21:28:52 +0000 (13:28 -0800)
Under uplink LAG or multipath schemes, traffic that matches one flow
might arrive on both uplink ports and be transmitted through both
as part of supporting aggregation and high availability.

To cope with the fact that the SW model might use a logical SW port
(e.g. uplink team or bond) while we have two HW ports with an e-switch on
each, there are cases where in order to offload a SW TC rule we
need to duplicate it to two HW flows.

Since each HW rule has its own counter we also aggregate the counter
of both rules when a flow stats query is executed from user-space.

Introduce the changes for the different elements (add/delete/stats),
currently nothing is duplicated.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Aviv Heller <avivh@mellanox.com>
Signed-off-by: Shahar Klein <shahark@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

index 0921213561cb9893d9af5f5ed93f54d89017efe1..eacccac05dda5920f0b8933c8806b36c2dfe3f97 100644 (file)
@@ -52,6 +52,7 @@
 #include "fs_core.h"
 #include "en/port.h"
 #include "en/tc_tun.h"
+#include "lib/devcom.h"
 
 struct mlx5_nic_flow_attr {
        u32 action;
@@ -74,6 +75,7 @@ enum {
        MLX5E_TC_FLOW_HAIRPIN   = BIT(MLX5E_TC_FLOW_BASE + 3),
        MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
        MLX5E_TC_FLOW_SLOW        = BIT(MLX5E_TC_FLOW_BASE + 5),
+       MLX5E_TC_FLOW_DUP         = BIT(MLX5E_TC_FLOW_BASE + 6),
 };
 
 #define MLX5E_TC_MAX_SPLITS 1
@@ -111,8 +113,10 @@ struct mlx5e_tc_flow {
         * destinations.
         */
        struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
+       struct mlx5e_tc_flow    *peer_flow;
        struct list_head        mod_hdr; /* flows sharing the same mod hdr ID */
        struct list_head        hairpin; /* flows sharing the same hairpin */
+       struct list_head        peer;    /* flows with peer flow */
        union {
                struct mlx5_esw_flow_attr esw_attr[0];
                struct mlx5_nic_flow_attr nic_attr[0];
@@ -1249,13 +1253,48 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
        }
 }
 
+static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+{
+       struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
+
+       if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
+           !(flow->flags & MLX5E_TC_FLOW_DUP))
+               return;
+
+       mutex_lock(&esw->offloads.peer_mutex);
+       list_del(&flow->peer);
+       mutex_unlock(&esw->offloads.peer_mutex);
+
+       flow->flags &= ~MLX5E_TC_FLOW_DUP;
+
+       mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
+       kvfree(flow->peer_flow);
+       flow->peer_flow = NULL;
+}
+
+static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
+{
+       struct mlx5_core_dev *dev = flow->priv->mdev;
+       struct mlx5_devcom *devcom = dev->priv.devcom;
+       struct mlx5_eswitch *peer_esw;
+
+       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       if (!peer_esw)
+               return;
+
+       __mlx5e_tc_del_fdb_peer_flow(flow);
+       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+}
+
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
 {
-       if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+       if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+               mlx5e_tc_del_fdb_peer_flow(flow);
                mlx5e_tc_del_fdb_flow(priv, flow);
-       else
+       } else {
                mlx5e_tc_del_nic_flow(priv, flow);
+       }
 }
 
 
@@ -2660,6 +2699,11 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
                return &priv->fs.tc.ht;
 }
 
+static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
+{
+       return false;
+}
+
 static int
 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
                 struct tc_cls_flower_offload *f, u16 flow_flags,
@@ -2693,11 +2737,13 @@ err_free:
 }
 
 static int
-mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
-                  struct tc_cls_flower_offload *f,
-                  u16 flow_flags,
-                  struct net_device *filter_dev,
-                  struct mlx5e_tc_flow **__flow)
+__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+                    struct tc_cls_flower_offload *f,
+                    u16 flow_flags,
+                    struct net_device *filter_dev,
+                    struct mlx5_eswitch_rep *in_rep,
+                    struct mlx5_core_dev *in_mdev,
+                    struct mlx5e_tc_flow **__flow)
 {
        struct netlink_ext_ack *extack = f->common.extack;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
@@ -2723,6 +2769,8 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
        if (err)
                goto err_free;
 
+       flow->esw_attr->in_rep = in_rep;
+       flow->esw_attr->in_mdev = in_mdev;
        err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack);
        if (err)
                goto err_free;
@@ -2738,6 +2786,87 @@ out:
        return err;
 }
 
+static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f,
+                                     struct mlx5e_tc_flow *flow)
+{
+       struct mlx5e_priv *priv = flow->priv, *peer_priv;
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
+       struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
+       struct mlx5e_tc_flow_parse_attr *parse_attr;
+       struct mlx5e_rep_priv *peer_urpriv;
+       struct mlx5e_tc_flow *peer_flow;
+       struct mlx5_core_dev *in_mdev;
+       int err = 0;
+
+       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       if (!peer_esw)
+               return -ENODEV;
+
+       peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
+       peer_priv = netdev_priv(peer_urpriv->netdev);
+
+       /* in_mdev is the mdev from which the packet originated.
+        * Packets redirected to the uplink use the same mdev as the
+        * original flow, and packets redirected from the uplink use
+        * the peer mdev.
+        */
+       if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT)
+               in_mdev = peer_priv->mdev;
+       else
+               in_mdev = priv->mdev;
+
+       parse_attr = flow->esw_attr->parse_attr;
+       err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags,
+                                  parse_attr->filter_dev,
+                                  flow->esw_attr->in_rep, in_mdev, &peer_flow);
+       if (err)
+               goto out;
+
+       flow->peer_flow = peer_flow;
+       flow->flags |= MLX5E_TC_FLOW_DUP;
+       mutex_lock(&esw->offloads.peer_mutex);
+       list_add_tail(&flow->peer, &esw->offloads.peer_flows);
+       mutex_unlock(&esw->offloads.peer_mutex);
+
+out:
+       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       return err;
+}
+
+static int
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+                  struct tc_cls_flower_offload *f,
+                  u16 flow_flags,
+                  struct net_device *filter_dev,
+                  struct mlx5e_tc_flow **__flow)
+{
+       struct mlx5e_rep_priv *rpriv = priv->ppriv;
+       struct mlx5_eswitch_rep *in_rep = rpriv->rep;
+       struct mlx5_core_dev *in_mdev = priv->mdev;
+       struct mlx5e_tc_flow *flow;
+       int err;
+
+       err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
+                                  in_mdev, &flow);
+       if (err)
+               goto out;
+
+       if (is_peer_flow_needed(flow)) {
+               err = mlx5e_tc_add_fdb_peer_flow(f, flow);
+               if (err) {
+                       mlx5e_tc_del_fdb_flow(priv, flow);
+                       goto out;
+               }
+       }
+
+       *__flow = flow;
+
+       return 0;
+
+out:
+       return err;
+}
+
 static int
 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
                   struct tc_cls_flower_offload *f,
@@ -2882,7 +3011,9 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
                       struct tc_cls_flower_offload *f, int flags)
 {
+       struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
        struct rhashtable *tc_ht = get_tc_ht(priv);
+       struct mlx5_eswitch *peer_esw;
        struct mlx5e_tc_flow *flow;
        struct mlx5_fc *counter;
        u64 bytes;
@@ -2902,6 +3033,27 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
 
        mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
 
+       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       if (!peer_esw)
+               goto out;
+
+       if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
+           (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
+               u64 bytes2;
+               u64 packets2;
+               u64 lastuse2;
+
+               counter = mlx5e_tc_get_counter(flow->peer_flow);
+               mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
+
+               bytes += bytes2;
+               packets += packets2;
+               lastuse = max_t(u64, lastuse, lastuse2);
+       }
+
+       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
+out:
        tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
 
        return 0;
@@ -3014,3 +3166,11 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
 
        return atomic_read(&tc_ht->nelems);
 }
+
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
+{
+       struct mlx5e_tc_flow *flow, *tmp;
+
+       list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
+               __mlx5e_tc_del_fdb_peer_flow(flow);
+}
index 9dba6ad5744d201b97b6683536804d5852b172bb..4d048f7e703b96a793f9554202a7ac2fa9b47f8e 100644 (file)
@@ -167,6 +167,8 @@ struct mlx5_esw_offload {
        struct mlx5_flow_table *ft_offloads;
        struct mlx5_flow_group *vport_rx_group;
        struct mlx5_eswitch_rep *vport_reps;
+       struct list_head peer_flows;
+       struct mutex peer_mutex;
        DECLARE_HASHTABLE(encap_tbl, 8);
        DECLARE_HASHTABLE(mod_hdr_tbl, 8);
        u8 inline_mode;
index a6927ca3d4cab91b0392f73849d6101ba4416137..76cb5720247433ce33048f25c0844169e1c6ee27 100644 (file)
@@ -563,7 +563,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
        dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest->vport.num = 0;
        dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
-       dest->vport.vhca_id_valid = 1;
+       dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 }
 
 static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
@@ -1313,8 +1313,11 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
        return 0;
 }
 
+void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
+
 static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
 {
+       mlx5e_tc_clean_fdb_peer_flows(esw);
        esw_del_fdb_peer_miss_rules(esw);
 }
 
@@ -1365,6 +1368,9 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
 {
        struct mlx5_devcom *devcom = esw->dev->priv.devcom;
 
+       INIT_LIST_HEAD(&esw->offloads.peer_flows);
+       mutex_init(&esw->offloads.peer_mutex);
+
        if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
                return;