From b6ffaeffaea4d92f05f5ba1ef54df407cb7c8517 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 12 Mar 2014 12:00:39 +0200 Subject: [PATCH] mlx4: In RoCE allow guests to have multiple GIDS The GIDs are statically distributed, as follows: PF: gets 16 GIDs VFs: Remaining GIDS are divided evenly between VFs activated by the driver. If the division is not even, lower-numbered VFs get an extra GID. For an IB interface, the number of gids per guest remains as before: one gid per guest. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/mad.c | 34 ++++- drivers/net/ethernet/mellanox/mlx4/fw.c | 5 +- drivers/net/ethernet/mellanox/mlx4/main.c | 6 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 + drivers/net/ethernet/mellanox/mlx4/port.c | 117 +++++++++++++++--- .../ethernet/mellanox/mlx4/resource_tracker.c | 64 ++++++++-- include/linux/mlx4/device.h | 1 + 7 files changed, 192 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index c2e9879a5a34..c5bca0f0da4a 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -511,9 +511,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, memset(&attr, 0, sizeof attr); attr.port_num = port; if (is_eth) { - ret = mlx4_get_roce_gid_from_slave(dev->dev, port, slave, attr.grh.dgid.raw); - if (ret) - return ret; + memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16); attr.ah_flags = IB_AH_GRH; } ah = ib_create_ah(tun_ctx->pd, &attr); @@ -1216,6 +1214,34 @@ out: return ret; } +static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port) +{ + int gids; + int vfs; + + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + return slave; + + gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + vfs = dev->dev->num_vfs; + + if (slave == 0) + return 0; + if (slave <= gids % vfs) + return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1); + + return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1)); +} + +static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port, + struct ib_ah_attr *ah_attr) +{ + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + ah_attr->grh.sgid_index = slave; + else + ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port); +} + static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc) { struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); @@ -1303,7 +1329,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc ah.ibah.device = ctx->ib_dev; mlx4_ib_query_ah(&ah.ibah, &ah_attr); if (ah_attr.ah_flags & IB_AH_GRH) - ah_attr.grh.sgid_index = slave; + fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 9cdf452140da..6e1ee2170a39 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -934,7 +934,10 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); - short_field = 1; /* slave max gids */ + if (dev->caps.port_type[vhcr->in_modifier] == MLX4_PORT_TYPE_ETH) + short_field = mlx4_get_slave_num_gids(dev, slave); + else + short_field = 1; /* slave max gids */ MLX4_PUT(outbox->buf, short_field, QUERY_PORT_CUR_MAX_GID_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 979ea4364efb..4c441aa83016 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1462,7 +1462,11 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) int i; for (i = 1; i <= dev->caps.num_ports; i++) { - dev->caps.gid_table_len[i] = 1; + if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) + dev->caps.gid_table_len[i] = + mlx4_get_slave_num_gids(dev, 0); + else + dev->caps.gid_table_len[i] = 1; dev->caps.pkey_table_len[i] = dev->phys_caps.pkey_phys_table_len[i] - 1; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index da829f4ef938..6ba38c98c492 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1287,4 +1287,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); void mlx4_init_quotas(struct mlx4_dev *dev); +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave); +int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave); + #endif /* MLX4_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 591740b06043..ece328166e94 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -507,6 +507,31 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps) } static struct mlx4_roce_gid_entry zgid_entry; +int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave) +{ + if (slave == 0) + return MLX4_ROCE_PF_GIDS; + if (slave <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % dev->num_vfs)) + return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs) + 1; + return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs; +} + +int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave) +{ + int gids; + int vfs; + + gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; + vfs = dev->num_vfs; + + if (slave == 0) + return 0; + if (slave <= gids % vfs) + return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1); + + return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1)); +} + static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, u8 op_mod, struct mlx4_cmd_mailbox *inbox) { @@ -516,15 +541,18 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, struct mlx4_slave_state *slave_st = &master->slave_state[slave]; struct mlx4_set_port_rqp_calc_context *qpn_context; struct mlx4_set_port_general_context *gen_context; - struct mlx4_roce_gid_entry *gid_entry; + struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1; int reset_qkey_viols; int port; int is_eth; + int num_gids; + int base; u32 in_modifier; u32 promisc; u16 mtu, prev_mtu; int err; - int i; + int i, j; + int offset; __be32 agg_cap_mask; __be32 slave_cap_mask; __be32 new_cap_mask; @@ -585,26 +613,65 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, gen_context->mtu = cpu_to_be16(master->max_mtu[port]); break; case MLX4_SET_PORT_GID_TABLE: - gid_entry = (struct mlx4_roce_gid_entry *)(inbox->buf); - /* check that do not have duplicates */ - if (memcmp(gid_entry->raw, zgid_entry.raw, 16)) { - for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { - if (slave != i && - !memcmp(gid_entry->raw, priv->roce_gids[port - 1][i].raw, 16)) { - mlx4_warn(dev, "requested gid entry for slave:%d " - "is a duplicate of slave %d\n", - slave, i); - return -EEXIST; + /* change to MULTIPLE entries: number of guest's gids + * need a FOR-loop here over number of gids the guest has. + * 1. Check no duplicates in gids passed by slave + */ + num_gids = mlx4_get_slave_num_gids(dev, slave); + base = mlx4_get_base_gid_ix(dev, slave); + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0; i < num_gids; gid_entry_mbox++, i++) { + if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw, + sizeof(zgid_entry))) + continue; + gid_entry_mb1 = gid_entry_mbox + 1; + for (j = i + 1; j < num_gids; gid_entry_mb1++, j++) { + if (!memcmp(gid_entry_mb1->raw, + zgid_entry.raw, sizeof(zgid_entry))) + continue; + if (!memcmp(gid_entry_mb1->raw, gid_entry_mbox->raw, + sizeof(gid_entry_mbox->raw))) { + /* found duplicate */ + return -EINVAL; } } } - /* insert slave GID at proper index */ - memcpy(priv->roce_gids[port - 1][slave].raw, gid_entry->raw, 16); - /* rewrite roce port gids table to FW */ + + /* 2. Check that do not have duplicates in OTHER + * entries in the port GID table + */ for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { - memcpy(gid_entry->raw, priv->roce_gids[port - 1][i].raw, 16); - gid_entry++; + if (i >= base && i < base + num_gids) + continue; /* don't compare to slave's current gids */ + gid_entry_tbl = &priv->roce_gids[port - 1][i]; + if (!memcmp(gid_entry_tbl->raw, zgid_entry.raw, sizeof(zgid_entry))) + continue; + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (j = 0; j < num_gids; gid_entry_mbox++, j++) { + if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw, + sizeof(zgid_entry))) + continue; + if (!memcmp(gid_entry_mbox->raw, gid_entry_tbl->raw, + sizeof(gid_entry_tbl->raw))) { + /* found duplicate */ + mlx4_warn(dev, "requested gid entry for slave:%d " + "is a duplicate of gid at index %d\n", + slave, i); + return -EINVAL; + } + } } + + /* insert slave GIDs with memcpy, starting at slave's base index */ + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0, offset = base; i < num_gids; gid_entry_mbox++, offset++, i++) + memcpy(priv->roce_gids[port - 1][offset].raw, gid_entry_mbox->raw, 16); + + /* Now, copy roce port gids table to current mailbox for passing to FW */ + gid_entry_mbox = (struct mlx4_roce_gid_entry *)(inbox->buf); + for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++) + memcpy(gid_entry_mbox->raw, priv->roce_gids[port - 1][i].raw, 16); + break; } return mlx4_cmd(dev, inbox->dma, in_mod, op_mod, @@ -958,6 +1025,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, { struct mlx4_priv *priv = mlx4_priv(dev); int i, found_ix = -1; + int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; if (!mlx4_is_mfunc(dev)) return -EINVAL; @@ -969,8 +1037,19 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, } } - if (found_ix >= 0) - *slave_id = found_ix; + if (found_ix >= 0) { + if (found_ix < MLX4_ROCE_PF_GIDS) + *slave_id = 0; + else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % dev->num_vfs) * + (vf_gids / dev->num_vfs + 1)) + *slave_id = ((found_ix - MLX4_ROCE_PF_GIDS) / + (vf_gids / dev->num_vfs + 1)) + 1; + else + *slave_id = + ((found_ix - MLX4_ROCE_PF_GIDS - + ((vf_gids % dev->num_vfs) * ((vf_gids / dev->num_vfs + 1)))) / + (vf_gids / dev->num_vfs)) + vf_gids % dev->num_vfs + 1; + } return (found_ix >= 0) ? 0 : -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 57428a0cb9dd..1c3634eab5e1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -219,6 +219,11 @@ struct res_fs_rule { int qpn; }; +static int mlx4_is_eth(struct mlx4_dev *dev, int port) +{ + return dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB ? 0 : 1; +} + static void *res_tracker_lookup(struct rb_root *root, u64 res_id) { struct rb_node *node = root->rb_node; @@ -600,15 +605,34 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, struct mlx4_qp_context *qp_ctx = inbox->buf + 8; enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf); u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + int port; - if (MLX4_QP_ST_UD == ts) - qp_ctx->pri_path.mgid_index = 0x80 | slave; - - if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) { - if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) - qp_ctx->pri_path.mgid_index = slave & 0x7F; - if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) - qp_ctx->alt_path.mgid_index = slave & 0x7F; + if (MLX4_QP_ST_UD == ts) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) + qp_ctx->pri_path.mgid_index = mlx4_get_base_gid_ix(dev, slave) | 0x80; + else + qp_ctx->pri_path.mgid_index = slave | 0x80; + + } else if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_XRC == ts || MLX4_QP_ST_UC == ts) { + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) { + qp_ctx->pri_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); + qp_ctx->pri_path.mgid_index &= 0x7f; + } else { + qp_ctx->pri_path.mgid_index = slave & 0x7F; + } + } + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { + port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; + if (mlx4_is_eth(dev, port)) { + qp_ctx->alt_path.mgid_index += mlx4_get_base_gid_ix(dev, slave); + qp_ctx->alt_path.mgid_index &= 0x7f; + } else { + qp_ctx->alt_path.mgid_index = slave & 0x7F; + } + } } } @@ -2734,6 +2758,8 @@ static int verify_qp_parameters(struct mlx4_dev *dev, u32 qp_type; struct mlx4_qp_context *qp_ctx; enum mlx4_qp_optpar optpar; + int port; + int num_gids; qp_ctx = inbox->buf + 8; qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; @@ -2741,6 +2767,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, switch (qp_type) { case MLX4_QP_ST_RC: + case MLX4_QP_ST_XRC: case MLX4_QP_ST_UC: switch (transition) { case QP_TRANS_INIT2RTR: @@ -2749,13 +2776,24 @@ static int verify_qp_parameters(struct mlx4_dev *dev, case QP_TRANS_SQD2SQD: case QP_TRANS_SQD2RTS: if (slave != mlx4_master_func_num(dev)) - /* slaves have only gid index 0 */ - if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) - if (qp_ctx->pri_path.mgid_index) + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) { + port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1; + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) + num_gids = mlx4_get_slave_num_gids(dev, slave); + else + num_gids = 1; + if (qp_ctx->pri_path.mgid_index >= num_gids) return -EINVAL; - if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) - if (qp_ctx->alt_path.mgid_index) + } + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) { + port = (qp_ctx->alt_path.sched_queue >> 6 & 1) + 1; + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) + num_gids = mlx4_get_slave_num_gids(dev, slave); + else + num_gids = 1; + if (qp_ctx->alt_path.mgid_index >= num_gids) return -EINVAL; + } break; default: break; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e4853650679d..86e02e5c2c77 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -49,6 +49,7 @@ #define MIN_MSIX_P_PORT 5 #define MLX4_ROCE_MAX_GIDS 128 +#define MLX4_ROCE_PF_GIDS 16 enum { MLX4_FLAG_MSI_X = 1 << 0, -- 2.30.2