From 26bbf13ce1ca21ec69175bcc4b995cb8ffdf8593 Mon Sep 17 00:00:00 2001 From: Yosef Etigin Date: Sat, 19 May 2007 08:51:54 -0700 Subject: [PATCH] IPoIB: Handle P_Key table reordering SM reconfiguration or failover possibly causes a shuffling of the values in the P_Key table. Right now, IPoIB only queries for the P_Key index once when it creates the device QP, and hence there are problems if the index of a P_Key value changes. Fix this by using the PKEY_CHANGE event to trigger a recheck of the P_Key index. Signed-off-by: Yosef Etigin Acked-by: Michael S. Tsirkin Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 7 +- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 85 +++++++++++++++++----- drivers/infiniband/ulp/ipoib/ipoib_main.c | 7 +- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 38 ++++------ 4 files changed, 92 insertions(+), 45 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 87310eeb6df0..93d4a9a1e1dd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -201,15 +201,17 @@ struct ipoib_dev_priv { struct list_head multicast_list; struct rb_root multicast_tree; - struct delayed_work pkey_task; + struct delayed_work pkey_poll_task; struct delayed_work mcast_task; struct work_struct flush_task; struct work_struct restart_task; struct delayed_work ah_reap_task; + struct work_struct pkey_event_task; struct ib_device *ca; u8 port; u16 pkey; + u16 pkey_index; struct ib_pd *pd; struct ib_mr *mr; struct ib_cq *cq; @@ -333,12 +335,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); void ipoib_ib_dev_flush(struct work_struct *work); +void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); int ipoib_ib_dev_open(struct net_device *dev); int ipoib_ib_dev_up(struct net_device *dev); int ipoib_ib_dev_down(struct net_device *dev, int flush); -int ipoib_ib_dev_stop(struct net_device *dev); +int ipoib_ib_dev_stop(struct net_device *dev, int flush); int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); void ipoib_dev_cleanup(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 68d72c6f7ffb..e3b0937011d1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -448,6 +448,13 @@ int ipoib_ib_dev_open(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; + if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) { + ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey); + clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + return -1; + } + set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + ret = ipoib_init_qp(dev); if (ret) { ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); @@ -457,14 +464,14 @@ int ipoib_ib_dev_open(struct net_device *dev) ret = ipoib_ib_post_receives(dev); if (ret) { ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); - ipoib_ib_dev_stop(dev); + ipoib_ib_dev_stop(dev, 1); return -1; } ret = ipoib_cm_dev_open(dev); if (ret) { ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); - ipoib_ib_dev_stop(dev); + ipoib_ib_dev_stop(dev, 1); return -1; } @@ -516,7 +523,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush) if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { mutex_lock(&pkey_mutex); set_bit(IPOIB_PKEY_STOP, &priv->flags); - cancel_delayed_work(&priv->pkey_task); + cancel_delayed_work(&priv->pkey_poll_task); mutex_unlock(&pkey_mutex); if (flush) flush_workqueue(ipoib_workqueue); @@ -543,7 +550,7 @@ static int recvs_pending(struct net_device *dev) return pending; } -int ipoib_ib_dev_stop(struct net_device *dev) +int ipoib_ib_dev_stop(struct net_device *dev, int flush) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_qp_attr qp_attr; @@ -629,7 +636,8 @@ timeout: /* Wait for all AHs to be reaped */ set_bit(IPOIB_STOP_REAPER, &priv->flags); cancel_delayed_work(&priv->ah_reap_task); - flush_workqueue(ipoib_workqueue); + if (flush) + flush_workqueue(ipoib_workqueue); begin = jiffies; @@ -673,13 +681,24 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) return 0; } -void ipoib_ib_dev_flush(struct work_struct *work) +static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event) { - struct ipoib_dev_priv *cpriv, *priv = - container_of(work, struct ipoib_dev_priv, flush_task); + struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; + u16 new_index; + + mutex_lock(&priv->vlan_mutex); - if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) { + /* + * Flush any child interfaces too -- they might be up even if + * the parent is down. + */ + list_for_each_entry(cpriv, &priv->child_intfs, list) + __ipoib_ib_dev_flush(cpriv, pkey_event); + + mutex_unlock(&priv->vlan_mutex); + + if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) { ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); return; } @@ -689,10 +708,32 @@ void ipoib_ib_dev_flush(struct work_struct *work) return; } + if (pkey_event) { + if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { + clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + ipoib_ib_dev_down(dev, 0); + ipoib_pkey_dev_delay_open(dev); + return; + } + set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + + /* restart QP only if P_Key index is changed */ + if (new_index == priv->pkey_index) { + ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n"); + return; + } + priv->pkey_index = new_index; + } + ipoib_dbg(priv, "flushing\n"); ipoib_ib_dev_down(dev, 0); + if (pkey_event) { + ipoib_ib_dev_stop(dev, 0); + ipoib_ib_dev_open(dev); + } + /* * The device could have been brought down between the start and when * we get here, don't bring it back up if it's not configured up @@ -701,14 +742,24 @@ void ipoib_ib_dev_flush(struct work_struct *work) ipoib_ib_dev_up(dev); ipoib_mcast_restart_task(&priv->restart_task); } +} - mutex_lock(&priv->vlan_mutex); +void ipoib_ib_dev_flush(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, flush_task); - /* Flush any child interfaces too */ - list_for_each_entry(cpriv, &priv->child_intfs, list) - ipoib_ib_dev_flush(&cpriv->flush_task); + ipoib_dbg(priv, "Flushing %s\n", priv->dev->name); + __ipoib_ib_dev_flush(priv, 0); +} - mutex_unlock(&priv->vlan_mutex); +void ipoib_pkey_event(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = + container_of(work, struct ipoib_dev_priv, pkey_event_task); + + ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name); + __ipoib_ib_dev_flush(priv, 1); } void ipoib_ib_dev_cleanup(struct net_device *dev) @@ -736,7 +787,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) void ipoib_pkey_poll(struct work_struct *work) { struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, pkey_task.work); + container_of(work, struct ipoib_dev_priv, pkey_poll_task.work); struct net_device *dev = priv->dev; ipoib_pkey_dev_check_presence(dev); @@ -747,7 +798,7 @@ void ipoib_pkey_poll(struct work_struct *work) mutex_lock(&pkey_mutex); if (!test_bit(IPOIB_PKEY_STOP, &priv->flags)) queue_delayed_work(ipoib_workqueue, - &priv->pkey_task, + &priv->pkey_poll_task, HZ); mutex_unlock(&pkey_mutex); } @@ -766,7 +817,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev) mutex_lock(&pkey_mutex); clear_bit(IPOIB_PKEY_STOP, &priv->flags); queue_delayed_work(ipoib_workqueue, - &priv->pkey_task, + &priv->pkey_poll_task, HZ); mutex_unlock(&pkey_mutex); return 1; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0a428f2b05c7..894b1dcdf3eb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev) return -EINVAL; if (ipoib_ib_dev_up(dev)) { - ipoib_ib_dev_stop(dev); + ipoib_ib_dev_stop(dev, 1); return -EINVAL; } @@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device *dev) flush_workqueue(ipoib_workqueue); ipoib_ib_dev_down(dev, 1); - ipoib_ib_dev_stop(dev); + ipoib_ib_dev_stop(dev, 1); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; @@ -988,7 +988,8 @@ static void ipoib_setup(struct net_device *dev) INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->multicast_list); - INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll); + INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); + INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 5c3c6a43a52b..791252621b26 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -33,8 +33,6 @@ * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $ */ -#include - #include "ipoib.h" int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) @@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) if (!qp_attr) goto out; - if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { + if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ret = -ENXIO; goto out; @@ -94,26 +92,16 @@ int ipoib_init_qp(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - u16 pkey_index; struct ib_qp_attr qp_attr; int attr_mask; - /* - * Search through the port P_Key table for the requested pkey value. - * The port has to be assigned to the respective IB partition in - * advance. - */ - ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index); - if (ret) { - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); - return ret; - } - set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) + return -1; qp_attr.qp_state = IB_QPS_INIT; qp_attr.qkey = 0; qp_attr.port_num = priv->port; - qp_attr.pkey_index = pkey_index; + qp_attr.pkey_index = priv->pkey_index; attr_mask = IB_QP_QKEY | IB_QP_PORT | @@ -259,14 +247,18 @@ void ipoib_event(struct ib_event_handler *handler, struct ipoib_dev_priv *priv = container_of(handler, struct ipoib_dev_priv, event_handler); - if ((record->event == IB_EVENT_PORT_ERR || - record->event == IB_EVENT_PKEY_CHANGE || - record->event == IB_EVENT_PORT_ACTIVE || - record->event == IB_EVENT_LID_CHANGE || - record->event == IB_EVENT_SM_CHANGE || - record->event == IB_EVENT_CLIENT_REREGISTER) && - record->element.port_num == priv->port) { + if (record->element.port_num != priv->port) + return; + + if (record->event == IB_EVENT_PORT_ERR || + record->event == IB_EVENT_PORT_ACTIVE || + record->event == IB_EVENT_LID_CHANGE || + record->event == IB_EVENT_SM_CHANGE || + record->event == IB_EVENT_CLIENT_REREGISTER) { ipoib_dbg(priv, "Port state change event\n"); queue_work(ipoib_workqueue, &priv->flush_task); + } else if (record->event == IB_EVENT_PKEY_CHANGE) { + ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port); + queue_work(ipoib_workqueue, &priv->pkey_event_task); } } -- 2.30.2