scsi: zfcp: fix erp_action use-before-initialize in REC action trace
authorSteffen Maier <maier@linux.vnet.ibm.com>
Fri, 13 Oct 2017 13:40:07 +0000 (15:40 +0200)
committerMartin K. Petersen <martin.petersen@oracle.com>
Tue, 17 Oct 2017 02:45:26 +0000 (22:45 -0400)
v4.10 commit 6f2ce1c6af37 ("scsi: zfcp: fix rport unblock race with LUN
recovery") extended accessing parent pointer fields of struct
zfcp_erp_action for tracing.  If an erp_action has never been enqueued
before, these parent pointer fields are uninitialized and NULL. Examples
are zfcp objects freshly added to the parent object's children list,
before enqueueing their first recovery subsequently. In
zfcp_erp_try_rport_unblock(), we iterate such list. Accessing erp_action
fields can cause a NULL pointer dereference.  Since the kernel can read
from lowcore on s390, it does not immediately cause a kernel page
fault. Instead it can cause hangs on trying to acquire the wrong
erp_action->adapter->dbf->rec_lock in zfcp_dbf_rec_action_lvl()
                      ^bogus^
while holding already other locks with IRQs disabled.

Real life example from attaching lots of LUNs in parallel on many CPUs:

crash> bt 17723
PID: 17723  TASK: ...               CPU: 25  COMMAND: "zfcperp0.0.1800"
 LOWCORE INFO:
  -psw      : 0x0404300180000000 0x000000000038e424
  -function : _raw_spin_lock_wait_flags at 38e424
...
 #0 [fdde8fc90] zfcp_dbf_rec_action_lvl at 3e0004e9862 [zfcp]
 #1 [fdde8fce8] zfcp_erp_try_rport_unblock at 3e0004dfddc [zfcp]
 #2 [fdde8fd38] zfcp_erp_strategy at 3e0004e0234 [zfcp]
 #3 [fdde8fda8] zfcp_erp_thread at 3e0004e0a12 [zfcp]
 #4 [fdde8fe60] kthread at 173550
 #5 [fdde8feb8] kernel_thread_starter at 10add2

zfcp_adapter
 zfcp_port
  zfcp_unit <address>, 0x404040d600000000
  scsi_device NULL, returning early!
zfcp_scsi_dev.status = 0x40000000
0x40000000 ZFCP_STATUS_COMMON_RUNNING

crash> zfcp_unit <address>
struct zfcp_unit {
  erp_action = {
    adapter = 0x0,
    port = 0x0,
    unit = 0x0,
  },
}

zfcp_erp_action is always fully embedded into its container object. Such
container object is never moved in its object tree (only add or delete).
Hence, erp_action parent pointers can never change.

To fix the issue, initialize the erp_action parent pointers before
adding the erp_action container to any list and thus before it becomes
accessible from outside of its initializing function.

In order to also close the time window between zfcp_erp_setup_act()
memsetting the entire erp_action to zero and setting the parent pointers
again, drop the memset and instead explicitly initialize individually
all erp_action fields except for parent pointers. To be extra careful
not to introduce any other unintended side effect, even keep zeroing the
erp_action fields for list and timer. Also double-check with
WARN_ON_ONCE that erp_action parent pointers never change, so we get to
know when we would deviate from previous behavior.

Signed-off-by: Steffen Maier <maier@linux.vnet.ibm.com>
Fixes: 6f2ce1c6af37 ("scsi: zfcp: fix rport unblock race with LUN recovery")
Cc: <stable@vger.kernel.org> #2.6.32+
Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/s390/scsi/zfcp_aux.c
drivers/s390/scsi/zfcp_erp.c
drivers/s390/scsi/zfcp_scsi.c

index 82ac331d91254e72debc5d021808a6282e93dc1b..84752152d41fd682c5ae350ddb4bd3ac80d47cde 100644 (file)
@@ -357,6 +357,8 @@ struct zfcp_adapter *zfcp_adapter_enqueue(struct ccw_device *ccw_device)
 
        adapter->next_port_scan = jiffies;
 
+       adapter->erp_action.adapter = adapter;
+
        if (zfcp_qdio_setup(adapter))
                goto failed;
 
@@ -513,6 +515,9 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn,
        port->dev.groups = zfcp_port_attr_groups;
        port->dev.release = zfcp_port_release;
 
+       port->erp_action.adapter = adapter;
+       port->erp_action.port = port;
+
        if (dev_set_name(&port->dev, "0x%016llx", (unsigned long long)wwpn)) {
                kfree(port);
                goto err_out;
index 37408f5f81ce84e5f4f598069bbc417edff71469..ec2532ee1822ea6c068a12a42dce145aef45b333 100644 (file)
@@ -193,9 +193,8 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
                atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE,
                                &zfcp_sdev->status);
                erp_action = &zfcp_sdev->erp_action;
-               memset(erp_action, 0, sizeof(struct zfcp_erp_action));
-               erp_action->port = port;
-               erp_action->sdev = sdev;
+               WARN_ON_ONCE(erp_action->port != port);
+               WARN_ON_ONCE(erp_action->sdev != sdev);
                if (!(atomic_read(&zfcp_sdev->status) &
                      ZFCP_STATUS_COMMON_RUNNING))
                        act_status |= ZFCP_STATUS_ERP_CLOSE_ONLY;
@@ -208,8 +207,8 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
                zfcp_erp_action_dismiss_port(port);
                atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &port->status);
                erp_action = &port->erp_action;
-               memset(erp_action, 0, sizeof(struct zfcp_erp_action));
-               erp_action->port = port;
+               WARN_ON_ONCE(erp_action->port != port);
+               WARN_ON_ONCE(erp_action->sdev != NULL);
                if (!(atomic_read(&port->status) & ZFCP_STATUS_COMMON_RUNNING))
                        act_status |= ZFCP_STATUS_ERP_CLOSE_ONLY;
                break;
@@ -219,7 +218,8 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
                zfcp_erp_action_dismiss_adapter(adapter);
                atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status);
                erp_action = &adapter->erp_action;
-               memset(erp_action, 0, sizeof(struct zfcp_erp_action));
+               WARN_ON_ONCE(erp_action->port != NULL);
+               WARN_ON_ONCE(erp_action->sdev != NULL);
                if (!(atomic_read(&adapter->status) &
                      ZFCP_STATUS_COMMON_RUNNING))
                        act_status |= ZFCP_STATUS_ERP_CLOSE_ONLY;
@@ -229,7 +229,11 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
                return NULL;
        }
 
-       erp_action->adapter = adapter;
+       WARN_ON_ONCE(erp_action->adapter != adapter);
+       memset(&erp_action->list, 0, sizeof(erp_action->list));
+       memset(&erp_action->timer, 0, sizeof(erp_action->timer));
+       erp_action->step = ZFCP_ERP_STEP_UNINITIALIZED;
+       erp_action->fsf_req_id = 0;
        erp_action->action = need;
        erp_action->status = act_status;
 
index ec3ddd1d31d5f5be43fb70425dd3b8981a179f63..6cf8732627e012137514075ea4958fdd5cbf82e4 100644 (file)
@@ -115,10 +115,15 @@ static int zfcp_scsi_slave_alloc(struct scsi_device *sdev)
        struct zfcp_unit *unit;
        int npiv = adapter->connection_features & FSF_FEATURE_NPIV_MODE;
 
+       zfcp_sdev->erp_action.adapter = adapter;
+       zfcp_sdev->erp_action.sdev = sdev;
+
        port = zfcp_get_port_by_wwpn(adapter, rport->port_name);
        if (!port)
                return -ENXIO;
 
+       zfcp_sdev->erp_action.port = port;
+
        unit = zfcp_unit_find(port, zfcp_scsi_dev_lun(sdev));
        if (unit)
                put_device(&unit->dev);