IB/Hfi1: Read CCE Revision register to verify the device is responsive
author: Kamenee Arumugam <kamenee.arumugam@intel.com>
Wed, 2 May 2018 13:43:31 +0000 (06:43 -0700)
committer: Doug Ledford <dledford@redhat.com>
Wed, 9 May 2018 19:53:30 +0000 (15:53 -0400)
When the Hfi1 device is unresponsive, reading the RcvArrayCnt register
returns all 1's. This value is then used to remap the chip's RcvArray.
Remapping the RcvArray with this incorrect all-ones value
triggers a kernel warning, as shown by the trace below:

[<ffffffff81685eac>] dump_stack+0x19/0x1b
[<ffffffff81085820>] warn_slowpath_common+0x70/0xb0
[<ffffffff810858bc>] warn_slowpath_fmt+0x5c/0x80
[<ffffffff81065c29>] __ioremap_caller+0x279/0x320
[<ffffffff8142873c>] ? _dev_info+0x6c/0x90
[<ffffffffa021d155>] ? hfi1_pcie_ddinit+0x1d5/0x330 [hfi1]
[<ffffffff81065d62>] ioremap_wc+0x32/0x40
[<ffffffffa021d155>] hfi1_pcie_ddinit+0x1d5/0x330 [hfi1]
[<ffffffffa0204851>] hfi1_init_dd+0x1d1/0x2440 [hfi1]
[<ffffffff813503dc>] ? pci_write_config_word+0x1c/0x20

Read the CCE revision register first to verify that the WFR device is
responsive. If the read returns "all ones", bail out of init
and fail the driver load.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Kamenee Arumugam <kamenee.arumugam@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/pcie.c

index 582cf7eb779f2e25c9e6d4a134551939abda632a..0fab6df0a3450490510da60b7f05e9de4fdd49cb 100644 (file)
@@ -15038,13 +15038,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
        if (ret < 0)
                goto bail_cleanup;
 
-       /* verify that reads actually work, save revision for reset check */
-       dd->revision = read_csr(dd, CCE_REVISION);
-       if (dd->revision == ~(u64)0) {
-               dd_dev_err(dd, "cannot read chip CSRs\n");
-               ret = -EINVAL;
-               goto bail_cleanup;
-       }
        dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
                        & CCE_REVISION_CHIP_REV_MAJOR_MASK;
        dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
index c1c982908b4bb2ad000fc1aa507be0036f78789d..87bd6b60cb53cf0a9761a96e5f04577eddea7b6d 100644 (file)
@@ -183,6 +183,14 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
                return -ENOMEM;
        }
        dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY);
+
+       /* verify that reads actually work, save revision for reset check */
+       dd->revision = readq(dd->kregbase1 + CCE_REVISION);
+       if (dd->revision == ~(u64)0) {
+               dd_dev_err(dd, "Cannot read chip CSRs\n");
+               goto nomem;
+       }
+
        dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
        dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count);
        dd->base2_start  = RCV_ARRAY + dd->chip_rcv_array_count * 8;