powerpc/eeh: Improve recovery of passed-through devices
authorSam Bobroff <sbobroff@linux.ibm.com>
Thu, 29 Nov 2018 03:16:41 +0000 (14:16 +1100)
committerMichael Ellerman <mpe@ellerman.id.au>
Tue, 5 Feb 2019 00:55:44 +0000 (11:55 +1100)
Currently, the EEH recovery process considers passed-through devices
as if they were not EEH-aware, which can cause them to be removed as
part of recovery.  Because device removal requires cooperation from
the guest, this may lead to the process stalling or deadlocking.
Also, if devices are removed on the host side, they will be removed
from their IOMMU group, making recovery in the guest impossible.

Therefore, alter the recovery process so that passed-through devices
are not removed but are instead left frozen (and marked isolated)
until the guest performs it's own recovery.  If firmware thaws a
passed-through PE because it's parent PE has been thawed (because it
was not passed through), re-freeze it.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/eeh.h
arch/powerpc/include/asm/ppc-pci.h
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/eeh_driver.c
drivers/vfio/vfio_spapr_eeh.c

index 2ff123f745cc410e58480220c28cc19b4dea5f79..0b655810f32d3380f32672838f6433a90d5b7a49 100644 (file)
@@ -300,7 +300,7 @@ void eeh_dev_release(struct pci_dev *pdev);
 struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group);
 int eeh_pe_set_option(struct eeh_pe *pe, int option);
 int eeh_pe_get_state(struct eeh_pe *pe);
-int eeh_pe_reset(struct eeh_pe *pe, int option);
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
 int eeh_pe_configure(struct eeh_pe *pe);
 int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
                      unsigned long addr, unsigned long mask);
index 08e094eaeccf725b525928a58e314ce356da3882..f191ef0d2a0a5ce48c161613760581ef0f0eda7e 100644 (file)
@@ -53,7 +53,7 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev);
 struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr);
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
 int eeh_pci_enable(struct eeh_pe *pe, int function);
-int eeh_pe_reset_full(struct eeh_pe *pe);
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed);
 void eeh_save_bars(struct eeh_dev *edev);
 int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
 int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
index 8d32587b07dca5096569882d67cc84c0d4c47642..416d1ef4976268e83ca0ee3ee8650c8044d1a285 100644 (file)
@@ -877,6 +877,24 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
        return NULL;
 }
 
+static void eeh_pe_refreeze_passed(struct eeh_pe *root)
+{
+       struct eeh_pe *pe;
+       int state;
+
+       eeh_for_each_pe(root, pe) {
+               if (eeh_pe_passed(pe)) {
+                       state = eeh_ops->get_state(pe, NULL);
+                       if (state &
+                          (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) {
+                               pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n",
+                                       pe->phb->global_number, pe->addr);
+                               eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
+                       }
+               }
+       }
+}
+
 /**
  * eeh_pe_reset_full - Complete a full reset process on the indicated PE
  * @pe: EEH PE
@@ -889,7 +907,7 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
  *
  * This function will attempt to reset a PE three times before failing.
  */
-int eeh_pe_reset_full(struct eeh_pe *pe)
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
 {
        int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
        int type = EEH_RESET_HOT;
@@ -911,11 +929,11 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
 
        /* Make three attempts at resetting the bus */
        for (i = 0; i < 3; i++) {
-               ret = eeh_pe_reset(pe, type);
+               ret = eeh_pe_reset(pe, type, include_passed);
                if (ret)
                        break;
 
-               ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
+               ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, include_passed);
                if (ret)
                        break;
 
@@ -936,6 +954,12 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
                        __func__, state, pe->phb->global_number, pe->addr, (i + 1));
        }
 
+       /* Resetting the PE may have unfrozen child PEs. If those PEs have been
+        * (potentially) passed through to a guest, re-freeze them:
+        */
+       if (!include_passed)
+               eeh_pe_refreeze_passed(pe);
+
        eeh_pe_state_clear(pe, reset_state, true);
        return ret;
 }
@@ -1611,13 +1635,12 @@ int eeh_pe_get_state(struct eeh_pe *pe)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_get_state);
 
-static int eeh_pe_reenable_devices(struct eeh_pe *pe)
+static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
 {
        struct eeh_dev *edev, *tmp;
        struct pci_dev *pdev;
        int ret = 0;
 
-       /* Restore config space */
        eeh_pe_restore_bars(pe);
 
        /*
@@ -1638,9 +1661,13 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe)
        }
 
        /* The PE is still in frozen state */
-       ret = eeh_unfreeze_pe(pe);
+       if (include_passed || !eeh_pe_passed(pe)) {
+               ret = eeh_unfreeze_pe(pe);
+       } else
+               pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n",
+                       pe->phb->global_number, pe->addr);
        if (!ret)
-               eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
+               eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed);
        return ret;
 }
 
@@ -1654,7 +1681,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe)
  * indicated type, either fundamental reset or hot reset.
  * PE reset is the most important part for error recovery.
  */
-int eeh_pe_reset(struct eeh_pe *pe, int option)
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed)
 {
        int ret = 0;
 
@@ -1668,11 +1695,11 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
        switch (option) {
        case EEH_RESET_DEACTIVATE:
                ret = eeh_ops->reset(pe, option);
-               eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
+               eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed);
                if (ret)
                        break;
 
-               ret = eeh_pe_reenable_devices(pe);
+               ret = eeh_pe_reenable_devices(pe, include_passed);
                break;
        case EEH_RESET_HOT:
        case EEH_RESET_FUNDAMENTAL:
index 91629b3f3b744a24298f53701e793ca1c2f39084..89623962c7275235db369f3bf28581e13ec019cb 100644 (file)
@@ -510,22 +510,11 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
         * support EEH. So we just care about PCI devices for
         * simplicity here.
         */
-       if (!dev || (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
-               return NULL;
-
-       /*
-        * We rely on count-based pcibios_release_device() to
-        * detach permanently offlined PEs. Unfortunately, that's
-        * not reliable enough. We might have the permanently
-        * offlined PEs attached, but we needn't take care of
-        * them and their child devices.
-        */
-       if (eeh_dev_removed(edev))
+       if (!eeh_edev_actionable(edev) ||
+           (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
                return NULL;
 
        if (rmv_data) {
-               if (eeh_pe_passed(edev->pe))
-                       return NULL;
                driver = eeh_pcid_get(dev);
                if (driver) {
                        if (driver->err_handler &&
@@ -539,8 +528,8 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
        }
 
        /* Remove it from PCI subsystem */
-       pr_debug("EEH: Removing %s without EEH sensitive driver\n",
-                pci_name(dev));
+       pr_info("EEH: Removing %s without EEH sensitive driver\n",
+               pci_name(dev));
        edev->mode |= EEH_DEV_DISCONNECTED;
        if (rmv_data)
                rmv_data->removed_dev_count++;
@@ -624,7 +613,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
        eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
 
        /* Issue reset */
-       ret = eeh_pe_reset_full(pe);
+       ret = eeh_pe_reset_full(pe, true);
        if (ret) {
                eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
                return ret;
@@ -664,6 +653,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
        time64_t tstamp;
        int cnt, rc;
        struct eeh_dev *edev;
+       struct eeh_pe *tmp_pe;
+       bool any_passed = false;
+
+       eeh_for_each_pe(pe, tmp_pe)
+               any_passed |= eeh_pe_passed(tmp_pe);
 
        /* pcibios will clear the counter; save the value */
        cnt = pe->freeze_count;
@@ -676,7 +670,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
         * into pci_hp_add_devices().
         */
        eeh_pe_state_mark(pe, EEH_PE_KEEP);
-       if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
+       if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
                eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
        } else {
                pci_lock_rescan_remove();
@@ -693,7 +687,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
         * config accesses. So we prefer to block them. However, controlled
         * PCI config accesses initiated from EEH itself are allowed.
         */
-       rc = eeh_pe_reset_full(pe);
+       rc = eeh_pe_reset_full(pe, false);
        if (rc)
                return rc;
 
@@ -704,7 +698,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
        eeh_pe_restore_bars(pe);
 
        /* Clear frozen state */
-       rc = eeh_clear_pe_frozen_state(pe, true);
+       rc = eeh_clear_pe_frozen_state(pe, false);
        if (rc) {
                pci_unlock_rescan_remove();
                return rc;
index 38edeb4729a9d475445bffab51c25ead9c6ae5bb..1a742fe8f6dbf4bdd4ad013b26df6f741279fc4b 100644 (file)
@@ -74,13 +74,13 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
                        ret = eeh_pe_get_state(pe);
                        break;
                case VFIO_EEH_PE_RESET_DEACTIVATE:
-                       ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
+                       ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true);
                        break;
                case VFIO_EEH_PE_RESET_HOT:
-                       ret = eeh_pe_reset(pe, EEH_RESET_HOT);
+                       ret = eeh_pe_reset(pe, EEH_RESET_HOT, true);
                        break;
                case VFIO_EEH_PE_RESET_FUNDAMENTAL:
-                       ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL);
+                       ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true);
                        break;
                case VFIO_EEH_PE_CONFIGURE:
                        ret = eeh_pe_configure(pe);