From cb5b242c8c14a4b1dcd358400da28208fde78947 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 15 Jan 2014 13:16:13 +0800 Subject: [PATCH] powerpc/eeh: Escalate error on non-existing PE Sometimes, especially in sinario of loading another kernel with kdump, we got EEH error on non-existing PE. That means the PEEV / PEST in the corresponding PHB would be messy and we can't handle that case. The patch escalates the error to fenced PHB so that the PHB could be rested in order to revoer the errors on non-existing PEs. Reported-by: Mahesh Salgaonkar Signed-off-by: Gavin Shan Tested-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powernv/eeh-ioda.c | 31 +++++++++++++++-------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 8dd16f4675a2..e1e71618b70c 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -713,11 +713,7 @@ static int ioda_eeh_get_pe(struct pci_controller *hose, dev.phb = hose; dev.pe_config_addr = pe_no; dev_pe = eeh_pe_get(&dev); - if (!dev_pe) { - pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n", - __func__, hose->global_number, pe_no); - return -EEXIST; - } + if (!dev_pe) return -EEXIST; *pe = dev_pe; return 0; @@ -831,12 +827,27 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) break; case OPAL_EEH_PE_ERROR: - if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) - break; + /* + * If we can't find the corresponding PE, the + * PEEV / PEST would be messy. So we force an + * fenced PHB so that it can be recovered. + */ + if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) { + if (!ioda_eeh_get_phb_pe(hose, pe)) { + pr_err("EEH: Escalated fenced PHB#%x " + "detected for PE#%llx\n", + hose->global_number, + frozen_pe_no); + ret = EEH_NEXT_ERR_FENCED_PHB; + } else { + ret = EEH_NEXT_ERR_NONE; + } + } else { + pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", + (*pe)->addr, (*pe)->phb->global_number); + ret = EEH_NEXT_ERR_FROZEN_PE; + } - pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", - (*pe)->addr, (*pe)->phb->global_number); - ret = EEH_NEXT_ERR_FROZEN_PE; break; default: pr_warn("%s: Unexpected error type %d\n", -- 2.30.2