be2net: implement EEH pci error recovery handlers
author Sathya Perla <sathyap@serverengines.com>
Sun, 14 Feb 2010 21:22:01 +0000 (21:22 +0000)
committer David S. Miller <davem@davemloft.net>
Tue, 16 Feb 2010 05:49:51 +0000 (21:49 -0800)
The code has been tested on an IBM pSeries server.

Signed-off-by: Sathya Perla <sathyap@serverengines.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/benet/be.h
drivers/net/benet/be_cmds.c
drivers/net/benet/be_main.c

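diff --git a/drivers/net/benet/be.h b/drivers/net/benet/be.h
index b39b38542623dddae2b18bf18ee3b7e7c7571c73..5038c16bfe9b82cfdce37fe2c7137c6367e8c8c9 100644
--- a/drivers/net/benet/be.h
+++ b/drivers/net/benet/be.h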
@@ -265,6 +265,7 @@ struct be_adapter {
        u32 if_handle;          /* Used to configure filtering */
        u32 pmac_id;            /* MAC addr handle used by BE card */
 
+       bool eeh_err;
        bool link_up;
        u32 port_num;
        bool promiscuous;
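diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c
index d1a0e5ede4b315f72d7de026827572c32315ba63..3397ee327e1fe7fe4fdc7d068d64b3abc5de6608 100644
--- a/drivers/net/benet/be_cmds.c
+++ b/drivers/net/benet/be_cmds.c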
@@ -167,7 +167,14 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db)
        u32 ready;
 
        do {
-               ready = ioread32(db) & MPU_MAILBOX_DB_RDY_MASK;
+               ready = ioread32(db);
+               if (ready == 0xffffffff) {
+                       dev_err(&adapter->pdev->dev,
+                               "pci slot disconnected\n");
+                       return -1;
+               }
+
+               ready &= MPU_MAILBOX_DB_RDY_MASK;
                if (ready)
                        break;
 
@@ -198,6 +205,11 @@ static int be_mbox_notify_wait(struct be_adapter *adapter)
        struct be_mcc_mailbox *mbox = mbox_mem->va;
        struct be_mcc_compl *compl = &mbox->compl;
 
+       /* wait for ready to be set */
+       status = be_mbox_db_ready_wait(adapter, db);
+       if (status != 0)
+               return status;
+
        val |= MPU_MAILBOX_DB_HI_MASK;
        /* at bits 2 - 31 place mbox dma addr msb bits 34 - 63 */
        val |= (upper_32_bits(mbox_mem->dma) >> 2) << 2;
@@ -396,6 +408,9 @@ int be_cmd_fw_clean(struct be_adapter *adapter)
        u8 *wrb;
        int status;
 
+       if (adapter->eeh_err)
+               return -EIO;
+
        spin_lock(&adapter->mbox_lock);
 
        wrb = (u8 *)wrb_from_mbox(adapter);
@@ -768,6 +783,9 @@ int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q,
        u8 subsys = 0, opcode = 0;
        int status;
 
+       if (adapter->eeh_err)
+               return -EIO;
+
        spin_lock(&adapter->mbox_lock);
 
        wrb = wrb_from_mbox(adapter);
@@ -856,6 +874,9 @@ int be_cmd_if_destroy(struct be_adapter *adapter, u32 interface_id)
        struct be_cmd_req_if_destroy *req;
        int status;
 
+       if (adapter->eeh_err)
+               return -EIO;
+
        spin_lock(&adapter->mbox_lock);
 
        wrb = wrb_from_mbox(adapter);
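diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index 92c55f6794664960c551dfa48d112d00cc33ac7e..cbfaa3feb7c481401e93d2b2ce8b15c7796650e1 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c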
@@ -68,6 +68,9 @@ static void be_intr_set(struct be_adapter *adapter, bool enable)
        u32 reg = ioread32(addr);
        u32 enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
 
+       if (adapter->eeh_err)
+               return;
+
        if (!enabled && enable)
                reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
        else if (enabled && !enable)
@@ -99,6 +102,10 @@ static void be_eq_notify(struct be_adapter *adapter, u16 qid,
 {
        u32 val = 0;
        val |= qid & DB_EQ_RING_ID_MASK;
+
+       if (adapter->eeh_err)
+               return;
+
        if (arm)
                val |= 1 << DB_EQ_REARM_SHIFT;
        if (clear_int)
@@ -112,6 +119,10 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
 {
        u32 val = 0;
        val |= qid & DB_CQ_RING_ID_MASK;
+
+       if (adapter->eeh_err)
+               return;
+
        if (arm)
                val |= 1 << DB_CQ_REARM_SHIFT;
        val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
@@ -2154,6 +2165,7 @@ static int be_ctrl_init(struct be_adapter *adapter)
        spin_lock_init(&adapter->mcc_lock);
        spin_lock_init(&adapter->mcc_cq_lock);
 
+       pci_save_state(adapter->pdev);
        return 0;
 
 free_mbox:
@@ -2417,13 +2429,102 @@ static int be_resume(struct pci_dev *pdev)
        return 0;
 }
 
+static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
+                               pci_channel_state_t state)
+{
+       struct be_adapter *adapter = pci_get_drvdata(pdev);
+       struct net_device *netdev =  adapter->netdev;
+
+       dev_err(&adapter->pdev->dev, "EEH error detected\n");
+
+       adapter->eeh_err = true;   /* gates hw access in cmd/notify helpers */
+
+       netif_device_detach(netdev);
+
+       if (netif_running(netdev)) {
+               rtnl_lock();    /* be_close() is invoked under rtnl here */
+               be_close(netdev);
+               rtnl_unlock();
+       }
+       be_clear(adapter);      /* be_eeh_resume() redoes be_setup() */
+
+       if (state == pci_channel_io_perm_failure)
+               return PCI_ERS_RESULT_DISCONNECT;
+
+       pci_disable_device(pdev);       /* be_eeh_reset() re-enables it */
+
+       return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
+{
+       struct be_adapter *adapter = pci_get_drvdata(pdev);
+       int status;
+
+       dev_info(&adapter->pdev->dev, "EEH reset\n");
+
+       status = pci_enable_device(pdev);
+       if (status)
+               return PCI_ERS_RESULT_DISCONNECT;
+
+       pci_set_master(pdev);
+       pci_set_power_state(pdev, PCI_D0);
+       pci_restore_state(pdev);
+
+       /* Check if card is ok and fw is ready; eeh_err stays set on failure */
+       status = be_cmd_POST(adapter);
+       if (status)
+               return PCI_ERS_RESULT_DISCONNECT;
+
+       adapter->eeh_err = false;       /* only once the card checks out */
+       return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void be_eeh_resume(struct pci_dev *pdev)
+{
+       int status = 0;
+       struct be_adapter *adapter = pci_get_drvdata(pdev);
+       struct net_device *netdev =  adapter->netdev;
+
+       dev_info(&adapter->pdev->dev, "EEH resume\n");
+
+       pci_save_state(pdev);
+
+       /* tell fw we're ready to fire cmds */
+       status = be_cmd_fw_init(adapter);
+       if (status)
+               goto err;
+
+       status = be_setup(adapter);     /* be_clear() ran in error_detected */
+       if (status)
+               goto err;
+
+       if (netif_running(netdev)) {    /* be_close() ran if it was up */
+               status = be_open(netdev);
+               if (status)
+                       goto err;
+       }
+       netif_device_attach(netdev);
+       return;
+err:   /* any failure skips netif_device_attach(): netdev stays detached */
+       dev_err(&adapter->pdev->dev, "EEH resume failed\n");
+       return;
+}
+
+static struct pci_error_handlers be_eeh_handlers = {
+       .error_detected = be_eeh_err_detected, /* quiesce, ask for reset */
+       .slot_reset = be_eeh_reset,     /* re-enable device, check fw POST */
+       .resume = be_eeh_resume,        /* redo setup, reattach netdev */
+};
+
 static struct pci_driver be_driver = {
        .name = DRV_NAME,
        .id_table = be_dev_ids,
        .probe = be_probe,
        .remove = be_remove,
        .suspend = be_suspend,
-       .resume = be_resume
+       .resume = be_resume,
+       .err_handler = &be_eeh_handlers /* EEH error recovery callbacks */
 };
 
 static int __init be_init_module(void)