scsi: ufs: Add history of fatal events
author Stanley Chu <stanley.chu@mediatek.com>
Wed, 10 Jul 2019 13:38:21 +0000 (21:38 +0800)
committer Martin K. Petersen <martin.petersen@oracle.com>
Mon, 22 Jul 2019 20:44:06 +0000 (16:44 -0400)
Currently only "interrupt-based" errors have their own history. However,
there are also "non-interrupt-based" errors and events that need a history
to improve debugging and to help assess the health status of UFS devices.

Example of fatal errors:

 - Link startup error

 - Suspend error

 - Resume error

Example of abnormal events:

 - Task or request abort

 - Device reset (currently equivalent to Logical Unit Reset)

 - Host reset

This patch tracks the above errors and events using the existing UFS error
history mechanism.

Signed-off-by: Stanley Chu <stanley.chu@mediatek.com>
Reviewed-by: Avri Altman <avri.altman@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/ufs/ufshcd.c
drivers/scsi/ufs/ufshcd.h

index f272ec3cad5890a452789f48bb35912be387afae..3804a704e565e9768779c9a0bea164bc5083e6a9 100644 (file)
@@ -429,9 +429,17 @@ static void ufshcd_print_host_regs(struct ufs_hba *hba)
        ufshcd_print_err_hist(hba, &hba->ufs_stats.nl_err, "nl_err");
        ufshcd_print_err_hist(hba, &hba->ufs_stats.tl_err, "tl_err");
        ufshcd_print_err_hist(hba, &hba->ufs_stats.dme_err, "dme_err");
-       ufshcd_print_err_hist(hba, &hba->ufs_stats.fatal_err, "fatal_err");
        ufshcd_print_err_hist(hba, &hba->ufs_stats.auto_hibern8_err,
                              "auto_hibern8_err");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.fatal_err, "fatal_err");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.link_startup_err,
+                             "link_startup_fail");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.resume_err, "resume_fail");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.suspend_err,
+                             "suspend_fail");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.dev_reset, "dev_reset");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.host_reset, "host_reset");
+       ufshcd_print_err_hist(hba, &hba->ufs_stats.task_abort, "task_abort");
 
        ufshcd_print_clk_freqs(hba);
 
@@ -4330,6 +4338,14 @@ static inline int ufshcd_disable_device_tx_lcc(struct ufs_hba *hba)
        return ufshcd_disable_tx_lcc(hba, true);
 }
 
+static void ufshcd_update_reg_hist(struct ufs_err_reg_hist *reg_hist,
+                                  u32 reg)
+{
+       reg_hist->reg[reg_hist->pos] = reg;
+       reg_hist->tstamp[reg_hist->pos] = ktime_get();
+       reg_hist->pos = (reg_hist->pos + 1) % UFS_ERR_REG_HIST_LENGTH;
+}
+
 /**
  * ufshcd_link_startup - Initialize unipro link startup
  * @hba: per adapter instance
@@ -4357,6 +4373,8 @@ link_startup:
 
                /* check if device is detected by inter-connect layer */
                if (!ret && !ufshcd_is_device_present(hba)) {
+                       ufshcd_update_reg_hist(&hba->ufs_stats.link_startup_err,
+                                              0);
                        dev_err(hba->dev, "%s: Device not present\n", __func__);
                        ret = -ENXIO;
                        goto out;
@@ -4367,13 +4385,19 @@ link_startup:
                 * but we can't be sure if the link is up until link startup
                 * succeeds. So reset the local Uni-Pro and try again.
                 */
-               if (ret && ufshcd_hba_enable(hba))
+               if (ret && ufshcd_hba_enable(hba)) {
+                       ufshcd_update_reg_hist(&hba->ufs_stats.link_startup_err,
+                                              (u32)ret);
                        goto out;
+               }
        } while (ret && retries--);
 
-       if (ret)
+       if (ret) {
                /* failed to get the link up... retire */
+               ufshcd_update_reg_hist(&hba->ufs_stats.link_startup_err,
+                                      (u32)ret);
                goto out;
+       }
 
        if (link_startup_again) {
                link_startup_again = false;
@@ -5349,14 +5373,6 @@ out:
        pm_runtime_put_sync(hba->dev);
 }
 
-static void ufshcd_update_reg_hist(struct ufs_err_reg_hist *reg_hist,
-                                  u32 reg)
-{
-       reg_hist->reg[reg_hist->pos] = reg;
-       reg_hist->tstamp[reg_hist->pos] = ktime_get();
-       reg_hist->pos = (reg_hist->pos + 1) % UFS_ERR_REG_HIST_LENGTH;
-}
-
 /**
  * ufshcd_update_uic_error - check and set fatal UIC error flags.
  * @hba: per-adapter instance
@@ -5949,6 +5965,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 
 out:
        hba->req_abort_count = 0;
+       ufshcd_update_reg_hist(&hba->ufs_stats.dev_reset, (u32)err);
        if (!err) {
                err = SUCCESS;
        } else {
@@ -6042,6 +6059,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
         */
        scsi_print_command(hba->lrb[tag].cmd);
        if (!hba->req_abort_count) {
+               ufshcd_update_reg_hist(&hba->ufs_stats.task_abort, 0);
                ufshcd_print_host_regs(hba);
                ufshcd_print_host_state(hba);
                ufshcd_print_pwr_info(hba);
@@ -6177,7 +6195,7 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
 out:
        if (err)
                dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err);
-
+       ufshcd_update_reg_hist(&hba->ufs_stats.host_reset, (u32)err);
        return err;
 }
 
@@ -7819,6 +7837,8 @@ enable_gating:
        ufshcd_release(hba);
 out:
        hba->pm_op_in_progress = 0;
+       if (ret)
+               ufshcd_update_reg_hist(&hba->ufs_stats.suspend_err, (u32)ret);
        return ret;
 }
 
@@ -7921,6 +7941,8 @@ disable_irq_and_vops_clks:
        ufshcd_setup_clocks(hba, false);
 out:
        hba->pm_op_in_progress = 0;
+       if (ret)
+               ufshcd_update_reg_hist(&hba->ufs_stats.resume_err, (u32)ret);
        return ret;
 }
 
index c6ec5c749cebd0a672e7142c13587da32e7e0bf1..a43c7135f33da946dd75bc849355f2fe92263a29 100644 (file)
@@ -436,8 +436,14 @@ struct ufs_err_reg_hist {
  * @nl_err: tracks nl-uic errors
  * @tl_err: tracks tl-uic errors
  * @dme_err: tracks dme errors
- * @fatal_err: tracks fatal errors
  * @auto_hibern8_err: tracks auto-hibernate errors
+ * @fatal_err: tracks fatal errors
+ * @link_startup_err: tracks link-startup errors
+ * @resume_err: tracks resume errors
+ * @suspend_err: tracks suspend errors
+ * @dev_reset: tracks device reset events
+ * @host_reset: tracks host reset events
+ * @task_abort: tracks task abort events
  */
 struct ufs_stats {
        u32 hibern8_exit_cnt;
@@ -451,8 +457,16 @@ struct ufs_stats {
        struct ufs_err_reg_hist dme_err;
 
        /* fatal errors */
-       struct ufs_err_reg_hist fatal_err;
        struct ufs_err_reg_hist auto_hibern8_err;
+       struct ufs_err_reg_hist fatal_err;
+       struct ufs_err_reg_hist link_startup_err;
+       struct ufs_err_reg_hist resume_err;
+       struct ufs_err_reg_hist suspend_err;
+
+       /* abnormal events */
+       struct ufs_err_reg_hist dev_reset;
+       struct ufs_err_reg_hist host_reset;
+       struct ufs_err_reg_hist task_abort;
 };
 
 /**