return 0;
}
-static void health_recover_work(struct work_struct *work)
-{
- struct mlx5_core_health *health;
- struct mlx5_core_dev *dev;
- struct mlx5_priv *priv;
-
- health = container_of(work, struct mlx5_core_health, work);
- priv = container_of(health, struct mlx5_priv, health);
- dev = container_of(priv, struct mlx5_core_dev, priv);
-
- mlx5_health_try_recover(dev);
-}
-
static const char *hsynd_str(u8 synd)
{
switch (synd) {
return err;
}
+static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
+{
+ struct mlx5_fw_reporter_ctx fw_reporter_ctx;
+ struct mlx5_core_health *health;
+ struct mlx5_core_dev *dev;
+ struct mlx5_priv *priv;
+
+ health = container_of(work, struct mlx5_core_health, fatal_report_work);
+ priv = container_of(health, struct mlx5_priv, health);
+ dev = container_of(priv, struct mlx5_core_dev, priv);
+
+ mlx5_enter_error_state(dev, false);
+ if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+ if (mlx5_health_try_recover(dev))
+ mlx5_core_err(dev, "health recovery failed\n");
+ return;
+ }
+ fw_reporter_ctx.err_synd = health->synd;
+ fw_reporter_ctx.miss_counter = health->miss_counter;
+ devlink_health_report(health->fw_fatal_reporter,
+ "FW fatal error reported", &fw_reporter_ctx);
+}
+
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
.name = "fw_fatal",
.recover = mlx5_fw_fatal_reporter_recover,
spin_lock_irqsave(&health->wq_lock, flags);
if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
- queue_work(health->wq, &health->work);
+ queue_work(health->wq, &health->fatal_report_work);
else
mlx5_core_err(dev, "new health works are not permitted at this stage\n");
spin_unlock_irqrestore(&health->wq_lock, flags);
set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
spin_unlock_irqrestore(&health->wq_lock, flags);
cancel_work_sync(&health->report_work);
- cancel_work_sync(&health->work);
+ cancel_work_sync(&health->fatal_report_work);
}
void mlx5_health_flush(struct mlx5_core_dev *dev)
if (!health->wq)
goto out_err;
spin_lock_init(&health->wq_lock);
- INIT_WORK(&health->work, health_recover_work);
+ INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
return 0;
mlx5_enter_error_state(dev, false);
mlx5_error_sw_reset(dev);
mlx5_unload_one(dev, false);
- /* In case of kernel call drain the health wq */
- if (state) {
- mlx5_drain_health_wq(dev);
- mlx5_pci_disable_device(dev);
- }
+ mlx5_drain_health_wq(dev);
+ mlx5_pci_disable_device(dev);
return state == pci_channel_io_perm_failure ?
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
void mlx5_disable_device(struct mlx5_core_dev *dev)
{
- mlx5_pci_err_detected(dev->pdev, 0);
+ mlx5_error_sw_reset(dev);
+ mlx5_unload_one(dev, false);
}
void mlx5_recover_device(struct mlx5_core_dev *dev)