MLX5_HEALTH_SYNDR_HIGH_TEMP = 0x10
};
+enum {
+ MLX5_NIC_IFC_FULL = 0,
+ MLX5_NIC_IFC_DISABLED = 1,
+ MLX5_NIC_IFC_NO_DRAM_NIC = 2
+};
+
+static u8 get_nic_interface(struct mlx5_core_dev *dev)
+{
+ return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
+}
+
+static int in_fatal(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+
+ if (get_nic_interface(dev) == MLX5_NIC_IFC_DISABLED)
+ return 1;
+
+ if (ioread32be(&h->fw_ver) == 0xffffffff)
+ return 1;
+
+ return 0;
+}
+
static void health_care(struct work_struct *work)
{
struct mlx5_core_health *health;
dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
}
+static unsigned long get_next_poll_jiffies(void)
+{
+ unsigned long next;
+
+ get_random_bytes(&next, sizeof(next));
+ next %= HZ;
+ next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
+
+ return next;
+}
+
static void poll_health(unsigned long data)
{
struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
struct mlx5_core_health *health = &dev->priv.health;
- unsigned long next;
u32 count;
count = ioread32be(health->health_counter);
health->prev = count;
if (health->miss_counter == MAX_MISSES) {
- mlx5_core_err(dev, "device's health compromised\n");
+ dev_err(&dev->pdev->dev, "device's health compromised - reached miss count\n");
print_health_info(dev);
- queue_work(health->wq, &health->work);
} else {
- get_random_bytes(&next, sizeof(next));
- next %= HZ;
- next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
- mod_timer(&health->timer, next);
+ mod_timer(&health->timer, get_next_poll_jiffies());
+ }
+
+ if (in_fatal(dev) && !health->sick) {
+ health->sick = true;
+ print_health_info(dev);
+ queue_work(health->wq, &health->work);
}
}