net/mlx5: Add temperature warning event to log
author Ilan Tayari <ilant@mellanox.com>
Wed, 30 May 2018 17:59:49 +0000 (10:59 -0700)
committer David S. Miller <davem@davemloft.net>
Thu, 31 May 2018 19:35:37 +0000 (15:35 -0400)
Temperature warning event is sent by FW to indicate high temperature
as detected by one of the sensors on the board.
Add handling of this event by writing the bitmask of the alerting sensors
to the kernel log.

Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Adi Nissim <adin@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlx5/core/eq.c
include/linux/mlx5/device.h
include/linux/mlx5/mlx5_ifc.h

index 1814f803bd2cbf8b00baa4b9091cc4e954dd2304..1a3a2b9a723245ad04c3fdf085863b39be3a3720 100644 (file)
@@ -144,6 +144,8 @@ static const char *eqe_type_str(u8 type)
                return "MLX5_EVENT_TYPE_GPIO_EVENT";
        case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
                return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
+       case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+               return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
        case MLX5_EVENT_TYPE_REMOTE_CONFIG:
                return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
        case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
@@ -396,6 +398,20 @@ static void general_event_handler(struct mlx5_core_dev *dev,
        }
 }
 
+/* Warn about the sensors flagged in a FW temperature warning EQE. */
+static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
+                                   struct mlx5_eqe *eqe)
+{
+       /* EQE fields are big-endian; convert before printing. */
+       u64 value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
+       u64 value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
+
+       /* Terminate with '\n' so the record is a complete log line. */
+       mlx5_core_warn(dev,
+                      "High temperature on sensors with bit set %llx %llx\n",
+                      value_msb, value_lsb);
+}
+
 /* caller must eventually call mlx5_cq_put on the returned cq */
 static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
 {
@@ -550,6 +566,10 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
                        mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
                        break;
 
+               case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
+                       mlx5_temp_warning_event(dev, eqe);
+                       break;
+
                case MLX5_EVENT_TYPE_GENERAL_EVENT:
                        general_event_handler(dev, eqe);
                        break;
@@ -827,6 +847,9 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);
 
 
+       if (MLX5_CAP_GEN(dev, temp_warn_event))
+               async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
+
        err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
                                 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
                                 "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
index db0332a6d23c7344416616adca9e20d1cc22e2bd..c1095e08f0c8848c74a5248383cb83465f3ef7ed 100644 (file)
@@ -314,6 +314,7 @@ enum mlx5_event {
        MLX5_EVENT_TYPE_PORT_CHANGE        = 0x09,
        MLX5_EVENT_TYPE_GPIO_EVENT         = 0x15,
        MLX5_EVENT_TYPE_PORT_MODULE_EVENT  = 0x16,
+       MLX5_EVENT_TYPE_TEMP_WARN_EVENT    = 0x17,
        MLX5_EVENT_TYPE_REMOTE_CONFIG      = 0x19,
        MLX5_EVENT_TYPE_GENERAL_EVENT      = 0x22,
        MLX5_EVENT_TYPE_PPS_EVENT          = 0x25,
@@ -626,6 +627,11 @@ struct mlx5_eqe_dct {
        __be32  dctn;
 };
 
+struct mlx5_eqe_temp_warning {
+       __be64 sensor_warning_msb; /* big-endian; logged as the high word */
+       __be64 sensor_warning_lsb; /* big-endian; logged as the low word */
+} __packed; /* temperature warning payload; a member of union ev_data */
+
 union ev_data {
        __be32                          raw[7];
        struct mlx5_eqe_cmd             cmd;
@@ -642,6 +648,7 @@ union ev_data {
        struct mlx5_eqe_port_module     port_module;
        struct mlx5_eqe_pps             pps;
        struct mlx5_eqe_dct             dct;
+       struct mlx5_eqe_temp_warning    temp_warning;
 } __packed;
 
 struct mlx5_eqe {
index 05b480fae27d17ebbecf94538702db5f148de9fb..ca6c0dfb5ffe5f2afbda7c9565f0fe582399ac83 100644 (file)
@@ -912,7 +912,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         log_max_msg[0x5];
        u8         reserved_at_1c8[0x4];
        u8         max_tc[0x4];
-       u8         reserved_at_1d0[0x1];
+       u8         temp_warn_event[0x1];
        u8         dcbx[0x1];
        u8         general_notification_event[0x1];
        u8         reserved_at_1d3[0x2];