drm/amd/powerplay: add thermal ctf support for navi10
author Kevin Wang <kevin1.wang@amd.com>
Thu, 30 May 2019 10:00:22 +0000 (18:00 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 21 Jun 2019 23:59:31 +0000 (18:59 -0500)
add sw-CTF support for navi10

Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
drivers/gpu/drm/amd/powerplay/navi10_ppt.c
drivers/gpu/drm/amd/powerplay/smu_v11_0.c

index 7d1d91975705ff7ddc178b709082cb30d5eae7ca..1d0523831a15bcd37195d1ed2e5045d84af2cee5 100644 (file)
@@ -1007,6 +1007,10 @@ static int smu_hw_init(void *handle)
        if (ret)
                goto failed;
 
+       ret = smu_register_irq_handler(smu);
+       if (ret)
+               goto failed;
+
        mutex_unlock(&smu->mutex);
 
        if (!smu->pm_enabled)
@@ -1051,6 +1055,9 @@ static int smu_hw_fini(void *handle)
        kfree(table_context->od8_settings);
        table_context->od8_settings = NULL;
 
+       kfree(smu->irq_source);
+       smu->irq_source = NULL;
+
        ret = smu_fini_fb_allocations(smu);
        if (ret)
                return ret;
index f89971992e6f377a84688337aa7e4f3487f0b0fd..5e49b38ddd0da02632bce17c2b04460922efcd3c 100644 (file)
@@ -497,6 +497,7 @@ struct mclock_latency_table {
 struct smu_context
 {
        struct amdgpu_device            *adev;
+       struct amdgpu_irq_src           *irq_source;
 
        const struct smu_funcs          *funcs;
        const struct pptable_funcs      *ppt_funcs;
@@ -687,6 +688,7 @@ struct smu_funcs
        int (*set_fan_speed_rpm)(struct smu_context *smu, uint32_t speed);
        int (*set_xgmi_pstate)(struct smu_context *smu, uint32_t pstate);
        int (*gfx_off_control)(struct smu_context *smu, bool enable);
+       int (*register_irq_handler)(struct smu_context *smu);
 };
 
 #define smu_init_microcode(smu) \
@@ -895,6 +897,8 @@ struct smu_funcs
        ((smu)->ppt_funcs->get_current_clk_freq_by_table ? (smu)->ppt_funcs->get_current_clk_freq_by_table((smu), (clk_type), (value)) : 0)
 #define smu_get_thermal_temperature_range(smu, range) \
        ((smu)->ppt_funcs->get_thermal_temperature_range? (smu)->ppt_funcs->get_thermal_temperature_range((smu), (range)) : 0)
+/* Call the optional funcs->register_irq_handler hook; evaluates to 0 when the hook is unset. */
+#define smu_register_irq_handler(smu) \
+       ((smu)->funcs->register_irq_handler ? \
+               (smu)->funcs->register_irq_handler((smu)) : 0)
 
 extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table,
                                   uint16_t *size, uint8_t *frev, uint8_t *crev,
index c737725f18b452b00e7afbb54b451df22bbf50ff..0d8a359d79d37f27592ff3b9789876e1dc1f5868 100644 (file)
@@ -476,6 +476,8 @@ static int navi10_store_powerplay_table(struct smu_context *smu)
        memcpy(table_context->driver_pptable, &powerplay_table->smc_pptable,
               sizeof(PPTable_t));
 
+       table_context->thermal_controller_type = powerplay_table->thermal_controller_type;
+
        return 0;
 }
 
index d519fe8842816c628519b67ee14bfb5d8f8b73e3..3200d8c729dd08261a00a65d92b8d9982f0be9bf 100644 (file)
@@ -1131,6 +1131,8 @@ static int smu_v11_0_set_thermal_range(struct smu_context *smu,
        val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL);
        val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5);
        val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1);
+       val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTH_MASK, 0);
+       val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_INTL_MASK, 0);
        val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES));
        val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / SMU_TEMPERATURE_UNITS_PER_CENTIGRADES));
        val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
@@ -1181,6 +1183,7 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
                ret = smu_v11_0_enable_thermal_alert(smu);
                if (ret)
                        return ret;
+
                ret = smu_set_thermal_fan_table(smu);
                if (ret)
                        return ret;
@@ -1662,6 +1665,81 @@ static int smu_v11_0_set_xgmi_pstate(struct smu_context *smu,
        return ret;
 }
 
+#define THM_11_0__SRCID__THM_DIG_THERM_L2H             0               /* raised when ASIC_TEMP > CG_THERMAL_INT.DIG_THERM_INTH */
+#define THM_11_0__SRCID__THM_DIG_THERM_H2L             1               /* raised when ASIC_TEMP < CG_THERMAL_INT.DIG_THERM_INTL */
+
+/**
+ * smu_v11_0_irq_process - handle an interrupt vector entry for the SMU
+ * @adev: device that raised the interrupt
+ * @source: interrupt source the entry was dispatched to (unused here)
+ * @entry: decoded interrupt vector entry
+ *
+ * Logs a warning for the high and low digital thermal threshold interrupts,
+ * and for any other source id delivered by the THM client. Entries coming
+ * from other clients are ignored.
+ *
+ * Return: always 0.
+ */
+static int smu_v11_0_irq_process(struct amdgpu_device *adev,
+                                struct amdgpu_irq_src *source,
+                                struct amdgpu_iv_entry *entry)
+{
+       uint32_t src_id = entry->src_id;
+       /*
+        * devfn only encodes slot and function, so PCI_BUS_NUM(devfn) would
+        * always yield 0; the bus number has to come from the parent bus.
+        */
+       unsigned int bus = adev->pdev->bus->number;
+       unsigned int slot = PCI_SLOT(adev->pdev->devfn);
+       unsigned int func = PCI_FUNC(adev->pdev->devfn);
+
+       if (entry->client_id != SOC15_IH_CLIENTID_THM)
+               return 0;
+
+       switch (src_id) {
+       case THM_11_0__SRCID__THM_DIG_THERM_L2H:
+               pr_warn("GPU over temperature range detected on PCIe %u:%u.%u!\n",
+                       bus, slot, func);
+               break;
+       case THM_11_0__SRCID__THM_DIG_THERM_H2L:
+               pr_warn("GPU under temperature range detected on PCIe %u:%u.%u!\n",
+                       bus, slot, func);
+               break;
+       default:
+               /* not one of the two thermal thresholds: report the raw id */
+               pr_warn("GPU thermal interrupt with unknown src id (%u) detected on PCIe %u:%u.%u!\n",
+                       src_id, bus, slot, func);
+               break;
+       }
+
+       return 0;
+}
+
+/* Callbacks for the SMU v11 interrupt source; only .process is provided. */
+static const struct amdgpu_irq_src_funcs smu_v11_0_irq_funcs = {
+       .process = smu_v11_0_irq_process,
+};
+
+/**
+ * smu_v11_0_register_irq_handler - register the thermal threshold interrupts
+ * @smu: SMU context; on success smu->irq_source holds the allocated source
+ *
+ * Allocates one amdgpu_irq_src and registers it with the THM client for both
+ * digital thermal source ids. Does nothing when smu->irq_source is already
+ * set.
+ *
+ * Return: 0 on success or when already registered, -ENOMEM if the source
+ * cannot be allocated, otherwise the error returned by amdgpu_irq_add_id().
+ */
+static int smu_v11_0_register_irq_handler(struct smu_context *smu)
+{
+       struct amdgpu_device *adev = smu->adev;
+       struct amdgpu_irq_src *irq_src;
+       int ret;
+
+       /* already registered */
+       if (smu->irq_source)
+               return 0;
+
+       irq_src = kzalloc(sizeof(*irq_src), GFP_KERNEL);
+       if (!irq_src)
+               return -ENOMEM;
+
+       irq_src->funcs = &smu_v11_0_irq_funcs;
+
+       ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_THM,
+                               THM_11_0__SRCID__THM_DIG_THERM_L2H,
+                               irq_src);
+       if (ret) {
+               /*
+                * Do not leave a half-initialised source in the context:
+                * a later call would take the "already registered" path
+                * and report success without any handler installed.
+                */
+               kfree(irq_src);
+               return ret;
+       }
+
+       /*
+        * NOTE(review): after a successful add the IRQ core presumably keeps
+        * a reference to irq_src, so from here on it stays owned by the
+        * context even if the second registration fails.
+        */
+       smu->irq_source = irq_src;
+
+       return amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_THM,
+                                THM_11_0__SRCID__THM_DIG_THERM_H2L,
+                                irq_src);
+}
+
 static const struct smu_funcs smu_v11_0_funcs = {
        .init_microcode = smu_v11_0_init_microcode,
        .load_microcode = smu_v11_0_load_microcode,
@@ -1711,6 +1789,7 @@ static const struct smu_funcs smu_v11_0_funcs = {
        .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm,
        .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate,
        .gfx_off_control = smu_v11_0_gfx_off_control,
+       .register_irq_handler = smu_v11_0_register_irq_handler,
 };
 
 void smu_v11_0_set_smu_funcs(struct smu_context *smu)