drm/scheduler: remove timeout work_struct from drm_sched_job (v3)
author Nayan Deshmukh <nayan26deshmukh@gmail.com>
Tue, 25 Sep 2018 17:09:02 +0000 (02:09 +0900)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 27 Sep 2018 14:55:45 +0000 (09:55 -0500)
Having a delayed work item per job is redundant, as we only need one
per scheduler to track the timeout of the currently executing job.

v2: the first element of the ring mirror list is the currently
executing job, so we don't need an additional variable for it

v3: squash in fixes for v3d and etnaviv

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/etnaviv/etnaviv_sched.c
drivers/gpu/drm/scheduler/sched_main.c
drivers/gpu/drm/v3d/v3d_sched.c
include/drm/gpu_scheduler.h

index 69e9b431bf1f02ec7c87e5f740f80595ad98dd53..e7c3ed6c9a2e10ddcd7665e851a1bffb9ff0247f 100644 (file)
@@ -105,7 +105,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
        change = dma_addr - gpu->hangcheck_dma_addr;
        if (change < 0 || change > 16) {
                gpu->hangcheck_dma_addr = dma_addr;
-               schedule_delayed_work(&sched_job->work_tdr,
+               schedule_delayed_work(&sched_job->sched->work_tdr,
                                      sched_job->sched->timeout);
                return;
        }
index 9ca741f3a0bc861f1aef17cedd5fb7954339b724..4e8505d51795ea8baa789b67f0f6332ba0540c88 100644 (file)
@@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work)
         * manages to find this job as the next job in the list, the fence
         * signaled check below will prevent the timeout to be restarted.
         */
-       cancel_delayed_work_sync(&s_job->work_tdr);
+       cancel_delayed_work_sync(&sched->work_tdr);
 
        spin_lock(&sched->job_list_lock);
-       /* queue TDR for next job */
-       if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-           !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
-               struct drm_sched_job *next = list_next_entry(s_job, node);
-
-               if (!dma_fence_is_signaled(&next->s_fence->finished))
-                       schedule_delayed_work(&next->work_tdr, sched->timeout);
-       }
        /* remove job from ring_mirror_list */
        list_del(&s_job->node);
+       /* queue TDR for next job */
+       if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
+           !list_empty(&sched->ring_mirror_list))
+               schedule_delayed_work(&sched->work_tdr, sched->timeout);
        spin_unlock(&sched->job_list_lock);
 
        dma_fence_put(&s_job->s_fence->finished);
@@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
            list_first_entry_or_null(&sched->ring_mirror_list,
                                     struct drm_sched_job, node) == s_job)
-               schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+               schedule_delayed_work(&sched->work_tdr, sched->timeout);
        spin_unlock(&sched->job_list_lock);
 }
 
 static void drm_sched_job_timedout(struct work_struct *work)
 {
-       struct drm_sched_job *job = container_of(work, struct drm_sched_job,
-                                                work_tdr.work);
+       struct drm_gpu_scheduler *sched;
+       struct drm_sched_job *job;
+
+       sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
+       job = list_first_entry_or_null(&sched->ring_mirror_list,
+                                      struct drm_sched_job, node);
 
-       job->sched->ops->timedout_job(job);
+       if (job)
+               job->sched->ops->timedout_job(job);
 }
 
 /**
@@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
        s_job = list_first_entry_or_null(&sched->ring_mirror_list,
                                         struct drm_sched_job, node);
        if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
-               schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+               schedule_delayed_work(&sched->work_tdr, sched->timeout);
 
        list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
                struct drm_sched_fence *s_fence = s_job->s_fence;
@@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
 
        INIT_WORK(&job->finish_work, drm_sched_job_finish);
        INIT_LIST_HEAD(&job->node);
-       INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
 
        return 0;
 }
@@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
        INIT_LIST_HEAD(&sched->ring_mirror_list);
        spin_lock_init(&sched->job_list_lock);
        atomic_set(&sched->hw_rq_count, 0);
+       INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
        atomic_set(&sched->num_jobs, 0);
        atomic64_set(&sched->job_id_count, 0);
 
index a5501581d96b3a3c4293e4504895a01de3c1d0d2..9243dea6e6ad106169c81dd384d5d4bc36c6d3ea 100644 (file)
@@ -168,7 +168,7 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
                job->timedout_ctca = ctca;
                job->timedout_ctra = ctra;
 
-               schedule_delayed_work(&job->base.work_tdr,
+               schedule_delayed_work(&job->base.sched->work_tdr,
                                      job->base.sched->timeout);
                return;
        }
index daec50f887b30615bb0d0797ef8df8d8208c3702..d87b268f1781cb50fd8674d6349a0626760e125f 100644 (file)
@@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
  *               finished to remove the job from the
  *               @drm_gpu_scheduler.ring_mirror_list.
  * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
- * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout
- *            interval is over.
  * @id: a unique id assigned to each job scheduled on the scheduler.
  * @karma: increment on every hang caused by this job. If this exceeds the hang
  *         limit of the scheduler then the job is marked guilty and will not
@@ -195,7 +193,6 @@ struct drm_sched_job {
        struct dma_fence_cb             finish_cb;
        struct work_struct              finish_work;
        struct list_head                node;
-       struct delayed_work             work_tdr;
        uint64_t                        id;
        atomic_t                        karma;
        enum drm_sched_priority         s_priority;
@@ -259,6 +256,8 @@ struct drm_sched_backend_ops {
  *                 finished.
  * @hw_rq_count: the number of jobs currently in the hardware queue.
  * @job_id_count: used to assign unique id to the each job.
+ * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
+ *            timeout interval is over.
  * @thread: the kthread on which the scheduler which run.
  * @ring_mirror_list: the list of jobs which are currently in the job queue.
  * @job_list_lock: lock to protect the ring_mirror_list.
@@ -278,6 +277,7 @@ struct drm_gpu_scheduler {
        wait_queue_head_t               job_scheduled;
        atomic_t                        hw_rq_count;
        atomic64_t                      job_id_count;
+       struct delayed_work             work_tdr;
        struct task_struct              *thread;
        struct list_head                ring_mirror_list;
        spinlock_t                      job_list_lock;