From f25a49ab8ab9c1b5587837c8a386b276403f315c Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Sat, 19 May 2018 12:28:54 +0800 Subject: [PATCH] drm/i915/gvt: Use vgpu_lock to protect per vgpu access The patch set splits out 2 small locks from the original big gvt lock: - vgpu_lock protects per-vGPU data and logic, especially the vGPU trap emulation path. - sched_lock protects gvt scheudler structure, context schedule logic and vGPU's schedule data. Use vgpu_lock to replace the gvt big lock. By doing this, the mmio read/write trap path, vgpu virtual event emulation and other vgpu related process, would be protected under per vgpu_lock. v9: - Change commit author since the patches are improved a lot compared with original version. Original author: Pei Zhang - Rebase to latest gvt-staging. v8: - Correct coding and comment style. - Rebase to latest gvt-staging. v7: - Remove gtt_lock since already proteced by gvt_lock and vgpu_lock. - Fix a typo in intel_gvt_deactivate_vgpu, unlock the wrong lock. v6: - Rebase to latest gvt-staging. v5: - Rebase to latest gvt-staging. - intel_vgpu_page_track_handler should use vgpu_lock. v4: - Rebase to latest gvt-staging. - Protect vgpu->active access with vgpu_lock. - Do not wait gpu idle in vgpu_lock. v3: update to latest code base v2: add gvt->lock in function gvt_check_vblank_emulation Performance comparison on Kabylake platform. - Configuration: Host: Ubuntu 16.04. Guest 1 & 2: Ubuntu 16.04. glmark2 score comparison: - Configuration: Host: glxgears. Guests: glmark2. +--------------------------------+-----------------+ | Setup | glmark2 score | +--------------------------------+-----------------+ | unified lock, iommu=on | 58~62 (avg. 60) | +--------------------------------+-----------------+ | unified lock, iommu=igfx_off | 57~61 (avg. 59) | +--------------------------------+-----------------+ | per-logic lock, iommu=on | 60~68 (avg. 64) | +--------------------------------+-----------------+ | per-logic lock, iommu=igfx_off | 61~67 (avg. 64) | +--------------------------------+-----------------+ lock_stat comparison: - Configuration: Stop lock stat immediately after boot up. Boot 2 VM Guests. Run glmark2 in guests. Start perf lock_stat for 20 seconds and stop again. - Legend: c - contentions; w - waittime-avg +------------+-----------------+-----------+---------------+------------+ | | gvt_lock |sched_lock | vgpu_lock | gtt_lock | + lock type; +-----------------+-----------+---------------+------------+ | iommu set | c | w | c | w | c | w | c | w | +------------+-------+---------+----+------+------+--------+-----+------+ | unified; | 20697 | 839 |N/A | N/A | N/A | N/A | N/A | N/A | | on | | | | | | | | | +------------+-------+---------+----+------+------+--------+-----+------+ | unified; | 21838 | 658.15 |N/A | N/A | N/A | N/A | N/A | N/A | | igfx_off | | | | | | | | | +------------+-------+---------+----+------+------+--------+-----+------+ | per-logic; | 1553 | 1599.96 |9458|429.97| 5846 | 274.33 | 0 | 0.00 | | on | | | | | | | | | +------------+-------+---------+----+------+------+--------+-----+------+ | per-logic; | 1911 | 1678.32 |8335|445.16| 5451 | 244.80 | 0 | 0.00 | | igfx_off | | | | | | | | | +------------+-------+---------+----+------+------+--------+-----+------+ Signed-off-by: Pei Zhang Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 35 ++++++++++-------- drivers/gpu/drm/i915/gvt/gvt.c | 5 +-- drivers/gpu/drm/i915/gvt/gvt.h | 8 ++++ drivers/gpu/drm/i915/gvt/handlers.c | 4 ++ drivers/gpu/drm/i915/gvt/mmio.c | 12 +++--- drivers/gpu/drm/i915/gvt/page_track.c | 5 +-- drivers/gpu/drm/i915/gvt/scheduler.c | 9 +++-- drivers/gpu/drm/i915/gvt/vgpu.c | 53 ++++++++++++++------------- 8 files changed, 72 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 6d8180e8d1e2..8e4a63c5b7d1 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -337,26 +337,28 @@ void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt) struct intel_gvt_irq *irq = &gvt->irq; struct intel_vgpu *vgpu; int pipe, id; + int found = false; - if (WARN_ON(!mutex_is_locked(&gvt->lock))) - return; - + mutex_lock(&gvt->lock); for_each_active_vgpu(gvt, vgpu, id) { for (pipe = 0; pipe < I915_MAX_PIPES; pipe++) { - if (pipe_is_enabled(vgpu, pipe)) - goto out; + if (pipe_is_enabled(vgpu, pipe)) { + found = true; + break; + } } + if (found) + break; } /* all the pipes are disabled */ - hrtimer_cancel(&irq->vblank_timer.timer); - return; - -out: - hrtimer_start(&irq->vblank_timer.timer, - ktime_add_ns(ktime_get(), irq->vblank_timer.period), - HRTIMER_MODE_ABS); - + if (!found) + hrtimer_cancel(&irq->vblank_timer.timer); + else + hrtimer_start(&irq->vblank_timer.timer, + ktime_add_ns(ktime_get(), irq->vblank_timer.period), + HRTIMER_MODE_ABS); + mutex_unlock(&gvt->lock); } static void emulate_vblank_on_pipe(struct intel_vgpu *vgpu, int pipe) @@ -393,8 +395,10 @@ static void emulate_vblank(struct intel_vgpu *vgpu) { int pipe; + mutex_lock(&vgpu->vgpu_lock); for_each_pipe(vgpu->gvt->dev_priv, pipe) emulate_vblank_on_pipe(vgpu, pipe); + mutex_unlock(&vgpu->vgpu_lock); } /** @@ -409,11 +413,10 @@ void intel_gvt_emulate_vblank(struct intel_gvt *gvt) struct intel_vgpu *vgpu; int id; - if (WARN_ON(!mutex_is_locked(&gvt->lock))) - return; - + mutex_lock(&gvt->lock); for_each_active_vgpu(gvt, vgpu, id) emulate_vblank(vgpu); + mutex_unlock(&gvt->lock); } /** diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 61bd14fcb649..769e06c3ae23 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -271,11 +271,8 @@ static int gvt_service_thread(void *data) continue; if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK, - (void *)&gvt->service_request)) { - mutex_lock(&gvt->lock); + (void *)&gvt->service_request)) intel_gvt_emulate_vblank(gvt); - mutex_unlock(&gvt->lock); - } if (test_bit(INTEL_GVT_REQUEST_SCHED, (void *)&gvt->service_request) || diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 05d15a095310..3600553c8b56 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -170,6 +170,7 @@ struct intel_vgpu_submission { struct intel_vgpu { struct intel_gvt *gvt; + struct mutex vgpu_lock; int id; unsigned long handle; /* vGPU handle used by hypervisor MPT modules */ bool active; @@ -294,6 +295,9 @@ struct intel_vgpu_type { }; struct intel_gvt { + /* GVT scope lock, protect GVT itself, and all resource currently + * not yet protected by special locks(vgpu and scheduler lock). + */ struct mutex lock; struct drm_i915_private *dev_priv; struct idr vgpu_idr; /* vGPU IDR pool */ @@ -314,6 +318,10 @@ struct intel_gvt { struct task_struct *service_thread; wait_queue_head_t service_thread_wq; + + /* service_request is always used in bit operation, we should always + * use it with atomic bit ops so that no need to use gvt big lock. + */ unsigned long service_request; struct { diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index d5e206661048..d60c2bee00fb 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -316,6 +316,7 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, } } + /* vgpu_lock already hold by emulate mmio r/w */ intel_gvt_reset_vgpu_locked(vgpu, false, engine_mask); /* sw will wait for the device to ack the reset request */ @@ -420,7 +421,10 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, vgpu_vreg(vgpu, offset) |= I965_PIPECONF_ACTIVE; else vgpu_vreg(vgpu, offset) &= ~I965_PIPECONF_ACTIVE; + /* vgpu_lock already hold by emulate mmio r/w */ + mutex_unlock(&vgpu->vgpu_lock); intel_gvt_check_vblank_emulation(vgpu->gvt); + mutex_lock(&vgpu->vgpu_lock); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index e4960aff68bd..2be1be2cf49a 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -67,7 +67,7 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, return; gvt = vgpu->gvt; - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); if (reg_is_mmio(gvt, offset)) { if (read) @@ -85,7 +85,7 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, memcpy(pt, p_data, bytes); } - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); } /** @@ -109,7 +109,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true); return 0; } - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); @@ -156,7 +156,7 @@ err: gvt_vgpu_err("fail to emulate MMIO read %08x len %d\n", offset, bytes); out: - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); return ret; } @@ -182,7 +182,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, return 0; } - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); @@ -220,7 +220,7 @@ err: gvt_vgpu_err("fail to emulate MMIO write %08x len %d\n", offset, bytes); out: - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); return ret; } diff --git a/drivers/gpu/drm/i915/gvt/page_track.c b/drivers/gpu/drm/i915/gvt/page_track.c index 53e2bd79c97d..256d0db8bbb1 100644 --- a/drivers/gpu/drm/i915/gvt/page_track.c +++ b/drivers/gpu/drm/i915/gvt/page_track.c @@ -157,11 +157,10 @@ int intel_vgpu_disable_page_track(struct intel_vgpu *vgpu, unsigned long gfn) int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, void *data, unsigned int bytes) { - struct intel_gvt *gvt = vgpu->gvt; struct intel_vgpu_page_track *page_track; int ret = 0; - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT); if (!page_track) { @@ -179,6 +178,6 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa, } out: - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); return ret; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index c2d183b91500..c0848dcf79c7 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -680,6 +680,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", ring_id, workload); + mutex_lock(&vgpu->vgpu_lock); mutex_lock(&dev_priv->drm.struct_mutex); ret = intel_gvt_scan_and_shadow_workload(workload); @@ -704,6 +705,7 @@ out: } mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&vgpu->vgpu_lock); return ret; } @@ -861,14 +863,14 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) int event; mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); /* For the workload w/ request, needs to wait for the context * switch to make sure request is completed. * For the workload w/o request, directly complete the workload. */ if (workload->req) { - struct drm_i915_private *dev_priv = - workload->vgpu->gvt->dev_priv; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id]; wait_event(workload->shadow_ctx_status_wq, @@ -939,6 +941,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) if (gvt->scheduler.need_reschedule) intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED); + mutex_unlock(&vgpu->vgpu_lock); mutex_unlock(&gvt->lock); } @@ -991,9 +994,7 @@ static int workload_thread(void *priv) intel_uncore_forcewake_get(gvt->dev_priv, FORCEWAKE_ALL); - mutex_lock(&gvt->lock); ret = dispatch_workload(workload); - mutex_unlock(&gvt->lock); if (ret) { vgpu = workload->vgpu; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index bf75300c1ec1..889d10f8ee96 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -226,22 +226,20 @@ void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu) */ void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) { - struct intel_gvt *gvt = vgpu->gvt; - - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); vgpu->active = false; if (atomic_read(&vgpu->submission.running_workload_num)) { - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); intel_gvt_wait_vgpu_idle(vgpu); - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); } intel_vgpu_stop_schedule(vgpu); intel_vgpu_dmabuf_cleanup(vgpu); - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); } /** @@ -255,14 +253,11 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); WARN(vgpu->active, "vGPU is still active!\n"); intel_gvt_debugfs_remove_vgpu(vgpu); - idr_remove(&gvt->vgpu_idr, vgpu->id); - if (idr_is_empty(&gvt->vgpu_idr)) - intel_gvt_clean_irq(gvt); intel_vgpu_clean_sched_policy(vgpu); intel_vgpu_clean_submission(vgpu); intel_vgpu_clean_display(vgpu); @@ -272,10 +267,16 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) intel_vgpu_free_resource(vgpu); intel_vgpu_clean_mmio(vgpu); intel_vgpu_dmabuf_cleanup(vgpu); - vfree(vgpu); + mutex_unlock(&vgpu->vgpu_lock); + mutex_lock(&gvt->lock); + idr_remove(&gvt->vgpu_idr, vgpu->id); + if (idr_is_empty(&gvt->vgpu_idr)) + intel_gvt_clean_irq(gvt); intel_gvt_update_vgpu_types(gvt); mutex_unlock(&gvt->lock); + + vfree(vgpu); } #define IDLE_VGPU_IDR 0 @@ -301,6 +302,7 @@ struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt) vgpu->id = IDLE_VGPU_IDR; vgpu->gvt = gvt; + mutex_init(&vgpu->vgpu_lock); for (i = 0; i < I915_NUM_ENGINES; i++) INIT_LIST_HEAD(&vgpu->submission.workload_q_head[i]); @@ -327,7 +329,10 @@ out_free_vgpu: */ void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu) { + mutex_lock(&vgpu->vgpu_lock); intel_vgpu_clean_sched_policy(vgpu); + mutex_unlock(&vgpu->vgpu_lock); + vfree(vgpu); } @@ -345,8 +350,6 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (!vgpu) return ERR_PTR(-ENOMEM); - mutex_lock(&gvt->lock); - ret = idr_alloc(&gvt->vgpu_idr, vgpu, IDLE_VGPU_IDR + 1, GVT_MAX_VGPU, GFP_KERNEL); if (ret < 0) @@ -356,6 +359,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, vgpu->handle = param->handle; vgpu->gvt = gvt; vgpu->sched_ctl.weight = param->weight; + mutex_init(&vgpu->vgpu_lock); INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head); INIT_RADIX_TREE(&vgpu->page_track_tree, GFP_KERNEL); idr_init(&vgpu->object_idr); @@ -403,8 +407,6 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; - mutex_unlock(&gvt->lock); - return vgpu; out_clean_sched_policy: @@ -427,7 +429,6 @@ out_clean_idr: idr_remove(&gvt->vgpu_idr, vgpu->id); out_free_vgpu: vfree(vgpu); - mutex_unlock(&gvt->lock); return ERR_PTR(ret); } @@ -459,12 +460,12 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz); param.high_gm_sz = BYTES_TO_MB(param.high_gm_sz); + mutex_lock(&gvt->lock); vgpu = __intel_gvt_create_vgpu(gvt, ¶m); - if (IS_ERR(vgpu)) - return vgpu; - - /* calculate left instance change for types */ - intel_gvt_update_vgpu_types(gvt); + if (!IS_ERR(vgpu)) + /* calculate left instance change for types */ + intel_gvt_update_vgpu_types(gvt); + mutex_unlock(&gvt->lock); return vgpu; } @@ -476,7 +477,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, * @engine_mask: engines to reset for GT reset * * This function is called when user wants to reset a virtual GPU through - * device model reset or GT reset. The caller should hold the gvt lock. + * device model reset or GT reset. The caller should hold the vgpu lock. * * vGPU Device Model Level Reset (DMLR) simulates the PCI level reset to reset * the whole vGPU to default state as when it is created. This vGPU function @@ -516,9 +517,9 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, * scheduler when the reset is triggered by current vgpu. */ if (scheduler->current_vgpu == NULL) { - mutex_unlock(&gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); intel_gvt_wait_vgpu_idle(vgpu); - mutex_lock(&gvt->lock); + mutex_lock(&vgpu->vgpu_lock); } intel_vgpu_reset_submission(vgpu, resetting_eng); @@ -558,7 +559,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, */ void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu) { - mutex_lock(&vgpu->gvt->lock); + mutex_lock(&vgpu->vgpu_lock); intel_gvt_reset_vgpu_locked(vgpu, true, 0); - mutex_unlock(&vgpu->gvt->lock); + mutex_unlock(&vgpu->vgpu_lock); } -- 2.30.2