drm/i915: Fix locking around GuC firmware load
authorDaniel Stone <daniels@collabora.com>
Tue, 3 Nov 2015 21:42:31 +0000 (21:42 +0000)
committerDave Airlie <airlied@redhat.com>
Wed, 4 Nov 2015 03:23:15 +0000 (13:23 +1000)
The GuC firmware load requires struct_mutex to create a GEM object,
but this collides badly with request_firmware. Move struct_mutex
locking down into the loader itself, so we don't hold it across the
entire load process, including request_firmware.

[   20.451400] ======================================================
[   20.451420] [ INFO: possible circular locking dependency detected ]
[   20.451441] 4.3.0-rc5+ #1 Tainted: G        W
[   20.451457] -------------------------------------------------------
[   20.451477] plymouthd/371 is trying to acquire lock:
[   20.451494]  (&dev->struct_mutex){+.+.+.}, at: [<ffffffffa0093c62>]
drm_gem_mmap+0x112/0x290 [drm]
[   20.451538]
               but task is already holding lock:
[   20.451557]  (&mm->mmap_sem){++++++}, at: [<ffffffff811fd9ac>]
vm_mmap_pgoff+0x8c/0xf0
[   20.451591]
               which lock already depends on the new lock.

[   20.451617]
               the existing dependency chain (in reverse order) is:
[   20.451640]
               -> #3 (&mm->mmap_sem){++++++}:
[   20.451661]        [<ffffffff8110644e>] lock_acquire+0xce/0x1c0
[   20.451683]        [<ffffffff8120ec9a>] __might_fault+0x7a/0xa0
[   20.451705]        [<ffffffff8127e34e>] filldir+0x9e/0x130
[   20.451726]        [<ffffffff81295b86>] dcache_readdir+0x186/0x230
[   20.451748]        [<ffffffff8127e117>] iterate_dir+0x97/0x130
[   20.451769]        [<ffffffff8127e66a>] SyS_getdents+0x9a/0x130
[   20.451790]        [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76
[   20.451829]
               -> #2 (&sb->s_type->i_mutex_key#2){+.+.+.}:
[   20.451852]        [<ffffffff8110644e>] lock_acquire+0xce/0x1c0
[   20.451872]        [<ffffffff8184b516>] mutex_lock_nested+0x86/0x400
[   20.451893]        [<ffffffff81277790>] walk_component+0x1d0/0x2a0
[   20.451914]        [<ffffffff812779f0>] link_path_walk+0x190/0x5a0
[   20.451935]        [<ffffffff8127803b>] path_openat+0xab/0x1260
[   20.451955]        [<ffffffff8127a651>] do_filp_open+0x91/0x100
[   20.451975]        [<ffffffff81267e67>] file_open_name+0xf7/0x150
[   20.451995]        [<ffffffff81267ef3>] filp_open+0x33/0x60
[   20.452014]        [<ffffffff8157e1e7>] _request_firmware+0x277/0x880
[   20.452038]        [<ffffffff8157e9e4>] request_firmware_work_func+0x34/0x80
[   20.452060]        [<ffffffff810c7020>] process_one_work+0x230/0x680
[   20.452082]        [<ffffffff810c74be>] worker_thread+0x4e/0x450
[   20.452102]        [<ffffffff810ce511>] kthread+0x101/0x120
[   20.452121]        [<ffffffff8184f66f>] ret_from_fork+0x3f/0x70
[   20.452140]
               -> #1 (umhelper_sem){++++.+}:
[   20.452159]        [<ffffffff8110644e>] lock_acquire+0xce/0x1c0
[   20.452178]        [<ffffffff8184c5c1>] down_read+0x51/0xa0
[   20.452197]        [<ffffffff810c203b>]
usermodehelper_read_trylock+0x5b/0x130
[   20.452221]        [<ffffffff8157e147>] _request_firmware+0x1d7/0x880
[   20.452242]        [<ffffffff8157e821>] request_firmware+0x31/0x50
[   20.452262]        [<ffffffffa01b54a4>]
intel_guc_ucode_init+0xf4/0x400 [i915]
[   20.452305]        [<ffffffffa0213913>] i915_driver_load+0xd63/0x16e0 [i915]
[   20.452343]        [<ffffffffa00987d9>] drm_dev_register+0xa9/0xc0 [drm]
[   20.452369]        [<ffffffffa009ae3d>] drm_get_pci_dev+0x8d/0x1e0 [drm]
[   20.452396]        [<ffffffffa01521e4>] i915_pci_probe+0x34/0x50 [i915]
[   20.452421]        [<ffffffff81464675>] local_pci_probe+0x45/0xa0
[   20.452443]        [<ffffffff81465a6d>] pci_device_probe+0xfd/0x140
[   20.452464]        [<ffffffff8156a2e4>] driver_probe_device+0x224/0x480
[   20.452486]        [<ffffffff8156a5c8>] __driver_attach+0x88/0x90
[   20.452505]        [<ffffffff81567cf3>] bus_for_each_dev+0x73/0xc0
[   20.452526]        [<ffffffff81569a7e>] driver_attach+0x1e/0x20
[   20.452546]        [<ffffffff815695ae>] bus_add_driver+0x1ee/0x280
[   20.452566]        [<ffffffff8156b100>] driver_register+0x60/0xe0
[   20.453197]        [<ffffffff81464050>] __pci_register_driver+0x60/0x70
[   20.453845]        [<ffffffffa009b070>] drm_pci_init+0xe0/0x110 [drm]
[   20.454497]        [<ffffffffa027f092>] 0xffffffffa027f092
[   20.455156]        [<ffffffff81002123>] do_one_initcall+0xb3/0x200
[   20.455796]        [<ffffffff811d8c01>] do_init_module+0x5f/0x1e7
[   20.456434]        [<ffffffff8114c4e6>] load_module+0x2126/0x27d0
[   20.457071]        [<ffffffff8114cdf9>] SyS_finit_module+0xb9/0xf0
[   20.457738]        [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76
[   20.458370]
               -> #0 (&dev->struct_mutex){+.+.+.}:
[   20.459773]        [<ffffffff8110584f>] __lock_acquire+0x191f/0x1ba0
[   20.460451]        [<ffffffff8110644e>] lock_acquire+0xce/0x1c0
[   20.461074]        [<ffffffffa0093c88>] drm_gem_mmap+0x138/0x290 [drm]
[   20.461693]        [<ffffffff8121a5ec>] mmap_region+0x3ec/0x670
[   20.462298]        [<ffffffff8121abb2>] do_mmap+0x342/0x420
[   20.462901]        [<ffffffff811fd9d2>] vm_mmap_pgoff+0xb2/0xf0
[   20.463532]        [<ffffffff81218f62>] SyS_mmap_pgoff+0x1f2/0x290
[   20.464118]        [<ffffffff8102187b>] SyS_mmap+0x1b/0x30
[   20.464702]        [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76
[   20.465289]
               other info that might help us debug this:

[   20.467179] Chain exists of:
                 &dev->struct_mutex --> &sb->s_type->i_mutex_key#2 -->
&mm->mmap_sem

[   20.468928]  Possible unsafe locking scenario:

[   20.470161]        CPU0                    CPU1
[   20.470745]        ----                    ----
[   20.471325]   lock(&mm->mmap_sem);
[   20.471902]                                lock(&sb->s_type->i_mutex_key#2);
[   20.472538]                                lock(&mm->mmap_sem);
[   20.473118]   lock(&dev->struct_mutex);
[   20.473704]
                *** DEADLOCK ***

Signed-off-by: Daniel Stone <daniels@collabora.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/intel_guc_loader.c

index 2336af92d94bb5c5d3d101227cce4b8041feffb7..b4741d121a744f67b95f1d4af95fd0518e1ddf4f 100644 (file)
@@ -406,10 +406,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
         * working irqs for e.g. gmbus and dp aux transfers. */
        intel_modeset_init(dev);
 
-       /* intel_guc_ucode_init() needs the mutex to allocate GEM objects */
-       mutex_lock(&dev->struct_mutex);
        intel_guc_ucode_init(dev);
-       mutex_unlock(&dev->struct_mutex);
 
        ret = i915_gem_init(dev);
        if (ret)
@@ -452,9 +449,7 @@ cleanup_gem:
        i915_gem_context_fini(dev);
        mutex_unlock(&dev->struct_mutex);
 cleanup_irq:
-       mutex_lock(&dev->struct_mutex);
        intel_guc_ucode_fini(dev);
-       mutex_unlock(&dev->struct_mutex);
        drm_irq_uninstall(dev);
 cleanup_gem_stolen:
        i915_gem_cleanup_stolen(dev);
@@ -1193,8 +1188,8 @@ int i915_driver_unload(struct drm_device *dev)
        /* Flush any outstanding unpin_work. */
        flush_workqueue(dev_priv->wq);
 
-       mutex_lock(&dev->struct_mutex);
        intel_guc_ucode_fini(dev);
+       mutex_lock(&dev->struct_mutex);
        i915_gem_cleanup_ringbuffer(dev);
        i915_gem_context_fini(dev);
        mutex_unlock(&dev->struct_mutex);
index a17b6a56be83283c990a6f40516c71c8c9e6a61f..3541f76c65a7ab8ce1559bd34d14acb04ea47858 100644 (file)
@@ -504,7 +504,9 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw)
                        guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found,
                        guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted);
 
+       mutex_lock(&dev->struct_mutex);
        obj = i915_gem_object_create_from_data(dev, fw->data, fw->size);
+       mutex_unlock(&dev->struct_mutex);
        if (IS_ERR_OR_NULL(obj)) {
                err = obj ? PTR_ERR(obj) : -ENOMEM;
                goto fail;
@@ -540,8 +542,6 @@ fail:
  * @dev:       drm device
  *
  * Called early during driver load, but after GEM is initialised.
- * The device struct_mutex must be held by the caller, as we're
- * going to allocate a GEM object to hold the firmware image.
  *
  * The firmware will be transferred to the GuC's memory later,
  * when intel_guc_ucode_load() is called.
@@ -598,9 +598,11 @@ void intel_guc_ucode_fini(struct drm_device *dev)
        direct_interrupts_to_host(dev_priv);
        i915_guc_submission_fini(dev);
 
+       mutex_lock(&dev->struct_mutex);
        if (guc_fw->guc_fw_obj)
                drm_gem_object_unreference(&guc_fw->guc_fw_obj->base);
        guc_fw->guc_fw_obj = NULL;
+       mutex_unlock(&dev->struct_mutex);
 
        guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
 }