drm/amdgpu: move get_user_pages out of amdgpu_ttm_tt_pin_userptr v6
author: Christian König <christian.koenig@amd.com>
Tue, 23 Feb 2016 11:36:59 +0000 (12:36 +0100)
committer: Alex Deucher <alexander.deucher@amd.com>
Tue, 8 Mar 2016 16:01:50 +0000 (11:01 -0500)
That avoids lock inversion between the BO reservation lock
and the anon_vma lock.

v2:
* Changed amdgpu_bo_list_entry.user_pages to an array of pointers
* Lock mmap_sem only for get_user_pages
* Added invalidation of unbound userpointer BOs
* Fixed memory leak and page reference leak

v3 (chk):
* Revert locking mmap_sem only for get_user_pages
* Revert adding invalidation of unbound userpointer BOs
* Sanitize and fix error handling

v4 (chk):
* Init userpages pointer everywhere.
* Fix error handling when get_user_pages() fails.
* Add invalidation of unbound userpointer BOs again.

v5 (chk):
* Add maximum number of tries.

v6 (chk):
* Fix error handling when we run out of tries.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> (v4)
Acked-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index e0727de9c2b27f690b54397f0d8b8b6ae49abfc8..e32ab13227d488285e2ffdda6fa546cabc595782 100644 (file)
@@ -471,6 +471,8 @@ struct amdgpu_bo_list_entry {
        struct ttm_validate_buffer      tv;
        struct amdgpu_bo_va             *bo_va;
        uint32_t                        priority;
+       struct page                     **user_pages;
+       int                             user_invalidated;
 };
 
 struct amdgpu_bo_va_mapping {
@@ -2365,11 +2367,14 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
                       struct amdgpu_ring **out_ring);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
                                     uint32_t flags);
 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
                                  unsigned long end);
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+                                      int *last_invalidated);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
                                 struct ttm_mem_reg *mem);
index 9763e52886fe1ceeb9dd3dbe1d147a94b7644d2b..eacd810fc09b12d43607e0f00e77acdd87a5b768 100644 (file)
@@ -200,6 +200,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 
                list_add_tail(&list->array[i].tv.head,
                              &bucket[priority]);
+               list->array[i].user_pages = NULL;
        }
 
        /* Connect the sorted buckets in the output list. */
index 7833dfb1ff6e7457e3035e6c0547880bb09eebd1..4f5ef4149e8773a52f4dee030d975295336440a4 100644 (file)
@@ -111,6 +111,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
        p->uf_entry.priority = 0;
        p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
        p->uf_entry.tv.shared = true;
+       p->uf_entry.user_pages = NULL;
 
        drm_gem_object_unreference_unlocked(gobj);
        return 0;
@@ -297,6 +298,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 
        list_for_each_entry(lobj, validated, tv.head) {
                struct amdgpu_bo *bo = lobj->robj;
+               bool binding_userptr = false;
                struct mm_struct *usermm;
                uint32_t domain;
 
@@ -304,6 +306,15 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
                if (usermm && usermm != current->mm)
                        return -EPERM;
 
+               /* Check if we have user pages and nobody bound the BO already */
+               if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
+                       size_t size = sizeof(struct page *);
+
+                       size *= bo->tbo.ttm->num_pages;
+                       memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
+                       binding_userptr = true;
+               }
+
                if (bo->pin_count)
                        continue;
 
@@ -334,6 +345,11 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
                        }
                        return r;
                }
+
+               if (binding_userptr) {
+                       drm_free_large(lobj->user_pages);
+                       lobj->user_pages = NULL;
+               }
        }
        return 0;
 }
@@ -342,8 +358,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                                union drm_amdgpu_cs *cs)
 {
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+       struct amdgpu_bo_list_entry *e;
        struct list_head duplicates;
        bool need_mmap_lock = false;
+       unsigned i, tries = 10;
        int r;
 
        INIT_LIST_HEAD(&p->validated);
@@ -364,9 +382,81 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        if (need_mmap_lock)
                down_read(&current->mm->mmap_sem);
 
-       r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
-       if (unlikely(r != 0))
-               goto error_reserve;
+       while (1) {
+               struct list_head need_pages;
+               unsigned i;
+
+               r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+                                          &duplicates);
+               if (unlikely(r != 0))
+                       goto error_free_pages;
+
+               /* Without a BO list we don't have userptr BOs */
+               if (!p->bo_list)
+                       break;
+
+               INIT_LIST_HEAD(&need_pages);
+               for (i = p->bo_list->first_userptr;
+                    i < p->bo_list->num_entries; ++i) {
+
+                       e = &p->bo_list->array[i];
+
+                       if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
+                                &e->user_invalidated) && e->user_pages) {
+
+                               /* We acquired a page array, but somebody
+                                * invalidated it. Free it and try again
+                                */
+                               release_pages(e->user_pages,
+                                             e->robj->tbo.ttm->num_pages,
+                                             false);
+                               drm_free_large(e->user_pages);
+                               e->user_pages = NULL;
+                       }
+
+                       if (e->robj->tbo.ttm->state != tt_bound &&
+                           !e->user_pages) {
+                               list_del(&e->tv.head);
+                               list_add(&e->tv.head, &need_pages);
+
+                               amdgpu_bo_unreserve(e->robj);
+                       }
+               }
+
+               if (list_empty(&need_pages))
+                       break;
+
+               /* Unreserve everything again. */
+               ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+
+               /* We tried too often, just abort */
+               if (!--tries) {
+                       r = -EDEADLK;
+                       goto error_free_pages;
+               }
+
+               /* Fill the page arrays for all userptrs. */
+               list_for_each_entry(e, &need_pages, tv.head) {
+                       struct ttm_tt *ttm = e->robj->tbo.ttm;
+
+                       e->user_pages = drm_calloc_large(ttm->num_pages,
+                                                        sizeof(struct page*));
+                       if (!e->user_pages) {
+                               r = -ENOMEM;
+                               goto error_free_pages;
+                       }
+
+                       r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
+                       if (r) {
+                               drm_free_large(e->user_pages);
+                               e->user_pages = NULL;
+                               goto error_free_pages;
+                       }
+               }
+
+               /* And try again. */
+               list_splice(&need_pages, &p->validated);
+       }
 
        amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
 
@@ -398,10 +488,26 @@ error_validate:
                ttm_eu_backoff_reservation(&p->ticket, &p->validated);
        }
 
-error_reserve:
+error_free_pages:
+
        if (need_mmap_lock)
                up_read(&current->mm->mmap_sem);
 
+       if (p->bo_list) {
+               for (i = p->bo_list->first_userptr;
+                    i < p->bo_list->num_entries; ++i) {
+                       e = &p->bo_list->array[i];
+
+                       if (!e->user_pages)
+                               continue;
+
+                       release_pages(e->user_pages,
+                                     e->robj->tbo.ttm->num_pages,
+                                     false);
+                       drm_free_large(e->user_pages);
+               }
+       }
+
        return r;
 }
 
index 2e26a517f2d6301e295f1a8f736f25072011b8c2..cb27754ec07fe375cf3c6c06d01f4a6caf20d516 100644 (file)
@@ -274,18 +274,23 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
        if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
                down_read(&current->mm->mmap_sem);
+
+               r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+                                                bo->tbo.ttm->pages);
+               if (r)
+                       goto unlock_mmap_sem;
+
                r = amdgpu_bo_reserve(bo, true);
-               if (r) {
-                       up_read(&current->mm->mmap_sem);
-                       goto release_object;
-               }
+               if (r)
+                       goto free_pages;
 
                amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
                r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
                amdgpu_bo_unreserve(bo);
-               up_read(&current->mm->mmap_sem);
                if (r)
-                       goto release_object;
+                       goto free_pages;
+
+               up_read(&current->mm->mmap_sem);
        }
 
        r = drm_gem_handle_create(filp, gobj, &handle);
@@ -297,6 +302,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
        args->handle = handle;
        return 0;
 
+free_pages:
+       release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false);
+
+unlock_mmap_sem:
+       up_read(&current->mm->mmap_sem);
+
 release_object:
        drm_gem_object_unreference_unlocked(gobj);
 
index 53e6279707989a1b9c3224745e4b5693da73b896..ed0d2e7cc57daac9cf88153de73f9c65099baf01 100644 (file)
@@ -508,22 +508,18 @@ struct amdgpu_ttm_tt {
        uint32_t                userflags;
        spinlock_t              guptasklock;
        struct list_head        guptasks;
+       atomic_t                mmu_invalidations;
 };
 
-/* prepare the sg table with the user pages */
-static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-       struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
-       unsigned pinned = 0, nents;
-       int r;
-
        int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
-       enum dma_data_direction direction = write ?
-               DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+       unsigned pinned = 0;
+       int r;
 
        if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
-               /* check that we only pin down anonymous memory
+               /* check that we only use anonymous memory
                   to prevent problems with writeback */
                unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
                struct vm_area_struct *vma;
@@ -536,7 +532,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
        do {
                unsigned num_pages = ttm->num_pages - pinned;
                uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-               struct page **pages = ttm->pages + pinned;
+               struct page **p = pages + pinned;
                struct amdgpu_ttm_gup_task_list guptask;
 
                guptask.task = current;
@@ -545,7 +541,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
                spin_unlock(&gtt->guptasklock);
 
                r = get_user_pages(current, current->mm, userptr, num_pages,
-                                  write, 0, pages, NULL);
+                                  write, 0, p, NULL);
 
                spin_lock(&gtt->guptasklock);
                list_del(&guptask.list);
@@ -558,6 +554,25 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 
        } while (pinned < ttm->num_pages);
 
+       return 0;
+
+release_pages:
+       release_pages(pages, pinned, 0);
+       return r;
+}
+
+/* prepare the sg table with the user pages */
+static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+{
+       struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
+       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       unsigned nents;
+       int r;
+
+       int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+       enum dma_data_direction direction = write ?
+               DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
        r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
                                      ttm->num_pages << PAGE_SHIFT,
                                      GFP_KERNEL);
@@ -576,9 +591,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 
 release_sg:
        kfree(ttm->sg);
-
-release_pages:
-       release_pages(ttm->pages, pinned, 0);
        return r;
 }
 
@@ -803,6 +815,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
        gtt->userflags = flags;
        spin_lock_init(&gtt->guptasklock);
        INIT_LIST_HEAD(&gtt->guptasks);
+       atomic_set(&gtt->mmu_invalidations, 0);
 
        return 0;
 }
@@ -840,9 +853,21 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
        }
        spin_unlock(&gtt->guptasklock);
 
+       atomic_inc(&gtt->mmu_invalidations);
+
        return true;
 }
 
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+                                      int *last_invalidated)
+{
+       struct amdgpu_ttm_tt *gtt = (void *)ttm;
+       int prev_invalidated = *last_invalidated;
+
+       *last_invalidated = atomic_read(&gtt->mmu_invalidations);
+       return prev_invalidated != *last_invalidated;
+}
+
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 {
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
index a0896c761108112a88e551a9ede410791eb4a3f8..cc967b44f1048a8b3c4279b2c7329125fd1bba89 100644 (file)
@@ -95,6 +95,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
        entry->priority = 0;
        entry->tv.bo = &vm->page_directory->tbo;
        entry->tv.shared = true;
+       entry->user_pages = NULL;
        list_add(&entry->tv.head, validated);
 }
 
@@ -1186,6 +1187,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
                entry->priority = 0;
                entry->tv.bo = &entry->robj->tbo;
                entry->tv.shared = true;
+               entry->user_pages = NULL;
                vm->page_tables[pt_idx].addr = 0;
        }