gpu: ion: Fix performance issue in faulting code
authorRebecca Schultz Zavin <rebecca@android.com>
Fri, 13 Dec 2013 22:24:45 +0000 (14:24 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 14 Dec 2013 16:57:17 +0000 (08:57 -0800)
Previously the code to fault ion buffers in one page at a time had a
performance problem caused by the requirement to traverse the sg list
looking for the right page to load in (a result of the fact that the items in
the list may not be of uniform size).  To fix the problem, for buffers
that will be faulted in, also keep a flat array of all the pages in the buffer
to use from the fault handler.  To recover some of the additional memory
footprint this creates per buffer, dirty bits used to indicate which
pages have been faulted into the CPU are now stored in the low bit of each
page struct pointer in the page array.

Signed-off-by: Rebecca Schultz Zavin <rebecca@android.com>
[jstultz: modified patch to apply to staging directory]
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/staging/android/ion/ion.c
drivers/staging/android/ion/ion_cma_heap.c
drivers/staging/android/ion/ion_heap.c
drivers/staging/android/ion/ion_priv.h
drivers/staging/android/ion/ion_system_heap.c

index ddf8fde9ae938a202df44a28f1a33b5a515a0692..38d65c2b296aef6e9e762d22b3ed04026111f1b9 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
+#include <linux/vmalloc.h>
 #include <linux/debugfs.h>
 #include <linux/dma-buf.h>
 
@@ -104,13 +105,33 @@ struct ion_handle {
 
 bool ion_buffer_fault_user_mappings(struct ion_buffer *buffer)
 {
-        return ((buffer->flags & ION_FLAG_CACHED) &&
-                !(buffer->flags & ION_FLAG_CACHED_NEEDS_SYNC));
+       return ((buffer->flags & ION_FLAG_CACHED) &&
+               !(buffer->flags & ION_FLAG_CACHED_NEEDS_SYNC));
 }
 
 bool ion_buffer_cached(struct ion_buffer *buffer)
 {
-        return !!(buffer->flags & ION_FLAG_CACHED);
+       return !!(buffer->flags & ION_FLAG_CACHED);
+}
+
+static inline struct page *ion_buffer_page(struct page *page)
+{
+       return (struct page *)((unsigned long)page & ~(1UL));
+}
+
+static inline bool ion_buffer_page_is_dirty(struct page *page)
+{
+       return !!((unsigned long)page & 1UL);
+}
+
+static inline void ion_buffer_page_dirty(struct page **page)
+{
+       *page = (struct page *)((unsigned long)(*page) | 1UL);
+}
+
+static inline void ion_buffer_page_clean(struct page **page)
+{
+       *page = (struct page *)((unsigned long)(*page) & ~(1UL));
 }
 
 /* this function should only be called while dev->lock is held */
@@ -139,8 +160,6 @@ static void ion_buffer_add(struct ion_device *dev,
        rb_insert_color(&buffer->node, &dev->buffers);
 }
 
-static int ion_buffer_alloc_dirty(struct ion_buffer *buffer);
-
 /* this function should only be called while dev->lock is held */
 static struct ion_buffer *ion_buffer_create(struct ion_heap *heap,
                                     struct ion_device *dev,
@@ -185,17 +204,23 @@ static struct ion_buffer *ion_buffer_create(struct ion_heap *heap,
        }
        buffer->sg_table = table;
        if (ion_buffer_fault_user_mappings(buffer)) {
-               for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents,
-                           i) {
-                       if (sg_dma_len(sg) == PAGE_SIZE)
-                               continue;
-                       pr_err("%s: cached mappings that will be faulted in "
-                              "must have pagewise sg_lists\n", __func__);
-                       ret = -EINVAL;
-                       goto err;
+               int num_pages = PAGE_ALIGN(buffer->size) / PAGE_SIZE;
+               struct scatterlist *sg;
+               int i, j, k = 0;
+
+               buffer->pages = vmalloc(sizeof(struct page *) * num_pages);
+               if (!buffer->pages) {
+                       ret = -ENOMEM;
+                       goto err1;
+               }
+
+               for_each_sg(table->sgl, sg, table->nents, i) {
+                       struct page *page = sg_page(sg);
+
+                       for (j = 0; j < sg_dma_len(sg) / PAGE_SIZE; j++)
+                               buffer->pages[k++] = page++;
                }
 
-               ret = ion_buffer_alloc_dirty(buffer);
                if (ret)
                        goto err;
        }
@@ -222,6 +247,9 @@ static struct ion_buffer *ion_buffer_create(struct ion_heap *heap,
 err:
        heap->ops->unmap_dma(heap, buffer);
        heap->ops->free(buffer);
+err1:
+       if (buffer->pages)
+               vfree(buffer->pages);
 err2:
        kfree(buffer);
        return ERR_PTR(ret);
@@ -233,8 +261,8 @@ void ion_buffer_destroy(struct ion_buffer *buffer)
                buffer->heap->ops->unmap_kernel(buffer->heap, buffer);
        buffer->heap->ops->unmap_dma(buffer->heap, buffer);
        buffer->heap->ops->free(buffer);
-       if (buffer->flags & ION_FLAG_CACHED)
-               kfree(buffer->dirty);
+       if (buffer->pages)
+               vfree(buffer->pages);
        kfree(buffer);
 }
 
@@ -764,17 +792,6 @@ static void ion_unmap_dma_buf(struct dma_buf_attachment *attachment,
 {
 }
 
-static int ion_buffer_alloc_dirty(struct ion_buffer *buffer)
-{
-       unsigned long pages = buffer->sg_table->nents;
-       unsigned long length = (pages + BITS_PER_LONG - 1)/BITS_PER_LONG;
-
-       buffer->dirty = kzalloc(length * sizeof(unsigned long), GFP_KERNEL);
-       if (!buffer->dirty)
-               return -ENOMEM;
-       return 0;
-}
-
 struct ion_vma_list {
        struct list_head list;
        struct vm_area_struct *vma;
@@ -784,9 +801,9 @@ static void ion_buffer_sync_for_device(struct ion_buffer *buffer,
                                       struct device *dev,
                                       enum dma_data_direction dir)
 {
-       struct scatterlist *sg;
-       int i;
        struct ion_vma_list *vma_list;
+       int pages = PAGE_ALIGN(buffer->size) / PAGE_SIZE;
+       int i;
 
        pr_debug("%s: syncing for device %s\n", __func__,
                 dev ? dev_name(dev) : "null");
@@ -795,11 +812,12 @@ static void ion_buffer_sync_for_device(struct ion_buffer *buffer,
                return;
 
        mutex_lock(&buffer->lock);
-       for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents, i) {
-               if (!test_bit(i, buffer->dirty))
-                       continue;
-               dma_sync_sg_for_device(dev, sg, 1, dir);
-               clear_bit(i, buffer->dirty);
+       for (i = 0; i < pages; i++) {
+               struct page *page = buffer->pages[i];
+
+               if (ion_buffer_page_is_dirty(page))
+                       __dma_page_cpu_to_dev(page, 0, PAGE_SIZE, dir);
+               ion_buffer_page_clean(buffer->pages + i);
        }
        list_for_each_entry(vma_list, &buffer->vmas, list) {
                struct vm_area_struct *vma = vma_list->vma;
@@ -813,21 +831,18 @@ static void ion_buffer_sync_for_device(struct ion_buffer *buffer,
 int ion_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        struct ion_buffer *buffer = vma->vm_private_data;
-       struct scatterlist *sg;
-       int i;
+       int ret;
 
        mutex_lock(&buffer->lock);
-       set_bit(vmf->pgoff, buffer->dirty);
+       ion_buffer_page_dirty(buffer->pages + vmf->pgoff);
 
-       for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents, i) {
-               if (i != vmf->pgoff)
-                       continue;
-               dma_sync_sg_for_cpu(NULL, sg, 1, DMA_BIDIRECTIONAL);
-               vm_insert_page(vma, (unsigned long)vmf->virtual_address,
-                              sg_page(sg));
-               break;
-       }
+       BUG_ON(!buffer->pages || !buffer->pages[vmf->pgoff]);
+       ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
+                            ion_buffer_page(buffer->pages[vmf->pgoff]));
        mutex_unlock(&buffer->lock);
+       if (ret)
+               return VM_FAULT_ERROR;
+
        return VM_FAULT_NOPAGE;
 }
 
index 74e3c77f1873ff7f0798fa0068ad73e3bb42fbc6..86b6cf5da824d7d9f5fcb0bc87cff9ed3faca87f 100644 (file)
@@ -58,29 +58,6 @@ int ion_cma_get_sgtable(struct device *dev, struct sg_table *sgt,
        return 0;
 }
 
-/*
- * Create scatter-list for each page of the already allocated DMA buffer.
- */
-int ion_cma_get_sgtable_per_page(struct device *dev, struct sg_table *sgt,
-                       void *cpu_addr, dma_addr_t handle, size_t size)
-{
-       struct page *page = virt_to_page(cpu_addr);
-       int ret, i;
-       struct scatterlist *sg;
-
-       ret = sg_alloc_table(sgt, PAGE_ALIGN(size) / PAGE_SIZE, GFP_KERNEL);
-       if (unlikely(ret))
-               return ret;
-
-       sg = sgt->sgl;
-       for (i = 0; i < (PAGE_ALIGN(size) / PAGE_SIZE); i++) {
-               page = virt_to_page(cpu_addr + (i * PAGE_SIZE));
-               sg_set_page(sg, page, PAGE_SIZE, 0);
-               sg = sg_next(sg);
-       }
-       return 0;
-}
-
 /* ION CMA heap operations functions */
 static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
                            unsigned long len, unsigned long align,
@@ -111,15 +88,9 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
                goto free_mem;
        }
 
-       if (ion_buffer_fault_user_mappings(buffer)) {
-               if (ion_cma_get_sgtable_per_page
-                       (dev, info->table, info->cpu_addr, info->handle, len))
-                       goto free_table;
-       } else {
-               if (ion_cma_get_sgtable
-                       (dev, info->table, info->cpu_addr, info->handle, len))
-                       goto free_table;
-       }
+       if (ion_cma_get_sgtable
+           (dev, info->table, info->cpu_addr, info->handle, len))
+               goto free_table;
        /* keep this for memory release */
        buffer->priv_virt = info;
        dev_dbg(dev, "Allocate buffer %p\n", buffer);
index 3fc1dcc8153ba91452529837b7ce3316720fb38c..cc2a42547605d797df2ba8d7469253708ae2b457 100644 (file)
@@ -134,8 +134,22 @@ end:
        return ret;
 }
 
-void ion_heap_free_page(struct ion_buffer *buffer, struct page *page,
-                      unsigned int order)
+struct page *ion_heap_alloc_pages(struct ion_buffer *buffer, gfp_t gfp_flags,
+                                 unsigned int order)
+{
+       struct page *page = alloc_pages(gfp_flags, order);
+
+       if (!page)
+               return page;
+
+       if (ion_buffer_fault_user_mappings(buffer))
+               split_page(page, order);
+
+       return page;
+}
+
+void ion_heap_free_pages(struct ion_buffer *buffer, struct page *page,
+                        unsigned int order)
 {
        int i;
 
index f5a09b6a4182677706a8390ea15da36889bf1ae1..965471afa47eb7a847faee01e70aa26aeb1eb38e 100644 (file)
@@ -46,9 +46,8 @@ struct ion_buffer *ion_handle_buffer(struct ion_handle *handle);
  * @vaddr:             the kenrel mapping if kmap_cnt is not zero
  * @dmap_cnt:          number of times the buffer is mapped for dma
  * @sg_table:          the sg table for the buffer if dmap_cnt is not zero
- * @dirty:             bitmask representing which pages of this buffer have
- *                     been dirtied by the cpu and need cache maintenance
- *                     before dma
+ * @pages:             flat array of pages in the buffer -- used by fault
+ *                     handler and only valid for buffers that are faulted in
  * @vmas:              list of vma's mapping this buffer
  * @handle_count:      count of handles referencing this buffer
  * @task_comm:         taskcomm of last client to reference this buffer in a
@@ -75,7 +74,7 @@ struct ion_buffer {
        void *vaddr;
        int dmap_cnt;
        struct sg_table *sg_table;
-       unsigned long *dirty;
+       struct page **pages;
        struct list_head vmas;
        /* used to track orphaned buffers */
        int handle_count;
@@ -213,6 +212,19 @@ int ion_heap_map_user(struct ion_heap *, struct ion_buffer *,
                        struct vm_area_struct *);
 int ion_heap_buffer_zero(struct ion_buffer *buffer);
 
+/**
+ * ion_heap_alloc_pages - allocate pages from alloc_pages
+ * @buffer:            the buffer to allocate for, used to extract the flags
+ * @gfp_flags:         the gfp_t for the allocation
+ * @order:             the order of the allocation
+ *
+ * This function allocates from alloc_pages and also does any other
+ * necessary operations based on the buffer->flags.  For buffers which
+ * will be faulted in the pages are split using split_page
+ */
+struct page *ion_heap_alloc_pages(struct ion_buffer *buffer, gfp_t gfp_flags,
+                                 unsigned int order);
+
 /**
  * ion_heap_init_deferred_free -- initialize deferred free functionality
  * @heap:              the heap
index 344f7538400af59366e386f4f28452b839fd4255..b9b10365f450861466abd706104e37052a9546f6 100644 (file)
@@ -64,7 +64,6 @@ static struct page *alloc_buffer_page(struct ion_system_heap *heap,
                                      unsigned long order)
 {
        bool cached = ion_buffer_cached(buffer);
-       bool split_pages = ion_buffer_fault_user_mappings(buffer);
        struct ion_page_pool *pool = heap->pools[order_to_index(order)];
        struct page *page;
 
@@ -75,7 +74,7 @@ static struct page *alloc_buffer_page(struct ion_system_heap *heap,
 
                if (order > 4)
                        gfp_flags = high_order_gfp_flags;
-               page = alloc_pages(gfp_flags, order);
+               page = ion_heap_alloc_pages(buffer, gfp_flags, order);
                if (!page)
                        return 0;
                arm_dma_ops.sync_single_for_device(NULL,
@@ -85,8 +84,6 @@ static struct page *alloc_buffer_page(struct ion_system_heap *heap,
        if (!page)
                return 0;
 
-       if (split_pages)
-               split_page(page, order);
        return page;
 }
 
@@ -153,7 +150,6 @@ static int ion_system_heap_allocate(struct ion_heap *heap,
        int i = 0;
        long size_remaining = PAGE_ALIGN(size);
        unsigned int max_order = orders[0];
-       bool split_pages = ion_buffer_fault_user_mappings(buffer);
 
        INIT_LIST_HEAD(&pages);
        while (size_remaining > 0) {
@@ -170,28 +166,15 @@ static int ion_system_heap_allocate(struct ion_heap *heap,
        if (!table)
                goto err;
 
-       if (split_pages)
-               ret = sg_alloc_table(table, PAGE_ALIGN(size) / PAGE_SIZE,
-                                    GFP_KERNEL);
-       else
-               ret = sg_alloc_table(table, i, GFP_KERNEL);
-
+       ret = sg_alloc_table(table, i, GFP_KERNEL);
        if (ret)
                goto err1;
 
        sg = table->sgl;
        list_for_each_entry_safe(info, tmp_info, &pages, list) {
                struct page *page = info->page;
-               if (split_pages) {
-                       for (i = 0; i < (1 << info->order); i++) {
-                               sg_set_page(sg, page + i, PAGE_SIZE, 0);
-                               sg = sg_next(sg);
-                       }
-               } else {
-                       sg_set_page(sg, page, (1 << info->order) * PAGE_SIZE,
-                                   0);
-                       sg = sg_next(sg);
-               }
+               sg_set_page(sg, page, (1 << info->order) * PAGE_SIZE, 0);
+               sg = sg_next(sg);
                list_del(&info->list);
                kfree(info);
        }