From fc18afcf5fb2d8776414076d81d907d8be82b362 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Wed, 26 Sep 2018 15:36:52 +0200
Subject: [PATCH] drm/vmwgfx: Use a validation context allocator for
 relocations and validations

A common trait of these objects is that they are allocated during the
command validation phase and freed after command submission. Furthermore
they are accessed by a single thread only. So provide a simple unprotected
stack-like allocator from which these objects can be allocated. Their
memory is freed with the validation context when the command submission
is done.

Note that the mm subsystem maintains a per-cpu cache of single pages to
make single page allocation and freeing efficient.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h        | 12 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c    | 80 ++++++++++---------
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.c | 92 ++++++++++++++++++----
 drivers/gpu/drm/vmwgfx/vmwgfx_validation.h | 12 ++-
 4 files changed, 132 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 3b5598967e5c..61866294147e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -205,12 +205,6 @@ struct vmw_fifo_state {
 	bool dx;
 };
 
-struct vmw_relocation {
-	SVGAMobId *mob_loc;
-	SVGAGuestPtr *location;
-	struct vmw_buffer_object *vbo;
-};
-
 /**
  * struct vmw_res_cache_entry - resource information cache entry
  * @handle: User-space handle of a resource.
@@ -303,12 +297,11 @@ struct vmw_ctx_validation_info;
  * than from user-space
  * @fp: If @kernel is false, points to the file of the client. Otherwise
  * NULL
- * @relocs: Array of buffer object relocations
- * @cur_reloc: Cursor pointing to the current relocation
  * @cmd_bounce: Command bounce buffer used for command validation before
  * copying to fifo space
  * @cmd_bounce_size: Current command bounce buffer size
  * @cur_query_bo: Current buffer object used as query result buffer
+ * @bo_relocations: List of buffer object relocations
  * @res_relocations: List of resource relocations
  * @buf_start: Pointer to start of memory where command validation takes
  * place
@@ -335,11 +328,10 @@ struct vmw_sw_context{
 	bool res_ht_initialized;
 	bool kernel;
 	struct vmw_fpriv *fp;
-	struct vmw_relocation relocs[VMWGFX_MAX_RELOCATIONS];
-	uint32_t cur_reloc;
 	uint32_t *cmd_bounce;
 	uint32_t cmd_bounce_size;
 	struct vmw_buffer_object *cur_query_bo;
+	struct list_head bo_relocations;
 	struct list_head res_relocations;
 	uint32_t *buf_start;
 	struct vmw_res_cache_entry res_cache[vmw_res_max];
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 85821a5b227c..da341cc6ff47 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -35,6 +35,21 @@
 
 #define VMW_RES_HT_ORDER 12
 
+/**
+ * struct vmw_relocation - Buffer object relocation
+ *
+ * @head: List head for the command submission context's relocation list
+ * @mob_loc: Pointer to location for mob id to be modified
+ * @location: Pointer to location for guest pointer to be modified
+ * @vbo: Non ref-counted pointer to buffer object
+ */
+struct vmw_relocation {
+	struct list_head head;
+	SVGAMobId *mob_loc;
+	SVGAGuestPtr *location;
+	struct vmw_buffer_object *vbo;
+};
+
 /**
  * enum vmw_resource_relocation_type - Relocation type for resources
  *
@@ -132,11 +147,9 @@ static size_t vmw_ptr_diff(void *a, void *b)
 static void vmw_execbuf_bindings_commit(struct vmw_sw_context *sw_context,
 					bool backoff)
 {
-	struct vmw_ctx_validation_info *entry, *next;
-
-	list_for_each_entry_safe(entry, next, &sw_context->ctx_list, head) {
-		list_del(&entry->head);
+	struct vmw_ctx_validation_info *entry;
 
+	list_for_each_entry(entry, &sw_context->ctx_list, head) {
 		if (!backoff)
 			vmw_binding_state_commit(entry->cur, entry->staged);
 		if (entry->staged != sw_context->staged_bindings)
@@ -144,6 +157,9 @@ static void vmw_execbuf_bindings_commit(struct vmw_sw_context *sw_context,
 		else
 			sw_context->staged_bindings_inuse = false;
 	}
+
+	/* List entries are freed with the validation context */
+	INIT_LIST_HEAD(&sw_context->ctx_list);
 }
 
 /**
@@ -397,7 +413,7 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv,
  * id that needs fixup is located. Granularity is one byte.
  * @rel_type: Relocation type.
  */
-static int vmw_resource_relocation_add(struct list_head *list,
+static int vmw_resource_relocation_add(struct vmw_sw_context *sw_context,
 				       const struct vmw_resource *res,
 				       unsigned long offset,
 				       enum vmw_resource_relocation_type
@@ -405,7 +421,7 @@ static int vmw_resource_relocation_add(struct list_head *list,
 {
 	struct vmw_resource_relocation *rel;
 
-	rel = kmalloc(sizeof(*rel), GFP_KERNEL);
+	rel = vmw_validation_mem_alloc(sw_context->ctx, sizeof(*rel));
 	if (unlikely(!rel)) {
 		DRM_ERROR("Failed to allocate a resource relocation.\n");
 		return -ENOMEM;
@@ -414,7 +430,7 @@ static int vmw_resource_relocation_add(struct list_head *list,
 	rel->res = res;
 	rel->offset = offset;
 	rel->rel_type = rel_type;
-	list_add_tail(&rel->head, list);
+	list_add_tail(&rel->head, &sw_context->res_relocations);
 
 	return 0;
 }
@@ -422,16 +438,13 @@ static int vmw_resource_relocation_add(struct list_head *list,
 /**
  * vmw_resource_relocations_free - Free all relocations on a list
  *
- * @list: Pointer to the head of the relocation list.
+ * @list: Pointer to the head of the relocation list
  */
 static void vmw_resource_relocations_free(struct list_head *list)
 {
-	struct vmw_resource_relocation *rel, *n;
+	/* Memory is validation context memory, so no need to free it */
 
-	list_for_each_entry_safe(rel, n, list, head) {
-		list_del(&rel->head);
-		kfree(rel);
-	}
+	INIT_LIST_HEAD(list);
 }
 
 /**
@@ -532,8 +545,7 @@ static int vmw_cmd_res_reloc_add(struct vmw_private *dev_priv,
 {
 	int ret;
 
-	ret = vmw_resource_relocation_add(&sw_context->res_relocations,
-					  res,
+	ret = vmw_resource_relocation_add(sw_context, res,
 					  vmw_ptr_diff(sw_context->buf_start,
 						       id_loc),
 					  vmw_res_rel_normal);
@@ -597,7 +609,7 @@ vmw_cmd_res_check(struct vmw_private *dev_priv,
 			*p_res = res;
 
 		return vmw_resource_relocation_add
-			(&sw_context->res_relocations, res,
+			(sw_context, res,
 			 vmw_ptr_diff(sw_context->buf_start, id_loc),
 			 vmw_res_rel_normal);
 	}
@@ -1150,14 +1162,10 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
 		goto out_no_reloc;
 	}
 
-	if (unlikely(sw_context->cur_reloc >= VMWGFX_MAX_RELOCATIONS)) {
-		DRM_ERROR("Max number relocations per submission"
-			  " exceeded\n");
-		ret = -EINVAL;
+	reloc = vmw_validation_mem_alloc(sw_context->ctx, sizeof(*reloc));
+	if (!reloc)
 		goto out_no_reloc;
-	}
 
-	reloc = &sw_context->relocs[sw_context->cur_reloc++];
 	reloc->mob_loc = id;
 	reloc->location = NULL;
 	reloc->vbo = vmw_bo;
@@ -1167,6 +1175,8 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
 		goto out_no_reloc;
 
 	*vmw_bo_p = vmw_bo;
+	list_add_tail(&reloc->head, &sw_context->bo_relocations);
+
 	return 0;
 
 out_no_reloc:
@@ -1211,14 +1221,10 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 		goto out_no_reloc;
 	}
 
-	if (unlikely(sw_context->cur_reloc >= VMWGFX_MAX_RELOCATIONS)) {
-		DRM_ERROR("Max number relocations per submission"
-			  " exceeded\n");
-		ret = -EINVAL;
+	reloc = vmw_validation_mem_alloc(sw_context->ctx, sizeof(*reloc));
+	if (!reloc)
 		goto out_no_reloc;
-	}
 
-	reloc = &sw_context->relocs[sw_context->cur_reloc++];
 	reloc->location = ptr;
 	reloc->vbo = vmw_bo;
 
@@ -1227,6 +1233,8 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 		goto out_no_reloc;
 
 	*vmw_bo_p = vmw_bo;
+	list_add_tail(&reloc->head, &sw_context->bo_relocations);
+
 	return 0;
 
 out_no_reloc:
@@ -2055,7 +2063,7 @@ static int vmw_cmd_shader_define(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		return ret;
 
-	return vmw_resource_relocation_add(&sw_context->res_relocations,
+	return vmw_resource_relocation_add(sw_context,
 					   NULL,
 					   vmw_ptr_diff(sw_context->buf_start,
 							&cmd->header.id),
@@ -2100,7 +2108,7 @@ static int vmw_cmd_shader_destroy(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		return ret;
 
-	return vmw_resource_relocation_add(&sw_context->res_relocations,
+	return vmw_resource_relocation_add(sw_context,
 					   NULL,
 					   vmw_ptr_diff(sw_context->buf_start,
 							&cmd->header.id),
@@ -2801,7 +2809,7 @@ static int vmw_cmd_dx_view_remove(struct vmw_private *dev_priv,
 	 * relocation to conditionally make this command a NOP to avoid
 	 * device errors.
 	 */
-	return vmw_resource_relocation_add(&sw_context->res_relocations,
+	return vmw_resource_relocation_add(sw_context,
 					   view,
 					   vmw_ptr_diff(sw_context->buf_start,
 							&cmd->header.id),
@@ -3504,17 +3512,17 @@ static int vmw_cmd_check_all(struct vmw_private *dev_priv,
 
 static void vmw_free_relocations(struct vmw_sw_context *sw_context)
 {
-	sw_context->cur_reloc = 0;
+	/* Memory is validation context memory, so no need to free it */
+
+	INIT_LIST_HEAD(&sw_context->bo_relocations);
 }
 
 static void vmw_apply_relocations(struct vmw_sw_context *sw_context)
 {
-	uint32_t i;
 	struct vmw_relocation *reloc;
 	struct ttm_buffer_object *bo;
 
-	for (i = 0; i < sw_context->cur_reloc; ++i) {
-		reloc = &sw_context->relocs[i];
+	list_for_each_entry(reloc, &sw_context->bo_relocations, head) {
 		bo = &reloc->vbo->base;
 		switch (bo->mem.mem_type) {
 		case TTM_PL_VRAM:
@@ -3914,7 +3922,6 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 		sw_context->kernel = true;
 
 	sw_context->fp = vmw_fpriv(file_priv);
-	sw_context->cur_reloc = 0;
 	INIT_LIST_HEAD(&sw_context->ctx_list);
 	sw_context->cur_query_bo = dev_priv->pinned_bo;
 	sw_context->last_query_ctx = NULL;
@@ -3924,6 +3931,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 	sw_context->dx_query_ctx = NULL;
 	memset(sw_context->res_cache, 0, sizeof(sw_context->res_cache));
 	INIT_LIST_HEAD(&sw_context->res_relocations);
+	INIT_LIST_HEAD(&sw_context->bo_relocations);
 	if (sw_context->staged_bindings)
 		vmw_binding_state_reset(sw_context->staged_bindings);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
index dbb58cce0987..3158fe161b2d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
@@ -79,6 +79,66 @@ struct vmw_validation_res_node {
 	unsigned long private[0];
 };
 
+/**
+ * vmw_validation_mem_alloc - Allocate kernel memory from the validation
+ * context based allocator
+ * @ctx: The validation context
+ * @size: The number of bytes to allocate.
+ *
+ * The memory allocated may not exceed PAGE_SIZE, and the returned
+ * address is aligned to sizeof(long). All memory allocated this way is
+ * reclaimed after validation when calling any of the exported functions:
+ * vmw_validation_unref_lists()
+ * vmw_validation_revert()
+ * vmw_validation_done()
+ *
+ * Return: Pointer to the allocated memory on success. NULL on failure.
+ */
+void *vmw_validation_mem_alloc(struct vmw_validation_context *ctx, size_t size)
+{
+	void *addr;
+
+	size = ALIGN(size, sizeof(long));
+	if (size > PAGE_SIZE)
+		return NULL;
+
+	if (ctx->mem_size_left < size) {
+		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+		if (!page)
+			return NULL;
+
+		list_add_tail(&page->lru, &ctx->page_list);
+		ctx->page_address = page_address(page);
+		ctx->mem_size_left = PAGE_SIZE;
+	}
+
+	addr = (void *) (ctx->page_address + (PAGE_SIZE - ctx->mem_size_left));
+	ctx->mem_size_left -= size;
+
+	return addr;
+}
+
+/**
+ * vmw_validation_mem_free - Free all memory allocated using
+ * vmw_validation_mem_alloc()
+ * @ctx: The validation context
+ *
+ * All memory previously allocated for this context using
+ * vmw_validation_mem_alloc() is freed.
+ */
+static void vmw_validation_mem_free(struct vmw_validation_context *ctx)
+{
+	struct page *entry, *next;
+
+	list_for_each_entry_safe(entry, next, &ctx->page_list, lru) {
+		list_del_init(&entry->lru);
+		__free_page(entry);
+	}
+
+	ctx->mem_size_left = 0;
+}
+
 /**
  * vmw_validation_find_bo_dup - Find a duplicate buffer object entry in the
  * validation context's lists.
@@ -188,7 +248,7 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx,
 		struct ttm_validate_buffer *val_buf;
 		int ret;
 
-		bo_node = kmalloc(sizeof(*bo_node), GFP_KERNEL);
+		bo_node = vmw_validation_mem_alloc(ctx, sizeof(*bo_node));
 		if (!bo_node)
 			return -ENOMEM;
 
@@ -198,7 +258,6 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx,
 			if (ret) {
 				DRM_ERROR("Failed to initialize a buffer "
 					  "validation entry.\n");
-				kfree(bo_node);
 				return ret;
 			}
 		}
@@ -238,7 +297,7 @@ int vmw_validation_add_resource(struct vmw_validation_context *ctx,
 		goto out_fill;
 	}
 
-	node = kzalloc(sizeof(*node) + priv_size, GFP_KERNEL);
+	node = vmw_validation_mem_alloc(ctx, sizeof(*node) + priv_size);
 	if (!node) {
 		DRM_ERROR("Failed to allocate a resource validation "
 			  "entry.\n");
@@ -251,7 +310,6 @@ int vmw_validation_add_resource(struct vmw_validation_context *ctx,
 		if (ret) {
 			DRM_ERROR("Failed to initialize a resource validation "
 				  "entry.\n");
-			kfree(node);
 			return ret;
 		}
 	}
@@ -542,25 +600,24 @@ void vmw_validation_drop_ht(struct vmw_validation_context *ctx)
  */
 void vmw_validation_unref_lists(struct vmw_validation_context *ctx)
 {
-	struct vmw_validation_bo_node *entry, *next;
-	struct vmw_validation_res_node *val, *val_next;
+	struct vmw_validation_bo_node *entry;
+	struct vmw_validation_res_node *val;
 
-	list_for_each_entry_safe(entry, next, &ctx->bo_list, base.head) {
-		list_del(&entry->base.head);
+	list_for_each_entry(entry, &ctx->bo_list, base.head)
 		ttm_bo_unref(&entry->base.bo);
-		kfree(entry);
-	}
 
 	list_splice_init(&ctx->resource_ctx_list, &ctx->resource_list);
-	list_for_each_entry_safe(val, val_next, &ctx->resource_list, head) {
-		list_del(&val->head);
+	list_for_each_entry(val, &ctx->resource_list, head)
 		vmw_resource_unreference(&val->res);
-		kfree(val);
-	}
 
-	WARN_ON(!list_empty(&ctx->bo_list));
-	WARN_ON(!list_empty(&ctx->resource_list));
-	WARN_ON(!list_empty(&ctx->resource_ctx_list));
+	/*
+	 * No need to detach each list entry since they are all freed with
+	 * vmw_validation_mem_free. Just make them inaccessible.
+	 */
+	INIT_LIST_HEAD(&ctx->bo_list);
+	INIT_LIST_HEAD(&ctx->resource_list);
+
+	vmw_validation_mem_free(ctx);
 }
 
 /**
@@ -637,6 +694,7 @@ void vmw_validation_revert(struct vmw_validation_context *ctx)
 	vmw_validation_res_unreserve(ctx, true);
 	if (ctx->res_mutex)
 		mutex_unlock(ctx->res_mutex);
+	vmw_validation_unref_lists(ctx);
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
index 85f9387983a2..0eb2d02d0c0c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
@@ -40,19 +40,25 @@
  * @resource_ctx_list: List head for resource validation metadata for
  * resources that need to be validated before those in @resource_list
  * @bo_list: List head for buffer objects
+ * @page_list: List of pages used by the memory allocator
  * @ticket: Ticked used for ww mutex locking
  * @res_mutex: Pointer to mutex used for resource reserving
  * @merge_dups: Whether to merge metadata for duplicate resources or
  * buffer objects
+ * @mem_size_left: Free memory left in the last page in @page_list
+ * @page_address: Kernel virtual address of the last page in @page_list
  */
 struct vmw_validation_context {
 	struct drm_open_hash *ht;
 	struct list_head resource_list;
 	struct list_head resource_ctx_list;
 	struct list_head bo_list;
+	struct list_head page_list;
 	struct ww_acquire_ctx ticket;
 	struct mutex *res_mutex;
 	unsigned int merge_dups;
+	unsigned int mem_size_left;
+	u8 *page_address;
 };
 
 struct vmw_buffer_object;
@@ -76,8 +82,10 @@ struct vmw_fence_obj;
 	  .resource_list = LIST_HEAD_INIT((_name).resource_list),	\
 	  .resource_ctx_list = LIST_HEAD_INIT((_name).resource_ctx_list), \
 	  .bo_list = LIST_HEAD_INIT((_name).bo_list),			\
+	  .page_list = LIST_HEAD_INIT((_name).page_list),		\
+	  .res_mutex = NULL,						\
 	  .merge_dups = _merge_dups,					\
-	  .res_mutex = NULL						\
+	  .mem_size_left = 0,						\
 	}
 
 /**
@@ -199,4 +207,6 @@ void vmw_validation_revert(struct vmw_validation_context *ctx);
 void vmw_validation_done(struct vmw_validation_context *ctx,
 			 struct vmw_fence_obj *fence);
 
+void *vmw_validation_mem_alloc(struct vmw_validation_context *ctx, size_t size);
+
 #endif
-- 
2.30.2