drm/amdgpu: Throttle visible VRAM moves separately

author    John Brooks <john@fastquake.com>
          Wed, 28 Jun 2017 02:33:18 +0000 (22:33 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
          Fri, 14 Jul 2017 15:06:33 +0000 (11:06 -0400)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0d6b0617cdf085c7926dcddab183227b4a982058..c290b262d7da86044659c90841282aeef4a772a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1117,7 +1117,9 @@ struct amdgpu_cs_parser {
         struct list_head                validated;
         struct dma_fence                *fence;
         uint64_t                        bytes_moved_threshold;
+       uint64_t                        bytes_moved_vis_threshold;
         uint64_t                        bytes_moved;
+       uint64_t                        bytes_moved_vis;
         struct amdgpu_bo_list_entry     *evictable;
  
         /* user fence */
@@ -1555,6 +1557,7 @@ struct amdgpu_device {
                 spinlock_t              lock;
                 s64                     last_update_us;
                 s64                     accum_us; /* accumulated microseconds */
+               s64                     accum_us_vis; /* for visible VRAM */
                 u32                     log2_max_MBps;
         } mm_stats;
  
@@ -1846,7 +1849,8 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
  bool amdgpu_need_post(struct amdgpu_device *adev);
  void amdgpu_update_display_priority(struct amdgpu_device *adev);
  
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+                                 u64 num_vis_bytes);
  void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
  bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5599c01b265d40c105b59b5ca0f3f14c2efdf40e..33789510e663f96594468e0eab258ff62d524f0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
   * ticks. The accumulated microseconds (us) are converted to bytes and
   * returned.
   */
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+                                             u64 *max_bytes,
+                                             u64 *max_vis_bytes)
  {
         s64 time_us, increment_us;
-       u64 max_bytes;
         u64 free_vram, total_vram, used_vram;
  
         /* Allow a maximum of 200 accumulated ms. This is basically per-IB
@@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
          */
         const s64 us_upper_bound = 200000;
  
-       if (!adev->mm_stats.log2_max_MBps)
-               return 0;
+       if (!adev->mm_stats.log2_max_MBps) {
+               *max_bytes = 0;
+               *max_vis_bytes = 0;
+               return;
+       }
  
         total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
         used_vram = atomic64_read(&adev->vram_usage);
@@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
                 adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
         }
  
-       /* This returns 0 if the driver is in debt to disallow (optional)
+       /* This is set to 0 if the driver is in debt to disallow (optional)
          * buffer moves.
          */
-       max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+       *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+       /* Do the same for visible VRAM if half of it is free */
+       if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+               u64 total_vis_vram = adev->mc.visible_vram_size;
+               u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+
+               if (used_vis_vram < total_vis_vram) {
+                       u64 free_vis_vram = total_vis_vram - used_vis_vram;
+                       adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+                                                         increment_us, us_upper_bound);
+
+                       if (free_vis_vram >= total_vis_vram / 2)
+                               adev->mm_stats.accum_us_vis =
+                                       max(bytes_to_us(adev, free_vis_vram / 2),
+                                           adev->mm_stats.accum_us_vis);
+               }
+
+               *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+       } else {
+               *max_vis_bytes = 0;
+       }
  
         spin_unlock(&adev->mm_stats.lock);
-       return max_bytes;
  }
  
  /* Report how many bytes have really been moved for the last command
   * submission. This can result in a debt that can stop buffer migrations
   * temporarily.
   */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+                                 u64 num_vis_bytes)
  {
         spin_lock(&adev->mm_stats.lock);
         adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+       adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
         spin_unlock(&adev->mm_stats.lock);
  }
  
@@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
                                  struct amdgpu_bo *bo)
  {
         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-       u64 initial_bytes_moved;
+       u64 initial_bytes_moved, bytes_moved;
         uint32_t domain;
         int r;
  
@@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
         /* Don't move this buffer if we have depleted our allowance
          * to move it. Don't move anything if the threshold is zero.
          */
-       if (p->bytes_moved < p->bytes_moved_threshold)
-               domain = bo->prefered_domains;
-       else
+       if (p->bytes_moved < p->bytes_moved_threshold) {
+               if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+                   (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+                       /* And don't move a CPU_ACCESS_REQUIRED BO to limited
+                        * visible VRAM if we've depleted our allowance to do
+                        * that.
+                        */
+                       if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+                               domain = bo->prefered_domains;
+                       else
+                               domain = bo->allowed_domains;
+               } else {
+                       domain = bo->prefered_domains;
+               }
+       } else {
                 domain = bo->allowed_domains;
+       }
  
  retry:
         amdgpu_ttm_placement_from_domain(bo, domain);
         initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
         r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-       p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-               initial_bytes_moved;
+       bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+                     initial_bytes_moved;
+       p->bytes_moved += bytes_moved;
+       if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+           bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+           bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+               p->bytes_moved_vis += bytes_moved;
  
         if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
                 domain = bo->allowed_domains;
@@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
                 struct amdgpu_bo_list_entry *candidate = p->evictable;
                 struct amdgpu_bo *bo = candidate->robj;
                 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-               u64 initial_bytes_moved;
+               u64 initial_bytes_moved, bytes_moved;
+               bool update_bytes_moved_vis;
                 uint32_t other;
  
                 /* If we reached our current BO we can forget it */
@@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
  
                 /* Good we can try to move this BO somewhere else */
                 amdgpu_ttm_placement_from_domain(bo, other);
+               update_bytes_moved_vis =
+                       adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+                       bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+                       bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
                 initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
                 r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-               p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
+               bytes_moved = atomic64_read(&adev->num_bytes_moved) -
                         initial_bytes_moved;
+               p->bytes_moved += bytes_moved;
+               if (update_bytes_moved_vis)
+                       p->bytes_moved_vis += bytes_moved;
  
                 if (unlikely(r))
                         break;
@@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                 list_splice(&need_pages, &p->validated);
         }
  
-       p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+       amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+                                         &p->bytes_moved_vis_threshold);
         p->bytes_moved = 0;
+       p->bytes_moved_vis = 0;
         p->evictable = list_last_entry(&p->validated,
                                        struct amdgpu_bo_list_entry,
                                        tv.head);
@@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                 goto error_validate;
         }
  
-       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+                                    p->bytes_moved_vis);
         fpriv->vm.last_eviction_counter =
                 atomic64_read(&p->adev->num_evictions);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index a85e75327456c2c41089113132a87e463805e4ee..e429829ae93d628a256d24d307c435094bba8d6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -322,7 +322,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
         struct amdgpu_bo *bo;
         enum ttm_bo_type type;
         unsigned long page_align;
-       u64 initial_bytes_moved;
+       u64 initial_bytes_moved, bytes_moved;
         size_t acc_size;
         int r;
  
@@ -398,8 +398,14 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
         r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
                                  &bo->placement, page_align, !kernel, NULL,
                                  acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
-       amdgpu_cs_report_moved_bytes(adev,
-               atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
+       bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+                     initial_bytes_moved;
+       if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+           bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+           bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+               amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved);
+       else
+               amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0);
  
         if (unlikely(r != 0))
                 return r;
author	John Brooks <john@fastquake.com>
	Wed, 28 Jun 2017 02:33:18 +0000 (22:33 -0400)
committer	Alex Deucher <alexander.deucher@amd.com>
	Fri, 14 Jul 2017 15:06:33 +0000 (11:06 -0400)
drivers/gpu/drm/amd/amdgpu/amdgpu.h		patch | blob | history
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c		patch | blob | history
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c		patch | blob | history