drm/amdgpu: stop splitting PTE commands into smaller ones
author Christian König <christian.koenig@amd.com>
Fri, 12 Aug 2016 10:59:59 +0000 (12:59 +0200)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 16 Aug 2016 14:44:34 +0000 (10:44 -0400)
It doesn't make much sense to create bigger commands first, which we then need
to split into smaller ones again. Just make sure the commands we create aren't
too big in the first place.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
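
For illustration only, here is a minimal, self-contained C sketch of the batching rule
this patch enforces in amdgpu_vm_update_ptes(): consecutive page-table ranges are merged
into a single update command only while the merged entry count stays within
AMDGPU_VM_MAX_UPDATE_SIZE, so the SDMA backends never have to split a command afterwards.
The update_ptes() and flush_update() helpers below are hypothetical stand-ins for the real
amdgpu_vm_update_pages() machinery, not driver code.

#include <stdint.h>
#include <stdio.h>

/* Maximum number of PTEs one command may write (mirrors AMDGPU_VM_MAX_UPDATE_SIZE). */
#define MAX_UPDATE_SIZE 0x3FFFF

/* Hypothetical stand-in for amdgpu_vm_update_pages(): emit one command. */
static void flush_update(uint64_t pe_start, unsigned nptes)
{
	if (nptes)
		printf("cmd: pe=0x%llx nptes=%u\n",
		       (unsigned long long)pe_start, nptes);
}

/*
 * Merge consecutive PTE ranges into as few commands as possible,
 * but never let a single command exceed MAX_UPDATE_SIZE entries.
 */
static void update_ptes(const uint64_t *pe_starts, const unsigned *nptes,
			unsigned nranges)
{
	uint64_t cur_pe_start = 0;
	unsigned cur_nptes = 0;
	unsigned i;

	for (i = 0; i < nranges; ++i) {
		/* Each PTE is 8 bytes; merge only if the next range is
		 * contiguous and the merged size stays under the cap. */
		if (cur_nptes &&
		    cur_pe_start + 8ULL * cur_nptes == pe_starts[i] &&
		    cur_nptes + nptes[i] <= MAX_UPDATE_SIZE) {
			cur_nptes += nptes[i];
			continue;
		}

		flush_update(cur_pe_start, cur_nptes);	/* emit what we have */
		cur_pe_start = pe_starts[i];
		cur_nptes = nptes[i];
	}
	flush_update(cur_pe_start, cur_nptes);
}

int main(void)
{
	const uint64_t pes[]    = { 0x1000, 0x1000 + 8 * 512, 0x9000 };
	const unsigned counts[] = { 512, 512, 256 };

	update_ptes(pes, counts, 3);	/* first two ranges merge, third is separate */
	return 0;
}

Because the cap is applied when the command is created, each of the cik_sdma,
sdma_v2_4 and sdma_v3_0 callbacks can emit exactly one packet per call, which is
what the diffs below reduce them to.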
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c

index 387b4979f45aa89a509c76a632ee65a87b470882..1a7e05da470e09f737fa0b14fd483dc5364b007e 100644 (file)
@@ -833,6 +833,9 @@ struct amdgpu_ring {
 /* maximum number of VMIDs */
 #define AMDGPU_NUM_VM  16
 
+/* Maximum number of PTEs the hardware can write with one command */
+#define AMDGPU_VM_MAX_UPDATE_SIZE      0x3FFFF
+
 /* number of entries in page table */
 #define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
 
index 12925016370b6d55dd03bd8152f46f7a82efb8e2..673c258e49db8669689948a8f2141caa05305090 100644 (file)
@@ -639,7 +639,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 
                pde = pd_addr + pt_idx * 8;
                if (((last_pde + 8 * count) != pde) ||
-                   ((last_pt + incr * count) != pt)) {
+                   ((last_pt + incr * count) != pt) ||
+                   (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
 
                        if (count) {
                                amdgpu_vm_update_pages(&params, last_pde,
@@ -743,7 +744,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
                next_pe_start = amdgpu_bo_gpu_offset(pt);
                next_pe_start += (addr & mask) * 8;
 
-               if ((cur_pe_start + 8 * cur_nptes) == next_pe_start) {
+               if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
+                   ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
                        /* The next ptb is consecutive to current ptb.
                         * Don't call amdgpu_vm_update_pages now.
                         * Will update two ptbs together in future.
index e5e44f42e20e77993754b1604acb1e51137d10a5..e71cd12104b3b14ad376e18e6b489dede9313b6e 100644 (file)
@@ -694,24 +694,16 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
                                 uint64_t pe, uint64_t src,
                                 unsigned count)
 {
-       while (count) {
-               unsigned bytes = count * 8;
-               if (bytes > 0x1FFFF8)
-                       bytes = 0x1FFFF8;
-
-               ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
-                       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-               ib->ptr[ib->length_dw++] = bytes;
-               ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-               ib->ptr[ib->length_dw++] = lower_32_bits(src);
-               ib->ptr[ib->length_dw++] = upper_32_bits(src);
-               ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-               pe += bytes;
-               src += bytes;
-               count -= bytes / 8;
-       }
+       unsigned bytes = count * 8;
+
+       ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+               SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+       ib->ptr[ib->length_dw++] = bytes;
+       ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+       ib->ptr[ib->length_dw++] = lower_32_bits(src);
+       ib->ptr[ib->length_dw++] = upper_32_bits(src);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 }
 
 /**
@@ -755,40 +747,21 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
  *
  * Update the page tables using sDMA (CIK).
  */
-static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
-                                   uint64_t pe,
+static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                    uint64_t addr, unsigned count,
                                    uint32_t incr, uint32_t flags)
 {
-       uint64_t value;
-       unsigned ndw;
-
-       while (count) {
-               ndw = count;
-               if (ndw > 0x7FFFF)
-                       ndw = 0x7FFFF;
-
-               if (flags & AMDGPU_PTE_VALID)
-                       value = addr;
-               else
-                       value = 0;
-
-               /* for physically contiguous pages (vram) */
-               ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
-               ib->ptr[ib->length_dw++] = pe; /* dst addr */
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-               ib->ptr[ib->length_dw++] = flags; /* mask */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = value; /* value */
-               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-               ib->ptr[ib->length_dw++] = incr; /* increment size */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
-               pe += ndw * 8;
-               addr += ndw * incr;
-               count -= ndw;
-       }
+       /* for physically contiguous pages (vram) */
+       ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+       ib->ptr[ib->length_dw++] = flags; /* mask */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = incr; /* increment size */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = count; /* number of entries */
 }
 
 /**
index af0f0d283472477242226eb2c31467bd452d6526..e82229686783873173db79c1072a748f89078377 100644 (file)
@@ -749,24 +749,16 @@ static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
                                  uint64_t pe, uint64_t src,
                                  unsigned count)
 {
-       while (count) {
-               unsigned bytes = count * 8;
-               if (bytes > 0x1FFFF8)
-                       bytes = 0x1FFFF8;
-
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-                       SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
-               ib->ptr[ib->length_dw++] = bytes;
-               ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-               ib->ptr[ib->length_dw++] = lower_32_bits(src);
-               ib->ptr[ib->length_dw++] = upper_32_bits(src);
-               ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-               pe += bytes;
-               src += bytes;
-               count -= bytes / 8;
-       }
+       unsigned bytes = count * 8;
+
+       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+               SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+       ib->ptr[ib->length_dw++] = bytes;
+       ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+       ib->ptr[ib->length_dw++] = lower_32_bits(src);
+       ib->ptr[ib->length_dw++] = upper_32_bits(src);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 }
 
 /**
@@ -810,40 +802,21 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
  *
  * Update the page tables using sDMA (CIK).
  */
-static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
-                                    uint64_t pe,
+static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                     uint64_t addr, unsigned count,
                                     uint32_t incr, uint32_t flags)
 {
-       uint64_t value;
-       unsigned ndw;
-
-       while (count) {
-               ndw = count;
-               if (ndw > 0x7FFFF)
-                       ndw = 0x7FFFF;
-
-               if (flags & AMDGPU_PTE_VALID)
-                       value = addr;
-               else
-                       value = 0;
-
-               /* for physically contiguous pages (vram) */
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
-               ib->ptr[ib->length_dw++] = pe; /* dst addr */
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-               ib->ptr[ib->length_dw++] = flags; /* mask */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = value; /* value */
-               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-               ib->ptr[ib->length_dw++] = incr; /* increment size */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
-               pe += ndw * 8;
-               addr += ndw * incr;
-               count -= ndw;
-       }
+       /* for physically contiguous pages (vram) */
+       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+       ib->ptr[ib->length_dw++] = flags; /* mask */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = incr; /* increment size */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = count; /* number of entries */
 }
 
 /**
index 88faaee37258704d3ff2712659dad8540f247f4a..bee4978bec73d4fbe09375dfc72f4d1a3eea71a6 100644 (file)
@@ -976,24 +976,16 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
                                  uint64_t pe, uint64_t src,
                                  unsigned count)
 {
-       while (count) {
-               unsigned bytes = count * 8;
-               if (bytes > 0x1FFFF8)
-                       bytes = 0x1FFFF8;
-
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-                       SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
-               ib->ptr[ib->length_dw++] = bytes;
-               ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-               ib->ptr[ib->length_dw++] = lower_32_bits(src);
-               ib->ptr[ib->length_dw++] = upper_32_bits(src);
-               ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-               pe += bytes;
-               src += bytes;
-               count -= bytes / 8;
-       }
+       unsigned bytes = count * 8;
+
+       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+               SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+       ib->ptr[ib->length_dw++] = bytes;
+       ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+       ib->ptr[ib->length_dw++] = lower_32_bits(src);
+       ib->ptr[ib->length_dw++] = upper_32_bits(src);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 }
 
 /**
@@ -1037,40 +1029,21 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
  *
  * Update the page tables using sDMA (CIK).
  */
-static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
-                                    uint64_t pe,
+static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                     uint64_t addr, unsigned count,
                                     uint32_t incr, uint32_t flags)
 {
-       uint64_t value;
-       unsigned ndw;
-
-       while (count) {
-               ndw = count;
-               if (ndw > 0x7FFFF)
-                       ndw = 0x7FFFF;
-
-               if (flags & AMDGPU_PTE_VALID)
-                       value = addr;
-               else
-                       value = 0;
-
-               /* for physically contiguous pages (vram) */
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
-               ib->ptr[ib->length_dw++] = pe; /* dst addr */
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-               ib->ptr[ib->length_dw++] = flags; /* mask */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = value; /* value */
-               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-               ib->ptr[ib->length_dw++] = incr; /* increment size */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
-               pe += ndw * 8;
-               addr += ndw * incr;
-               count -= ndw;
-       }
+       /* for physically contiguous pages (vram) */
+       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+       ib->ptr[ib->length_dw++] = flags; /* mask */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = incr; /* increment size */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = count; /* number of entries */
 }
 
 /**