drm/i915/gtt: Tidy up ppgtt insertion for gen8
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 12 Jul 2019 11:27:25 +0000 (12:27 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 16 Jul 2019 15:41:57 +0000 (16:41 +0100)
Apply the new radix shift helpers to extract the multi-level indices
cleanly when inserting pte into the gtt tree.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190712112725.2892-5-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/i915_gem_gtt.h

index 49de6d39488f2316bff73515a31a326c21883ab5..220aba5a94d2d296e50ca6e0ed1f71ec9e6ecb89 100644 (file)
@@ -1131,47 +1131,28 @@ static inline struct sgt_dma {
        return (struct sgt_dma) { sg, addr, addr + sg->length };
 }
 
-struct gen8_insert_pte {
-       u16 pml4e;
-       u16 pdpe;
-       u16 pde;
-       u16 pte;
-};
-
-static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
-{
-       return (struct gen8_insert_pte) {
-                gen8_pml4e_index(start),
-                gen8_pdpe_index(start),
-                gen8_pde_index(start),
-                gen8_pte_index(start),
-       };
-}
-
-static __always_inline bool
+static __always_inline u64
 gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
                              struct i915_page_directory *pdp,
                              struct sgt_dma *iter,
-                             struct gen8_insert_pte *idx,
+                             u64 idx,
                              enum i915_cache_level cache_level,
                              u32 flags)
 {
        struct i915_page_directory *pd;
        const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
        gen8_pte_t *vaddr;
-       bool ret;
 
-       GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-       pd = i915_pd_entry(pdp, idx->pdpe);
-       vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+       pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+       vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
        do {
-               vaddr[idx->pte] = pte_encode | iter->dma;
+               vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
 
                iter->dma += I915_GTT_PAGE_SIZE;
                if (iter->dma >= iter->max) {
                        iter->sg = __sg_next(iter->sg);
                        if (!iter->sg) {
-                               ret = false;
+                               idx = 0;
                                break;
                        }
 
@@ -1179,30 +1160,22 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
                        iter->max = iter->dma + iter->sg->length;
                }
 
-               if (++idx->pte == GEN8_PTES) {
-                       idx->pte = 0;
-
-                       if (++idx->pde == I915_PDES) {
-                               idx->pde = 0;
-
+               if (gen8_pd_index(++idx, 0) == 0) {
+                       if (gen8_pd_index(idx, 1) == 0) {
                                /* Limited by sg length for 3lvl */
-                               if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
-                                       idx->pdpe = 0;
-                                       ret = true;
+                               if (gen8_pd_index(idx, 2) == 0)
                                        break;
-                               }
 
-                               GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-                               pd = pdp->entry[idx->pdpe];
+                               pd = pdp->entry[gen8_pd_index(idx, 2)];
                        }
 
                        kunmap_atomic(vaddr);
-                       vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+                       vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
                }
        } while (1);
        kunmap_atomic(vaddr);
 
-       return ret;
+       return idx;
 }
 
 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
@@ -1212,9 +1185,9 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
 {
        struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
        struct sgt_dma iter = sgt_dma(vma);
-       struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
-       gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx,
+       gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter,
+                                     vma->node.start >> GEN8_PTE_SHIFT,
                                      cache_level, flags);
 
        vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
@@ -1231,39 +1204,38 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
        dma_addr_t rem = iter->sg->length;
 
        do {
-               struct gen8_insert_pte idx = gen8_insert_pte(start);
                struct i915_page_directory *pdp =
-                       i915_pdp_entry(pml4, idx.pml4e);
-               struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe);
-               unsigned int page_size;
-               bool maybe_64K = false;
+                       i915_pd_entry(pml4, __gen8_pte_index(start, 3));
+               struct i915_page_directory *pd =
+                       i915_pd_entry(pdp, __gen8_pte_index(start, 2));
                gen8_pte_t encode = pte_encode;
+               unsigned int maybe_64K = -1;
+               unsigned int page_size;
                gen8_pte_t *vaddr;
-               u16 index, max;
+               u16 index;
 
                if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
                    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
-                   rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
-                       index = idx.pde;
-                       max = I915_PDES;
-                       page_size = I915_GTT_PAGE_SIZE_2M;
-
+                   rem >= I915_GTT_PAGE_SIZE_2M &&
+                   !__gen8_pte_index(start, 0)) {
+                       index = __gen8_pte_index(start, 1);
                        encode |= GEN8_PDE_PS_2M;
+                       page_size = I915_GTT_PAGE_SIZE_2M;
 
                        vaddr = kmap_atomic_px(pd);
                } else {
-                       struct i915_page_table *pt = i915_pt_entry(pd, idx.pde);
+                       struct i915_page_table *pt =
+                               i915_pt_entry(pd, __gen8_pte_index(start, 1));
 
-                       index = idx.pte;
-                       max = GEN8_PTES;
+                       index = __gen8_pte_index(start, 0);
                        page_size = I915_GTT_PAGE_SIZE;
 
                        if (!index &&
                            vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
                            IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
                            (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-                            rem >= (max - index) * I915_GTT_PAGE_SIZE))
-                               maybe_64K = true;
+                            rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
+                               maybe_64K = __gen8_pte_index(start, 1);
 
                        vaddr = kmap_atomic_px(pt);
                }
@@ -1284,16 +1256,16 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
                                iter->dma = sg_dma_address(iter->sg);
                                iter->max = iter->dma + rem;
 
-                               if (maybe_64K && index < max &&
+                               if (maybe_64K != -1 && index < I915_PDES &&
                                    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
                                      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-                                      rem >= (max - index) * I915_GTT_PAGE_SIZE)))
-                                       maybe_64K = false;
+                                      rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
+                                       maybe_64K = -1;
 
                                if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
                                        break;
                        }
-               } while (rem >= page_size && index < max);
+               } while (rem >= page_size && index < I915_PDES);
 
                kunmap_atomic(vaddr);
 
@@ -1303,14 +1275,14 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
                 * it and have reached the end of the sg table and we have
                 * enough padding.
                 */
-               if (maybe_64K &&
-                   (index == max ||
+               if (maybe_64K != -1 &&
+                   (index == I915_PDES ||
                     (i915_vm_has_scratch_64K(vma->vm) &&
                      !iter->sg && IS_ALIGNED(vma->node.start +
                                              vma->node.size,
                                              I915_GTT_PAGE_SIZE_2M)))) {
                        vaddr = kmap_atomic_px(pd);
-                       vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
+                       vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
                        kunmap_atomic(vaddr);
                        page_size = I915_GTT_PAGE_SIZE_64K;
 
@@ -1327,8 +1299,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
                                u16 i;
 
                                encode = vma->vm->scratch[0].encode;
-                               vaddr = kmap_atomic_px(i915_pt_entry(pd,
-                                                                    idx.pde));
+                               vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
 
                                for (i = 1; i < index; i += 16)
                                        memset64(vaddr + i, encode, 15);
@@ -1354,13 +1325,13 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
                gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level,
                                               flags);
        } else {
-               struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
+               u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
 
-               while (gen8_ppgtt_insert_pte_entries(ppgtt,
-                                                    i915_pdp_entry(pml4, idx.pml4e++),
-                                                    &iter, &idx, cache_level,
-                                                    flags))
-                       GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
+               while ((idx = gen8_ppgtt_insert_pte_entries(ppgtt,
+                                                           i915_pd_entry(pml4, gen8_pd_index(idx, 3)),
+                                                           &iter, idx, cache_level,
+                                                           flags)))
+                       ;
 
                vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
        }
index de156634a8894b32c1c0ea1618a026345ba17bcc..cea59ef1a365a1a8779db51d1a0740d71faf08af 100644 (file)
@@ -115,29 +115,18 @@ typedef u64 gen8_pte_t;
 #define HSW_GTT_ADDR_ENCODE(addr)      ((addr) | (((addr) >> 28) & 0x7f0))
 #define HSW_PTE_ADDR_ENCODE(addr)      HSW_GTT_ADDR_ENCODE(addr)
 
-/* GEN8 32b style address is defined as a 3 level page table:
+/*
+ * GEN8 32b style address is defined as a 3 level page table:
  * 31:30 | 29:21 | 20:12 |  11:0
  * PDPE  |  PDE  |  PTE  | offset
  * The difference as compared to normal x86 3 level page table is the PDPEs are
  * programmed via register.
- */
-#define GEN8_3LVL_PDPES                        4
-#define GEN8_PDE_SHIFT                 21
-#define GEN8_PDE_MASK                  0x1ff
-#define GEN8_PTE_MASK                  0x1ff
-#define GEN8_PTES                      I915_PTES(sizeof(gen8_pte_t))
-
-/* GEN8 48b style address is defined as a 4 level page table:
+ *
+ * GEN8 48b style address is defined as a 4 level page table:
  * 47:39 | 38:30 | 29:21 | 20:12 |  11:0
  * PML4E | PDPE  |  PDE  |  PTE  | offset
  */
-#define GEN8_PML4ES_PER_PML4           512
-#define GEN8_PML4E_SHIFT               39
-#define GEN8_PML4E_MASK                        (GEN8_PML4ES_PER_PML4 - 1)
-#define GEN8_PDPE_SHIFT                        30
-/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
- * tables */
-#define GEN8_PDPE_MASK                 0x1ff
+#define GEN8_3LVL_PDPES                        4
 
 #define PPAT_UNCACHED                  (_PAGE_PWT | _PAGE_PCD)
 #define PPAT_CACHED_PDE                        0 /* WB LLC */
@@ -521,15 +510,6 @@ static inline u32 gen6_pde_index(u32 addr)
        return i915_pde_index(addr, GEN6_PDE_SHIFT);
 }
 
-static inline unsigned int
-i915_pdpes_per_pdp(const struct i915_address_space *vm)
-{
-       if (i915_vm_is_4lvl(vm))
-               return GEN8_PML4ES_PER_PML4;
-
-       return GEN8_3LVL_PDPES;
-}
-
 static inline struct i915_page_table *
 i915_pt_entry(const struct i915_page_directory * const pd,
              const unsigned short n)
@@ -544,66 +524,6 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
        return pdp->entry[n];
 }
 
-static inline struct i915_page_directory *
-i915_pdp_entry(const struct i915_page_directory * const pml4,
-              const unsigned short n)
-{
-       return pml4->entry[n];
-}
-
-/* Equivalent to the gen6 version, For each pde iterates over every pde
- * between from start until start + length. On gen8+ it simply iterates
- * over every page directory entry in a page directory.
- */
-#define gen8_for_each_pde(pt, pd, start, length, iter)                 \
-       for (iter = gen8_pde_index(start);                              \
-            length > 0 && iter < I915_PDES &&                          \
-                    (pt = i915_pt_entry(pd, iter), true);              \
-            ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT);         \
-                   temp = min(temp - start, length);                   \
-                   start += temp, length -= temp; }), ++iter)
-
-#define gen8_for_each_pdpe(pd, pdp, start, length, iter)               \
-       for (iter = gen8_pdpe_index(start);                             \
-            length > 0 && iter < i915_pdpes_per_pdp(vm) &&             \
-                    (pd = i915_pd_entry(pdp, iter), true);             \
-            ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT);        \
-                   temp = min(temp - start, length);                   \
-                   start += temp, length -= temp; }), ++iter)
-
-#define gen8_for_each_pml4e(pdp, pml4, start, length, iter)            \
-       for (iter = gen8_pml4e_index(start);                            \
-            length > 0 && iter < GEN8_PML4ES_PER_PML4 &&               \
-                    (pdp = i915_pdp_entry(pml4, iter), true);          \
-            ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT);    \
-                   temp = min(temp - start, length);                   \
-                   start += temp, length -= temp; }), ++iter)
-
-static inline u32 gen8_pte_index(u64 address)
-{
-       return i915_pte_index(address, GEN8_PDE_SHIFT);
-}
-
-static inline u32 gen8_pde_index(u64 address)
-{
-       return i915_pde_index(address, GEN8_PDE_SHIFT);
-}
-
-static inline u32 gen8_pdpe_index(u64 address)
-{
-       return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
-}
-
-static inline u32 gen8_pml4e_index(u64 address)
-{
-       return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
-}
-
-static inline u64 gen8_pte_count(u64 address, u64 length)
-{
-       return i915_pte_count(address, length, GEN8_PDE_SHIFT);
-}
-
 static inline dma_addr_t
 i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
 {