drm/nouveau: implement explicitly coherent BOs

author Alexandre Courbot <acourbot@nvidia.com>

Mon, 27 Oct 2014 09:49:17 +0000 (18:49 +0900)

committer Ben Skeggs <bskeggs@redhat.com>

Tue, 2 Dec 2014 05:43:59 +0000 (15:43 +1000)
author Alexandre Courbot <acourbot@nvidia.com>
Mon, 27 Oct 2014 09:49:17 +0000 (18:49 +0900)
committer Ben Skeggs <bskeggs@redhat.com>
Tue, 2 Dec 2014 05:43:59 +0000 (15:43 +1000)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c

index 9a8adeec80cd160a5afb4011d0d89cd842cf428b..ed9a6946f6d65c79e8d526b96f286ab1e23d1256 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -214,6 +214,9 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
         nvbo->tile_flags = tile_flags;
         nvbo->bo.bdev = &drm->ttm.bdev;
  
+       if (!nv_device_is_cpu_coherent(nvkm_device(&drm->device)))
+               nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
+
         nvbo->page_shift = 12;
         if (drm->client.vm) {
                 if (!(flags & TTM_PL_FLAG_TT) && size > 256 * 1024)
@@ -291,8 +294,9 @@ void
  nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
  {
         struct ttm_placement *pl = &nvbo->placement;
-       uint32_t flags = TTM_PL_MASK_CACHING |
-               (nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
+       uint32_t flags = (nvbo->force_coherent ? TTM_PL_FLAG_UNCACHED :
+                                                TTM_PL_MASK_CACHING) |
+                        (nvbo->pin_refcnt ? TTM_PL_FLAG_NO_EVICT : 0);
  
         pl->placement = nvbo->placements;
         set_placement_list(nvbo->placements, &pl->num_placement,
@@ -396,7 +400,14 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
         if (ret)
                 return ret;
  
-       ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages, &nvbo->kmap);
+       /*
+        * TTM buffers allocated using the DMA API already have a mapping, let's
+        * use it instead.
+        */
+       if (!nvbo->force_coherent)
+               ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages,
+                                 &nvbo->kmap);
+
         ttm_bo_unreserve(&nvbo->bo);
         return ret;
  }
@@ -404,7 +415,14 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
  void
  nouveau_bo_unmap(struct nouveau_bo *nvbo)
  {
-       if (nvbo)
+       if (!nvbo)
+               return;
+
+       /*
+        * TTM buffers allocated using the DMA API already had a coherent
+        * mapping which we used, no need to unmap.
+        */
+       if (!nvbo->force_coherent)
                 ttm_bo_kunmap(&nvbo->kmap);
  }
  
@@ -422,12 +440,36 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
         return 0;
  }
  
+/* CPU address of element @index (each @sz bytes wide) within @nvbo. */
+static inline void *
+_nouveau_bo_mem_index(struct nouveau_bo *nvbo, unsigned index, void *mem, u8 sz)
+{
+       struct ttm_dma_tt *dma_tt;
+       u8 *m = mem;
+
+       index *= sz;    /* element index -> byte offset */
+       if (m) {
+               /* kmap'd address supplied in @mem: just add the byte offset */
+               m += index;
+       } else {
+               /* no kmap'd address: use the DMA-API per-page CPU addresses */
+               dma_tt = (struct ttm_dma_tt *)nvbo->bo.ttm;
+               m = dma_tt->cpu_address[index / PAGE_SIZE];
+               m += index % PAGE_SIZE;
+       }
+       return m;
+}
+/* @m is parenthesized so that an expression argument still sizes one element */
+#define nouveau_bo_mem_index(o, i, m) _nouveau_bo_mem_index(o, i, m, sizeof(*(m)))
+
  u16
  nouveau_bo_rd16(struct nouveau_bo *nvbo, unsigned index)
  {
         bool is_iomem;
         u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-       mem = &mem[index];
+
+       mem = nouveau_bo_mem_index(nvbo, index, mem);
+
         if (is_iomem)
                 return ioread16_native((void __force __iomem *)mem);
         else
@@ -439,7 +481,9 @@ nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val)
  {
         bool is_iomem;
         u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-       mem = &mem[index];
+
+       mem = nouveau_bo_mem_index(nvbo, index, mem);
+
         if (is_iomem)
                 iowrite16_native(val, (void __force __iomem *)mem);
         else
@@ -451,7 +495,9 @@ nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index)
  {
         bool is_iomem;
         u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-       mem = &mem[index];
+
+       mem = nouveau_bo_mem_index(nvbo, index, mem);
+
         if (is_iomem)
                 return ioread32_native((void __force __iomem *)mem);
         else
@@ -463,7 +509,9 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val)
  {
         bool is_iomem;
         u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
-       mem = &mem[index];
+
+       mem = nouveau_bo_mem_index(nvbo, index, mem);
+
         if (is_iomem)
                 iowrite32_native(val, (void __force __iomem *)mem);
         else
@@ -1383,6 +1431,14 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
         dev = drm->dev;
         pdev = nv_device_base(device);
  
+       /*
+        * Objects matching this condition have been marked as force_coherent,
+        * so use the DMA API for them.
+        */
+       if (!nv_device_is_cpu_coherent(device) &&
+           ttm->caching_state == tt_uncached)
+               return ttm_dma_populate(ttm_dma, dev->dev);
+
  #if __OS_HAS_AGP
         if (drm->agp.stat == ENABLED) {
                 return ttm_agp_tt_populate(ttm);
@@ -1440,6 +1496,17 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
         dev = drm->dev;
         pdev = nv_device_base(device);
  
+       /*
+        * Objects matching this condition have been marked as force_coherent,
+        * so use the DMA API for them, and stop here so that the release
+        * paths below do not run on the same pages again.
+        */
+       if (!nv_device_is_cpu_coherent(device) &&
+           ttm->caching_state == tt_uncached) {
+               ttm_dma_unpopulate(ttm_dma, dev->dev);
+               return;
+       }
+
  #if __OS_HAS_AGP
         if (drm->agp.stat == ENABLED) {
                 ttm_agp_tt_unpopulate(ttm);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h

index 22d2c764d80bd17c9fe755abc56b225722eee513..0f8bbd48a0b9a9579ebc158f522eea24b2609b4c 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -13,6 +13,7 @@ struct nouveau_bo {
         u32 valid_domains;
         struct ttm_place placements[3];
         struct ttm_place busy_placements[3];
+       bool force_coherent;
         struct ttm_bo_kmap_obj kmap;
         struct list_head head;
author	Alexandre Courbot <acourbot@nvidia.com>
	Mon, 27 Oct 2014 09:49:17 +0000 (18:49 +0900)
committer	Ben Skeggs <bskeggs@redhat.com>
	Tue, 2 Dec 2014 05:43:59 +0000 (15:43 +1000)
drivers/gpu/drm/nouveau/nouveau_bo.c		patch \| blob \| history
drivers/gpu/drm/nouveau/nouveau_bo.h		patch \| blob \| history