From 79e542f5af79918e5e766c441561fb9bff8af3aa Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 15 May 2018 10:35:42 +0800 Subject: [PATCH] drm/i915/kvmgt: Support setting dma map for huge pages To support huge gtt, we need to support huge pages in kvmgt first. This patch adds a 'size' param to the intel_gvt_mpt::dma_map_guest_page API and implements it in kvmgt. v2: rebase. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 6 +- drivers/gpu/drm/i915/gvt/hypercall.h | 2 +- drivers/gpu/drm/i915/gvt/kvmgt.c | 126 ++++++++++++++++++++------- drivers/gpu/drm/i915/gvt/mpt.h | 7 +- 4 files changed, 102 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 54c221dedfe8..e26c01da2bd6 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1106,7 +1106,7 @@ static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, for (i = 0; i < GTT_64K_PTE_STRIDE; i++) { ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, - start_gfn + i, &dma_addr); + start_gfn + i, PAGE_SIZE, &dma_addr); if (ret) return ret; @@ -1152,7 +1152,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu, }; /* direct shadow */ - ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, &dma_addr); + ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr); if (ret) return -ENXIO; @@ -2080,7 +2080,7 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, } ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, - &dma_addr); + PAGE_SIZE, &dma_addr); if (ret) { gvt_vgpu_err("fail to populate guest ggtt entry\n"); /* guest driver may read/write the entry when partial diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index f6dd9f717888..5af11cf1b482 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -53,7 +53,7 @@ struct intel_gvt_mpt { 
unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn); int (*dma_map_guest_page)(unsigned long handle, unsigned long gfn, - dma_addr_t *dma_addr); + unsigned long size, dma_addr_t *dma_addr); void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr); int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 1466d8769ec9..685cb3de6dab 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -94,6 +94,7 @@ struct gvt_dma { struct rb_node dma_addr_node; gfn_t gfn; dma_addr_t dma_addr; + unsigned long size; struct kref ref; }; @@ -106,45 +107,103 @@ static int kvmgt_guest_init(struct mdev_device *mdev); static void intel_vgpu_release_work(struct work_struct *work); static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); +static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, + unsigned long size) +{ + int total_pages; + int npage; + int ret; + + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; + + for (npage = 0; npage < total_pages; npage++) { + unsigned long cur_gfn = gfn + npage; + + ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1); + WARN_ON(ret != 1); + } +} + +/* Pin a normal or compound guest page for dma. */ +static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, + unsigned long size, struct page **page) +{ + unsigned long base_pfn = 0; + int total_pages; + int npage; + int ret; + + total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; + /* + * We pin the pages one-by-one to avoid allocating a big array + * on stack to hold pfns. 
+ */ + for (npage = 0; npage < total_pages; npage++) { + unsigned long cur_gfn = gfn + npage; + unsigned long pfn; + + ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &cur_gfn, 1, + IOMMU_READ | IOMMU_WRITE, &pfn); + if (ret != 1) { + gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", + cur_gfn, ret); + goto err; + } + + if (!pfn_valid(pfn)) { + gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); + npage++; + ret = -EFAULT; + goto err; + } + + if (npage == 0) + base_pfn = pfn; + else if (base_pfn + npage != pfn) { + gvt_vgpu_err("The pages are not continuous\n"); + ret = -EINVAL; + npage++; + goto err; + } + } + + *page = pfn_to_page(base_pfn); + return 0; +err: + gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); + return ret; +} + static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, - dma_addr_t *dma_addr) + dma_addr_t *dma_addr, unsigned long size) { struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - struct page *page; - unsigned long pfn; + struct page *page = NULL; int ret; - /* Pin the page first. */ - ret = vfio_pin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1, - IOMMU_READ | IOMMU_WRITE, &pfn); - if (ret != 1) { - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", - gfn, ret); - return -EINVAL; - } + ret = gvt_pin_guest_page(vgpu, gfn, size, &page); + if (ret) + return ret; /* Setup DMA mapping. 
*/ - page = pfn_to_page(pfn); - *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (dma_mapping_error(dev, *dma_addr)) { - gvt_vgpu_err("DMA mapping failed for gfn 0x%lx\n", gfn); - vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); - return -ENOMEM; + *dma_addr = dma_map_page(dev, page, 0, size, PCI_DMA_BIDIRECTIONAL); + ret = dma_mapping_error(dev, *dma_addr); + if (ret) { + gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n", + page_to_pfn(page), ret); + gvt_unpin_guest_page(vgpu, gfn, size); } - return 0; + return ret; } static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, - dma_addr_t dma_addr) + dma_addr_t dma_addr, unsigned long size) { struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; - int ret; - dma_unmap_page(dev, dma_addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - ret = vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); - WARN_ON(ret != 1); + dma_unmap_page(dev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); + gvt_unpin_guest_page(vgpu, gfn, size); } static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, @@ -185,7 +244,7 @@ static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) } static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, - dma_addr_t dma_addr) + dma_addr_t dma_addr, unsigned long size) { struct gvt_dma *new, *itr; struct rb_node **link, *parent = NULL; @@ -197,6 +256,7 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, new->vgpu = vgpu; new->gfn = gfn; new->dma_addr = dma_addr; + new->size = size; kref_init(&new->ref); /* gfn_cache maps gfn to struct gvt_dma. 
*/ @@ -254,7 +314,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu) break; } dma = rb_entry(node, struct gvt_dma, gfn_node); - gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr); + gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size); __gvt_cache_remove_entry(vgpu, dma); mutex_unlock(&vgpu->vdev.cache_lock); } @@ -509,7 +569,8 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, if (!entry) continue; - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr); + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, + entry->size); __gvt_cache_remove_entry(vgpu, entry); } mutex_unlock(&vgpu->vdev.cache_lock); @@ -1616,7 +1677,7 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) } int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, - dma_addr_t *dma_addr) + unsigned long size, dma_addr_t *dma_addr) { struct kvmgt_guest_info *info; struct intel_vgpu *vgpu; @@ -1633,11 +1694,11 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, entry = __gvt_cache_find_gfn(info->vgpu, gfn); if (!entry) { - ret = gvt_dma_map_page(vgpu, gfn, dma_addr); + ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size); if (ret) goto err_unlock; - ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr); + ret = __gvt_cache_add(info->vgpu, gfn, *dma_addr, size); if (ret) goto err_unmap; } else { @@ -1649,7 +1710,7 @@ int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, return 0; err_unmap: - gvt_dma_unmap_page(vgpu, gfn, *dma_addr); + gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size); err_unlock: mutex_unlock(&info->vgpu->vdev.cache_lock); return ret; @@ -1659,7 +1720,8 @@ static void __gvt_dma_release(struct kref *ref) { struct gvt_dma *entry = container_of(ref, typeof(*entry), ref); - gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr); + gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr, + entry->size); __gvt_cache_remove_entry(entry->vgpu, entry); } diff --git 
a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 32ffcd566cdd..67f19992b226 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -230,17 +230,18 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn( /** * intel_gvt_hypervisor_dma_map_guest_page - setup dma map for guest page * @vgpu: a vGPU - * @gpfn: guest pfn + * @gfn: guest pfn + * @size: page size * @dma_addr: retrieve allocated dma addr * * Returns: * 0 on success, negative error code if failed. */ static inline int intel_gvt_hypervisor_dma_map_guest_page( - struct intel_vgpu *vgpu, unsigned long gfn, + struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, dma_addr_t *dma_addr) { - return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, + return intel_gvt_host.mpt->dma_map_guest_page(vgpu->handle, gfn, size, dma_addr); } -- 2.30.2