Netdev Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Xie Yongji <xieyongji@bytedance.com>
To: mst@redhat.com, jasowang@redhat.com, stefanha@redhat.com,
	sgarzare@redhat.com, parav@nvidia.com, hch@infradead.org,
	christian.brauner@canonical.com, rdunlap@infradead.org,
	willy@infradead.org, viro@zeniv.linux.org.uk, axboe@kernel.dk,
	bcrl@kvack.org, corbet@lwn.net, mika.penttila@nextfour.com,
	dan.carpenter@oracle.com, joro@8bytes.org,
	gregkh@linuxfoundation.org, zhe.he@windriver.com,
	xiaodong.liu@intel.com, joe@perches.com
Cc: songmuchun@bytedance.com,
	virtualization@lists.linux-foundation.org,
	netdev@vger.kernel.org, kvm@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, iommu@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH v10 14/17] vdpa: Support transferring virtual addressing during DMA mapping
Date: Thu, 29 Jul 2021 15:35:00 +0800	[thread overview]
Message-ID: <20210729073503.187-15-xieyongji@bytedance.com> (raw)
In-Reply-To: <20210729073503.187-1-xieyongji@bytedance.com>

This patch introduces an attribute for vDPA device to indicate
whether virtual address can be used. If vDPA device driver set
it, vhost-vdpa bus driver will not pin user page and transfer
userspace virtual address instead of physical address during
DMA mapping. And corresponding vma->vm_file and offset will be
also passed as an opaque pointer.

Suggested-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vdpa/ifcvf/ifcvf_main.c   |  2 +-
 drivers/vdpa/mlx5/net/mlx5_vnet.c |  2 +-
 drivers/vdpa/vdpa.c               |  9 +++-
 drivers/vdpa/vdpa_sim/vdpa_sim.c  |  2 +-
 drivers/vdpa/virtio_pci/vp_vdpa.c |  2 +-
 drivers/vhost/vdpa.c              | 99 ++++++++++++++++++++++++++++++++++-----
 include/linux/vdpa.h              | 19 ++++++--
 7 files changed, 116 insertions(+), 19 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index b4b89a66607f..fc848aafe5a9 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -492,7 +492,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
-				    dev, &ifc_vdpa_ops, NULL);
+				    dev, &ifc_vdpa_ops, NULL, false);
 	if (adapter == NULL) {
 		IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
 		return -ENOMEM;
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 683452c41a36..79cb2f130bf0 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2035,7 +2035,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
 	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
 
 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
-				 name);
+				 name, false);
 	if (IS_ERR(ndev))
 		return PTR_ERR(ndev);
 
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index d77d59811389..41377df674d5 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -71,6 +71,7 @@ static void vdpa_release_dev(struct device *d)
  * @config: the bus operations that is supported by this device
  * @size: size of the parent structure that contains private data
  * @name: name of the vdpa device; optional.
+ * @use_va: indicate whether virtual address must be used by this device
  *
  * Driver should use vdpa_alloc_device() wrapper macro instead of
  * using this directly.
@@ -80,7 +81,8 @@ static void vdpa_release_dev(struct device *d)
  */
 struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 					const struct vdpa_config_ops *config,
-					size_t size, const char *name)
+					size_t size, const char *name,
+					bool use_va)
 {
 	struct vdpa_device *vdev;
 	int err = -EINVAL;
@@ -91,6 +93,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 	if (!!config->dma_map != !!config->dma_unmap)
 		goto err;
 
+	/* It should only work for the device that use on-chip IOMMU */
+	if (use_va && !(config->dma_map || config->set_map))
+		goto err;
+
 	err = -ENOMEM;
 	vdev = kzalloc(size, GFP_KERNEL);
 	if (!vdev)
@@ -106,6 +112,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 	vdev->index = err;
 	vdev->config = config;
 	vdev->features_valid = false;
+	vdev->use_va = use_va;
 
 	if (name)
 		err = dev_set_name(&vdev->dev, "%s", name);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index f456f4baf86d..472d573d755c 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -250,7 +250,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
 		ops = &vdpasim_config_ops;
 
 	vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
-				    dev_attr->name);
+				    dev_attr->name, false);
 	if (!vdpasim)
 		goto err_alloc;
 
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index ee723c645aec..c828b9f9d92a 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -435,7 +435,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return ret;
 
 	vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa,
-				    dev, &vp_vdpa_ops, NULL);
+				    dev, &vp_vdpa_ops, NULL, false);
 	if (vp_vdpa == NULL) {
 		dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n");
 		return -ENOMEM;
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index dec43895e84b..975ae8b8606a 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -533,8 +533,28 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 	}
 }
 
+static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+{
+	struct vhost_dev *dev = &v->vdev;
+	struct vhost_iotlb *iotlb = dev->iotlb;
+	struct vhost_iotlb_map *map;
+	struct vdpa_map_file *map_file;
+
+	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
+		map_file = (struct vdpa_map_file *)map->opaque;
+		fput(map_file->file);
+		kfree(map_file);
+		vhost_iotlb_map_free(iotlb, map);
+	}
+}
+
 static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 {
+	struct vdpa_device *vdpa = v->vdpa;
+
+	if (vdpa->use_va)
+		return vhost_vdpa_va_unmap(v, start, last);
+
 	return vhost_vdpa_pa_unmap(v, start, last);
 }
 
@@ -569,21 +589,21 @@ static int perm_to_iommu_flags(u32 perm)
 	return flags | IOMMU_CACHE;
 }
 
-static int vhost_vdpa_map(struct vhost_vdpa *v,
-			  u64 iova, u64 size, u64 pa, u32 perm)
+static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
+			  u64 size, u64 pa, u32 perm, void *opaque)
 {
 	struct vhost_dev *dev = &v->vdev;
 	struct vdpa_device *vdpa = v->vdpa;
 	const struct vdpa_config_ops *ops = vdpa->config;
 	int r = 0;
 
-	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
-				  pa, perm);
+	r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
+				      pa, perm, opaque);
 	if (r)
 		return r;
 
 	if (ops->dma_map) {
-		r = ops->dma_map(vdpa, iova, size, pa, perm, NULL);
+		r = ops->dma_map(vdpa, iova, size, pa, perm, opaque);
 	} else if (ops->set_map) {
 		if (!v->in_batch)
 			r = ops->set_map(vdpa, dev->iotlb);
@@ -591,13 +611,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
 		r = iommu_map(v->domain, iova, pa, size,
 			      perm_to_iommu_flags(perm));
 	}
-
-	if (r)
+	if (r) {
 		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
-	else
+		return r;
+	}
+
+	if (!vdpa->use_va)
 		atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
 
-	return r;
+	return 0;
 }
 
 static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
@@ -618,6 +640,56 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
 	}
 }
 
+static int vhost_vdpa_va_map(struct vhost_vdpa *v,
+			     u64 iova, u64 size, u64 uaddr, u32 perm)
+{
+	struct vhost_dev *dev = &v->vdev;
+	u64 offset, map_size, map_iova = iova;
+	struct vdpa_map_file *map_file;
+	struct vm_area_struct *vma;
+	int ret;
+
+	mmap_read_lock(dev->mm);
+
+	while (size) {
+		vma = find_vma(dev->mm, uaddr);
+		if (!vma) {
+			ret = -EINVAL;
+			break;
+		}
+		map_size = min(size, vma->vm_end - uaddr);
+		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
+			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
+			goto next;
+
+		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
+		if (!map_file) {
+			ret = -ENOMEM;
+			break;
+		}
+		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
+		map_file->offset = offset;
+		map_file->file = get_file(vma->vm_file);
+		ret = vhost_vdpa_map(v, map_iova, map_size, uaddr,
+				     perm, map_file);
+		if (ret) {
+			fput(map_file->file);
+			kfree(map_file);
+			break;
+		}
+next:
+		size -= map_size;
+		uaddr += map_size;
+		map_iova += map_size;
+	}
+	if (ret)
+		vhost_vdpa_unmap(v, iova, map_iova - iova);
+
+	mmap_read_unlock(dev->mm);
+
+	return ret;
+}
+
 static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
 			     u64 iova, u64 size, u64 uaddr, u32 perm)
 {
@@ -684,7 +756,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
 				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
 				ret = vhost_vdpa_map(v, iova, csize,
 						     map_pfn << PAGE_SHIFT,
-						     perm);
+						     perm, NULL);
 				if (ret) {
 					/*
 					 * Unpin the pages that are left unmapped
@@ -713,7 +785,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
 
 	/* Pin the rest chunk */
 	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
-			     map_pfn << PAGE_SHIFT, perm);
+			     map_pfn << PAGE_SHIFT, perm, NULL);
 out:
 	if (ret) {
 		if (nchunks) {
@@ -746,6 +818,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 					   struct vhost_iotlb_msg *msg)
 {
 	struct vhost_dev *dev = &v->vdev;
+	struct vdpa_device *vdpa = v->vdpa;
 	struct vhost_iotlb *iotlb = dev->iotlb;
 
 	if (msg->iova < v->range.first || !msg->size ||
@@ -757,6 +830,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 				    msg->iova + msg->size - 1))
 		return -EEXIST;
 
+	if (vdpa->use_va)
+		return vhost_vdpa_va_map(v, msg->iova, msg->size,
+					 msg->uaddr, msg->perm);
+
 	return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr,
 				 msg->perm);
 }
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 4655afc1359b..005e718e4e09 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -66,6 +66,7 @@ struct vdpa_mgmt_dev;
  * @config: the configuration ops for this device.
  * @index: device index
  * @features_valid: were features initialized? for legacy guests
+ * @use_va: indicate whether virtual address must be used by this device
  * @nvqs: maximum number of supported virtqueues
  * @mdev: management device pointer; caller must setup when registering device as part
  *	  of dev_add() mgmtdev ops callback before invoking _vdpa_register_device().
@@ -76,6 +77,7 @@ struct vdpa_device {
 	const struct vdpa_config_ops *config;
 	unsigned int index;
 	bool features_valid;
+	bool use_va;
 	int nvqs;
 	struct vdpa_mgmt_dev *mdev;
 };
@@ -91,6 +93,16 @@ struct vdpa_iova_range {
 };
 
 /**
+ * Corresponding file area for device memory mapping
+ * @file: vma->vm_file for the mapping
+ * @offset: mapping offset in the vm_file
+ */
+struct vdpa_map_file {
+	struct file *file;
+	u64 offset;
+};
+
+/**
  * struct vdpa_config_ops - operations for configuring a vDPA device.
  * Note: vDPA device drivers are required to implement all of the
  * operations unless it is mentioned to be optional in the following
@@ -277,14 +289,15 @@ struct vdpa_config_ops {
 
 struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 					const struct vdpa_config_ops *config,
-					size_t size, const char *name);
+					size_t size, const char *name,
+					bool use_va);
 
-#define vdpa_alloc_device(dev_struct, member, parent, config, name)   \
+#define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va)   \
 			  container_of(__vdpa_alloc_device( \
 				       parent, config, \
 				       sizeof(dev_struct) + \
 				       BUILD_BUG_ON_ZERO(offsetof( \
-				       dev_struct, member)), name), \
+				       dev_struct, member)), name, use_va), \
 				       dev_struct, member)
 
 int vdpa_register_device(struct vdpa_device *vdev, int nvqs);
-- 
2.11.0


  parent reply	other threads:[~2021-07-29  7:37 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-29  7:34 [PATCH v10 00/17] Introduce VDUSE - vDPA Device in Userspace Xie Yongji
2021-07-29  7:34 ` [PATCH v10 01/17] iova: Export alloc_iova_fast() and free_iova_fast() Xie Yongji
2021-08-03  7:41   ` Jason Wang
2021-08-03  7:41   ` Jason Wang
2021-08-03  8:54     ` Yongji Xie
2021-08-03 10:53       ` Robin Murphy
2021-08-04  5:02         ` Yongji Xie
2021-08-04 15:43           ` Robin Murphy
2021-08-05 12:34             ` Yongji Xie
2021-08-05 13:31               ` Jason Wang
2021-08-09  5:56                 ` Yongji Xie
2021-08-10  3:02                   ` Jason Wang
2021-08-10  7:43                     ` Yongji Xie
2021-07-29  7:34 ` [PATCH v10 02/17] file: Export receive_fd() to modules Xie Yongji
2021-08-03  7:45   ` Jason Wang
2021-08-03  9:01     ` Yongji Xie
2021-08-04  8:27       ` Jason Wang
2021-07-29  7:34 ` [PATCH v10 03/17] vdpa: Fix code indentation Xie Yongji
2021-08-03  7:50   ` Jason Wang
2021-08-03  9:13     ` Yongji Xie
2021-07-29  7:34 ` [PATCH v10 04/17] vdpa: Fail the vdpa_reset() if fail to set device status to zero Xie Yongji
2021-08-03  7:58   ` Jason Wang
2021-08-03  9:31     ` Yongji Xie
2021-08-04  8:30       ` Jason Wang
2021-07-29  7:34 ` [PATCH v10 05/17] vhost-vdpa: Fail the vhost_vdpa_set_status() on reset failure Xie Yongji
2021-08-03  8:10   ` Jason Wang
2021-08-03  9:50     ` Yongji Xie
2021-08-04  8:33       ` Jason Wang
2021-07-29  7:34 ` [PATCH v10 06/17] vhost-vdpa: Handle the failure of vdpa_reset() Xie Yongji
2021-07-29  7:34 ` [PATCH v10 07/17] virtio: Don't set FAILED status bit on device index allocation failure Xie Yongji
2021-08-03  8:02   ` Jason Wang
2021-08-03  9:17     ` Yongji Xie
2021-07-29  7:34 ` [PATCH v10 08/17] virtio_config: Add a return value to reset function Xie Yongji
2021-07-29  7:34 ` [PATCH v10 09/17] virtio-vdpa: Handle the failure of vdpa_reset() Xie Yongji
2021-07-29  7:34 ` [PATCH v10 10/17] virtio: Handle device reset failure in register_virtio_device() Xie Yongji
2021-08-03  8:09   ` Jason Wang
2021-08-03  9:38     ` Yongji Xie
2021-08-04  8:32       ` Jason Wang
2021-08-04  8:50         ` Yongji Xie
2021-08-04  8:54           ` Jason Wang
2021-08-04  9:07             ` Yongji Xie
2021-08-05  7:12               ` Jason Wang
2021-07-29  7:34 ` [PATCH v10 11/17] vhost-iotlb: Add an opaque pointer for vhost IOTLB Xie Yongji
2021-07-29  7:34 ` [PATCH v10 12/17] vdpa: Add an opaque pointer for vdpa_config_ops.dma_map() Xie Yongji
2021-07-29  7:34 ` [PATCH v10 13/17] vdpa: factor out vhost_vdpa_pa_map() and vhost_vdpa_pa_unmap() Xie Yongji
2021-07-29  7:35 ` Xie Yongji [this message]
2021-07-29  7:35 ` [PATCH v10 15/17] vduse: Implement an MMU-based software IOTLB Xie Yongji
2021-07-29  7:35 ` [PATCH v10 16/17] vduse: Introduce VDUSE - vDPA Device in Userspace Xie Yongji
2021-07-29  9:00   ` Greg KH
2021-07-29  9:57     ` Yongji Xie
2021-08-03  7:30   ` Jason Wang
2021-08-03  8:39     ` Yongji Xie
2021-07-29  7:35 ` [PATCH v10 17/17] Documentation: Add documentation for VDUSE Xie Yongji
2021-08-03  7:35   ` Jason Wang
2021-08-03  8:52     ` Yongji Xie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210729073503.187-15-xieyongji@bytedance.com \
    --to=xieyongji@bytedance.com \
    --cc=axboe@kernel.dk \
    --cc=bcrl@kvack.org \
    --cc=christian.brauner@canonical.com \
    --cc=corbet@lwn.net \
    --cc=dan.carpenter@oracle.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hch@infradead.org \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jasowang@redhat.com \
    --cc=joe@perches.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mika.penttila@nextfour.com \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=parav@nvidia.com \
    --cc=rdunlap@infradead.org \
    --cc=sgarzare@redhat.com \
    --cc=songmuchun@bytedance.com \
    --cc=stefanha@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=willy@infradead.org \
    --cc=xiaodong.liu@intel.com \
    --cc=zhe.he@windriver.com \
    --subject='Re: [PATCH v10 14/17] vdpa: Support transferring virtual addressing during DMA mapping' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
on how to clone and mirror all data and code used for this inbox