LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Wei Liu <wei.liu@kernel.org>
To: Linux on Hyper-V List <linux-hyperv@vger.kernel.org>
Cc: virtualization@lists.linux-foundation.org,
	Linux Kernel List <linux-kernel@vger.kernel.org>,
	Michael Kelley <mikelley@microsoft.com>,
	Vineeth Pillai <viremana@linux.microsoft.com>,
	Sunil Muthuswamy <sunilmut@microsoft.com>,
	Nuno Das Neves <nunodasneves@linux.microsoft.com>,
	kumarpraveen@linux.microsoft.com, pasha.tatashin@soleen.com,
	Wei Liu <wei.liu@kernel.org>, Jonathan Corbet <corbet@lwn.net>,
	"K. Y. Srinivasan" <kys@microsoft.com>,
	Haiyang Zhang <haiyangz@microsoft.com>,
	Stephen Hemminger <sthemmin@microsoft.com>,
	Dexuan Cui <decui@microsoft.com>,
	Lillian Grassin-Drake <ligrassi@microsoft.com>,
	Muminul Islam <muislam@microsoft.com>,
	linux-doc@vger.kernel.org (open list:DOCUMENTATION)
Subject: [RFC v1 7/8] mshv: implement in-kernel device framework
Date: Fri,  9 Jul 2021 11:43:38 +0000	[thread overview]
Message-ID: <20210709114339.3467637-8-wei.liu@kernel.org> (raw)
In-Reply-To: <20210709114339.3467637-1-wei.liu@kernel.org>

This is basically the same code adopted from KVM. The main user case is
the future MSHV-VFIO bridge device. We don't have any plan to support
in-kernel device emulation yet, but it wouldn't hurt to make the code
more flexible.

Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 Documentation/virt/mshv/api.rst |  12 +++
 drivers/hv/mshv_main.c          | 181 ++++++++++++++++++++++++++++++++
 include/linux/mshv.h            |  57 ++++++++++
 include/uapi/linux/mshv.h       |  36 +++++++
 4 files changed, 286 insertions(+)

diff --git a/Documentation/virt/mshv/api.rst b/Documentation/virt/mshv/api.rst
index 56a6edfcfe29..7d35dd589831 100644
--- a/Documentation/virt/mshv/api.rst
+++ b/Documentation/virt/mshv/api.rst
@@ -170,4 +170,16 @@ Can be used to get/set various properties of a partition.
 Some properties can only be set at partition creation. For these, there are
 parameters in MSHV_CREATE_PARTITION.
 
+3.12 MSHV_CREATE_DEVICE
+-----------------------
+:Type: partition ioctl
+:Parameters: struct mshv_create_device
+:Returns: 0 on success
+
+Can be used to create an in-kernel device.
+
+If the MSHV_CREATE_DEVICE_TEST flag is set, only test whether the
+device type is supported (not necessarily whether it can be created
+in the current vm).
 
+Currently only supports VFIO type device.
diff --git a/drivers/hv/mshv_main.c b/drivers/hv/mshv_main.c
index 4cbc520471e4..84c774a561de 100644
--- a/drivers/hv/mshv_main.c
+++ b/drivers/hv/mshv_main.c
@@ -20,6 +20,8 @@
 #include <linux/random.h>
 #include <linux/mshv.h>
 #include <linux/mshv_eventfd.h>
+#include <linux/hyperv.h>
+#include <linux/nospec.h>
 #include <asm/mshyperv.h>
 
 #include "mshv.h"
@@ -33,6 +35,7 @@ static int mshv_vp_release(struct inode *inode, struct file *filp);
 static long mshv_vp_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg);
 static struct mshv_partition *mshv_partition_get(struct mshv_partition *partition);
 static void mshv_partition_put(struct mshv_partition *partition);
+static void mshv_partition_put_no_destroy(struct mshv_partition *partition);
 static int mshv_partition_release(struct inode *inode, struct file *filp);
 static long mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg);
 static int mshv_dev_open(struct inode *inode, struct file *filp);
@@ -912,6 +915,172 @@ mshv_partition_ioctl_set_msi_routing(struct mshv_partition *partition,
 	return ret;
 }
 
+static int mshv_device_ioctl_attr(struct mshv_device *dev,
+				 int (*accessor)(struct mshv_device *dev,
+						 struct mshv_device_attr *attr),
+				 unsigned long arg)
+{
+	struct mshv_device_attr attr;
+
+	if (!accessor)
+		return -EPERM;
+
+	if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+		return -EFAULT;
+
+	return accessor(dev, &attr);
+}
+
+static long mshv_device_ioctl(struct file *filp, unsigned int ioctl,
+			      unsigned long arg)
+{
+	struct mshv_device *dev = filp->private_data;
+
+	switch (ioctl) {
+	case MSHV_SET_DEVICE_ATTR:
+		return mshv_device_ioctl_attr(dev, dev->ops->set_attr, arg);
+	case MSHV_GET_DEVICE_ATTR:
+		return mshv_device_ioctl_attr(dev, dev->ops->get_attr, arg);
+	case MSHV_HAS_DEVICE_ATTR:
+		return mshv_device_ioctl_attr(dev, dev->ops->has_attr, arg);
+	default:
+		if (dev->ops->ioctl)
+			return dev->ops->ioctl(dev, ioctl, arg);
+
+		return -ENOTTY;
+	}
+}
+
+static int mshv_device_release(struct inode *inode, struct file *filp)
+{
+	struct mshv_device *dev = filp->private_data;
+	struct mshv_partition *partition = dev->partition;
+
+	if (dev->ops->release) {
+		mutex_lock(&partition->mutex);
+		list_del(&dev->partition_node);
+		dev->ops->release(dev);
+		mutex_unlock(&partition->mutex);
+	}
+
+	mshv_partition_put(partition);
+	return 0;
+}
+
+static const struct file_operations mshv_device_fops = {
+	.unlocked_ioctl = mshv_device_ioctl,
+	.release = mshv_device_release,
+};
+
+static const struct mshv_device_ops *mshv_device_ops_table[MSHV_DEV_TYPE_MAX];
+
+int mshv_register_device_ops(const struct mshv_device_ops *ops, u32 type)
+{
+	if (type >= ARRAY_SIZE(mshv_device_ops_table))
+		return -ENOSPC;
+
+	if (mshv_device_ops_table[type] != NULL)
+		return -EEXIST;
+
+	mshv_device_ops_table[type] = ops;
+	return 0;
+}
+
+void mshv_unregister_device_ops(u32 type)
+{
+	if (type >= ARRAY_SIZE(mshv_device_ops_table))
+		return;
+	mshv_device_ops_table[type] = NULL;
+}
+
+static long
+mshv_partition_ioctl_create_device(struct mshv_partition *partition,
+	void __user *user_args)
+{
+	long r;
+	struct mshv_create_device tmp, *cd;
+	struct mshv_device *dev;
+	const struct mshv_device_ops *ops;
+	int type;
+
+	if (copy_from_user(&tmp, user_args, sizeof(tmp))) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	cd = &tmp;
+
+	if (cd->type >= ARRAY_SIZE(mshv_device_ops_table)) {
+		r = -ENODEV;
+		goto out;
+	}
+
+	type = array_index_nospec(cd->type, ARRAY_SIZE(mshv_device_ops_table));
+	ops = mshv_device_ops_table[type];
+	if (ops == NULL) {
+		r = -ENODEV;
+		goto out;
+	}
+
+	if (cd->flags & MSHV_CREATE_DEVICE_TEST) {
+		r = 0;
+		goto out;
+	}
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL_ACCOUNT);
+	if (!dev) {
+		r = -ENOMEM;
+		goto out;
+	}
+
+	dev->ops = ops;
+	dev->partition = partition;
+
+	r = ops->create(dev, type);
+	if (r < 0) {
+		kfree(dev);
+		goto out;
+	}
+
+	list_add(&dev->partition_node, &partition->devices);
+
+	if (ops->init)
+		ops->init(dev);
+
+	mshv_partition_get(partition);
+	r = anon_inode_getfd(ops->name, &mshv_device_fops, dev, O_RDWR | O_CLOEXEC);
+	if (r < 0) {
+		mshv_partition_put_no_destroy(partition);
+		list_del(&dev->partition_node);
+		ops->destroy(dev);
+		goto out;
+	}
+
+	cd->fd = r;
+	r = 0;
+
+	if (copy_to_user(user_args, &tmp, sizeof(tmp))) {
+		r = -EFAULT;
+		goto out;
+	}
+out:
+	return r;
+}
+
+static void mshv_destroy_devices(struct mshv_partition *partition)
+{
+	struct mshv_device *dev, *tmp;
+
+	/*
+	 * No need to take any lock since at this point nobody else can
+	 * reference this partition.
+	 */
+	list_for_each_entry_safe(dev, tmp, &partition->devices, partition_node) {
+		list_del(&dev->partition_node);
+		dev->ops->destroy(dev);
+	}
+}
+
 static long
 mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
@@ -965,6 +1134,9 @@ mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 	case MSHV_GET_GPA_ACCESS_STATES:
 		ret = mshv_partition_ioctl_get_gpa_access_state(partition,
 							   (void __user *)arg);
+	case MSHV_CREATE_DEVICE:
+		ret = mshv_partition_ioctl_create_device(partition,
+							 (void __user *)arg);
 		break;
 	default:
 		ret = -ENOTTY;
@@ -1033,6 +1205,7 @@ destroy_partition(struct mshv_partition *partition)
 		vfree(region->pages);
 	}
 
+	mshv_destroy_devices(partition);
 	mshv_free_msi_routing(partition);
 	kfree(partition);
 }
@@ -1052,6 +1225,12 @@ mshv_partition_put(struct mshv_partition *partition)
 		destroy_partition(partition);
 }
 
+static void
+mshv_partition_put_no_destroy(struct mshv_partition *partition)
+{
+	WARN_ON(refcount_dec_and_test(&partition->ref_count));
+}
+
 static int
 mshv_partition_release(struct inode *inode, struct file *filp)
 {
@@ -1122,6 +1301,8 @@ mshv_ioctl_create_partition(void __user *user_arg)
 
 	INIT_HLIST_HEAD(&partition->irq_ack_notifier_list);
 
+	INIT_LIST_HEAD(&partition->devices);
+
 	fd = get_unused_fd_flags(O_CLOEXEC);
 	if (fd < 0) {
 		ret = fd;
diff --git a/include/linux/mshv.h b/include/linux/mshv.h
index fc655b60c5cd..c557ffeec90c 100644
--- a/include/linux/mshv.h
+++ b/include/linux/mshv.h
@@ -59,6 +59,8 @@ struct mshv_partition {
 	struct srcu_struct irq_srcu;
 	struct hlist_head irq_ack_notifier_list;
 
+	struct list_head devices;
+
 	struct {
 		spinlock_t        lock;
 		struct list_head  items;
@@ -121,4 +123,59 @@ struct mshv {
 	} partitions;
 };
 
+struct mshv_device {
+	const struct mshv_device_ops *ops;
+	struct mshv_partition *partition;
+	void *private;
+	struct list_head partition_node;
+
+};
+
+/* create, destroy, and name are mandatory */
+struct mshv_device_ops {
+	const char *name;
+
+	/*
+	 * create is called holding partition->mutex and any operations not suitable
+	 * to do while holding the lock should be deferred to init (see
+	 * below).
+	 */
+	int (*create)(struct mshv_device *dev, u32 type);
+
+	/*
+	 * init is called after create if create is successful and is called
+	 * outside of holding partition->mutex.
+	 */
+	void (*init)(struct mshv_device *dev);
+
+	/*
+	 * Destroy is responsible for freeing dev.
+	 *
+	 * Destroy may be called before or after destructors are called
+	 * on emulated I/O regions, depending on whether a reference is
+	 * held by a vcpu or other mshv component that gets destroyed
+	 * after the emulated I/O.
+	 */
+	void (*destroy)(struct mshv_device *dev);
+
+	/*
+	 * Release is an alternative method to free the device. It is
+	 * called when the device file descriptor is closed. Once
+	 * release is called, the destroy method will not be called
+	 * anymore as the device is removed from the device list of
+	 * the VM. partition->mutex is held.
+	 */
+	void (*release)(struct mshv_device *dev);
+
+	int (*set_attr)(struct mshv_device *dev, struct mshv_device_attr *attr);
+	int (*get_attr)(struct mshv_device *dev, struct mshv_device_attr *attr);
+	int (*has_attr)(struct mshv_device *dev, struct mshv_device_attr *attr);
+	long (*ioctl)(struct mshv_device *dev, unsigned int ioctl,
+		      unsigned long arg);
+	int (*mmap)(struct mshv_device *dev, struct vm_area_struct *vma);
+};
+
+int mshv_register_device_ops(const struct mshv_device_ops *ops, u32 type);
+void mshv_unregister_device_ops(u32 type);
+
 #endif
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index cba318ee7cf5..ed110109492f 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -158,6 +158,14 @@ struct mshv_msi_routing {
 #define MSHV_SET_VP_STATE	_IOWR(MSHV_IOCTL, 0x0B, struct mshv_vp_state)
 #define MSHV_TRANSLATE_GVA	_IOWR(MSHV_IOCTL, 0x0E, struct mshv_translate_gva)
 
+/* ioctl for device fd */
+#define MSHV_CREATE_DEVICE	  _IOWR(MSHV_IOCTL, 0x13, struct mshv_create_device)
+
+/* ioctls for fds returned by MSHV_CREATE_DEVICE */
+#define MSHV_SET_DEVICE_ATTR	  _IOW(MSHV_IOCTL, 0x14, struct mshv_device_attr)
+#define MSHV_GET_DEVICE_ATTR	  _IOW(MSHV_IOCTL, 0x15, struct mshv_device_attr)
+#define MSHV_HAS_DEVICE_ATTR	  _IOW(MSHV_IOCTL, 0x16, struct mshv_device_attr)
+
 /* register page mapping example:
  * struct hv_vp_register_page *regs = mmap(NULL,
  *					   4096,
@@ -184,4 +192,32 @@ union mshv_partition_property_page_access_tracking_config {
 	__u64 as_uint64;
 } __packed;
 
+/*
+ * Device control API.
+ */
+#define MSHV_CREATE_DEVICE_TEST		1
+
+struct mshv_create_device {
+	__u32	type;	/* in: MSHV_DEV_TYPE_xxx */
+	__u32	fd;	/* out: device handle */
+	__u32	flags;	/* in: MSHV_CREATE_DEVICE_xxx */
+};
+
+#define  MSHV_DEV_VFIO_GROUP			1
+#define   MSHV_DEV_VFIO_GROUP_ADD			1
+#define   MSHV_DEV_VFIO_GROUP_DEL			2
+
+enum mshv_device_type {
+	MSHV_DEV_TYPE_VFIO,
+#define MSHV_DEV_TYPE_VFIO		MSHV_DEV_TYPE_VFIO
+	MSHV_DEV_TYPE_MAX,
+};
+
+struct mshv_device_attr {
+	__u32	flags;		/* no flags currently defined */
+	__u32	group;		/* device-defined */
+	__u64	attr;		/* group-defined */
+	__u64	addr;		/* userspace address of attr data */
+};
+
 #endif
-- 
2.30.2


  parent reply	other threads:[~2021-07-09 11:44 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-09 11:43 [RFC v1 0/8] MSHV: add PV-IOMMU driver Wei Liu
2021-07-09 11:43 ` [RFC v1 1/8] x86/hyperv: export hv_build_pci_dev_id Wei Liu
2021-07-09 11:43 ` [RFC v1 2/8] asm-generic/hyperv: add device domain definitions Wei Liu
2021-07-09 11:43 ` [RFC v1 3/8] intel/vt-d: make DMAR table parsing code more flexible Wei Liu
2021-07-09 12:56   ` Robin Murphy
2021-07-09 13:42     ` Wei Liu
2021-07-09 11:43 ` [RFC v1 4/8] intel/vt-d: export intel_iommu_get_resv_regions Wei Liu
2021-07-09 14:17   ` Lu Baolu
2021-07-09 14:21     ` Wei Liu
2021-07-09 11:43 ` [RFC v1 5/8] mshv: add paravirtualized IOMMU support Wei Liu
2021-08-03 18:40   ` Praveen Kumar
2021-08-03 21:47     ` Wei Liu
2021-08-04  6:43       ` Praveen Kumar
2021-08-10 10:46         ` Wei Liu
2021-07-09 11:43 ` [RFC v1 6/8] mshv: command line option to skip devices in PV-IOMMU Wei Liu
2021-07-09 12:46   ` Robin Murphy
2021-07-09 13:34     ` Wei Liu
2021-08-03 18:50   ` Praveen Kumar
2021-08-03 21:56     ` Wei Liu
2021-08-04  7:03       ` Praveen Kumar
2021-08-10 10:04         ` Wei Liu
2021-07-09 11:43 ` Wei Liu [this message]
2021-07-09 13:02   ` [RFC v1 7/8] mshv: implement in-kernel device framework Matthew Wilcox
2021-07-09 13:50     ` Wei Liu
2021-07-09 15:32       ` Matthew Wilcox
2021-07-09 16:27         ` Wei Liu
2021-07-09 16:38           ` Matthew Wilcox
2021-07-09 19:14             ` Wei Liu
2021-07-09 19:48               ` Matthew Wilcox
2021-07-09 20:11                 ` Wei Liu
2021-08-03 19:12   ` Praveen Kumar
2021-08-03 22:04     ` Wei Liu
2021-07-09 11:43 ` [RFC v1 8/8] mshv: add vfio bridge device Wei Liu
2021-08-03 19:27   ` Praveen Kumar
2021-08-10 10:52     ` Wei Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210709114339.3467637-8-wei.liu@kernel.org \
    --to=wei.liu@kernel.org \
    --cc=corbet@lwn.net \
    --cc=decui@microsoft.com \
    --cc=haiyangz@microsoft.com \
    --cc=kumarpraveen@linux.microsoft.com \
    --cc=kys@microsoft.com \
    --cc=ligrassi@microsoft.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mikelley@microsoft.com \
    --cc=muislam@microsoft.com \
    --cc=nunodasneves@linux.microsoft.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=sthemmin@microsoft.com \
    --cc=sunilmut@microsoft.com \
    --cc=viremana@linux.microsoft.com \
    --cc=virtualization@lists.linux-foundation.org \
    --subject='Re: [RFC v1 7/8] mshv: implement in-kernel device framework' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).