LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH V5 0/4] Validate used buffer length
@ 2021-10-27  2:21 Jason Wang
  2021-10-27  2:21 ` [PATCH V5 1/4] virtio_ring: validate " Jason Wang
                   ` (3 more replies)
  0 siblings, 4 replies; 32+ messages in thread
From: Jason Wang @ 2021-10-27  2:21 UTC (permalink / raw)
  To: mst, virtualization
  Cc: linux-kernel, f.hetzelt, david.kaplan, konrad.wilk, Jason Wang

Hi All:

This patch tries to validate the used buffer length in the virtio
core. This help to eliminate the unexpected result caused by buggy or
mailicous devices. For the drivers that can do the validation itself,
they can ask the virtio core to suppress the check.

Changes since V4:

- Fix the out of date description in the commit log

Changes since V3:

- Initialize the buflen to zero when the validation is done by the
  driver.

Jason Wang (4):
  virtio_ring: validate used buffer length
  virtio-net: don't let virtio core to validate used length
  virtio-blk: don't let virtio core to validate used length
  virtio-scsi: don't let virtio core to validate used buffer length

 drivers/block/virtio_blk.c   |  1 +
 drivers/net/virtio_net.c     |  1 +
 drivers/scsi/virtio_scsi.c   |  1 +
 drivers/virtio/virtio_ring.c | 60 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  2 ++
 5 files changed, 65 insertions(+)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-10-27  2:21 [PATCH V5 0/4] Validate used buffer length Jason Wang
@ 2021-10-27  2:21 ` Jason Wang
       [not found]   ` <1635823138.4631283-1-xuanzhuo@linux.alibaba.com>
  2021-11-19 15:09   ` Halil Pasic
  2021-10-27  2:21 ` [PATCH V5 2/4] virtio-net: don't let virtio core to validate used length Jason Wang
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 32+ messages in thread
From: Jason Wang @ 2021-10-27  2:21 UTC (permalink / raw)
  To: mst, virtualization
  Cc: linux-kernel, f.hetzelt, david.kaplan, konrad.wilk, Jason Wang

This patch validate the used buffer length provided by the device
before trying to use it. This is done by record the in buffer length
in a new field in desc_state structure during virtqueue_add(), then we
can fail the virtqueue_get_buf() when we find the device is trying to
give us a used buffer length which is greater than the in buffer
length.

Since some drivers have already done the validation by themselves,
this patch tries to makes the core validation optional. For the driver
that doesn't want the validation, it can set the
suppress_used_validation to be true (which could be overridden by
force_used_validation module parameter). To be more efficient, a
dedicate array is used for storing the validate used length, this
helps to eliminate the cache stress if validation is done by the
driver.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/virtio/virtio_ring.c | 60 ++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  2 ++
 2 files changed, 62 insertions(+)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 4c0ec82cef56..a6e5a3b94337 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -14,6 +14,9 @@
 #include <linux/spinlock.h>
 #include <xen/xen.h>
 
+static bool force_used_validation = false;
+module_param(force_used_validation, bool, 0444);
+
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
 #define BAD_RING(_vq, fmt, args...)				\
@@ -182,6 +185,9 @@ struct vring_virtqueue {
 		} packed;
 	};
 
+	/* Per-descriptor in buffer length */
+	u32 *buflen;
+
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
 
@@ -490,6 +496,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	unsigned int i, n, avail, descs_used, prev, err_idx;
 	int head;
 	bool indirect;
+	u32 buflen = 0;
 
 	START_USE(vq);
 
@@ -571,6 +578,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 						     VRING_DESC_F_NEXT |
 						     VRING_DESC_F_WRITE,
 						     indirect);
+			buflen += sg->length;
 		}
 	}
 	/* Last one doesn't continue. */
@@ -610,6 +618,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	else
 		vq->split.desc_state[head].indir_desc = ctx;
 
+	/* Store in buffer length if necessary */
+	if (vq->buflen)
+		vq->buflen[head] = buflen;
+
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
 	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@@ -784,6 +796,11 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 		BAD_RING(vq, "id %u is not a head!\n", i);
 		return NULL;
 	}
+	if (vq->buflen && unlikely(*len > vq->buflen[i])) {
+		BAD_RING(vq, "used len %d is larger than in buflen %u\n",
+			*len, vq->buflen[i]);
+		return NULL;
+	}
 
 	/* detach_buf_split clears data, so grab it now. */
 	ret = vq->split.desc_state[i].data;
@@ -1062,6 +1079,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	unsigned int i, n, err_idx;
 	u16 head, id;
 	dma_addr_t addr;
+	u32 buflen = 0;
 
 	head = vq->packed.next_avail_idx;
 	desc = alloc_indirect_packed(total_sg, gfp);
@@ -1091,6 +1109,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 			desc[i].addr = cpu_to_le64(addr);
 			desc[i].len = cpu_to_le32(sg->length);
 			i++;
+			if (n >= out_sgs)
+				buflen += sg->length;
 		}
 	}
 
@@ -1144,6 +1164,10 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	vq->packed.desc_state[id].indir_desc = desc;
 	vq->packed.desc_state[id].last = id;
 
+	/* Store in buffer length if necessary */
+	if (vq->buflen)
+		vq->buflen[id] = buflen;
+
 	vq->num_added += 1;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -1179,6 +1203,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	__le16 head_flags, flags;
 	u16 head, id, prev, curr, avail_used_flags;
 	int err;
+	u32 buflen = 0;
 
 	START_USE(vq);
 
@@ -1258,6 +1283,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 					1 << VRING_PACKED_DESC_F_AVAIL |
 					1 << VRING_PACKED_DESC_F_USED;
 			}
+			if (n >= out_sgs)
+				buflen += sg->length;
 		}
 	}
 
@@ -1277,6 +1304,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	vq->packed.desc_state[id].indir_desc = ctx;
 	vq->packed.desc_state[id].last = prev;
 
+	/* Store in buffer length if necessary */
+	if (vq->buflen)
+		vq->buflen[id] = buflen;
+
 	/*
 	 * A driver MUST NOT make the first descriptor in the list
 	 * available before all subsequent descriptors comprising
@@ -1463,6 +1494,11 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
 		BAD_RING(vq, "id %u is not a head!\n", id);
 		return NULL;
 	}
+	if (vq->buflen && unlikely(*len > vq->buflen[id])) {
+		BAD_RING(vq, "used len %d is larger than in buflen %u\n",
+			*len, vq->buflen[id]);
+		return NULL;
+	}
 
 	/* detach_buf_packed clears data, so grab it now. */
 	ret = vq->packed.desc_state[id].data;
@@ -1668,6 +1704,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	struct vring_virtqueue *vq;
 	struct vring_packed_desc *ring;
 	struct vring_packed_desc_event *driver, *device;
+	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
 	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
 	size_t ring_size_in_bytes, event_size_in_bytes;
 
@@ -1757,6 +1794,15 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	if (!vq->packed.desc_extra)
 		goto err_desc_extra;
 
+	if (!drv->suppress_used_validation || force_used_validation) {
+		vq->buflen = kmalloc_array(num, sizeof(*vq->buflen),
+					   GFP_KERNEL);
+		if (!vq->buflen)
+			goto err_buflen;
+	} else {
+		vq->buflen = NULL;
+	}
+
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback) {
 		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
@@ -1769,6 +1815,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	spin_unlock(&vdev->vqs_list_lock);
 	return &vq->vq;
 
+err_buflen:
+	kfree(vq->packed.desc_extra);
 err_desc_extra:
 	kfree(vq->packed.desc_state);
 err_desc_state:
@@ -2176,6 +2224,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					void (*callback)(struct virtqueue *),
 					const char *name)
 {
+	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
 	struct vring_virtqueue *vq;
 
 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
@@ -2235,6 +2284,15 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	if (!vq->split.desc_extra)
 		goto err_extra;
 
+	if (!drv->suppress_used_validation || force_used_validation) {
+		vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen),
+					   GFP_KERNEL);
+		if (!vq->buflen)
+			goto err_buflen;
+	} else {
+		vq->buflen = NULL;
+	}
+
 	/* Put everything in free lists. */
 	vq->free_head = 0;
 	memset(vq->split.desc_state, 0, vring.num *
@@ -2245,6 +2303,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	spin_unlock(&vdev->vqs_list_lock);
 	return &vq->vq;
 
+err_buflen:
+	kfree(vq->split.desc_extra);
 err_extra:
 	kfree(vq->split.desc_state);
 err_state:
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 41edbc01ffa4..44d0e09da2d9 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -152,6 +152,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev);
  * @feature_table_size: number of entries in the feature table array.
  * @feature_table_legacy: same as feature_table but when working in legacy mode.
  * @feature_table_size_legacy: number of entries in feature table legacy array.
+ * @suppress_used_validation: set to not have core validate used length
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
  * @scan: optional function to call after successful probe; intended
  *    for virtio-scsi to invoke a scan.
@@ -168,6 +169,7 @@ struct virtio_driver {
 	unsigned int feature_table_size;
 	const unsigned int *feature_table_legacy;
 	unsigned int feature_table_size_legacy;
+	bool suppress_used_validation;
 	int (*validate)(struct virtio_device *dev);
 	int (*probe)(struct virtio_device *dev);
 	void (*scan)(struct virtio_device *dev);
-- 
2.25.1


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH V5 2/4] virtio-net: don't let virtio core to validate used length
  2021-10-27  2:21 [PATCH V5 0/4] Validate used buffer length Jason Wang
  2021-10-27  2:21 ` [PATCH V5 1/4] virtio_ring: validate " Jason Wang
@ 2021-10-27  2:21 ` Jason Wang
  2021-10-27  2:21 ` [PATCH V5 3/4] virtio-blk: " Jason Wang
  2021-10-27  2:21 ` [PATCH V5 4/4] virtio-scsi: don't let virtio core to validate used buffer length Jason Wang
  3 siblings, 0 replies; 32+ messages in thread
From: Jason Wang @ 2021-10-27  2:21 UTC (permalink / raw)
  To: mst, virtualization
  Cc: linux-kernel, f.hetzelt, david.kaplan, konrad.wilk, Jason Wang

For RX virtuqueue, the used length is validated in all the three paths
(big, small and mergeable). For control vq, we never tries to use used
length. So this patch forbids the core to validate the used length.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6d8c8745bf24..7c43bfc1ce44 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3385,6 +3385,7 @@ static struct virtio_driver virtio_net_driver = {
 	.feature_table_size = ARRAY_SIZE(features),
 	.feature_table_legacy = features_legacy,
 	.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
+	.suppress_used_validation = true,
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH V5 3/4] virtio-blk: don't let virtio core to validate used length
  2021-10-27  2:21 [PATCH V5 0/4] Validate used buffer length Jason Wang
  2021-10-27  2:21 ` [PATCH V5 1/4] virtio_ring: validate " Jason Wang
  2021-10-27  2:21 ` [PATCH V5 2/4] virtio-net: don't let virtio core to validate used length Jason Wang
@ 2021-10-27  2:21 ` Jason Wang
  2021-10-27  2:21 ` [PATCH V5 4/4] virtio-scsi: don't let virtio core to validate used buffer length Jason Wang
  3 siblings, 0 replies; 32+ messages in thread
From: Jason Wang @ 2021-10-27  2:21 UTC (permalink / raw)
  To: mst, virtualization
  Cc: linux-kernel, f.hetzelt, david.kaplan, konrad.wilk, Jason Wang

We never tries to use used length, so the patch prevents the virtio
core from validating used length.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/block/virtio_blk.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c7d05ff24084..36d645ec5002 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1041,6 +1041,7 @@ static struct virtio_driver virtio_blk = {
 	.feature_table_size		= ARRAY_SIZE(features),
 	.feature_table_legacy		= features_legacy,
 	.feature_table_size_legacy	= ARRAY_SIZE(features_legacy),
+	.suppress_used_validation	= true,
 	.driver.name			= KBUILD_MODNAME,
 	.driver.owner			= THIS_MODULE,
 	.id_table			= id_table,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH V5 4/4] virtio-scsi: don't let virtio core to validate used buffer length
  2021-10-27  2:21 [PATCH V5 0/4] Validate used buffer length Jason Wang
                   ` (2 preceding siblings ...)
  2021-10-27  2:21 ` [PATCH V5 3/4] virtio-blk: " Jason Wang
@ 2021-10-27  2:21 ` Jason Wang
  3 siblings, 0 replies; 32+ messages in thread
From: Jason Wang @ 2021-10-27  2:21 UTC (permalink / raw)
  To: mst, virtualization
  Cc: linux-kernel, f.hetzelt, david.kaplan, konrad.wilk, Jason Wang

We never tries to use used length, so the patch prevents the virtio
core from validating used length.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/scsi/virtio_scsi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 07d0250f17c3..03b09ecea42d 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -977,6 +977,7 @@ static unsigned int features[] = {
 static struct virtio_driver virtio_scsi_driver = {
 	.feature_table = features,
 	.feature_table_size = ARRAY_SIZE(features),
+	.suppress_used_validation = true,
 	.driver.name = KBUILD_MODNAME,
 	.driver.owner = THIS_MODULE,
 	.id_table = id_table,
-- 
2.25.1


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
       [not found]   ` <1635823138.4631283-1-xuanzhuo@linux.alibaba.com>
@ 2021-11-02  3:54     ` Jason Wang
  0 siblings, 0 replies; 32+ messages in thread
From: Jason Wang @ 2021-11-02  3:54 UTC (permalink / raw)
  To: Xuan Zhuo
  Cc: Hetzelt, Felicitas, linux-kernel, kaplan, david,
	Konrad Rzeszutek Wilk, mst, virtualization

On Tue, Nov 2, 2021 at 11:22 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Wed, 27 Oct 2021 10:21:04 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > This patch validate the used buffer length provided by the device
> > before trying to use it. This is done by record the in buffer length
> > in a new field in desc_state structure during virtqueue_add(), then we
> > can fail the virtqueue_get_buf() when we find the device is trying to
> > give us a used buffer length which is greater than the in buffer
> > length.
> >
> > Since some drivers have already done the validation by themselves,
> > this patch tries to makes the core validation optional. For the driver
> > that doesn't want the validation, it can set the
> > suppress_used_validation to be true (which could be overridden by
> > force_used_validation module parameter). To be more efficient, a
> > dedicate array is used for storing the validate used length, this
> > helps to eliminate the cache stress if validation is done by the
> > driver.
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> >  drivers/virtio/virtio_ring.c | 60 ++++++++++++++++++++++++++++++++++++
> >  include/linux/virtio.h       |  2 ++
> >  2 files changed, 62 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 4c0ec82cef56..a6e5a3b94337 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -14,6 +14,9 @@
> >  #include <linux/spinlock.h>
> >  #include <xen/xen.h>
> >
> > +static bool force_used_validation = false;
> > +module_param(force_used_validation, bool, 0444);
> > +
> >  #ifdef DEBUG
> >  /* For development, we want to crash whenever the ring is screwed. */
> >  #define BAD_RING(_vq, fmt, args...)                          \
> > @@ -182,6 +185,9 @@ struct vring_virtqueue {
> >               } packed;
> >       };
> >
> > +     /* Per-descriptor in buffer length */
> > +     u32 *buflen;
> > +
> >       /* How to notify other side. FIXME: commonalize hcalls! */
> >       bool (*notify)(struct virtqueue *vq);
> >
> > @@ -490,6 +496,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >       unsigned int i, n, avail, descs_used, prev, err_idx;
> >       int head;
> >       bool indirect;
> > +     u32 buflen = 0;
> >
> >       START_USE(vq);
> >
> > @@ -571,6 +578,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >                                                    VRING_DESC_F_NEXT |
> >                                                    VRING_DESC_F_WRITE,
> >                                                    indirect);
> > +                     buflen += sg->length;
> >               }
> >       }
> >       /* Last one doesn't continue. */
> > @@ -610,6 +618,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> >       else
> >               vq->split.desc_state[head].indir_desc = ctx;
> >
> > +     /* Store in buffer length if necessary */
> > +     if (vq->buflen)
> > +             vq->buflen[head] = buflen;
> > +
> >       /* Put entry in available array (but don't update avail->idx until they
> >        * do sync). */
> >       avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
> > @@ -784,6 +796,11 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> >               BAD_RING(vq, "id %u is not a head!\n", i);
> >               return NULL;
> >       }
> > +     if (vq->buflen && unlikely(*len > vq->buflen[i])) {
> > +             BAD_RING(vq, "used len %d is larger than in buflen %u\n",
> > +                     *len, vq->buflen[i]);
> > +             return NULL;
> > +     }
> >
> >       /* detach_buf_split clears data, so grab it now. */
> >       ret = vq->split.desc_state[i].data;
> > @@ -1062,6 +1079,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
> >       unsigned int i, n, err_idx;
> >       u16 head, id;
> >       dma_addr_t addr;
> > +     u32 buflen = 0;
> >
> >       head = vq->packed.next_avail_idx;
> >       desc = alloc_indirect_packed(total_sg, gfp);
> > @@ -1091,6 +1109,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
> >                       desc[i].addr = cpu_to_le64(addr);
> >                       desc[i].len = cpu_to_le32(sg->length);
> >                       i++;
> > +                     if (n >= out_sgs)
> > +                             buflen += sg->length;
> >               }
> >       }
> >
> > @@ -1144,6 +1164,10 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
> >       vq->packed.desc_state[id].indir_desc = desc;
> >       vq->packed.desc_state[id].last = id;
> >
> > +     /* Store in buffer length if necessary */
> > +     if (vq->buflen)
> > +             vq->buflen[id] = buflen;
> > +
> >       vq->num_added += 1;
> >
> >       pr_debug("Added buffer head %i to %p\n", head, vq);
> > @@ -1179,6 +1203,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> >       __le16 head_flags, flags;
> >       u16 head, id, prev, curr, avail_used_flags;
> >       int err;
> > +     u32 buflen = 0;
> >
> >       START_USE(vq);
> >
> > @@ -1258,6 +1283,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> >                                       1 << VRING_PACKED_DESC_F_AVAIL |
> >                                       1 << VRING_PACKED_DESC_F_USED;
> >                       }
> > +                     if (n >= out_sgs)
> > +                             buflen += sg->length;
> >               }
> >       }
> >
> > @@ -1277,6 +1304,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> >       vq->packed.desc_state[id].indir_desc = ctx;
> >       vq->packed.desc_state[id].last = prev;
> >
> > +     /* Store in buffer length if necessary */
> > +     if (vq->buflen)
> > +             vq->buflen[id] = buflen;
> > +
> >       /*
> >        * A driver MUST NOT make the first descriptor in the list
> >        * available before all subsequent descriptors comprising
> > @@ -1463,6 +1494,11 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
> >               BAD_RING(vq, "id %u is not a head!\n", id);
> >               return NULL;
> >       }
> > +     if (vq->buflen && unlikely(*len > vq->buflen[id])) {
> > +             BAD_RING(vq, "used len %d is larger than in buflen %u\n",
> > +                     *len, vq->buflen[id]);
> > +             return NULL;
> > +     }
> >
> >       /* detach_buf_packed clears data, so grab it now. */
> >       ret = vq->packed.desc_state[id].data;
> > @@ -1668,6 +1704,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
> >       struct vring_virtqueue *vq;
> >       struct vring_packed_desc *ring;
> >       struct vring_packed_desc_event *driver, *device;
> > +     struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
> >       dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
> >       size_t ring_size_in_bytes, event_size_in_bytes;
> >
> > @@ -1757,6 +1794,15 @@ static struct virtqueue *vring_create_virtqueue_packed(
> >       if (!vq->packed.desc_extra)
> >               goto err_desc_extra;
> >
> > +     if (!drv->suppress_used_validation || force_used_validation) {
> > +             vq->buflen = kmalloc_array(num, sizeof(*vq->buflen),
> > +                                        GFP_KERNEL);
> > +             if (!vq->buflen)
> > +                     goto err_buflen;
>
> The reason for not using "struct vring_desc_state_split/ struct vring_desc_state_split"
> is to occupy less memory when turning off this function?

Less memory and less cache pressure.

Thanks

>
> Thanks.
>
> > +     } else {
> > +             vq->buflen = NULL;
> > +     }
> > +
> >       /* No callback?  Tell other side not to bother us. */
> >       if (!callback) {
> >               vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
> > @@ -1769,6 +1815,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
> >       spin_unlock(&vdev->vqs_list_lock);
> >       return &vq->vq;
> >
> > +err_buflen:
> > +     kfree(vq->packed.desc_extra);
> >  err_desc_extra:
> >       kfree(vq->packed.desc_state);
> >  err_desc_state:
> > @@ -2176,6 +2224,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> >                                       void (*callback)(struct virtqueue *),
> >                                       const char *name)
> >  {
> > +     struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
> >       struct vring_virtqueue *vq;
> >
> >       if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
> > @@ -2235,6 +2284,15 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> >       if (!vq->split.desc_extra)
> >               goto err_extra;
> >
> > +     if (!drv->suppress_used_validation || force_used_validation) {
> > +             vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen),
> > +                                        GFP_KERNEL);
> > +             if (!vq->buflen)
> > +                     goto err_buflen;
> > +     } else {
> > +             vq->buflen = NULL;
> > +     }
> > +
> >       /* Put everything in free lists. */
> >       vq->free_head = 0;
> >       memset(vq->split.desc_state, 0, vring.num *
> > @@ -2245,6 +2303,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
> >       spin_unlock(&vdev->vqs_list_lock);
> >       return &vq->vq;
> >
> > +err_buflen:
> > +     kfree(vq->split.desc_extra);
> >  err_extra:
> >       kfree(vq->split.desc_state);
> >  err_state:
> > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > index 41edbc01ffa4..44d0e09da2d9 100644
> > --- a/include/linux/virtio.h
> > +++ b/include/linux/virtio.h
> > @@ -152,6 +152,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev);
> >   * @feature_table_size: number of entries in the feature table array.
> >   * @feature_table_legacy: same as feature_table but when working in legacy mode.
> >   * @feature_table_size_legacy: number of entries in feature table legacy array.
> > + * @suppress_used_validation: set to not have core validate used length
> >   * @probe: the function to call when a device is found.  Returns 0 or -errno.
> >   * @scan: optional function to call after successful probe; intended
> >   *    for virtio-scsi to invoke a scan.
> > @@ -168,6 +169,7 @@ struct virtio_driver {
> >       unsigned int feature_table_size;
> >       const unsigned int *feature_table_legacy;
> >       unsigned int feature_table_size_legacy;
> > +     bool suppress_used_validation;
> >       int (*validate)(struct virtio_device *dev);
> >       int (*probe)(struct virtio_device *dev);
> >       void (*scan)(struct virtio_device *dev);
> > --
> > 2.25.1
> >
> > _______________________________________________
> > Virtualization mailing list
> > Virtualization@lists.linux-foundation.org
> > https://lists.linuxfoundation.org/mailman/listinfo/virtualization
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-10-27  2:21 ` [PATCH V5 1/4] virtio_ring: validate " Jason Wang
       [not found]   ` <1635823138.4631283-1-xuanzhuo@linux.alibaba.com>
@ 2021-11-19 15:09   ` Halil Pasic
  2021-11-22  3:51     ` Jason Wang
  1 sibling, 1 reply; 32+ messages in thread
From: Halil Pasic @ 2021-11-19 15:09 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, virtualization, f.hetzelt, linux-kernel, david.kaplan,
	konrad.wilk, Halil Pasic

On Wed, 27 Oct 2021 10:21:04 +0800
Jason Wang <jasowang@redhat.com> wrote:

> This patch validate the used buffer length provided by the device
> before trying to use it. This is done by record the in buffer length
> in a new field in desc_state structure during virtqueue_add(), then we
> can fail the virtqueue_get_buf() when we find the device is trying to
> give us a used buffer length which is greater than the in buffer
> length.
> 
> Since some drivers have already done the validation by themselves,
> this patch tries to makes the core validation optional. For the driver
> that doesn't want the validation, it can set the
> suppress_used_validation to be true (which could be overridden by
> force_used_validation module parameter). To be more efficient, a
> dedicate array is used for storing the validate used length, this
> helps to eliminate the cache stress if validation is done by the
> driver.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Hi Jason!

Our CI has detected, that virtio-vsock became unusable with this
patch on s390x. I didn't test on x86 yet. The guest kernel says
something like:
vmw_vsock_virtio_transport virtio1: tx: used len 44 is larger than in buflen 0

Did you, or anybody else, see something like this on platforms other that
s390x?

I had a quick look at this code, and I speculate that it probably
uncovers a pre-existig bug, rather than introducing a new one.

If somebody is already working on this please reach out to me.

Regards,
Halil

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-19 15:09   ` Halil Pasic
@ 2021-11-22  3:51     ` Jason Wang
  2021-11-22  5:35       ` Halil Pasic
  2021-11-22  7:42       ` Stefano Garzarella
  0 siblings, 2 replies; 32+ messages in thread
From: Jason Wang @ 2021-11-22  3:51 UTC (permalink / raw)
  To: Halil Pasic
  Cc: mst, virtualization, Hetzelt, Felicitas, linux-kernel, kaplan,
	david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella

On Fri, Nov 19, 2021 at 11:10 PM Halil Pasic <pasic@linux.ibm.com> wrote:
>
> On Wed, 27 Oct 2021 10:21:04 +0800
> Jason Wang <jasowang@redhat.com> wrote:
>
> > This patch validate the used buffer length provided by the device
> > before trying to use it. This is done by record the in buffer length
> > in a new field in desc_state structure during virtqueue_add(), then we
> > can fail the virtqueue_get_buf() when we find the device is trying to
> > give us a used buffer length which is greater than the in buffer
> > length.
> >
> > Since some drivers have already done the validation by themselves,
> > this patch tries to makes the core validation optional. For the driver
> > that doesn't want the validation, it can set the
> > suppress_used_validation to be true (which could be overridden by
> > force_used_validation module parameter). To be more efficient, a
> > dedicate array is used for storing the validate used length, this
> > helps to eliminate the cache stress if validation is done by the
> > driver.
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
>
> Hi Jason!
>
> Our CI has detected, that virtio-vsock became unusable with this
> patch on s390x. I didn't test on x86 yet. The guest kernel says
> something like:
> vmw_vsock_virtio_transport virtio1: tx: used len 44 is larger than in buflen 0
>
> Did you, or anybody else, see something like this on platforms other that
> s390x?

Adding Stefan and Stefano.

I think it should be a common issue, looking at
vhost_vsock_handle_tx_kick(), it did:

len += sizeof(pkt->hdr);
vhost_add_used(vq, head, len);

which looks like a violation of the spec since it's TX.

>
> I had a quick look at this code, and I speculate that it probably
> uncovers a pre-existig bug, rather than introducing a new one.

I agree.

>
> If somebody is already working on this please reach out to me.

AFAIK, no. I think the plan is to fix both the device and drive side
(but I'm not sure we need a new feature for this if we stick to the
validation).

Thanks

>
> Regards,
> Halil
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22  3:51     ` Jason Wang
@ 2021-11-22  5:35       ` Halil Pasic
  2021-11-22  5:49         ` Halil Pasic
  2021-11-22  7:42       ` Stefano Garzarella
  1 sibling, 1 reply; 32+ messages in thread
From: Halil Pasic @ 2021-11-22  5:35 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, virtualization, Hetzelt, Felicitas, linux-kernel, kaplan,
	david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella, Halil Pasic

On Mon, 22 Nov 2021 11:51:09 +0800
Jason Wang <jasowang@redhat.com> wrote:

> On Fri, Nov 19, 2021 at 11:10 PM Halil Pasic <pasic@linux.ibm.com> wrote:
> >
> > On Wed, 27 Oct 2021 10:21:04 +0800
> > Jason Wang <jasowang@redhat.com> wrote:
> >  
> > > This patch validate the used buffer length provided by the device
> > > before trying to use it. This is done by record the in buffer length
> > > in a new field in desc_state structure during virtqueue_add(), then we
> > > can fail the virtqueue_get_buf() when we find the device is trying to
> > > give us a used buffer length which is greater than the in buffer
> > > length.
> > >
> > > Since some drivers have already done the validation by themselves,
> > > this patch tries to makes the core validation optional. For the driver
> > > that doesn't want the validation, it can set the
> > > suppress_used_validation to be true (which could be overridden by
> > > force_used_validation module parameter). To be more efficient, a
> > > dedicate array is used for storing the validate used length, this
> > > helps to eliminate the cache stress if validation is done by the
> > > driver.
> > >
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>  
> >
> > Hi Jason!
> >
> > Our CI has detected, that virtio-vsock became unusable with this
> > patch on s390x. I didn't test on x86 yet. The guest kernel says
> > something like:
> > vmw_vsock_virtio_transport virtio1: tx: used len 44 is larger than in buflen 0
> >
> > Did you, or anybody else, see something like this on platforms other that
> > s390x?  
> 
> Adding Stefan and Stefano.
> 
> I think it should be a common issue, looking at
> vhost_vsock_handle_tx_kick(), it did:
> 
> len += sizeof(pkt->hdr);
> vhost_add_used(vq, head, len);
> 
> which looks like a violation of the spec since it's TX.

I'm not sure the lines above look like a violation of the spec. If you
examine vhost_vsock_alloc_pkt() I believe that you will agree that:
len == pkt->len == pkt->hdr.len
which makes sense since according to the spec both tx and rx messages
are hdr+payload. And I believe hdr.len is the size of the payload,
although that does not seem to be properly documented by the spec.

On the other hand tx messages are stated to be device read-only (in the
spec) so if the device writes stuff, that is certainly wrong.

If that is what happens. 

Looking at virtqueue_get_buf_ctx_split() I'm not sure that is what
happens. My hypothesis is that we just a last descriptor is an 'in'
type descriptor (i.e. a device writable one). For tx that assumption
would be wrong.

I will have another look at this today and send a fix patch if my
suspicion is confirmed.


> 
> >
> > I had a quick look at this code, and I speculate that it probably
> > uncovers a pre-existig bug, rather than introducing a new one.  
> 
> I agree.
> 

:) I'm not so sure any more myself.

> >
> > If somebody is already working on this please reach out to me.  
> 
> AFAIK, no. 

Thanks for the info! Then I will dig a little deeper. I asked in order
to avoid doing the debugging and fixing just to see that somebody was
faster :D

> I think the plan is to fix both the device and drive side
> (but I'm not sure we need a new feature for this if we stick to the
> validation).
> 
> Thanks
> 

Thank you!

Regards,
Halil

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22  5:35       ` Halil Pasic
@ 2021-11-22  5:49         ` Halil Pasic
  2021-11-22  6:25           ` Jason Wang
  0 siblings, 1 reply; 32+ messages in thread
From: Halil Pasic @ 2021-11-22  5:49 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, virtualization, Hetzelt, Felicitas, linux-kernel, kaplan,
	david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella, Halil Pasic

On Mon, 22 Nov 2021 06:35:18 +0100
Halil Pasic <pasic@linux.ibm.com> wrote:

> > I think it should be a common issue, looking at
> > vhost_vsock_handle_tx_kick(), it did:
> > 
> > len += sizeof(pkt->hdr);
> > vhost_add_used(vq, head, len);
> > 
> > which looks like a violation of the spec since it's TX.  
> 
> I'm not sure the lines above look like a violation of the spec. If you
> examine vhost_vsock_alloc_pkt() I believe that you will agree that:
> len == pkt->len == pkt->hdr.len
> which makes sense since according to the spec both tx and rx messages
> are hdr+payload. And I believe hdr.len is the size of the payload,
> although that does not seem to be properly documented by the spec.
> 
> On the other hand tx messages are stated to be device read-only (in the
> spec) so if the device writes stuff, that is certainly wrong.
> 
> If that is what happens. 
> 
> Looking at virtqueue_get_buf_ctx_split() I'm not sure that is what
> happens. My hypothesis is that we just a last descriptor is an 'in'
> type descriptor (i.e. a device writable one). For tx that assumption
> would be wrong.
> 
> I will have another look at this today and send a fix patch if my
> suspicion is confirmed.

If my suspicion is right something like:

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 00f64f2f8b72..efb57898920b 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -764,6 +764,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
        struct vring_virtqueue *vq = to_vvq(_vq);
        void *ret;
        unsigned int i;
+       bool has_in;
        u16 last_used;
 
        START_USE(vq);
@@ -787,6 +788,9 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
                        vq->split.vring.used->ring[last_used].id);
        *len = virtio32_to_cpu(_vq->vdev,
                        vq->split.vring.used->ring[last_used].len);
+       has_in = virtio16_to_cpu(_vq->vdev,
+                       vq->split.vring.used->ring[last_used].flags)
+                               & VRING_DESC_F_WRITE;
 
        if (unlikely(i >= vq->split.vring.num)) {
                BAD_RING(vq, "id %u out of range\n", i);
@@ -796,7 +800,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
                BAD_RING(vq, "id %u is not a head!\n", i);
                return NULL;
        }
-       if (vq->buflen && unlikely(*len > vq->buflen[i])) {
+       if (has_in && q->buflen && unlikely(*len > vq->buflen[i])) {
                BAD_RING(vq, "used len %d is larger than in buflen %u\n",
                        *len, vq->buflen[i]);
                return NULL;

would fix the problem for split. I will try that out and let you know
later.

Regards,
Halil

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22  5:49         ` Halil Pasic
@ 2021-11-22  6:25           ` Jason Wang
  2021-11-22  7:55             ` Stefano Garzarella
                               ` (2 more replies)
  0 siblings, 3 replies; 32+ messages in thread
From: Jason Wang @ 2021-11-22  6:25 UTC (permalink / raw)
  To: Halil Pasic
  Cc: mst, virtualization, Hetzelt, Felicitas, linux-kernel, kaplan,
	david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella

On Mon, Nov 22, 2021 at 1:49 PM Halil Pasic <pasic@linux.ibm.com> wrote:
>
> On Mon, 22 Nov 2021 06:35:18 +0100
> Halil Pasic <pasic@linux.ibm.com> wrote:
>
> > > I think it should be a common issue, looking at
> > > vhost_vsock_handle_tx_kick(), it did:
> > >
> > > len += sizeof(pkt->hdr);
> > > vhost_add_used(vq, head, len);
> > >
> > > which looks like a violation of the spec since it's TX.
> >
> > I'm not sure the lines above look like a violation of the spec. If you
> > examine vhost_vsock_alloc_pkt() I believe that you will agree that:
> > len == pkt->len == pkt->hdr.len
> > which makes sense since according to the spec both tx and rx messages
> > are hdr+payload. And I believe hdr.len is the size of the payload,
> > although that does not seem to be properly documented by the spec.

Sorry for being unclear, what I meant is that we probably should use
zero here. TX doesn't use in buffer actually.

According to the spec, 0 should be the used length:

"and len the total of bytes written into the buffer."

> >
> > On the other hand tx messages are stated to be device read-only (in the
> > spec) so if the device writes stuff, that is certainly wrong.
> >

Yes.

> > If that is what happens.
> >
> > Looking at virtqueue_get_buf_ctx_split() I'm not sure that is what
> > happens. My hypothesis is that we just a last descriptor is an 'in'
> > type descriptor (i.e. a device writable one). For tx that assumption
> > would be wrong.
> >
> > I will have another look at this today and send a fix patch if my
> > suspicion is confirmed.
>
> If my suspicion is right something like:
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 00f64f2f8b72..efb57898920b 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -764,6 +764,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>         struct vring_virtqueue *vq = to_vvq(_vq);
>         void *ret;
>         unsigned int i;
> +       bool has_in;
>         u16 last_used;
>
>         START_USE(vq);
> @@ -787,6 +788,9 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>                         vq->split.vring.used->ring[last_used].id);
>         *len = virtio32_to_cpu(_vq->vdev,
>                         vq->split.vring.used->ring[last_used].len);
> +       has_in = virtio16_to_cpu(_vq->vdev,
> +                       vq->split.vring.used->ring[last_used].flags)
> +                               & VRING_DESC_F_WRITE;

Did you mean vring.desc actually? If yes, it's better not depend on
the descriptor ring which can be modified by the device. We've stored
the flags in desc_extra[].

>
>         if (unlikely(i >= vq->split.vring.num)) {
>                 BAD_RING(vq, "id %u out of range\n", i);
> @@ -796,7 +800,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>                 BAD_RING(vq, "id %u is not a head!\n", i);
>                 return NULL;
>         }
> -       if (vq->buflen && unlikely(*len > vq->buflen[i])) {
> +       if (has_in && q->buflen && unlikely(*len > vq->buflen[i])) {
>                 BAD_RING(vq, "used len %d is larger than in buflen %u\n",
>                         *len, vq->buflen[i]);
>                 return NULL;
>
> would fix the problem for split. I will try that out and let you know
> later.

I'm not sure I get this, in virtqueue_add_split, the buflen[i] only
contains the in buffer length.

I think the fixes are:

1) fixing the vhost vsock
2) use suppress_used_validation=true to let vsock driver to validate
the in buffer length
3) probably a new feature so the driver can only enable the validation
when the feature is enabled.

Thanks

>
> Regards,
> Halil
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22  3:51     ` Jason Wang
  2021-11-22  5:35       ` Halil Pasic
@ 2021-11-22  7:42       ` Stefano Garzarella
  1 sibling, 0 replies; 32+ messages in thread
From: Stefano Garzarella @ 2021-11-22  7:42 UTC (permalink / raw)
  To: Jason Wang, Halil Pasic
  Cc: mst, virtualization, Hetzelt, Felicitas, linux-kernel, kaplan,
	david, Konrad Rzeszutek Wilk, Stefan Hajnoczi

On Mon, Nov 22, 2021 at 11:51:09AM +0800, Jason Wang wrote:
>On Fri, Nov 19, 2021 at 11:10 PM Halil Pasic <pasic@linux.ibm.com> wrote:
>>
>> On Wed, 27 Oct 2021 10:21:04 +0800
>> Jason Wang <jasowang@redhat.com> wrote:
>>
>> > This patch validate the used buffer length provided by the device
>> > before trying to use it. This is done by record the in buffer length
>> > in a new field in desc_state structure during virtqueue_add(), then we
>> > can fail the virtqueue_get_buf() when we find the device is trying to
>> > give us a used buffer length which is greater than the in buffer
>> > length.
>> >
>> > Since some drivers have already done the validation by themselves,
>> > this patch tries to makes the core validation optional. For the driver
>> > that doesn't want the validation, it can set the
>> > suppress_used_validation to be true (which could be overridden by
>> > force_used_validation module parameter). To be more efficient, a
>> > dedicate array is used for storing the validate used length, this
>> > helps to eliminate the cache stress if validation is done by the
>> > driver.
>> >
>> > Signed-off-by: Jason Wang <jasowang@redhat.com>
>>
>> Hi Jason!
>>
>> Our CI has detected, that virtio-vsock became unusable with this
>> patch on s390x. I didn't test on x86 yet. The guest kernel says
>> something like:
>> vmw_vsock_virtio_transport virtio1: tx: used len 44 is larger than in buflen 0
>>
>> Did you, or anybody else, see something like this on platforms other that
>> s390x?
>
>Adding Stefan and Stefano.
>
>I think it should be a common issue, looking at

Yep, I confirm the same behaviour on x86_64. On Friday evening I had the 
same failure while testing latest QEMU and Linux kernel.

>vhost_vsock_handle_tx_kick(), it did:
>
>len += sizeof(pkt->hdr);
>vhost_add_used(vq, head, len);
>
>which looks like a violation of the spec since it's TX.
>
>>
>> I had a quick look at this code, and I speculate that it probably
>> uncovers a pre-existig bug, rather than introducing a new one.
>
>I agree.
>
>>
>> If somebody is already working on this please reach out to me.
>

My plan was to debug and test it today, so let me know if you need some 
help.

>AFAIK, no. I think the plan is to fix both the device and drive side
>(but I'm not sure we need a new feature for this if we stick to the
>validation).
>

Yes, maybe we need a new feature, since I believe there has been this 
problem since the beginning.

Thanks,
Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22  6:25           ` Jason Wang
@ 2021-11-22  7:55             ` Stefano Garzarella
  2021-11-22 11:08               ` Stefano Garzarella
  2021-11-22 13:50             ` Halil Pasic
  2021-11-22 20:23             ` Halil Pasic
  2 siblings, 1 reply; 32+ messages in thread
From: Stefano Garzarella @ 2021-11-22  7:55 UTC (permalink / raw)
  To: Jason Wang
  Cc: Halil Pasic, mst, virtualization, Hetzelt, Felicitas,
	linux-kernel, kaplan, david, Konrad Rzeszutek Wilk,
	Stefan Hajnoczi

On Mon, Nov 22, 2021 at 02:25:26PM +0800, Jason Wang wrote:
>On Mon, Nov 22, 2021 at 1:49 PM Halil Pasic <pasic@linux.ibm.com> wrote:
>>
>> On Mon, 22 Nov 2021 06:35:18 +0100
>> Halil Pasic <pasic@linux.ibm.com> wrote:
>>
>> > > I think it should be a common issue, looking at
>> > > vhost_vsock_handle_tx_kick(), it did:
>> > >
>> > > len += sizeof(pkt->hdr);
>> > > vhost_add_used(vq, head, len);
>> > >
>> > > which looks like a violation of the spec since it's TX.
>> >
>> > I'm not sure the lines above look like a violation of the spec. If you
>> > examine vhost_vsock_alloc_pkt() I believe that you will agree that:
>> > len == pkt->len == pkt->hdr.len
>> > which makes sense since according to the spec both tx and rx messages
>> > are hdr+payload. And I believe hdr.len is the size of the payload,
>> > although that does not seem to be properly documented by the spec.
>
>Sorry for being unclear, what I meant is that we probably should use
>zero here. TX doesn't use in buffer actually.
>
>According to the spec, 0 should be the used length:
>
>"and len the total of bytes written into the buffer."
>
>> >
>> > On the other hand tx messages are stated to be device read-only (in the
>> > spec) so if the device writes stuff, that is certainly wrong.
>> >
>
>Yes.
>
>> > If that is what happens.
>> >
>> > Looking at virtqueue_get_buf_ctx_split() I'm not sure that is what
>> > happens. My hypothesis is that we just a last descriptor is an 'in'
>> > type descriptor (i.e. a device writable one). For tx that assumption
>> > would be wrong.
>> >
>> > I will have another look at this today and send a fix patch if my
>> > suspicion is confirmed.
>>
>> If my suspicion is right something like:
>>
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index 00f64f2f8b72..efb57898920b 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -764,6 +764,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>>         struct vring_virtqueue *vq = to_vvq(_vq);
>>         void *ret;
>>         unsigned int i;
>> +       bool has_in;
>>         u16 last_used;
>>
>>         START_USE(vq);
>> @@ -787,6 +788,9 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>>                         vq->split.vring.used->ring[last_used].id);
>>         *len = virtio32_to_cpu(_vq->vdev,
>>                         vq->split.vring.used->ring[last_used].len);
>> +       has_in = virtio16_to_cpu(_vq->vdev,
>> +                       vq->split.vring.used->ring[last_used].flags)
>> +                               & VRING_DESC_F_WRITE;
>
>Did you mean vring.desc actually? If yes, it's better not depend on
>the descriptor ring which can be modified by the device. We've stored
>the flags in desc_extra[].
>
>>
>>         if (unlikely(i >= vq->split.vring.num)) {
>>                 BAD_RING(vq, "id %u out of range\n", i);
>> @@ -796,7 +800,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>>                 BAD_RING(vq, "id %u is not a head!\n", i);
>>                 return NULL;
>>         }
>> -       if (vq->buflen && unlikely(*len > vq->buflen[i])) {
>> +       if (has_in && q->buflen && unlikely(*len > vq->buflen[i])) {
>>                 BAD_RING(vq, "used len %d is larger than in buflen %u\n",
>>                         *len, vq->buflen[i]);
>>                 return NULL;
>>
>> would fix the problem for split. I will try that out and let you know
>> later.
>
>I'm not sure I get this, in virtqueue_add_split, the buflen[i] only
>contains the in buffer length.
>
>I think the fixes are:
>
>1) fixing the vhost vsock

Yep, in vhost_vsock_handle_tx_kick() we should have vhost_add_used(vq, 
head, 0) since the device doesn't write anything.

>2) use suppress_used_validation=true to let vsock driver to validate
>the in buffer length
>3) probably a new feature so the driver can only enable the validation
>when the feature is enabled.

I fully agree with these steps.

Thanks,
Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22  7:55             ` Stefano Garzarella
@ 2021-11-22 11:08               ` Stefano Garzarella
  2021-11-22 14:24                 ` Halil Pasic
  0 siblings, 1 reply; 32+ messages in thread
From: Stefano Garzarella @ 2021-11-22 11:08 UTC (permalink / raw)
  To: Jason Wang, Halil Pasic
  Cc: mst, virtualization, Hetzelt, Felicitas, linux-kernel, kaplan,
	david, Konrad Rzeszutek Wilk, Stefan Hajnoczi

On Mon, Nov 22, 2021 at 08:55:24AM +0100, Stefano Garzarella wrote:
>On Mon, Nov 22, 2021 at 02:25:26PM +0800, Jason Wang wrote:
>>On Mon, Nov 22, 2021 at 1:49 PM Halil Pasic <pasic@linux.ibm.com> wrote:
>>>
>>>On Mon, 22 Nov 2021 06:35:18 +0100
>>>Halil Pasic <pasic@linux.ibm.com> wrote:
>>>
>>>> > I think it should be a common issue, looking at
>>>> > vhost_vsock_handle_tx_kick(), it did:
>>>> >
>>>> > len += sizeof(pkt->hdr);
>>>> > vhost_add_used(vq, head, len);
>>>> >
>>>> > which looks like a violation of the spec since it's TX.
>>>>
>>>> I'm not sure the lines above look like a violation of the spec. If you
>>>> examine vhost_vsock_alloc_pkt() I believe that you will agree that:
>>>> len == pkt->len == pkt->hdr.len
>>>> which makes sense since according to the spec both tx and rx messages
>>>> are hdr+payload. And I believe hdr.len is the size of the payload,
>>>> although that does not seem to be properly documented by the spec.
>>
>>Sorry for being unclear, what I meant is that we probably should use
>>zero here. TX doesn't use in buffer actually.
>>
>>According to the spec, 0 should be the used length:
>>
>>"and len the total of bytes written into the buffer."
>>
>>>>
>>>> On the other hand tx messages are stated to be device read-only (in the
>>>> spec) so if the device writes stuff, that is certainly wrong.
>>>>
>>
>>Yes.
>>
>>>> If that is what happens.
>>>>
>>>> Looking at virtqueue_get_buf_ctx_split() I'm not sure that is what
>>>> happens. My hypothesis is that we just a last descriptor is an 'in'
>>>> type descriptor (i.e. a device writable one). For tx that assumption
>>>> would be wrong.
>>>>
>>>> I will have another look at this today and send a fix patch if my
>>>> suspicion is confirmed.
>>>
>>>If my suspicion is right something like:
>>>
>>>diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>>>index 00f64f2f8b72..efb57898920b 100644
>>>--- a/drivers/virtio/virtio_ring.c
>>>+++ b/drivers/virtio/virtio_ring.c
>>>@@ -764,6 +764,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>>>        struct vring_virtqueue *vq = to_vvq(_vq);
>>>        void *ret;
>>>        unsigned int i;
>>>+       bool has_in;
>>>        u16 last_used;
>>>
>>>        START_USE(vq);
>>>@@ -787,6 +788,9 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>>>                        vq->split.vring.used->ring[last_used].id);
>>>        *len = virtio32_to_cpu(_vq->vdev,
>>>                        vq->split.vring.used->ring[last_used].len);
>>>+       has_in = virtio16_to_cpu(_vq->vdev,
>>>+                       vq->split.vring.used->ring[last_used].flags)
>>>+                               & VRING_DESC_F_WRITE;
>>
>>Did you mean vring.desc actually? If yes, it's better not depend on
>>the descriptor ring which can be modified by the device. We've stored
>>the flags in desc_extra[].
>>
>>>
>>>        if (unlikely(i >= vq->split.vring.num)) {
>>>                BAD_RING(vq, "id %u out of range\n", i);
>>>@@ -796,7 +800,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>>>                BAD_RING(vq, "id %u is not a head!\n", i);
>>>                return NULL;
>>>        }
>>>-       if (vq->buflen && unlikely(*len > vq->buflen[i])) {
>>>+       if (has_in && q->buflen && unlikely(*len > vq->buflen[i])) {
>>>                BAD_RING(vq, "used len %d is larger than in buflen %u\n",
>>>                        *len, vq->buflen[i]);
>>>                return NULL;
>>>
>>>would fix the problem for split. I will try that out and let you know
>>>later.
>>
>>I'm not sure I get this, in virtqueue_add_split, the buflen[i] only
>>contains the in buffer length.
>>
>>I think the fixes are:
>>
>>1) fixing the vhost vsock
>
>Yep, in vhost_vsock_handle_tx_kick() we should have vhost_add_used(vq, 
>head, 0) since the device doesn't write anything.
>
>>2) use suppress_used_validation=true to let vsock driver to validate
>>the in buffer length
>>3) probably a new feature so the driver can only enable the validation
>>when the feature is enabled.
>
>I fully agree with these steps.

Michael sent a patch to suppress the validation, so I think we should 
just fix vhost-vsock. I mean something like this:

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 938aefbc75ec..4e3b95af7ee4 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -554,7 +554,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
                         virtio_transport_free_pkt(pkt);

                 len += sizeof(pkt->hdr);
-               vhost_add_used(vq, head, len);
+               vhost_add_used(vq, head, 0);
                 total_len += len;
                 added = true;
         } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));

I checked and the problem is there from the first commit, so we should 
add:

Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")

I tested this patch and it works even without suppressing validation in 
the virtio core.  But for backwards compatibility we have to suppress it 
for sure as Michael did.

Maybe we can have a patch just with this change to backport it easily 
and one after to clean up a bit the code that was added after (len, 
total_len).

@Halil Let me know if you want to do it, otherwise I can do it.

Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22  6:25           ` Jason Wang
  2021-11-22  7:55             ` Stefano Garzarella
@ 2021-11-22 13:50             ` Halil Pasic
  2021-11-23  2:30               ` Jason Wang
  2021-11-23 12:17               ` Michael S. Tsirkin
  2021-11-22 20:23             ` Halil Pasic
  2 siblings, 2 replies; 32+ messages in thread
From: Halil Pasic @ 2021-11-22 13:50 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, virtualization, Hetzelt, Felicitas, linux-kernel, kaplan,
	david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella, Halil Pasic

On Mon, 22 Nov 2021 14:25:26 +0800
Jason Wang <jasowang@redhat.com> wrote:

> On Mon, Nov 22, 2021 at 1:49 PM Halil Pasic <pasic@linux.ibm.com> wrote:
> >
> > On Mon, 22 Nov 2021 06:35:18 +0100
> > Halil Pasic <pasic@linux.ibm.com> wrote:
> >  
> > > > I think it should be a common issue, looking at
> > > > vhost_vsock_handle_tx_kick(), it did:
> > > >
> > > > len += sizeof(pkt->hdr);
> > > > vhost_add_used(vq, head, len);
> > > >
> > > > which looks like a violation of the spec since it's TX.  
> > >
> > > I'm not sure the lines above look like a violation of the spec. If you
> > > examine vhost_vsock_alloc_pkt() I believe that you will agree that:
> > > len == pkt->len == pkt->hdr.len
> > > which makes sense since according to the spec both tx and rx messages
> > > are hdr+payload. And I believe hdr.len is the size of the payload,
> > > although that does not seem to be properly documented by the spec.  
> 
> Sorry for being unclear, what I meant is that we probably should use
> zero here. TX doesn't use in buffer actually.
> 
> According to the spec, 0 should be the used length:
> 
> "and len the total of bytes written into the buffer."

Right, I was wrong. I somehow assumed this is the total length and not
just the number of bytes written.

> 
> > >
> > > On the other hand tx messages are stated to be device read-only (in the
> > > spec) so if the device writes stuff, that is certainly wrong.
> > >  
> 
> Yes.
> 
> > > If that is what happens.
> > >
> > > Looking at virtqueue_get_buf_ctx_split() I'm not sure that is what
> > > happens. My hypothesis is that we just a last descriptor is an 'in'
> > > type descriptor (i.e. a device writable one). For tx that assumption
> > > would be wrong.
> > >
> > > I will have another look at this today and send a fix patch if my
> > > suspicion is confirmed.

Yeah, I didn't remember the semantic of
vq->split.vring.used->ring[last_used].len
correctly, and in fact also how exactly the rings work. So your objection
is correct. 

Maybe updating some stuff would make it easier to not make this mistake.

For example the spec and also the linux header says:

/* le32 is used here for ids for padding reasons. */ 
struct virtq_used_elem { 
        /* Index of start of used descriptor chain. */ 
        le32 id; 
        /* Total length of the descriptor chain which was used (written to) */ 
        le32 len; 
};

I think that comment isn't as clear as it could be. I would prefer:
/* The number of bytes written into the device writable portion of the
buffer described by the descriptor chain. */

I believe "the descriptor chain which was used" includes both the
descriptors that map the device read only and the device write
only portions of the buffer described by the descriptor chain. And the
total length of that descriptor chain may be defined either as a number
of the descriptors that form the chain, or the length of the buffer.

One has to use the descriptor chain even if the whole buffer is device
read only. So "used" == "written to" does not make any sense to me.

Also something like
int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int bytes_written)
instead of
int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
would make it easier to read the code correctly.

> >
> > If my suspicion is right something like:
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 00f64f2f8b72..efb57898920b 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -764,6 +764,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> >         struct vring_virtqueue *vq = to_vvq(_vq);
> >         void *ret;
> >         unsigned int i;
> > +       bool has_in;
> >         u16 last_used;
> >
> >         START_USE(vq);
> > @@ -787,6 +788,9 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> >                         vq->split.vring.used->ring[last_used].id);
> >         *len = virtio32_to_cpu(_vq->vdev,
> >                         vq->split.vring.used->ring[last_used].len);
> > +       has_in = virtio16_to_cpu(_vq->vdev,
> > +                       vq->split.vring.used->ring[last_used].flags)
> > +                               & VRING_DESC_F_WRITE;  
> 
> Did you mean vring.desc actually? If yes, it's better not depend on
> the descriptor ring which can be modified by the device. We've stored
> the flags in desc_extra[].
> 
> >
> >         if (unlikely(i >= vq->split.vring.num)) {
> >                 BAD_RING(vq, "id %u out of range\n", i);
> > @@ -796,7 +800,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> >                 BAD_RING(vq, "id %u is not a head!\n", i);
> >                 return NULL;
> >         }
> > -       if (vq->buflen && unlikely(*len > vq->buflen[i])) {
> > +       if (has_in && q->buflen && unlikely(*len > vq->buflen[i])) {
> >                 BAD_RING(vq, "used len %d is larger than in buflen %u\n",
> >                         *len, vq->buflen[i]);
> >                 return NULL;
> >
> > would fix the problem for split. I will try that out and let you know
> > later.  
> 
> I'm not sure I get this, in virtqueue_add_split, the buflen[i] only
> contains the in buffer length.

Sorry my diff is indeed silly.

> 
> I think the fixes are:
> 
> 1) fixing the vhost vsock
> 2) use suppress_used_validation=true to let vsock driver to validate
> the in buffer length
> 3) probably a new feature so the driver can only enable the validation
> when the feature is enabled.
> 

Makes sense!

Regards,
Halil


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22 11:08               ` Stefano Garzarella
@ 2021-11-22 14:24                 ` Halil Pasic
  2021-11-22 16:23                   ` Stefano Garzarella
  0 siblings, 1 reply; 32+ messages in thread
From: Halil Pasic @ 2021-11-22 14:24 UTC (permalink / raw)
  To: Stefano Garzarella
  Cc: Jason Wang, mst, virtualization, Hetzelt, Felicitas,
	linux-kernel, kaplan, david, Konrad Rzeszutek Wilk,
	Stefan Hajnoczi, Halil Pasic

On Mon, 22 Nov 2021 12:08:22 +0100
Stefano Garzarella <sgarzare@redhat.com> wrote:

> On Mon, Nov 22, 2021 at 08:55:24AM +0100, Stefano Garzarella wrote:
> >On Mon, Nov 22, 2021 at 02:25:26PM +0800, Jason Wang wrote:  
> >>On Mon, Nov 22, 2021 at 1:49 PM Halil Pasic <pasic@linux.ibm.com> wrote:  
> >>>
> >>>On Mon, 22 Nov 2021 06:35:18 +0100
> >>>Halil Pasic <pasic@linux.ibm.com> wrote:
> >>>  
> >>>> > I think it should be a common issue, looking at
> >>>> > vhost_vsock_handle_tx_kick(), it did:
> >>>> >
> >>>> > len += sizeof(pkt->hdr);
> >>>> > vhost_add_used(vq, head, len);
> >>>> >
> >>>> > which looks like a violation of the spec since it's TX.  
> >>>>
> >>>> I'm not sure the lines above look like a violation of the spec. If you
> >>>> examine vhost_vsock_alloc_pkt() I believe that you will agree that:
> >>>> len == pkt->len == pkt->hdr.len
> >>>> which makes sense since according to the spec both tx and rx messages
> >>>> are hdr+payload. And I believe hdr.len is the size of the payload,
> >>>> although that does not seem to be properly documented by the spec.  
> >>
> >>Sorry for being unclear, what I meant is that we probably should use
> >>zero here. TX doesn't use in buffer actually.
> >>
> >>According to the spec, 0 should be the used length:
> >>
> >>"and len the total of bytes written into the buffer."
> >>  
> >>>>
> >>>> On the other hand tx messages are stated to be device read-only (in the
> >>>> spec) so if the device writes stuff, that is certainly wrong.
> >>>>  
> >>
> >>Yes.
> >>  
> >>>> If that is what happens.
> >>>>
> >>>> Looking at virtqueue_get_buf_ctx_split() I'm not sure that is what
> >>>> happens. My hypothesis is that we just a last descriptor is an 'in'
> >>>> type descriptor (i.e. a device writable one). For tx that assumption
> >>>> would be wrong.
> >>>>
> >>>> I will have another look at this today and send a fix patch if my
> >>>> suspicion is confirmed.  
> >>>
> >>>If my suspicion is right something like:
> >>>
> >>>diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> >>>index 00f64f2f8b72..efb57898920b 100644
> >>>--- a/drivers/virtio/virtio_ring.c
> >>>+++ b/drivers/virtio/virtio_ring.c
> >>>@@ -764,6 +764,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> >>>        struct vring_virtqueue *vq = to_vvq(_vq);
> >>>        void *ret;
> >>>        unsigned int i;
> >>>+       bool has_in;
> >>>        u16 last_used;
> >>>
> >>>        START_USE(vq);
> >>>@@ -787,6 +788,9 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> >>>                        vq->split.vring.used->ring[last_used].id);
> >>>        *len = virtio32_to_cpu(_vq->vdev,
> >>>                        vq->split.vring.used->ring[last_used].len);
> >>>+       has_in = virtio16_to_cpu(_vq->vdev,
> >>>+                       vq->split.vring.used->ring[last_used].flags)
> >>>+                               & VRING_DESC_F_WRITE;  
> >>
> >>Did you mean vring.desc actually? If yes, it's better not depend on
> >>the descriptor ring which can be modified by the device. We've stored
> >>the flags in desc_extra[].
> >>  
> >>>
> >>>        if (unlikely(i >= vq->split.vring.num)) {
> >>>                BAD_RING(vq, "id %u out of range\n", i);
> >>>@@ -796,7 +800,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> >>>                BAD_RING(vq, "id %u is not a head!\n", i);
> >>>                return NULL;
> >>>        }
> >>>-       if (vq->buflen && unlikely(*len > vq->buflen[i])) {
> >>>+       if (has_in && q->buflen && unlikely(*len > vq->buflen[i])) {
> >>>                BAD_RING(vq, "used len %d is larger than in buflen %u\n",
> >>>                        *len, vq->buflen[i]);
> >>>                return NULL;
> >>>
> >>>would fix the problem for split. I will try that out and let you know
> >>>later.  
> >>
> >>I'm not sure I get this, in virtqueue_add_split, the buflen[i] only
> >>contains the in buffer length.
> >>
> >>I think the fixes are:
> >>
> >>1) fixing the vhost vsock  
> >
> >Yep, in vhost_vsock_handle_tx_kick() we should have vhost_add_used(vq, 
> >head, 0) since the device doesn't write anything.
> >  
> >>2) use suppress_used_validation=true to let vsock driver to validate
> >>the in buffer length
> >>3) probably a new feature so the driver can only enable the validation
> >>when the feature is enabled.  
> >
> >I fully agree with these steps.  
> 
> Michael sent a patch to suppress the validation, so I think we should 
> just fix vhost-vsock. I mean something like this:
> 
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 938aefbc75ec..4e3b95af7ee4 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -554,7 +554,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
>                          virtio_transport_free_pkt(pkt);
> 
>                  len += sizeof(pkt->hdr);
> -               vhost_add_used(vq, head, len);
> +               vhost_add_used(vq, head, 0);
>                  total_len += len;
>                  added = true;
>          } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
> 
> I checked and the problem is there from the first commit, so we should 
> add:
> 
> Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
> 
> I tested this patch and it works even without suppressing validation in 
> the virtio core.  But for backwards compatibility we have to suppress it 
> for sure as Michael did.
> 
> Maybe we can have a patch just with this change to backport it easily 
> and one after to clean up a bit the code that was added after (len, 
> total_len).
> 
> @Halil Let me know if you want to do it, otherwise I can do it.
> 

It is fine, it was you guys who figured out the solution so I think
it should either be Jason or you who take credit for the patch. Thanks
for addressing the issue this quickly!

Regards,
Halil

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22 14:24                 ` Halil Pasic
@ 2021-11-22 16:23                   ` Stefano Garzarella
  0 siblings, 0 replies; 32+ messages in thread
From: Stefano Garzarella @ 2021-11-22 16:23 UTC (permalink / raw)
  To: Halil Pasic
  Cc: Jason Wang, mst, virtualization, Hetzelt, Felicitas,
	linux-kernel, kaplan, david, Konrad Rzeszutek Wilk,
	Stefan Hajnoczi

On Mon, Nov 22, 2021 at 03:24:32PM +0100, Halil Pasic wrote:
>On Mon, 22 Nov 2021 12:08:22 +0100
>Stefano Garzarella <sgarzare@redhat.com> wrote:
>
>> On Mon, Nov 22, 2021 at 08:55:24AM +0100, Stefano Garzarella wrote:
>> >On Mon, Nov 22, 2021 at 02:25:26PM +0800, Jason Wang wrote:
>> >>On Mon, Nov 22, 2021 at 1:49 PM Halil Pasic <pasic@linux.ibm.com> wrote:
>> >>>
>> >>>On Mon, 22 Nov 2021 06:35:18 +0100
>> >>>Halil Pasic <pasic@linux.ibm.com> wrote:
>> >>>
>> >>>> > I think it should be a common issue, looking at
>> >>>> > vhost_vsock_handle_tx_kick(), it did:
>> >>>> >
>> >>>> > len += sizeof(pkt->hdr);
>> >>>> > vhost_add_used(vq, head, len);
>> >>>> >
>> >>>> > which looks like a violation of the spec since it's TX.
>> >>>>
>> >>>> I'm not sure the lines above look like a violation of the spec. If you
>> >>>> examine vhost_vsock_alloc_pkt() I believe that you will agree that:
>> >>>> len == pkt->len == pkt->hdr.len
>> >>>> which makes sense since according to the spec both tx and rx messages
>> >>>> are hdr+payload. And I believe hdr.len is the size of the payload,
>> >>>> although that does not seem to be properly documented by the spec.
>> >>
>> >>Sorry for being unclear, what I meant is that we probably should use
>> >>zero here. TX doesn't use in buffer actually.
>> >>
>> >>According to the spec, 0 should be the used length:
>> >>
>> >>"and len the total of bytes written into the buffer."
>> >>
>> >>>>
>> >>>> On the other hand tx messages are stated to be device read-only (in the
>> >>>> spec) so if the device writes stuff, that is certainly wrong.
>> >>>>
>> >>
>> >>Yes.
>> >>
>> >>>> If that is what happens.
>> >>>>
>> >>>> Looking at virtqueue_get_buf_ctx_split() I'm not sure that is what
>> >>>> happens. My hypothesis is that we just a last descriptor is an 'in'
>> >>>> type descriptor (i.e. a device writable one). For tx that assumption
>> >>>> would be wrong.
>> >>>>
>> >>>> I will have another look at this today and send a fix patch if my
>> >>>> suspicion is confirmed.
>> >>>
>> >>>If my suspicion is right something like:
>> >>>
>> >>>diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> >>>index 00f64f2f8b72..efb57898920b 100644
>> >>>--- a/drivers/virtio/virtio_ring.c
>> >>>+++ b/drivers/virtio/virtio_ring.c
>> >>>@@ -764,6 +764,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>> >>>        struct vring_virtqueue *vq = to_vvq(_vq);
>> >>>        void *ret;
>> >>>        unsigned int i;
>> >>>+       bool has_in;
>> >>>        u16 last_used;
>> >>>
>> >>>        START_USE(vq);
>> >>>@@ -787,6 +788,9 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>> >>>                        vq->split.vring.used->ring[last_used].id);
>> >>>        *len = virtio32_to_cpu(_vq->vdev,
>> >>>                        vq->split.vring.used->ring[last_used].len);
>> >>>+       has_in = virtio16_to_cpu(_vq->vdev,
>> >>>+                       vq->split.vring.used->ring[last_used].flags)
>> >>>+                               & VRING_DESC_F_WRITE;
>> >>
>> >>Did you mean vring.desc actually? If yes, it's better not depend on
>> >>the descriptor ring which can be modified by the device. We've stored
>> >>the flags in desc_extra[].
>> >>
>> >>>
>> >>>        if (unlikely(i >= vq->split.vring.num)) {
>> >>>                BAD_RING(vq, "id %u out of range\n", i);
>> >>>@@ -796,7 +800,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
>> >>>                BAD_RING(vq, "id %u is not a head!\n", i);
>> >>>                return NULL;
>> >>>        }
>> >>>-       if (vq->buflen && unlikely(*len > vq->buflen[i])) {
>> >>>+       if (has_in && q->buflen && unlikely(*len > vq->buflen[i])) {
>> >>>                BAD_RING(vq, "used len %d is larger than in buflen %u\n",
>> >>>                        *len, vq->buflen[i]);
>> >>>                return NULL;
>> >>>
>> >>>would fix the problem for split. I will try that out and let you know
>> >>>later.
>> >>
>> >>I'm not sure I get this, in virtqueue_add_split, the buflen[i] only
>> >>contains the in buffer length.
>> >>
>> >>I think the fixes are:
>> >>
>> >>1) fixing the vhost vsock
>> >
>> >Yep, in vhost_vsock_handle_tx_kick() we should have vhost_add_used(vq,
>> >head, 0) since the device doesn't write anything.
>> >
>> >>2) use suppress_used_validation=true to let vsock driver to validate
>> >>the in buffer length
>> >>3) probably a new feature so the driver can only enable the validation
>> >>when the feature is enabled.
>> >
>> >I fully agree with these steps.
>>
>> Michael sent a patch to suppress the validation, so I think we should
>> just fix vhost-vsock. I mean something like this:
>>
>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> index 938aefbc75ec..4e3b95af7ee4 100644
>> --- a/drivers/vhost/vsock.c
>> +++ b/drivers/vhost/vsock.c
>> @@ -554,7 +554,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
>>                          virtio_transport_free_pkt(pkt);
>>
>>                  len += sizeof(pkt->hdr);
>> -               vhost_add_used(vq, head, len);
>> +               vhost_add_used(vq, head, 0);
>>                  total_len += len;
>>                  added = true;
>>          } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
>>
>> I checked and the problem is there from the first commit, so we should
>> add:
>>
>> Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
>>
>> I tested this patch and it works even without suppressing validation in
>> the virtio core.  But for backwards compatibility we have to suppress it
>> for sure as Michael did.
>>
>> Maybe we can have a patch just with this change to backport it easily
>> and one after to clean up a bit the code that was added after (len,
>> total_len).
>>
>> @Halil Let me know if you want to do it, otherwise I can do it.
>>
>
>It is fine, it was you guys who figured out the solution so I think
>it should either be Jason or you who take credit for the patch.

Okay, I'm finishing the tests and sending the patch.

>Thanks for addressing the issue this quickly!

Thanks for reporting!

Stefano


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22  6:25           ` Jason Wang
  2021-11-22  7:55             ` Stefano Garzarella
  2021-11-22 13:50             ` Halil Pasic
@ 2021-11-22 20:23             ` Halil Pasic
  2021-11-23  2:25               ` Jason Wang
  2 siblings, 1 reply; 32+ messages in thread
From: Halil Pasic @ 2021-11-22 20:23 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, virtualization, Hetzelt, Felicitas, linux-kernel, kaplan,
	david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella, Halil Pasic

On Mon, 22 Nov 2021 14:25:26 +0800
Jason Wang <jasowang@redhat.com> wrote:

> I think the fixes are:
> 
> 1) fixing the vhost vsock
> 2) use suppress_used_validation=true to let vsock driver to validate
> the in buffer length
> 3) probably a new feature so the driver can only enable the validation
> when the feature is enabled.

I'm not sure, I would consider a F_DEV_Y_FIXED_BUG_X a perfectly good
feature. Frankly the set of such bugs is device implementation
specific and it makes little sense to specify a feature bit
that says the device implementation claims to adhere to some
aspect of the specification. Also what would be the semantic
of not negotiating F_DEV_Y_FIXED_BUG_X?

On the other hand I see no other way to keep the validation
permanently enabled for fixed implementations, and get around the problem
with broken implementations. So we could have something like
VHOST_USED_LEN_STRICT.

Maybe, we can also think of 'warn and don't alter behavior' instead of
'warn' and alter behavior. Or maybe even not having such checks on in
production, but only when testing.

Regards,
Halil

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22 20:23             ` Halil Pasic
@ 2021-11-23  2:25               ` Jason Wang
  2021-11-23 11:05                 ` Michael S. Tsirkin
  0 siblings, 1 reply; 32+ messages in thread
From: Jason Wang @ 2021-11-23  2:25 UTC (permalink / raw)
  To: Halil Pasic
  Cc: mst, virtualization, Hetzelt, Felicitas, linux-kernel, kaplan,
	david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella

On Tue, Nov 23, 2021 at 4:24 AM Halil Pasic <pasic@linux.ibm.com> wrote:
>
> On Mon, 22 Nov 2021 14:25:26 +0800
> Jason Wang <jasowang@redhat.com> wrote:
>
> > I think the fixes are:
> >
> > 1) fixing the vhost vsock
> > 2) use suppress_used_validation=true to let vsock driver to validate
> > the in buffer length
> > 3) probably a new feature so the driver can only enable the validation
> > when the feature is enabled.
>
> I'm not sure, I would consider a F_DEV_Y_FIXED_BUG_X a perfectly good
> feature. Frankly the set of such bugs is device implementation
> specific and it makes little sense to specify a feature bit
> that says the device implementation claims to adhere to some
> aspect of the specification. Also what would be the semantic
> of not negotiating F_DEV_Y_FIXED_BUG_X?

Yes, I agree. Rethink of the feature bit, it seems unnecessary,
especially considering the driver should not care about the used
length for tx.

>
> On the other hand I see no other way to keep the validation
> permanently enabled for fixed implementations, and get around the problem
> with broken implementations. So we could have something like
> VHOST_USED_LEN_STRICT.

It's more about a choice of the driver's knowledge. For vsock TX it
should be fine. If we introduce a parameter and disable it by default,
it won't be very useful.

>
> Maybe, we can also think of 'warn and don't alter behavior' instead of
> 'warn' and alter behavior. Or maybe even not having such checks on in
> production, but only when testing.

I think there's an agreement that virtio drivers need more hardening,
that's why a lot of patches were merged. Especially considering the
new requirements came from confidential computing, smart NIC and
VDUSE. For virtio drivers, enabling the validation may help to

1) protect the driver from the buggy and malicious device
2) uncover the bugs of the devices (as vsock did, and probably rpmsg)
3) force the have a smart driver that can do the validation itself
then we can finally remove the validation in the core

So I'd like to keep it enabled.

Thanks

>
> Regards,
> Halil
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22 13:50             ` Halil Pasic
@ 2021-11-23  2:30               ` Jason Wang
  2021-11-23 12:17               ` Michael S. Tsirkin
  1 sibling, 0 replies; 32+ messages in thread
From: Jason Wang @ 2021-11-23  2:30 UTC (permalink / raw)
  To: Halil Pasic
  Cc: mst, virtualization, Hetzelt, Felicitas, linux-kernel, kaplan,
	david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella

On Mon, Nov 22, 2021 at 9:50 PM Halil Pasic <pasic@linux.ibm.com> wrote:
>
> On Mon, 22 Nov 2021 14:25:26 +0800
> Jason Wang <jasowang@redhat.com> wrote:
>
> > On Mon, Nov 22, 2021 at 1:49 PM Halil Pasic <pasic@linux.ibm.com> wrote:
> > >
> > > On Mon, 22 Nov 2021 06:35:18 +0100
> > > Halil Pasic <pasic@linux.ibm.com> wrote:
> > >
> > > > > I think it should be a common issue, looking at
> > > > > vhost_vsock_handle_tx_kick(), it did:
> > > > >
> > > > > len += sizeof(pkt->hdr);
> > > > > vhost_add_used(vq, head, len);
> > > > >
> > > > > which looks like a violation of the spec since it's TX.
> > > >
> > > > I'm not sure the lines above look like a violation of the spec. If you
> > > > examine vhost_vsock_alloc_pkt() I believe that you will agree that:
> > > > len == pkt->len == pkt->hdr.len
> > > > which makes sense since according to the spec both tx and rx messages
> > > > are hdr+payload. And I believe hdr.len is the size of the payload,
> > > > although that does not seem to be properly documented by the spec.
> >
> > Sorry for being unclear, what I meant is that we probably should use
> > zero here. TX doesn't use in buffer actually.
> >
> > According to the spec, 0 should be the used length:
> >
> > "and len the total of bytes written into the buffer."
>
> Right, I was wrong. I somehow assumed this is the total length and not
> just the number of bytes written.
>
> >
> > > >
> > > > On the other hand tx messages are stated to be device read-only (in the
> > > > spec) so if the device writes stuff, that is certainly wrong.
> > > >
> >
> > Yes.
> >
> > > > If that is what happens.
> > > >
> > > > Looking at virtqueue_get_buf_ctx_split() I'm not sure that is what
> > > > happens. My hypothesis is that we just a last descriptor is an 'in'
> > > > type descriptor (i.e. a device writable one). For tx that assumption
> > > > would be wrong.
> > > >
> > > > I will have another look at this today and send a fix patch if my
> > > > suspicion is confirmed.
>
> Yeah, I didn't remember the semantic of
> vq->split.vring.used->ring[last_used].len
> correctly, and in fact also how exactly the rings work. So your objection
> is correct.
>
> Maybe updating some stuff would make it easier to not make this mistake.
>
> For example the spec and also the linux header says:
>
> /* le32 is used here for ids for padding reasons. */
> struct virtq_used_elem {
>         /* Index of start of used descriptor chain. */
>         le32 id;
>         /* Total length of the descriptor chain which was used (written to) */
>         le32 len;
> };
>
> I think that comment isn't as clear as it could be. I would prefer:
> /* The number of bytes written into the device writable portion of the
> buffer described by the descriptor chain. */
>
> I believe "the descriptor chain which was used" includes both the
> descriptors that map the device read only and the device write
> only portions of the buffer described by the descriptor chain. And the
> total length of that descriptor chain may be defined either as a number
> of the descriptors that form the chain, or the length of the buffer.
>
> One has to use the descriptor chain even if the whole buffer is device
> read only. So "used" == "written to" does not make any sense to me.

Not a native speaker but if others are fine I'm ok with this tweak on
the comment.

>
> Also something like
> int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int bytes_written)
> instead of
> int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
> would make it easier to read the code correctly.

Or maybe a comment to explain the len.

Thanks

>
> > >
> > > If my suspicion is right something like:
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 00f64f2f8b72..efb57898920b 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -764,6 +764,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > >         struct vring_virtqueue *vq = to_vvq(_vq);
> > >         void *ret;
> > >         unsigned int i;
> > > +       bool has_in;
> > >         u16 last_used;
> > >
> > >         START_USE(vq);
> > > @@ -787,6 +788,9 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > >                         vq->split.vring.used->ring[last_used].id);
> > >         *len = virtio32_to_cpu(_vq->vdev,
> > >                         vq->split.vring.used->ring[last_used].len);
> > > +       has_in = virtio16_to_cpu(_vq->vdev,
> > > +                       vq->split.vring.used->ring[last_used].flags)
> > > +                               & VRING_DESC_F_WRITE;
> >
> > Did you mean vring.desc actually? If yes, it's better not depend on
> > the descriptor ring which can be modified by the device. We've stored
> > the flags in desc_extra[].
> >
> > >
> > >         if (unlikely(i >= vq->split.vring.num)) {
> > >                 BAD_RING(vq, "id %u out of range\n", i);
> > > @@ -796,7 +800,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > >                 BAD_RING(vq, "id %u is not a head!\n", i);
> > >                 return NULL;
> > >         }
> > > -       if (vq->buflen && unlikely(*len > vq->buflen[i])) {
> > > +       if (has_in && q->buflen && unlikely(*len > vq->buflen[i])) {
> > >                 BAD_RING(vq, "used len %d is larger than in buflen %u\n",
> > >                         *len, vq->buflen[i]);
> > >                 return NULL;
> > >
> > > would fix the problem for split. I will try that out and let you know
> > > later.
> >
> > I'm not sure I get this, in virtqueue_add_split, the buflen[i] only
> > contains the in buffer length.
>
> Sorry my diff is indeed silly.
>
> >
> > I think the fixes are:
> >
> > 1) fixing the vhost vsock
> > 2) use suppress_used_validation=true to let vsock driver to validate
> > the in buffer length
> > 3) probably a new feature so the driver can only enable the validation
> > when the feature is enabled.
> >
>
> Makes sense!
>
> Regards,
> Halil
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-23  2:25               ` Jason Wang
@ 2021-11-23 11:05                 ` Michael S. Tsirkin
  2021-11-24  1:30                   ` Michael Ellerman
  0 siblings, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2021-11-23 11:05 UTC (permalink / raw)
  To: Jason Wang
  Cc: Halil Pasic, virtualization, Hetzelt, Felicitas, linux-kernel,
	kaplan, david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella

On Tue, Nov 23, 2021 at 10:25:20AM +0800, Jason Wang wrote:
> On Tue, Nov 23, 2021 at 4:24 AM Halil Pasic <pasic@linux.ibm.com> wrote:
> >
> > On Mon, 22 Nov 2021 14:25:26 +0800
> > Jason Wang <jasowang@redhat.com> wrote:
> >
> > > I think the fixes are:
> > >
> > > 1) fixing the vhost vsock
> > > 2) use suppress_used_validation=true to let vsock driver to validate
> > > the in buffer length
> > > 3) probably a new feature so the driver can only enable the validation
> > > when the feature is enabled.
> >
> > I'm not sure, I would consider a F_DEV_Y_FIXED_BUG_X a perfectly good
> > feature. Frankly the set of such bugs is device implementation
> > specific and it makes little sense to specify a feature bit
> > that says the device implementation claims to adhere to some
> > aspect of the specification. Also what would be the semantic
> > of not negotiating F_DEV_Y_FIXED_BUG_X?
> 
> Yes, I agree. Rethink of the feature bit, it seems unnecessary,
> especially considering the driver should not care about the used
> length for tx.
> 
> >
> > On the other hand I see no other way to keep the validation
> > permanently enabled for fixed implementations, and get around the problem
> > with broken implementations. So we could have something like
> > VHOST_USED_LEN_STRICT.
> 
> It's more about a choice of the driver's knowledge. For vsock TX it
> should be fine. If we introduce a parameter and disable it by default,
> it won't be very useful.
> 
> >
> > Maybe, we can also think of 'warn and don't alter behavior' instead of
> > 'warn' and alter behavior. Or maybe even not having such checks on in
> > production, but only when testing.
> 
> I think there's an agreement that virtio drivers need more hardening,
> that's why a lot of patches were merged. Especially considering the
> new requirements came from confidential computing, smart NIC and
> VDUSE. For virtio drivers, enabling the validation may help to
> 
> 1) protect the driver from the buggy and malicious device
> 2) uncover the bugs of the devices (as vsock did, and probably rpmsg)
> 3) force the have a smart driver that can do the validation itself
> then we can finally remove the validation in the core
> 
> So I'd like to keep it enabled.
> 
> Thanks

Let's see how far we can get. But yes, maybe we were too aggressive in
breaking things by default, a warning might be a better choice for a
couple of cycles.

> >
> > Regards,
> > Halil
> >


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-22 13:50             ` Halil Pasic
  2021-11-23  2:30               ` Jason Wang
@ 2021-11-23 12:17               ` Michael S. Tsirkin
  2021-11-23 12:43                 ` Halil Pasic
  1 sibling, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2021-11-23 12:17 UTC (permalink / raw)
  To: Halil Pasic
  Cc: Jason Wang, virtualization, Hetzelt, Felicitas, linux-kernel,
	kaplan, david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella

On Mon, Nov 22, 2021 at 02:50:03PM +0100, Halil Pasic wrote:
> On Mon, 22 Nov 2021 14:25:26 +0800
> Jason Wang <jasowang@redhat.com> wrote:
> 
> > On Mon, Nov 22, 2021 at 1:49 PM Halil Pasic <pasic@linux.ibm.com> wrote:
> > >
> > > On Mon, 22 Nov 2021 06:35:18 +0100
> > > Halil Pasic <pasic@linux.ibm.com> wrote:
> > >  
> > > > > I think it should be a common issue, looking at
> > > > > vhost_vsock_handle_tx_kick(), it did:
> > > > >
> > > > > len += sizeof(pkt->hdr);
> > > > > vhost_add_used(vq, head, len);
> > > > >
> > > > > which looks like a violation of the spec since it's TX.  
> > > >
> > > > I'm not sure the lines above look like a violation of the spec. If you
> > > > examine vhost_vsock_alloc_pkt() I believe that you will agree that:
> > > > len == pkt->len == pkt->hdr.len
> > > > which makes sense since according to the spec both tx and rx messages
> > > > are hdr+payload. And I believe hdr.len is the size of the payload,
> > > > although that does not seem to be properly documented by the spec.  
> > 
> > Sorry for being unclear, what I meant is that we probably should use
> > zero here. TX doesn't use in buffer actually.
> > 
> > According to the spec, 0 should be the used length:
> > 
> > "and len the total of bytes written into the buffer."
> 
> Right, I was wrong. I somehow assumed this is the total length and not
> just the number of bytes written.
> 
> > 
> > > >
> > > > On the other hand tx messages are stated to be device read-only (in the
> > > > spec) so if the device writes stuff, that is certainly wrong.
> > > >  
> > 
> > Yes.
> > 
> > > > If that is what happens.
> > > >
> > > > Looking at virtqueue_get_buf_ctx_split() I'm not sure that is what
> > > > happens. My hypothesis is that we just a last descriptor is an 'in'
> > > > type descriptor (i.e. a device writable one). For tx that assumption
> > > > would be wrong.
> > > >
> > > > I will have another look at this today and send a fix patch if my
> > > > suspicion is confirmed.
> 
> Yeah, I didn't remember the semantic of
> vq->split.vring.used->ring[last_used].len
> correctly, and in fact also how exactly the rings work. So your objection
> is correct. 
> 
> Maybe updating some stuff would make it easier to not make this mistake.
> 
> For example the spec and also the linux header says:
> 
> /* le32 is used here for ids for padding reasons. */ 
> struct virtq_used_elem { 
>         /* Index of start of used descriptor chain. */ 
>         le32 id; 
>         /* Total length of the descriptor chain which was used (written to) */ 
>         le32 len; 
> };
> 
> I think that comment isn't as clear as it could be. I would prefer:
> /* The number of bytes written into the device writable portion of the
> buffer described by the descriptor chain. */
> 
> I believe "the descriptor chain which was used" includes both the
> descriptors that map the device read only and the device write
> only portions of the buffer described by the descriptor chain. And the
> total length of that descriptor chain may be defined either as a number
> of the descriptors that form the chain, or the length of the buffer.
> 
> One has to use the descriptor chain even if the whole buffer is device
> read only. So "used" == "written to" does not make any sense to me.

The virtio spec actually says

Total length of the descriptor chain which was written to

without the "used" part.

> Also something like
> int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int bytes_written)
> instead of
> int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
> would make it easier to read the code correctly.

I think we agree here. Patches?

> > >
> > > If my suspicion is right something like:
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 00f64f2f8b72..efb57898920b 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -764,6 +764,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > >         struct vring_virtqueue *vq = to_vvq(_vq);
> > >         void *ret;
> > >         unsigned int i;
> > > +       bool has_in;
> > >         u16 last_used;
> > >
> > >         START_USE(vq);
> > > @@ -787,6 +788,9 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > >                         vq->split.vring.used->ring[last_used].id);
> > >         *len = virtio32_to_cpu(_vq->vdev,
> > >                         vq->split.vring.used->ring[last_used].len);
> > > +       has_in = virtio16_to_cpu(_vq->vdev,
> > > +                       vq->split.vring.used->ring[last_used].flags)
> > > +                               & VRING_DESC_F_WRITE;  
> > 
> > Did you mean vring.desc actually? If yes, it's better not depend on
> > the descriptor ring which can be modified by the device. We've stored
> > the flags in desc_extra[].
> > 
> > >
> > >         if (unlikely(i >= vq->split.vring.num)) {
> > >                 BAD_RING(vq, "id %u out of range\n", i);
> > > @@ -796,7 +800,7 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
> > >                 BAD_RING(vq, "id %u is not a head!\n", i);
> > >                 return NULL;
> > >         }
> > > -       if (vq->buflen && unlikely(*len > vq->buflen[i])) {
> > > +       if (has_in && q->buflen && unlikely(*len > vq->buflen[i])) {
> > >                 BAD_RING(vq, "used len %d is larger than in buflen %u\n",
> > >                         *len, vq->buflen[i]);
> > >                 return NULL;
> > >
> > > would fix the problem for split. I will try that out and let you know
> > > later.  
> > 
> > I'm not sure I get this, in virtqueue_add_split, the buflen[i] only
> > contains the in buffer length.
> 
> Sorry my diff is indeed silly.
> 
> > 
> > I think the fixes are:
> > 
> > 1) fixing the vhost vsock
> > 2) use suppress_used_validation=true to let vsock driver to validate
> > the in buffer length
> > 3) probably a new feature so the driver can only enable the validation
> > when the feature is enabled.
> > 
> 
> Makes sense!
> 
> Regards,
> Halil


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-23 12:17               ` Michael S. Tsirkin
@ 2021-11-23 12:43                 ` Halil Pasic
  0 siblings, 0 replies; 32+ messages in thread
From: Halil Pasic @ 2021-11-23 12:43 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Jason Wang, virtualization, Hetzelt, Felicitas, linux-kernel,
	kaplan, david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella, Halil Pasic

On Tue, 23 Nov 2021 07:17:05 -0500
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Mon, Nov 22, 2021 at 02:50:03PM +0100, Halil Pasic wrote:
> > On Mon, 22 Nov 2021 14:25:26 +0800
> > Jason Wang <jasowang@redhat.com> wrote:
> >   
> > > On Mon, Nov 22, 2021 at 1:49 PM Halil Pasic <pasic@linux.ibm.com> wrote:  
> > > >
> > > > On Mon, 22 Nov 2021 06:35:18 +0100
> > > > Halil Pasic <pasic@linux.ibm.com> wrote:
> > > >    
> > > > > > I think it should be a common issue, looking at
> > > > > > vhost_vsock_handle_tx_kick(), it did:
> > > > > >
> > > > > > len += sizeof(pkt->hdr);
> > > > > > vhost_add_used(vq, head, len);
> > > > > >
> > > > > > which looks like a violation of the spec since it's TX.    
> > > > >
> > > > > I'm not sure the lines above look like a violation of the spec. If you
> > > > > examine vhost_vsock_alloc_pkt() I believe that you will agree that:
> > > > > len == pkt->len == pkt->hdr.len
> > > > > which makes sense since according to the spec both tx and rx messages
> > > > > are hdr+payload. And I believe hdr.len is the size of the payload,
> > > > > although that does not seem to be properly documented by the spec.    
> > > 
> > > Sorry for being unclear, what I meant is that we probably should use
> > > zero here. TX doesn't use in buffer actually.
> > > 
> > > According to the spec, 0 should be the used length:
> > > 
> > > "and len the total of bytes written into the buffer."  
> > 
> > Right, I was wrong. I somehow assumed this is the total length and not
> > just the number of bytes written.
> >   
> > >   
> > > > >
> > > > > On the other hand tx messages are stated to be device read-only (in the
> > > > > spec) so if the device writes stuff, that is certainly wrong.
> > > > >    
> > > 
> > > Yes.
> > >   
> > > > > If that is what happens.
> > > > >
> > > > > Looking at virtqueue_get_buf_ctx_split() I'm not sure that is what
> > > > > happens. My hypothesis is that we just a last descriptor is an 'in'
> > > > > type descriptor (i.e. a device writable one). For tx that assumption
> > > > > would be wrong.
> > > > >
> > > > > I will have another look at this today and send a fix patch if my
> > > > > suspicion is confirmed.  
> > 
> > Yeah, I didn't remember the semantic of
> > vq->split.vring.used->ring[last_used].len
> > correctly, and in fact also how exactly the rings work. So your objection
> > is correct. 
> > 
> > Maybe updating some stuff would make it easier to not make this mistake.
> > 
> > For example the spec and also the linux header says:
> > 
> > /* le32 is used here for ids for padding reasons. */ 
> > struct virtq_used_elem { 
> >         /* Index of start of used descriptor chain. */ 
> >         le32 id; 
> >         /* Total length of the descriptor chain which was used (written to) */ 
> >         le32 len; 
> > };
> > 
> > I think that comment isn't as clear as it could be. I would prefer:
> > /* The number of bytes written into the device writable portion of the
> > buffer described by the descriptor chain. */
> > 
> > I believe "the descriptor chain which was used" includes both the
> > descriptors that map the device read only and the device write
> > only portions of the buffer described by the descriptor chain. And the
> > total length of that descriptor chain may be defined either as a number
> > of the descriptors that form the chain, or the length of the buffer.
> > 
> > One has to use the descriptor chain even if the whole buffer is device
> > read only. So "used" == "written to" does not make any sense to me.  
> 
> The virtio spec actually says
> 
> Total length of the descriptor chain which was written to
> 
> without the "used" part.

Yes, that is in the text after the (pseudo-)code listing which contains
the description I was referring to (in 2.6.8 The Virtqueue Used Ring).
> 
> > Also something like
> > int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int bytes_written)
> > instead of
> > int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
> > would make it easier to read the code correctly.  
> 
> I think we agree here. Patches?
> 

Will send some :D

Thanks!

[..]

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-23 11:05                 ` Michael S. Tsirkin
@ 2021-11-24  1:30                   ` Michael Ellerman
  2021-11-24  2:26                     ` Jason Wang
  0 siblings, 1 reply; 32+ messages in thread
From: Michael Ellerman @ 2021-11-24  1:30 UTC (permalink / raw)
  To: Michael S. Tsirkin, Jason Wang
  Cc: Halil Pasic, virtualization, Hetzelt, Felicitas, linux-kernel,
	kaplan, david, Konrad Rzeszutek Wilk, Stefan Hajnoczi,
	Stefano Garzarella, mcgrof, david

"Michael S. Tsirkin" <mst@redhat.com> writes:
> On Tue, Nov 23, 2021 at 10:25:20AM +0800, Jason Wang wrote:
>> On Tue, Nov 23, 2021 at 4:24 AM Halil Pasic <pasic@linux.ibm.com> wrote:
>> >
>> > On Mon, 22 Nov 2021 14:25:26 +0800
>> > Jason Wang <jasowang@redhat.com> wrote:
>> >
>> > > I think the fixes are:
>> > >
>> > > 1) fixing the vhost vsock
>> > > 2) use suppress_used_validation=true to let vsock driver to validate
>> > > the in buffer length
>> > > 3) probably a new feature so the driver can only enable the validation
>> > > when the feature is enabled.
>> >
>> > I'm not sure, I would consider a F_DEV_Y_FIXED_BUG_X a perfectly good
>> > feature. Frankly the set of such bugs is device implementation
>> > specific and it makes little sense to specify a feature bit
>> > that says the device implementation claims to adhere to some
>> > aspect of the specification. Also what would be the semantic
>> > of not negotiating F_DEV_Y_FIXED_BUG_X?
>> 
>> Yes, I agree. Rethink of the feature bit, it seems unnecessary,
>> especially considering the driver should not care about the used
>> length for tx.
>> 
>> >
>> > On the other hand I see no other way to keep the validation
>> > permanently enabled for fixed implementations, and get around the problem
>> > with broken implementations. So we could have something like
>> > VHOST_USED_LEN_STRICT.
>> 
>> It's more about a choice of the driver's knowledge. For vsock TX it
>> should be fine. If we introduce a parameter and disable it by default,
>> it won't be very useful.
>> 
>> >
>> > Maybe, we can also think of 'warn and don't alter behavior' instead of
>> > 'warn' and alter behavior. Or maybe even not having such checks on in
>> > production, but only when testing.
>> 
>> I think there's an agreement that virtio drivers need more hardening,
>> that's why a lot of patches were merged. Especially considering the
>> new requirements came from confidential computing, smart NIC and
>> VDUSE. For virtio drivers, enabling the validation may help to
>> 
>> 1) protect the driver from the buggy and malicious device
>> 2) uncover the bugs of the devices (as vsock did, and probably rpmsg)
>> 3) force the have a smart driver that can do the validation itself
>> then we can finally remove the validation in the core
>> 
>> So I'd like to keep it enabled.
>> 
>> Thanks
>
> Let's see how far we can get. But yes, maybe we were too aggressive in
> breaking things by default, a warning might be a better choice for a
> couple of cycles.

This series appears to break the virtio_balloon driver as well.

The symptom is soft lockup warnings, eg:

  INFO: task kworker/1:1:109 blocked for more than 614 seconds.
        Not tainted 5.16.0-rc2-gcc-10.3.0 #21
  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  task:kworker/1:1     state:D stack:12496 pid:  109 ppid:     2 flags:0x00000800
  Workqueue: events_freezable update_balloon_size_func
  Call Trace:
  [c000000003cef7c0] [c000000003cef820] 0xc000000003cef820 (unreliable)
  [c000000003cef9b0] [c00000000001e238] __switch_to+0x1e8/0x2f0
  [c000000003cefa10] [c000000000f0a00c] __schedule+0x2cc/0xb50
  [c000000003cefae0] [c000000000f0a8fc] schedule+0x6c/0x140
  [c000000003cefb10] [c00000000095b6c4] tell_host+0xe4/0x130
  [c000000003cefba0] [c00000000095d234] update_balloon_size_func+0x394/0x3f0
  [c000000003cefc70] [c000000000178064] process_one_work+0x2c4/0x5b0
  [c000000003cefd10] [c0000000001783f8] worker_thread+0xa8/0x640
  [c000000003cefda0] [c000000000185444] kthread+0x1b4/0x1c0
  [c000000003cefe10] [c00000000000cee4] ret_from_kernel_thread+0x5c/0x64

Similar backtrace reported here by Luis:

  https://lore.kernel.org/lkml/YY2duTi0wAyAKUTJ@bombadil.infradead.org/

Bisect points to:

  # first bad commit: [939779f5152d161b34f612af29e7dc1ac4472fcf] virtio_ring: validate used buffer length

Adding suppress used validation to the virtio balloon driver "fixes" it, eg.

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index c22ff0117b46..a14b82ceebb2 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -1150,6 +1150,7 @@ static unsigned int features[] = {
 };
 
 static struct virtio_driver virtio_balloon_driver = {
+	.suppress_used_validation = true,
 	.feature_table = features,
 	.feature_table_size = ARRAY_SIZE(features),
 	.driver.name =	KBUILD_MODNAME,


cheers

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-24  1:30                   ` Michael Ellerman
@ 2021-11-24  2:26                     ` Jason Wang
  2021-11-24  2:33                       ` Jason Wang
  0 siblings, 1 reply; 32+ messages in thread
From: Jason Wang @ 2021-11-24  2:26 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Michael S. Tsirkin, Halil Pasic, virtualization, Hetzelt,
	Felicitas, linux-kernel, kaplan, david, Konrad Rzeszutek Wilk,
	Stefan Hajnoczi, Stefano Garzarella, mcgrof, David Hildenbrand

On Wed, Nov 24, 2021 at 9:30 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> "Michael S. Tsirkin" <mst@redhat.com> writes:
> > On Tue, Nov 23, 2021 at 10:25:20AM +0800, Jason Wang wrote:
> >> On Tue, Nov 23, 2021 at 4:24 AM Halil Pasic <pasic@linux.ibm.com> wrote:
> >> >
> >> > On Mon, 22 Nov 2021 14:25:26 +0800
> >> > Jason Wang <jasowang@redhat.com> wrote:
> >> >
> >> > > I think the fixes are:
> >> > >
> >> > > 1) fixing the vhost vsock
> >> > > 2) use suppress_used_validation=true to let vsock driver to validate
> >> > > the in buffer length
> >> > > 3) probably a new feature so the driver can only enable the validation
> >> > > when the feature is enabled.
> >> >
> >> > I'm not sure, I would consider a F_DEV_Y_FIXED_BUG_X a perfectly good
> >> > feature. Frankly the set of such bugs is device implementation
> >> > specific and it makes little sense to specify a feature bit
> >> > that says the device implementation claims to adhere to some
> >> > aspect of the specification. Also what would be the semantic
> >> > of not negotiating F_DEV_Y_FIXED_BUG_X?
> >>
> >> Yes, I agree. Rethink of the feature bit, it seems unnecessary,
> >> especially considering the driver should not care about the used
> >> length for tx.
> >>
> >> >
> >> > On the other hand I see no other way to keep the validation
> >> > permanently enabled for fixed implementations, and get around the problem
> >> > with broken implementations. So we could have something like
> >> > VHOST_USED_LEN_STRICT.
> >>
> >> It's more about a choice of the driver's knowledge. For vsock TX it
> >> should be fine. If we introduce a parameter and disable it by default,
> >> it won't be very useful.
> >>
> >> >
> >> > Maybe, we can also think of 'warn and don't alter behavior' instead of
> >> > 'warn' and alter behavior. Or maybe even not having such checks on in
> >> > production, but only when testing.
> >>
> >> I think there's an agreement that virtio drivers need more hardening,
> >> that's why a lot of patches were merged. Especially considering the
> >> new requirements came from confidential computing, smart NIC and
> >> VDUSE. For virtio drivers, enabling the validation may help to
> >>
> >> 1) protect the driver from the buggy and malicious device
> >> 2) uncover the bugs of the devices (as vsock did, and probably rpmsg)
> >> 3) force the have a smart driver that can do the validation itself
> >> then we can finally remove the validation in the core
> >>
> >> So I'd like to keep it enabled.
> >>
> >> Thanks
> >
> > Let's see how far we can get. But yes, maybe we were too aggressive in
> > breaking things by default, a warning might be a better choice for a
> > couple of cycles.

Ok, considering we saw the issues with balloons I think I can post a
patch to use warn instead. I wonder if we need to taint the kernel in
this case.

>
> This series appears to break the virtio_balloon driver as well.
>
> The symptom is soft lockup warnings, eg:
>
>   INFO: task kworker/1:1:109 blocked for more than 614 seconds.
>         Not tainted 5.16.0-rc2-gcc-10.3.0 #21
>   "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>   task:kworker/1:1     state:D stack:12496 pid:  109 ppid:     2 flags:0x00000800
>   Workqueue: events_freezable update_balloon_size_func
>   Call Trace:
>   [c000000003cef7c0] [c000000003cef820] 0xc000000003cef820 (unreliable)
>   [c000000003cef9b0] [c00000000001e238] __switch_to+0x1e8/0x2f0
>   [c000000003cefa10] [c000000000f0a00c] __schedule+0x2cc/0xb50
>   [c000000003cefae0] [c000000000f0a8fc] schedule+0x6c/0x140
>   [c000000003cefb10] [c00000000095b6c4] tell_host+0xe4/0x130
>   [c000000003cefba0] [c00000000095d234] update_balloon_size_func+0x394/0x3f0
>   [c000000003cefc70] [c000000000178064] process_one_work+0x2c4/0x5b0
>   [c000000003cefd10] [c0000000001783f8] worker_thread+0xa8/0x640
>   [c000000003cefda0] [c000000000185444] kthread+0x1b4/0x1c0
>   [c000000003cefe10] [c00000000000cee4] ret_from_kernel_thread+0x5c/0x64
>
> Similar backtrace reported here by Luis:
>
>   https://lore.kernel.org/lkml/YY2duTi0wAyAKUTJ@bombadil.infradead.org/
>
> Bisect points to:
>
>   # first bad commit: [939779f5152d161b34f612af29e7dc1ac4472fcf] virtio_ring: validate used buffer length
>
> Adding suppress used validation to the virtio balloon driver "fixes" it, eg.
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index c22ff0117b46..a14b82ceebb2 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -1150,6 +1150,7 @@ static unsigned int features[] = {
>  };
>
>  static struct virtio_driver virtio_balloon_driver = {
> +       .suppress_used_validation = true,
>         .feature_table = features,
>         .feature_table_size = ARRAY_SIZE(features),
>         .driver.name =  KBUILD_MODNAME,

Looks good, we need a formal patch for this.

And we need fix Qemu as well which advertise non zero used length for
inflate/deflate queue:

static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
...
        virtqueue_push(vq, elem, offset);

Thanks

>
>
> cheers
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-24  2:26                     ` Jason Wang
@ 2021-11-24  2:33                       ` Jason Wang
  2021-11-24  7:22                         ` Michael S. Tsirkin
  2021-11-24 11:33                         ` Halil Pasic
  0 siblings, 2 replies; 32+ messages in thread
From: Jason Wang @ 2021-11-24  2:33 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Michael S. Tsirkin, Halil Pasic, virtualization, Hetzelt,
	Felicitas, linux-kernel, kaplan, david, Konrad Rzeszutek Wilk,
	Stefan Hajnoczi, Stefano Garzarella, mcgrof, David Hildenbrand

On Wed, Nov 24, 2021 at 10:26 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Wed, Nov 24, 2021 at 9:30 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> >
> > "Michael S. Tsirkin" <mst@redhat.com> writes:
> > > On Tue, Nov 23, 2021 at 10:25:20AM +0800, Jason Wang wrote:
> > >> On Tue, Nov 23, 2021 at 4:24 AM Halil Pasic <pasic@linux.ibm.com> wrote:
> > >> >
> > >> > On Mon, 22 Nov 2021 14:25:26 +0800
> > >> > Jason Wang <jasowang@redhat.com> wrote:
> > >> >
> > >> > > I think the fixes are:
> > >> > >
> > >> > > 1) fixing the vhost vsock
> > >> > > 2) use suppress_used_validation=true to let vsock driver to validate
> > >> > > the in buffer length
> > >> > > 3) probably a new feature so the driver can only enable the validation
> > >> > > when the feature is enabled.
> > >> >
> > >> > I'm not sure, I would consider a F_DEV_Y_FIXED_BUG_X a perfectly good
> > >> > feature. Frankly the set of such bugs is device implementation
> > >> > specific and it makes little sense to specify a feature bit
> > >> > that says the device implementation claims to adhere to some
> > >> > aspect of the specification. Also what would be the semantic
> > >> > of not negotiating F_DEV_Y_FIXED_BUG_X?
> > >>
> > >> Yes, I agree. Rethink of the feature bit, it seems unnecessary,
> > >> especially considering the driver should not care about the used
> > >> length for tx.
> > >>
> > >> >
> > >> > On the other hand I see no other way to keep the validation
> > >> > permanently enabled for fixed implementations, and get around the problem
> > >> > with broken implementations. So we could have something like
> > >> > VHOST_USED_LEN_STRICT.
> > >>
> > >> It's more about a choice of the driver's knowledge. For vsock TX it
> > >> should be fine. If we introduce a parameter and disable it by default,
> > >> it won't be very useful.
> > >>
> > >> >
> > >> > Maybe, we can also think of 'warn and don't alter behavior' instead of
> > >> > 'warn' and alter behavior. Or maybe even not having such checks on in
> > >> > production, but only when testing.
> > >>
> > >> I think there's an agreement that virtio drivers need more hardening,
> > >> that's why a lot of patches were merged. Especially considering the
> > >> new requirements came from confidential computing, smart NIC and
> > >> VDUSE. For virtio drivers, enabling the validation may help to
> > >>
> > >> 1) protect the driver from the buggy and malicious device
> > >> 2) uncover the bugs of the devices (as vsock did, and probably rpmsg)
> > >> 3) force the have a smart driver that can do the validation itself
> > >> then we can finally remove the validation in the core
> > >>
> > >> So I'd like to keep it enabled.
> > >>
> > >> Thanks
> > >
> > > Let's see how far we can get. But yes, maybe we were too aggressive in
> > > breaking things by default, a warning might be a better choice for a
> > > couple of cycles.
>
> Ok, considering we saw the issues with balloons I think I can post a
> patch to use warn instead. I wonder if we need to taint the kernel in
> this case.

Rethink this, consider we still have some time, I tend to convert the
drivers to validate the length by themselves. Does this make sense?

Thanks

>
> >
> > This series appears to break the virtio_balloon driver as well.
> >
> > The symptom is soft lockup warnings, eg:
> >
> >   INFO: task kworker/1:1:109 blocked for more than 614 seconds.
> >         Not tainted 5.16.0-rc2-gcc-10.3.0 #21
> >   "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> >   task:kworker/1:1     state:D stack:12496 pid:  109 ppid:     2 flags:0x00000800
> >   Workqueue: events_freezable update_balloon_size_func
> >   Call Trace:
> >   [c000000003cef7c0] [c000000003cef820] 0xc000000003cef820 (unreliable)
> >   [c000000003cef9b0] [c00000000001e238] __switch_to+0x1e8/0x2f0
> >   [c000000003cefa10] [c000000000f0a00c] __schedule+0x2cc/0xb50
> >   [c000000003cefae0] [c000000000f0a8fc] schedule+0x6c/0x140
> >   [c000000003cefb10] [c00000000095b6c4] tell_host+0xe4/0x130
> >   [c000000003cefba0] [c00000000095d234] update_balloon_size_func+0x394/0x3f0
> >   [c000000003cefc70] [c000000000178064] process_one_work+0x2c4/0x5b0
> >   [c000000003cefd10] [c0000000001783f8] worker_thread+0xa8/0x640
> >   [c000000003cefda0] [c000000000185444] kthread+0x1b4/0x1c0
> >   [c000000003cefe10] [c00000000000cee4] ret_from_kernel_thread+0x5c/0x64
> >
> > Similar backtrace reported here by Luis:
> >
> >   https://lore.kernel.org/lkml/YY2duTi0wAyAKUTJ@bombadil.infradead.org/
> >
> > Bisect points to:
> >
> >   # first bad commit: [939779f5152d161b34f612af29e7dc1ac4472fcf] virtio_ring: validate used buffer length
> >
> > Adding suppress used validation to the virtio balloon driver "fixes" it, eg.
> >
> > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > index c22ff0117b46..a14b82ceebb2 100644
> > --- a/drivers/virtio/virtio_balloon.c
> > +++ b/drivers/virtio/virtio_balloon.c
> > @@ -1150,6 +1150,7 @@ static unsigned int features[] = {
> >  };
> >
> >  static struct virtio_driver virtio_balloon_driver = {
> > +       .suppress_used_validation = true,
> >         .feature_table = features,
> >         .feature_table_size = ARRAY_SIZE(features),
> >         .driver.name =  KBUILD_MODNAME,
>
> Looks good, we need a formal patch for this.
>
> And we need fix Qemu as well which advertise non zero used length for
> inflate/deflate queue:
>
> static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> ...
>         virtqueue_push(vq, elem, offset);
>
> Thanks
>
> >
> >
> > cheers
> >


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-24  2:33                       ` Jason Wang
@ 2021-11-24  7:22                         ` Michael S. Tsirkin
  2021-11-24  7:59                           ` Jason Wang
  2021-11-24 11:33                         ` Halil Pasic
  1 sibling, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2021-11-24  7:22 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael Ellerman, Halil Pasic, virtualization, Hetzelt,
	Felicitas, linux-kernel, kaplan, david, Konrad Rzeszutek Wilk,
	Stefan Hajnoczi, Stefano Garzarella, mcgrof, David Hildenbrand

On Wed, Nov 24, 2021 at 10:33:28AM +0800, Jason Wang wrote:
> On Wed, Nov 24, 2021 at 10:26 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Wed, Nov 24, 2021 at 9:30 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> > >
> > > "Michael S. Tsirkin" <mst@redhat.com> writes:
> > > > On Tue, Nov 23, 2021 at 10:25:20AM +0800, Jason Wang wrote:
> > > >> On Tue, Nov 23, 2021 at 4:24 AM Halil Pasic <pasic@linux.ibm.com> wrote:
> > > >> >
> > > >> > On Mon, 22 Nov 2021 14:25:26 +0800
> > > >> > Jason Wang <jasowang@redhat.com> wrote:
> > > >> >
> > > >> > > I think the fixes are:
> > > >> > >
> > > >> > > 1) fixing the vhost vsock
> > > >> > > 2) use suppress_used_validation=true to let vsock driver to validate
> > > >> > > the in buffer length
> > > >> > > 3) probably a new feature so the driver can only enable the validation
> > > >> > > when the feature is enabled.
> > > >> >
> > > >> > I'm not sure, I would consider a F_DEV_Y_FIXED_BUG_X a perfectly good
> > > >> > feature. Frankly the set of such bugs is device implementation
> > > >> > specific and it makes little sense to specify a feature bit
> > > >> > that says the device implementation claims to adhere to some
> > > >> > aspect of the specification. Also what would be the semantic
> > > >> > of not negotiating F_DEV_Y_FIXED_BUG_X?
> > > >>
> > > >> Yes, I agree. Rethink of the feature bit, it seems unnecessary,
> > > >> especially considering the driver should not care about the used
> > > >> length for tx.
> > > >>
> > > >> >
> > > >> > On the other hand I see no other way to keep the validation
> > > >> > permanently enabled for fixed implementations, and get around the problem
> > > >> > with broken implementations. So we could have something like
> > > >> > VHOST_USED_LEN_STRICT.
> > > >>
> > > >> It's more about a choice of the driver's knowledge. For vsock TX it
> > > >> should be fine. If we introduce a parameter and disable it by default,
> > > >> it won't be very useful.
> > > >>
> > > >> >
> > > >> > Maybe, we can also think of 'warn and don't alter behavior' instead of
> > > >> > 'warn' and alter behavior. Or maybe even not having such checks on in
> > > >> > production, but only when testing.
> > > >>
> > > >> I think there's an agreement that virtio drivers need more hardening,
> > > >> that's why a lot of patches were merged. Especially considering the
> > > >> new requirements came from confidential computing, smart NIC and
> > > >> VDUSE. For virtio drivers, enabling the validation may help to
> > > >>
> > > >> 1) protect the driver from the buggy and malicious device
> > > >> 2) uncover the bugs of the devices (as vsock did, and probably rpmsg)
> > > >> 3) force the have a smart driver that can do the validation itself
> > > >> then we can finally remove the validation in the core
> > > >>
> > > >> So I'd like to keep it enabled.
> > > >>
> > > >> Thanks
> > > >
> > > > Let's see how far we can get. But yes, maybe we were too aggressive in
> > > > breaking things by default, a warning might be a better choice for a
> > > > couple of cycles.
> >
> > Ok, considering we saw the issues with balloons I think I can post a
> > patch to use warn instead. I wonder if we need to taint the kernel in
> > this case.
> 
> Rethink this, consider we still have some time, I tend to convert the
> drivers to validate the length by themselves. Does this make sense?
> 
> Thanks

That's separate but let's stop crashing guests for people ASAP.


> >
> > >
> > > This series appears to break the virtio_balloon driver as well.
> > >
> > > The symptom is soft lockup warnings, eg:
> > >
> > >   INFO: task kworker/1:1:109 blocked for more than 614 seconds.
> > >         Not tainted 5.16.0-rc2-gcc-10.3.0 #21
> > >   "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > >   task:kworker/1:1     state:D stack:12496 pid:  109 ppid:     2 flags:0x00000800
> > >   Workqueue: events_freezable update_balloon_size_func
> > >   Call Trace:
> > >   [c000000003cef7c0] [c000000003cef820] 0xc000000003cef820 (unreliable)
> > >   [c000000003cef9b0] [c00000000001e238] __switch_to+0x1e8/0x2f0
> > >   [c000000003cefa10] [c000000000f0a00c] __schedule+0x2cc/0xb50
> > >   [c000000003cefae0] [c000000000f0a8fc] schedule+0x6c/0x140
> > >   [c000000003cefb10] [c00000000095b6c4] tell_host+0xe4/0x130
> > >   [c000000003cefba0] [c00000000095d234] update_balloon_size_func+0x394/0x3f0
> > >   [c000000003cefc70] [c000000000178064] process_one_work+0x2c4/0x5b0
> > >   [c000000003cefd10] [c0000000001783f8] worker_thread+0xa8/0x640
> > >   [c000000003cefda0] [c000000000185444] kthread+0x1b4/0x1c0
> > >   [c000000003cefe10] [c00000000000cee4] ret_from_kernel_thread+0x5c/0x64
> > >
> > > Similar backtrace reported here by Luis:
> > >
> > >   https://lore.kernel.org/lkml/YY2duTi0wAyAKUTJ@bombadil.infradead.org/
> > >
> > > Bisect points to:
> > >
> > >   # first bad commit: [939779f5152d161b34f612af29e7dc1ac4472fcf] virtio_ring: validate used buffer length
> > >
> > > Adding suppress used validation to the virtio balloon driver "fixes" it, eg.
> > >
> > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > > index c22ff0117b46..a14b82ceebb2 100644
> > > --- a/drivers/virtio/virtio_balloon.c
> > > +++ b/drivers/virtio/virtio_balloon.c
> > > @@ -1150,6 +1150,7 @@ static unsigned int features[] = {
> > >  };
> > >
> > >  static struct virtio_driver virtio_balloon_driver = {
> > > +       .suppress_used_validation = true,
> > >         .feature_table = features,
> > >         .feature_table_size = ARRAY_SIZE(features),
> > >         .driver.name =  KBUILD_MODNAME,
> >
> > Looks good, we need a formal patch for this.
> >
> > And we need fix Qemu as well which advertise non zero used length for
> > inflate/deflate queue:
> >
> > static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > ...
> >         virtqueue_push(vq, elem, offset);
> >
> > Thanks
> >
> > >
> > >
> > > cheers
> > >


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-24  7:22                         ` Michael S. Tsirkin
@ 2021-11-24  7:59                           ` Jason Wang
  2021-11-24  8:24                             ` Michael S. Tsirkin
  0 siblings, 1 reply; 32+ messages in thread
From: Jason Wang @ 2021-11-24  7:59 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Michael Ellerman, Halil Pasic, virtualization, Hetzelt,
	Felicitas, linux-kernel, kaplan, david, Konrad Rzeszutek Wilk,
	Stefan Hajnoczi, Stefano Garzarella, mcgrof, David Hildenbrand

On Wed, Nov 24, 2021 at 3:22 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Nov 24, 2021 at 10:33:28AM +0800, Jason Wang wrote:
> > On Wed, Nov 24, 2021 at 10:26 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Wed, Nov 24, 2021 at 9:30 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> > > >
> > > > "Michael S. Tsirkin" <mst@redhat.com> writes:
> > > > > On Tue, Nov 23, 2021 at 10:25:20AM +0800, Jason Wang wrote:
> > > > >> On Tue, Nov 23, 2021 at 4:24 AM Halil Pasic <pasic@linux.ibm.com> wrote:
> > > > >> >
> > > > >> > On Mon, 22 Nov 2021 14:25:26 +0800
> > > > >> > Jason Wang <jasowang@redhat.com> wrote:
> > > > >> >
> > > > >> > > I think the fixes are:
> > > > >> > >
> > > > >> > > 1) fixing the vhost vsock
> > > > >> > > 2) use suppress_used_validation=true to let vsock driver to validate
> > > > >> > > the in buffer length
> > > > >> > > 3) probably a new feature so the driver can only enable the validation
> > > > >> > > when the feature is enabled.
> > > > >> >
> > > > >> > I'm not sure, I would consider a F_DEV_Y_FIXED_BUG_X a perfectly good
> > > > >> > feature. Frankly the set of such bugs is device implementation
> > > > >> > specific and it makes little sense to specify a feature bit
> > > > >> > that says the device implementation claims to adhere to some
> > > > >> > aspect of the specification. Also what would be the semantic
> > > > >> > of not negotiating F_DEV_Y_FIXED_BUG_X?
> > > > >>
> > > > >> Yes, I agree. Rethink of the feature bit, it seems unnecessary,
> > > > >> especially considering the driver should not care about the used
> > > > >> length for tx.
> > > > >>
> > > > >> >
> > > > >> > On the other hand I see no other way to keep the validation
> > > > >> > permanently enabled for fixed implementations, and get around the problem
> > > > >> > with broken implementations. So we could have something like
> > > > >> > VHOST_USED_LEN_STRICT.
> > > > >>
> > > > >> It's more about a choice of the driver's knowledge. For vsock TX it
> > > > >> should be fine. If we introduce a parameter and disable it by default,
> > > > >> it won't be very useful.
> > > > >>
> > > > >> >
> > > > >> > Maybe, we can also think of 'warn and don't alter behavior' instead of
> > > > >> > 'warn' and alter behavior. Or maybe even not having such checks on in
> > > > >> > production, but only when testing.
> > > > >>
> > > > >> I think there's an agreement that virtio drivers need more hardening,
> > > > >> that's why a lot of patches were merged. Especially considering the
> > > > >> new requirements came from confidential computing, smart NIC and
> > > > >> VDUSE. For virtio drivers, enabling the validation may help to
> > > > >>
> > > > >> 1) protect the driver from the buggy and malicious device
> > > > >> 2) uncover the bugs of the devices (as vsock did, and probably rpmsg)
> > > > >> 3) force the have a smart driver that can do the validation itself
> > > > >> then we can finally remove the validation in the core
> > > > >>
> > > > >> So I'd like to keep it enabled.
> > > > >>
> > > > >> Thanks
> > > > >
> > > > > Let's see how far we can get. But yes, maybe we were too aggressive in
> > > > > breaking things by default, a warning might be a better choice for a
> > > > > couple of cycles.
> > >
> > > Ok, considering we saw the issues with balloons I think I can post a
> > > patch to use warn instead. I wonder if we need to taint the kernel in
> > > this case.
> >
> > Rethink this, consider we still have some time, I tend to convert the
> > drivers to validate the length by themselves. Does this make sense?
> >
> > Thanks
>
> That's separate but let's stop crashing guests for people ASAP.

Ok, will post a patch soon.

Thanks

>
>
> > >
> > > >
> > > > This series appears to break the virtio_balloon driver as well.
> > > >
> > > > The symptom is soft lockup warnings, eg:
> > > >
> > > >   INFO: task kworker/1:1:109 blocked for more than 614 seconds.
> > > >         Not tainted 5.16.0-rc2-gcc-10.3.0 #21
> > > >   "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > > >   task:kworker/1:1     state:D stack:12496 pid:  109 ppid:     2 flags:0x00000800
> > > >   Workqueue: events_freezable update_balloon_size_func
> > > >   Call Trace:
> > > >   [c000000003cef7c0] [c000000003cef820] 0xc000000003cef820 (unreliable)
> > > >   [c000000003cef9b0] [c00000000001e238] __switch_to+0x1e8/0x2f0
> > > >   [c000000003cefa10] [c000000000f0a00c] __schedule+0x2cc/0xb50
> > > >   [c000000003cefae0] [c000000000f0a8fc] schedule+0x6c/0x140
> > > >   [c000000003cefb10] [c00000000095b6c4] tell_host+0xe4/0x130
> > > >   [c000000003cefba0] [c00000000095d234] update_balloon_size_func+0x394/0x3f0
> > > >   [c000000003cefc70] [c000000000178064] process_one_work+0x2c4/0x5b0
> > > >   [c000000003cefd10] [c0000000001783f8] worker_thread+0xa8/0x640
> > > >   [c000000003cefda0] [c000000000185444] kthread+0x1b4/0x1c0
> > > >   [c000000003cefe10] [c00000000000cee4] ret_from_kernel_thread+0x5c/0x64
> > > >
> > > > Similar backtrace reported here by Luis:
> > > >
> > > >   https://lore.kernel.org/lkml/YY2duTi0wAyAKUTJ@bombadil.infradead.org/
> > > >
> > > > Bisect points to:
> > > >
> > > >   # first bad commit: [939779f5152d161b34f612af29e7dc1ac4472fcf] virtio_ring: validate used buffer length
> > > >
> > > > Adding suppress used validation to the virtio balloon driver "fixes" it, eg.
> > > >
> > > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > > > index c22ff0117b46..a14b82ceebb2 100644
> > > > --- a/drivers/virtio/virtio_balloon.c
> > > > +++ b/drivers/virtio/virtio_balloon.c
> > > > @@ -1150,6 +1150,7 @@ static unsigned int features[] = {
> > > >  };
> > > >
> > > >  static struct virtio_driver virtio_balloon_driver = {
> > > > +       .suppress_used_validation = true,
> > > >         .feature_table = features,
> > > >         .feature_table_size = ARRAY_SIZE(features),
> > > >         .driver.name =  KBUILD_MODNAME,
> > >
> > > Looks good, we need a formal patch for this.
> > >
> > > And we need fix Qemu as well which advertise non zero used length for
> > > inflate/deflate queue:
> > >
> > > static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > > ...
> > >         virtqueue_push(vq, elem, offset);
> > >
> > > Thanks
> > >
> > > >
> > > >
> > > > cheers
> > > >
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-24  7:59                           ` Jason Wang
@ 2021-11-24  8:24                             ` Michael S. Tsirkin
  2021-11-24  8:28                               ` Jason Wang
  0 siblings, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2021-11-24  8:24 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael Ellerman, Halil Pasic, virtualization, Hetzelt,
	Felicitas, linux-kernel, kaplan, david, Konrad Rzeszutek Wilk,
	Stefan Hajnoczi, Stefano Garzarella, mcgrof, David Hildenbrand

On Wed, Nov 24, 2021 at 03:59:12PM +0800, Jason Wang wrote:
> On Wed, Nov 24, 2021 at 3:22 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Nov 24, 2021 at 10:33:28AM +0800, Jason Wang wrote:
> > > On Wed, Nov 24, 2021 at 10:26 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Wed, Nov 24, 2021 at 9:30 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> > > > >
> > > > > "Michael S. Tsirkin" <mst@redhat.com> writes:
> > > > > > On Tue, Nov 23, 2021 at 10:25:20AM +0800, Jason Wang wrote:
> > > > > >> On Tue, Nov 23, 2021 at 4:24 AM Halil Pasic <pasic@linux.ibm.com> wrote:
> > > > > >> >
> > > > > >> > On Mon, 22 Nov 2021 14:25:26 +0800
> > > > > >> > Jason Wang <jasowang@redhat.com> wrote:
> > > > > >> >
> > > > > >> > > I think the fixes are:
> > > > > >> > >
> > > > > >> > > 1) fixing the vhost vsock
> > > > > >> > > 2) use suppress_used_validation=true to let vsock driver to validate
> > > > > >> > > the in buffer length
> > > > > >> > > 3) probably a new feature so the driver can only enable the validation
> > > > > >> > > when the feature is enabled.
> > > > > >> >
> > > > > >> > I'm not sure, I would consider a F_DEV_Y_FIXED_BUG_X a perfectly good
> > > > > >> > feature. Frankly the set of such bugs is device implementation
> > > > > >> > specific and it makes little sense to specify a feature bit
> > > > > >> > that says the device implementation claims to adhere to some
> > > > > >> > aspect of the specification. Also what would be the semantic
> > > > > >> > of not negotiating F_DEV_Y_FIXED_BUG_X?
> > > > > >>
> > > > > >> Yes, I agree. Rethink of the feature bit, it seems unnecessary,
> > > > > >> especially considering the driver should not care about the used
> > > > > >> length for tx.
> > > > > >>
> > > > > >> >
> > > > > >> > On the other hand I see no other way to keep the validation
> > > > > >> > permanently enabled for fixed implementations, and get around the problem
> > > > > >> > with broken implementations. So we could have something like
> > > > > >> > VHOST_USED_LEN_STRICT.
> > > > > >>
> > > > > >> It's more about a choice of the driver's knowledge. For vsock TX it
> > > > > >> should be fine. If we introduce a parameter and disable it by default,
> > > > > >> it won't be very useful.
> > > > > >>
> > > > > >> >
> > > > > >> > Maybe, we can also think of 'warn and don't alter behavior' instead of
> > > > > >> > 'warn' and alter behavior. Or maybe even not having such checks on in
> > > > > >> > production, but only when testing.
> > > > > >>
> > > > > >> I think there's an agreement that virtio drivers need more hardening,
> > > > > >> that's why a lot of patches were merged. Especially considering the
> > > > > >> new requirements came from confidential computing, smart NIC and
> > > > > >> VDUSE. For virtio drivers, enabling the validation may help to
> > > > > >>
> > > > > >> 1) protect the driver from the buggy and malicious device
> > > > > >> 2) uncover the bugs of the devices (as vsock did, and probably rpmsg)
> > > > > >> 3) force the have a smart driver that can do the validation itself
> > > > > >> then we can finally remove the validation in the core
> > > > > >>
> > > > > >> So I'd like to keep it enabled.
> > > > > >>
> > > > > >> Thanks
> > > > > >
> > > > > > Let's see how far we can get. But yes, maybe we were too aggressive in
> > > > > > breaking things by default, a warning might be a better choice for a
> > > > > > couple of cycles.
> > > >
> > > > Ok, considering we saw the issues with balloons I think I can post a
> > > > patch to use warn instead. I wonder if we need to taint the kernel in
> > > > this case.
> > >
> > > Rethink this, consider we still have some time, I tend to convert the
> > > drivers to validate the length by themselves. Does this make sense?
> > >
> > > Thanks
> >
> > That's separate but let's stop crashing guests for people ASAP.
> 
> Ok, will post a patch soon.
> 
> Thanks

So let's err on the side of caution now, I will just revert for this
release.

For the next one I think a good plan is:
- no checks by default
- module param to check and warn
- keep adding validation in the drivers as appropriate

> >
> >
> > > >
> > > > >
> > > > > This series appears to break the virtio_balloon driver as well.
> > > > >
> > > > > The symptom is soft lockup warnings, eg:
> > > > >
> > > > >   INFO: task kworker/1:1:109 blocked for more than 614 seconds.
> > > > >         Not tainted 5.16.0-rc2-gcc-10.3.0 #21
> > > > >   "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > > > >   task:kworker/1:1     state:D stack:12496 pid:  109 ppid:     2 flags:0x00000800
> > > > >   Workqueue: events_freezable update_balloon_size_func
> > > > >   Call Trace:
> > > > >   [c000000003cef7c0] [c000000003cef820] 0xc000000003cef820 (unreliable)
> > > > >   [c000000003cef9b0] [c00000000001e238] __switch_to+0x1e8/0x2f0
> > > > >   [c000000003cefa10] [c000000000f0a00c] __schedule+0x2cc/0xb50
> > > > >   [c000000003cefae0] [c000000000f0a8fc] schedule+0x6c/0x140
> > > > >   [c000000003cefb10] [c00000000095b6c4] tell_host+0xe4/0x130
> > > > >   [c000000003cefba0] [c00000000095d234] update_balloon_size_func+0x394/0x3f0
> > > > >   [c000000003cefc70] [c000000000178064] process_one_work+0x2c4/0x5b0
> > > > >   [c000000003cefd10] [c0000000001783f8] worker_thread+0xa8/0x640
> > > > >   [c000000003cefda0] [c000000000185444] kthread+0x1b4/0x1c0
> > > > >   [c000000003cefe10] [c00000000000cee4] ret_from_kernel_thread+0x5c/0x64
> > > > >
> > > > > Similar backtrace reported here by Luis:
> > > > >
> > > > >   https://lore.kernel.org/lkml/YY2duTi0wAyAKUTJ@bombadil.infradead.org/
> > > > >
> > > > > Bisect points to:
> > > > >
> > > > >   # first bad commit: [939779f5152d161b34f612af29e7dc1ac4472fcf] virtio_ring: validate used buffer length
> > > > >
> > > > > Adding suppress used validation to the virtio balloon driver "fixes" it, eg.
> > > > >
> > > > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > > > > index c22ff0117b46..a14b82ceebb2 100644
> > > > > --- a/drivers/virtio/virtio_balloon.c
> > > > > +++ b/drivers/virtio/virtio_balloon.c
> > > > > @@ -1150,6 +1150,7 @@ static unsigned int features[] = {
> > > > >  };
> > > > >
> > > > >  static struct virtio_driver virtio_balloon_driver = {
> > > > > +       .suppress_used_validation = true,
> > > > >         .feature_table = features,
> > > > >         .feature_table_size = ARRAY_SIZE(features),
> > > > >         .driver.name =  KBUILD_MODNAME,
> > > >
> > > > Looks good, we need a formal patch for this.
> > > >
> > > > And we need fix Qemu as well which advertise non zero used length for
> > > > inflate/deflate queue:
> > > >
> > > > static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > > > ...
> > > >         virtqueue_push(vq, elem, offset);
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > >
> > > > > cheers
> > > > >
> >


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-24  8:24                             ` Michael S. Tsirkin
@ 2021-11-24  8:28                               ` Jason Wang
  0 siblings, 0 replies; 32+ messages in thread
From: Jason Wang @ 2021-11-24  8:28 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Michael Ellerman, Halil Pasic, virtualization, Hetzelt,
	Felicitas, linux-kernel, kaplan, david, Konrad Rzeszutek Wilk,
	Stefan Hajnoczi, Stefano Garzarella, mcgrof, David Hildenbrand

On Wed, Nov 24, 2021 at 4:24 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Nov 24, 2021 at 03:59:12PM +0800, Jason Wang wrote:
> > On Wed, Nov 24, 2021 at 3:22 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Wed, Nov 24, 2021 at 10:33:28AM +0800, Jason Wang wrote:
> > > > On Wed, Nov 24, 2021 at 10:26 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > > On Wed, Nov 24, 2021 at 9:30 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> > > > > >
> > > > > > "Michael S. Tsirkin" <mst@redhat.com> writes:
> > > > > > > On Tue, Nov 23, 2021 at 10:25:20AM +0800, Jason Wang wrote:
> > > > > > >> On Tue, Nov 23, 2021 at 4:24 AM Halil Pasic <pasic@linux.ibm.com> wrote:
> > > > > > >> >
> > > > > > >> > On Mon, 22 Nov 2021 14:25:26 +0800
> > > > > > >> > Jason Wang <jasowang@redhat.com> wrote:
> > > > > > >> >
> > > > > > >> > > I think the fixes are:
> > > > > > >> > >
> > > > > > >> > > 1) fixing the vhost vsock
> > > > > > >> > > 2) use suppress_used_validation=true to let vsock driver to validate
> > > > > > >> > > the in buffer length
> > > > > > >> > > 3) probably a new feature so the driver can only enable the validation
> > > > > > >> > > when the feature is enabled.
> > > > > > >> >
> > > > > > >> > I'm not sure, I would consider a F_DEV_Y_FIXED_BUG_X a perfectly good
> > > > > > >> > feature. Frankly the set of such bugs is device implementation
> > > > > > >> > specific and it makes little sense to specify a feature bit
> > > > > > >> > that says the device implementation claims to adhere to some
> > > > > > >> > aspect of the specification. Also what would be the semantic
> > > > > > >> > of not negotiating F_DEV_Y_FIXED_BUG_X?
> > > > > > >>
> > > > > > >> Yes, I agree. Rethink of the feature bit, it seems unnecessary,
> > > > > > >> especially considering the driver should not care about the used
> > > > > > >> length for tx.
> > > > > > >>
> > > > > > >> >
> > > > > > >> > On the other hand I see no other way to keep the validation
> > > > > > >> > permanently enabled for fixed implementations, and get around the problem
> > > > > > >> > with broken implementations. So we could have something like
> > > > > > >> > VHOST_USED_LEN_STRICT.
> > > > > > >>
> > > > > > >> It's more about a choice of the driver's knowledge. For vsock TX it
> > > > > > >> should be fine. If we introduce a parameter and disable it by default,
> > > > > > >> it won't be very useful.
> > > > > > >>
> > > > > > >> >
> > > > > > >> > Maybe, we can also think of 'warn and don't alter behavior' instead of
> > > > > > >> > 'warn' and alter behavior. Or maybe even not having such checks on in
> > > > > > >> > production, but only when testing.
> > > > > > >>
> > > > > > >> I think there's an agreement that virtio drivers need more hardening,
> > > > > > >> that's why a lot of patches were merged. Especially considering the
> > > > > > >> new requirements came from confidential computing, smart NIC and
> > > > > > >> VDUSE. For virtio drivers, enabling the validation may help to
> > > > > > >>
> > > > > > >> 1) protect the driver from the buggy and malicious device
> > > > > > >> 2) uncover the bugs of the devices (as vsock did, and probably rpmsg)
> > > > > > >> 3) force the have a smart driver that can do the validation itself
> > > > > > >> then we can finally remove the validation in the core
> > > > > > >>
> > > > > > >> So I'd like to keep it enabled.
> > > > > > >>
> > > > > > >> Thanks
> > > > > > >
> > > > > > > Let's see how far we can get. But yes, maybe we were too aggressive in
> > > > > > > breaking things by default, a warning might be a better choice for a
> > > > > > > couple of cycles.
> > > > >
> > > > > Ok, considering we saw the issues with balloons I think I can post a
> > > > > patch to use warn instead. I wonder if we need to taint the kernel in
> > > > > this case.
> > > >
> > > > Rethink this, consider we still have some time, I tend to convert the
> > > > drivers to validate the length by themselves. Does this make sense?
> > > >
> > > > Thanks
> > >
> > > That's separate but let's stop crashing guests for people ASAP.
> >
> > Ok, will post a patch soon.
> >
> > Thanks
>
> So let's err on the side of caution now, I will just revert for this
> release.
>
> For the next one I think a good plan is:
> - no checks by default
> - module param to check and warn
> - keep adding validation in the drivers as appropriate

Fine, I will do that.

Thanks

>
> > >
> > >
> > > > >
> > > > > >
> > > > > > This series appears to break the virtio_balloon driver as well.
> > > > > >
> > > > > > The symptom is soft lockup warnings, eg:
> > > > > >
> > > > > >   INFO: task kworker/1:1:109 blocked for more than 614 seconds.
> > > > > >         Not tainted 5.16.0-rc2-gcc-10.3.0 #21
> > > > > >   "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> > > > > >   task:kworker/1:1     state:D stack:12496 pid:  109 ppid:     2 flags:0x00000800
> > > > > >   Workqueue: events_freezable update_balloon_size_func
> > > > > >   Call Trace:
> > > > > >   [c000000003cef7c0] [c000000003cef820] 0xc000000003cef820 (unreliable)
> > > > > >   [c000000003cef9b0] [c00000000001e238] __switch_to+0x1e8/0x2f0
> > > > > >   [c000000003cefa10] [c000000000f0a00c] __schedule+0x2cc/0xb50
> > > > > >   [c000000003cefae0] [c000000000f0a8fc] schedule+0x6c/0x140
> > > > > >   [c000000003cefb10] [c00000000095b6c4] tell_host+0xe4/0x130
> > > > > >   [c000000003cefba0] [c00000000095d234] update_balloon_size_func+0x394/0x3f0
> > > > > >   [c000000003cefc70] [c000000000178064] process_one_work+0x2c4/0x5b0
> > > > > >   [c000000003cefd10] [c0000000001783f8] worker_thread+0xa8/0x640
> > > > > >   [c000000003cefda0] [c000000000185444] kthread+0x1b4/0x1c0
> > > > > >   [c000000003cefe10] [c00000000000cee4] ret_from_kernel_thread+0x5c/0x64
> > > > > >
> > > > > > Similar backtrace reported here by Luis:
> > > > > >
> > > > > >   https://lore.kernel.org/lkml/YY2duTi0wAyAKUTJ@bombadil.infradead.org/
> > > > > >
> > > > > > Bisect points to:
> > > > > >
> > > > > >   # first bad commit: [939779f5152d161b34f612af29e7dc1ac4472fcf] virtio_ring: validate used buffer length
> > > > > >
> > > > > > Adding suppress used validation to the virtio balloon driver "fixes" it, eg.
> > > > > >
> > > > > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > > > > > index c22ff0117b46..a14b82ceebb2 100644
> > > > > > --- a/drivers/virtio/virtio_balloon.c
> > > > > > +++ b/drivers/virtio/virtio_balloon.c
> > > > > > @@ -1150,6 +1150,7 @@ static unsigned int features[] = {
> > > > > >  };
> > > > > >
> > > > > >  static struct virtio_driver virtio_balloon_driver = {
> > > > > > +       .suppress_used_validation = true,
> > > > > >         .feature_table = features,
> > > > > >         .feature_table_size = ARRAY_SIZE(features),
> > > > > >         .driver.name =  KBUILD_MODNAME,
> > > > >
> > > > > Looks good, we need a formal patch for this.
> > > > >
> > > > > And we need fix Qemu as well which advertise non zero used length for
> > > > > inflate/deflate queue:
> > > > >
> > > > > static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
> > > > > ...
> > > > >         virtqueue_push(vq, elem, offset);
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > >
> > > > > > cheers
> > > > > >
> > >
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-24  2:33                       ` Jason Wang
  2021-11-24  7:22                         ` Michael S. Tsirkin
@ 2021-11-24 11:33                         ` Halil Pasic
  2021-11-25  2:27                           ` Jason Wang
  1 sibling, 1 reply; 32+ messages in thread
From: Halil Pasic @ 2021-11-24 11:33 UTC (permalink / raw)
  To: Jason Wang
  Cc: Michael Ellerman, Michael S. Tsirkin, virtualization, Hetzelt,
	Felicitas, linux-kernel, kaplan, david, Konrad Rzeszutek Wilk,
	Stefan Hajnoczi, Stefano Garzarella, mcgrof, David Hildenbrand,
	Halil Pasic

On Wed, 24 Nov 2021 10:33:28 +0800
Jason Wang <jasowang@redhat.com> wrote:

> > > > Let's see how far we can get. But yes, maybe we were too aggressive in
> > > > breaking things by default, a warning might be a better choice for a
> > > > couple of cycles.  
> >
> > Ok, considering we saw the issues with balloons I think I can post a
> > patch to use warn instead. I wonder if we need to taint the kernel in
> > this case.  
> 
> Rethink this, consider we still have some time, I tend to convert the
> drivers to validate the length by themselves. Does this make sense?

I do find value in doing the validation in a single place for every
driver. This is really a common concern. But I think, not breaking
what used to work before is a good idea. So I would opt for producing
a warning, but otherwise preserving old behavior unless there is an
explicit opt-in for something more strict.

BTW AFAIU if we detect a problem here, there are basically two
cases:
(1) Either the device is over-reporting what it has written, or
(2) we have a memory corruption in the guest because the device has
written beyond the end of the provided buffer. This can be because
  (2.1) the driver provided a smaller buffer than mandated by the spec,
  or
  (2.2) the device is broken.

Case (1) is relatively harmless, and I believe a warning for it is more
than appropriate. Whoever sees the warning should push for a fixed device
if possible.

Case (2) is nasty. What would be the sanest course of action if we were
reasonably sure we have have case (2.2)?

Maybe we can detect case (2) with a canary. I.e. artificially extend 
the buffer with an extra descriptor that has a poisoned buffer, and
check if the value of that poisoned buffer is different than poison. I'm
not sure it is worth the effort though in production.

Regards,
Halil

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH V5 1/4] virtio_ring: validate used buffer length
  2021-11-24 11:33                         ` Halil Pasic
@ 2021-11-25  2:27                           ` Jason Wang
  0 siblings, 0 replies; 32+ messages in thread
From: Jason Wang @ 2021-11-25  2:27 UTC (permalink / raw)
  To: Halil Pasic
  Cc: Michael Ellerman, Michael S. Tsirkin, virtualization, Hetzelt,
	Felicitas, linux-kernel, kaplan, david, Konrad Rzeszutek Wilk,
	Stefan Hajnoczi, Stefano Garzarella, mcgrof, David Hildenbrand

On Wed, Nov 24, 2021 at 7:33 PM Halil Pasic <pasic@linux.ibm.com> wrote:
>
> On Wed, 24 Nov 2021 10:33:28 +0800
> Jason Wang <jasowang@redhat.com> wrote:
>
> > > > > Let's see how far we can get. But yes, maybe we were too aggressive in
> > > > > breaking things by default, a warning might be a better choice for a
> > > > > couple of cycles.
> > >
> > > Ok, considering we saw the issues with balloons I think I can post a
> > > patch to use warn instead. I wonder if we need to taint the kernel in
> > > this case.
> >
> > Rethink this, consider we still have some time, I tend to convert the
> > drivers to validate the length by themselves. Does this make sense?
>
> I do find value in doing the validation in a single place for every
> driver. This is really a common concern. But I think, not breaking
> what used to work before is a good idea. So I would opt for producing
> a warning, but otherwise preserving old behavior unless there is an
> explicit opt-in for something more strict.

Yes, I totally agree with you after more thought and discussion.

>
> BTW AFAIU if we detect a problem here, there are basically two
> cases:
> (1) Either the device is over-reporting what it has written, or
> (2) we have a memory corruption in the guest because the device has
> written beyond the end of the provided buffer. This can be because
>   (2.1) the driver provided a smaller buffer than mandated by the spec,
>   or
>   (2.2) the device is broken.
>
> Case (1) is relatively harmless, and I believe a warning for it is more
> than appropriate. Whoever sees the warning should push for a fixed device
> if possible.

Yes.

>
> Case (2) is nasty. What would be the sanest course of action if we were
> reasonably sure we have have case (2.2)?

I think that's why a per driver validation is more preferable. The
driver can choose the comfortable action, e.g for networking it may
just drop the packets.

>
> Maybe we can detect case (2) with a canary. I.e. artificially extend
> the buffer with an extra descriptor that has a poisoned buffer, and
> check if the value of that poisoned buffer is different than poison. I'm
> not sure it is worth the effort though in production.

This might work but it might cause performance overhead. I still think
doing the validation per driver is better, the driver can choose to
fix the used length and taint the kernel anyway.

Thanks

>
> Regards,
> Halil
>


^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2021-11-25  2:30 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-27  2:21 [PATCH V5 0/4] Validate used buffer length Jason Wang
2021-10-27  2:21 ` [PATCH V5 1/4] virtio_ring: validate " Jason Wang
     [not found]   ` <1635823138.4631283-1-xuanzhuo@linux.alibaba.com>
2021-11-02  3:54     ` Jason Wang
2021-11-19 15:09   ` Halil Pasic
2021-11-22  3:51     ` Jason Wang
2021-11-22  5:35       ` Halil Pasic
2021-11-22  5:49         ` Halil Pasic
2021-11-22  6:25           ` Jason Wang
2021-11-22  7:55             ` Stefano Garzarella
2021-11-22 11:08               ` Stefano Garzarella
2021-11-22 14:24                 ` Halil Pasic
2021-11-22 16:23                   ` Stefano Garzarella
2021-11-22 13:50             ` Halil Pasic
2021-11-23  2:30               ` Jason Wang
2021-11-23 12:17               ` Michael S. Tsirkin
2021-11-23 12:43                 ` Halil Pasic
2021-11-22 20:23             ` Halil Pasic
2021-11-23  2:25               ` Jason Wang
2021-11-23 11:05                 ` Michael S. Tsirkin
2021-11-24  1:30                   ` Michael Ellerman
2021-11-24  2:26                     ` Jason Wang
2021-11-24  2:33                       ` Jason Wang
2021-11-24  7:22                         ` Michael S. Tsirkin
2021-11-24  7:59                           ` Jason Wang
2021-11-24  8:24                             ` Michael S. Tsirkin
2021-11-24  8:28                               ` Jason Wang
2021-11-24 11:33                         ` Halil Pasic
2021-11-25  2:27                           ` Jason Wang
2021-11-22  7:42       ` Stefano Garzarella
2021-10-27  2:21 ` [PATCH V5 2/4] virtio-net: don't let virtio core to validate used length Jason Wang
2021-10-27  2:21 ` [PATCH V5 3/4] virtio-blk: " Jason Wang
2021-10-27  2:21 ` [PATCH V5 4/4] virtio-scsi: don't let virtio core to validate used buffer length Jason Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).