LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
To: kvm@vger.kernel.org
Cc: cohuck@redhat.com, borntraeger@de.ibm.com, frankja@linux.ibm.com,
	thuth@redhat.com, pasic@linux.ibm.com, david@redhat.com,
	linux-s390@vger.kernel.org, linux-kernel@vger.kernel.org,
	Ulrich.Weigand@de.ibm.com
Subject: [PATCH v3 11/14] KVM: s390: pv: extend lazy destroy to handle shutdown
Date: Wed,  4 Aug 2021 17:40:43 +0200	[thread overview]
Message-ID: <20210804154046.88552-12-imbrenda@linux.ibm.com> (raw)
In-Reply-To: <20210804154046.88552-1-imbrenda@linux.ibm.com>

Extend the lazy destroy infrastructure to handle almost all kinds of
exit of the userspace process. The only case not handled is if the
process is killed by the OOM, in which case the old behaviour will
still be in effect.

Add the uv_destroy_page_lazy function to set aside pages when unmapping
them during mm teardown; the pages will be processed and freed when the
protected VM is torn down.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 arch/s390/include/asm/mmu.h         |  3 ++
 arch/s390/include/asm/mmu_context.h |  2 ++
 arch/s390/include/asm/pgtable.h     |  9 ++++--
 arch/s390/include/asm/uv.h          | 15 +++++++++
 arch/s390/kernel/uv.c               | 47 +++++++++++++++++++++++++++++
 arch/s390/kvm/pv.c                  | 41 ++++++++++++++++++-------
 6 files changed, 104 insertions(+), 13 deletions(-)

diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index e12ff0f29d1a..e2250dbe3d9d 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -18,6 +18,7 @@ typedef struct {
 	unsigned long vdso_base;
 	/* The mmu context belongs to a secure guest. */
 	atomic_t is_protected;
+	struct list_head deferred_list;
 	/*
 	 * The following bitfields need a down_write on the mm
 	 * semaphore when they are written to. As they are only
@@ -34,6 +35,8 @@ typedef struct {
 	unsigned int uses_cmm:1;
 	/* The gmaps associated with this context are allowed to use huge pages. */
 	unsigned int allow_gmap_hpage_1m:1;
+	/* The mmu context should be destroyed synchronously */
+	unsigned int pv_sync_destroy:1;
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(name)						   \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index c7937f369e62..5c598afe07a8 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -27,8 +27,10 @@ static inline int init_new_context(struct task_struct *tsk,
 	cpumask_clear(&mm->context.cpu_attach_mask);
 	atomic_set(&mm->context.flush_count, 0);
 	atomic_set(&mm->context.is_protected, 0);
+	mm->context.pv_sync_destroy = 0;
 	mm->context.gmap_asce = 0;
 	mm->context.flush_mm = 0;
+	INIT_LIST_HEAD(&mm->context.deferred_list);
 #ifdef CONFIG_PGSTE
 	mm->context.alloc_pgste = page_table_allocate_pgste ||
 		test_thread_flag(TIF_PGSTE) ||
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0f1af2232ebe..c311503edc3b 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1118,9 +1118,14 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 	} else {
 		res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
 	}
+
 	/* At this point the reference through the mapping is still present */
-	if (mm_is_protected(mm) && pte_present(res))
-		uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+	if (pte_present(res) && mm_is_protected(mm)) {
+		if (full && !mm->context.pv_sync_destroy)
+			uv_destroy_page_lazy(mm, pte_val(res) & PAGE_MASK);
+		else
+			uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+	}
 	return res;
 }
 
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index c8bd72be8974..934227611773 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -350,6 +350,15 @@ static inline int uv_remove_shared(unsigned long addr) { return 0; }
 #if IS_ENABLED(CONFIG_KVM)
 extern int prot_virt_host;
 
+struct destroy_page_lazy {
+	struct list_head list;
+	unsigned short count;
+	unsigned long pfns[];
+};
+
+/* This guarantees that up to PV_MAX_LAZY_COUNT can fit in a page */
+#define PV_MAX_LAZY_COUNT ((PAGE_SIZE - sizeof(struct destroy_page_lazy)) / sizeof(long))
+
 static inline int is_prot_virt_host(void)
 {
 	return prot_virt_host;
@@ -358,6 +367,7 @@ static inline int is_prot_virt_host(void)
 int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
 int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
 int uv_destroy_owned_page(unsigned long paddr);
+int uv_destroy_page_lazy(struct mm_struct *mm, unsigned long paddr);
 int uv_convert_from_secure(unsigned long paddr);
 int uv_convert_owned_from_secure(unsigned long paddr);
 int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
@@ -374,6 +384,11 @@ static inline int uv_destroy_owned_page(unsigned long paddr)
 	return 0;
 }
 
+static inline int uv_destroy_page_lazy(struct mm_struct *mm, unsigned long paddr)
+{
+	return 0;
+}
+
 static inline int uv_convert_from_secure(unsigned long paddr)
 {
 	return 0;
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index bc79085d7152..5b632846c4a4 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -186,6 +186,53 @@ int uv_convert_owned_from_secure(unsigned long paddr)
 	return rc;
 }
 
+/*
+ * Set aside the given page and put it in the list of pages to be cleared in
+ * background. The caller must already hold a reference to the page.
+ */
+int uv_destroy_page_lazy(struct mm_struct *mm, unsigned long paddr)
+{
+	struct list_head *head = &mm->context.deferred_list;
+	struct destroy_page_lazy *lazy;
+	struct page *page;
+	int rc;
+
+	/* get an extra reference here */
+	get_page(phys_to_page(paddr));
+
+	lazy = list_first_entry(head, struct destroy_page_lazy, list);
+	/*
+	 * We need a fresh page to store more pointers. The current page
+	 * might be shared, so it cannot be used directly. Instead, make it
+	 * accessible and release it, and let the normal unmap code free it
+	 * later, if needed.
+	 * Afterwards, try to allocate a new page, but not very hard. If the
+	 * allocation fails, we simply return. The next call to this
+	 * function will attempt to do the same again, until enough pages
+	 * have been freed.
+	 */
+	if (list_empty(head) || lazy->count >= PV_MAX_LAZY_COUNT) {
+		rc = uv_convert_owned_from_secure(paddr);
+		/* in case of failure, we intentionally leak the page */
+		if (rc)
+			return rc;
+		/* release the extra reference */
+		put_page(phys_to_page(paddr));
+
+		/* try to allocate a new page quickly, but allow failures */
+		page = alloc_page(GFP_ATOMIC | __GFP_NOMEMALLOC | __GFP_NOWARN);
+		if (!page)
+			return -ENOMEM;
+		lazy = page_to_virt(page);
+		lazy->count = 0;
+		list_add(&lazy->list, head);
+		return 0;
+	}
+	/* the array of pointers has space, just add this entry */
+	lazy->pfns[lazy->count++] = phys_to_pfn(paddr);
+	return 0;
+}
+
 /*
  * Calculate the expected ref_count for a page that would otherwise have no
  * further pins. This was cribbed from similar functions in other places in
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 8267376a9a0d..e10392a8b1ae 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -19,6 +19,7 @@
 
 struct deferred_priv {
 	struct mm_struct *mm;
+	bool has_mm;
 	unsigned long old_table;
 	u64 handle;
 	void *stor_var;
@@ -197,19 +198,34 @@ static int kvm_s390_pv_deinit_vm_now(struct kvm *kvm, u16 *rc, u16 *rrc)
 
 static int kvm_s390_pv_destroy_vm_thread(void *priv)
 {
+	struct destroy_page_lazy *lazy, *next;
 	struct deferred_priv *p = priv;
 	u16 rc, rrc;
 	int r;
 
-	/* Clear all the pages as long as we are not the only users of the mm */
-	s390_uv_destroy_range(p->mm, 1, 0, TASK_SIZE_MAX);
-	/*
-	 * If we were the last user of the mm, synchronously free (and clear
-	 * if needed) all pages.
-	 * Otherwise simply decrease the reference counter; in this case we
-	 * have already cleared all pages.
-	 */
-	mmput(p->mm);
+	list_for_each_entry_safe(lazy, next, &p->mm->context.deferred_list, list) {
+		list_del(&lazy->list);
+		s390_uv_destroy_pfns(lazy->count, lazy->pfns);
+		free_page(__pa(lazy));
+	}
+
+	if (p->has_mm) {
+		/* Clear all the pages as long as we are not the only users of the mm */
+		s390_uv_destroy_range(p->mm, 1, 0, TASK_SIZE_MAX);
+		if (atomic_read(&p->mm->mm_users) == 1) {
+			mmap_write_lock(p->mm);
+			/* destroy synchronously if there are no other users */
+			p->mm->context.pv_sync_destroy = 1;
+			mmap_write_unlock(p->mm);
+		}
+		/*
+		 * If we were the last user of the mm, synchronously free
+		 * (and clear if needed) all pages.
+		 * Otherwise simply decrease the reference counter; in this
+		 * case we have already cleared all pages.
+		 */
+		mmput(p->mm);
+	}
 
 	r = uv_cmd_nodata(p->handle, UVC_CMD_DESTROY_SEC_CONF, &rc, &rrc);
 	WARN_ONCE(r, "protvirt destroy vm failed rc %x rrc %x", rc, rrc);
@@ -241,7 +257,9 @@ static int deferred_destroy(struct kvm *kvm, struct deferred_priv *priv, u16 *rc
 	priv->old_table = (unsigned long)kvm->arch.gmap->table;
 	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
 
-	if (s390_replace_asce(kvm->arch.gmap))
+	if (!priv->has_mm)
+		s390_remove_old_asce(kvm->arch.gmap);
+	else if (s390_replace_asce(kvm->arch.gmap))
 		goto fail;
 
 	t = kthread_create(kvm_s390_pv_destroy_vm_thread, priv,
@@ -300,8 +318,9 @@ int kvm_s390_pv_deinit_vm_deferred(struct kvm *kvm, u16 *rc, u16 *rrc)
 
 	mmgrab(kvm->mm);
 	if (mmget_not_zero(kvm->mm)) {
+		priv->has_mm = true;
 		kvm_s390_clear_2g(kvm);
-	} else {
+	} else if (list_empty(&kvm->mm->context.deferred_list)) {
 		/* No deferred work to do */
 		mmdrop(kvm->mm);
 		kfree(priv);
-- 
2.31.1


  parent reply	other threads:[~2021-08-04 15:41 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-04 15:40 [PATCH v3 00/14] KVM: s390: pv: implement lazy destroy Claudio Imbrenda
2021-08-04 15:40 ` [PATCH v3 01/14] KVM: s390: pv: add macros for UVC CC values Claudio Imbrenda
2021-08-06  7:26   ` David Hildenbrand
2021-08-06  9:34     ` Claudio Imbrenda
2021-08-06 15:15     ` Janosch Frank
2021-08-04 15:40 ` [PATCH v3 02/14] KVM: s390: pv: avoid stall notifications for some UVCs Claudio Imbrenda
2021-08-06  7:30   ` David Hildenbrand
2021-08-06  9:33     ` Claudio Imbrenda
2021-08-04 15:40 ` [PATCH v3 03/14] KVM: s390: pv: leak the ASCE page when destroy fails Claudio Imbrenda
2021-08-06  7:31   ` David Hildenbrand
2021-08-06  9:32     ` Claudio Imbrenda
2021-08-06 11:39       ` David Hildenbrand
2021-08-04 15:40 ` [PATCH v3 04/14] KVM: s390: pv: properly handle page flags for protected guests Claudio Imbrenda
2021-08-04 15:40 ` [PATCH v3 05/14] KVM: s390: pv: handle secure storage violations " Claudio Imbrenda
2021-08-04 15:40 ` [PATCH v3 06/14] KVM: s390: pv: handle secure storage exceptions for normal guests Claudio Imbrenda
2021-08-04 15:40 ` [PATCH v3 07/14] KVM: s390: pv: refactor s390_reset_acc Claudio Imbrenda
2021-08-04 15:40 ` [PATCH v3 08/14] KVM: s390: pv: usage counter instead of flag Claudio Imbrenda
2021-08-04 15:40 ` [PATCH v3 09/14] KVM: s390: pv: add export before import Claudio Imbrenda
2021-08-04 15:40 ` [PATCH v3 10/14] KVM: s390: pv: lazy destroy for reboot Claudio Imbrenda
2021-08-04 15:40 ` Claudio Imbrenda [this message]
2021-08-04 15:40 ` [PATCH v3 12/14] KVM: s390: pv: module parameter to fence lazy destroy Claudio Imbrenda
2021-08-04 15:40 ` [PATCH v3 13/14] KVM: s390: pv: add OOM notifier for " Claudio Imbrenda
2021-08-04 15:40 ` [PATCH v3 14/14] KVM: s390: pv: avoid export before import if possible Claudio Imbrenda
2021-08-06  7:10 ` [PATCH v3 00/14] KVM: s390: pv: implement lazy destroy David Hildenbrand
2021-08-06  9:30   ` Claudio Imbrenda
2021-08-06 11:30     ` David Hildenbrand
2021-08-06 13:44       ` Claudio Imbrenda
2021-08-09  8:50         ` David Hildenbrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210804154046.88552-12-imbrenda@linux.ibm.com \
    --to=imbrenda@linux.ibm.com \
    --cc=Ulrich.Weigand@de.ibm.com \
    --cc=borntraeger@de.ibm.com \
    --cc=cohuck@redhat.com \
    --cc=david@redhat.com \
    --cc=frankja@linux.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=pasic@linux.ibm.com \
    --cc=thuth@redhat.com \
    --subject='Re: [PATCH v3 11/14] KVM: s390: pv: extend lazy destroy to handle shutdown' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).