Linux-Fsdevel Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Anthony Yznaga <anthony.yznaga@oracle.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: willy@infradead.org, corbet@lwn.net, tglx@linutronix.de,
	mingo@redhat.com, bp@alien8.de, x86@kernel.org, hpa@zytor.com,
	dave.hansen@linux.intel.com, luto@kernel.org,
	peterz@infradead.org, rppt@linux.ibm.com,
	akpm@linux-foundation.org, hughd@google.com,
	ebiederm@xmission.com, masahiroy@kernel.org, ardb@kernel.org,
	ndesaulniers@google.com, dima@golovin.in,
	daniel.kiper@oracle.com, nivedita@alum.mit.edu,
	rafael.j.wysocki@intel.com, dan.j.williams@intel.com,
	zhenzhong.duan@oracle.com, jroedel@suse.de, bhe@redhat.com,
	guro@fb.com, Thomas.Lendacky@amd.com,
	andriy.shevchenko@linux.intel.com, keescook@chromium.org,
	hannes@cmpxchg.org, minchan@kernel.org, mhocko@kernel.org,
	ying.huang@intel.com, yang.shi@linux.alibaba.com,
	gustavo@embeddedor.com, ziqian.lzq@antfin.com,
	vdavydov.dev@gmail.com, jason.zeng@intel.com,
	kevin.tian@intel.com, zhiyuan.lv@intel.com, lei.l.li@intel.com,
	paul.c.lai@intel.com, ashok.raj@intel.com,
	linux-fsdevel@vger.kernel.org, linux-doc@vger.kernel.org,
	kexec@lists.infradead.org
Subject: [RFC 14/43] mm: memblock: PKRAM: prevent memblock resize from clobbering preserved pages
Date: Wed,  6 May 2020 17:41:40 -0700	[thread overview]
Message-ID: <1588812129-8596-15-git-send-email-anthony.yznaga@oracle.com> (raw)
In-Reply-To: <1588812129-8596-1-git-send-email-anthony.yznaga@oracle.com>

The size of the memblock reserved array may be increased while preserved
pages are being reserved. When this happens, preserved pages that have
not yet been reserved are at risk for being clobbered when space for a
larger array is allocated.
When called from memblock_double_array(), a wrapper around
memblock_find_in_range() walks the preserved pages pagetable to find
sufficiently sized ranges without preserved pages and passes them to
memblock_find_in_range().

Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
 include/linux/pkram.h |  3 +++
 mm/memblock.c         | 15 +++++++++++++--
 mm/pkram.c            | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/include/linux/pkram.h b/include/linux/pkram.h
index edc5d8bef9d3..409022e1472f 100644
--- a/include/linux/pkram.h
+++ b/include/linux/pkram.h
@@ -62,6 +62,9 @@ struct page *pkram_load_page(struct pkram_stream *ps, unsigned long *index,
 ssize_t pkram_write(struct pkram_stream *ps, const void *buf, size_t count);
 size_t pkram_read(struct pkram_stream *ps, void *buf, size_t count);
 
+phys_addr_t pkram_memblock_find_in_range(phys_addr_t start, phys_addr_t end,
+					 phys_addr_t size, phys_addr_t align);
+
 #ifdef CONFIG_PKRAM
 extern unsigned long pkram_reserved_pages;
 void pkram_reserve(void);
diff --git a/mm/memblock.c b/mm/memblock.c
index c79ba6f9920c..69ae883b8d21 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -16,6 +16,7 @@
 #include <linux/kmemleak.h>
 #include <linux/seq_file.h>
 #include <linux/memblock.h>
+#include <linux/pkram.h>
 
 #include <asm/sections.h>
 #include <linux/io.h>
@@ -349,6 +350,16 @@ phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
 	return ret;
 }
 
+phys_addr_t __init_memblock __memblock_find_in_range(phys_addr_t start,
+					phys_addr_t end, phys_addr_t size,
+					phys_addr_t align)
+{
+	if (IS_ENABLED(CONFIG_PKRAM))
+		return pkram_memblock_find_in_range(start, end, size, align);
+	else
+		return memblock_find_in_range(start, end, size, align);
+}
+
 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
 {
 	type->total_size -= type->regions[r].size;
@@ -447,11 +458,11 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
 		if (type != &memblock.reserved)
 			new_area_start = new_area_size = 0;
 
-		addr = memblock_find_in_range(new_area_start + new_area_size,
+		addr = __memblock_find_in_range(new_area_start + new_area_size,
 						memblock.current_limit,
 						new_alloc_size, PAGE_SIZE);
 		if (!addr && new_area_size)
-			addr = memblock_find_in_range(0,
+			addr = __memblock_find_in_range(0,
 				min(new_area_start, memblock.current_limit),
 				new_alloc_size, PAGE_SIZE);
 
diff --git a/mm/pkram.c b/mm/pkram.c
index dd3c89614010..e49c9bcd3854 100644
--- a/mm/pkram.c
+++ b/mm/pkram.c
@@ -1238,3 +1238,54 @@ void pkram_free_pgt(void)
 	__free_pages_core(virt_to_page(pkram_pgd), 0);
 	pkram_pgd = NULL;
 }
+
+static int __init_memblock pkram_memblock_find_cb(struct pkram_pg_state *st, unsigned long base, unsigned long size)
+{
+	unsigned long end = base + size;
+	unsigned long addr;
+
+	if (size < st->min_size)
+		return 0;
+
+	addr =  memblock_find_in_range(base, end, st->min_size, PAGE_SIZE);
+	if (!addr)
+		return 0;
+
+	st->retval = addr;
+	return 1;
+}
+
+/*
+ * It may be necessary to allocate a larger reserved memblock array
+ * while populating it with ranges of preserved pages.  To avoid
+ * trampling preserved pages that have not yet been added to the
+ * memblock reserved list this function implements a wrapper around
+ * memblock_find_in_range() that restricts searches to subranges
+ * that do not contain preserved pages.
+ */
+phys_addr_t __init_memblock pkram_memblock_find_in_range(phys_addr_t start,
+					phys_addr_t end, phys_addr_t size,
+					phys_addr_t align)
+{
+	struct pkram_pg_state st = {
+		.range_cb = pkram_memblock_find_cb,
+		.min_addr = start,
+		.max_addr = end,
+		.min_size = PAGE_ALIGN(size),
+		.find_holes = true,
+	};
+
+	if (!pkram_reservation_in_progress)
+		return memblock_find_in_range(start, end, size, align);
+
+	if (!pkram_pgd) {
+		WARN_ONCE(1, "No preserved pages pagetable\n");
+		return memblock_find_in_range(start, end, size, align);
+	}
+
+	WARN_ONCE(memblock_bottom_up(), "PKRAM: bottom up memblock allocation not yet supported\n");
+
+	pkram_walk_pgt_rev(&st, pkram_pgd);
+
+	return st.retval;
+}
-- 
2.13.3


  parent reply	other threads:[~2020-05-07  0:44 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-07  0:41 [RFC 00/43] PKRAM: Preserved-over-Kexec RAM Anthony Yznaga
2020-05-07  0:41 ` [RFC 01/43] mm: add PKRAM API stubs and Kconfig Anthony Yznaga
2020-05-07  0:41 ` [RFC 02/43] mm: PKRAM: implement node load and save functions Anthony Yznaga
2020-05-07  0:41 ` [RFC 03/43] mm: PKRAM: implement object " Anthony Yznaga
2020-05-07  0:41 ` [RFC 04/43] mm: PKRAM: implement page stream operations Anthony Yznaga
2020-05-07  0:41 ` [RFC 05/43] mm: PKRAM: support preserving transparent hugepages Anthony Yznaga
2020-05-07  0:41 ` [RFC 06/43] mm: PKRAM: implement byte stream operations Anthony Yznaga
2020-05-07  0:41 ` [RFC 07/43] mm: PKRAM: link nodes by pfn before reboot Anthony Yznaga
2020-05-07  0:41 ` [RFC 08/43] mm: PKRAM: introduce super block Anthony Yznaga
2020-05-07  0:41 ` [RFC 09/43] PKRAM: build a physical mapping pagetable of pages to be preserved Anthony Yznaga
2020-05-07  0:41 ` [RFC 10/43] PKRAM: add code for walking the preserved pages pagetable Anthony Yznaga
2020-05-07  0:41 ` [RFC 11/43] PKRAM: pass the preserved pages pagetable to the next kernel Anthony Yznaga
2020-05-07  0:41 ` [RFC 12/43] mm: PKRAM: reserve preserved memory at boot Anthony Yznaga
2020-05-07  0:41 ` [RFC 13/43] mm: PKRAM: free preserved pages pagetable Anthony Yznaga
2020-05-07  0:41 ` Anthony Yznaga [this message]
2020-05-11 13:57   ` [RFC 14/43] mm: memblock: PKRAM: prevent memblock resize from clobbering preserved pages Mike Rapoport
2020-05-11 23:29     ` Anthony Yznaga
2020-05-07  0:41 ` [RFC 15/43] PKRAM: provide a way to ban pages from use by PKRAM Anthony Yznaga
2020-05-07  0:41 ` [RFC 16/43] kexec: PKRAM: prevent kexec clobbering preserved pages in some cases Anthony Yznaga
2020-05-07  0:41 ` [RFC 17/43] PKRAM: provide a way to check if a memory range has preserved pages Anthony Yznaga
2020-05-07  0:41 ` [RFC 18/43] kexec: PKRAM: avoid clobbering already " Anthony Yznaga
2020-05-07  0:41 ` [RFC 19/43] mm: PKRAM: allow preserved memory to be freed from userspace Anthony Yznaga
2020-05-07  0:41 ` [RFC 20/43] PKRAM: disable feature when running the kdump kernel Anthony Yznaga
2020-05-07  0:41 ` [RFC 21/43] x86/KASLR: PKRAM: support physical kaslr Anthony Yznaga
2020-05-07 17:51   ` Kees Cook
2020-05-07 18:41     ` Anthony Yznaga
2020-05-07  0:41 ` [RFC 22/43] mm: shmem: introduce shmem_insert_page Anthony Yznaga
2020-05-07  0:41 ` [RFC 23/43] mm: shmem: enable saving to PKRAM Anthony Yznaga
2020-05-07  0:41 ` [RFC 24/43] mm: shmem: prevent swapping of PKRAM-enabled tmpfs pages Anthony Yznaga
2020-05-07  0:41 ` [RFC 25/43] mm: shmem: specify the mm to use when inserting pages Anthony Yznaga
2020-05-07  0:41 ` [RFC 26/43] mm: shmem: when inserting, handle pages already charged to a memcg Anthony Yznaga
2020-05-07  0:41 ` [RFC 27/43] x86/mm/numa: add numa_isolate_memblocks() Anthony Yznaga
2020-05-07  0:41 ` [RFC 28/43] PKRAM: ensure memblocks with preserved pages init'd for numa Anthony Yznaga
2020-05-07  0:41 ` [RFC 29/43] memblock: PKRAM: mark memblocks that contain preserved pages Anthony Yznaga
2020-05-07  0:41 ` [RFC 30/43] memblock: add for_each_reserved_mem_range() Anthony Yznaga
2020-05-07  0:41 ` [RFC 31/43] memblock, mm: defer initialization of preserved pages Anthony Yznaga
2020-05-07  0:41 ` [RFC 32/43] shmem: PKRAM: preserve shmem files a chunk at a time Anthony Yznaga
2020-05-07  0:41 ` [RFC 33/43] PKRAM: atomically add and remove link pages Anthony Yznaga
2020-05-07  0:42 ` [RFC 34/43] shmem: PKRAM: multithread preserving and restoring shmem pages Anthony Yznaga
2020-05-07 16:30   ` Randy Dunlap
2020-05-07 17:59     ` Anthony Yznaga
2020-05-07  0:42 ` [RFC 35/43] shmem: introduce shmem_insert_pages() Anthony Yznaga
2020-05-07  0:42 ` [RFC 36/43] PKRAM: add support for loading pages in bulk Anthony Yznaga
2020-05-07  0:42 ` [RFC 37/43] shmem: PKRAM: enable bulk loading of preserved pages into shmem Anthony Yznaga
2020-05-07  0:42 ` [RFC 38/43] mm: implement splicing a list of pages to the LRU Anthony Yznaga
2020-05-07  0:42 ` [RFC 39/43] shmem: optimize adding pages to the LRU in shmem_insert_pages() Anthony Yznaga
2020-05-07  0:42 ` [RFC 40/43] shmem: initial support for adding multiple pages to pagecache Anthony Yznaga
2020-05-07  0:42 ` [RFC 41/43] XArray: add xas_export_node() and xas_import_node() Anthony Yznaga
2020-05-07  0:42 ` [RFC 42/43] shmem: reduce time holding xa_lock when inserting pages Anthony Yznaga
2020-05-07  0:42 ` [RFC 43/43] PKRAM: improve index alignment of pkram_link entries Anthony Yznaga

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1588812129-8596-15-git-send-email-anthony.yznaga@oracle.com \
    --to=anthony.yznaga@oracle.com \
    --cc=Thomas.Lendacky@amd.com \
    --cc=akpm@linux-foundation.org \
    --cc=andriy.shevchenko@linux.intel.com \
    --cc=ardb@kernel.org \
    --cc=ashok.raj@intel.com \
    --cc=bhe@redhat.com \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dan.j.williams@intel.com \
    --cc=daniel.kiper@oracle.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dima@golovin.in \
    --cc=ebiederm@xmission.com \
    --cc=guro@fb.com \
    --cc=gustavo@embeddedor.com \
    --cc=hannes@cmpxchg.org \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=jason.zeng@intel.com \
    --cc=jroedel@suse.de \
    --cc=keescook@chromium.org \
    --cc=kevin.tian@intel.com \
    --cc=kexec@lists.infradead.org \
    --cc=lei.l.li@intel.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=masahiroy@kernel.org \
    --cc=mhocko@kernel.org \
    --cc=minchan@kernel.org \
    --cc=mingo@redhat.com \
    --cc=ndesaulniers@google.com \
    --cc=nivedita@alum.mit.edu \
    --cc=paul.c.lai@intel.com \
    --cc=peterz@infradead.org \
    --cc=rafael.j.wysocki@intel.com \
    --cc=rppt@linux.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=vdavydov.dev@gmail.com \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    --cc=yang.shi@linux.alibaba.com \
    --cc=ying.huang@intel.com \
    --cc=zhenzhong.duan@oracle.com \
    --cc=zhiyuan.lv@intel.com \
    --cc=ziqian.lzq@antfin.com \
    --subject='Re: [RFC 14/43] mm: memblock: PKRAM: prevent memblock resize from clobbering preserved pages' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).