LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Rick Edgecombe <rick.p.edgecombe@intel.com>
To: dave.hansen@intel.com, luto@kernel.org, peterz@infradead.org,
	x86@kernel.org, akpm@linux-foundation.org, keescook@chromium.org,
	shakeelb@google.com, vbabka@suse.cz, rppt@kernel.org
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>,
	linux-mm@kvack.org, linux-hardening@vger.kernel.org,
	kernel-hardening@lists.openwall.com, ira.weiny@intel.com,
	dan.j.williams@intel.com, linux-kernel@vger.kernel.org
Subject: [RFC PATCH v2 03/19] x86/mm/cpa: Add grouped page allocations
Date: Mon, 30 Aug 2021 16:59:11 -0700	[thread overview]
Message-ID: <20210830235927.6443-4-rick.p.edgecombe@intel.com> (raw)
In-Reply-To: <20210830235927.6443-1-rick.p.edgecombe@intel.com>

For x86, setting memory permissions on the direct map results in fracturing
large pages. Direct map fracturing can be reduced by locating pages that
will have their permissions set close together.

Create a simple page cache that allocates pages from huge page size
blocks. Don't guarantee that a page will come from a huge page grouping,
instead fallback to non-grouped pages to fulfill the allocation if
needed. Also, register a shrinker such that the system can ask for the
pages back if needed. Since this is only needed when there is a direct
map, compile it out on highmem systems.

Free pages in the cache are kept track of in per-node list inside a
list_lru. NUMA_NO_NODE requests are serviced by checking each per-node
list in a round robin fashion. If pages are requested for a certain node
but the cache is empty for that node, a whole additional huge page size
page is allocated.

Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
 arch/x86/include/asm/set_memory.h |  14 +++
 arch/x86/mm/pat/set_memory.c      | 156 ++++++++++++++++++++++++++++++
 2 files changed, 170 insertions(+)

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 43fa081a1adb..6e897ab91b77 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -4,6 +4,9 @@
 
 #include <asm/page.h>
 #include <asm-generic/set_memory.h>
+#include <linux/gfp.h>
+#include <linux/list_lru.h>
+#include <linux/shrinker.h>
 
 /*
  * The set_memory_* API can be used to change various attributes of a virtual
@@ -135,4 +138,15 @@ static inline int clear_mce_nospec(unsigned long pfn)
  */
 #endif
 
+struct grouped_page_cache {
+	struct shrinker shrinker;
+	struct list_lru lru;
+	gfp_t gfp;
+	atomic_t nid_round_robin;
+};
+
+int init_grouped_page_cache(struct grouped_page_cache *gpc, gfp_t gfp);
+struct page *get_grouped_page(int node, struct grouped_page_cache *gpc);
+void free_grouped_page(struct grouped_page_cache *gpc, struct page *page);
+
 #endif /* _ASM_X86_SET_MEMORY_H */
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index ad8a5c586a35..e9527811f476 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2314,6 +2314,162 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
 	return retval;
 }
 
+#ifndef HIGHMEM
+static struct page *__alloc_page_order(int node, gfp_t gfp_mask, int order)
+{
+	if (node == NUMA_NO_NODE)
+		return alloc_pages(gfp_mask, order);
+
+	return alloc_pages_node(node, gfp_mask, order);
+}
+
+static struct grouped_page_cache *__get_gpc_from_sc(struct shrinker *shrinker)
+{
+	return container_of(shrinker, struct grouped_page_cache, shrinker);
+}
+
+static unsigned long grouped_shrink_count(struct shrinker *shrinker,
+					  struct shrink_control *sc)
+{
+	struct grouped_page_cache *gpc = __get_gpc_from_sc(shrinker);
+	unsigned long page_cnt = list_lru_shrink_count(&gpc->lru, sc);
+
+	return page_cnt ? page_cnt : SHRINK_EMPTY;
+}
+
+static enum lru_status grouped_isolate(struct list_head *item,
+				       struct list_lru_one *list,
+				       spinlock_t *lock, void *cb_arg)
+{
+	struct list_head *dispose = cb_arg;
+
+	list_lru_isolate_move(list, item, dispose);
+
+	return LRU_REMOVED;
+}
+
+static void __dispose_pages(struct grouped_page_cache *gpc, struct list_head *head)
+{
+	struct list_head *cur, *next;
+
+	list_for_each_safe(cur, next, head) {
+		struct page *page = list_entry(head, struct page, lru);
+
+		list_del(cur);
+
+		__free_pages(page, 0);
+	}
+}
+
+static unsigned long grouped_shrink_scan(struct shrinker *shrinker,
+					 struct shrink_control *sc)
+{
+	struct grouped_page_cache *gpc = __get_gpc_from_sc(shrinker);
+	unsigned long isolated;
+	LIST_HEAD(freeable);
+
+	if (!(sc->gfp_mask & gpc->gfp))
+		return SHRINK_STOP;
+
+	isolated = list_lru_shrink_walk(&gpc->lru, sc, grouped_isolate,
+					&freeable);
+	__dispose_pages(gpc, &freeable);
+
+	/* Every item walked gets isolated */
+	sc->nr_scanned += isolated;
+
+	return isolated;
+}
+
+static struct page *__remove_first_page(struct grouped_page_cache *gpc, int node)
+{
+	unsigned int start_nid, i;
+	struct list_head *head;
+
+	if (node != NUMA_NO_NODE) {
+		head = list_lru_get_mru(&gpc->lru, node);
+		if (head)
+			return list_entry(head, struct page, lru);
+		return NULL;
+	}
+
+	/* If NUMA_NO_NODE, search the nodes in round robin for a page */
+	start_nid = (unsigned int)atomic_fetch_inc(&gpc->nid_round_robin) % nr_node_ids;
+	for (i = 0; i < nr_node_ids; i++) {
+		int cur_nid = (start_nid + i) % nr_node_ids;
+
+		head = list_lru_get_mru(&gpc->lru, cur_nid);
+		if (head)
+			return list_entry(head, struct page, lru);
+	}
+
+	return NULL;
+}
+
+/* Get and add some new pages to the cache to be used by VM_GROUP_PAGES */
+static struct page *__replenish_grouped_pages(struct grouped_page_cache *gpc, int node)
+{
+	const unsigned int hpage_cnt = HPAGE_SIZE >> PAGE_SHIFT;
+	struct page *page;
+	int i;
+
+	page = __alloc_page_order(node, gpc->gfp, HUGETLB_PAGE_ORDER);
+	if (!page)
+		return __alloc_page_order(node, gpc->gfp, 0);
+
+	split_page(page, HUGETLB_PAGE_ORDER);
+
+	for (i = 1; i < hpage_cnt; i++)
+		free_grouped_page(gpc, &page[i]);
+
+	return &page[0];
+}
+
+int init_grouped_page_cache(struct grouped_page_cache *gpc, gfp_t gfp)
+{
+	int err = 0;
+
+	memset(gpc, 0, sizeof(struct grouped_page_cache));
+
+	if (list_lru_init(&gpc->lru))
+		goto out;
+
+	gpc->shrinker.count_objects = grouped_shrink_count;
+	gpc->shrinker.scan_objects = grouped_shrink_scan;
+	gpc->shrinker.seeks = DEFAULT_SEEKS;
+	gpc->shrinker.flags = SHRINKER_NUMA_AWARE;
+
+	err = register_shrinker(&gpc->shrinker);
+	if (err)
+		list_lru_destroy(&gpc->lru);
+
+out:
+	return err;
+}
+
+struct page *get_grouped_page(int node, struct grouped_page_cache *gpc)
+{
+	struct page *page;
+
+	if (in_interrupt()) {
+		pr_warn_once("grouped pages: Cannot allocate grouped page in interrupt");
+		return NULL;
+	}
+
+	page = __remove_first_page(gpc, node);
+
+	if (page)
+		return page;
+
+	return __replenish_grouped_pages(gpc, node);
+}
+
+void free_grouped_page(struct grouped_page_cache *gpc, struct page *page)
+{
+	INIT_LIST_HEAD(&page->lru);
+	list_lru_add_node(&gpc->lru, &page->lru, page_to_nid(page));
+}
+#endif /* !HIGHMEM */
 /*
  * The testcases use internal knowledge of the implementation that shouldn't
  * be exposed to the rest of the kernel. Include these directly here.
-- 
2.17.1


  parent reply	other threads:[~2021-08-31  0:00 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-30 23:59 [RFC PATCH v2 00/19] PKS write protected page tables Rick Edgecombe
2021-08-30 23:59 ` [RFC PATCH v2 01/19] list: Support getting most recent element in list_lru Rick Edgecombe
2021-08-30 23:59 ` [RFC PATCH v2 02/19] list: Support list head not in object for list_lru Rick Edgecombe
2021-08-30 23:59 ` Rick Edgecombe [this message]
2021-08-30 23:59 ` [RFC PATCH v2 04/19] mm: Explicitly zero page table lock ptr Rick Edgecombe
2021-08-30 23:59 ` [RFC PATCH v2 05/19] x86, mm: Use cache of page tables Rick Edgecombe
2021-08-31  8:40   ` Mike Rapoport
2021-08-31 19:09     ` Edgecombe, Rick P
2021-08-30 23:59 ` [RFC PATCH v2 06/19] x86/mm/cpa: Add perm callbacks to grouped pages Rick Edgecombe
2021-08-30 23:59 ` [RFC PATCH v2 07/19] x86/cpufeatures: Add feature for pks tables Rick Edgecombe
2021-08-30 23:59 ` [RFC PATCH v2 08/19] x86/mm/cpa: Add get_grouped_page_atomic() Rick Edgecombe
2021-08-30 23:59 ` [RFC PATCH v2 09/19] x86/mm: Support GFP_ATOMIC in alloc_table_node() Rick Edgecombe
2021-08-31  8:32   ` Mike Rapoport
2021-08-30 23:59 ` [RFC PATCH v2 10/19] x86/mm: Use alloc_table() for fill_pte(), etc Rick Edgecombe
2021-08-31  8:47   ` Mike Rapoport
2021-08-31 18:48     ` Edgecombe, Rick P
2021-08-30 23:59 ` [RFC PATCH v2 11/19] mm/sparsemem: Use alloc_table() for table allocations Rick Edgecombe
2021-08-31  8:55   ` Mike Rapoport
2021-08-31 18:25     ` Edgecombe, Rick P
2021-09-01  7:22       ` Mike Rapoport
2021-09-02 13:56         ` Vlastimil Babka
2021-08-30 23:59 ` [RFC PATCH v2 12/19] x86/mm: Use free_table in unmap path Rick Edgecombe
2021-08-30 23:59 ` [RFC PATCH v2 13/19] mm/debug_vm_page_table: Use setters instead of WRITE_ONCE Rick Edgecombe
2021-08-30 23:59 ` [RFC PATCH v2 14/19] x86/efi: Toggle table protections when copying Rick Edgecombe
2021-08-30 23:59 ` [RFC PATCH v2 15/19] x86/mm/cpa: Add set_memory_pks() Rick Edgecombe
2021-08-30 23:59 ` [RFC PATCH v2 16/19] x86/mm: Protect page tables with PKS Rick Edgecombe
2021-08-31  8:56   ` Mike Rapoport
2021-08-31 17:55     ` Edgecombe, Rick P
2021-08-30 23:59 ` [RFC PATCH v2 17/19] x86/mm/cpa: PKS protect direct map page tables Rick Edgecombe
2021-08-31 10:14   ` Mike Rapoport
2021-08-31 17:58     ` Edgecombe, Rick P
2021-08-30 23:59 ` [RFC PATCH v2 18/19] x86/mm: Add PKS table soft mode Rick Edgecombe
2021-08-31  3:49   ` Randy Dunlap
2021-08-31 17:55     ` Edgecombe, Rick P
2021-08-30 23:59 ` [RFC PATCH v2 19/19] x86/mm: Add PKS table debug checking Rick Edgecombe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210830235927.6443-4-rick.p.edgecombe@intel.com \
    --to=rick.p.edgecombe@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=ira.weiny@intel.com \
    --cc=keescook@chromium.org \
    --cc=kernel-hardening@lists.openwall.com \
    --cc=linux-hardening@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rppt@kernel.org \
    --cc=shakeelb@google.com \
    --cc=vbabka@suse.cz \
    --cc=x86@kernel.org \
    --subject='Re: [RFC PATCH v2 03/19] x86/mm/cpa: Add grouped page allocations' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).