LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Mike Kravetz <mike.kravetz@oracle.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: David Hildenbrand <david@redhat.com>,
	Michal Hocko <mhocko@suse.com>,
	Oscar Salvador <osalvador@suse.de>, Zi Yan <ziy@nvidia.com>,
	Muchun Song <songmuchun@bytedance.com>,
	Naoya Horiguchi <naoya.horiguchi@linux.dev>,
	David Rientjes <rientjes@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Mike Kravetz <mike.kravetz@oracle.com>
Subject: [PATCH 1/8] hugetlb: add demote hugetlb page sysfs interfaces
Date: Mon, 16 Aug 2021 15:49:46 -0700	[thread overview]
Message-ID: <20210816224953.157796-2-mike.kravetz@oracle.com> (raw)
In-Reply-To: <20210816224953.157796-1-mike.kravetz@oracle.com>

Two new sysfs files are added to demote hugetlb pages.  These files are
both per-hugetlb page size and per node.  Files are:
  demote_size - The size in kB that pages are demoted to. (read-only)
  demote - The number of huge pages to demote. (write-only)

Writing a value to demote will result in an attempt to demote that
number of hugetlb pages to an appropriate number of demote_size pages.

This patch does not provide full demote functionality.  It only provides
the sysfs interfaces and uses existing code to free pages to the buddy
allocator if demote_size == PAGESIZE.

Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 include/linux/hugetlb.h |   1 +
 mm/hugetlb.c            | 121 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 121 insertions(+), 1 deletion(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f7ca1a3870ea..d96e11ce986c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -596,6 +596,7 @@ struct hstate {
 	int next_nid_to_alloc;
 	int next_nid_to_free;
 	unsigned int order;
+	unsigned int demote_order;
 	unsigned long mask;
 	unsigned long max_huge_pages;
 	unsigned long nr_huge_pages;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6337697f7ee4..0f16306993b3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2986,7 +2986,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 
 static void __init hugetlb_init_hstates(void)
 {
-	struct hstate *h;
+	struct hstate *h, *h2;
 
 	for_each_hstate(h) {
 		if (minimum_order > huge_page_order(h))
@@ -2995,6 +2995,17 @@ static void __init hugetlb_init_hstates(void)
 		/* oversize hugepages were init'ed in early boot */
 		if (!hstate_is_gigantic(h))
 			hugetlb_hstate_alloc_pages(h);
+
+		/*
+		 * Set demote order for each hstate.  Note that
+		 * h->demote_order is initially 0.
+		 */
+		for_each_hstate(h2) {
+			if (h2 == h)
+				continue;
+			if (h2->order < h->order && h2->order > h->demote_order)
+				h->demote_order = h2->order;
+		}
 	}
 	VM_BUG_ON(minimum_order == UINT_MAX);
 }
@@ -3235,9 +3246,36 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
 	return 0;
 }
 
+static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+	__must_hold(&hugetlb_lock)
+{
+	int rc = 0;
+
+	lockdep_assert_held(&hugetlb_lock);
+	/* If no demote order, free to buddy; hugetlb_lock is dropped to free */
+	if (!h->demote_order) {
+		struct page *page = remove_pool_huge_page(h, nodes_allowed, 0);
+
+		if (!page)
+			return rc;
+		spin_unlock_irq(&hugetlb_lock);
+		update_and_free_page(h, page, false);
+		spin_lock_irq(&hugetlb_lock);
+		return 1;
+	}
+
+	/*
+	 * TODO - demote functionality will be added in subsequent patch
+	 */
+	return rc;
+}
+
 #define HSTATE_ATTR_RO(_name) \
 	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 
+#define HSTATE_ATTR_WO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_WO(_name)
+
 #define HSTATE_ATTR(_name) \
 	static struct kobj_attribute _name##_attr = \
 		__ATTR(_name, 0644, _name##_show, _name##_store)
@@ -3433,12 +3471,91 @@ static ssize_t surplus_hugepages_show(struct kobject *kobj,
 }
 HSTATE_ATTR_RO(surplus_hugepages);
 
+static ssize_t demote_store(struct kobject *kobj,
+	       struct kobj_attribute *attr, const char *buf, size_t len)
+{
+	unsigned long nr_demote;
+	unsigned long nr_available;
+	nodemask_t nodes_allowed, *n_mask;
+	struct hstate *h;
+	int err;
+	int nid;
+
+	err = kstrtoul(buf, 10, &nr_demote);
+	if (err)
+		return err;
+	h = kobj_to_hstate(kobj, &nid);
+
+	/* Synchronize with other sysfs operations modifying huge pages */
+	mutex_lock(&h->resize_lock);
+
+	spin_lock_irq(&hugetlb_lock);
+	if (nid != NUMA_NO_NODE) {
+		nr_available = h->free_huge_pages_node[nid];
+		init_nodemask_of_node(&nodes_allowed, nid);
+		n_mask = &nodes_allowed;
+	} else {
+		nr_available = h->free_huge_pages;
+		n_mask = &node_states[N_MEMORY];
+	}
+	if (nr_available <= h->resv_huge_pages)
+		goto out;
+	nr_available -= h->resv_huge_pages;
+	nr_demote = min(nr_available, nr_demote);
+
+	while (nr_demote) {
+		if (!demote_pool_huge_page(h, n_mask))
+			break;
+
+		/*
+		 * We may have dropped the lock in the routines to demote/free
+		 * a page.  Recompute nr_demote as counts could have changed
+		 * and we must not demote a reserved huge page.  Compare before
+		 * subtracting: the counts are unsigned and would wrap.
+		 */
+		nr_demote--;
+		if (nid != NUMA_NO_NODE)
+			nr_available = h->free_huge_pages_node[nid];
+		else
+			nr_available = h->free_huge_pages;
+		if (nr_available <= h->resv_huge_pages)
+			nr_demote = 0;
+		else
+			nr_demote = min(nr_available - h->resv_huge_pages,
+					nr_demote);
+	}
+
+out:
+	spin_unlock_irq(&hugetlb_lock);
+	mutex_unlock(&h->resize_lock);
+
+	return len;
+}
+HSTATE_ATTR_WO(demote);
+
+static ssize_t demote_size_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h;
+	unsigned long demote_size;
+	int nid;
+
+	h = kobj_to_hstate(kobj, &nid);
+	/* Size in kB of the pages this hstate demotes to */
+	demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K;
+
+	return sysfs_emit(buf, "%lukB\n", demote_size);
+}
+HSTATE_ATTR_RO(demote_size);
+
 static struct attribute *hstate_attrs[] = {
 	&nr_hugepages_attr.attr,
 	&nr_overcommit_hugepages_attr.attr,
 	&free_hugepages_attr.attr,
 	&resv_hugepages_attr.attr,
 	&surplus_hugepages_attr.attr,
+	&demote_size_attr.attr,
+	&demote_attr.attr,
 #ifdef CONFIG_NUMA
 	&nr_hugepages_mempolicy_attr.attr,
 #endif
@@ -3508,6 +3625,8 @@ static struct attribute *per_node_hstate_attrs[] = {
 	&nr_hugepages_attr.attr,
 	&free_hugepages_attr.attr,
 	&surplus_hugepages_attr.attr,
+	&demote_size_attr.attr,
+	&demote_attr.attr,
 	NULL,
 };
 
-- 
2.31.1


  reply	other threads:[~2021-08-16 22:50 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-16 22:49 [PATCH RESEND 0/8] hugetlb: add demote/split page functionality Mike Kravetz
2021-08-16 22:49 ` Mike Kravetz [this message]
2021-08-16 22:49 ` [PATCH 2/8] hugetlb: add HPageCma flag and code to free non-gigantic pages in CMA Mike Kravetz
2021-08-16 22:49 ` [PATCH 3/8] hugetlb: add demote bool to gigantic page routines Mike Kravetz
2021-08-16 22:49 ` [PATCH 4/8] hugetlb: add hugetlb demote page support Mike Kravetz
2021-08-16 22:49 ` [PATCH 5/8] hugetlb: document the demote sysfs interfaces Mike Kravetz
2021-08-16 23:28   ` Andrew Morton
2021-08-17  1:04     ` Mike Kravetz
2021-09-21 13:52   ` Aneesh Kumar K.V
2021-09-21 17:17     ` Mike Kravetz
2021-08-16 22:49 ` [PATCH 6/8] hugetlb: vmemmap optimizations when demoting hugetlb pages Mike Kravetz
2021-08-16 22:49 ` [PATCH 7/8] hugetlb: prepare destroy and prep routines for vmemmap optimized pages Mike Kravetz
2021-08-16 22:49 ` [PATCH 8/8] hugetlb: Optimized demote vmemmap optimizatized pages Mike Kravetz
2021-08-16 23:23 ` [PATCH RESEND 0/8] hugetlb: add demote/split page functionality Andrew Morton
2021-08-17  0:17   ` Mike Kravetz
2021-08-17  0:39     ` Andrew Morton
2021-08-17  0:58       ` Mike Kravetz
2021-08-16 23:27 ` Andrew Morton
2021-08-17  0:46   ` Mike Kravetz
2021-08-17  1:46     ` Andrew Morton
2021-08-17  7:30       ` David Hildenbrand
2021-08-17 16:19         ` Mike Kravetz
2021-08-17 18:49           ` David Hildenbrand
2021-08-24 22:08       ` Mike Kravetz
2021-08-27 17:22         ` Vlastimil Babka
2021-08-27 23:04           ` Mike Kravetz
2021-08-30 10:11             ` Vlastimil Babka
2021-09-02 18:17               ` Mike Kravetz
2021-09-06 14:40                 ` Vlastimil Babka
     [not found]                 ` <20210907085001.3773-1-hdanton@sina.com>
2021-09-08 21:00                   ` Mike Kravetz
2021-09-09 11:54                     ` Michal Hocko
2021-09-09 13:45                       ` Vlastimil Babka
2021-09-09 21:31                         ` Mike Kravetz
2021-09-10  8:20                         ` Michal Hocko
2021-09-11  0:11                           ` Mike Kravetz
2021-09-13 15:50                             ` Michal Hocko
2021-09-15 16:57                               ` Mike Kravetz
2021-09-17 20:44                                 ` Mike Kravetz
  -- strict thread matches above, loose matches on Subject: below --
2021-07-21 23:05 [PATCH " Mike Kravetz
2021-07-21 23:05 ` [PATCH 1/8] hugetlb: add demote hugetlb page sysfs interfaces Mike Kravetz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210816224953.157796-2-mike.kravetz@oracle.com \
    --to=mike.kravetz@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=naoya.horiguchi@linux.dev \
    --cc=osalvador@suse.de \
    --cc=rientjes@google.com \
    --cc=songmuchun@bytedance.com \
    --cc=ziy@nvidia.com \
    --subject='Re: [PATCH 1/8] hugetlb: add demote hugetlb page sysfs interfaces' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).