LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Christoph Lameter <clameter@sgi.com>
To: akpm@linux-foundation.org
Cc: Andrea Arcangeli <andrea@qumranet.com>
Cc: Robin Holt <holt@sgi.com>, Avi Kivity <avi@qumranet.com>,
	Izik Eidus <izike@qumranet.com>
Cc: kvm-devel@lists.sourceforge.net
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>, general@lists.openfabrics.org
Cc: Steve Wise <swise@opengridcomputing.com>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Kanoj Sarcar <kanojsarcar@yahoo.com>
Cc: steiner@sgi.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: daniel.blueman@quadrics.com
Subject: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem)
Date: Thu, 14 Feb 2008 22:49:04 -0800	[thread overview]
Message-ID: <20080215064933.376635032@sgi.com> (raw)
In-Reply-To: <20080215064859.384203497@sgi.com>

[-- Attachment #1: mmu_rmap_support --]
[-- Type: text/plain, Size: 8290 bytes --]

These special additional callbacks are required because XPmem (and likely
other mechanisms) do use their own rmap (multiple processes on a series
of remote Linux instances may be accessing the memory of a process).
F.e. XPmem may have to send out notifications to remote Linux instances
and receive confirmation before a page can be freed.

So we handle this like an additional Linux reverse map that is walked after
the existing rmaps have been walked. We leave the walking to the driver that
is then able to use something else than a spinlock to walk its reverse
maps. So we can actually call the driver without holding spinlocks while
we hold the Pagelock.

However, we cannot determine the mm_struct that a page belongs to at
that point. The mm_struct can only be determined from the rmaps by the
device driver.

We add another pageflag (PageExternalRmap) that is set if a page has
been remotely mapped (f.e. by a process from another Linux instance).
We can then only perform the callbacks for pages that are actually in
remote use.

Rmap notifiers need an extra page bit and are only available
on 64 bit platforms. This functionality is not available on 32 bit!

A notifier that uses the reverse maps callbacks does not need to provide
the invalidate_page() method that is called when locks are held.

Signed-off-by: Christoph Lameter <clameter@sgi.com>

---
 include/linux/mmu_notifier.h |   65 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/page-flags.h   |   11 +++++++
 mm/mmu_notifier.c            |   34 ++++++++++++++++++++++
 mm/rmap.c                    |    9 +++++
 4 files changed, 119 insertions(+)

Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h	2008-02-14 20:58:17.000000000 -0800
+++ linux-2.6/include/linux/page-flags.h	2008-02-14 21:21:04.000000000 -0800
@@ -105,6 +105,7 @@
  * 64 bit  |           FIELDS             | ??????         FLAGS         |
  *         63                            32                              0
  */
+#define PG_external_rmap	30	/* Page has external rmap */
 #define PG_uncached		31	/* Page has been mapped as uncached */
 #endif
 
@@ -296,6 +297,16 @@ static inline void __ClearPageTail(struc
 #define SetPageUncached(page)	set_bit(PG_uncached, &(page)->flags)
 #define ClearPageUncached(page)	clear_bit(PG_uncached, &(page)->flags)
 
+#if defined(CONFIG_MMU_NOTIFIER) && defined(CONFIG_64BIT)
+#define PageExternalRmap(page)	test_bit(PG_external_rmap, &(page)->flags)
+#define SetPageExternalRmap(page) set_bit(PG_external_rmap, &(page)->flags)
+#define ClearPageExternalRmap(page) clear_bit(PG_external_rmap, \
+							&(page)->flags)
+#else
+#define ClearPageExternalRmap(page) do {} while (0)
+#define PageExternalRmap(page)	0
+#endif
+
 struct page;	/* forward declaration */
 
 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
Index: linux-2.6/include/linux/mmu_notifier.h
===================================================================
--- linux-2.6.orig/include/linux/mmu_notifier.h	2008-02-14 21:20:55.000000000 -0800
+++ linux-2.6/include/linux/mmu_notifier.h	2008-02-14 21:21:04.000000000 -0800
@@ -23,6 +23,18 @@
  * 	where sleeping is allowed or in atomic contexts. A flag is passed
  * 	to indicate an atomic context.
  *
+ *
+ * 2. mmu_rmap_notifier
+ *
+ *	Callbacks for subsystems that provide their own rmaps. These
+ *	need to walk their own rmaps for a page. The invalidate_page
+ *	callback is outside of locks so that we are not in a strictly
+ *	atomic context (but we may be in a PF_MEMALLOC context if the
+ *	notifier is called from reclaim code) and are able to sleep.
+ *
+ *	Rmap notifiers need an extra page bit and are only available
+ *	on 64 bit platforms.
+ *
  *	Pages must be marked dirty if dirty bits are found to be set in
  *	the external ptes.
  */
@@ -96,6 +108,23 @@ struct mmu_notifier_ops {
 				 int atomic);
 };
 
+struct mmu_rmap_notifier_ops;
+
+struct mmu_rmap_notifier {
+	struct hlist_node hlist;
+	const struct mmu_rmap_notifier_ops *ops;
+};
+
+struct mmu_rmap_notifier_ops {
+	/*
+	 * Called with the page lock held after ptes are modified or removed
+	 * so that a subsystem with its own rmap's can remove remote ptes
+	 * mapping a page.
+	 */
+	void (*invalidate_page)(struct mmu_rmap_notifier *mrn,
+						struct page *page);
+};
+
 #ifdef CONFIG_MMU_NOTIFIER
 
 /*
@@ -146,6 +175,27 @@ static inline void mmu_notifier_head_ini
 		}							\
 	} while (0)
 
+extern void mmu_rmap_notifier_register(struct mmu_rmap_notifier *mrn);
+extern void mmu_rmap_notifier_unregister(struct mmu_rmap_notifier *mrn);
+
+/* Must hold PageLock */
+extern void mmu_rmap_export_page(struct page *page);
+
+extern struct hlist_head mmu_rmap_notifier_list;
+
+#define mmu_rmap_notifier(function, args...)				\
+	do {								\
+		struct mmu_rmap_notifier *__mrn;			\
+		struct hlist_node *__n;					\
+									\
+		rcu_read_lock();					\
+		hlist_for_each_entry_rcu(__mrn, __n,			\
+				&mmu_rmap_notifier_list, hlist)		\
+			if (__mrn->ops->function)			\
+				__mrn->ops->function(__mrn, args);	\
+		rcu_read_unlock();					\
+	} while (0);
+
 #else /* CONFIG_MMU_NOTIFIER */
 
 /*
@@ -164,6 +214,16 @@ static inline void mmu_notifier_head_ini
 		};							\
 	} while (0)
 
+#define mmu_rmap_notifier(function, args...)				\
+	do {								\
+		if (0) {						\
+			struct mmu_rmap_notifier *__mrn;		\
+									\
+			__mrn = (struct mmu_rmap_notifier *)(0x00ff);	\
+			__mrn->ops->function(__mrn, args);		\
+		}							\
+	} while (0);
+
 static inline void mmu_notifier_register(struct mmu_notifier *mn,
 						struct mm_struct *mm) {}
 static inline void mmu_notifier_unregister(struct mmu_notifier *mn,
@@ -177,6 +237,11 @@ static inline int mmu_notifier_age_page(
 
 static inline void mmu_notifier_head_init(struct mmu_notifier_head *mmh) {}
 
+static inline void mmu_rmap_notifier_register(struct mmu_rmap_notifier *mrn)
+									{}
+static inline void mmu_rmap_notifier_unregister(struct mmu_rmap_notifier *mrn)
+									{}
+
 #endif /* CONFIG_MMU_NOTIFIER */
 
 #endif /* _LINUX_MMU_NOTIFIER_H */
Index: linux-2.6/mm/mmu_notifier.c
===================================================================
--- linux-2.6.orig/mm/mmu_notifier.c	2008-02-14 21:17:51.000000000 -0800
+++ linux-2.6/mm/mmu_notifier.c	2008-02-14 21:21:04.000000000 -0800
@@ -74,3 +74,37 @@ void mmu_notifier_unregister(struct mmu_
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
 
+#ifdef CONFIG_64BIT
+static DEFINE_SPINLOCK(mmu_notifier_list_lock);
+HLIST_HEAD(mmu_rmap_notifier_list);
+
+void mmu_rmap_notifier_register(struct mmu_rmap_notifier *mrn)
+{
+	spin_lock(&mmu_notifier_list_lock);
+	hlist_add_head_rcu(&mrn->hlist, &mmu_rmap_notifier_list);
+	spin_unlock(&mmu_notifier_list_lock);
+}
+EXPORT_SYMBOL(mmu_rmap_notifier_register);
+
+void mmu_rmap_notifier_unregister(struct mmu_rmap_notifier *mrn)
+{
+	spin_lock(&mmu_notifier_list_lock);
+	hlist_del_rcu(&mrn->hlist);
+	spin_unlock(&mmu_notifier_list_lock);
+}
+EXPORT_SYMBOL(mmu_rmap_notifier_unregister);
+
+/*
+ * Export a page.
+ *
+ * Pagelock must be held.
+ * Must be called before a page is put on an external rmap.
+ */
+void mmu_rmap_export_page(struct page *page)
+{
+	BUG_ON(!PageLocked(page));
+	SetPageExternalRmap(page);
+}
+EXPORT_SYMBOL(mmu_rmap_export_page);
+
+#endif
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c	2008-02-14 21:21:00.000000000 -0800
+++ linux-2.6/mm/rmap.c	2008-02-14 21:21:04.000000000 -0800
@@ -497,6 +497,10 @@ int page_mkclean(struct page *page)
 		struct address_space *mapping = page_mapping(page);
 		if (mapping) {
 			ret = page_mkclean_file(mapping, page);
+			if (unlikely(PageExternalRmap(page))) {
+				mmu_rmap_notifier(invalidate_page, page);
+				ClearPageExternalRmap(page);
+			}
 			if (page_test_dirty(page)) {
 				page_clear_dirty(page);
 				ret = 1;
@@ -1013,6 +1017,11 @@ int try_to_unmap(struct page *page, int 
 	else
 		ret = try_to_unmap_file(page, migration);
 
+	if (unlikely(PageExternalRmap(page))) {
+		mmu_rmap_notifier(invalidate_page, page);
+		ClearPageExternalRmap(page);
+	}
+
 	if (!page_mapped(page))
 		ret = SWAP_SUCCESS;
 	return ret;

-- 

  parent reply	other threads:[~2008-02-15  6:52 UTC|newest]

Thread overview: 91+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-02-15  6:48 [patch 0/6] MMU Notifiers V7 Christoph Lameter
2008-02-15  6:49 ` [patch 1/6] mmu_notifier: Core code Christoph Lameter
2008-02-16  3:37   ` Andrew Morton
2008-02-16  8:45     ` Avi Kivity
2008-02-16  8:56       ` Andrew Morton
2008-02-16  9:21         ` Avi Kivity
2008-02-16 10:41     ` Brice Goglin
2008-02-16 10:58       ` Andrew Morton
2008-02-16 19:31         ` Christoph Lameter
2008-02-16 19:21     ` Christoph Lameter
2008-02-17  3:01       ` Andrea Arcangeli
2008-02-17 12:24         ` Robin Holt
2008-02-17  5:04     ` Doug Maxey
2008-02-18 22:33   ` Roland Dreier
2008-02-15  6:49 ` [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges Christoph Lameter
2008-02-16  3:37   ` Andrew Morton
2008-02-16 19:26     ` Christoph Lameter
2008-02-19  8:54   ` Nick Piggin
2008-02-19 13:34     ` Andrea Arcangeli
2008-02-27 22:23       ` Christoph Lameter
2008-02-27 23:57         ` Andrea Arcangeli
2008-02-19 23:08   ` Nick Piggin
2008-02-20  1:00     ` Andrea Arcangeli
2008-02-20  3:00       ` Robin Holt
2008-02-20  3:11         ` Nick Piggin
2008-02-20  3:19           ` Robin Holt
2008-02-27 22:39       ` Christoph Lameter
2008-02-28  0:38         ` Andrea Arcangeli
2008-02-27 22:35     ` Christoph Lameter
2008-02-27 22:42       ` Jack Steiner
2008-02-28  0:10       ` Christoph Lameter
2008-02-28  0:11       ` Andrea Arcangeli
2008-02-28  0:14         ` Christoph Lameter
2008-02-28  0:52           ` Andrea Arcangeli
2008-02-28  1:03             ` Christoph Lameter
2008-02-28  1:10               ` Andrea Arcangeli
2008-02-28 18:43                 ` Christoph Lameter
2008-02-29  0:55                   ` Andrea Arcangeli
2008-02-29  0:59                     ` Christoph Lameter
2008-02-29 13:13                       ` Andrea Arcangeli
2008-02-29 19:55                         ` Christoph Lameter
2008-02-29 20:17                           ` Andrea Arcangeli
2008-02-29 21:03                             ` Christoph Lameter
2008-02-29 21:23                               ` Andrea Arcangeli
2008-02-29 21:29                                 ` Christoph Lameter
2008-02-29 21:34                                 ` Christoph Lameter
2008-02-29 21:48                                   ` Andrea Arcangeli
2008-02-29 22:12                                     ` Christoph Lameter
2008-02-29 22:41                                       ` Andrea Arcangeli
2008-02-28 10:53             ` Robin Holt
2008-03-03  5:11       ` Nick Piggin
2008-03-03 19:28         ` Christoph Lameter
2008-03-03 19:50           ` Nick Piggin
2008-03-04 18:58             ` Christoph Lameter
2008-03-05  0:52               ` Nick Piggin
2008-02-15  6:49 ` [patch 3/6] mmu_notifier: invalidate_page callbacks Christoph Lameter
2008-02-16  3:37   ` Andrew Morton
2008-02-16 11:07     ` Andrea Arcangeli
2008-02-16 19:22     ` Christoph Lameter
2008-02-16 19:54       ` Avi Kivity
2008-02-19  8:46       ` Nick Piggin
2008-02-19 13:30         ` Andrea Arcangeli
2008-02-18  1:51     ` Nick Piggin
2008-02-15  6:49 ` [patch 4/6] mmu_notifier: Skeleton driver for a simple mmu_notifier Christoph Lameter
2008-02-15  6:49 ` Christoph Lameter [this message]
2008-02-16  3:37   ` [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) Andrew Morton
2008-02-16 19:28     ` Christoph Lameter
2008-02-19 23:55   ` Nick Piggin
2008-02-20  3:12     ` Robin Holt
2008-02-20  3:51       ` Nick Piggin
2008-02-20  9:00         ` Robin Holt
2008-02-20  9:05           ` Robin Holt
2008-02-21  4:20           ` Nick Piggin
2008-02-21 10:58             ` Robin Holt
2008-02-26  6:11               ` Nick Piggin
2008-02-26  7:21                 ` [ofa-general] " Gleb Natapov
2008-02-26  8:52                   ` Nick Piggin
2008-02-26  9:38                     ` Gleb Natapov
2008-02-26  9:52                       ` KOSAKI Motohiro
2008-02-26 12:28                     ` Robin Holt
2008-02-26 12:29                 ` Robin Holt
2008-02-27 22:43     ` Christoph Lameter
2008-02-28  0:42       ` Andrea Arcangeli
2008-02-28  1:01         ` Christoph Lameter
2008-02-15  6:49 ` [patch 6/6] mmu_rmap_notifier: Skeleton for complex driver that uses its own rmaps Christoph Lameter
2008-02-16 10:48 ` [PATCH] KVM swapping with MMU Notifiers V7 Andrea Arcangeli
2008-02-16 11:08   ` Andrew Morton
2008-02-18 12:17     ` Andrea Arcangeli
2008-02-16 11:51   ` Robin Holt
2008-02-18 12:35     ` Andrea Arcangeli
  -- strict thread matches above, loose matches on Subject: below --
2008-02-08 22:06 [patch 0/6] MMU Notifiers V6 Christoph Lameter
2008-02-08 22:06 ` [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080215064933.376635032@sgi.com \
    --to=clameter@sgi.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrea@qumranet.com \
    --subject='Re: [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem)' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).