LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Christoph Lameter <clameter@sgi.com>
To: akpm@linux-foundation.org
Cc: Andrea Arcangeli <andrea@qumranet.com>
Cc: Robin Holt <holt@sgi.com>, Avi Kivity <avi@qumranet.com>,
	Izik Eidus <izike@qumranet.com>
Cc: kvm-devel@lists.sourceforge.net
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>, general@lists.openfabrics.org
Cc: Steve Wise <swise@opengridcomputing.com>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Kanoj Sarcar <kanojsarcar@yahoo.com>
Cc: steiner@sgi.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: daniel.blueman@quadrics.com
Subject: [patch 4/6] mmu_notifier: Skeleton driver for a simple mmu_notifier
Date: Thu, 14 Feb 2008 22:49:03 -0800	[thread overview]
Message-ID: <20080215064933.177587095@sgi.com> (raw)
In-Reply-To: <20080215064859.384203497@sgi.com>

[-- Attachment #1: mmu_skeleton --]
[-- Type: text/plain, Size: 7685 bytes --]

This is example code for a simple device driver interface to unmap
pages that were externally mapped.

Locking is simple through a single lock that is used to protect the
device driver's data structures as well as a counter that tracks the
active invalidates on a single address space.

The invalidation of external ptes must be possible with code that does
not require sleeping. The lock is taken for all driver operations on
the mmu that the driver manages. Locking could be made more sophisticated
but I think this is going to be okay for most uses.

Signed-off-by: Christoph Lameter <clameter@sgi.com>

---
 Documentation/mmu_notifier/skeleton.c |  267 ++++++++++++++++++++++++++++++++++
 1 file changed, 267 insertions(+)

Index: linux-2.6/Documentation/mmu_notifier/skeleton.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/Documentation/mmu_notifier/skeleton.c	2008-02-14 22:23:18.000000000 -0800
@@ -0,0 +1,267 @@
+#include <linux/mm.h>
+#include <linux/mmu_notifier.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+
+/*
+ * Skeleton for an mmu notifier without rmap callbacks and no need to sleep
+ * during invalidate_page().
+ *
+ * (C) 2008 Silicon Graphics, Inc.
+ * 		Christoph Lameter <clameter@sgi.com>
+ *
+ * Note that the locking is fairly basic. One can add various optimizations
+ * here and there. There is a single lock for an address space which should be
+ * satisfactory for most cases. If not then the lock can be split like the
+ * pte_lock in Linux. It is most likely best to place the locks in the
+ * page table structure or into whatever the external mmu uses to
+ * track the mappings.
+ */
+
+struct my_mmu {
+	/* MMU notifier specific fields */
+	struct mmu_notifier notifier;
+	spinlock_t lock;	/* Protects counter and individual zaps */
+	int invalidates;	/* Number of active range_invalidates */
+};
+
+/*
+ * Stub: the driver inserts pfn at address into m here. Called with m->lock held
+ */
+static void my_mmu_insert_page(struct my_mmu *m,
+		unsigned long address, unsigned long pfn)
+{
+	/* Must be provided; pfn is an unsigned long, hence %lu */
+	printk(KERN_INFO "insert page %p address=%lx pfn=%lu\n",
+							m, address, pfn);
+}
+
+/*
+ * Stub for zapping a single address in m. Called with m->lock held
+ * (optional but usually required to protect driver data structures).
+ */
+static void my_mmu_zap_page(struct my_mmu *m, unsigned long address)
+{
+	/* Must be provided by the driver; the skeleton only logs the call */
+	printk(KERN_INFO "zap page %p address=%lx\n", m, address);
+}
+
+/*
+ * Zap a range (stub). NOTE(review): callers here do not take m->lock.
+ */
+static void my_mmu_zap_range(struct my_mmu *m,
+	unsigned long start, unsigned long end, int atomic)
+{
+	/* Must be provided by the driver; the skeleton only logs the call */
+	printk(KERN_INFO "zap range %p address=%lx-%lx atomic=%d\n",
+						m, start, end, atomic);
+}
+
+/*
+ * invalidate_page callback: zap an individual page (mm is not used).
+ *
+ * Serialization with establishment of a new external pte occurs
+ * through the pte lock. The m->lock is taken to serialize access
+ * to the driver private data. If the driver does not need this
+ * serialization then the lock can be omitted.
+ */
+static void my_mmu_invalidate_page(struct mmu_notifier *mn,
+	struct mm_struct *mm, unsigned long address)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	spin_lock(&m->lock);
+	my_mmu_zap_page(m, address);
+	spin_unlock(&m->lock);
+}
+
+/*
+ * Increment/decrement the count of active range invalidates under m->lock
+ */
+static inline void inc_active(struct my_mmu *m)
+{
+	spin_lock(&m->lock);
+	m->invalidates++;
+	spin_unlock(&m->lock);
+}
+
+static inline void dec_active(struct my_mmu *m)
+{
+	spin_lock(&m->lock);
+	m->invalidates--;	/* One range invalidate has finished */
+	spin_unlock(&m->lock);
+}
+
+static void my_mmu_invalidate_range_begin(struct mmu_notifier *mn,
+	struct mm_struct *mm, unsigned long start, unsigned long end,
+	int atomic)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	inc_active(m);	/* Nonzero count makes my_mmu_populate_page() back off */
+	my_mmu_zap_range(m, start, end, atomic);
+}
+
+static void my_mmu_invalidate_range_end(struct mmu_notifier *mn,
+	struct mm_struct *mm, unsigned long start, unsigned long end,
+	int atomic)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	dec_active(m);		/* New references allowed once the count is zero */
+}
+
+/*
+ * Populate a page: look up the pte for address and pass the page's pfn
+ * to my_mmu_insert_page(). Returns the page, or ERR_PTR(-EAGAIN) when
+ * the caller should retry. Takes mmap_sem itself (trylock if atomic);
+ * that can be omitted if the caller already holds the semaphore.
+ * NOTE(review): in atomic mode a NULL from vm_normal_page() appears to
+ * be returned as-is, so callers cannot rely on IS_ERR() alone.
+ */
+struct page *my_mmu_populate_page(struct my_mmu *m,
+	struct vm_area_struct *vma,
+	unsigned long address, int atomic, int write)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct page *page = ERR_PTR(-EAGAIN);
+	int err;
+	int done = 0;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+
+	/* Unlocked early check: back off while a range invalidate is running */
+	if (m->invalidates)
+		return ERR_PTR(-EAGAIN);
+
+	if (atomic) {
+		if (!down_read_trylock(&mm->mmap_sem))
+			return ERR_PTR(-EAGAIN);
+	} else
+		down_read(&mm->mmap_sem);
+
+	do {
+		page = ERR_PTR(-EAGAIN);	/* Default result of each pass */
+
+		if (m->invalidates)
+			break;
+
+		pgd = pgd_offset(mm, address);
+		if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+			goto check;
+
+		pud = pud_offset(pgd, address);
+		if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+			goto check;
+
+		pmd = pmd_offset(pud, address);
+		if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+			goto check;
+
+		ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+		if (!ptep)
+			goto check;
+
+		pte = *ptep;
+		if (!pte_present(pte))
+			goto pte_unlock;
+		if (write && !pte_write(pte))
+			goto pte_unlock;
+
+		page = vm_normal_page(vma, address, pte);
+		if (page) {
+			done = 1;
+			/*
+			 * The m->lock is held to ensure that the count of
+			 * current invalidates stays constant while inserting.
+			 * invalidate_page() is held off by the pte lock.
+			 */
+			spin_lock(&m->lock);
+
+			if (!m->invalidates)
+				my_mmu_insert_page(m, address, page_to_pfn(page));
+			else
+				page = ERR_PTR(-EAGAIN);
+
+			spin_unlock(&m->lock);
+		}
+pte_unlock:
+		pte_unmap_unlock(ptep, ptl);
+check:
+
+		if (done || atomic)
+			break;
+
+		/*
+		 * Need to run the page fault handler to get the pte entry
+		 * set up right; the next loop pass then retries the lookup.
+		 */
+		err = get_user_pages(current, vma->vm_mm, address, 1,
+					write, 1, NULL, NULL);
+
+		if (err < 0) {
+			page = ERR_PTR(err);
+			break;
+		}
+
+	} while (!done);
+
+	up_read(&vma->vm_mm->mmap_sem);
+	return page;
+}
+
+/*
+ * All other threads accessing this mm_struct must have terminated by now.
+ */
+static void my_mmu_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	my_mmu_zap_range(m, 0, TASK_SIZE, 0);	/* Whole range, atomic=0 */
+	kfree(m);	/* Allocated by kzalloc() in my_mmu_attach_to_process() */
+	printk(KERN_INFO "MMU Notifier detaching\n");
+}
+
+static struct mmu_notifier_ops my_mmu_ops = {	/* Positional initializers: */
+	my_mmu_release,		/* order must match the struct's members */
+	NULL,			/* No aging function */
+	my_mmu_invalidate_page,
+	my_mmu_invalidate_range_begin,
+	my_mmu_invalidate_range_end
+};
+
+/*
+ * Must be called to activate callbacks for mm; returns 0 or -ENOMEM.
+ */
+int my_mmu_attach_to_process(struct mm_struct *mm)
+{
+	struct my_mmu *m = kzalloc(sizeof(struct my_mmu), GFP_KERNEL);
+
+	if (!m)
+		return -ENOMEM;
+
+	m->notifier.ops = &my_mmu_ops;	/* m->invalidates is 0 from kzalloc() */
+	spin_lock_init(&m->lock);
+
+	/*
+	 * mmap_sem handling can be omitted if it is guaranteed that
+	 * the context from which my_mmu_attach_to_process is called
+	 * is already holding a writelock on mmap_sem.
+	 */
+	down_write(&mm->mmap_sem);
+	mmu_notifier_register(&m->notifier, mm);
+	up_write(&mm->mmap_sem);
+
+	/*
+	 * RCU sync is expensive but necessary if we need to guarantee
+	 * that multiple threads running on other cpus have seen the
+	 * notifier changes.
+	 */
+	synchronize_rcu();
+	return 0;
+}
+

-- 

  parent reply	other threads:[~2008-02-15  6:50 UTC|newest]

Thread overview: 91+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-02-15  6:48 [patch 0/6] MMU Notifiers V7 Christoph Lameter
2008-02-15  6:49 ` [patch 1/6] mmu_notifier: Core code Christoph Lameter
2008-02-16  3:37   ` Andrew Morton
2008-02-16  8:45     ` Avi Kivity
2008-02-16  8:56       ` Andrew Morton
2008-02-16  9:21         ` Avi Kivity
2008-02-16 10:41     ` Brice Goglin
2008-02-16 10:58       ` Andrew Morton
2008-02-16 19:31         ` Christoph Lameter
2008-02-16 19:21     ` Christoph Lameter
2008-02-17  3:01       ` Andrea Arcangeli
2008-02-17 12:24         ` Robin Holt
2008-02-17  5:04     ` Doug Maxey
2008-02-18 22:33   ` Roland Dreier
2008-02-15  6:49 ` [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges Christoph Lameter
2008-02-16  3:37   ` Andrew Morton
2008-02-16 19:26     ` Christoph Lameter
2008-02-19  8:54   ` Nick Piggin
2008-02-19 13:34     ` Andrea Arcangeli
2008-02-27 22:23       ` Christoph Lameter
2008-02-27 23:57         ` Andrea Arcangeli
2008-02-19 23:08   ` Nick Piggin
2008-02-20  1:00     ` Andrea Arcangeli
2008-02-20  3:00       ` Robin Holt
2008-02-20  3:11         ` Nick Piggin
2008-02-20  3:19           ` Robin Holt
2008-02-27 22:39       ` Christoph Lameter
2008-02-28  0:38         ` Andrea Arcangeli
2008-02-27 22:35     ` Christoph Lameter
2008-02-27 22:42       ` Jack Steiner
2008-02-28  0:10       ` Christoph Lameter
2008-02-28  0:11       ` Andrea Arcangeli
2008-02-28  0:14         ` Christoph Lameter
2008-02-28  0:52           ` Andrea Arcangeli
2008-02-28  1:03             ` Christoph Lameter
2008-02-28  1:10               ` Andrea Arcangeli
2008-02-28 18:43                 ` Christoph Lameter
2008-02-29  0:55                   ` Andrea Arcangeli
2008-02-29  0:59                     ` Christoph Lameter
2008-02-29 13:13                       ` Andrea Arcangeli
2008-02-29 19:55                         ` Christoph Lameter
2008-02-29 20:17                           ` Andrea Arcangeli
2008-02-29 21:03                             ` Christoph Lameter
2008-02-29 21:23                               ` Andrea Arcangeli
2008-02-29 21:29                                 ` Christoph Lameter
2008-02-29 21:34                                 ` Christoph Lameter
2008-02-29 21:48                                   ` Andrea Arcangeli
2008-02-29 22:12                                     ` Christoph Lameter
2008-02-29 22:41                                       ` Andrea Arcangeli
2008-02-28 10:53             ` Robin Holt
2008-03-03  5:11       ` Nick Piggin
2008-03-03 19:28         ` Christoph Lameter
2008-03-03 19:50           ` Nick Piggin
2008-03-04 18:58             ` Christoph Lameter
2008-03-05  0:52               ` Nick Piggin
2008-02-15  6:49 ` [patch 3/6] mmu_notifier: invalidate_page callbacks Christoph Lameter
2008-02-16  3:37   ` Andrew Morton
2008-02-16 11:07     ` Andrea Arcangeli
2008-02-16 19:22     ` Christoph Lameter
2008-02-16 19:54       ` Avi Kivity
2008-02-19  8:46       ` Nick Piggin
2008-02-19 13:30         ` Andrea Arcangeli
2008-02-18  1:51     ` Nick Piggin
2008-02-15  6:49 ` Christoph Lameter [this message]
2008-02-15  6:49 ` [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) Christoph Lameter
2008-02-16  3:37   ` Andrew Morton
2008-02-16 19:28     ` Christoph Lameter
2008-02-19 23:55   ` Nick Piggin
2008-02-20  3:12     ` Robin Holt
2008-02-20  3:51       ` Nick Piggin
2008-02-20  9:00         ` Robin Holt
2008-02-20  9:05           ` Robin Holt
2008-02-21  4:20           ` Nick Piggin
2008-02-21 10:58             ` Robin Holt
2008-02-26  6:11               ` Nick Piggin
2008-02-26  7:21                 ` [ofa-general] " Gleb Natapov
2008-02-26  8:52                   ` Nick Piggin
2008-02-26  9:38                     ` Gleb Natapov
2008-02-26  9:52                       ` KOSAKI Motohiro
2008-02-26 12:28                     ` Robin Holt
2008-02-26 12:29                 ` Robin Holt
2008-02-27 22:43     ` Christoph Lameter
2008-02-28  0:42       ` Andrea Arcangeli
2008-02-28  1:01         ` Christoph Lameter
2008-02-15  6:49 ` [patch 6/6] mmu_rmap_notifier: Skeleton for complex driver that uses its own rmaps Christoph Lameter
2008-02-16 10:48 ` [PATCH] KVM swapping with MMU Notifiers V7 Andrea Arcangeli
2008-02-16 11:08   ` Andrew Morton
2008-02-18 12:17     ` Andrea Arcangeli
2008-02-16 11:51   ` Robin Holt
2008-02-18 12:35     ` Andrea Arcangeli
  -- strict thread matches above, loose matches on Subject: below --
2008-02-08 22:06 [patch 0/6] MMU Notifiers V6 Christoph Lameter
2008-02-08 22:06 ` [patch 4/6] mmu_notifier: Skeleton driver for a simple mmu_notifier Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080215064933.177587095@sgi.com \
    --to=clameter@sgi.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrea@qumranet.com \
    --subject='Re: [patch 4/6] mmu_notifier: Skeleton driver for a simple mmu_notifier' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).