LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Christoph Lameter <clameter@sgi.com>
To: akpm@linux-foundation.org
Cc: Andrea Arcangeli <andrea@qumranet.com>
Cc: Robin Holt <holt@sgi.com>, Avi Kivity <avi@qumranet.com>,
	Izik Eidus <izike@qumranet.com>
Cc: kvm-devel@lists.sourceforge.net
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>, general@lists.openfabrics.org
Cc: Steve Wise <swise@opengridcomputing.com>
Cc: Roland Dreier <rdreier@cisco.com>
Cc: Kanoj Sarcar <kanojsarcar@yahoo.com>
Cc: steiner@sgi.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: daniel.blueman@quadrics.com
Subject: [patch 6/6] mmu_rmap_notifier: Skeleton for complex driver that uses its own rmaps
Date: Thu, 14 Feb 2008 22:49:05 -0800	[thread overview]
Message-ID: <20080215064933.630179244@sgi.com> (raw)
In-Reply-To: <20080215064859.384203497@sgi.com>

[-- Attachment #1: mmu_rmap_skeleton --]
[-- Type: text/plain, Size: 8619 bytes --]

The skeleton for the rmap notifier leaves the invalidate_page method of
the mmu_notifier empty and hooks a new invalidate_page callback into the
global chain for mmu_rmap_notifiers.

There are several simplifications in here to avoid making this too complex.
The reverse maps need to consist of references to vmas, for example.

Signed-off-by: Christoph Lameter <clameter@sgi.com>

---
 Documentation/mmu_notifier/skeleton_rmap.c |  311 +++++++++++++++++++++++++++++
 1 file changed, 311 insertions(+)

Index: linux-2.6/Documentation/mmu_notifier/skeleton_rmap.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/Documentation/mmu_notifier/skeleton_rmap.c	2008-02-14 22:23:01.000000000 -0800
@@ -0,0 +1,311 @@
+#include <linux/mm.h>
+#include <linux/mmu_notifier.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+
+/*
+ * Skeleton for an mmu notifier with rmap callbacks and sleeping during
+ * invalidate_page.
+ *
+ * (C) 2008 Silicon Graphics, Inc.
+ * 		Christoph Lameter <clameter@sgi.com>
+ *
+ * Note that the locking is fairly basic. One can add various optimizations
+ * here and there. There is a single lock for an address space which should be
+ * satisfactory for most cases. If not then the lock can be split like the
+ * pte_lock in Linux. It is most likely best to place the locks in the
+ * page table structure or into whatever the external mmu uses to
+ * track the mappings.
+ */
+
+struct my_mmu {
+	/* MMU notifier specific fields */
+	struct mmu_notifier notifier;	/* Embedded; container_of() recovers my_mmu */
+	spinlock_t lock;	/* Protects counter and individual zaps */
+	int invalidates;	/* Number of active range invalidates */
+
+       /* Rmap support */
+       struct list_head list;	/* Entry in the global my_mmu_list */
+       unsigned long base;	/* Not referenced elsewhere in this skeleton */
+};
+
+/*
+ * Enter pfn at address into the external mmu. Called with m->lock held.
+ */
+static void my_mmu_insert_page(struct my_mmu *m,
+		unsigned long address, unsigned long pfn)
+{
+	/* Must be provided by the driver; the stub only logs the request */
+	printk(KERN_INFO "insert page %p address=%lx pfn=%lu\n",
+							m, address, pfn);
+}
+
+/*
+ * Meant to run under m->lock. NOTE(review): no caller in this file holds it.
+ */
+static void my_mmu_zap_range(struct my_mmu *m,
+	unsigned long start, unsigned long end, int atomic)
+{
+	/* Must be provided by the driver; the stub only logs the request */
+	printk(KERN_INFO "zap range %p address=%lx-%lx atomic=%d\n",
+						m, start, end, atomic);
+}
+
+/*
+ * Called with m->lock held (optional but usually required to
+ * protect data structures of the driver).
+ */
+static void my_mmu_zap_page(struct my_mmu *m, unsigned long address)
+{
+	/* Must be provided by the driver; the stub only logs the request */
+	printk(KERN_INFO "zap page %p address=%lx\n", m, address);
+}
+
+/*
+ * Count of running range invalidates; only modified under m->lock.
+ */
+static inline void inc_active(struct my_mmu *m)
+{
+	spin_lock(&m->lock);
+	m->invalidates++;
+	spin_unlock(&m->lock);
+}
+
+static inline void dec_active(struct my_mmu *m)
+{
+	spin_lock(&m->lock);
+	m->invalidates--;	/* Undoes one inc_active() */
+	spin_unlock(&m->lock);
+}
+
+static void my_mmu_invalidate_range_begin(struct mmu_notifier *mn,
+	struct mm_struct *mm, unsigned long start, unsigned long end,
+	int atomic)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	inc_active(m);	/* my_mmu_populate_page() now backs off with -EAGAIN */
+	my_mmu_zap_range(m, start, end, atomic);	/* m->lock is not held here */
+}
+
+static void my_mmu_invalidate_range_end(struct mmu_notifier *mn,
+	struct mm_struct *mm, unsigned long start, unsigned long end,
+	int atomic)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	dec_active(m);	/* New references are possible again once this hits zero */
+}
+
+/*
+ * Populate a page: find the page mapped at address and insert it into m.
+ *
+ * Returns the page or an ERR_PTR(); -EAGAIN means please retry.
+ *
+ * Acquisition of mmap_sem can be omitted if the caller already holds
+ * the semaphore.
+ */
+struct page *my_mmu_populate_page(struct my_mmu *m,
+	struct vm_area_struct *vma,
+	unsigned long address, int write)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct page *page;
+	int err;
+	int done = 0;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+
+	/* Unlocked early check; it is repeated under m->lock before inserting */
+	if (m->invalidates)
+		return ERR_PTR(-EAGAIN);
+
+	down_read(&mm->mmap_sem);
+	do {
+		page = ERR_PTR(-EAGAIN);
+
+		if (m->invalidates)
+			break;
+
+		pgd = pgd_offset(mm, address);
+		if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+			goto check;
+
+		pud = pud_offset(pgd, address);
+		if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+			goto check;
+
+		pmd = pmd_offset(pud, address);
+		if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+			goto check;
+
+		ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+		if (!ptep)
+			goto check;
+
+		pte = *ptep;
+		if (!pte_present(pte))
+			goto pte_unlock;
+		if (write && !pte_write(pte))
+			goto pte_unlock;
+
+		page = vm_normal_page(vma, address, pte);
+		if (page) {
+			done = 1;
+			/*
+			 * The m->lock is held to ensure that the count of
+			 * current invalidates stays constant.
+			 * invalidate_page() is held off by the pte lock.
+			 */
+			spin_lock(&m->lock);
+
+			if (!m->invalidates)
+				my_mmu_insert_page(m, address, page_to_pfn(page));
+			else
+				page = ERR_PTR(-EAGAIN);
+
+			spin_unlock(&m->lock);
+		}
+pte_unlock:
+		pte_unmap_unlock(ptep, ptl);
+check:
+
+		if (done)
+			break;
+
+		/*
+		 * Run the page fault handler to get the pte entry set up
+		 * right, then retry. NOTE(review): retries are unbounded.
+		 */
+		err = get_user_pages(current, vma->vm_mm, address, 1,
+					write, 1, NULL, NULL);
+
+		if (err < 0) {
+			page = ERR_PTR(err);
+			break;
+		}
+
+	} while (!done);
+
+	up_read(&vma->vm_mm->mmap_sem);
+	return page;
+}
+
+static LIST_HEAD(my_mmu_list);	/* Rmap: all attached my_mmu structs */
+static DECLARE_RWSEM(listlock);	/* Rmap: protects my_mmu_list */
+
+/*
+ * All other threads accessing this mm_struct must have terminated by now.
+ */
+static void my_mmu_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	my_mmu_zap_range(m, 0, TASK_SIZE, 0);
+	down_write(&listlock);	/* rmap callbacks may still walk my_mmu_list */
+	list_del(&m->list);
+	up_write(&listlock);	/* No walker can reach m anymore: safe to free */
+	kfree(m);
+	printk(KERN_INFO "MMU Notifier terminating\n");
+}
+
+static struct mmu_notifier_ops my_mmu_ops = {
+	my_mmu_release,
+	NULL,		/* No aging function */
+	NULL,		/* No atomic invalidate_page function */
+	my_mmu_invalidate_range_begin,
+	my_mmu_invalidate_range_end
+};
+
+/*
+ * Must be called to activate callbacks from a process. Returns 0 or -ENOMEM.
+ */
+int my_mmu_attach_to_process(struct mm_struct *mm)
+{
+	struct my_mmu *m = kzalloc(sizeof(struct my_mmu), GFP_KERNEL);
+
+	if (!m)
+		return -ENOMEM;
+
+	m->notifier.ops = &my_mmu_ops;
+	spin_lock_init(&m->lock);
+
+	/*
+	 * mmap_sem handling can be omitted if it is guaranteed that
+	 * the context from which my_mmu_attach_to_process is called
+	 * is already holding a writelock on mmap_sem.
+	 */
+	down_write(&mm->mmap_sem);
+	mmu_notifier_register(&m->notifier, mm);
+	up_write(&mm->mmap_sem);
+	down_write(&listlock);
+	list_add(&m->list, &my_mmu_list);	/* Now visible to the rmap callback */
+	up_write(&listlock);
+
+	/*
+	 * RCU sync is expensive but necessary if we need to guarantee
+	 * that multiple threads running on other cpus have seen the
+	 * notifier changes.
+	 */
+	synchronize_rcu();
+	return 0;
+}
+
+
+static void my_sleeping_invalidate_page(struct my_mmu *m, unsigned long address)
+{
+	/* Must be provided; called from the rmap callback under listlock (read) */
+	spin_lock(&m->lock);	/* Only taken to ensure mmu data integrity */
+	my_mmu_zap_page(m, address);
+	spin_unlock(&m->lock);
+	printk(KERN_INFO "Sleeping invalidate_page %p address=%lx\n",
+                                                               m, address);
+}
+
+static unsigned long my_mmu_find_addr(struct my_mmu *m, struct page *page)
+{
+	/* Must be provided: address of page in m; this stub always says -EFAULT */
+	return -EFAULT;
+}
+
+/*
+ * A reference must be held on the page passed and the page passed
+ * must be locked. No spinlocks are held. invalidate_page() is held
+ * off by us holding the page lock.
+ */
+static void my_mmu_rmap_invalidate_page(struct mmu_rmap_notifier *mrn,
+							struct page *page)
+{
+	struct my_mmu *m;
+
+	BUG_ON(!PageLocked(page));
+	down_read(&listlock);	/* Keeps my_mmu_list stable during the walk */
+	list_for_each_entry(m, &my_mmu_list, list) {
+		unsigned long address = my_mmu_find_addr(m, page);
+
+		if (address != -EFAULT)	/* -EFAULT: nothing to zap in this m */
+			my_sleeping_invalidate_page(m, address);
+	}
+	up_read(&listlock);
+}
+
+static struct mmu_rmap_notifier_ops my_mmu_rmap_ops = {
+	.invalidate_page = my_mmu_rmap_invalidate_page	/* Only callback set here */
+};
+
+static struct mmu_rmap_notifier my_mmu_rmap_notifier = {
+	.ops = &my_mmu_rmap_ops	/* Registered once, in my_mmu_init() */
+};
+
+static int __init my_mmu_init(void)
+{
+	mmu_rmap_notifier_register(&my_mmu_rmap_notifier);	/* One global notifier */
+	return 0;	/* Always reports success */
+}
+
+late_initcall(my_mmu_init);
+

-- 

  parent reply	other threads:[~2008-02-15  6:51 UTC|newest]

Thread overview: 91+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-02-15  6:48 [patch 0/6] MMU Notifiers V7 Christoph Lameter
2008-02-15  6:49 ` [patch 1/6] mmu_notifier: Core code Christoph Lameter
2008-02-16  3:37   ` Andrew Morton
2008-02-16  8:45     ` Avi Kivity
2008-02-16  8:56       ` Andrew Morton
2008-02-16  9:21         ` Avi Kivity
2008-02-16 10:41     ` Brice Goglin
2008-02-16 10:58       ` Andrew Morton
2008-02-16 19:31         ` Christoph Lameter
2008-02-16 19:21     ` Christoph Lameter
2008-02-17  3:01       ` Andrea Arcangeli
2008-02-17 12:24         ` Robin Holt
2008-02-17  5:04     ` Doug Maxey
2008-02-18 22:33   ` Roland Dreier
2008-02-15  6:49 ` [patch 2/6] mmu_notifier: Callbacks to invalidate address ranges Christoph Lameter
2008-02-16  3:37   ` Andrew Morton
2008-02-16 19:26     ` Christoph Lameter
2008-02-19  8:54   ` Nick Piggin
2008-02-19 13:34     ` Andrea Arcangeli
2008-02-27 22:23       ` Christoph Lameter
2008-02-27 23:57         ` Andrea Arcangeli
2008-02-19 23:08   ` Nick Piggin
2008-02-20  1:00     ` Andrea Arcangeli
2008-02-20  3:00       ` Robin Holt
2008-02-20  3:11         ` Nick Piggin
2008-02-20  3:19           ` Robin Holt
2008-02-27 22:39       ` Christoph Lameter
2008-02-28  0:38         ` Andrea Arcangeli
2008-02-27 22:35     ` Christoph Lameter
2008-02-27 22:42       ` Jack Steiner
2008-02-28  0:10       ` Christoph Lameter
2008-02-28  0:11       ` Andrea Arcangeli
2008-02-28  0:14         ` Christoph Lameter
2008-02-28  0:52           ` Andrea Arcangeli
2008-02-28  1:03             ` Christoph Lameter
2008-02-28  1:10               ` Andrea Arcangeli
2008-02-28 18:43                 ` Christoph Lameter
2008-02-29  0:55                   ` Andrea Arcangeli
2008-02-29  0:59                     ` Christoph Lameter
2008-02-29 13:13                       ` Andrea Arcangeli
2008-02-29 19:55                         ` Christoph Lameter
2008-02-29 20:17                           ` Andrea Arcangeli
2008-02-29 21:03                             ` Christoph Lameter
2008-02-29 21:23                               ` Andrea Arcangeli
2008-02-29 21:29                                 ` Christoph Lameter
2008-02-29 21:34                                 ` Christoph Lameter
2008-02-29 21:48                                   ` Andrea Arcangeli
2008-02-29 22:12                                     ` Christoph Lameter
2008-02-29 22:41                                       ` Andrea Arcangeli
2008-02-28 10:53             ` Robin Holt
2008-03-03  5:11       ` Nick Piggin
2008-03-03 19:28         ` Christoph Lameter
2008-03-03 19:50           ` Nick Piggin
2008-03-04 18:58             ` Christoph Lameter
2008-03-05  0:52               ` Nick Piggin
2008-02-15  6:49 ` [patch 3/6] mmu_notifier: invalidate_page callbacks Christoph Lameter
2008-02-16  3:37   ` Andrew Morton
2008-02-16 11:07     ` Andrea Arcangeli
2008-02-16 19:22     ` Christoph Lameter
2008-02-16 19:54       ` Avi Kivity
2008-02-19  8:46       ` Nick Piggin
2008-02-19 13:30         ` Andrea Arcangeli
2008-02-18  1:51     ` Nick Piggin
2008-02-15  6:49 ` [patch 4/6] mmu_notifier: Skeleton driver for a simple mmu_notifier Christoph Lameter
2008-02-15  6:49 ` [patch 5/6] mmu_notifier: Support for drivers with revers maps (f.e. for XPmem) Christoph Lameter
2008-02-16  3:37   ` Andrew Morton
2008-02-16 19:28     ` Christoph Lameter
2008-02-19 23:55   ` Nick Piggin
2008-02-20  3:12     ` Robin Holt
2008-02-20  3:51       ` Nick Piggin
2008-02-20  9:00         ` Robin Holt
2008-02-20  9:05           ` Robin Holt
2008-02-21  4:20           ` Nick Piggin
2008-02-21 10:58             ` Robin Holt
2008-02-26  6:11               ` Nick Piggin
2008-02-26  7:21                 ` [ofa-general] " Gleb Natapov
2008-02-26  8:52                   ` Nick Piggin
2008-02-26  9:38                     ` Gleb Natapov
2008-02-26  9:52                       ` KOSAKI Motohiro
2008-02-26 12:28                     ` Robin Holt
2008-02-26 12:29                 ` Robin Holt
2008-02-27 22:43     ` Christoph Lameter
2008-02-28  0:42       ` Andrea Arcangeli
2008-02-28  1:01         ` Christoph Lameter
2008-02-15  6:49 ` Christoph Lameter [this message]
2008-02-16 10:48 ` [PATCH] KVM swapping with MMU Notifiers V7 Andrea Arcangeli
2008-02-16 11:08   ` Andrew Morton
2008-02-18 12:17     ` Andrea Arcangeli
2008-02-16 11:51   ` Robin Holt
2008-02-18 12:35     ` Andrea Arcangeli
  -- strict thread matches above, loose matches on Subject: below --
2008-02-08 22:06 [patch 0/6] MMU Notifiers V6 Christoph Lameter
2008-02-08 22:06 ` [patch 6/6] mmu_rmap_notifier: Skeleton for complex driver that uses its own rmaps Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080215064933.630179244@sgi.com \
    --to=clameter@sgi.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrea@qumranet.com \
    --subject='Re: [patch 6/6] mmu_rmap_notifier: Skeleton for complex driver that uses its own rmaps' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).