LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Miklos Szeredi <miklos@szeredi.hu>
Cc: salikhmetov@gmail.com, linux-mm@kvack.org, jakob@unthought.net,
	linux-kernel@vger.kernel.org, valdis.kletnieks@vt.edu,
	riel@redhat.com, ksm@42.dk, staubach@redhat.com,
	jesper.juhl@gmail.com, torvalds@linux-foundation.org,
	akpm@linux-foundation.org, protasnb@gmail.com,
	r.e.wolff@bitwizard.nl, hidave.darkstar@gmail.com,
	hch@infradead.org
Subject: Re: [PATCH -v6 2/2] Updating ctime and mtime for memory-mapped files
Date: Fri, 18 Jan 2008 12:00:50 +0100	[thread overview]
Message-ID: <1200654050.5920.14.camel@twins> (raw)
In-Reply-To: <E1JFobo-00009i-Dk@pomaz-ex.szeredi.hu>


On Fri, 2008-01-18 at 11:38 +0100, Miklos Szeredi wrote:
> > On Fri, 2008-01-18 at 10:51 +0100, Miklos Szeredi wrote:
> > 
> > > > diff --git a/mm/msync.c b/mm/msync.c
> > > > index a4de868..a49af28 100644
> > > > --- a/mm/msync.c
> > > > +++ b/mm/msync.c
> > > > @@ -13,11 +13,33 @@
> > > >  #include <linux/syscalls.h>
> > > >  
> > > >  /*
> > > > + * Scan the PTEs for pages belonging to the VMA and mark them read-only.
> > > > + * It will force a pagefault on the next write access.
> > > > + */
> > > > +static void vma_wrprotect(struct vm_area_struct *vma)
> > > > +{
> > > > +	unsigned long addr;
> > > > +
> > > > +	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
> > > > +		spinlock_t *ptl;
> > > > +		pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
> > > > +		pud_t *pud = pud_offset(pgd, addr);
> > > > +		pmd_t *pmd = pmd_offset(pud, addr);
> > > > +		pte_t *pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
> > > > +
> > > > +		if (pte_dirty(*pte) && pte_write(*pte))
> > > > +			*pte = pte_wrprotect(*pte);
> > > > +		pte_unmap_unlock(pte, ptl);
> > > > +	}
> > > > +}
> > > 
> > > What about ram based filesystems?  They don't start out with read-only
> > > pte's, so I think they don't want them read-protected now either.
> > > Unless this is essential for correct mtime/ctime accounting on these
> > > filesystems (I don't think it really is).  But then the mapping should
> > > start out read-only as well, otherwise the time update will only work
> > > after an msync(MS_ASYNC).
> > 
> > page_mkclean() has all the needed logic for this, it also walks the rmap
> > and cleans out all other users, which I think is needed too for
> > consistencies sake:
> > 
> > Process A			Process B
> > 
> > mmap(foo.txt)			mmap(foo.txt)
> > 
> > dirty page
> > 				dirty page
> > 
> > msync(MS_ASYNC)
> > 
> > 				dirty page
> > 
> > msync(MS_ASYNC) <--- now what?!

how about:

diff --git a/mm/msync.c b/mm/msync.c
index 144a757..a1b3fc6 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -14,6 +14,122 @@
 #include <linux/syscalls.h>
 #include <linux/sched.h>
 
+unsigned long masync_pte_range(struct vm_area_struct *vma, pmd_t *pdm,
+		unsigned long addr, unsigned long end)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	arch_enter_lazy_mmu_mode();
+	do {
+		pte_t ptent = *pte;
+
+		if (pte_none(ptent))
+			continue;
+
+		if (!pte_present(ptent))
+			continue;
+
+		if (pte_dirty(ptent) && pte_write(ptent)) {
+			flush_cache_page(vma, addr, pte_pfn(ptent));
+			ptent = ptep_clear_flush(vma, addr, pte);
+			ptent = pte_wrprotect(ptent);
+			set_pte_at(vma->vm_mnm, addr, pte, ptent);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	arch_leave_lazy_mmu_mode();
+	pte_unmap_unlock(pte - 1, ptl);
+
+	return addr;
+}
+
+unsigned long masync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+		unsigned long addr, unsigned long end)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		next = masync_pte_range(vma, pmd, addr, next);
+	} while (pmd++, addr = next, addr != end);
+
+	return addr;
+}
+
+unsigned long masync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+	       	unsigned long addr, unsigned long end)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		next = masync_pmd_range(vma, pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+
+	return addr;
+}
+
+unsigned long masync_pgd_range()
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	pgd = pgd_offset(vma->vm_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_of_clear_bad(pgd))
+			continue;
+		next = masync_pud_range(vma, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+
+	return addr;
+}
+
+int masync_vma_one(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end)
+{
+	if (start < vma->vm_start)
+		start = vma->vm_start;
+
+	if (end > vma->vm_end)
+		end = vma->vm_end;
+
+	masync_pgd_range(vma, start, end);
+
+	return 0;
+}
+
+int masync_vma(struct vm_area_struct *vma, 
+		unsigned long start, unsigned long end)
+{
+	struct address_space *mapping;
+	struct vm_area_struct *vma_iter;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return 0;
+
+	mapping = vma->vm_file->f_mapping;
+
+	if (!mapping_cap_account_dirty(mapping))
+		return 0;
+
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma_iter, &iter, &mapping->i_mmap, start, end)
+		masync_vma_one(vma_iter, start, end);
+	spin_unlock(&mapping->i_mmap_lock);
+
+	return 0;
+}
+
 /*
  * MS_SYNC syncs the entire file - including mappings.
  *




  reply	other threads:[~2008-01-18 11:01 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-01-17 22:31 [PATCH -v6 0/2] Fixing the issue with memory-mapped file times Anton Salikhmetov
2008-01-17 22:31 ` [PATCH -v6 1/2] Massive code cleanup of sys_msync() Anton Salikhmetov
2008-01-18  9:33   ` Miklos Szeredi
2008-01-18 10:30     ` Anton Salikhmetov
2008-01-17 22:31 ` [PATCH -v6 2/2] Updating ctime and mtime for memory-mapped files Anton Salikhmetov
2008-01-18  9:51   ` Miklos Szeredi
2008-01-18 10:15     ` Peter Zijlstra
2008-01-18 10:25       ` Peter Zijlstra
2008-01-18 10:39         ` Anton Salikhmetov
2008-01-18 17:58         ` Linus Torvalds
2008-01-18 18:11           ` Miklos Szeredi
2008-01-18 18:28             ` Rik van Riel
2008-01-18 18:51               ` Miklos Szeredi
2008-01-18 18:43             ` Linus Torvalds
2008-01-18 18:57               ` Miklos Szeredi
2008-01-18 19:08                 ` Linus Torvalds
2008-01-18 19:22                   ` Miklos Szeredi
2008-01-18 19:35                     ` Linus Torvalds
2008-01-18 19:58                       ` Anton Salikhmetov
2008-01-18 20:22                         ` Linus Torvalds
2008-01-18 21:03                           ` Anton Salikhmetov
2008-01-18 21:27                             ` Linus Torvalds
2008-01-18 22:04                               ` Anton Salikhmetov
2008-01-18 22:21                                 ` Linus Torvalds
2008-01-18 22:35                                   ` Anton Salikhmetov
2008-01-18 22:32                       ` Ingo Oeser
2008-01-18 22:47                         ` Linus Torvalds
2008-01-18 22:54                           ` Rik van Riel
2008-01-19  0:50                             ` Matt Mackall
2008-01-19  4:25                               ` Rik van Riel
2008-01-19 10:22                               ` Miklos Szeredi
2008-01-19 15:49                                 ` Matt Mackall
2008-01-21 14:25                           ` Peter Staubach
2008-01-21 14:36                             ` Anton Salikhmetov
2008-01-18 10:38       ` Miklos Szeredi
2008-01-18 11:00         ` Peter Zijlstra [this message]
2008-01-18 11:17           ` Miklos Szeredi
2008-01-18 11:23             ` Peter Zijlstra
2008-01-18 11:36               ` Miklos Szeredi
2008-01-18  9:40 ` [PATCH -v6 0/2] Fixing the issue with memory-mapped file times Miklos Szeredi
2008-01-18 10:31   ` Anton Salikhmetov
2008-01-18 19:48   ` Anton Salikhmetov
2008-01-19 10:45     ` Miklos Szeredi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1200654050.5920.14.camel@twins \
    --to=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=hch@infradead.org \
    --cc=hidave.darkstar@gmail.com \
    --cc=jakob@unthought.net \
    --cc=jesper.juhl@gmail.com \
    --cc=ksm@42.dk \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=miklos@szeredi.hu \
    --cc=protasnb@gmail.com \
    --cc=r.e.wolff@bitwizard.nl \
    --cc=riel@redhat.com \
    --cc=salikhmetov@gmail.com \
    --cc=staubach@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=valdis.kletnieks@vt.edu \
    --subject='Re: [PATCH -v6 2/2] Updating ctime and mtime for memory-mapped files' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).