LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Ian Kent <raven@themaw.net>
To: Olivier Galibert <galibert@pobox.com>
Cc: "Hack inc." <linux-kernel@vger.kernel.org>, autofs@linux.kernel.org
Subject: [PATCH 1/2] Re: [autofs] Bad race condition in the new autofs protocol somewhere
Date: Mon, 12 Feb 2007 15:43:14 +0900	[thread overview]
Message-ID: <1171262594.18376.13.camel@raven.themaw.net> (raw)
In-Reply-To: <1170901990.3383.18.camel@raven.themaw.net>

On Thu, 2007-02-08 at 11:33 +0900, Ian Kent wrote:
> On Wed, 2007-02-07 at 19:18 +0100, Olivier Galibert wrote:
> > On Thu, Feb 08, 2007 at 03:07:41AM +0900, Ian Kent wrote:
> > > It may be better to update to a later kernel so I don't have to port the
> > > patch to several different kernels. Is that possible?
> > 
> > Sure, 2.6.20 or -git?
> 
> 2.6.20 has all the patches I've proposed so far except for the one we're
> working on so that would be best for me.
> 
> Seems there may still be a problem with the patch so I'll let you know
> what's happening as soon as I can.

I think I'm just about done.

Could you try using the two patches here against 2.6.20 please:

Ian

---

--- linux-2.6.20/fs/autofs4/autofs_i.h.lookup-expire-race	2007-02-05 03:44:54.000000000 +0900
+++ linux-2.6.20/fs/autofs4/autofs_i.h	2007-02-12 12:15:17.000000000 +0900
@@ -52,6 +52,8 @@ struct autofs_info {
 
 	int		flags;
 
+	struct list_head rehash;
+
 	struct autofs_sb_info *sbi;
 	unsigned long last_used;
 	atomic_t count;
@@ -110,6 +112,8 @@ struct autofs_sb_info {
 	struct mutex wq_mutex;
 	spinlock_t fs_lock;
 	struct autofs_wait_queue *queues; /* Wait queue pointer */
+	spinlock_t rehash_lock;
+	struct list_head rehash_list;
 };
 
 static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
--- linux-2.6.20/fs/autofs4/root.c.lookup-expire-race	2007-02-05 03:44:54.000000000 +0900
+++ linux-2.6.20/fs/autofs4/root.c	2007-02-12 12:14:51.000000000 +0900
@@ -263,7 +263,7 @@ static int try_to_fill_dentry(struct den
 		 */
 		status = d_invalidate(dentry);
 		if (status != -EBUSY)
-			return -ENOENT;
+			return -EAGAIN;
 	}
 
 	DPRINTK("dentry=%p %.*s ino=%p",
@@ -413,7 +413,16 @@ static int autofs4_revalidate(struct den
 		 */
 		status = try_to_fill_dentry(dentry, flags);
 		if (status == 0)
-				return 1;
+			return 1;
+
+		/*
+		 * A status of EAGAIN here means that the dentry has gone
+		 * away while waiting for an expire to complete. If we are
+		 * racing with expire lookup will wait for it so this must
+		 * be a revalidate and we need to send it to lookup.
+		 */
+		if (status == -EAGAIN)
+			return 0;
 
 		return status;
 	}
@@ -459,9 +468,18 @@ void autofs4_dentry_release(struct dentr
 	de->d_fsdata = NULL;
 
 	if (inf) {
+		struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
+
 		inf->dentry = NULL;
 		inf->inode = NULL;
 
+		if (sbi) {
+			spin_lock(&sbi->rehash_lock);
+			if (!list_empty(&inf->rehash))
+				list_del(&inf->rehash);
+			spin_unlock(&sbi->rehash_lock);
+		}
+
 		autofs4_free_ino(inf);
 	}
 }
@@ -478,10 +496,80 @@ static struct dentry_operations autofs4_
 	.d_release	= autofs4_dentry_release,
 };
 
+static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
+{
+	unsigned int len = name->len;
+	unsigned int hash = name->hash;
+	const unsigned char *str = name->name;
+	struct list_head *p, *head;
+
+	spin_lock(&dcache_lock);
+	spin_lock(&sbi->rehash_lock);
+	head = &sbi->rehash_list;
+	list_for_each(p, head) {
+		struct autofs_info *ino;
+		struct dentry *dentry;
+		struct qstr *qstr;
+
+		ino = list_entry(p, struct autofs_info, rehash);
+		dentry = ino->dentry;
+
+		spin_lock(&dentry->d_lock);
+
+		/* Bad luck, we've already been dentry_iput */
+		if (!dentry->d_inode)
+			goto next;
+
+		qstr = &dentry->d_name;
+
+		if (dentry->d_name.hash != hash)
+			goto next;
+		if (dentry->d_parent != parent)
+			goto next;
+
+		if (qstr->len != len)
+			goto next;
+		if (memcmp(qstr->name, str, len))
+			goto next;
+
+		if (d_unhashed(dentry)) {
+			struct autofs_info *ino = autofs4_dentry_ino(dentry);
+			struct inode *inode = dentry->d_inode;
+
+			list_del_init(&ino->rehash);
+			dget(dentry);
+			/*
+			 * Make the rehashed dentry negative so the VFS
+			 * behaves as it should.
+			 */
+			if (inode) {
+				dentry->d_inode = NULL;
+				list_del_init(&dentry->d_alias);
+				spin_unlock(&dentry->d_lock);
+				spin_unlock(&sbi->rehash_lock);
+				spin_unlock(&dcache_lock);
+				iput(inode);
+				return dentry;
+			}
+			spin_unlock(&dentry->d_lock);
+			spin_unlock(&sbi->rehash_lock);
+			spin_unlock(&dcache_lock);
+			return dentry;
+		}
+next:
+		spin_unlock(&dentry->d_lock);
+	}
+	spin_unlock(&sbi->rehash_lock);
+	spin_unlock(&dcache_lock);
+
+	return NULL;
+}
+
 /* Lookups in the root directory */
 static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct autofs_sb_info *sbi;
+	struct dentry *unhashed;
 	int oz_mode;
 
 	DPRINTK("name = %.*s",
@@ -497,25 +585,46 @@ static struct dentry *autofs4_lookup(str
 	DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
 		 current->pid, process_group(current), sbi->catatonic, oz_mode);
 
-	/*
-	 * Mark the dentry incomplete, but add it. This is needed so
-	 * that the VFS layer knows about the dentry, and we can count
-	 * on catching any lookups through the revalidate.
-	 *
-	 * Let all the hard work be done by the revalidate function that
-	 * needs to be able to do this anyway..
-	 *
-	 * We need to do this before we release the directory semaphore.
-	 */
-	dentry->d_op = &autofs4_root_dentry_operations;
+	unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name);
+	if (!unhashed) {
+		/*
+		 * Mark the dentry incomplete, but add it. This is needed so
+		 * that the VFS layer knows about the dentry, and we can count
+		 * on catching any lookups through the revalidate.
+		 *
+		 * Let all the hard work be done by the revalidate function that
+		 * needs to be able to do this anyway..
+		 *
+		 * We need to do this before we release the directory semaphore.
+		 */
+		dentry->d_op = &autofs4_root_dentry_operations;
+
+		dentry->d_fsdata = NULL;
+		d_add(dentry, NULL);
+	} else {
+		struct autofs_info *ino = autofs4_dentry_ino(unhashed);
+		DPRINTK("rehash %p with %p", dentry, unhashed);
+		/*
+		 * If we are racing with expire the request might not
+		 * be quite complete but the directory has been removed
+		 * so it must have been successful, so just wait for it.
+		 */
+		if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
+			DPRINTK("wait for incomplete expire %p name=%.*s",
+				unhashed, unhashed->d_name.len,
+				unhashed->d_name.name);
+			autofs4_wait(sbi, unhashed, NFY_NONE);
+			DPRINTK("request completed");
+		}
+		d_rehash(unhashed);
+		dentry = unhashed;
+	}
 
 	if (!oz_mode) {
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags |= DCACHE_AUTOFS_PENDING;
 		spin_unlock(&dentry->d_lock);
 	}
-	dentry->d_fsdata = NULL;
-	d_add(dentry, NULL);
 
 	if (dentry->d_op && dentry->d_op->d_revalidate) {
 		mutex_unlock(&dir->i_mutex);
@@ -534,6 +643,8 @@ static struct dentry *autofs4_lookup(str
 			if (sigismember (sigset, SIGKILL) ||
 			    sigismember (sigset, SIGQUIT) ||
 			    sigismember (sigset, SIGINT)) {
+			    if (unhashed)
+				dput(unhashed);
 			    return ERR_PTR(-ERESTARTNOINTR);
 			}
 		}
@@ -548,8 +659,14 @@ static struct dentry *autofs4_lookup(str
 	 * doesn't do the right thing for all system calls, but it should
 	 * be OK for the operations we permit from an autofs.
 	 */
-	if (dentry->d_inode && d_unhashed(dentry))
+	if (dentry->d_inode && d_unhashed(dentry)) {
+		if (unhashed)
+			dput(unhashed);
 		return ERR_PTR(-ENOENT);
+	}
+
+	if (unhashed)
+		return dentry;
 
 	return NULL;
 }
@@ -611,9 +728,10 @@ static int autofs4_dir_symlink(struct in
  * Normal filesystems would do a "d_delete()" to tell the VFS dcache
  * that the file no longer exists. However, doing that means that the
  * VFS layer can turn the dentry into a negative dentry.  We don't want
- * this, because since the unlink is probably the result of an expire.
- * We simply d_drop it, which allows the dentry lookup to remount it
- * if necessary.
+ * this, because the unlink is probably the result of an expire.
+ * We simply d_drop it and add it to a rehash candidates list in the
+ * super block, which allows the dentry lookup to reuse it retaining
+ * the flags, such as expire in progress, in case we're racing with expire.
  *
  * If a process is blocked on the dentry waiting for the expire to finish,
  * it will invalidate the dentry and try to mount with a new one.
@@ -642,7 +760,14 @@ static int autofs4_dir_unlink(struct ino
 
 	dir->i_mtime = CURRENT_TIME;
 
-	d_drop(dentry);
+	spin_lock(&dcache_lock);
+	spin_lock(&sbi->rehash_lock);
+	list_add(&ino->rehash, &sbi->rehash_list);
+	spin_unlock(&sbi->rehash_lock);
+	spin_lock(&dentry->d_lock);
+	__d_drop(dentry);
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&dcache_lock);
 
 	return 0;
 }
@@ -653,6 +778,9 @@ static int autofs4_dir_rmdir(struct inod
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	struct autofs_info *p_ino;
 	
+	DPRINTK("dentry %p, removing %.*s",
+		dentry, dentry->d_name.len, dentry->d_name.name);
+
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
 
@@ -661,6 +789,9 @@ static int autofs4_dir_rmdir(struct inod
 		spin_unlock(&dcache_lock);
 		return -ENOTEMPTY;
 	}
+	spin_lock(&sbi->rehash_lock);
+	list_add(&ino->rehash, &sbi->rehash_list);
+	spin_unlock(&sbi->rehash_lock);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
--- linux-2.6.20/fs/autofs4/inode.c.lookup-expire-race	2007-02-05 03:44:54.000000000 +0900
+++ linux-2.6.20/fs/autofs4/inode.c	2007-02-12 12:16:27.000000000 +0900
@@ -48,6 +48,8 @@ struct autofs_info *autofs4_init_ino(str
 	ino->dentry = NULL;
 	ino->size = 0;
 
+	INIT_LIST_HEAD(&ino->rehash);
+
 	ino->last_used = jiffies;
 	atomic_set(&ino->count, 0);
 
@@ -158,14 +160,13 @@ void autofs4_kill_sb(struct super_block 
 	if (!sbi)
 		goto out_kill_sb;
 
-	sb->s_fs_info = NULL;
-
-	if ( !sbi->catatonic )
+	if (!sbi->catatonic)
 		autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */
 
 	/* Clean up and release dangling references */
 	autofs4_force_release(sbi);
 
+	sb->s_fs_info = NULL;
 	kfree(sbi);
 
 out_kill_sb:
@@ -336,6 +337,8 @@ int autofs4_fill_super(struct super_bloc
 	mutex_init(&sbi->wq_mutex);
 	spin_lock_init(&sbi->fs_lock);
 	sbi->queues = NULL;
+	spin_lock_init(&sbi->rehash_lock);
+	INIT_LIST_HEAD(&sbi->rehash_list);
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
 	s->s_magic = AUTOFS_SUPER_MAGIC;



  reply	other threads:[~2007-02-12  6:42 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-02-07 17:34 Olivier Galibert
2007-02-07 18:07 ` [autofs] " Ian Kent
2007-02-07 18:18   ` Olivier Galibert
2007-02-08  2:33     ` Ian Kent
2007-02-12  6:43       ` Ian Kent [this message]
2007-02-12  6:46         ` [PATCH 2/2] " Ian Kent
2007-02-12 13:57         ` [PATCH 1/2] " Olivier Galibert
2007-02-13  0:52           ` Ian Kent
2007-02-13  8:50             ` Olivier Galibert
2007-02-13 13:40               ` Ian Kent
2007-02-13 14:07               ` Chuck Ebbert
2007-02-13 15:54                 ` Olivier Galibert
2007-02-13 15:57                   ` Chuck Ebbert
2007-02-13 17:35                   ` Ian Kent
2007-02-13 20:43                     ` Olivier Galibert
2007-02-13 17:29                 ` Ian Kent

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1171262594.18376.13.camel@raven.themaw.net \
    --to=raven@themaw.net \
    --cc=autofs@linux.kernel.org \
    --cc=galibert@pobox.com \
    --cc=linux-kernel@vger.kernel.org \
    --subject='[PATCH 1/2] Re: [autofs] Bad race condition in the new autofs protocol somewhere' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).