LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* Re: [PATCH] [14/18] BKL-removal: Add unlocked_fasync
[not found] ` <9Q5rE-3ZD-17@gated-at.bofh.it>
@ 2008-01-27 11:14 ` Bodo Eggert
2008-01-27 16:31 ` [PATCH] [14/18] BKL-removal: Add unlocked_fasync v2 Andi Kleen
0 siblings, 1 reply; 24+ messages in thread
From: Bodo Eggert @ 2008-01-27 11:14 UTC (permalink / raw)
To: Andi Kleen, linux-kernel, linux-fsdevel, akpm
> +++ linux/fs/fcntl.c
> @@ -240,11 +240,15 @@ static int setfl(int fd, struct file * f
>
> lock_kernel();
> if ((arg ^ filp->f_flags) & FASYNC) {
> - if (filp->f_op && filp->f_op->fasync) {
> + if (filp->f_op && filp->f_op->unlocked_fasync)
> + error = filp->f_op->unlocked_fasync(fd, filp,
> + !!(arg & FASYNC));
> + else if (filp->f_op && filp->f_op->fasync) {
> error = filp->f_op->fasync(fd, filp, (arg & FASYNC) !=
0);
> if (error < 0)
> goto out;
No goto if you use unlocked_fasync?
> }
> + /* AK: no else error = -EINVAL here? */
> }
>
> filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
> --
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [14/18] BKL-removal: Add unlocked_fasync v2
2008-01-27 11:14 ` [PATCH] [14/18] BKL-removal: Add unlocked_fasync Bodo Eggert
@ 2008-01-27 16:31 ` Andi Kleen
0 siblings, 0 replies; 24+ messages in thread
From: Andi Kleen @ 2008-01-27 16:31 UTC (permalink / raw)
To: Bodo Eggert; +Cc: Andi Kleen, linux-kernel, linux-fsdevel, akpm
> No goto if you use unlocked_fasync?
Indeed. There was another problem in the patch too. Here's an updated
patch that also fixes another latent bug.
The whole f_flags still seems to be somewhat fragile because
the checks tend to happen without any lock, but that has not
changed to the previous state.
---
Add unlocked_fasync v2
Add a new fops entry point to allow fasync without BKL. While it's arguably
unclear this entry point is called often enough for it really matters
it was still relatively easy to do. And there are far less async users
in the tree than ioctls so it's likely they can be all converted
eventually and then the non unlocked async entry point could be dropped.
There was still the problem of the actual flags change being
protected against other setters of flags. Instead of using BKL
for this use the i_mutex now.
I also added a mutex_lock against one other flags change
that was lockless and could potentially lose updates.
Signed-off-by: Andi Kleen <ak@suse.de>
---
Documentation/filesystems/vfs.txt | 5 ++++-
fs/fcntl.c | 6 +++++-
fs/ioctl.c | 5 ++++-
include/linux/fs.h | 1 +
4 files changed, 14 insertions(+), 3 deletions(-)
Index: linux/fs/fcntl.c
===================================================================
--- linux.orig/fs/fcntl.c
+++ linux/fs/fcntl.c
@@ -238,18 +238,26 @@ static int setfl(int fd, struct file * f
if (error)
return error;
- lock_kernel();
+ /* AK: potential race here. Since test is unlocked fasync could
+ be called several times in a row with on. We hope the drivers
+ deal with it. */
if ((arg ^ filp->f_flags) & FASYNC) {
- if (filp->f_op && filp->f_op->fasync) {
- error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
- if (error < 0)
- goto out;
+ int on = !!(arg & FASYNC);
+ if (filp->f_op && filp->f_op->unlocked_fasync)
+ error = filp->f_op->unlocked_fasync(fd, filp, on);
+ else if (filp->f_op && filp->f_op->fasync) {
+ lock_kernel();
+ error = filp->f_op->fasync(fd, filp, on);
+ unlock_kernel();
}
+ /* AK: no else error = -EINVAL here? */
+ if (error < 0)
+ return error;
}
+ mutex_lock(&filp->f_dentry->d_inode->i_mutex);
filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
- out:
- unlock_kernel();
+ mutex_unlock(&filp->f_dentry->d_inode->i_mutex);
return error;
}
Index: linux/fs/ioctl.c
===================================================================
--- linux.orig/fs/ioctl.c
+++ linux/fs/ioctl.c
@@ -105,10 +105,14 @@ int vfs_ioctl(struct file *filp, unsigne
if(O_NONBLOCK != O_NDELAY)
flag |= O_NDELAY;
#endif
+
+ mutex_lock(&filp->f_dentry->d_inode->i_mutex);
if (on)
filp->f_flags |= flag;
else
filp->f_flags &= ~flag;
+
+ mutex_unlock(&filp->f_dentry->d_inode->i_mutex);
break;
case FIOASYNC:
@@ -117,8 +121,13 @@ int vfs_ioctl(struct file *filp, unsigne
flag = on ? FASYNC : 0;
/* Did FASYNC state change ? */
+ /* AK: potential race here: ->fasync could be called with on two times
+ in a row. We hope the drivers deal with it. */
if ((flag ^ filp->f_flags) & FASYNC) {
- if (filp->f_op && filp->f_op->fasync) {
+ if (filp->f_op && filp->f_op->unlocked_fasync)
+ error = filp->f_op->unlocked_fasync(fd,
+ filp, on);
+ else if (filp->f_op && filp->f_op->fasync) {
lock_kernel();
error = filp->f_op->fasync(fd, filp, on);
unlock_kernel();
@@ -128,10 +137,12 @@ int vfs_ioctl(struct file *filp, unsigne
if (error != 0)
break;
+ mutex_lock(&filp->f_dentry->d_inode->i_mutex);
if (on)
filp->f_flags |= FASYNC;
else
filp->f_flags &= ~FASYNC;
+ mutex_unlock(&filp->f_dentry->d_inode->i_mutex);
break;
case FIOQSIZE:
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h
+++ linux/include/linux/fs.h
@@ -1179,6 +1179,7 @@ struct file_operations {
int (*fsync) (struct file *, struct dentry *, int datasync);
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
+ int (*unlocked_fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
Index: linux/Documentation/filesystems/vfs.txt
===================================================================
--- linux.orig/Documentation/filesystems/vfs.txt
+++ linux/Documentation/filesystems/vfs.txt
@@ -769,6 +769,7 @@ struct file_operations {
int (*fsync) (struct file *, struct dentry *, int datasync);
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
+ int (*unlocked_fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
@@ -828,7 +829,9 @@ otherwise noted.
fsync: called by the fsync(2) system call
fasync: called by the fcntl(2) system call when asynchronous
- (non-blocking) mode is enabled for a file
+ (non-blocking) mode is enabled for a file. BKL hold
+
+ unlocked_fasync: like fasync, but without BKL
lock: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW
commands
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
[not found] ` <9Qugj-1PK-1@gated-at.bofh.it>
@ 2008-01-28 9:44 ` Bodo Eggert
0 siblings, 0 replies; 24+ messages in thread
From: Bodo Eggert @ 2008-01-28 9:44 UTC (permalink / raw)
To: Trond Myklebust, Andi Kleen, Steve French, swhiteho, sfrench,
vandrove, linux-kernel, linux-fsdevel, akpm
Trond Myklebust <Trond.Myklebust@netapp.com> wrote:
> On Mon, 2008-01-28 at 05:38 +0100, Andi Kleen wrote:
>> On Monday 28 January 2008 05:13:09 Trond Myklebust wrote:
>> > On Mon, 2008-01-28 at 03:58 +0100, Andi Kleen wrote:
>> > > The problem is that it's not a race in who gets to do its thing first,
>> > > but a parallel reader can actually see a corrupted value from the two
>> > > independent words on 32bit (e.g. during a 4GB). And this could actually
>> > > completely corrupt f_pos when it happens with two racing relative seeks
>> > > or read/write()s
>> > >
>> > > I would consider that a bug.
>> >
>> > I disagree. The corruption occurs because this isn't a situation that is
>> > allowed by either POSIX or SUSv2/v3. Exactly what spec are you referring
>> > to here?
>>
>> No specific spec, just general quality of implementation. We normally don't
>> have non thread safe system calls even if it was in theory allowed by some
>> specification.
>
> We've had the existing implementation for quite some time. The arguments
> against changing it have been the same all along: if your application
> wants to share files between threads, the portability argument implies
> that you should either use pread/pwrite or use a mutex or some other
> form of synchronisation primitive in order to ensure that
> lseek()/read()/write() do not overlap.
Does anything in the kernel depend on f_pos being valid?
E.g. is it possible to read beyond the EOF using this race, or to have files
larger than the ulimit?
If not, update the manpage and be done. ¢¢
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 18:33 ` Steve French
@ 2008-01-28 19:34 ` Dave Kleikamp
0 siblings, 0 replies; 24+ messages in thread
From: Dave Kleikamp @ 2008-01-28 19:34 UTC (permalink / raw)
To: Steve French
Cc: Andi Kleen, Andrew Morton, Trond Myklebust, swhiteho, sfrench,
vandrove, linux-kernel, linux-fsdevel
On Mon, 2008-01-28 at 12:33 -0600, Steve French wrote:
> On Jan 28, 2008 2:17 AM, Andi Kleen <ak@suse.de> wrote:
> > > I completely agree. If one thread writes A and another writes B then the
> > > kernel should record either A or B, not ((A & 0xffffffff00000000) | (B &
> > > 0xffffffff))
> >
> > The problem is pretty nasty unfortunately. To solve it properly I think
> > the file_operations->read/write prototypes would need to be changed
> > because otherwise it is not possible to do atomic relative updates
> > of f_pos. Right now the actual update is burrowed deeply in the low level
> > read/write implementation. But that would be a huge impact all over
> > the tree :/
>
> If there were a wrapper around reads and writes of f_pos as there is
> for i_size e.g. it would hit a lot of code, but not as many as I had
> originally thought. the most important ones are in the vfs itself, where
> there are only 59 uses of the field (not all need to be changed). ext3
> has fewer (25), and cifs only 12 uses.
Most of the uses in ext3 and cifs deal with a directory's f_pos in
readdir, which is protected by i_mutex, so I don't think we need to
worry about them at all.
--
David Kleikamp
IBM Linux Technology Center
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 8:17 ` Andi Kleen
@ 2008-01-28 18:33 ` Steve French
2008-01-28 19:34 ` Dave Kleikamp
0 siblings, 1 reply; 24+ messages in thread
From: Steve French @ 2008-01-28 18:33 UTC (permalink / raw)
To: Andi Kleen
Cc: Andrew Morton, Trond Myklebust, swhiteho, sfrench, vandrove,
linux-kernel, linux-fsdevel
On Jan 28, 2008 2:17 AM, Andi Kleen <ak@suse.de> wrote:
> > I completely agree. If one thread writes A and another writes B then the
> > kernel should record either A or B, not ((A & 0xffffffff00000000) | (B &
> > 0xffffffff))
>
> The problem is pretty nasty unfortunately. To solve it properly I think
> the file_operations->read/write prototypes would need to be changed
> because otherwise it is not possible to do atomic relative updates
> of f_pos. Right now the actual update is burrowed deeply in the low level
> read/write implementation. But that would be a huge impact all over
> the tree :/
If there were a wrapper around reads and writes of f_pos as there is
for i_size e.g. it would hit a lot of code, but not as many as I had
originally thought. the most important ones are in the vfs itself, where
there are only 59 uses of the field (not all need to be changed). ext3
has fewer (25), and cifs only 12 uses.
--
Thanks,
Steve
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 14:10 ` Andi Kleen
2008-01-28 14:50 ` Alan Cox
@ 2008-01-28 15:13 ` Diego Calleja
1 sibling, 0 replies; 24+ messages in thread
From: Diego Calleja @ 2008-01-28 15:13 UTC (permalink / raw)
To: Andi Kleen
Cc: Alan Cox, Andrew Morton, Trond Myklebust, Steve French, swhiteho,
sfrench, vandrove, linux-kernel, linux-fsdevel
El Mon, 28 Jan 2008 15:10:34 +0100, Andi Kleen <ak@suse.de> escribió:
> So you get overlapping reads. Probably not good.
This was discussed in the past i think ->
http://lkml.org/lkml/2006/4/13/124
http://lkml.org/lkml/2006/4/13/130
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 14:10 ` Andi Kleen
@ 2008-01-28 14:50 ` Alan Cox
2008-01-28 15:13 ` Diego Calleja
1 sibling, 0 replies; 24+ messages in thread
From: Alan Cox @ 2008-01-28 14:50 UTC (permalink / raw)
To: Andi Kleen
Cc: Andrew Morton, Trond Myklebust, Steve French, swhiteho, sfrench,
vandrove, linux-kernel, linux-fsdevel
On Mon, 28 Jan 2008 15:10:34 +0100
Andi Kleen <ak@suse.de> wrote:
> On Monday 28 January 2008 14:38:57 Alan Cox wrote:
> > > Also worse really fixing it would be a major change to the VFS
> > > because of the way ->read/write are defined :/
> >
> > I don't see a problem there. ->read and ->write update the passed pointer
> > which is not the real f_pos anyway. Just the copies need fixing.
>
> They are effectually doing a decoupled read/modify/write cycle. e.g.:
>
> A B
>
> read fpos
>
> read fpos
>
> fpos += A fpos += B
> write fpos
>
>
> write fpos
>
> So you get overlapping reads. Probably not good.
No unix system I'm aware of cares about the read/write positioning during
parallel simultaneous reads or writes, with the exception of O_APPEND
which is strictly defined. The problem case is getting fpos != either
valid value.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 13:38 ` Alan Cox
@ 2008-01-28 14:10 ` Andi Kleen
2008-01-28 14:50 ` Alan Cox
2008-01-28 15:13 ` Diego Calleja
0 siblings, 2 replies; 24+ messages in thread
From: Andi Kleen @ 2008-01-28 14:10 UTC (permalink / raw)
To: Alan Cox
Cc: Andrew Morton, Trond Myklebust, Steve French, swhiteho, sfrench,
vandrove, linux-kernel, linux-fsdevel
On Monday 28 January 2008 14:38:57 Alan Cox wrote:
> > Also worse really fixing it would be a major change to the VFS
> > because of the way ->read/write are defined :/
>
> I don't see a problem there. ->read and ->write update the passed pointer
> which is not the real f_pos anyway. Just the copies need fixing.
They are effectually doing a decoupled read/modify/write cycle. e.g.:
A B
read fpos
read fpos
fpos += A fpos += B
write fpos
write fpos
So you get overlapping reads. Probably not good.
-Andi
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 13:27 ` Andi Kleen
@ 2008-01-28 13:38 ` Alan Cox
2008-01-28 14:10 ` Andi Kleen
0 siblings, 1 reply; 24+ messages in thread
From: Alan Cox @ 2008-01-28 13:38 UTC (permalink / raw)
To: Andi Kleen
Cc: Andrew Morton, Trond Myklebust, Steve French, swhiteho, sfrench,
vandrove, linux-kernel, linux-fsdevel
> Also worse really fixing it would be a major change to the VFS
> because of the way ->read/write are defined :/
I don't see a problem there. ->read and ->write update the passed pointer
which is not the real f_pos anyway. Just the copies need fixing.
Alan
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 12:56 ` Alan Cox
@ 2008-01-28 13:27 ` Andi Kleen
2008-01-28 13:38 ` Alan Cox
0 siblings, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2008-01-28 13:27 UTC (permalink / raw)
To: Alan Cox
Cc: Andrew Morton, Trond Myklebust, Steve French, swhiteho, sfrench,
vandrove, linux-kernel, linux-fsdevel
On Monday 28 January 2008 13:56:05 Alan Cox wrote:
> > > No specific spec, just general quality of implementation.
> >
> > I completely agree. If one thread writes A and another writes B then the
> > kernel should record either A or B, not ((A & 0xffffffff00000000) | (B &
> > 0xffffffff))
>
> Agree entirely: the spec doesn't allow for random scribbling in the wrong
> place. It doesn't cover which goes first or who "wins" the race but
> provides pwrite/pread for that situation. Writing somewhere unrelated is
> definitely not to spec
Actually it would probably -- i guess it's undefined and in undefined
country such things can happen.
Also to be fair I think it's only a problem for the 4GB wrapping case
which is presumably rare (otherwise we would have heard about it)
Also worse really fixing it would be a major change to the VFS
because of the way ->read/write are defined :/
-Andi
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 5:13 ` Andrew Morton
2008-01-28 8:17 ` Andi Kleen
@ 2008-01-28 12:56 ` Alan Cox
2008-01-28 13:27 ` Andi Kleen
1 sibling, 1 reply; 24+ messages in thread
From: Alan Cox @ 2008-01-28 12:56 UTC (permalink / raw)
To: Andrew Morton
Cc: Andi Kleen, Trond Myklebust, Steve French, swhiteho, sfrench,
vandrove, linux-kernel, linux-fsdevel
> > No specific spec, just general quality of implementation.
>
> I completely agree. If one thread writes A and another writes B then the
> kernel should record either A or B, not ((A & 0xffffffff00000000) | (B &
> 0xffffffff))
Agree entirely: the spec doesn't allow for random scribbling in the wrong
place. It doesn't cover which goes first or who "wins" the race but
provides pwrite/pread for that situation. Writing somewhere unrelated is
definitely not to spec and not good.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 5:13 ` Andrew Morton
@ 2008-01-28 8:17 ` Andi Kleen
2008-01-28 18:33 ` Steve French
2008-01-28 12:56 ` Alan Cox
1 sibling, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2008-01-28 8:17 UTC (permalink / raw)
To: Andrew Morton
Cc: Trond Myklebust, Steve French, swhiteho, sfrench, vandrove,
linux-kernel, linux-fsdevel
> I completely agree. If one thread writes A and another writes B then the
> kernel should record either A or B, not ((A & 0xffffffff00000000) | (B &
> 0xffffffff))
The problem is pretty nasty unfortunately. To solve it properly I think
the file_operations->read/write prototypes would need to be changed
because otherwise it is not possible to do atomic relative updates
of f_pos. Right now the actual update is burrowed deeply in the low level
read/write implementation. But that would be a huge impact all over
the tree :/
Or maybe define a new read/write64 and keep the default as 32bit only-- i
suppose most users don't really need 64bit. Still would be a nasty API
change.
-Andi
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 4:38 ` Andi Kleen
2008-01-28 4:51 ` Trond Myklebust
@ 2008-01-28 5:13 ` Andrew Morton
2008-01-28 8:17 ` Andi Kleen
2008-01-28 12:56 ` Alan Cox
1 sibling, 2 replies; 24+ messages in thread
From: Andrew Morton @ 2008-01-28 5:13 UTC (permalink / raw)
To: Andi Kleen
Cc: Trond Myklebust, Steve French, swhiteho, sfrench, vandrove,
linux-kernel, linux-fsdevel
On Mon, 28 Jan 2008 05:38:25 +0100 Andi Kleen <ak@suse.de> wrote:
> On Monday 28 January 2008 05:13:09 Trond Myklebust wrote:
> >
> > On Mon, 2008-01-28 at 03:58 +0100, Andi Kleen wrote:
> > > The problem is that it's not a race in who gets to do its thing first, but a
> > > parallel reader can actually see a corrupted value from the two independent
> > > words on 32bit (e.g. during a 4GB). And this could actually completely corrupt
> > > f_pos when it happens with two racing relative seeks or read/write()s
> > >
> > > I would consider that a bug.
> >
> > I disagree. The corruption occurs because this isn't a situation that is
> > allowed by either POSIX or SUSv2/v3. Exactly what spec are you referring
> > to here?
>
> No specific spec, just general quality of implementation.
I completely agree. If one thread writes A and another writes B then the
kernel should record either A or B, not ((A & 0xffffffff00000000) | (B &
0xffffffff))
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 4:38 ` Andi Kleen
@ 2008-01-28 4:51 ` Trond Myklebust
2008-01-28 5:13 ` Andrew Morton
1 sibling, 0 replies; 24+ messages in thread
From: Trond Myklebust @ 2008-01-28 4:51 UTC (permalink / raw)
To: Andi Kleen
Cc: Steve French, swhiteho, sfrench, vandrove, linux-kernel,
linux-fsdevel, akpm
On Mon, 2008-01-28 at 05:38 +0100, Andi Kleen wrote:
> On Monday 28 January 2008 05:13:09 Trond Myklebust wrote:
> >
> > On Mon, 2008-01-28 at 03:58 +0100, Andi Kleen wrote:
> > > The problem is that it's not a race in who gets to do its thing first, but a
> > > parallel reader can actually see a corrupted value from the two independent
> > > words on 32bit (e.g. during a 4GB). And this could actually completely corrupt
> > > f_pos when it happens with two racing relative seeks or read/write()s
> > >
> > > I would consider that a bug.
> >
> > I disagree. The corruption occurs because this isn't a situation that is
> > allowed by either POSIX or SUSv2/v3. Exactly what spec are you referring
> > to here?
>
> No specific spec, just general quality of implementation. We normally don't have
> non thread safe system calls even if it was in theory allowed by some specification.
We've had the existing implementation for quite some time. The arguments
against changing it have been the same all along: if your application
wants to share files between threads, the portability argument implies
that you should either use pread/pwrite or use a mutex or some other
form of synchronisation primitive in order to ensure that
lseek()/read()/write() do not overlap.
Cheers
Trond
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 4:13 ` Trond Myklebust
@ 2008-01-28 4:38 ` Andi Kleen
2008-01-28 4:51 ` Trond Myklebust
2008-01-28 5:13 ` Andrew Morton
0 siblings, 2 replies; 24+ messages in thread
From: Andi Kleen @ 2008-01-28 4:38 UTC (permalink / raw)
To: Trond Myklebust
Cc: Steve French, swhiteho, sfrench, vandrove, linux-kernel,
linux-fsdevel, akpm
On Monday 28 January 2008 05:13:09 Trond Myklebust wrote:
>
> On Mon, 2008-01-28 at 03:58 +0100, Andi Kleen wrote:
> > The problem is that it's not a race in who gets to do its thing first, but a
> > parallel reader can actually see a corrupted value from the two independent
> > words on 32bit (e.g. during a 4GB). And this could actually completely corrupt
> > f_pos when it happens with two racing relative seeks or read/write()s
> >
> > I would consider that a bug.
>
> I disagree. The corruption occurs because this isn't a situation that is
> allowed by either POSIX or SUSv2/v3. Exactly what spec are you referring
> to here?
No specific spec, just general quality of implementation. We normally don't have
non thread safe system calls even if it was in theory allowed by some specification.
-Andi
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-28 2:58 ` Andi Kleen
@ 2008-01-28 4:13 ` Trond Myklebust
2008-01-28 4:38 ` Andi Kleen
0 siblings, 1 reply; 24+ messages in thread
From: Trond Myklebust @ 2008-01-28 4:13 UTC (permalink / raw)
To: Andi Kleen
Cc: Steve French, swhiteho, sfrench, vandrove, linux-kernel,
linux-fsdevel, akpm
On Mon, 2008-01-28 at 03:58 +0100, Andi Kleen wrote:
> The problem is that it's not a race in who gets to do its thing first, but a
> parallel reader can actually see a corrupted value from the two independent
> words on 32bit (e.g. during a 4GB). And this could actually completely corrupt
> f_pos when it happens with two racing relative seeks or read/write()s
>
> I would consider that a bug.
I disagree. The corruption occurs because this isn't a situation that is
allowed by either POSIX or SUSv2/v3. Exactly what spec are you referring
to here?
Trond
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-27 23:08 ` Trond Myklebust
@ 2008-01-28 2:58 ` Andi Kleen
2008-01-28 4:13 ` Trond Myklebust
0 siblings, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2008-01-28 2:58 UTC (permalink / raw)
To: Trond Myklebust
Cc: Steve French, swhiteho, sfrench, vandrove, linux-kernel,
linux-fsdevel, akpm
On Monday 28 January 2008 00:08:56 Trond Myklebust wrote:
>
> On Sun, 2008-01-27 at 16:18 -0600, Steve French wrote:
> > If two seeks overlap, can't you end up with an f_pos value that is
> > different than what either thread seeked to? or if you have a seek and
> > a read overlap can't you end up with the read occurring in the midst
> > of an update of f_pos (which takes more than one instruction on
> > various architectures), e.g. reading an f_pos, which has only the
> > lower half of a 64 bit field updated? I agree that you shouldn't
> > have seeks racing in parallel but I think it is preferable to get
> > either the updated f_pos or the earlier f_pos not something 1/2
> > updated.
>
> Why? The threads are doing something inherently liable to corrupt data
> anyway. If they can race over the seek, why wouldn't they race over the
> read or write too?
> The race in lseek() should probably be the least of your worries in this
> case.
The problem is that it's not a race in who gets to do its thing first, but a
parallel reader can actually see a corrupted value from the two independent
words on 32bit (e.g. during a 4GB). And this could actually completely corrupt
f_pos when it happens with two racing relative seeks or read/write()s
I would consider that a bug.
Fixes would be either to always take a spinlock to update this (nasty on
platforms where spinlocks are expensive like P4) or define some architecture
specific way to read/write 64bit values consistently. In theory also some
lazy locking seqlock like mechanism could be used, but that had the disadvantage
of being theoretically starvable.
-Andi
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-27 22:18 ` Steve French
2008-01-27 23:08 ` Trond Myklebust
@ 2008-01-28 2:44 ` Andi Kleen
1 sibling, 0 replies; 24+ messages in thread
From: Andi Kleen @ 2008-01-28 2:44 UTC (permalink / raw)
To: Steve French
Cc: Trond Myklebust, swhiteho, sfrench, vandrove, linux-kernel,
linux-fsdevel, akpm
On Sunday 27 January 2008 23:18:26 Steve French wrote:
> If two seeks overlap, can't you end up with an f_pos value that is
> different than what either thread seeked to?
Yes you can on 32bit. Especially during the 4GB wrap
-Andi
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-27 16:57 ` Steve French
2008-01-27 17:56 ` Trond Myklebust
@ 2008-01-28 2:43 ` Andi Kleen
1 sibling, 0 replies; 24+ messages in thread
From: Andi Kleen @ 2008-01-28 2:43 UTC (permalink / raw)
To: Steve French
Cc: Trond.Myklebust, swhiteho, sfrench, vandrove, linux-kernel,
linux-fsdevel, akpm
On Sunday 27 January 2008 17:57:14 Steve French wrote:
> Don't you need to a spinlock/spinunlock(i_lock) or something similar
> (there isn't a spinlock in the file struct unfortunately) around the
> reads and writes from f_pos in fs/read_write.c in remote_llseek with
> your patch since the reads/writes from that field are not necessarily
> atomic and threads could be racing in seek on the same file struct?
Funny that you mention it. I actually noticed this too while working on this,
but noticed that it is wrong everywhere (as in even plain sys_write/read gets
it wrong). So I decided to not address it because it is already
broken.
I did actually send email to a few people about this, but no answer
yet.
I agree it's probably all broken on 32bit platforms, but I'm not
sure how to best address this. When it is comprehensively addressed remote_llseek
can use that new method too.
-Andi
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-27 22:18 ` Steve French
@ 2008-01-27 23:08 ` Trond Myklebust
2008-01-28 2:58 ` Andi Kleen
2008-01-28 2:44 ` Andi Kleen
1 sibling, 1 reply; 24+ messages in thread
From: Trond Myklebust @ 2008-01-27 23:08 UTC (permalink / raw)
To: Steve French
Cc: Andi Kleen, swhiteho, sfrench, vandrove, linux-kernel,
linux-fsdevel, akpm
On Sun, 2008-01-27 at 16:18 -0600, Steve French wrote:
> If two seeks overlap, can't you end up with an f_pos value that is
> different than what either thread seeked to? or if you have a seek and
> a read overlap can't you end up with the read occurring in the midst
> of an update of f_pos (which takes more than one instruction on
> various architectures), e.g. reading an f_pos, which has only the
> lower half of a 64 bit field updated? I agree that you shouldn't
> have seeks racing in parallel but I think it is preferable to get
> either the updated f_pos or the earlier f_pos not something 1/2
> updated.
Why? The threads are doing something inherently liable to corrupt data
anyway. If they can race over the seek, why wouldn't they race over the
read or write too?
The race in lseek() should probably be the least of your worries in this
case.
Trond
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-27 17:56 ` Trond Myklebust
@ 2008-01-27 22:18 ` Steve French
2008-01-27 23:08 ` Trond Myklebust
2008-01-28 2:44 ` Andi Kleen
0 siblings, 2 replies; 24+ messages in thread
From: Steve French @ 2008-01-27 22:18 UTC (permalink / raw)
To: Trond Myklebust
Cc: Andi Kleen, swhiteho, sfrench, vandrove, linux-kernel,
linux-fsdevel, akpm
If two seeks overlap, can't you end up with an f_pos value that is
different than what either thread seeked to? or if you have a seek and
a read overlap can't you end up with the read occurring in the midst
of an update of f_pos (which takes more than one instruction on
various architectures), e.g. reading an f_pos, which has only the
lower half of a 64 bit field updated? I agree that you shouldn't
have seeks racing in parallel but I think it is preferable to get
either the updated f_pos or the earlier f_pos not something 1/2
updated.
On Jan 27, 2008 11:56 AM, Trond Myklebust <Trond.Myklebust@netapp.com> wrote:
>
> On Sun, 2008-01-27 at 10:57 -0600, Steve French wrote:
> > Don't you need to a spinlock/spinunlock(i_lock) or something similar
> > (there isn't a spinlock in the file struct unfortunately) around the
> > reads and writes from f_pos in fs/read_write.c in remote_llseek with
> > your patch since the reads/writes from that field are not necessarily
> > atomic and threads could be racing in seek on the same file struct?
>
> Where does is state in POSIX or SUS that we need to cater to that kind
> of application?
> In any case, the current behaviour of f_pos if two threads are sharing
> the file struct is undefined no matter whether you spinlock or not,
> since there is no special locking around sys_read() or sys_write().
>
> Trond
>
--
Thanks,
Steve
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-27 16:57 ` Steve French
@ 2008-01-27 17:56 ` Trond Myklebust
2008-01-27 22:18 ` Steve French
2008-01-28 2:43 ` Andi Kleen
1 sibling, 1 reply; 24+ messages in thread
From: Trond Myklebust @ 2008-01-27 17:56 UTC (permalink / raw)
To: Steve French
Cc: Andi Kleen, swhiteho, sfrench, vandrove, linux-kernel,
linux-fsdevel, akpm
On Sun, 2008-01-27 at 10:57 -0600, Steve French wrote:
> Don't you need to a spinlock/spinunlock(i_lock) or something similar
> (there isn't a spinlock in the file struct unfortunately) around the
> reads and writes from f_pos in fs/read_write.c in remote_llseek with
> your patch since the reads/writes from that field are not necessarily
> atomic and threads could be racing in seek on the same file struct?
Where does is state in POSIX or SUS that we need to cater to that kind
of application?
In any case, the current behaviour of f_pos if two threads are sharing
the file struct is undefined no matter whether you spinlock or not,
since there is no special locking around sys_read() or sys_write().
Trond
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-27 2:17 ` [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek Andi Kleen
@ 2008-01-27 16:57 ` Steve French
2008-01-27 17:56 ` Trond Myklebust
2008-01-28 2:43 ` Andi Kleen
0 siblings, 2 replies; 24+ messages in thread
From: Steve French @ 2008-01-27 16:57 UTC (permalink / raw)
To: Andi Kleen
Cc: Trond.Myklebust, swhiteho, sfrench, vandrove, linux-kernel,
linux-fsdevel, akpm
Don't you need to a spinlock/spinunlock(i_lock) or something similar
(there isn't a spinlock in the file struct unfortunately) around the
reads and writes from f_pos in fs/read_write.c in remote_llseek with
your patch since the reads/writes from that field are not necessarily
atomic and threads could be racing in seek on the same file struct?
On Jan 26, 2008 8:17 PM, Andi Kleen <ak@suse.de> wrote:
>
> - Replace remote_llseek with remote_llseek_unlocked (to force compilation
> failures in all users)
> - Change all users to either use remote_llseek directly or take the
> BKL around. I changed the file systems who don't use the BKL
> for anything (CIFS, GFS) to call it directly. NCPFS and SMBFS and NFS
> take the BKL, but explicitely in their own source now.
>
> I moved them all over in a single patch to avoid unbisectable sections.
>
> Cc: Trond.Myklebust@netapp.com
> Cc: swhiteho@redhat.com
> Cc: sfrench@samba.org
> Cc: vandrove@vc.cvut.cz
>
> Signed-off-by: Andi Kleen <ak@suse.de>
>
> ---
> fs/cifs/cifsfs.c | 2 +-
> fs/gfs2/ops_file.c | 4 ++--
> fs/ncpfs/file.c | 12 +++++++++++-
> fs/nfs/file.c | 6 +++++-
> fs/read_write.c | 7 +++----
> fs/smbfs/file.c | 11 ++++++++++-
> include/linux/fs.h | 3 ++-
> 7 files changed, 34 insertions(+), 11 deletions(-)
>
> Index: linux/fs/cifs/cifsfs.c
> ===================================================================
> --- linux.orig/fs/cifs/cifsfs.c
> +++ linux/fs/cifs/cifsfs.c
> @@ -549,7 +549,7 @@ static loff_t cifs_llseek(struct file *f
> if (retval < 0)
> return (loff_t)retval;
> }
> - return remote_llseek(file, offset, origin);
> + return remote_llseek_unlocked(file, offset, origin);
> }
>
> static struct file_system_type cifs_fs_type = {
> Index: linux/fs/gfs2/ops_file.c
> ===================================================================
> --- linux.orig/fs/gfs2/ops_file.c
> +++ linux/fs/gfs2/ops_file.c
> @@ -107,11 +107,11 @@ static loff_t gfs2_llseek(struct file *f
> error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
> &i_gh);
> if (!error) {
> - error = remote_llseek(file, offset, origin);
> + error = remote_llseek_unlocked(file, offset, origin);
> gfs2_glock_dq_uninit(&i_gh);
> }
> } else
> - error = remote_llseek(file, offset, origin);
> + error = remote_llseek_unlocked(file, offset, origin);
>
> return error;
> }
> Index: linux/fs/ncpfs/file.c
> ===================================================================
> --- linux.orig/fs/ncpfs/file.c
> +++ linux/fs/ncpfs/file.c
> @@ -18,6 +18,7 @@
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
> #include <linux/sched.h>
> +#include <linux/smp_lock.h>
>
> #include <linux/ncp_fs.h>
> #include "ncplib_kernel.h"
> @@ -281,9 +282,18 @@ static int ncp_release(struct inode *ino
> return 0;
> }
>
> +static loff_t ncp_remote_llseek(struct file *file, loff_t offset, int origin)
> +{
> + loff_t ret;
> + lock_kernel();
> + ret = remote_llseek_unlocked(file, offset, origin);
> + unlock_kernel();
> + return ret;
> +}
> +
> const struct file_operations ncp_file_operations =
> {
> - .llseek = remote_llseek,
> + .llseek = ncp_remote_llseek,
> .read = ncp_file_read,
> .write = ncp_file_write,
> .ioctl = ncp_ioctl,
> Index: linux/fs/read_write.c
> ===================================================================
> --- linux.orig/fs/read_write.c
> +++ linux/fs/read_write.c
> @@ -58,11 +58,10 @@ loff_t generic_file_llseek(struct file *
>
> EXPORT_SYMBOL(generic_file_llseek);
>
> -loff_t remote_llseek(struct file *file, loff_t offset, int origin)
> +loff_t remote_llseek_unlocked(struct file *file, loff_t offset, int origin)
> {
> long long retval;
>
> - lock_kernel();
> switch (origin) {
> case SEEK_END:
> offset += i_size_read(file->f_path.dentry->d_inode);
> @@ -73,15 +72,15 @@ loff_t remote_llseek(struct file *file,
> retval = -EINVAL;
> if (offset>=0 && offset<=file->f_path.dentry->d_inode->i_sb->s_maxbytes) {
> if (offset != file->f_pos) {
> + /* AK: do we need a lock for those? */
> file->f_pos = offset;
> file->f_version = 0;
> }
> retval = offset;
> }
> - unlock_kernel();
> return retval;
> }
> -EXPORT_SYMBOL(remote_llseek);
> +EXPORT_SYMBOL(remote_llseek_unlocked);
>
> loff_t no_llseek(struct file *file, loff_t offset, int origin)
> {
> Index: linux/fs/smbfs/file.c
> ===================================================================
> --- linux.orig/fs/smbfs/file.c
> +++ linux/fs/smbfs/file.c
> @@ -422,9 +422,18 @@ smb_file_permission(struct inode *inode,
> return error;
> }
>
> +static loff_t smb_remote_llseek(struct file *file, loff_t offset, int origin)
> +{
> + loff_t ret;
> + lock_kernel();
> + ret = remote_llseek_unlocked(file, offset, origin);
> + unlock_kernel();
> + return ret;
> +}
> +
> const struct file_operations smb_file_operations =
> {
> - .llseek = remote_llseek,
> + .llseek = smb_remote_llseek,
> .read = do_sync_read,
> .aio_read = smb_file_aio_read,
> .write = do_sync_write,
> Index: linux/include/linux/fs.h
> ===================================================================
> --- linux.orig/include/linux/fs.h
> +++ linux/include/linux/fs.h
> @@ -1825,7 +1825,8 @@ extern void
> file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
> extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
> extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
> -extern loff_t remote_llseek(struct file *file, loff_t offset, int origin);
> +extern loff_t remote_llseek_unlocked(struct file *file, loff_t offset,
> + int origin);
> extern int generic_file_open(struct inode * inode, struct file * filp);
> extern int nonseekable_open(struct inode * inode, struct file * filp);
>
> Index: linux/fs/nfs/file.c
> ===================================================================
> --- linux.orig/fs/nfs/file.c
> +++ linux/fs/nfs/file.c
> @@ -166,6 +166,7 @@ force_reval:
>
> static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
> {
> + loff_t loff;
> /* origin == SEEK_END => we must revalidate the cached file length */
> if (origin == SEEK_END) {
> struct inode *inode = filp->f_mapping->host;
> @@ -173,7 +174,10 @@ static loff_t nfs_file_llseek(struct fil
> if (retval < 0)
> return (loff_t)retval;
> }
> - return remote_llseek(filp, offset, origin);
> + lock_kernel(); /* BKL needed? */
> + loff = remote_llseek_unlocked(filp, offset, origin);
> + unlock_kernel();
> + return loff;
> }
>
> /*
>
--
Thanks,
Steve
^ permalink raw reply [flat|nested] 24+ messages in thread
* [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek
2008-01-27 2:17 [PATCH] [0/18] Implement some low hanging BKL removal fruit in fs/* Andi Kleen
@ 2008-01-27 2:17 ` Andi Kleen
2008-01-27 16:57 ` Steve French
0 siblings, 1 reply; 24+ messages in thread
From: Andi Kleen @ 2008-01-27 2:17 UTC (permalink / raw)
To: Trond.Myklebust, swhiteho, sfrench, vandrove, linux-kernel,
linux-fsdevel, akpm
- Replace remote_llseek with remote_llseek_unlocked (to force compilation
failures in all users)
- Change all users to either use remote_llseek directly or take the
BKL around. I changed the file systems who don't use the BKL
for anything (CIFS, GFS) to call it directly. NCPFS and SMBFS and NFS
take the BKL, but explicitely in their own source now.
I moved them all over in a single patch to avoid unbisectable sections.
Cc: Trond.Myklebust@netapp.com
Cc: swhiteho@redhat.com
Cc: sfrench@samba.org
Cc: vandrove@vc.cvut.cz
Signed-off-by: Andi Kleen <ak@suse.de>
---
fs/cifs/cifsfs.c | 2 +-
fs/gfs2/ops_file.c | 4 ++--
fs/ncpfs/file.c | 12 +++++++++++-
fs/nfs/file.c | 6 +++++-
fs/read_write.c | 7 +++----
fs/smbfs/file.c | 11 ++++++++++-
include/linux/fs.h | 3 ++-
7 files changed, 34 insertions(+), 11 deletions(-)
Index: linux/fs/cifs/cifsfs.c
===================================================================
--- linux.orig/fs/cifs/cifsfs.c
+++ linux/fs/cifs/cifsfs.c
@@ -549,7 +549,7 @@ static loff_t cifs_llseek(struct file *f
if (retval < 0)
return (loff_t)retval;
}
- return remote_llseek(file, offset, origin);
+ return remote_llseek_unlocked(file, offset, origin);
}
static struct file_system_type cifs_fs_type = {
Index: linux/fs/gfs2/ops_file.c
===================================================================
--- linux.orig/fs/gfs2/ops_file.c
+++ linux/fs/gfs2/ops_file.c
@@ -107,11 +107,11 @@ static loff_t gfs2_llseek(struct file *f
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
&i_gh);
if (!error) {
- error = remote_llseek(file, offset, origin);
+ error = remote_llseek_unlocked(file, offset, origin);
gfs2_glock_dq_uninit(&i_gh);
}
} else
- error = remote_llseek(file, offset, origin);
+ error = remote_llseek_unlocked(file, offset, origin);
return error;
}
Index: linux/fs/ncpfs/file.c
===================================================================
--- linux.orig/fs/ncpfs/file.c
+++ linux/fs/ncpfs/file.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
+#include <linux/smp_lock.h>
#include <linux/ncp_fs.h>
#include "ncplib_kernel.h"
@@ -281,9 +282,18 @@ static int ncp_release(struct inode *ino
return 0;
}
+static loff_t ncp_remote_llseek(struct file *file, loff_t offset, int origin)
+{
+ loff_t ret;
+ lock_kernel();
+ ret = remote_llseek_unlocked(file, offset, origin);
+ unlock_kernel();
+ return ret;
+}
+
const struct file_operations ncp_file_operations =
{
- .llseek = remote_llseek,
+ .llseek = ncp_remote_llseek,
.read = ncp_file_read,
.write = ncp_file_write,
.ioctl = ncp_ioctl,
Index: linux/fs/read_write.c
===================================================================
--- linux.orig/fs/read_write.c
+++ linux/fs/read_write.c
@@ -58,11 +58,10 @@ loff_t generic_file_llseek(struct file *
EXPORT_SYMBOL(generic_file_llseek);
-loff_t remote_llseek(struct file *file, loff_t offset, int origin)
+loff_t remote_llseek_unlocked(struct file *file, loff_t offset, int origin)
{
long long retval;
- lock_kernel();
switch (origin) {
case SEEK_END:
offset += i_size_read(file->f_path.dentry->d_inode);
@@ -73,15 +72,15 @@ loff_t remote_llseek(struct file *file,
retval = -EINVAL;
if (offset>=0 && offset<=file->f_path.dentry->d_inode->i_sb->s_maxbytes) {
if (offset != file->f_pos) {
+ /* AK: do we need a lock for those? */
file->f_pos = offset;
file->f_version = 0;
}
retval = offset;
}
- unlock_kernel();
return retval;
}
-EXPORT_SYMBOL(remote_llseek);
+EXPORT_SYMBOL(remote_llseek_unlocked);
loff_t no_llseek(struct file *file, loff_t offset, int origin)
{
Index: linux/fs/smbfs/file.c
===================================================================
--- linux.orig/fs/smbfs/file.c
+++ linux/fs/smbfs/file.c
@@ -422,9 +422,18 @@ smb_file_permission(struct inode *inode,
return error;
}
+static loff_t smb_remote_llseek(struct file *file, loff_t offset, int origin)
+{
+ loff_t ret;
+ lock_kernel();
+ ret = remote_llseek_unlocked(file, offset, origin);
+ unlock_kernel();
+ return ret;
+}
+
const struct file_operations smb_file_operations =
{
- .llseek = remote_llseek,
+ .llseek = smb_remote_llseek,
.read = do_sync_read,
.aio_read = smb_file_aio_read,
.write = do_sync_write,
Index: linux/include/linux/fs.h
===================================================================
--- linux.orig/include/linux/fs.h
+++ linux/include/linux/fs.h
@@ -1825,7 +1825,8 @@ extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
-extern loff_t remote_llseek(struct file *file, loff_t offset, int origin);
+extern loff_t remote_llseek_unlocked(struct file *file, loff_t offset,
+ int origin);
extern int generic_file_open(struct inode * inode, struct file * filp);
extern int nonseekable_open(struct inode * inode, struct file * filp);
Index: linux/fs/nfs/file.c
===================================================================
--- linux.orig/fs/nfs/file.c
+++ linux/fs/nfs/file.c
@@ -166,6 +166,7 @@ force_reval:
static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
{
+ loff_t loff;
/* origin == SEEK_END => we must revalidate the cached file length */
if (origin == SEEK_END) {
struct inode *inode = filp->f_mapping->host;
@@ -173,7 +174,10 @@ static loff_t nfs_file_llseek(struct fil
if (retval < 0)
return (loff_t)retval;
}
- return remote_llseek(filp, offset, origin);
+ lock_kernel(); /* BKL needed? */
+ loff = remote_llseek_unlocked(filp, offset, origin);
+ unlock_kernel();
+ return loff;
}
/*
^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2008-01-28 19:35 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <9Q5hR-3MI-9@gated-at.bofh.it>
[not found] ` <9Q5rE-3ZD-17@gated-at.bofh.it>
2008-01-27 11:14 ` [PATCH] [14/18] BKL-removal: Add unlocked_fasync Bodo Eggert
2008-01-27 16:31 ` [PATCH] [14/18] BKL-removal: Add unlocked_fasync v2 Andi Kleen
[not found] ` <9Qsob-7is-1@gated-at.bofh.it>
[not found] ` <9QtDz-121-11@gated-at.bofh.it>
[not found] ` <9QtWX-1qg-15@gated-at.bofh.it>
[not found] ` <9Qugj-1PK-1@gated-at.bofh.it>
2008-01-28 9:44 ` [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek Bodo Eggert
2008-01-27 2:17 [PATCH] [0/18] Implement some low hanging BKL removal fruit in fs/* Andi Kleen
2008-01-27 2:17 ` [PATCH] [8/18] BKL-removal: Remove BKL from remote_llseek Andi Kleen
2008-01-27 16:57 ` Steve French
2008-01-27 17:56 ` Trond Myklebust
2008-01-27 22:18 ` Steve French
2008-01-27 23:08 ` Trond Myklebust
2008-01-28 2:58 ` Andi Kleen
2008-01-28 4:13 ` Trond Myklebust
2008-01-28 4:38 ` Andi Kleen
2008-01-28 4:51 ` Trond Myklebust
2008-01-28 5:13 ` Andrew Morton
2008-01-28 8:17 ` Andi Kleen
2008-01-28 18:33 ` Steve French
2008-01-28 19:34 ` Dave Kleikamp
2008-01-28 12:56 ` Alan Cox
2008-01-28 13:27 ` Andi Kleen
2008-01-28 13:38 ` Alan Cox
2008-01-28 14:10 ` Andi Kleen
2008-01-28 14:50 ` Alan Cox
2008-01-28 15:13 ` Diego Calleja
2008-01-28 2:44 ` Andi Kleen
2008-01-28 2:43 ` Andi Kleen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).