LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()
@ 2007-01-11 19:08 Hoang-Nam Nguyen
2007-01-11 19:20 ` Christoph Hellwig
0 siblings, 1 reply; 7+ messages in thread
From: Hoang-Nam Nguyen @ 2007-01-11 19:08 UTC (permalink / raw)
To: Roland Dreier, hch, linux-kernel, linuxppc-dev, openfabrics-ewg,
openib-general
Cc: raisch
Hello Roland and Christoph H.!
This is a patch for ehca_cq.c. It removes all direct calls of do_mmap()/munmap()
when creating and destroying a completion queue respectively.
Thanks
Nam
Signed-off-by Hoang-Nam Nguyen <hnguyen@de.ibm.com>
---
ehca_cq.c | 65 +++++++++++++++-----------------------------------------------
1 files changed, 16 insertions(+), 49 deletions(-)
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 93995b6..e86585a 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -267,7 +267,6 @@ struct ib_cq *ehca_create_cq(struct ib_d
if (context) {
struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
struct ehca_create_cq_resp resp;
- struct vm_area_struct *vma;
memset(&resp, 0, sizeof(resp));
resp.cq_number = my_cq->cq_number;
resp.token = my_cq->token;
@@ -276,40 +275,14 @@ struct ib_cq *ehca_create_cq(struct ib_d
resp.ipz_queue.queue_length = ipz_queue->queue_length;
resp.ipz_queue.pagesize = ipz_queue->pagesize;
resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
- ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000,
- ipz_queue->queue_length,
- (void**)&resp.ipz_queue.queue,
- &vma);
- if (ret) {
- ehca_err(device, "Could not mmap queue pages");
- cq = ERR_PTR(ret);
- goto create_cq_exit4;
- }
- my_cq->uspace_queue = resp.ipz_queue.queue;
- resp.galpas = my_cq->galpas;
- ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
- (void**)&resp.galpas.kernel.fw_handle,
- &vma);
- if (ret) {
- ehca_err(device, "Could not mmap fw_handle");
- cq = ERR_PTR(ret);
- goto create_cq_exit5;
- }
- my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, "Copy to udata failed.");
- goto create_cq_exit6;
+ goto create_cq_exit4;
}
}
return cq;
-create_cq_exit6:
- ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
-
-create_cq_exit5:
- ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
-
create_cq_exit4:
ipz_queue_dtor(&my_cq->ipz_queue);
@@ -333,7 +306,6 @@ create_cq_exit1:
int ehca_destroy_cq(struct ib_cq *cq)
{
u64 h_ret;
- int ret;
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
int cq_num = my_cq->cq_number;
struct ib_device *device = cq->device;
@@ -343,6 +315,20 @@ int ehca_destroy_cq(struct ib_cq *cq)
u32 cur_pid = current->tgid;
unsigned long flags;
+ if (cq->uobject) {
+ if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
+ ehca_err(device, "Resources still referenced in "
+ "user space cq_num=%x", my_cq->cq_number);
+ return -EINVAL;
+ }
+ if (my_cq->ownpid != cur_pid) {
+ ehca_err(device, "Invalid caller pid=%x ownpid=%x "
+ "cq_num=%x",
+ cur_pid, my_cq->ownpid, my_cq->cq_number);
+ return -EINVAL;
+ }
+ }
+
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
while (my_cq->nr_callbacks)
yield();
@@ -350,25 +336,6 @@ int ehca_destroy_cq(struct ib_cq *cq)
idr_remove(&ehca_cq_idr, my_cq->token);
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
- if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
- ehca_err(device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_cq->ownpid);
- return -EINVAL;
- }
-
- /* un-mmap if vma alloc */
- if (my_cq->uspace_queue ) {
- ret = ehca_munmap(my_cq->uspace_queue,
- my_cq->ipz_queue.queue_length);
- if (ret)
- ehca_err(device, "Could not munmap queue ehca_cq=%p "
- "cq_num=%x", my_cq, cq_num);
- ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
- if (ret)
- ehca_err(device, "Could not munmap fwh ehca_cq=%p "
- "cq_num=%x", my_cq, cq_num);
- }
-
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
if (h_ret == H_R_STATE) {
/* cq in err: read err data and destroy it forcibly */
@@ -397,7 +364,7 @@ int ehca_resize_cq(struct ib_cq *cq, int
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
u32 cur_pid = current->tgid;
- if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+ if (cq->uobject && my_cq->ownpid != cur_pid) {
ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
cur_pid, my_cq->ownpid);
return -EINVAL;
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()
2007-01-11 19:08 [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap() Hoang-Nam Nguyen
@ 2007-01-11 19:20 ` Christoph Hellwig
2007-01-11 19:40 ` Nathan Lynch
2007-01-12 15:36 ` Hoang-Nam Nguyen
0 siblings, 2 replies; 7+ messages in thread
From: Christoph Hellwig @ 2007-01-11 19:20 UTC (permalink / raw)
To: Hoang-Nam Nguyen
Cc: Roland Dreier, hch, linux-kernel, linuxppc-dev, openfabrics-ewg,
openib-general, raisch
On Thu, Jan 11, 2007 at 08:08:36PM +0100, Hoang-Nam Nguyen wrote:
> Hello Roland and Christoph H.!
> This is a patch for ehca_cq.c. It removes all direct calls of do_mmap()/munmap()
> when creating and destroying a completion queue respectively.
> Thanks
> Nam
This patch looks good, but there are some issues with the surrounding code:
> + if (my_cq->ownpid != cur_pid) {
> + ehca_err(device, "Invalid caller pid=%x ownpid=%x "
> + "cq_num=%x",
> + cur_pid, my_cq->ownpid, my_cq->cq_number);
> + return -EINVAL;
> + }
(for other reviewers: this is not new code, just moved around)
Owner tracking by pid is really dangerous. File descriptors can be
passed around by unix sockets, a single process can have files open
more than once, etc..
It seems ehca wants to prevent threads other than the creating one
from performing most operations. Can you explain the reason for this?
> spin_lock_irqsave(&ehca_cq_idr_lock, flags);
> while (my_cq->nr_callbacks)
> yield();
Calling yield is a very bad idea in general. You should probably
add a waitqueue that gets woken when nr_callbacks reaches zero to
sleep effectively here.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()
2007-01-11 19:20 ` Christoph Hellwig
@ 2007-01-11 19:40 ` Nathan Lynch
2007-01-11 19:43 ` Christoph Hellwig
2007-01-11 19:56 ` Roland Dreier
2007-01-12 15:36 ` Hoang-Nam Nguyen
1 sibling, 2 replies; 7+ messages in thread
From: Nathan Lynch @ 2007-01-11 19:40 UTC (permalink / raw)
To: Christoph Hellwig, Hoang-Nam Nguyen, Roland Dreier, linux-kernel,
linuxppc-dev, openfabrics-ewg, openib-general, raisch
Christoph Hellwig wrote:
> On Thu, Jan 11, 2007 at 08:08:36PM +0100, Hoang-Nam Nguyen wrote:
>
> > spin_lock_irqsave(&ehca_cq_idr_lock, flags);
> > while (my_cq->nr_callbacks)
> > yield();
>
> Calling yield is a very bad idea in general. You should probably
> add a waitqueue that gets woken when nr_callbacks reaches zero to
> sleep effectively here.
Isn't that code outright buggy? Calling into the scheduler with a
spinlock held and local interrupts disabled...
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()
2007-01-11 19:40 ` Nathan Lynch
@ 2007-01-11 19:43 ` Christoph Hellwig
2007-01-11 19:56 ` Roland Dreier
1 sibling, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2007-01-11 19:43 UTC (permalink / raw)
To: Nathan Lynch
Cc: Christoph Hellwig, Hoang-Nam Nguyen, Roland Dreier, linux-kernel,
linuxppc-dev, openfabrics-ewg, openib-general, raisch
On Thu, Jan 11, 2007 at 01:40:54PM -0600, Nathan Lynch wrote:
> Christoph Hellwig wrote:
> > On Thu, Jan 11, 2007 at 08:08:36PM +0100, Hoang-Nam Nguyen wrote:
> >
> > > spin_lock_irqsave(&ehca_cq_idr_lock, flags);
> > > while (my_cq->nr_callbacks)
> > > yield();
> >
> > Calling yield is a very bad idea in general. You should probably
> > add a waitqueue that gets woken when nr_callbacks reaches zero to
> > sleep effectively here.
>
> Isn't that code outright buggy? Calling into the scheduler with a
> spinlock held and local interrupts disabled...
Umm, yes - of course. I missed the spin_lock_irqsave line just above.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()
2007-01-11 19:40 ` Nathan Lynch
2007-01-11 19:43 ` Christoph Hellwig
@ 2007-01-11 19:56 ` Roland Dreier
2007-01-12 15:23 ` Hoang-Nam Nguyen
1 sibling, 1 reply; 7+ messages in thread
From: Roland Dreier @ 2007-01-11 19:56 UTC (permalink / raw)
To: Nathan Lynch
Cc: Christoph Hellwig, Hoang-Nam Nguyen, linux-kernel, linuxppc-dev,
openfabrics-ewg, openib-general, raisch
> > spin_lock_irqsave(&ehca_cq_idr_lock, flags);
> > while (my_cq->nr_callbacks)
> > yield();
> Isn't that code outright buggy? Calling into the scheduler with a
> spinlock held and local interrupts disabled...
Yes, absolutely -- if nr_callbacks is ever nonzero then this will
obviously crash instantly.
- R.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()
2007-01-11 19:56 ` Roland Dreier
@ 2007-01-12 15:23 ` Hoang-Nam Nguyen
0 siblings, 0 replies; 7+ messages in thread
From: Hoang-Nam Nguyen @ 2007-01-12 15:23 UTC (permalink / raw)
To: Roland Dreier
Cc: Nathan Lynch, Christoph Hellwig, linux-kernel, linuxppc-dev,
openfabrics-ewg, openib-general, raisch
Hi Roland!
> > > spin_lock_irqsave(&ehca_cq_idr_lock, flags);
> > > while (my_cq->nr_callbacks)
> > > yield();
>
> > Isn't that code outright buggy? Calling into the scheduler with a
> > spinlock held and local interrupts disabled...
>
> Yes, absolutely -- if nr_callbacks is ever nonzero then this will
> obviously crash instantly.
As Christoph R. mentioned in another thread I'm sending you a patch
to fix this bug. Thanks to all for this hint!
Purpose of the while loop is to wait until all completion entries
have been processed by a running completion handler. First then
the function continue with destroying completion queue. Thus, we do
unlock and lock around yield(), ie yield() is now called from a normal
process context without active lock. Hope that this pattern is ok.
In addition of yield issue this patch also fixes an unproper use of
spin_unlock() in ehca_irq.c.
Thanks
Nam
Signed-off-by Hoang-Nam Nguyen <hnguyen@de.ibm.com>
---
ehca_cq.c | 5 ++++-
ehca_irq.c | 4 +++-
2 files changed, 7 insertions(+), 2 deletions(-)
diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-11 19:54:06.000000000 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-12 15:27:50.000000000 +0100
@@ -330,8 +330,11 @@ int ehca_destroy_cq(struct ib_cq *cq)
}
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- while (my_cq->nr_callbacks)
+ while (my_cq->nr_callbacks) {
+ spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
yield();
+ spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ }
idr_remove(&ehca_cq_idr, my_cq->token);
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c 2007-01-11 19:53:33.000000000 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c 2007-01-12 15:27:50.000000000 +0100
@@ -440,7 +440,9 @@ void ehca_tasklet_eq(unsigned long data)
cq = idr_find(&ehca_cq_idr, token);
if (cq == NULL) {
- spin_unlock(&ehca_cq_idr_lock);
+ spin_unlock_irqrestore(
+ &ehca_cq_idr_lock,
+ flags);
break;
}
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()
2007-01-11 19:20 ` Christoph Hellwig
2007-01-11 19:40 ` Nathan Lynch
@ 2007-01-12 15:36 ` Hoang-Nam Nguyen
1 sibling, 0 replies; 7+ messages in thread
From: Hoang-Nam Nguyen @ 2007-01-12 15:36 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Roland Dreier, linux-kernel, linuxppc-dev, openfabrics-ewg,
openib-general, raisch
Hi,
> > + if (my_cq->ownpid != cur_pid) {
> > + ehca_err(device, "Invalid caller pid=%x ownpid=%x "
> > + "cq_num=%x",
> > + cur_pid, my_cq->ownpid, my_cq->cq_number);
> > + return -EINVAL;
> > + }
>
> (for other reviewers: this is not new code, just moved around)
>
> Owner tracking by pid is really dangerous. File descriptors can be
> passed around by unix sockets, a single process can have files open
> more than once, etc..
>
> It seems ehca wants to prevent threads other than the creating one
> from performing most operations. Can you explain the reason for this?
you point to the right spot... This has a historic reason as we
have needed to support fork(), system("date") etc for kernel 2.6.9,
hence those vma flags manipulation and this pid checking as proactive
protection/restriction. For newer kernel, I guess >=2.6.12, this checking
were not necessary, but we would feel better after we had tested user
space stuff more thoroughly without this piece of code. Since this is
not new code, can we pls handle this later?
Regards
Nam
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2007-01-12 15:40 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-11 19:08 [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap() Hoang-Nam Nguyen
2007-01-11 19:20 ` Christoph Hellwig
2007-01-11 19:40 ` Nathan Lynch
2007-01-11 19:43 ` Christoph Hellwig
2007-01-11 19:56 ` Roland Dreier
2007-01-12 15:23 ` Hoang-Nam Nguyen
2007-01-12 15:36 ` Hoang-Nam Nguyen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).