LKML Archive on lore.kernel.org help / color / mirror / Atom feed
* [PATCH] ide: don't execute the next queued command from the hard-IRQ context @ 2008-10-11 14:17 Bartlomiej Zolnierkiewicz 2008-10-12 18:02 ` Elias Oltmanns 0 siblings, 1 reply; 5+ messages in thread From: Bartlomiej Zolnierkiewicz @ 2008-10-11 14:17 UTC (permalink / raw) To: linux-kernel; +Cc: linux-ide From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> Subject: [PATCH] ide: don't execute the next queued command from the hard-IRQ context * Tell the block layer that we are not done handling requests by using blk_plug_device() in ide_do_request() (request handling function) and ide_timer_expiry() (timeout handler) if the queue is not empty. * Remove optimization which directly calls ide_do_request() for the next queued command from the ide_intr() (IRQ handler) and ide_timer_expiry(). * Remove no longer needed IRQ masking from ide_do_request() - in case of IDE ports needing serialization disable_irq_nosync()/enable_irq() was used for the (possibly shared) IRQ of the other IDE port. * Put the misplaced comment in the right place in ide_do_request(). * Drop no longer needed 'int masked_irq' argument from ide_do_request(). * Merge ide_do_request() into do_ide_request(). * Remove no longer needed IDE_NO_IRQ define. While at it: * Don't use HWGROUP() macro in do_ide_request(). * Use __func__ in ide_intr(). This patch reduces IRQ hadling latency for IDE and improves the system-wide handling of shared IRQs (which should result in more timeout resistant and stable IDE systems). It also makes it possible to do some further changes later (i.e. replace some busy-waiting delays with sleeping equivalents). Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> --- on top of per-hwgroup locks patch and with a special dedication to people complaining about improving IDE ;) drivers/ide/ide-io.c | 59 ++++++++++++++++++++++----------------------------- include/linux/ide.h | 7 ------ 2 files changed, 26 insertions(+), 40 deletions(-) Index: b/drivers/ide/ide-io.c =================================================================== --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -939,8 +939,10 @@ repeat: * the driver. This makes the driver much more friendlier to shared IRQs * than previous designs, while remaining 100% (?) SMP safe and capable. */ -static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) +void do_ide_request(struct request_queue *q) { + ide_drive_t *orig_drive = q->queuedata; + ide_hwgroup_t *hwgroup = orig_drive->hwif->hwgroup; ide_drive_t *drive; ide_hwif_t *hwif; struct request *rq; @@ -999,8 +1001,11 @@ static void ide_do_request (ide_hwgroup_ } /* no more work for this hwgroup (for now) */ - return; + goto plug_device; } + + if (drive != orig_drive) + goto plug_device; again: hwif = HWIF(drive); if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) { @@ -1055,41 +1060,27 @@ static void ide_do_request (ide_hwgroup_ goto again; /* We clear busy, there should be no pending ATA command at this point. */ hwgroup->busy = 0; - break; + goto plug_device; } hwgroup->rq = rq; - /* - * Some systems have trouble with IDE IRQs arriving while - * the driver is still setting things up. So, here we disable - * the IRQ used by this interface while the request is being started. - * This may look bad at first, but pretty much the same thing - * happens anyway when any interrupt comes in, IDE or otherwise - * -- the kernel masks the IRQ while it is being handled. - */ - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) - disable_irq_nosync(hwif->irq); spin_unlock(&hwgroup->lock); + /* allow other IRQs while we start this request */ local_irq_enable_in_hardirq(); - /* allow other IRQs while we start this request */ startstop = start_request(drive, rq); spin_lock_irq(&hwgroup->lock); - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) - enable_irq(hwif->irq); - if (startstop == ide_stopped) + + if (startstop == ide_stopped) { hwgroup->busy = 0; + goto plug_device; + } } -} + return; -/* - * Passes the stuff to ide_do_request - */ -void do_ide_request(struct request_queue *q) -{ - ide_drive_t *drive = q->queuedata; - - ide_do_request(HWGROUP(drive), IDE_NO_IRQ); +plug_device: + if (!elv_queue_empty(orig_drive->queue)) + blk_plug_device(orig_drive->queue); } /* @@ -1241,11 +1232,13 @@ void ide_timer_expiry (unsigned long dat drive->service_time = jiffies - drive->service_start; spin_lock_irq(&hwgroup->lock); enable_irq(hwif->irq); - if (startstop == ide_stopped) + if (startstop == ide_stopped) { hwgroup->busy = 0; + if (!elv_queue_empty(drive->queue)) + blk_plug_device(drive->queue); + } } } - ide_do_request(hwgroup, IDE_NO_IRQ); spin_unlock_irqrestore(&hwgroup->lock, flags); } @@ -1438,11 +1431,11 @@ irqreturn_t ide_intr (int irq, void *dev if (startstop == ide_stopped) { if (hwgroup->handler == NULL) { /* paranoia */ hwgroup->busy = 0; - ide_do_request(hwgroup, hwif->irq); - } else { - printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler " - "on exit\n", drive->name); - } + if (!elv_queue_empty(drive->queue)) + blk_plug_device(drive->queue); + } else + printk(KERN_ERR "%s: %s: huh? expected NULL handler " + "on exit\n", __func__, drive->name); } out_handled: irq_ret = IRQ_HANDLED; Index: b/include/linux/ide.h =================================================================== --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -32,13 +32,6 @@ # define SUPPORT_VLB_SYNC 1 #endif -/* - * Used to indicate "no IRQ", should be a value that cannot be an IRQ - * number. - */ - -#define IDE_NO_IRQ (-1) - typedef unsigned char byte; /* used everywhere */ /* ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ide: don't execute the next queued command from the hard-IRQ context 2008-10-11 14:17 [PATCH] ide: don't execute the next queued command from the hard-IRQ context Bartlomiej Zolnierkiewicz @ 2008-10-12 18:02 ` Elias Oltmanns 2008-10-15 19:06 ` Bartlomiej Zolnierkiewicz 0 siblings, 1 reply; 5+ messages in thread From: Elias Oltmanns @ 2008-10-12 18:02 UTC (permalink / raw) To: Bartlomiej Zolnierkiewicz; +Cc: linux-kernel, linux-ide Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> wrote: > From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> > Subject: [PATCH] ide: don't execute the next queued command from the hard-IRQ context > > * Tell the block layer that we are not done handling requests by using > blk_plug_device() in ide_do_request() (request handling function) > and ide_timer_expiry() (timeout handler) if the queue is not empty. > > * Remove optimization which directly calls ide_do_request() for the next > queued command from the ide_intr() (IRQ handler) and ide_timer_expiry(). > > * Remove no longer needed IRQ masking from ide_do_request() - in case of > IDE ports needing serialization disable_irq_nosync()/enable_irq() was > used for the (possibly shared) IRQ of the other IDE port. > > * Put the misplaced comment in the right place in ide_do_request(). > > * Drop no longer needed 'int masked_irq' argument from ide_do_request(). > > * Merge ide_do_request() into do_ide_request(). > > * Remove no longer needed IDE_NO_IRQ define. > > While at it: > > * Don't use HWGROUP() macro in do_ide_request(). > > * Use __func__ in ide_intr(). > > This patch reduces IRQ hadling latency for IDE and improves the system-wide > handling of shared IRQs (which should result in more timeout resistant and > stable IDE systems). It also makes it possible to do some further changes > later (i.e. replace some busy-waiting delays with sleeping equivalents). > > Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> > --- > on top of per-hwgroup locks patch and with a special dedication to people > complaining about improving IDE ;) Some comments / questions. Admittedly, I don't always know enough about the things I'm talking about here, but I'm hoping to learn something that way ;-). [...] > Index: b/drivers/ide/ide-io.c > =================================================================== > --- a/drivers/ide/ide-io.c > +++ b/drivers/ide/ide-io.c [...] > @@ -999,8 +1001,11 @@ static void ide_do_request (ide_hwgroup_ > } > > /* no more work for this hwgroup (for now) */ > - return; > + goto plug_device; > } > + > + if (drive != orig_drive) > + goto plug_device; > again: > hwif = HWIF(drive); Didn't you want to get rid of HWIF() too? > if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) { > @@ -1055,41 +1060,27 @@ static void ide_do_request (ide_hwgroup_ > goto again; > /* We clear busy, there should be no pending ATA command at this point. */ > hwgroup->busy = 0; > - break; > + goto plug_device; > } > > hwgroup->rq = rq; > > - /* > - * Some systems have trouble with IDE IRQs arriving while > - * the driver is still setting things up. So, here we disable > - * the IRQ used by this interface while the request is being started. > - * This may look bad at first, but pretty much the same thing > - * happens anyway when any interrupt comes in, IDE or otherwise > - * -- the kernel masks the IRQ while it is being handled. > - */ > - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) > - disable_irq_nosync(hwif->irq); > spin_unlock(&hwgroup->lock); > + /* allow other IRQs while we start this request */ > local_irq_enable_in_hardirq(); > - /* allow other IRQs while we start this request */ That's the part I don't understand completely. Wouldn't it be alright to do just spin_unlock_irq() here and be done with it? SCSI does exactly that and as far as I can see, IDE is in a similar situation now that the ->request_fn() is not called from ide_intr() and ide_timer_expiry() anymore, i.e. the ->request_fn() will always be executed in process context. > startstop = start_request(drive, rq); > spin_lock_irq(&hwgroup->lock); > - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) > - enable_irq(hwif->irq); > - if (startstop == ide_stopped) > + > + if (startstop == ide_stopped) { > hwgroup->busy = 0; > + goto plug_device; This goto statement is wrong. Simply set ->busy to zero and be done with it. This way, the loop will start again and either elv_next_request() returns NULL, in which case the queue need not be plugged, or the next request will be processed immediately, which is exactly what we want. [...] > @@ -1438,11 +1431,11 @@ irqreturn_t ide_intr (int irq, void *dev > if (startstop == ide_stopped) { > if (hwgroup->handler == NULL) { /* paranoia */ > hwgroup->busy = 0; > - ide_do_request(hwgroup, hwif->irq); > - } else { > - printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler " > - "on exit\n", drive->name); > - } > + if (!elv_queue_empty(drive->queue)) > + blk_plug_device(drive->queue); This is perhaps not exactly what you really want. It basically means that there will be a delay (q->unplug_delay) after each command which may well hurt I/O performance. Instead, I'd suggest something like the following: if (!elv_queue_empty(drive->queue)) blk_schedule_queue_run(drive->queue); and a function void blk_schedule_queue_run(struct request_queue *q) { blk_plug_device(q); kblockd_schedule_work(&q->unplug_work); } in blk-core.c. This can also be used as a helper function in blk-core.c itself. Regards, Elias ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ide: don't execute the next queued command from the hard-IRQ context 2008-10-12 18:02 ` Elias Oltmanns @ 2008-10-15 19:06 ` Bartlomiej Zolnierkiewicz 2008-10-22 15:01 ` Elias Oltmanns 0 siblings, 1 reply; 5+ messages in thread From: Bartlomiej Zolnierkiewicz @ 2008-10-15 19:06 UTC (permalink / raw) To: Elias Oltmanns; +Cc: linux-kernel, linux-ide On Sunday 12 October 2008, Elias Oltmanns wrote: > Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> wrote: > > From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> > > Subject: [PATCH] ide: don't execute the next queued command from the hard-IRQ context > > > > * Tell the block layer that we are not done handling requests by using > > blk_plug_device() in ide_do_request() (request handling function) > > and ide_timer_expiry() (timeout handler) if the queue is not empty. > > > > * Remove optimization which directly calls ide_do_request() for the next > > queued command from the ide_intr() (IRQ handler) and ide_timer_expiry(). > > > > * Remove no longer needed IRQ masking from ide_do_request() - in case of > > IDE ports needing serialization disable_irq_nosync()/enable_irq() was > > used for the (possibly shared) IRQ of the other IDE port. > > > > * Put the misplaced comment in the right place in ide_do_request(). > > > > * Drop no longer needed 'int masked_irq' argument from ide_do_request(). > > > > * Merge ide_do_request() into do_ide_request(). > > > > * Remove no longer needed IDE_NO_IRQ define. > > > > While at it: > > > > * Don't use HWGROUP() macro in do_ide_request(). > > > > * Use __func__ in ide_intr(). > > > > This patch reduces IRQ hadling latency for IDE and improves the system-wide > > handling of shared IRQs (which should result in more timeout resistant and > > stable IDE systems). It also makes it possible to do some further changes > > later (i.e. replace some busy-waiting delays with sleeping equivalents). > > > > Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> > > --- > > on top of per-hwgroup locks patch and with a special dedication to people > > complaining about improving IDE ;) > > Some comments / questions. Admittedly, I don't always know enough about > the things I'm talking about here, but I'm hoping to learn something > that way ;-). > > [...] > > Index: b/drivers/ide/ide-io.c > > =================================================================== > > --- a/drivers/ide/ide-io.c > > +++ b/drivers/ide/ide-io.c > [...] > > @@ -999,8 +1001,11 @@ static void ide_do_request (ide_hwgroup_ > > } > > > > /* no more work for this hwgroup (for now) */ > > - return; > > + goto plug_device; > > } > > + > > + if (drive != orig_drive) > > + goto plug_device; > > again: > > hwif = HWIF(drive); > > Didn't you want to get rid of HWIF() too? Fixed. > > if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) { > > @@ -1055,41 +1060,27 @@ static void ide_do_request (ide_hwgroup_ > > goto again; > > /* We clear busy, there should be no pending ATA command at this point. */ > > hwgroup->busy = 0; > > - break; > > + goto plug_device; > > } > > > > hwgroup->rq = rq; > > > > - /* > > - * Some systems have trouble with IDE IRQs arriving while > > - * the driver is still setting things up. So, here we disable > > - * the IRQ used by this interface while the request is being started. > > - * This may look bad at first, but pretty much the same thing > > - * happens anyway when any interrupt comes in, IDE or otherwise > > - * -- the kernel masks the IRQ while it is being handled. > > - */ > > - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) > > - disable_irq_nosync(hwif->irq); > > spin_unlock(&hwgroup->lock); > > + /* allow other IRQs while we start this request */ > > local_irq_enable_in_hardirq(); > > - /* allow other IRQs while we start this request */ > > That's the part I don't understand completely. Wouldn't it be alright to > do just spin_unlock_irq() here and be done with it? SCSI does exactly > that and as far as I can see, IDE is in a similar situation now that the > ->request_fn() is not called from ide_intr() and ide_timer_expiry() > anymore, i.e. the ->request_fn() will always be executed in process > context. Fixed. > > startstop = start_request(drive, rq); > > spin_lock_irq(&hwgroup->lock); > > - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) > > - enable_irq(hwif->irq); > > - if (startstop == ide_stopped) > > + > > + if (startstop == ide_stopped) { > > hwgroup->busy = 0; > > + goto plug_device; > > This goto statement is wrong. Simply set ->busy to zero and be done with > it. This way, the loop will start again and either elv_next_request() > returns NULL, in which case the queue need not be plugged, or the next > request will be processed immediately, which is exactly what we want. The problem is that the next loop can choose the different drive than the current one so we can end up with the situation where we will "lose" the blk_plug_device() call. I fixed it by just inlining "plug_device" code for now. > [...] > > @@ -1438,11 +1431,11 @@ irqreturn_t ide_intr (int irq, void *dev > > if (startstop == ide_stopped) { > > if (hwgroup->handler == NULL) { /* paranoia */ > > hwgroup->busy = 0; > > - ide_do_request(hwgroup, hwif->irq); > > - } else { > > - printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler " > > - "on exit\n", drive->name); > > - } > > + if (!elv_queue_empty(drive->queue)) > > + blk_plug_device(drive->queue); > > This is perhaps not exactly what you really want. It basically means > that there will be a delay (q->unplug_delay) after each command which > may well hurt I/O performance. Instead, I'd suggest something like the > following: > > if (!elv_queue_empty(drive->queue)) > blk_schedule_queue_run(drive->queue); > > and a function > > void blk_schedule_queue_run(struct request_queue *q) > { > blk_plug_device(q); > kblockd_schedule_work(&q->unplug_work); > } > > in blk-core.c. This can also be used as a helper function in blk-core.c > itself. Care to make a patch adding such helper to blk-core.c? I'll update this patch to use it then, in the meantime v1->v2 interdiff: ... v2: Changes per review from Elias Oltmanns: - fix wrong goto statement in 'if (startstop == ide_stopped)' block - use spin_unlock_irq() - don't use obsolete HWIF() macro ... diff -u b/drivers/ide/ide-io.c b/drivers/ide/ide-io.c --- b/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -1006,8 +1006,9 @@ if (drive != orig_drive) goto plug_device; - again: - hwif = HWIF(drive); +again: + hwif = drive->hwif; + if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) { /* * set nIEN for previous hwif, drives in the @@ -1065,15 +1066,14 @@ hwgroup->rq = rq; - spin_unlock(&hwgroup->lock); - /* allow other IRQs while we start this request */ - local_irq_enable_in_hardirq(); + spin_unlock_irq(&hwgroup->lock); startstop = start_request(drive, rq); spin_lock_irq(&hwgroup->lock); if (startstop == ide_stopped) { hwgroup->busy = 0; - goto plug_device; + if (!elv_queue_empty(orig_drive->queue)) + blk_plug_device(orig_drive->queue); } } return; ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ide: don't execute the next queued command from the hard-IRQ context 2008-10-15 19:06 ` Bartlomiej Zolnierkiewicz @ 2008-10-22 15:01 ` Elias Oltmanns 2008-10-22 20:47 ` Bartlomiej Zolnierkiewicz 0 siblings, 1 reply; 5+ messages in thread From: Elias Oltmanns @ 2008-10-22 15:01 UTC (permalink / raw) To: Bartlomiej Zolnierkiewicz; +Cc: linux-kernel, linux-ide Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> wrote: > On Sunday 12 October 2008, Elias Oltmanns wrote: >> Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> wrote: > >> > From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> >> > Subject: [PATCH] ide: don't execute the next queued command from the hard-IRQ context >> > >> > * Tell the block layer that we are not done handling requests by using >> > blk_plug_device() in ide_do_request() (request handling function) >> > and ide_timer_expiry() (timeout handler) if the queue is not empty. >> > >> > * Remove optimization which directly calls ide_do_request() for the next >> > queued command from the ide_intr() (IRQ handler) and ide_timer_expiry(). >> > >> > * Remove no longer needed IRQ masking from ide_do_request() - in case of >> > IDE ports needing serialization disable_irq_nosync()/enable_irq() was >> > used for the (possibly shared) IRQ of the other IDE port. >> > >> > * Put the misplaced comment in the right place in ide_do_request(). >> > >> > * Drop no longer needed 'int masked_irq' argument from ide_do_request(). >> > >> > * Merge ide_do_request() into do_ide_request(). >> > >> > * Remove no longer needed IDE_NO_IRQ define. >> > >> > While at it: >> > >> > * Don't use HWGROUP() macro in do_ide_request(). >> > >> > * Use __func__ in ide_intr(). >> > >> > This patch reduces IRQ hadling latency for IDE and improves the system-wide >> > handling of shared IRQs (which should result in more timeout resistant and >> > stable IDE systems). It also makes it possible to do some further changes >> > later (i.e. replace some busy-waiting delays with sleeping equivalents). >> > >> > Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> >> > --- [...] >> > Index: b/drivers/ide/ide-io.c >> > =================================================================== >> > --- a/drivers/ide/ide-io.c >> > +++ b/drivers/ide/ide-io.c [...] >> > startstop = start_request(drive, rq); >> > spin_lock_irq(&hwgroup->lock); >> > - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) >> > - enable_irq(hwif->irq); >> > - if (startstop == ide_stopped) >> > + >> > + if (startstop == ide_stopped) { >> > hwgroup->busy = 0; >> > + goto plug_device; >> >> This goto statement is wrong. Simply set ->busy to zero and be done with >> it. This way, the loop will start again and either elv_next_request() >> returns NULL, in which case the queue need not be plugged, or the next >> request will be processed immediately, which is exactly what we want. > > The problem is that the next loop can choose the different drive than > the current one so we can end up with the situation where we will "lose" > the blk_plug_device() call. > > I fixed it by just inlining "plug_device" code for now. Right, I had missed that. Still, I'm not really convinced yet that this is the right way to handle things. In fact, I believe that the role of choose_drive() has changed since it is now called directly from the ->request_fn() and it should probably be rewritten and renamed along the way. However, this needs further discussion and the issue below has some bearing on this too. > >> [...] >> > @@ -1438,11 +1431,11 @@ irqreturn_t ide_intr (int irq, void *dev >> > if (startstop == ide_stopped) { >> > if (hwgroup->handler == NULL) { /* paranoia */ >> > hwgroup->busy = 0; >> > - ide_do_request(hwgroup, hwif->irq); >> > - } else { >> > - printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler " >> > - "on exit\n", drive->name); >> > - } >> > + if (!elv_queue_empty(drive->queue)) >> > + blk_plug_device(drive->queue); >> >> This is perhaps not exactly what you really want. It basically means >> that there will be a delay (q->unplug_delay) after each command which >> may well hurt I/O performance. Instead, I'd suggest something like the >> following: >> >> if (!elv_queue_empty(drive->queue)) >> blk_schedule_queue_run(drive->queue); >> >> and a function >> >> void blk_schedule_queue_run(struct request_queue *q) >> { >> blk_plug_device(q); >> kblockd_schedule_work(&q->unplug_work); >> } >> >> in blk-core.c. This can also be used as a helper function in blk-core.c >> itself. > > Care to make a patch adding such helper to blk-core.c? Thinking about this a bit more, I'd like to raise this issue with Jens and discuss it in some more generality. Regards, Elias ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] ide: don't execute the next queued command from the hard-IRQ context 2008-10-22 15:01 ` Elias Oltmanns @ 2008-10-22 20:47 ` Bartlomiej Zolnierkiewicz 0 siblings, 0 replies; 5+ messages in thread From: Bartlomiej Zolnierkiewicz @ 2008-10-22 20:47 UTC (permalink / raw) To: Elias Oltmanns; +Cc: linux-kernel, linux-ide On Wednesday 22 October 2008, Elias Oltmanns wrote: > Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> wrote: > > On Sunday 12 October 2008, Elias Oltmanns wrote: > >> Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> wrote: > > > >> > From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> > >> > Subject: [PATCH] ide: don't execute the next queued command from the hard-IRQ context > >> > > >> > * Tell the block layer that we are not done handling requests by using > >> > blk_plug_device() in ide_do_request() (request handling function) > >> > and ide_timer_expiry() (timeout handler) if the queue is not empty. > >> > > >> > * Remove optimization which directly calls ide_do_request() for the next > >> > queued command from the ide_intr() (IRQ handler) and ide_timer_expiry(). > >> > > >> > * Remove no longer needed IRQ masking from ide_do_request() - in case of > >> > IDE ports needing serialization disable_irq_nosync()/enable_irq() was > >> > used for the (possibly shared) IRQ of the other IDE port. > >> > > >> > * Put the misplaced comment in the right place in ide_do_request(). > >> > > >> > * Drop no longer needed 'int masked_irq' argument from ide_do_request(). > >> > > >> > * Merge ide_do_request() into do_ide_request(). > >> > > >> > * Remove no longer needed IDE_NO_IRQ define. > >> > > >> > While at it: > >> > > >> > * Don't use HWGROUP() macro in do_ide_request(). > >> > > >> > * Use __func__ in ide_intr(). > >> > > >> > This patch reduces IRQ hadling latency for IDE and improves the system-wide > >> > handling of shared IRQs (which should result in more timeout resistant and > >> > stable IDE systems). It also makes it possible to do some further changes > >> > later (i.e. replace some busy-waiting delays with sleeping equivalents). > >> > > >> > Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> > >> > --- > [...] > >> > Index: b/drivers/ide/ide-io.c > >> > =================================================================== > >> > --- a/drivers/ide/ide-io.c > >> > +++ b/drivers/ide/ide-io.c > [...] > >> > startstop = start_request(drive, rq); > >> > spin_lock_irq(&hwgroup->lock); > >> > - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) > >> > - enable_irq(hwif->irq); > >> > - if (startstop == ide_stopped) > >> > + > >> > + if (startstop == ide_stopped) { > >> > hwgroup->busy = 0; > >> > + goto plug_device; > >> > >> This goto statement is wrong. Simply set ->busy to zero and be done with > >> it. This way, the loop will start again and either elv_next_request() > >> returns NULL, in which case the queue need not be plugged, or the next > >> request will be processed immediately, which is exactly what we want. > > > > The problem is that the next loop can choose the different drive than > > the current one so we can end up with the situation where we will "lose" > > the blk_plug_device() call. > > > > I fixed it by just inlining "plug_device" code for now. > > Right, I had missed that. Still, I'm not really convinced yet that this > is the right way to handle things. In fact, I believe that the role of > choose_drive() has changed since it is now called directly from the > ->request_fn() and it should probably be rewritten and renamed along the > way. However, this needs further discussion and the issue below has some > bearing on this too. Well, in my patch to use per-device request queue lock I was just going to remove choose_drive() since it should be handled at the block layer (probably using per-queue io priorites if needed). --- current draft patch just to visualize the idea drivers/ide/ide-io.c | 172 +++++++++++++++--------------------------------- drivers/ide/ide-probe.c | 3 include/linux/ide.h | 2 3 files changed, 55 insertions(+), 122 deletions(-) Index: b/drivers/ide/ide-io.c =================================================================== --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -828,85 +828,10 @@ void ide_stall_queue (ide_drive_t *drive drive->sleep = timeout + jiffies; drive->dev_flags |= IDE_DFLAG_SLEEPING; } - EXPORT_SYMBOL(ide_stall_queue); -#define WAKEUP(drive) ((drive)->service_start + 2 * (drive)->service_time) - -/** - * choose_drive - select a drive to service - * @hwgroup: hardware group to select on - * - * choose_drive() selects the next drive which will be serviced. - * This is necessary because the IDE layer can't issue commands - * to both drives on the same cable, unlike SCSI. - */ - -static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup) -{ - ide_drive_t *drive, *best; - -repeat: - best = NULL; - drive = hwgroup->drive; - - /* - * drive is doing pre-flush, ordered write, post-flush sequence. even - * though that is 3 requests, it must be seen as a single transaction. - * we must not preempt this drive until that is complete - */ - if (blk_queue_flushing(drive->queue)) { - /* - * small race where queue could get replugged during - * the 3-request flush cycle, just yank the plug since - * we want it to finish asap - */ - blk_remove_plug(drive->queue); - return drive; - } - - do { - u8 dev_s = !!(drive->dev_flags & IDE_DFLAG_SLEEPING); - u8 best_s = (best && !!(best->dev_flags & IDE_DFLAG_SLEEPING)); - - if ((dev_s == 0 || time_after_eq(jiffies, drive->sleep)) && - !elv_queue_empty(drive->queue)) { - if (best == NULL || - (dev_s && (best_s == 0 || time_before(drive->sleep, best->sleep))) || - (best_s == 0 && time_before(WAKEUP(drive), WAKEUP(best)))) { - if (!blk_queue_plugged(drive->queue)) - best = drive; - } - } - } while ((drive = drive->next) != hwgroup->drive); - - if (best && (best->dev_flags & IDE_DFLAG_NICE1) && - (best->dev_flags & IDE_DFLAG_SLEEPING) == 0 && - best != hwgroup->drive && best->service_time > WAIT_MIN_SLEEP) { - long t = (signed long)(WAKEUP(best) - jiffies); - if (t >= WAIT_MIN_SLEEP) { - /* - * We *may* have some time to spare, but first let's see if - * someone can potentially benefit from our nice mood today.. - */ - drive = best->next; - do { - if ((drive->dev_flags & IDE_DFLAG_SLEEPING) == 0 - && time_before(jiffies - best->service_time, WAKEUP(drive)) - && time_before(WAKEUP(drive), jiffies + t)) - { - ide_stall_queue(best, min_t(long, t, 10 * WAIT_MIN_SLEEP)); - goto repeat; - } - } while ((drive = drive->next) != best); - } - } - return best; -} - /* * Issue a new request to a drive from hwgroup - * Caller must have already done spin_lock_irqsave(&hwgroup->lock, ..); * * A hwgroup is a serialized group of IDE interfaces. Usually there is * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640) @@ -941,13 +866,15 @@ repeat: */ void do_ide_request(struct request_queue *q) { - ide_drive_t *orig_drive = q->queuedata; - ide_hwgroup_t *hwgroup = orig_drive->hwif->hwgroup; - ide_drive_t *drive; + ide_drive_t *drive = q->queuedata; + ide_hwgroup_t *hwgroup = drive->hwif->hwgroup; ide_hwif_t *hwif; struct request *rq; ide_startstop_t startstop; + spin_unlock_irq(q->queue_lock); + spin_lock_irq(&hwgroup->lock); + /* for atari only: POSSIBLY BROKEN HERE(?) */ ide_get_lock(ide_intr, hwgroup); @@ -956,21 +883,13 @@ void do_ide_request(struct request_queue while (!hwgroup->busy) { hwgroup->busy = 1; - drive = choose_drive(hwgroup); - if (drive == NULL) { - int sleeping = 0; - unsigned long sleep = 0; /* shut up, gcc */ + + if (1) { hwgroup->rq = NULL; - drive = hwgroup->drive; - do { - if ((drive->dev_flags & IDE_DFLAG_SLEEPING) && - (sleeping == 0 || - time_before(drive->sleep, sleep))) { - sleeping = 1; - sleep = drive->sleep; - } - } while ((drive = drive->next) != hwgroup->drive); - if (sleeping) { + + if (drive->dev_flags & IDE_DFLAG_SLEEPING) { + unsigned long sleep = drive->sleep; + /* * Take a short snooze, and then wake up this hwgroup again. * This gives other hwgroups on the same a chance to @@ -989,23 +908,12 @@ void do_ide_request(struct request_queue mod_timer(&hwgroup->timer, sleep); /* we purposely leave hwgroup->busy==1 * while sleeping */ - } else { - /* Ugly, but how can we sleep for the lock - * otherwise? perhaps from tq_disk? - */ - /* for atari only */ - ide_release_lock(); - hwgroup->busy = 0; + /* no more work for this hwgroup (for now) */ + goto plug_device; } - - /* no more work for this hwgroup (for now) */ - goto plug_device; } - if (drive != orig_drive) - goto plug_device; - hwif = drive->hwif; if (hwgroup->hwif->sharing_irq && hwif != hwgroup->hwif) { @@ -1019,13 +927,17 @@ void do_ide_request(struct request_queue hwgroup->hwif = hwif; hwgroup->drive = drive; drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED); - drive->service_start = jiffies; + spin_unlock_irq(&hwgroup->lock); + spin_lock_irq(q->queue_lock); /* * we know that the queue isn't empty, but this can happen * if the q->prep_rq_fn() decides to kill a request */ rq = elv_next_request(drive->queue); + spin_unlock_irq(q->queue_lock); + spin_lock_irq(&hwgroup->lock); + if (!rq) { hwgroup->busy = 0; break; @@ -1060,15 +972,21 @@ void do_ide_request(struct request_queue if (startstop == ide_stopped) { hwgroup->busy = 0; - if (!elv_queue_empty(orig_drive->queue)) - blk_plug_device(orig_drive->queue); + /* give other devices a chance */ + goto plug_device; } } + + spin_unlock_irq(&hwgroup->lock); + spin_lock_irq(q->queue_lock); return; plug_device: - if (!elv_queue_empty(orig_drive->queue)) - blk_plug_device(orig_drive->queue); + spin_unlock_irq(&hwgroup->lock); + spin_lock_irq(q->queue_lock); + + if (!elv_queue_empty(q)) + blk_plug_device(q); } /* @@ -1129,6 +1047,17 @@ out: return ret; } +static void ide_plug_device(ide_drive_t *drive) +{ + struct request_queue *q = drive->queue; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + if (!elv_queue_empty(q)) + blk_plug_device(q); + spin_unlock_irqrestore(q->queue_lock, flags); +} + /** * ide_timer_expiry - handle lack of an IDE interrupt * @data: timer callback magic (hwgroup) @@ -1146,10 +1075,12 @@ out: void ide_timer_expiry (unsigned long data) { ide_hwgroup_t *hwgroup = (ide_hwgroup_t *) data; + ide_drive_t *uninitialized_var(drive); ide_handler_t *handler; ide_expiry_t *expiry; unsigned long flags; unsigned long wait = -1; + int plug_device = 0; spin_lock_irqsave(&hwgroup->lock, flags); @@ -1166,7 +1097,7 @@ void ide_timer_expiry (unsigned long dat hwgroup->busy = 0; } } else { - ide_drive_t *drive = hwgroup->drive; + drive = hwgroup->drive; if (!drive) { printk(KERN_ERR "ide_timer_expiry: hwgroup->drive was NULL\n"); hwgroup->handler = NULL; @@ -1217,17 +1148,19 @@ void ide_timer_expiry (unsigned long dat ide_error(drive, "irq timeout", hwif->tp_ops->read_status(hwif)); } - drive->service_time = jiffies - drive->service_start; + spin_lock_irq(&hwgroup->lock); enable_irq(hwif->irq); if (startstop == ide_stopped) { hwgroup->busy = 0; - if (!elv_queue_empty(drive->queue)) - blk_plug_device(drive->queue); + plug_device = 1; } } } spin_unlock_irqrestore(&hwgroup->lock, flags); + + if (plug_device) + ide_plug_device(drive); } /** @@ -1321,10 +1254,11 @@ irqreturn_t ide_intr (int irq, void *dev unsigned long flags; ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id; ide_hwif_t *hwif = hwgroup->hwif; - ide_drive_t *drive; + ide_drive_t *uninitialized_var(drive); ide_handler_t *handler; ide_startstop_t startstop; irqreturn_t irq_ret = IRQ_NONE; + int plug_device = 0; spin_lock_irqsave(&hwgroup->lock, flags); @@ -1415,12 +1349,10 @@ irqreturn_t ide_intr (int irq, void *dev * same irq as is currently being serviced here, and Linux * won't allow another of the same (on any CPU) until we return. */ - drive->service_time = jiffies - drive->service_start; if (startstop == ide_stopped) { if (hwgroup->handler == NULL) { /* paranoia */ hwgroup->busy = 0; - if (!elv_queue_empty(drive->queue)) - blk_plug_device(drive->queue); + plug_device = 1; } else printk(KERN_ERR "%s: %s: huh? expected NULL handler " "on exit\n", __func__, drive->name); @@ -1429,6 +1361,10 @@ out_handled: irq_ret = IRQ_HANDLED; out: spin_unlock_irqrestore(&hwgroup->lock, flags); + + if (plug_device) + ide_plug_device(drive); + return irq_ret; } Index: b/drivers/ide/ide-probe.c =================================================================== --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -905,8 +905,7 @@ static int ide_init_queue(ide_drive_t *d * do not. */ - q = blk_init_queue_node(do_ide_request, &hwif->hwgroup->lock, - hwif_to_node(hwif)); + q = blk_init_queue_node(do_ide_request, NULL, hwif_to_node(hwif)); if (!q) return 1; Index: b/include/linux/ide.h =================================================================== --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -605,8 +605,6 @@ struct ide_drive_s { unsigned long dev_flags; unsigned long sleep; /* sleep until this time */ - unsigned long service_start; /* time we started last request */ - unsigned long service_time; /* service time of last request */ unsigned long timeout; /* max time to wait for irq */ special_t special; /* special action flags */ ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2008-10-22 20:49 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2008-10-11 14:17 [PATCH] ide: don't execute the next queued command from the hard-IRQ context Bartlomiej Zolnierkiewicz 2008-10-12 18:02 ` Elias Oltmanns 2008-10-15 19:06 ` Bartlomiej Zolnierkiewicz 2008-10-22 15:01 ` Elias Oltmanns 2008-10-22 20:47 ` Bartlomiej Zolnierkiewicz
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).