LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 2.6.21-rc1 2/5] ehca: fix race condition/locking issues in scaling code
@ 2007-02-15 16:07 Hoang-Nam Nguyen
  0 siblings, 0 replies; 2+ messages in thread
From: Hoang-Nam Nguyen @ 2007-02-15 16:07 UTC (permalink / raw)
  To: Roland Dreier, linux-kernel, linuxppc-dev, openib-general, hch, bos
  Cc: raisch, heiko.carstens

fix a race condition in find_next_cpu_online() and some
other locking issues in scaling code


Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
---


 ehca_irq.c |   68 +++++++++++++++++++++++++++++--------------------------------
 1 files changed, 33 insertions(+), 35 deletions(-)


diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index b923b5d..9679b07 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -544,28 +544,30 @@ void ehca_tasklet_eq(unsigned long data)
 
 static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
 {
-	unsigned long flags_last_cpu;
+	int cpu;
+	unsigned long flags;
 
+	WARN_ON_ONCE(!in_interrupt());
 	if (ehca_debug_level)
 		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
 
-	spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
-	pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
-	if (pool->last_cpu == NR_CPUS)
-		pool->last_cpu = first_cpu(cpu_online_map);
-	spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+	spin_lock_irqsave(&pool->last_cpu_lock, flags);
+	cpu = next_cpu(pool->last_cpu, cpu_online_map);
+	if (cpu == NR_CPUS)
+		cpu = first_cpu(cpu_online_map);
+	pool->last_cpu = cpu;
+	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
 
-	return pool->last_cpu;
+	return cpu;
 }
 
 static void __queue_comp_task(struct ehca_cq *__cq,
 			      struct ehca_cpu_comp_task *cct)
 {
-	unsigned long flags_cct;
-	unsigned long flags_cq;
+	unsigned long flags;
 
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
-	spin_lock_irqsave(&__cq->task_lock, flags_cq);
+	spin_lock_irqsave(&cct->task_lock, flags);
+	spin_lock(&__cq->task_lock);
 
 	if (__cq->nr_callbacks == 0) {
 		__cq->nr_callbacks++;
@@ -576,8 +578,8 @@ static void __queue_comp_task(struct ehc
 	else
 		__cq->nr_callbacks++;
 
-	spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+	spin_unlock(&__cq->task_lock);
+	spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static void queue_comp_task(struct ehca_cq *__cq)
@@ -588,69 +590,69 @@ static void queue_comp_task(struct ehca_
 
 	cpu = get_cpu();
 	cpu_id = find_next_online_cpu(pool);
-
 	BUG_ON(!cpu_online(cpu_id));
 
 	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+	BUG_ON(!cct);
 
 	if (cct->cq_jobs > 0) {
 		cpu_id = find_next_online_cpu(pool);
 		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+		BUG_ON(!cct);
 	}
 
 	__queue_comp_task(__cq, cct);
-
-	put_cpu();
-
-	return;
 }
 
 static void run_comp_task(struct ehca_cpu_comp_task* cct)
 {
 	struct ehca_cq *cq;
-	unsigned long flags_cct;
-	unsigned long flags_cq;
+	unsigned long flags;
 
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
+	spin_lock_irqsave(&cct->task_lock, flags);
 
 	while (!list_empty(&cct->cq_list)) {
 		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-		spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+		spin_unlock_irqrestore(&cct->task_lock, flags);
 		comp_event_callback(cq);
-		spin_lock_irqsave(&cct->task_lock, flags_cct);
+		spin_lock_irqsave(&cct->task_lock, flags);
 
-		spin_lock_irqsave(&cq->task_lock, flags_cq);
+		spin_lock(&cq->task_lock);
 		cq->nr_callbacks--;
 		if (cq->nr_callbacks == 0) {
 			list_del_init(cct->cq_list.next);
 			cct->cq_jobs--;
 		}
-		spin_unlock_irqrestore(&cq->task_lock, flags_cq);
-
+		spin_unlock(&cq->task_lock);
 	}
 
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
-	return;
+	spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static int comp_task(void *__cct)
 {
 	struct ehca_cpu_comp_task* cct = __cct;
+	int cql_empty;
 	DECLARE_WAITQUEUE(wait, current);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	while(!kthread_should_stop()) {
 		add_wait_queue(&cct->wait_queue, &wait);
 
-		if (list_empty(&cct->cq_list))
+		spin_lock_irq(&cct->task_lock);
+		cql_empty = list_empty(&cct->cq_list);
+		spin_unlock_irq(&cct->task_lock);
+		if (cql_empty)
 			schedule();
 		else
 			__set_current_state(TASK_RUNNING);
 
 		remove_wait_queue(&cct->wait_queue, &wait);
 
-		if (!list_empty(&cct->cq_list))
+		spin_lock_irq(&cct->task_lock);
+		cql_empty = list_empty(&cct->cq_list);
+		spin_unlock_irq(&cct->task_lock);
+		if (!cql_empty)
 			run_comp_task(__cct);
 
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -693,8 +695,6 @@ static void destroy_comp_task(struct ehc
 
 	if (task)
 		kthread_stop(task);
-
-	return;
 }
 
 static void take_over_work(struct ehca_comp_pool *pool,
@@ -815,6 +815,4 @@ void ehca_destroy_comp_pool(void)
 	free_percpu(pool->cpu_comp_tasks);
 	kfree(pool);
 #endif
-
-	return;
 }


^ permalink raw reply	[flat|nested] 2+ messages in thread
* [PATCH 2.6.21-rc1 2/5] ehca: fix race condition/locking issues in scaling code
@ 2007-02-14 16:41 Hoang-Nam Nguyen
  0 siblings, 0 replies; 2+ messages in thread
From: Hoang-Nam Nguyen @ 2007-02-14 16:41 UTC (permalink / raw)
  To: Roland Dreier, linux-kernel, linuxppc-dev, openib-general, hch
  Cc: raisch, h.carstens

Hi,
this patch fixes a race condition in find_next_cpu_online() and some
other locking issues in scaling code.
Thanks
Nam


Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
---


 ehca_irq.c |   68 +++++++++++++++++++++++++++++--------------------------------
 1 files changed, 33 insertions(+), 35 deletions(-)


diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c       2007-02-14 14:16:45.000000000 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c       2007-02-14 14:16:35.000000000 +0100
@@ -544,28 +544,30 @@ void ehca_tasklet_eq(unsigned long data)

 static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
 {
-       unsigned long flags_last_cpu;
+       int cpu;
+       unsigned long flags;

+       WARN_ON_ONCE(!in_interrupt());
        if (ehca_debug_level)
                ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");

-       spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
-       pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
-       if (pool->last_cpu == NR_CPUS)
-               pool->last_cpu = first_cpu(cpu_online_map);
-       spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+       spin_lock_irqsave(&pool->last_cpu_lock, flags);
+       cpu = next_cpu(pool->last_cpu, cpu_online_map);
+       if (cpu == NR_CPUS)
+               cpu = first_cpu(cpu_online_map);
+       pool->last_cpu = cpu;
+       spin_unlock_irqrestore(&pool->last_cpu_lock, flags);

-       return pool->last_cpu;
+       return cpu;
 }

 static void __queue_comp_task(struct ehca_cq *__cq,
                              struct ehca_cpu_comp_task *cct)
 {
-       unsigned long flags_cct;
-       unsigned long flags_cq;
+       unsigned long flags;

-       spin_lock_irqsave(&cct->task_lock, flags_cct);
-       spin_lock_irqsave(&__cq->task_lock, flags_cq);
+       spin_lock_irqsave(&cct->task_lock, flags);
+       spin_lock(&__cq->task_lock);

        if (__cq->nr_callbacks == 0) {
                __cq->nr_callbacks++;
@@ -576,8 +578,8 @@ static void __queue_comp_task(struct ehc
        else
                __cq->nr_callbacks++;

-       spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
-       spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+       spin_unlock(&__cq->task_lock);
+       spin_unlock_irqrestore(&cct->task_lock, flags);
 }

 static void queue_comp_task(struct ehca_cq *__cq)
@@ -588,69 +590,69 @@ static void queue_comp_task(struct ehca_

        cpu = get_cpu();
        cpu_id = find_next_online_cpu(pool);
-
        BUG_ON(!cpu_online(cpu_id));

        cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+       BUG_ON(!cct);

        if (cct->cq_jobs > 0) {
                cpu_id = find_next_online_cpu(pool);
                cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+               BUG_ON(!cct);
        }

        __queue_comp_task(__cq, cct);
-
-       put_cpu();
-
-       return;
 }

 static void run_comp_task(struct ehca_cpu_comp_task* cct)
 {
        struct ehca_cq *cq;
-       unsigned long flags_cct;
-       unsigned long flags_cq;
+       unsigned long flags;

-       spin_lock_irqsave(&cct->task_lock, flags_cct);
+       spin_lock_irqsave(&cct->task_lock, flags);

        while (!list_empty(&cct->cq_list)) {
                cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-               spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+               spin_unlock_irqrestore(&cct->task_lock, flags);
                comp_event_callback(cq);
-               spin_lock_irqsave(&cct->task_lock, flags_cct);
+               spin_lock_irqsave(&cct->task_lock, flags);

-               spin_lock_irqsave(&cq->task_lock, flags_cq);
+               spin_lock(&cq->task_lock);
                cq->nr_callbacks--;
                if (cq->nr_callbacks == 0) {
                        list_del_init(cct->cq_list.next);
                        cct->cq_jobs--;
                }
-               spin_unlock_irqrestore(&cq->task_lock, flags_cq);
-
+               spin_unlock(&cq->task_lock);
        }

-       spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
-       return;
+       spin_unlock_irqrestore(&cct->task_lock, flags);
 }

 static int comp_task(void *__cct)
 {
        struct ehca_cpu_comp_task* cct = __cct;
+       int cql_empty;
        DECLARE_WAITQUEUE(wait, current);

        set_current_state(TASK_INTERRUPTIBLE);
        while(!kthread_should_stop()) {
                add_wait_queue(&cct->wait_queue, &wait);

-               if (list_empty(&cct->cq_list))
+               spin_lock_irq(&cct->task_lock);
+               cql_empty = list_empty(&cct->cq_list);
+               spin_unlock_irq(&cct->task_lock);
+               if (cql_empty)
                        schedule();
                else
                        __set_current_state(TASK_RUNNING);

                remove_wait_queue(&cct->wait_queue, &wait);

-               if (!list_empty(&cct->cq_list))
+               spin_lock_irq(&cct->task_lock);
+               cql_empty = list_empty(&cct->cq_list);
+               spin_unlock_irq(&cct->task_lock);
+               if (!cql_empty)
                        run_comp_task(__cct);

                set_current_state(TASK_INTERRUPTIBLE);
@@ -693,8 +695,6 @@ static void destroy_comp_task(struct ehc

        if (task)
                kthread_stop(task);
-
-       return;
 }

 static void take_over_work(struct ehca_comp_pool *pool,
@@ -815,6 +815,4 @@ void ehca_destroy_comp_pool(void)
        free_percpu(pool->cpu_comp_tasks);
        kfree(pool);
 #endif
-
-       return;
 }

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2007-02-15 16:04 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-15 16:07 [PATCH 2.6.21-rc1 2/5] ehca: fix race condition/locking issues in scaling code Hoang-Nam Nguyen
  -- strict thread matches above, loose matches on Subject: below --
2007-02-14 16:41 Hoang-Nam Nguyen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).