[ofa-general] Re: [PATCH] IB/mlx4 mlx4_ib: commands timeout

Michael S. Tsirkin mst at dev.mellanox.co.il
Mon May 7 07:53:20 PDT 2007


> Quoting Roland Dreier <rdreier at cisco.com>:
> Subject: Re: [PATCH] IB/mlx4 mlx4_ib: commands timeout
> 
>  > When the system is busy it may happen that the command actually
>  > completed but it took more than the specified timeout till the
>  > task executing the command was actually given CPU time. This test
>  > checks that the completion is really missing before failing.
> 
>  > +	if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout)))
>  > +		if (!context->done.done) {
>  > +			err = -EBUSY;
>  > +			goto out;
>  > +		}
> 
> This seems more like a bug in wait_for_completion_timeout().  Anyway,
> it's definitely not OK to poke inside the definition of struct
> completion in driver code, so we need to find a different way to solve
> this.
> 
> BTW the same completion handling code is in mthca -- is this also a
> problem there?

Google gave me this:
http://lkml.org/lkml/2007/3/1/156
so it seems a similiar problem was observed in mthca.

Thomas, Ingo, I think you were the ones to propose
wait_for_completion_timeout()/wait_for_completion_interruptible_timeout():
would it make sense to change these functions to return -ETIMEDOUT on
timeout, 0 on success? No one seems to use the actual timeout value,
as far as I can see.

Would something like the following, untested, patch, make sense?

Signed-off-by: Michael S. Tsirkin <mst at dev.mellanox.co.il>

--

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 268c5a4..84360c8 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -44,11 +44,10 @@ static inline void init_completion(struct completion *x)
 
 extern void FASTCALL(wait_for_completion(struct completion *));
 extern int FASTCALL(wait_for_completion_interruptible(struct completion *x));
-extern unsigned long FASTCALL(wait_for_completion_timeout(struct completion *x,
-						   unsigned long timeout));
-extern unsigned long FASTCALL(wait_for_completion_interruptible_timeout(
-			struct completion *x, unsigned long timeout));
-
+extern int FASTCALL(wait_for_completion_timeout(struct completion *x,
+						unsigned long timeout));
+extern int FASTCALL(wait_for_completion_interruptible_timeout(struct completion *x,
+							      unsigned long timeout));
 extern void FASTCALL(complete(struct completion *));
 extern void FASTCALL(complete_all(struct completion *));
 
diff --git a/kernel/sched.c b/kernel/sched.c
index b9a6837..5ee3df6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3661,9 +3661,10 @@ void fastcall __sched wait_for_completion(struct completion *x)
 }
 EXPORT_SYMBOL(wait_for_completion);
 
-unsigned long fastcall __sched
+int fastcall __sched
 wait_for_completion_timeout(struct completion *x, unsigned long timeout)
 {
+	int ret = 0;
 	might_sleep();
 
 	spin_lock_irq(&x->wait.lock);
@@ -3672,22 +3673,24 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
 
 		wait.flags |= WQ_FLAG_EXCLUSIVE;
 		__add_wait_queue_tail(&x->wait, &wait);
-		do {
+		for (;;) {
 			__set_current_state(TASK_UNINTERRUPTIBLE);
 			spin_unlock_irq(&x->wait.lock);
 			timeout = schedule_timeout(timeout);
 			spin_lock_irq(&x->wait.lock);
+			if (x->done)
+				break;
 			if (!timeout) {
-				__remove_wait_queue(&x->wait, &wait);
-				goto out;
+				ret = -ETIMEDOUT;
+				break;
 			}
-		} while (!x->done);
+		}
 		__remove_wait_queue(&x->wait, &wait);
 	}
 	x->done--;
 out:
 	spin_unlock_irq(&x->wait.lock);
-	return timeout;
+	return ret;
 }
 EXPORT_SYMBOL(wait_for_completion_timeout);
 
@@ -3724,10 +3727,12 @@ out:
 }
 EXPORT_SYMBOL(wait_for_completion_interruptible);
 
-unsigned long fastcall __sched
+int fastcall __sched
 wait_for_completion_interruptible_timeout(struct completion *x,
 					  unsigned long timeout)
 {
+	int ret = 0;
+
 	might_sleep();
 
 	spin_lock_irq(&x->wait.lock);
@@ -3736,7 +3741,7 @@ wait_for_completion_interruptible_timeout(struct completion *x,
 
 		wait.flags |= WQ_FLAG_EXCLUSIVE;
 		__add_wait_queue_tail(&x->wait, &wait);
-		do {
+		for (;;) {
 			if (signal_pending(current)) {
 				timeout = -ERESTARTSYS;
 				__remove_wait_queue(&x->wait, &wait);
@@ -3746,17 +3751,19 @@ wait_for_completion_interruptible_timeout(struct completion *x,
 			spin_unlock_irq(&x->wait.lock);
 			timeout = schedule_timeout(timeout);
 			spin_lock_irq(&x->wait.lock);
+			if (x->done)
+				break;
 			if (!timeout) {
-				__remove_wait_queue(&x->wait, &wait);
-				goto out;
+				ret = -ETIMEDOUT;
+				break;
 			}
-		} while (!x->done);
+		}
 		__remove_wait_queue(&x->wait, &wait);
 	}
 	x->done--;
 out:
 	spin_unlock_irq(&x->wait.lock);
-	return timeout;
+	return ret;
 }
 EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
 


-- 
MST



More information about the general mailing list