[ofa-general] Re: [PATCH/RFC] IB/mad: Fix lock-lock-timer deadlock in RMPP code
Roland Dreier
rdreier at cisco.com
Tue Sep 22 11:27:25 PDT 2009
> The locking is needed to protect against items being removed from rmpp_list in
> recv_timeout_handler() and recv_cleanup_handler(). No new items should be added
> to the rmpp_list when ib_cancel_rmpp_recvs() is running (or there's a separate
> bug).
OK so how about something like this? Just hold the lock to mark the
items on the list as being canceled, and then actually cancel the
delayed work without the lock. I think this doesn't leave any races or
holes where the delayed work can mess up the cancel.
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 57a3c6f..4e0f282 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -37,7 +37,8 @@
enum rmpp_state {
RMPP_STATE_ACTIVE,
RMPP_STATE_TIMEOUT,
- RMPP_STATE_COMPLETE
+ RMPP_STATE_COMPLETE,
+ RMPP_STATE_CANCELING
};
struct mad_rmpp_recv {
@@ -87,18 +88,22 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
spin_lock_irqsave(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+ if (rmpp_recv->state != RMPP_STATE_COMPLETE)
+ ib_free_recv_mad(rmpp_recv->rmpp_wc);
+ rmpp_recv->state = RMPP_STATE_CANCELING;
+ }
+ spin_unlock_irqrestore(&agent->lock, flags);
+
+ list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
cancel_delayed_work(&rmpp_recv->timeout_work);
cancel_delayed_work(&rmpp_recv->cleanup_work);
}
- spin_unlock_irqrestore(&agent->lock, flags);
flush_workqueue(agent->qp_info->port_priv->wq);
list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
&agent->rmpp_list, list) {
list_del(&rmpp_recv->list);
- if (rmpp_recv->state != RMPP_STATE_COMPLETE)
- ib_free_recv_mad(rmpp_recv->rmpp_wc);
destroy_rmpp_recv(rmpp_recv);
}
}
@@ -260,6 +265,10 @@ static void recv_cleanup_handler(struct work_struct *work)
unsigned long flags;
spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
+ if (rmpp_recv->state == RMPP_STATE_CANCELING) {
+ spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
+ return;
+ }
list_del(&rmpp_recv->list);
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
destroy_rmpp_recv(rmpp_recv);
More information about the general
mailing list