[ofw] [Patch 40/62] Reference implementation of NDv2

Fab Tillier ftillier at microsoft.com
Wed Feb 20 18:27:25 PST 2013


Fix locking bug in mcast mgr.

When a device is added, the lock is held very early on, and is not released in case of errors.  This patch reduces the scope of the lock to only the insertion into the mcast manager's port list.

I also added comments and moved code around a bit for the callback invocations, where the lock was being released without having been acquired in that function, just to make things clearer.

Signed-off-by: Fab Tillier <ftillier at microsoft.com>

diff -dwup3 -X excl.txt -r \dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\al\kernel\al_mcast_mgr.c .\core\al\kernel\al_mcast_mgr.c
--- \dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\al\kernel\al_mcast_mgr.c	Thu May 31 11:22:16 2012
+++ .\core\al\kernel\al_mcast_mgr.c	Fri Aug 10 00:24:34 2012
@@ -439,8 +439,6 @@ mcast_mgr_add_ca_ports(
 		return IB_NOT_FOUND;
 	}
 
-	cl_spinlock_acquire( &g_mcast_mgr_db.mgr_list_lock );
-
 	for ( i = 0; i < p_ci_ca->num_ports; i++)
 	{
 		mcast_mgr_t*			p_mcast_mgr;
@@ -475,13 +473,13 @@ mcast_mgr_add_ca_ports(
 
 		p_mcast_mgr->close_pending = FALSE;
 
+        cl_spinlock_acquire( &g_mcast_mgr_db.mgr_list_lock );
 		InsertTailList(&g_mcast_mgr_db.mgr_list, &p_port_info->port_info_entry);
+        cl_spinlock_release( &g_mcast_mgr_db.mgr_list_lock );
 
 		AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_ERROR, ("Add CA port 0x%I64x\n", p_port_info->port_guid) );
 	}
 
-	cl_spinlock_release( &g_mcast_mgr_db.mgr_list_lock );
-
 	return IB_SUCCESS;
 
 error:
@@ -570,6 +568,9 @@ exit:
 }
 
 
+//
+// This function is called with the global mcast_mgr lock held.
+//
 ib_api_status_t
 mcast_mgr_process_next_request(
 	IN		mcast_mgr_t* 		p_mcast_mgr,
@@ -608,13 +609,18 @@ mcast_mgr_process_next_request(
 						mcast_mgr_print_node_msg(TRACE_LEVEL_ERROR, p_mcast_request, " running join request failed - inform user by callback");
 						
 						p_mcast_request->req_state = RUNNING_CB;
+                        //
+                        // Release the lock before invoking the callback.  Note that we remove the
+                        // request from the list before invoking the callback, so as to allow the
+                        // list to change during the callback without worry.
+                        //
+						RemoveEntryList(&p_mcast_request->request_entry);
 						cl_spinlock_release(&p_mcast_mgr->mgr_lock);
 						mcast_rec.mcast_context = p_join_req->mcast_req.mcast_context;
 						mcast_rec.status = status;
 		   	 			p_mcast_request->p_join_req->mcast_req.pfn_mcast_cb(&mcast_rec);
 						cl_spinlock_acquire(&p_mcast_mgr->mgr_lock);
 						
-						RemoveEntryList(&p_mcast_request->request_entry);
 						cl_event_signal( &p_mcast_request->cb_finished );
 						p_mcast_request->req_state = DONE;
 						p_mcast_node->h_mcast = NULL;
@@ -645,12 +651,17 @@ mcast_mgr_process_next_request(
 					p_mcast_node->connections++;
 
 					p_mcast_request->req_state = RUNNING_CB;
+                    //
+                    // Release the lock before invoking the callback.  Note that we remove the
+                    // request from the list before invoking the callback, so as to allow the
+                    // list to change during the callback without worry.
+                    //
+					RemoveEntryList(&p_mcast_request->request_entry);
 					cl_spinlock_release(&p_mcast_mgr->mgr_lock);
 					mcast_rec.mcast_context = p_mcast_request->p_join_req->mcast_req.mcast_context;
 					p_mcast_request->p_join_req->mcast_req.pfn_mcast_cb(&mcast_rec);
 					cl_spinlock_acquire(&p_mcast_mgr->mgr_lock);
 
-					RemoveEntryList(&p_mcast_request->request_entry);
 					InsertTailList(&p_mcast_node->connected_req_list, &p_mcast_request->request_entry);
 					p_mcast_request->req_state = DONE;
 					cl_event_signal( &p_mcast_request->cb_finished );
@@ -679,15 +690,19 @@ mcast_mgr_process_next_request(
 						p_mcast_node->connections--;
 
 						p_mcast_request->req_state = RUNNING_CB;
+                        //
+                        // Release the lock before invoking the callback.  Note that we remove the
+                        // request from the list before invoking the callback, so as to allow the
+                        // list to change during the callback without worry.
+                        //
+						RemoveEntryList(&p_mcast_request->request_entry);
+						RemoveEntryList(&p_join_request->request_entry);
 						cl_spinlock_release(&p_mcast_mgr->mgr_lock);
 						user_context = (void*)p_join_request->p_join_req->mcast_req.mcast_context;
 						p_join_request->req_dereg_status = IB_SUCCESS;
 						if (p_mcast_request->p_leave_req->leave_cb)
 							p_mcast_request->p_leave_req->leave_cb(user_context);
 						cl_spinlock_acquire(&p_mcast_mgr->mgr_lock);
-						
-						RemoveEntryList(&p_mcast_request->request_entry);
-						RemoveEntryList(&p_join_request->request_entry);
 
 						deref_request(p_join_request);
 						deref_request(p_mcast_request);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ndv2.40.patch
Type: application/octet-stream
Size: 4553 bytes
Desc: ndv2.40.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20130221/4acec88d/attachment.obj>


More information about the ofw mailing list