[openib-general] OpenSM causes kernel trap

Roland Dreier rolandd at cisco.com
Thu Oct 27 15:17:26 PDT 2005


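OK, I think I found it.  The problem was that ib_umad_write() accessed
the MAD header through packet->msg in a few places where it should have
used packet->msg->mad, so the transaction ID update wrote into the
structure that packet->msg points to rather than into the MAD itself,
and therefore corrupted the address of the buffer.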

Unless someone sees a problem with it, I'll commit the patch below in a
little while.  It fixes this issue as well as the packet->length race
that Sean spotted:

--- infiniband/core/user_mad.c	(revision 3867)
+++ infiniband/core/user_mad.c	(working copy)
@@ -297,8 +297,6 @@ static ssize_t ib_umad_write(struct file
 		goto err;
 	}
 
-	packet->length = length;
-
 	down_read(&file->agent_mutex);
 
 	agent = file->agent[packet->mad.hdr.id];
@@ -398,12 +396,12 @@ static ssize_t ib_umad_write(struct file
 	 * transaction ID matches the agent being used to send the
 	 * MAD.
 	 */
-	method = ((struct ib_mad_hdr *) packet->msg)->method;
+	method = ((struct ib_mad_hdr *) packet->msg->mad)->method;
 
 	if (!(method & IB_MGMT_METHOD_RESP)       &&
 	    method != IB_MGMT_METHOD_TRAP_REPRESS &&
 	    method != IB_MGMT_METHOD_SEND) {
-		tid = &((struct ib_mad_hdr *) packet->msg)->tid;
+		tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
 		*tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
 				   (be64_to_cpup(tid) & 0xffffffff));
 	}
@@ -414,7 +412,7 @@ static ssize_t ib_umad_write(struct file
 
 	up_read(&file->agent_mutex);
 
-	return sizeof (struct ib_user_mad_hdr) + packet->length;
+	return count;
 
 err_msg:
 	ib_free_send_mad(packet->msg);



More information about the general mailing list