[openib-general] [RFC] [PATCH] user_mad: Support RMPP on send side

Hal Rosenstock halr at voltaire.com
Wed May 11 14:07:33 PDT 2005


user_mad: Support RMPP on send side

Note that this change will require coordinated changes to OpenSM and
some userspace management libraries, which will be made as soon as
possible after this patch is accepted.

Receive side support for RMPP will be added separately.

A couple of notes on this patch:
1. Send side copying
There are currently two copies from user space even in the non-RMPP
case. It would be nice to reduce that to one copy in the non-RMPP
case; this is an optimization to add at some point.
2. I'm not sure about the effects of receiving RMPP MADs on the read
side.
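
To make the new ABI concrete, here is a rough userspace sketch of
registering an RMPP-capable SA agent and sending a MAD larger than
256 bytes through the new write() path.  This is a sketch only: the
umad device name, the include path, and the destination LID are
assumptions, and building the MAD and RMPP headers in mad->data
(including setting the RMPP ACTIVE flag) is left to the caller.

	#include <arpa/inet.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	#include "ib_user_mad.h"	/* the header modified by this patch */

	int send_rmpp_example(void)
	{
		struct ib_user_mad_reg_req req;
		struct ib_user_mad *mad;
		size_t mad_length = 1024;	/* > 256 bytes, so RMPP is needed */
		ssize_t ret;
		int fd;

		fd = open("/dev/infiniband/umad0", O_RDWR);	/* name may differ */
		if (fd < 0)
			return -1;

		memset(&req, 0, sizeof req);
		req.qpn                = 1;	/* GSI */
		req.mgmt_class         = 0x03;	/* SubnAdm (SA) */
		req.mgmt_class_version = 2;
		req.rmpp_version       = 1;	/* new field: enable RMPP */

		if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT, &req) < 0) {
			close(fd);
			return -1;
		}

		mad = calloc(1, sizeof *mad + mad_length);
		if (!mad) {
			close(fd);
			return -1;
		}
		mad->hdr.id  = req.id;		/* filled in by the kernel */
		mad->hdr.lid = htons(1);	/* destination LID (example) */
		mad->hdr.qpn = htonl(1);	/* SA listens on QP 1 */
		/* fill in mad->data: MAD header, RMPP header with the
		   ACTIVE flag set, and the payload */

		ret = write(fd, mad, sizeof *mad + mad_length);
		if (ret != (ssize_t) (sizeof *mad + mad_length))
			perror("write");

		free(mad);
		close(fd);
		return ret < 0 ? -1 : 0;
	}

The kernel picks the RMPP send path by testing the RMPP ACTIVE flag
in the MAD written by the user, so a fixed-size 256-byte MAD is still
sent down the non-RMPP path.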

-- Hal

Index: infiniband/include/ib_user_mad.h
===================================================================
--- infiniband/include/ib_user_mad.h	(revision 2265)
+++ infiniband/include/ib_user_mad.h	(working copy)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -42,7 +43,7 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IB_USER_MAD_ABI_VERSION	2
+#define IB_USER_MAD_ABI_VERSION	3
 
 /*
  * Make sure that all structs defined in this file remain laid out so
@@ -51,8 +52,7 @@
  */
 
 /**
- * ib_user_mad - MAD packet
- * @data - Contents of MAD
+ * ib_user_mad_hdr - MAD packet header
  * @id - ID of agent MAD received with/to be sent with
  * @status - 0 on successful receive, ETIMEDOUT if no response
  *   received (transaction ID in data[] will be set to TID of original
@@ -72,8 +72,7 @@
  *
  * All multi-byte quantities are stored in network (big endian) byte order.
  */
-struct ib_user_mad {
-	__u8	data[256];
+struct ib_user_mad_hdr {
 	__u32	id;
 	__u32	status;
 	__u32	timeout_ms;
@@ -91,6 +90,17 @@
 };
 
 /**
+ * ib_user_mad - MAD packet
+ * @hdr - MAD packet header
+ * @data - Contents of MAD
+ *
+ */
+struct ib_user_mad {
+	struct ib_user_mad_hdr hdr;
+	__u8	data[0];
+};
+
+/**
  * ib_user_mad_reg_req - MAD registration request
  * @id - Set by the kernel; used to identify agent in future requests.
  * @qpn - Queue pair number; must be 0 or 1.
@@ -103,6 +113,8 @@
  *   management class to receive.
  * @oui: Indicates IEEE OUI when mgmt_class is a vendor class
  *   in the range from 0x30 to 0x4f. Otherwise not used.
+ * @rmpp_version: If set, indicates the RMPP version used.
+ * 
  */
 struct ib_user_mad_reg_req {
 	__u32	id;
@@ -111,6 +123,7 @@
 	__u8	mgmt_class;
 	__u8	mgmt_class_version;
 	__u8    oui[3];
+	__u8	rmpp_version;
 };
 
 #define IB_IOCTL_MAGIC		0x1b
Index: infiniband/core/user_mad.c
===================================================================
--- infiniband/core/user_mad.c	(revision 2265)
+++ infiniband/core/user_mad.c	(working copy)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -94,10 +95,12 @@
 };
 
 struct ib_umad_packet {
-	struct ib_user_mad mad;
 	struct ib_ah      *ah;
+	struct ib_mad_send_buf *msg;
 	struct list_head   list;
+	int		   length;
 	DECLARE_PCI_UNMAP_ADDR(mapping)
+	struct ib_user_mad mad;
 };
 
 static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
@@ -114,10 +117,10 @@
 	int ret = 1;
 
 	down_read(&file->agent_mutex);
-	for (packet->mad.id = 0;
-	     packet->mad.id < IB_UMAD_MAX_AGENTS;
-	     packet->mad.id++)
-		if (agent == file->agent[packet->mad.id]) {
+	for (packet->mad.hdr.id = 0;
+	     packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
+	     packet->mad.hdr.id++)
+		if (agent == file->agent[packet->mad.hdr.id]) {
 			spin_lock_irq(&file->recv_lock);
 			list_add_tail(&packet->list, &file->recv_list);
 			spin_unlock_irq(&file->recv_lock);
@@ -138,14 +141,11 @@
 	struct ib_umad_packet *packet =
 		(void *) (unsigned long) send_wc->wr_id;
 
-	dma_unmap_single(agent->device->dma_device,
-			 pci_unmap_addr(packet, mapping),
-			 sizeof packet->mad.data,
-			 DMA_TO_DEVICE);
-	ib_destroy_ah(packet->ah);
+	ib_destroy_ah(packet->msg->send_wr.wr.ud.ah);
+	ib_free_send_mad(packet->msg);
 
 	if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
-		packet->mad.status = ETIMEDOUT;
+		packet->mad.hdr.status = ETIMEDOUT;
 
 		if (!queue_packet(file, agent, packet))
 			return;
@@ -159,30 +159,34 @@
 {
 	struct ib_umad_file *file = agent->context;
 	struct ib_umad_packet *packet;
+	int length;
 
+
 	if (mad_recv_wc->wc->status != IB_WC_SUCCESS)
 		goto out;
 
-	packet = kmalloc(sizeof *packet, GFP_KERNEL);
+	length = 256;	/* until RMPP is supported */
+	packet = kmalloc(sizeof *packet + length, GFP_KERNEL);
 	if (!packet)
 		goto out;
 
-	memset(packet, 0, sizeof *packet);
+	memset(packet, 0, sizeof *packet + length);
+	packet->length = length;
 
-	memcpy(packet->mad.data, mad_recv_wc->recv_buf.mad, sizeof packet->mad.data);
-	packet->mad.status        = 0;
-	packet->mad.qpn 	  = cpu_to_be32(mad_recv_wc->wc->src_qp);
-	packet->mad.lid 	  = cpu_to_be16(mad_recv_wc->wc->slid);
-	packet->mad.sl  	  = mad_recv_wc->wc->sl;
-	packet->mad.path_bits 	  = mad_recv_wc->wc->dlid_path_bits;
-	packet->mad.grh_present   = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
-	if (packet->mad.grh_present) {
+	memcpy(packet->mad.data, mad_recv_wc->recv_buf.mad, length);
+	packet->mad.hdr.status    = 0;
+	packet->mad.hdr.qpn 	  = cpu_to_be32(mad_recv_wc->wc->src_qp);
+	packet->mad.hdr.lid 	  = cpu_to_be16(mad_recv_wc->wc->slid);
+	packet->mad.hdr.sl  	  = mad_recv_wc->wc->sl;
+	packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
+	packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
+	if (packet->mad.hdr.grh_present) {
 		/* XXX parse GRH */
-		packet->mad.gid_index 	  = 0;
-		packet->mad.hop_limit 	  = 0;
-		packet->mad.traffic_class = 0;
-		memset(packet->mad.gid, 0, 16);
-		packet->mad.flow_label 	  = 0;
+		packet->mad.hdr.gid_index 	= 0;
+		packet->mad.hdr.hop_limit 	= 0;
+		packet->mad.hdr.traffic_class	= 0;
+		memset(packet->mad.hdr.gid, 0, 16);
+		packet->mad.hdr.flow_label	= 0;
 	}
 
 	if (queue_packet(file, agent, packet))
@@ -199,7 +203,7 @@
 	struct ib_umad_packet *packet;
 	ssize_t ret;
 
-	if (count < sizeof (struct ib_user_mad))
+	if (count < sizeof (struct ib_user_mad) + 256) /* until RMPP supported */ 
 		return -EINVAL;
 
 	spin_lock_irq(&file->recv_lock);
@@ -222,10 +226,12 @@
 
 	spin_unlock_irq(&file->recv_lock);
 
-	if (copy_to_user(buf, &packet->mad, sizeof packet->mad))
+	if (copy_to_user(buf, &packet->mad,
+			 min(count, packet->length +
+			     sizeof (struct ib_user_mad))))
 		ret = -EFAULT;
 	else
-		ret = sizeof packet->mad;
+		ret = count;
 
 	kfree(packet);
 	return ret;
@@ -238,106 +244,161 @@
 	struct ib_umad_packet *packet;
 	struct ib_mad_agent *agent;
 	struct ib_ah_attr ah_attr;
-	struct ib_sge      gather_list;
-	struct ib_send_wr *bad_wr, wr = {
-		.opcode      = IB_WR_SEND,
-		.sg_list     = &gather_list,
-		.num_sge     = 1,
-		.send_flags  = IB_SEND_SIGNALED,
-	};
+	struct ib_send_wr *bad_wr;
+	struct ib_rmpp_mad *rmpp_mad;
 	u8 method;
 	u64 *tid;
-	int ret;
+	int ret, length, hdr_len, data_len, rmpp_hdr_size;
+	int rmpp_active = 0;
 
 	if (count < sizeof (struct ib_user_mad))
 		return -EINVAL;
 
-	packet = kmalloc(sizeof *packet, GFP_KERNEL);
+	length = count - sizeof (struct ib_user_mad);
+	packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) +
+			 sizeof(struct ib_rmpp_hdr), GFP_KERNEL);
 	if (!packet)
 		return -ENOMEM;
 
-	if (copy_from_user(&packet->mad, buf, sizeof packet->mad)) {
-		kfree(packet);
-		return -EFAULT;
+	if (copy_from_user(&packet->mad, buf,
+			    sizeof (struct ib_user_mad) +
+			    sizeof(struct ib_mad_hdr) +
+			    sizeof(struct ib_rmpp_hdr))) {
+		ret = -EFAULT;
+		goto err;
 	}
 
-	if (packet->mad.id < 0 || packet->mad.id >= IB_UMAD_MAX_AGENTS) {
+	if (packet->mad.hdr.id < 0 ||
+	    packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
 		ret = -EINVAL;
 		goto err;
 	}
 
+	packet->length = length;
+
 	down_read(&file->agent_mutex);
 
-	agent = file->agent[packet->mad.id];
+	agent = file->agent[packet->mad.hdr.id];
 	if (!agent) {
 		ret = -EINVAL;
 		goto err_up;
 	}
 
+	memset(&ah_attr, 0, sizeof ah_attr);
+	ah_attr.dlid          = be16_to_cpu(packet->mad.hdr.lid);
+	ah_attr.sl            = packet->mad.hdr.sl;
+	ah_attr.src_path_bits = packet->mad.hdr.path_bits;
+	ah_attr.port_num      = file->port->port_num;
+	if (packet->mad.hdr.grh_present) {
+		ah_attr.ah_flags = IB_AH_GRH;
+		memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
+		ah_attr.grh.flow_label 	   = packet->mad.hdr.flow_label;
+		ah_attr.grh.hop_limit  	   = packet->mad.hdr.hop_limit;
+		ah_attr.grh.traffic_class  = packet->mad.hdr.traffic_class;
+	}
+
+	packet->ah = ib_create_ah(agent->qp->pd, &ah_attr);
+	if (IS_ERR(packet->ah)) {
+		ret = PTR_ERR(packet->ah);
+		goto err_up;
+	}
+
+	rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
+	if (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) {
+		/* RMPP active */
+		if (!agent->rmpp_version) {
+			ret = -EINVAL;
+			goto err_ah;
+		}
+		/* Validate that management class can support RMPP */
+		if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
+			hdr_len = offsetof(struct ib_sa_mad, data);
+			data_len = length - hdr_len;
+		} else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
+			   (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) {
+			hdr_len = offsetof(struct ib_vendor_mad, data);
+			data_len = length - hdr_len;
+		} else {
+			ret = -EINVAL;
+			goto err_ah;
+		}
+		rmpp_active = 1;
+	} else {
+		if (length != sizeof(struct ib_mad)) {
+			ret = -EINVAL;
+			goto err_ah;
+		}
+		hdr_len = offsetof(struct ib_mad, data);
+		data_len = length - hdr_len;
+	}
+	
+	packet->msg = ib_create_send_mad(agent,
+					 be32_to_cpu(packet->mad.hdr.qpn),
+					 0, packet->ah, rmpp_active,
+					 hdr_len, data_len,
+					 GFP_KERNEL);
+	if (IS_ERR(packet->msg)) {
+		ret = PTR_ERR(packet->msg);
+		goto err_ah;
+	}
+
+	packet->msg->send_wr.wr.ud.retries = 3; /* !!! */
+	packet->msg->send_wr.wr.ud.timeout_ms  = packet->mad.hdr.timeout_ms;
+	/* Override send WR WRID created by ib_create_send_mad */
+	packet->msg->send_wr.wr_id = (unsigned long) packet;
+
+	if (!rmpp_active) {
+		/* Copy message from user into send buffer */
+		if (copy_from_user(packet->msg->mad,
+				   buf + sizeof(struct ib_user_mad), length)) {
+			ret = -EFAULT;
+			goto err_msg;
+		}
+	} else {
+		rmpp_hdr_size = sizeof(struct ib_mad_hdr) +
+				sizeof(struct ib_rmpp_hdr);
+		/* Only copy MAD headers (RMPP header in place) */
+		memcpy(packet->msg->mad, packet->mad.data,
+		       sizeof(struct ib_mad_hdr));
+		/* Now, copy the rest of the message from user into the send buffer */
+		if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data,
+				   buf + sizeof(struct ib_user_mad) + rmpp_hdr_size,
+				   length - rmpp_hdr_size)) {
+			ret = -EFAULT;
+			goto err_msg;
+		}
+	}
+
 	/*
 	 * If userspace is generating a request that will generate a
 	 * response, we need to make sure the high-order part of the
 	 * transaction ID matches the agent being used to send the
 	 * MAD.
 	 */
-	method = ((struct ib_mad_hdr *) packet->mad.data)->method;
+	method = packet->msg->mad->mad_hdr.method;
 
 	if (!(method & IB_MGMT_METHOD_RESP)       &&
 	    method != IB_MGMT_METHOD_TRAP_REPRESS &&
 	    method != IB_MGMT_METHOD_SEND) {
-		tid = &((struct ib_mad_hdr *) packet->mad.data)->tid;
+		tid = &packet->msg->mad->mad_hdr.tid;
 		*tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
 				   (be64_to_cpup(tid) & 0xffffffff));
 	}
 
-	memset(&ah_attr, 0, sizeof ah_attr);
-	ah_attr.dlid          = be16_to_cpu(packet->mad.lid);
-	ah_attr.sl            = packet->mad.sl;
-	ah_attr.src_path_bits = packet->mad.path_bits;
-	ah_attr.port_num      = file->port->port_num;
-	if (packet->mad.grh_present) {
-		ah_attr.ah_flags = IB_AH_GRH;
-		memcpy(ah_attr.grh.dgid.raw, packet->mad.gid, 16);
-		ah_attr.grh.flow_label 	   = packet->mad.flow_label;
-		ah_attr.grh.hop_limit  	   = packet->mad.hop_limit;
-		ah_attr.grh.traffic_class  = packet->mad.traffic_class;
-	}
+	ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr);
+	if (ret)
+		goto err_msg;
 
-	packet->ah = ib_create_ah(agent->qp->pd, &ah_attr);
-	if (IS_ERR(packet->ah)) {
-		ret = PTR_ERR(packet->ah);
-		goto err_up;
-	}
+	up_read(&file->agent_mutex);
 
-	gather_list.addr = dma_map_single(agent->device->dma_device,
-					  packet->mad.data,
-					  sizeof packet->mad.data,
-					  DMA_TO_DEVICE);
-	gather_list.length = sizeof packet->mad.data;
-	gather_list.lkey   = file->mr[packet->mad.id]->lkey;
-	pci_unmap_addr_set(packet, mapping, gather_list.addr);
+	return sizeof (struct ib_user_mad_hdr) + packet->length;
 
-	wr.wr.ud.mad_hdr     = (struct ib_mad_hdr *) packet->mad.data;
-	wr.wr.ud.ah          = packet->ah;
-	wr.wr.ud.remote_qpn  = be32_to_cpu(packet->mad.qpn);
-	wr.wr.ud.remote_qkey = be32_to_cpu(packet->mad.qkey);
-	wr.wr.ud.timeout_ms  = packet->mad.timeout_ms;
+err_msg:
+	ib_free_send_mad(packet->msg);
 
-	wr.wr_id            = (unsigned long) packet;
+err_ah:
+	ib_destroy_ah(packet->ah);
 
-	ret = ib_post_send_mad(agent, &wr, &bad_wr);
-	if (ret) {
-		dma_unmap_single(agent->device->dma_device,
-				 pci_unmap_addr(packet, mapping),
-				 sizeof packet->mad.data,
-				 DMA_TO_DEVICE);
-		goto err_up;
-	}
-
-	up_read(&file->agent_mutex);
-
-	return sizeof packet->mad;
-
 err_up:
 	up_read(&file->agent_mutex);
 
@@ -399,7 +454,8 @@
 	agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
 				      ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
 				      ureq.mgmt_class ? &req : NULL,
-				      0, send_handler, recv_handler, file);
+				      ureq.rmpp_version,
+				      send_handler, recv_handler, file);
 	if (IS_ERR(agent)) {
 		ret = PTR_ERR(agent);
 		goto out;
Index: docs/user_mad.txt
===================================================================
--- docs/user_mad.txt	(revision 2265)
+++ docs/user_mad.txt	(working copy)
@@ -28,11 +28,13 @@
 
 Receiving MADs
 
-  MADs are received using read().  The buffer passed to read() must be
-  large enough to hold at least one struct ib_user_mad.  For example:
+  MADs are received using read().  The receive side does not currently
+  support RMPP, so the buffer passed to read() must be large enough to
+  hold at least one struct ib_user_mad plus 256 bytes.  For example:
 
-	struct ib_user_mad mad;
-	ret = read(fd, &mad, sizeof mad);
+	struct ib_user_mad *mad;
+	mad = malloc(sizeof *mad + 256);
+	ret = read(fd, mad, sizeof *mad + 256);
-	if (ret != sizeof mad)
+	if (ret != sizeof *mad + 256)
 		perror("read");
 
@@ -50,18 +52,21 @@
 
   MADs are sent using write().  The agent ID for sending should be
   filled into the id field of the MAD, the destination LID should be
-  filled into the lid field, and so on.  For example:
+  filled into the lid field, and so on.  The send side does support
+  RMPP, so arbitrary length MADs can be sent.  For example:
 
-	struct ib_user_mad mad;
+	struct ib_user_mad *mad;
 
-	/* fill in mad.data */
+	mad = malloc(sizeof *mad + mad_length);
 
-	mad.id  = my_agent;	/* req.id from agent registration */
-	mad.lid = my_dest;	/* in network byte order... */
+	/* fill in mad->data */
+
+	mad->hdr.id  = my_agent;	/* req.id from agent registration */
+	mad->hdr.lid = my_dest;		/* in network byte order... */
 	/* etc. */
 
-	ret = write(fd, &mad, sizeof mad);
-	if (ret != sizeof mad)
+	ret = write(fd, mad, sizeof *mad + mad_length);
+	if (ret != sizeof *mad + mad_length)
 		perror("write");
 
 Setting IsSM Capability Bit