[openib-general] Re: user_mad: large rmpp length problem

Michael S. Tsirkin mst at mellanox.co.il
Thu Nov 24 00:03:52 PST 2005


Quoting r. Michael S. Tsirkin <mst at mellanox.co.il>:
> Subject: user_mad: large rmpp length problem
> 
> Hello!
> ib_umad_write currently accepts a count parameter from user
> and attempts to allocate mad of size count - sizeof (struct ib_user_mad)
> in kernel memory.
> 
> This, obviously, fails with -ENOMEM, which means that we can't
> send large transactions with RMPP.
> 
> The proper fix appears to be to transfer the data by chunks,
> waking the user process and copying a fixed number of bytes each time.

Here's a very simple patch which, while not ideal, lets us go up to 512KB.

---

Allocate memory for large MAD buffers with __get_free_pages,
making it possible to get buffers up to 512KB in size.

Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm at mellanox.co.il>

Index: linux-kernel/drivers/infiniband/core/user_mad.c
===================================================================
--- linux-kernel.orig/drivers/infiniband/core/user_mad.c
+++ linux-kernel/drivers/infiniband/core/user_mad.c
@@ -204,6 +204,34 @@ out:
 	kfree(packet);
 }
 
+static struct ib_umad_packet *alloc_packet(int buf_size)
+{
+	struct ib_umad_packet *packet;
+	int length = sizeof *packet + buf_size;
+
+	if (length >= PAGE_SIZE)
+		packet = (void *)__get_free_pages(GFP_KERNEL, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
+	else
+		packet = kmalloc(length, GFP_KERNEL);
+
+	if (!packet)
+		return NULL;
+
+	memset(packet, 0, length);
+	return packet;
+}
+
+static void free_packet(struct ib_umad_packet *packet)
+{
+	int length = packet->length + sizeof *packet;
+	if (length >= PAGE_SIZE)
+		free_pages((unsigned long) packet, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
+	else
+		kfree(packet);
+}
+
+
+
 static void recv_handler(struct ib_mad_agent *agent,
 			 struct ib_mad_recv_wc *mad_recv_wc)
 {
@@ -215,7 +243,7 @@ static void recv_handler(struct ib_mad_a
 		goto out;
 
 	length = mad_recv_wc->mad_len;
-	packet = kzalloc(sizeof *packet + length, GFP_KERNEL);
+	packet = alloc_packet(length);
 	if (!packet)
 		goto out;
 
@@ -240,7 +268,7 @@ static void recv_handler(struct ib_mad_a
 	}
 
 	if (queue_packet(file, agent, packet))
-		kfree(packet);
+		free_packet(packet);
 
 out:
 	ib_free_recv_mad(mad_recv_wc);
@@ -294,7 +322,7 @@ static ssize_t ib_umad_read(struct file 
 		list_add(&packet->list, &file->recv_list);
 		spin_unlock_irq(&file->recv_lock);
 	} else
-		kfree(packet);
+		free_packet(packet);
 	return ret;
 }
 
Index: linux-kernel/drivers/infiniband/core/mad.c
===================================================================
--- linux-kernel.orig/drivers/infiniband/core/mad.c
+++ linux-kernel/drivers/infiniband/core/mad.c
@@ -779,7 +779,7 @@ struct ib_mad_send_buf * ib_create_send_
 {
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_wr_private *mad_send_wr;
-	int buf_size;
+	int length, buf_size;
 	void *buf;
 
 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
@@ -791,10 +791,17 @@ struct ib_mad_send_buf * ib_create_send_
 	    (!rmpp_active && buf_size > sizeof(struct ib_mad)))
 		return ERR_PTR(-EINVAL);
 
-	buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
+	length = sizeof *mad_send_wr + buf_size;
+	if (length >= PAGE_SIZE)
+		buf = (void *)__get_free_pages(gfp_mask, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
+	else
+		buf = kmalloc(length, gfp_mask);
+
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 
+	memset(buf, 0, length);
+
 	mad_send_wr = buf + buf_size;
 	mad_send_wr->send_buf.mad = buf;
 
@@ -830,10 +837,19 @@ EXPORT_SYMBOL(ib_create_send_mad);
 void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
+	void *mad_send_wr;
+	int length;
 
 	mad_agent_priv = container_of(send_buf->mad_agent,
 				      struct ib_mad_agent_private, agent);
-	kfree(send_buf->mad);
+	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+				   send_buf);
+
+	length = sizeof(struct ib_mad_send_wr_private) + (mad_send_wr - send_buf->mad);
+	if (length >= PAGE_SIZE)
+		free_pages((unsigned long)send_buf->mad, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
+	else
+		kfree(send_buf->mad);
 
 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
 		wake_up(&mad_agent_priv->wait);

-- 
MST



More information about the general mailing list