[openib-general] [PATCH 3 of 3] mad: large RMPP support
Jack Morgenstein
jackm at mellanox.co.il
Mon Feb 6 23:41:33 PST 2006
patch 3 of 3
---
Large RMPP support, send side: split a multipacket MAD buffer into a list of
segments (multipacket_list) and send these using a gather list of size 2.
Signed-off-by: Jack Morgenstein <jackm at mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>
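
For reference, the code below relies on a per-segment structure introduced
earlier in this series. A rough sketch, reconstructed from the fields used
in this patch (the authoritative definition is in the earlier patch):

    /* sketch only -- layout inferred from its use below */
    struct ib_mad_multipacket_seg {
        struct list_head list;  /* entry on mad_send_wr->multipacket_list */
        int size;               /* data bytes this segment carries */
        u8 data[0];             /* sizeof(struct ib_mad) - hdr_len bytes */
    };

Segment 1 of an RMPP send stays in the original MAD buffer; segments 2..N
live on multipacket_list, which is why the lookup loops below start
counting at 2.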
Index: last_stable/drivers/infiniband/core/mad_rmpp.c
===================================================================
--- last_stable.orig/drivers/infiniband/core/mad_rmpp.c
+++ last_stable/drivers/infiniband/core/mad_rmpp.c
@@ -570,16 +532,23 @@ start_rmpp(struct ib_mad_agent_private *
return mad_recv_wc;
}
-static inline u64 get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr)
+static inline void *get_seg_addr(struct ib_mad_send_wr_private *mad_send_wr)
{
- return mad_send_wr->sg_list[0].addr + mad_send_wr->data_offset +
- (sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset) *
- (mad_send_wr->seg_num - 1);
+ struct ib_mad_multipacket_seg *seg;
+ int i = 2;
+
+ list_for_each_entry(seg, &mad_send_wr->multipacket_list, list) {
+ if (i == mad_send_wr->seg_num)
+ return seg->data;
+ i++;
+ }
+ return NULL;
}
-static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
+int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_mad *rmpp_mad;
+ void *next_data;
int timeout;
u32 paylen;
@@ -592,14 +561,14 @@ static int send_next_seg(struct ib_mad_s
paylen = mad_send_wr->total_seg * IB_MGMT_RMPP_DATA -
mad_send_wr->pad;
rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
- mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad);
} else {
- mad_send_wr->send_wr.num_sge = 2;
- mad_send_wr->sg_list[0].length = mad_send_wr->data_offset;
- mad_send_wr->sg_list[1].addr = get_seg_addr(mad_send_wr);
- mad_send_wr->sg_list[1].length = sizeof(struct ib_rmpp_mad) -
- mad_send_wr->data_offset;
- mad_send_wr->sg_list[1].lkey = mad_send_wr->sg_list[0].lkey;
+ next_data = get_seg_addr(mad_send_wr);
+ if (!next_data) {
+ printk(KERN_ERR PFX "send_next_seg: "
+ "could not find next segment\n");
+ return -EINVAL;
+ }
+ mad_send_wr->send_buf.mad_payload = next_data;
rmpp_mad->rmpp_hdr.paylen_newwin = 0;
}
@@ -838,7 +807,7 @@ out:
int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_mad *rmpp_mad;
- int i, total_len, ret;
+ int ret;
rmpp_mad = mad_send_wr->send_buf.mad;
if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
@@ -848,20 +817,16 @@ int ib_send_rmpp_mad(struct ib_mad_send_
if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
return IB_RMPP_RESULT_INTERNAL;
- if (mad_send_wr->send_wr.num_sge > 1)
- return -EINVAL; /* TODO: support num_sge > 1 */
+ if (mad_send_wr->send_wr.num_sge != 2)
+ return -EINVAL;
mad_send_wr->seg_num = 1;
mad_send_wr->newwin = 1;
mad_send_wr->data_offset = data_offset(rmpp_mad->mad_hdr.mgmt_class);
- total_len = 0;
- for (i = 0; i < mad_send_wr->send_wr.num_sge; i++)
- total_len += mad_send_wr->send_wr.sg_list[i].length;
-
- mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
+ mad_send_wr->total_seg = (mad_send_wr->total_length - mad_send_wr->data_offset) /
(sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
- mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR -
+ mad_send_wr->pad = mad_send_wr->total_length - IB_MGMT_RMPP_HDR -
be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
/* We need to wait for the final ACK even if there isn't a response */
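
To make the total_seg/pad arithmetic above concrete, a worked example
(assuming a 56-byte SA header, IB_MGMT_SA_HDR, so each 256-byte MAD
carries 256 - 56 = 200 data bytes):

    /*
     * data_len       = 500
     * total_length   = 56 + 500 + 100 (pad)          = 656
     * total_seg      = (656 - 56) / (256 - 56)       = 3
     * paylen_newwin  = 56 - IB_MGMT_RMPP_HDR + 500   = 520
     * pad            = 656 - IB_MGMT_RMPP_HDR - 520  = 100
     *
     * Segment 1 goes out of the original MAD buffer; get_seg_addr()
     * returns the multipacket_list entry for segments 2 and 3.
     */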
Index: last_stable/drivers/infiniband/core/mad.c
===================================================================
--- last_stable.orig/drivers/infiniband/core/mad.c
+++ last_stable/drivers/infiniband/core/mad.c
@@ -779,6 +779,17 @@ static int get_buf_length(int hdr_len, i
return hdr_len + data_len + pad;
}
+static void free_send_multipacket_list(struct ib_mad_send_wr_private *
+ mad_send_wr)
+{
+ struct ib_mad_multipacket_seg *s, *t;
+
+ list_for_each_entry_safe(s, t, &mad_send_wr->multipacket_list, list) {
+ list_del(&s->list);
+ kfree(s);
+ }
+}
+
struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
u32 remote_qpn, u16 pkey_index,
int rmpp_active,
@@ -787,39 +798,38 @@ struct ib_mad_send_buf * ib_create_send_
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
- int length, buf_size;
+ int length, message_size, seg_size;
void *buf;
mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
agent);
- buf_size = get_buf_length(hdr_len, data_len);
+ message_size = get_buf_length(hdr_len, data_len);
if ((!mad_agent->rmpp_version &&
- (rmpp_active || buf_size > sizeof(struct ib_mad))) ||
- (!rmpp_active && buf_size > sizeof(struct ib_mad)))
+ (rmpp_active || message_size > sizeof(struct ib_mad))) ||
+ (!rmpp_active && message_size > sizeof(struct ib_mad)))
return ERR_PTR(-EINVAL);
- length = sizeof *mad_send_wr + buf_size;
- if (length >= PAGE_SIZE)
- buf = (void *)__get_free_pages(gfp_mask, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
- else
- buf = kmalloc(length, gfp_mask);
+ length = sizeof *mad_send_wr + message_size;
+ buf = kzalloc(sizeof *mad_send_wr + sizeof(struct ib_mad), gfp_mask);
if (!buf)
return ERR_PTR(-ENOMEM);
- memset(buf, 0, length);
-
- mad_send_wr = buf + buf_size;
+ mad_send_wr = buf + sizeof(struct ib_mad);
+ INIT_LIST_HEAD(&mad_send_wr->multipacket_list);
mad_send_wr->send_buf.mad = buf;
+ mad_send_wr->send_buf.mad_payload = buf + hdr_len;
mad_send_wr->mad_agent_priv = mad_agent_priv;
- mad_send_wr->sg_list[0].length = buf_size;
+ mad_send_wr->sg_list[0].length = hdr_len;
mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
+ mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
+ mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;
mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
- mad_send_wr->send_wr.num_sge = 1;
+ mad_send_wr->send_wr.num_sge = 2;
mad_send_wr->send_wr.opcode = IB_WR_SEND;
mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
@@ -827,6 +837,7 @@ struct ib_mad_send_buf * ib_create_send_
mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
if (rmpp_active) {
+ struct ib_mad_multipacket_seg *seg;
struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
IB_MGMT_RMPP_HDR + data_len);
@@ -834,6 +845,27 @@ struct ib_mad_send_buf * ib_create_send_
rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
IB_MGMT_RMPP_FLAG_ACTIVE);
+ mad_send_wr->total_length = message_size;
+ /* allocate RMPP buffers */
+ message_size -= sizeof(struct ib_mad);
+ seg_size = sizeof(struct ib_mad) - hdr_len;
+ while (message_size > 0) {
+ seg = kmalloc(sizeof(struct ib_mad_multipacket_seg) +
+ seg_size, gfp_mask);
+ if (!seg) {
+ printk(KERN_ERR "ib_create_send_mad: RMPP mem "
+ "alloc failed for len %zd, gfp %#x\n",
+ sizeof(struct ib_mad_multipacket_seg) +
+ seg_size, gfp_mask);
+ free_send_multipacket_list(mad_send_wr);
+ kfree(buf);
+ return ERR_PTR(-ENOMEM);
+ }
+ seg->size = seg_size;
+ list_add_tail(&seg->list,
+ &mad_send_wr->multipacket_list);
+ message_size -= seg_size;
+ }
}
mad_send_wr->send_buf.mad_agent = mad_agent;
@@ -842,23 +874,36 @@ struct ib_mad_send_buf * ib_create_send_
}
EXPORT_SYMBOL(ib_create_send_mad);
+struct ib_mad_multipacket_seg *ib_get_multipacket_seg(struct ib_mad_send_buf *
+ send_buf, int seg_num)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_mad_multipacket_seg *seg;
+ int i = 2;
+
+ mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
+ send_buf);
+ list_for_each_entry(seg, &mad_send_wr->multipacket_list, list) {
+ if (i == seg_num)
+ return seg;
+ i++;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(ib_get_multipacket_seg);
+
void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
{
struct ib_mad_agent_private *mad_agent_priv;
- void *mad_send_wr;
- int length;
+ struct ib_mad_send_wr_private *mad_send_wr;
mad_agent_priv = container_of(send_buf->mad_agent,
struct ib_mad_agent_private, agent);
mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
send_buf);
- length = sizeof(struct ib_mad_send_wr_private) + (mad_send_wr - send_buf->mad);
- if (length >= PAGE_SIZE)
- free_pages((unsigned long)send_buf->mad, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
- else
- kfree(send_buf->mad);
-
+ free_send_multipacket_list(mad_send_wr);
+ kfree(send_buf->mad);
if (atomic_dec_and_test(&mad_agent_priv->refcount))
wake_up(&mad_agent_priv->wait);
}
@@ -881,10 +926,17 @@ int ib_send_mad(struct ib_mad_send_wr_pr
mad_agent = mad_send_wr->send_buf.mad_agent;
sge = mad_send_wr->sg_list;
- sge->addr = dma_map_single(mad_agent->device->dma_device,
- mad_send_wr->send_buf.mad, sge->length,
- DMA_TO_DEVICE);
- pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
+ sge[0].addr = dma_map_single(mad_agent->device->dma_device,
+ mad_send_wr->send_buf.mad,
+ sge[0].length,
+ DMA_TO_DEVICE);
+ pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr);
+
+ sge[1].addr = dma_map_single(mad_agent->device->dma_device,
+ mad_send_wr->send_buf.mad_payload,
+ sge[1].length,
+ DMA_TO_DEVICE);
+ pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr);
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
@@ -901,11 +953,15 @@ int ib_send_mad(struct ib_mad_send_wr_pr
list_add_tail(&mad_send_wr->mad_list.list, list);
}
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
- if (ret)
+ if (ret) {
dma_unmap_single(mad_agent->device->dma_device,
- pci_unmap_addr(mad_send_wr, mapping),
- sge->length, DMA_TO_DEVICE);
+ pci_unmap_addr(mad_send_wr, header_mapping),
+ sge[0].length, DMA_TO_DEVICE);
+ dma_unmap_single(mad_agent->device->dma_device,
+ pci_unmap_addr(mad_send_wr, payload_mapping),
+ sge[1].length, DMA_TO_DEVICE);
+ }
return ret;
}
@@ -1876,8 +1932,11 @@ static void ib_mad_send_done_handler(str
retry:
dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
- pci_unmap_addr(mad_send_wr, mapping),
+ pci_unmap_addr(mad_send_wr, header_mapping),
mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
+ dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
+ pci_unmap_addr(mad_send_wr, payload_mapping),
+ mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
queued_send_wr = NULL;
spin_lock_irqsave(&send_queue->lock, flags);
list_del(&mad_list->list);
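
The net effect of the mad.c changes: every MAD now goes out with a
two-entry gather list, where sge[0] covers the header bytes at the start
of send_buf.mad and sge[1] covers whatever send_buf.mad_payload points at
(the tail of the first buffer for segment 1, a multipacket_list entry for
later segments, repointed by send_next_seg()). A minimal sketch of a
kernel consumer filling such a buffer; ib_get_multipacket_seg() is from
this patch, the helper itself is hypothetical:

    /* sketch: copy data_len payload bytes into an RMPP send buffer
     * that ib_create_send_mad() returned with rmpp_active = 1 */
    static void fill_rmpp_payload(struct ib_mad_send_buf *msg,
                                  void *src, int hdr_len, int data_len)
    {
        struct ib_mad_multipacket_seg *seg;
        int first = sizeof(struct ib_mad) - hdr_len;
        int copied, seg_num;

        /* segment 1 sits directly behind the headers in msg->mad */
        memcpy(msg->mad + hdr_len, src, min(data_len, first));

        /* segments 2..N come from the multipacket list */
        for (copied = first, seg_num = 2; copied < data_len; seg_num++) {
            seg = ib_get_multipacket_seg(msg, seg_num);
            memcpy(seg->data, src + copied,
                   min(data_len - copied, seg->size));
            copied += seg->size;
        }
    }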
Index: last_stable/drivers/infiniband/core/user_mad.c
===================================================================
--- last_stable.orig/drivers/infiniband/core/user_mad.c
+++ last_stable/drivers/infiniband/core/user_mad.c
@@ -187,7 +270,7 @@ static void send_handler(struct ib_mad_a
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
- timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL);
+ timeout = alloc_packet();
if (!timeout)
goto out;
@@ -198,40 +281,12 @@ static void send_handler(struct ib_mad_a
sizeof (struct ib_mad_hdr));
if (queue_packet(file, agent, timeout))
- kfree(timeout);
+ free_packet(timeout);
}
out:
kfree(packet);
}
-static struct ib_umad_packet *alloc_packet(int buf_size)
-{
- struct ib_umad_packet *packet;
- int length = sizeof *packet + buf_size;
-
- if (length >= PAGE_SIZE)
- packet = (void *)__get_free_pages(GFP_KERNEL, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
- else
- packet = kmalloc(length, GFP_KERNEL);
-
- if (!packet)
- return NULL;
-
- memset(packet, 0, length);
- return packet;
-}
-
-static void free_packet(struct ib_umad_packet *packet)
-{
- int length = packet->length + sizeof *packet;
- if (length >= PAGE_SIZE)
- free_pages((unsigned long) packet, long_log2(roundup_pow_of_two(length)) - PAGE_SHIFT);
- else
- kfree(packet);
-}
-
-
-
static void recv_handler(struct ib_mad_agent *agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
@@ -339,6 +422,8 @@ static ssize_t ib_umad_write(struct file
__be64 *tid;
int ret, length, hdr_len, copy_offset;
int rmpp_active, has_rmpp_header;
+ int s, seg_num;
+ struct ib_mad_multipacket_seg *seg;
if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
return -EINVAL;
@@ -415,6 +500,11 @@ static ssize_t ib_umad_write(struct file
goto err_ah;
}
+ if (!rmpp_active && length > sizeof(struct ib_mad)) {
+ ret = -EINVAL;
+ goto err_ah;
+ }
+
packet->msg = ib_create_send_mad(agent,
be32_to_cpu(packet->mad.hdr.qpn),
0, rmpp_active,
@@ -432,14 +522,32 @@ static ssize_t ib_umad_write(struct file
/* Copy MAD headers (RMPP header in place) */
memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
- /* Now, copy rest of message from user into send buffer */
+ /* complete copying first 256 bytes of message into send buffer */
if (copy_from_user(packet->msg->mad + copy_offset,
buf + sizeof (struct ib_user_mad) + copy_offset,
- length - copy_offset)) {
+ min_t(int, length, sizeof(struct ib_mad)) - copy_offset)) {
ret = -EFAULT;
goto err_msg;
}
+ /* if RMPP, copy rest of send message from user to multipacket list */
+ length -= sizeof(struct ib_mad);
+ if (length > 0) {
+ buf += sizeof (struct ib_user_mad) + sizeof(struct ib_mad);
+ for (seg_num = 2; length > 0; ++seg_num, buf += s, length -= s) {
+ seg = ib_get_multipacket_seg(packet->msg, seg_num);
+ BUG_ON(!seg);
+ s = min_t(int, length, seg->size);
+ if (copy_from_user(seg->data, buf, s)) {
+ ret = -EFAULT;
+ goto err_msg;
+ }
+ }
+ /* Pad last segment with zeroes. */
+ if (seg->size - s)
+ memset(seg->data + s, 0, seg->size - s);
+ }
+
/*
* If userspace is generating a request that will generate a
* response, we need to make sure the high-order part of the
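
From userspace nothing changes: a single write() on the umad device still
carries the entire RMPP message, but the kernel now copies it into
per-segment kmalloc buffers (zero-padding the tail of the last segment)
instead of one large physically contiguous allocation. A sketch of the
unchanged usage (addressing and MAD/RMPP header setup elided; variable
names are illustrative):

    /* struct ib_user_mad as in <rdma/ib_user_mad.h>;
     * mad_len may exceed 256 bytes for an RMPP send */
    size_t len = sizeof (struct ib_user_mad) + mad_len;
    struct ib_user_mad *umad = calloc(1, len);

    /* ... set the agent id and destination in *umad and build the
     * RMPP MAD (headers plus full payload) in umad->data ... */

    if (write(fd, umad, len) != (ssize_t) len)
        perror("umad write");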