<br><font size=2 face="sans-serif">1. Split the CQ and its completion handler into separate
send and receive CQs/handlers, and raise the send-side poll batch size (IPOIB_NUM_SEND_WC) to 32; IPOIB_NUM_RECV_WC stays at 4.</font><font size=3>
</font>
<br>
<br><font size=2 face="sans-serif">Signed-off-by: Shirley Ma &lt;xma@us.ibm.com&gt;</font>
<br>
<div>
<br><font size=2 face="sans-serif">diff -urpN infiniband/ulp/ipoib/ipoib.h
infiniband-split-cq/ulp/ipoib/ipoib.h<br>
--- infiniband/ulp/ipoib/ipoib.h 2006-04-05
17:43:18.000000000 -0700<br>
+++ infiniband-split-cq/ulp/ipoib/ipoib.h 2006-05-22
08:48:38.000000000 -0700<br>
@@ -2,6 +2,8 @@<br>
* Copyright (c) 2004, 2005 Topspin Communications. All rights
reserved.<br>
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.<br>
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.<br>
+ * Copyright (c) 2006 International Business Machines Corp.,<br>
+ * All rights reserved.<br>
*<br>
* This software is available to you under a choice of one of two<br>
* licenses. You may choose to be licensed under the terms
of the GNU<br>
@@ -71,7 +73,8 @@ enum {<br>
IPOIB_MAX_QUEUE_SIZE
= 8192,<br>
IPOIB_MIN_QUEUE_SIZE
= 2,<br>
<br>
- IPOIB_NUM_WC
= 4,<br>
+ IPOIB_NUM_SEND_WC
= 32,<br>
+ IPOIB_NUM_RECV_WC
= 4,<br>
<br>
IPOIB_MAX_PATH_REC_QUEUE = 3,<br>
IPOIB_MAX_MCAST_QUEUE
= 3,<br>
@@ -151,7 +154,8 @@ struct ipoib_dev_priv {<br>
u16
pkey;<br>
struct ib_pd
*pd;<br>
struct ib_mr
*mr;</font>
<br><font size=2 face="sans-serif">- struct
ib_cq *cq;<br>
+ struct ib_cq
*send_cq;<br>
+ struct ib_cq
*recv_cq;<br>
struct ib_qp
*qp;<br>
u32
qkey;<br>
<br>
@@ -164,15 +168,13 @@ struct ipoib_dev_priv {<br>
<br>
struct ipoib_rx_buf *rx_ring;<br>
<br>
- spinlock_t
tx_lock;<br>
+ spinlock_t
tx_lock ____cacheline_aligned_in_smp;<br>
struct ipoib_tx_buf *tx_ring;<br>
unsigned
tx_head;<br>
unsigned
tx_tail;<br>
struct ib_sge
tx_sge;<br>
struct ib_send_wr tx_wr;<br>
<br>
- struct ib_wc ibwc[IPOIB_NUM_WC];<br>
-<br>
struct list_head dead_ahs;<br>
<br>
struct ib_event_handler event_handler;<br>
@@ -245,7 +247,8 @@ extern struct workqueue_struct *ipoib_wo<br>
<br>
/* functions */<br>
<br>
-void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);<br>
+void ipoib_ib_send_completion(struct ib_cq *cq, void *dev_ptr);<br>
+void ipoib_ib_recv_completion(struct ib_cq *cq, void *dev_ptr);<br>
<br>
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,<br>
struct
ib_pd *pd, struct ib_ah_attr *attr);<br>
diff -urpN infiniband/ulp/ipoib/ipoib_ib.c infiniband-split-cq/ulp/ipoib/ipoib_ib.c</font>
<br><font size=2 face="sans-serif">--- infiniband/ulp/ipoib/ipoib_ib.c
2006-04-05 17:43:18.000000000 -0700<br>
+++ infiniband-split-cq/ulp/ipoib/ipoib_ib.c 2006-05-22
08:48:23.000000000 -0700<br>
@@ -3,6 +3,8 @@<br>
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.<br>
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.<br>
* Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.<br>
+ * Copyright (c) 2006 International Business Machines Corp.,<br>
+ * All rights reserved.<br>
*<br>
* This software is available to you under a choice of one of two<br>
* licenses. You may choose to be licensed under the terms
of the GNU<br>
@@ -50,7 +52,6 @@ MODULE_PARM_DESC(data_debug_level,<br>
"Enable
data path debug tracing if > 0");<br>
#endif<br>
<br>
-#define IPOIB_OP_RECV
(1ul << 31)<br>
<br>
static DEFINE_MUTEX(pkey_mutex);<br>
<br>
@@ -108,7 +109,7 @@ static int ipoib_ib_post_receive(struct <br>
list.lkey = priv->mr->lkey;<br>
<br>
param.next = NULL;<br>
- param.wr_id = id | IPOIB_OP_RECV;<br>
+ param.wr_id = id;<br>
param.sg_list = &list;<br>
param.num_sge = 1;<br>
<br>
@@ -175,8 +176,8 @@ static int ipoib_ib_post_receives(struct</font>
<br><font size=2 face="sans-serif"> return
0;<br>
}<br>
<br>
-static void ipoib_ib_handle_wc(struct net_device *dev,<br>
-
struct ib_wc *wc)<br>
+static void ipoib_ib_handle_recv_wc(struct net_device *dev,<br>
+
struct ib_wc *wc)<br>
{<br>
struct ipoib_dev_priv *priv = netdev_priv(dev);<br>
unsigned int wr_id = wc->wr_id;<br>
@@ -184,121 +185,142 @@ static void ipoib_ib_handle_wc(struct ne<br>
ipoib_dbg_data(priv, "called:
id %d, op %d, status: %d\n",<br>
wr_id, wc->opcode, wc->status);<br>
<br>
- if (wr_id & IPOIB_OP_RECV) {<br>
- wr_id
&= ~IPOIB_OP_RECV;<br>
-<br>
- if
(wr_id < ipoib_recvq_size) {<br>
-
struct sk_buff *skb = priv->rx_ring[wr_id].skb;<br>
-
dma_addr_t addr
= priv->rx_ring[wr_id].mapping;<br>
-<br>
-
if (unlikely(wc->status != IB_WC_SUCCESS))
{<br>
-
if
(wc->status != IB_WC_WR_FLUSH_ERR)<br>
-
ipoib_warn(priv, "failed recv event
"<br>
-
"(status=%d, wrid=%d vend_err %x)\n",<br>
-
wc->status, wr_id, wc->vendor_err);<br>
-
dma_unmap_single(priv->ca->dma_device,
addr,<br>
-
IPOIB_BUF_SIZE, DMA_FROM_DEVICE);<br>
-
dev_kfree_skb_any(skb);<br>
-
priv->rx_ring[wr_id].skb
= NULL;</font>
<br><font size=2 face="sans-serif">-
return;<br>
-
}<br>
-<br>
-
/*<br>
-
* If we can't allocate a new RX buffer,
dump<br>
-
* this packet and reuse the old buffer.<br>
-
*/<br>
-
if (unlikely(ipoib_alloc_rx_skb(dev,
wr_id))) {<br>
-
++priv->stats.rx_dropped;<br>
-
goto
repost;<br>
-
}<br>
-<br>
-
ipoib_dbg_data(priv, "received
%d bytes, SLID 0x%04x\n",<br>
-
wc->byte_len, wc->slid);<br>
<br>
+ if (wr_id < ipoib_recvq_size) {<br>
+ struct
sk_buff *skb = priv->rx_ring[wr_id].skb;<br>
+ dma_addr_t
addr = priv->rx_ring[wr_id].mapping;<br>
+<br>
+ if
(unlikely(wc->status != IB_WC_SUCCESS)) {<br>
+
if (wc->status != IB_WC_WR_FLUSH_ERR)<br>
+
ipoib_warn(priv,
"failed recv event "<br>
+
"(status=%d, wrid=%d vend_err
%x)\n",<br>
+
wc->status, wr_id, wc->vendor_err);<br>
dma_unmap_single(priv->ca->dma_device,
addr,<br>
IPOIB_BUF_SIZE, DMA_FROM_DEVICE);<br>
+
dev_kfree_skb_any(skb);<br>
+
priv->rx_ring[wr_id].skb = NULL;<br>
+
return;<br>
+ }<br>
<br>
-
skb_put(skb, wc->byte_len);<br>
-
skb_pull(skb, IB_GRH_BYTES);<br>
+ /*<br>
+
* If we can't allocate a new RX buffer, dump<br>
+
* this packet and reuse the old buffer.</font>
<br><font size=2 face="sans-serif">+
*/<br>
+ if
(unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {<br>
+
++priv->stats.rx_dropped;<br>
+
goto repost;<br>
+ }<br>
<br>
-
if (wc->slid != priv->local_lid
||<br>
-
wc->src_qp != priv->qp->qp_num)
{<br>
-
skb->protocol
= ((struct ipoib_header *) skb->data)->proto;<br>
-
skb->mac.raw
= skb->data;<br>
-
skb_pull(skb,
IPOIB_ENCAP_LEN);<br>
-<br>
-
dev->last_rx
= jiffies;<br>
-
++priv->stats.rx_packets;<br>
-
priv->stats.rx_bytes
+= skb->len;<br>
-<br>
-
skb->dev
= dev;<br>
-
/*
XXX get correct PACKET_ type here */<br>
-
skb->pkt_type
= PACKET_HOST;<br>
-
netif_rx_ni(skb);<br>
-
} else {<br>
-
ipoib_dbg_data(priv,
"dropping loopback packet\n");<br>
-
dev_kfree_skb_any(skb);<br>
-
}<br>
+ ipoib_dbg_data(priv,
"received %d bytes, SLID 0x%04x\n",<br>
+
wc->byte_len,
wc->slid);<br>
<br>
- repost:<br>
-
if (unlikely(ipoib_ib_post_receive(dev,
wr_id)))<br>
-
ipoib_warn(priv,
"ipoib_ib_post_receive failed "<br>
-
"for buf %d\n", wr_id);<br>
- }
else<br>
-
ipoib_warn(priv, "completion event
with wrid %d\n",<br>
-
wr_id);<br>
+ dma_unmap_single(priv->ca->dma_device,
addr,</font>
<br><font size=2 face="sans-serif">+
IPOIB_BUF_SIZE, DMA_FROM_DEVICE);<br>
<br>
- } else {<br>
- struct
ipoib_tx_buf *tx_req;<br>
- unsigned
long flags;<br>
+ skb_put(skb,
wc->byte_len);<br>
+ skb_pull(skb,
IB_GRH_BYTES);<br>
<br>
- if
(wr_id >= ipoib_sendq_size) {<br>
-
ipoib_warn(priv, "completion event
with wrid %d (> %d)\n",<br>
-
wr_id, ipoib_sendq_size);<br>
-
return;<br>
+ if
(wc->slid != priv->local_lid ||<br>
+
wc->src_qp != priv->qp->qp_num) {<br>
+
skb->protocol = ((struct ipoib_header
*) skb->data)->proto;<br>
+
skb->mac.raw = skb->data;<br>
+
skb_pull(skb, IPOIB_ENCAP_LEN);<br>
+<br>
+
dev->last_rx = jiffies;<br>
+
++priv->stats.rx_packets;<br>
+
priv->stats.rx_bytes += skb->len;<br>
+<br>
+
skb->dev = dev;<br>
+
/* XXX get correct PACKET_ type here
*/<br>
+
skb->pkt_type = PACKET_HOST;<br>
+
netif_rx_ni(skb);<br>
+ }
else {<br>
+
ipoib_dbg_data(priv, "dropping
loopback packet\n");<br>
+
dev_kfree_skb_any(skb);<br>
}<br>
<br>
- ipoib_dbg_data(priv,
"send complete, wrid %d\n", wr_id);<br>
+ repost:<br>
+ if
(unlikely(ipoib_ib_post_receive(dev, wr_id)))<br>
+
ipoib_warn(priv, "ipoib_ib_post_receive
failed "</font>
<br><font size=2 face="sans-serif">+
"for buf %d\n", wr_id);<br>
+ } else<br>
+ ipoib_warn(priv,
"completion event with wrid %d\n",<br>
+
wr_id);<br>
+}<br>
+<br>
+static void ipoib_ib_handle_send_wc(struct net_device *dev,<br>
+
struct ib_wc *wc)<br>
+{<br>
+ struct ipoib_dev_priv *priv = netdev_priv(dev);<br>
+ unsigned int wr_id = wc->wr_id;<br>
+ struct ipoib_tx_buf *tx_req;<br>
+ unsigned long flags;<br>
+<br>
+ ipoib_dbg_data(priv, "called:
id %d, op %d, status: %d\n",<br>
+
wr_id, wc->opcode, wc->status);<br>
<br>
- tx_req
= &priv->tx_ring[wr_id];<br>
+ if (wr_id >= ipoib_sendq_size)
{<br>
+ ipoib_warn(priv,
"completion event with wrid %d (> %d)\n",<br>
+
wr_id, ipoib_sendq_size);<br>
+ return;<br>
+ }<br>
<br>
- dma_unmap_single(priv->ca->dma_device,<br>
-
pci_unmap_addr(tx_req, mapping),<br>
-
tx_req->skb->len,<br>
-
DMA_TO_DEVICE);<br>
+ ipoib_dbg_data(priv, "send complete,
wrid %d\n", wr_id);<br>
<br>
- ++priv->stats.tx_packets;<br>
- priv->stats.tx_bytes
+= tx_req->skb->len;<br>
+ tx_req = &priv->tx_ring[wr_id];<br>
<br>
- dev_kfree_skb_any(tx_req->skb);<br>
+ dma_unmap_single(priv->ca->dma_device,<br>
+
pci_unmap_addr(tx_req, mapping),</font>
<br><font size=2 face="sans-serif">+
tx_req->skb->len,<br>
+
DMA_TO_DEVICE);<br>
<br>
- spin_lock_irqsave(&priv->tx_lock,
flags);<br>
- ++priv->tx_tail;<br>
- if
(netif_queue_stopped(dev) &&<br>
-
priv->tx_head - priv->tx_tail <= ipoib_sendq_size
>> 1)<br>
-
netif_wake_queue(dev);<br>
- spin_unlock_irqrestore(&priv->tx_lock,
flags);<br>
+ ++priv->stats.tx_packets;<br>
+ priv->stats.tx_bytes += tx_req->skb->len;<br>
<br>
- if
(wc->status != IB_WC_SUCCESS &&<br>
-
wc->status != IB_WC_WR_FLUSH_ERR)<br>
-
ipoib_warn(priv, "failed send event
"<br>
-
"(status=%d, wrid=%d vend_err %x)\n",<br>
-
wc->status, wr_id, wc->vendor_err);<br>
- }<br>
+ dev_kfree_skb_any(tx_req->skb);<br>
+<br>
+ spin_lock_irqsave(&priv->tx_lock,
flags);<br>
+ ++priv->tx_tail;<br>
+ if (netif_queue_stopped(dev) &&<br>
+ priv->tx_head - priv->tx_tail
<= ipoib_sendq_size >> 1)<br>
+ netif_wake_queue(dev);<br>
+ spin_unlock_irqrestore(&priv->tx_lock,
flags);<br>
+<br>
+ if (wc->status != IB_WC_SUCCESS
&&<br>
+ wc->status != IB_WC_WR_FLUSH_ERR)<br>
+ ipoib_warn(priv,
"failed send event "<br>
+
"(status=%d, wrid=%d vend_err
%x)\n",<br>
+
wc->status, wr_id, wc->vendor_err);</font>
<br><font size=2 face="sans-serif">+}<br>
+<br>
+void ipoib_ib_send_completion(struct ib_cq *cq, void *dev_ptr)<br>
+{<br>
+ struct net_device *dev = (struct net_device
*) dev_ptr;<br>
+ struct ipoib_dev_priv *priv = netdev_priv(dev);<br>
+ struct ib_wc ibwc[IPOIB_NUM_SEND_WC];<br>
+ int n, i;<br>
+<br>
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);<br>
+ do {<br>
+ n
= ib_poll_cq(cq, IPOIB_NUM_SEND_WC, ibwc);<br>
+ for
(i = 0; i < n; ++i)<br>
+
ipoib_ib_handle_send_wc(dev, ibwc +
i);<br>
+ } while (n != 0);<br>
}<br>
<br>
-void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)<br>
+void ipoib_ib_recv_completion(struct ib_cq *cq, void *dev_ptr)<br>
{<br>
struct net_device *dev = (struct net_device
*) dev_ptr;<br>
struct ipoib_dev_priv *priv = netdev_priv(dev);<br>
+ struct ib_wc ibwc[IPOIB_NUM_RECV_WC];<br>
int n, i;<br>
<br>
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);<br>
do {<br>
- n
= ib_poll_cq(cq, IPOIB_NUM_WC, priv->ibwc);<br>
+ n
= ib_poll_cq(cq, IPOIB_NUM_RECV_WC, ibwc);<br>
for
(i = 0; i < n; ++i)<br>
-
ipoib_ib_handle_wc(dev, priv->ibwc
+ i);<br>
- } while (n == IPOIB_NUM_WC);<br>
+
ipoib_ib_handle_recv_wc(dev, ibwc + i);<br>
+ } while (n != 0);<br>
}</font>
<br><font size=2 face="sans-serif"> <br>
static inline int post_send(struct ipoib_dev_priv *priv,<br>
diff -urpN infiniband/ulp/ipoib/ipoib_main.c infiniband-split-cq/ulp/ipoib/ipoib_main.c<br>
--- infiniband/ulp/ipoib/ipoib_main.c 2006-05-03
13:16:18.000000000 -0700<br>
+++ infiniband-split-cq/ulp/ipoib/ipoib_main.c 2006-05-22
08:48:47.000000000 -0700<br>
@@ -2,6 +2,8 @@<br>
* Copyright (c) 2004 Topspin Communications. All rights reserved.<br>
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.<br>
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.<br>
+ * Copyright (c) 2006 International Business Machines Corp.,<br>
+ * All rights reserved.<br>
*<br>
* This software is available to you under a choice of one of two<br>
* licenses. You may choose to be licensed under the terms
of the GNU<br>
diff -urpN infiniband/ulp/ipoib/ipoib_verbs.c infiniband-split-cq/ulp/ipoib/ipoib_verbs.c<br>
--- infiniband/ulp/ipoib/ipoib_verbs.c 2006-04-05
17:43:18.000000000 -0700<br>
+++ infiniband-split-cq/ulp/ipoib/ipoib_verbs.c 2006-05-22
08:48:54.000000000 -0700<br>
@@ -1,6 +1,8 @@<br>
/*<br>
* Copyright (c) 2004, 2005 Topspin Communications. All rights
reserved.</font>
<br><font size=2 face="sans-serif"> * Copyright (c) 2005 Mellanox
Technologies. All rights reserved.<br>
+ * Copyright (c) 2006 International Business Machines Corp.,<br>
+ * All rights reserved.<br>
*<br>
* This software is available to you under a choice of one of two<br>
* licenses. You may choose to be licensed under the terms
of the GNU<br>
@@ -174,24 +176,35 @@ int ipoib_transport_dev_init(struct net_<br>
return
-ENODEV;<br>
}<br>
<br>
- priv->cq = ib_create_cq(priv->ca,
ipoib_ib_completion, NULL, dev,<br>
-
ipoib_sendq_size
+ ipoib_recvq_size + 1);<br>
- if (IS_ERR(priv->cq)) {<br>
- printk(KERN_WARNING
"%s: failed to create CQ\n", ca->name);<br>
+ priv->send_cq = ib_create_cq(priv->ca,
ipoib_ib_send_completion, NULL, dev,<br>
+
ipoib_sendq_size
+ 1);<br>
+ if (IS_ERR(priv->send_cq)) {<br>
+ printk(KERN_WARNING
"%s: failed to create send CQ\n", ca->name);<br>
goto
out_free_pd;<br>
}<br>
<br>
- if (ib_req_notify_cq(priv->cq,
IB_CQ_NEXT_COMP))<br>
- goto
out_free_cq;<br>
+ if (ib_req_notify_cq(priv->send_cq,
IB_CQ_NEXT_COMP))<br>
+ goto
out_free_send_cq;<br>
+<br>
+<br>
+ priv->recv_cq = ib_create_cq(priv->ca,
ipoib_ib_recv_completion, NULL, dev,</font>
<br><font size=2 face="sans-serif">+
ipoib_recvq_size + 1);<br>
+ if (IS_ERR(priv->recv_cq)) {<br>
+ printk(KERN_WARNING
"%s: failed to create recv CQ\n", ca->name);<br>
+ goto
out_free_send_cq;<br>
+ }<br>
+<br>
+ if (ib_req_notify_cq(priv->recv_cq,
IB_CQ_NEXT_COMP))<br>
+ goto
out_free_recv_cq;<br>
<br>
priv->mr = ib_get_dma_mr(priv->pd,
IB_ACCESS_LOCAL_WRITE);<br>
if (IS_ERR(priv->mr)) {<br>
printk(KERN_WARNING
"%s: ib_get_dma_mr failed\n", ca->name);<br>
- goto
out_free_cq;<br>
+ goto
out_free_recv_cq;<br>
}<br>
<br>
- init_attr.send_cq = priv->cq;<br>
- init_attr.recv_cq = priv->cq,<br>
+ init_attr.send_cq = priv->send_cq;<br>
+ init_attr.recv_cq = priv->recv_cq,<br>
<br>
priv->qp = ib_create_qp(priv->pd,
&init_attr);<br>
if (IS_ERR(priv->qp)) {<br>
@@ -215,8 +228,11 @@ int ipoib_transport_dev_init(struct net_<br>
out_free_mr:<br>
ib_dereg_mr(priv->mr);<br>
<br>
-out_free_cq:<br>
- ib_destroy_cq(priv->cq);<br>
+out_free_recv_cq:<br>
+ ib_destroy_cq(priv->recv_cq);<br>
+<br>
+out_free_send_cq:<br>
+ ib_destroy_cq(priv->send_cq);<br>
<br>
out_free_pd:<br>
ib_dealloc_pd(priv->pd);<br>
@@ -238,7 +254,10 @@ void ipoib_transport_dev_cleanup(struct <br>
if (ib_dereg_mr(priv->mr))</font>
<br><font size=2 face="sans-serif">
ipoib_warn(priv, "ib_dereg_mr failed\n");<br>
<br>
- if (ib_destroy_cq(priv->cq))<br>
+ if (ib_destroy_cq(priv->send_cq))<br>
+ ipoib_warn(priv,
"ib_cq_destroy failed\n");<br>
+<br>
+ if (ib_destroy_cq(priv->recv_cq))<br>
ipoib_warn(priv,
"ib_cq_destroy failed\n");<br>
<br>
if (ib_dealloc_pd(priv->pd))</font>
<br><font size=2 face="sans-serif"><br>
</font>
<br>
<br><font size=2 face="sans-serif">Thanks<br>
Shirley Ma<br>
IBM Linux Technology Center<br>
15300 SW Koll Parkway<br>
Beaverton, OR 97006-6063<br>
Phone(Fax): (503) 578-7638<br>
<br>
</font></div>