[ofa-general] Re: [PATCH 4/4] [RFC] IPoIB/cm: Add connected mode support for devices without SRQs
Pradeep Satyanarayana
pradeeps at linux.vnet.ibm.com
Wed Nov 7 13:30:47 PST 2007
Sorry. Resending comments as requested. These comments are based on the for-2.6.25 git tree
that I pulled, and so they are not in patch format.
/*
 * Per-connection RX setup for connected mode on a device without an SRQ:
 * allocate the receive ring, account this QP against the global
 * ipoib_max_conn_qp limit, then allocate an skb and post a receive work
 * request for every ring slot.  Returns 0 on success or a negative errno.
 * (Reviewer remarks from the mailing list are left inline below.)
 */
static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
struct ipoib_cm_rx *rx)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
int i;
rx->rx_ring = kcalloc(ipoib_recvq_size, sizeof *rx->rx_ring, GFP_KERNEL);
if (!rx->rx_ring)
return -ENOMEM;
/* Enforce the cap on concurrent non-SRQ connected-mode QPs; if it has
 * been reached, reject the connection request with IB_CM_REJ_NO_QP. */
spin_lock_irq(&priv->lock);
if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
spin_unlock_irq(&priv->lock);
ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0);
/* NOTE(review): rx->rx_ring allocated above appears to be leaked on
 * this rejection path as well — confirm and free it before returning. */
return -EINVAL;
} else
++priv->cm.nonsrq_conn_qp;
spin_unlock_irq(&priv->lock);
/* Fill every slot: allocate the skb (and its DMA mappings), then post
 * the corresponding receive WR on this connection's own QP. */
for (i = 0; i < ipoib_recvq_size; ++i) {
if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
rx->rx_ring[i].mapping)) {
ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
ret = -ENOMEM;
This will cause an skb leak- see generic comments about ipoib_cm_alloc_rx_skb() below
goto err;
}
ret = ipoib_cm_post_receive_nonsrq(dev, rx, i);
if (ret) {
ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
"failed for buf %d\n", i);
/* NOTE(review): the real errno from the post call is overwritten
 * with -EIO here — consider propagating ret unchanged. */
ret = -EIO;
This will cause an skb leak- see generic comments about ipoib_cm_alloc_rx_skb() below
goto err;
}
}
rx->recv_count = ipoib_recvq_size;
return 0;
err:
/* Undo the QP accounting taken above.  The skbs already allocated (and
 * their DMA mappings) and the ring itself are NOT released here — see
 * the reviewer remark immediately below. */
spin_lock_irq(&priv->lock);
--priv->cm.nonsrq_conn_qp;
spin_unlock_irq(&priv->lock);
kfree(rx_ring) is missing
return ret;
}
Generic comment about ipoib_cm_alloc_rx_skb(), which applies to both the SRQ and non-SRQ cases
(except in the receive wc handler):
I find that skbs will be leaked if ipoib_cm_alloc_rx_skb() fails before all the rx skbs
have been allocated. We must undo the allocations and DMA mappings already made. Probably we
should call ipoib_cm_dev_cleanup() and have that routine free the skbs and do the unmapping.
/*
 * Receive completion handler for connected-mode QPs.  Decodes the wr_id,
 * handles the special RX-drain sentinel, selects the SRQ-wide or
 * per-connection ring, swaps the completed skb for a freshly allocated
 * replacement, hands the packet to the network stack, and finally reposts
 * the ring slot.  (Reviewer remarks from the mailing list are left inline.)
 */
void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_cm_rx_buf *rx_ring;
/* Strip the CM/RECV flag bits to recover the ring index. */
unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
struct sk_buff *skb, *newskb;
struct ipoib_cm_rx *p;
unsigned long flags;
u64 mapping[IPOIB_CM_RX_SG];
int frags;
ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
wr_id, wc->status);
/* An out-of-range index is either the RX-drain sentinel or a bug. */
if (unlikely(wr_id >= ipoib_recvq_size)) {
printk(KERN_WARNING "Inside rx wc with wr_id=0x%x\n", wr_id);
if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
/* Drain completed: move the drained connections to the reap
 * list and let the reap task destroy them. */
spin_lock_irqsave(&priv->lock, flags);
list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
ipoib_cm_start_rx_drain(priv);
I do not understand why we need to call ipoib_cm_start_rx_drain(). We have already received a work
completion with RX_DRAIN set.
queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
spin_unlock_irqrestore(&priv->lock, flags);
} else
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
wr_id, ipoib_recvq_size);
return;
}
p = wc->qp->qp_context;
/* SRQ devices share one ring; non-SRQ connections each own one. */
if (ipoib_cm_has_srq(dev))
rx_ring = priv->cm.srq_ring;
else
rx_ring = p->rx_ring;
skb = rx_ring[wr_id].skb;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
ipoib_dbg(priv, "cm recv error "
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
++dev->stats.rx_dropped;
/* NOTE(review): p->rx_ring == NULL here presumably means the SRQ
 * case, where the slot can simply be reposted — confirm, since the
 * inline remark below suggests this condition may be wrong. */
if (!p->rx_ring)
If rx_ring is indeed NULL, there is no point in continuing further,
we will probably crash sooner or later. We should insert a BUG_ON and
thus crash the system.
goto repost;
else {
/* Non-SRQ: on error do not repost; once the last outstanding
 * receive completes, schedule the connection for reaping. */
if (!--p->recv_count) {
printk(KERN_WARNING "recv_count=0x%x\n", p->recv_count);
spin_lock_irqsave(&priv->lock, flags);
list_move(&p->list, &priv->cm.rx_reap_list);
spin_unlock_irqrestore(&priv->lock, flags);
queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
}
return;
}
}
/* Periodically refresh this connection's position in the passive-ID
 * LRU list so an active receiver is not stale-reaped. */
if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
spin_lock_irqsave(&priv->lock, flags);
p->jiffies = jiffies;
/* Move this entry to list head, but do not re-add it
 * if it has been moved out of list. */
if (p->state == IPOIB_CM_RX_LIVE)
list_move(&p->list, &priv->cm.passive_ids);
spin_unlock_irqrestore(&priv->lock, flags);
}
}
/* Number of page fragments beyond the linear IPOIB_CM_HEAD_SIZE part. */
frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
(unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping);
if (unlikely(!newskb)) {
/*
 * If we can't allocate a new RX buffer, dump
 * this packet and reuse the old buffer.
 */
ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
++dev->stats.rx_dropped;
goto repost;
}
/* Unmap the completed buffer, then record the new skb's mappings. */
ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
/* Trim the received skb to byte_len, donating unused fragments from
 * newskb, then strip the IPoIB encapsulation header. */
skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
skb->protocol = ((struct ipoib_header *) skb->data)->proto;
skb_reset_mac_header(skb);
skb_pull(skb, IPOIB_ENCAP_LEN);
dev->last_rx = jiffies;
++dev->stats.rx_packets;
dev->stats.rx_bytes += skb->len;
skb->dev = dev;
/* XXX get correct PACKET_ type here */
skb->pkt_type = PACKET_HOST;
netif_receive_skb(skb);
repost:
/* Repost the ring slot on the per-connection QP or the shared SRQ. */
if (p->rx_ring) {
Shouldn't this be if(!ipoib_cm_has_srq())?
if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) {
--p->recv_count;
ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
"for buf %d\n", wr_id);
}
} else {
if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id)))
ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
"for buf %d\n", wr_id);
}
}
More information about the general
mailing list